simba-uw-tf-dev 4.6.4__py3-none-any.whl → 4.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of simba-uw-tf-dev has been flagged as potentially problematic. (The source page linked to further details here; the link target was not preserved in this text.)

Files changed (53)
  1. simba/data_processors/blob_location_computer.py +1 -1
  2. simba/data_processors/cuda/geometry.py +45 -27
  3. simba/data_processors/cuda/image.py +1624 -1600
  4. simba/data_processors/cuda/statistics.py +72 -25
  5. simba/data_processors/cuda/timeseries.py +1 -1
  6. simba/data_processors/egocentric_aligner.py +25 -7
  7. simba/data_processors/kleinberg_calculator.py +6 -2
  8. simba/feature_extractors/feature_subsets.py +14 -7
  9. simba/feature_extractors/straub_tail_analyzer.py +4 -6
  10. simba/labelling/standard_labeller.py +1 -1
  11. simba/mixins/geometry_mixin.py +8 -8
  12. simba/mixins/image_mixin.py +14 -14
  13. simba/mixins/statistics_mixin.py +48 -11
  14. simba/mixins/timeseries_features_mixin.py +1 -1
  15. simba/mixins/train_model_mixin.py +65 -27
  16. simba/model/inference_batch.py +1 -1
  17. simba/model/yolo_seg_inference.py +3 -3
  18. simba/plotting/heat_mapper_clf_mp.py +2 -2
  19. simba/pose_importers/simba_blob_importer.py +3 -3
  20. simba/roi_tools/roi_aggregate_stats_mp.py +1 -1
  21. simba/roi_tools/roi_clf_calculator_mp.py +1 -1
  22. simba/sandbox/analyze_runtimes.py +30 -0
  23. simba/sandbox/cuda/egocentric_rotator.py +374 -374
  24. simba/sandbox/proboscis_to_tip.py +28 -0
  25. simba/sandbox/test_directionality.py +47 -0
  26. simba/sandbox/test_nonstatic_directionality.py +27 -0
  27. simba/sandbox/test_pycharm_cuda.py +51 -0
  28. simba/sandbox/test_simba_install.py +41 -0
  29. simba/sandbox/test_static_directionality.py +26 -0
  30. simba/sandbox/test_static_directionality_2d.py +26 -0
  31. simba/sandbox/verify_env.py +42 -0
  32. simba/third_party_label_appenders/transform/coco_keypoints_to_yolo.py +3 -3
  33. simba/third_party_label_appenders/transform/coco_keypoints_to_yolo_bbox.py +2 -2
  34. simba/ui/pop_ups/fsttc_pop_up.py +27 -25
  35. simba/ui/pop_ups/kleinberg_pop_up.py +3 -2
  36. simba/utils/custom_feature_extractor.py +1 -1
  37. simba/utils/data.py +2 -3
  38. simba/utils/errors.py +441 -440
  39. simba/utils/lookups.py +1203 -1203
  40. simba/utils/read_write.py +70 -31
  41. simba/utils/yolo.py +10 -1
  42. simba/video_processors/blob_tracking_executor.py +2 -2
  43. simba/video_processors/clahe_ui.py +1 -1
  44. simba/video_processors/egocentric_video_rotator.py +44 -39
  45. simba/video_processors/multi_cropper.py +1 -1
  46. simba/video_processors/video_processing.py +5264 -5233
  47. simba/video_processors/videos_to_frames.py +43 -33
  48. {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/METADATA +4 -3
  49. {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/RECORD +53 -44
  50. {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/LICENSE +0 -0
  51. {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/WHEEL +0 -0
  52. {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/entry_points.txt +0 -0
  53. {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,8 @@ from sklearn.metrics import (adjusted_mutual_info_score, adjusted_rand_score,
8
8
  fowlkes_mallows_score)
9
9
  from sklearn.neighbors import LocalOutlierFactor
10
10
 
11
+ from simba.utils.printing import SimbaTimer
12
+
11
13
  try:
12
14
  from typing import Literal
13
15
  except:
@@ -538,7 +540,8 @@ class Statistics(FeatureExtractionMixin):
538
540
  sample_1: np.ndarray,
539
541
  sample_2: np.ndarray,
540
542
  fill_value: Optional[int] = 1,
541
- bucket_method: Literal["fd", "doane", "auto", "scott", "stone", "rice", "sturges", "sqrt"] = "auto") -> float:
543
+ bucket_method: Literal["fd", "doane", "auto", "scott", "stone", "rice", "sturges", "sqrt"] = "auto",
544
+ verbose: bool = False) -> float:
542
545
 
543
546
  r"""
544
547
  Compute Kullback-Leibler divergence between two distributions.
@@ -562,6 +565,7 @@ class Statistics(FeatureExtractionMixin):
562
565
  :returns: Kullback-Leibler divergence between ``sample_1`` and ``sample_2``
563
566
  :rtype: float
564
567
  """
568
+ timer = SimbaTimer(start=True)
565
569
  check_valid_array(data=sample_1, source=Statistics.kullback_leibler_divergence.__name__, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
566
570
  check_valid_array(data=sample_2, source=Statistics.kullback_leibler_divergence.__name__, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
567
571
  check_str(name=f"{self.__class__.__name__} bucket_method", value=bucket_method, options=Options.BUCKET_METHODS.value)
@@ -573,7 +577,10 @@ class Statistics(FeatureExtractionMixin):
573
577
  sample_1_hist[sample_1_hist == 0] = fill_value
574
578
  sample_2_hist[sample_2_hist == 0] = fill_value
575
579
  sample_1_hist, sample_2_hist = sample_1_hist / np.sum(sample_1_hist), sample_2_hist / np.sum(sample_2_hist)
576
- return stats.entropy(pk=sample_1_hist, qk=sample_2_hist)
580
+ kl = stats.entropy(pk=sample_1_hist, qk=sample_2_hist)
581
+ timer.stop_timer()
582
+ if verbose: print(f'KL divergence performed on {sample_1.shape[0]} observations (elapsed time: {timer.elapsed_time_str}s)')
583
+ return kl
577
584
 
578
585
  def rolling_kullback_leibler_divergence(
579
586
  self,
@@ -3271,10 +3278,34 @@ class Statistics(FeatureExtractionMixin):
3271
3278
 
3272
3279
  Youden's J statistic is a measure of the overall performance of a binary classification test, taking into account both sensitivity (true positive rate) and specificity (true negative rate).
3273
3280
 
3274
- :param sample_1: The first binary array.
3275
- :param sample_2: The second binary array.
3276
- :return: Youden's J statistic.
3281
+ The Youden's J statistic is calculated as:
3282
+
3283
+ .. math::
3284
+ J = \text{sensitivity} + \text{specificity} - 1
3285
+
3286
+ where:
3287
+
3288
+ - :math:`\text{sensitivity} = \frac{TP}{TP + FN}` is the true positive rate
3289
+ - :math:`\text{specificity} = \frac{TN}{TN + FP}` is the true negative rate
3290
+
3291
+ The statistic ranges from -1 to 1, where:
3292
+ - :math:`J = 1` indicates perfect classification
3293
+ - :math:`J = 0` indicates the test performs no better than random
3294
+ - :math:`J < 0` indicates the test performs worse than random
3295
+
3296
+ :param sample_1: The first binary array (ground truth or reference).
3297
+ :param sample_2: The second binary array (predictions or test results).
3298
+ :return: Youden's J statistic. Returns NaN if either sensitivity or specificity cannot be calculated (division by zero).
3277
3299
  :rtype: float
3300
+
3301
+ :references:
3302
+ .. [1] Youden, W. J. (1950). Index for rating diagnostic tests. Cancer, 3(1), 32-35.
3303
+ https://acsjournals.onlinelibrary.wiley.com/doi/abs/10.1002/1097-0142(1950)3:1%3C32::AID-CNCR2820030106%3E3.0.CO;2-3
3304
+
3305
+ :example:
3306
+ >>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 1, 0, 0])
3307
+ >>> y_pred = np.array([1, 1, 0, 1, 1, 0, 1, 0, 0, 0])
3308
+ >>> j = Statistics.youden_j(sample_1=y_true, sample_2=y_pred)
3278
3309
  """
3279
3310
 
3280
3311
  check_valid_array(data=sample_1, source=f'{Statistics.youden_j.__name__} sample_1', accepted_ndims=(1,), accepted_values=[0, 1])
@@ -4250,7 +4281,7 @@ class Statistics(FeatureExtractionMixin):
4250
4281
  return separation_trace / compactness
4251
4282
 
4252
4283
  @staticmethod
4253
- def i_index(x: np.ndarray, y: np.ndarray):
4284
+ def i_index(x: np.ndarray, y: np.ndarray, verbose: bool = False) -> float:
4254
4285
 
4255
4286
  """
4256
4287
  Calculate the I-Index for evaluating clustering quality.
@@ -4275,9 +4306,10 @@ class Statistics(FeatureExtractionMixin):
4275
4306
  >>> X, y = make_blobs(n_samples=5000, centers=20, n_features=3, random_state=0, cluster_std=0.1)
4276
4307
  >>> Statistics.i_index(x=X, y=y)
4277
4308
  """
4309
+ timer = SimbaTimer(start=True)
4278
4310
  check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
4279
4311
  check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
4280
- _ = get_unique_values_in_iterable(data=y, name=Statistics.i_index.__name__, min=2)
4312
+ cluster_cnt = get_unique_values_in_iterable(data=y, name=Statistics.i_index.__name__, min=2)
4281
4313
  unique_y = np.unique(y)
4282
4314
  n_y = unique_y.shape[0]
4283
4315
  global_centroid = np.mean(x, axis=0)
@@ -4289,7 +4321,12 @@ class Statistics(FeatureExtractionMixin):
4289
4321
  cluster_centroid = np.mean(cluster_obs, axis=0)
4290
4322
  swc += np.sum(np.linalg.norm(cluster_obs - cluster_centroid, axis=1) ** 2)
4291
4323
 
4292
- return sst / (n_y * swc)
4324
+
4325
+ i_index = np.float32(sst / (n_y * swc))
4326
+ timer.stop_timer()
4327
+ if verbose: print(f'I-index for {x.shape[0]} observations in {cluster_cnt} clusters computed (elapsed time: {timer.elapsed_time_str}s)')
4328
+ return i_index
4329
+
4293
4330
 
4294
4331
  @staticmethod
4295
4332
  def sd_index(x: np.ndarray, y: np.ndarray) -> float:
@@ -5291,7 +5328,7 @@ class Statistics(FeatureExtractionMixin):
5291
5328
  """
5292
5329
  Compute one-way ANOVAs comparing each column (axis 1) on two arrays.
5293
5330
 
5294
- .. notes::
5331
+ .. note::
5295
5332
  Use for computing and presenting aggregate statistics. Not suitable for featurization.
5296
5333
 
5297
5334
  .. seealso::
@@ -5329,7 +5366,7 @@ class Statistics(FeatureExtractionMixin):
5329
5366
  """
5330
5367
  Compute Kruskal-Wallis comparing each column (axis 1) on two arrays.
5331
5368
 
5332
- .. notes::
5369
+ .. note::
5333
5370
  Use for computing and presenting aggregate statistics. Not suitable for featurization.
5334
5371
 
5335
5372
  .. seealso::
@@ -5366,7 +5403,7 @@ class Statistics(FeatureExtractionMixin):
5366
5403
  """
5367
5404
  Compute pairwise grouped Tukey-HSD tests.
5368
5405
 
5369
- .. notes::
5406
+ .. note::
5370
5407
  Use for computing and presenting aggregate statistics. Not suitable for featurization.
5371
5408
 
5372
5409
  :param np.ndarray data: 2D array with observations rowwise (axis 0) and features columnwise (axis 1)
@@ -2198,7 +2198,7 @@ class TimeseriesFeatureMixin(object):
2198
2198
  :example:
2199
2199
  >>> x = np.random.randint(0, 100, (400, 2))
2200
2200
  >>> results_1 = TimeseriesFeatureMixin.sliding_entropy_of_directional_changes(x=x, bins=16, window_size=5.0, sample_rate=30)
2201
- >>> x = pd.read_csv(r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\input_csv\Together_1.csv")[['Ear_left_1_x', 'Ear_left_1_y']].values
2201
+ >>> x = pd.read_csv(r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/input_csv/Together_1.csv")[['Ear_left_1_x', 'Ear_left_1_y']].values
2202
2202
  >>> results_2 = TimeseriesFeatureMixin.sliding_entropy_of_directional_changes(x=x, bins=16, window_size=5.0, sample_rate=30)
2203
2203
  """
2204
2204
 
@@ -77,10 +77,10 @@ from simba.utils.errors import (ClassifierInferenceError, CorruptedFileError,
77
77
  SamplingError, SimBAModuleNotFoundError)
78
78
  from simba.utils.lookups import get_meta_data_file_headers, get_table
79
79
  from simba.utils.printing import SimbaTimer, stdout_success
80
- from simba.utils.read_write import (find_core_cnt, get_fn_ext,
81
- get_memory_usage_of_df, get_pkg_version,
82
- read_config_entry, read_df, read_meta_file,
83
- str_2_bool)
80
+ from simba.utils.read_write import (find_core_cnt, get_current_time,
81
+ get_fn_ext, get_memory_usage_of_df,
82
+ get_pkg_version, read_config_entry,
83
+ read_df, read_meta_file, str_2_bool)
84
84
  from simba.utils.warnings import (GPUToolsWarning, MissingUserInputWarning,
85
85
  MultiProcessingFailedWarning,
86
86
  NoModuleWarning, NotEnoughDataWarning,
@@ -1383,18 +1383,39 @@ class TrainModelMixin(object):
1383
1383
  x_df: Union[pd.DataFrame, np.ndarray],
1384
1384
  multiclass: bool = False,
1385
1385
  model_name: Optional[str] = None,
1386
- data_path: Optional[Union[str, os.PathLike]] = None) -> np.ndarray:
1386
+ data_path: Optional[Union[str, os.PathLike]] = None,
1387
+ verbose: bool = False) -> np.ndarray:
1387
1388
 
1388
1389
  """
1389
- :param RandomForestClassifier clf: Random forest classifier object
1390
- :param Union[pd.DataFrame, np.ndarray] x_df: Features for data to predict as a dataframe or array of size (M,N).
1391
- :param bool multiclass: If True, the classifier predicts more than 2 targets. Else, boolean classifier.
1392
- :param Optional[str] model_name: Name of model
1393
- :param Optional[str] data_path: Path to model on disk
1394
- :return np.ndarray: 2D array with frame represented by rows and present/absent probabilities as columns
1395
- :raises FeatureNumberMismatchError: If shape of x_df and clf.n_features_ or n_features_in_ show mismatch
1390
+ Helper to predict class probabilities using a fitted random forest classifier.
1391
+
1392
+ Computes prediction probabilities for binary or multiclass classification using either
1393
+ scikit-learn or cuML RandomForestClassifier. For binary classifiers, returns the
1394
+ probability of the positive class (class 1). For multiclass classifiers, returns
1395
+ probabilities for all classes.
1396
+
1397
+ .. csv-table::
1398
+ :header: EXPECTED RUNTIMES
1399
+ :file: ../../docs/tables/clf_predict_proba.csv
1400
+ :widths: 10, 45, 45
1401
+ :align: center
1402
+ :header-rows: 1
1403
+
1404
+ .. seealso::
1405
+ To fit a classifier, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.clf_fit`
1406
+ To define a classifier, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.clf_define`
1407
+
1408
+ :param Union[RandomForestClassifier, cuRF] clf: Fitted random forest classifier object from sklearn or cuml.
1409
+ :param Union[pd.DataFrame, np.ndarray] x_df: Features for data to predict. DataFrame or array of shape (n_samples, n_features).
1410
+ :param bool multiclass: If True, the classifier predicts more than 2 classes. If False, binary classifier (default: False).
1411
+ :param Optional[str] model_name: Name of the model for error messages and logging. Default: None.
1412
+ :param Optional[Union[str, os.PathLike]] data_path: Path to the data file being processed, used in error messages. Default: None.
1413
+ :param bool verbose: If True, print inference progress and timing information. Default: False.
1414
+ :return np.ndarray: Prediction probabilities. For binary classifiers: 1D array of shape (n_samples,) with probability of positive class. For multiclass: 2D array of shape (n_samples, n_classes) with probabilities for each class.
1415
+
1396
1416
  """
1397
1417
 
1418
+ timer = SimbaTimer(start=True)
1398
1419
  if hasattr(clf, "n_features_"):
1399
1420
  clf_n_features = clf.n_features_
1400
1421
  elif hasattr(clf, "n_features_in_"):
@@ -1420,6 +1441,8 @@ class TrainModelMixin(object):
1420
1441
  p_vals = clf.predict_proba(x_df)
1421
1442
  if multiclass and (clf.n_classes_ != p_vals.shape[1]):
1422
1443
  raise ClassifierInferenceError(msg=f"The classifier {model_name} (data path: {data_path}) is a multiclassifier expected to create {clf.n_classes_} behavior probabilities. However, it produced probabilities for {p_vals.shape[1]} behaviors. See The SimBA GitHub FAQ page or Gitter for more information and suggested fixes.", source=self.__class__.__name__)
1444
+ timer.stop_timer()
1445
+ if verbose: print(f'Inference for model {model_name} over {x_df.shape[0]} observations complete ({timer.elapsed_time_str}s).')
1423
1446
  if not multiclass:
1424
1447
  if isinstance(p_vals, pd.DataFrame):
1425
1448
  return p_vals[1].values
@@ -1447,7 +1470,7 @@ class TrainModelMixin(object):
1447
1470
  bootstrap: Optional[bool] = True,
1448
1471
  verbose: Optional[int] = 1,
1449
1472
  class_weight: Optional[dict] = None,
1450
- cuda: Optional[bool] = False) -> RandomForestClassifier:
1473
+ cuda: Optional[bool] = False) -> Union[RandomForestClassifier, cuRF]:
1451
1474
 
1452
1475
  if not cuda:
1453
1476
  # NOTE: LOKY ISSUES ON WINDOWS WITH SCIKIT IF THE CORE COUNT EXCEEDS 61.
@@ -1482,20 +1505,32 @@ class TrainModelMixin(object):
1482
1505
  clf: Union[RandomForestClassifier, cuRF],
1483
1506
  x_df: pd.DataFrame,
1484
1507
  y_df: pd.DataFrame,
1485
- ) -> RandomForestClassifier:
1508
+ verbose: bool = False) -> Union[RandomForestClassifier, cuRF]:
1486
1509
 
1487
1510
  """
1488
- Helper to fit clf model
1511
+ Helper to fit clf model.
1489
1512
 
1490
- :param clf: Un-fitted random forest classifier object
1513
+ .. csv-table::
1514
+ :header: EXPECTED RUNTIMES
1515
+ :file: ../../docs/tables/clf_fit.csv
1516
+ :widths: 20, 20, 30, 30
1517
+ :align: center
1518
+ :header-rows: 1
1519
+
1520
+ .. seealso::
1521
+ To define a cuml/sklearn object, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.clf_define`
1522
+
1523
+ :param clf: Un-fitted random forest classifier object, either from sklearn or cuml.
1491
1524
  :param pd.DataFrame x_df: Pandas dataframe with features.
1492
1525
  :param pd.DataFrame y_df: Pandas dataframe/Series with target
1493
1526
  :return: Fitted random forest classifier object
1494
1527
  :rtype: RandomForestClassifier
1495
1528
  """
1496
1529
 
1530
+ timer = SimbaTimer(start=True)
1497
1531
  nan_features = x_df[~x_df.applymap(np.isreal).all(1)]
1498
1532
  nan_target = y_df.loc[pd.to_numeric(y_df).isna()]
1533
+ using_cuda = True if CUML in str(clf.__class__.__module__).lower() else False
1499
1534
  if len(nan_features) > 0:
1500
1535
  raise FaultyTrainingSetError(
1501
1536
  msg=f"{len(nan_features)} frame(s) in your project_folder/csv/targets_inserted directory contains FEATURES with non-numerical values",
@@ -1504,9 +1539,16 @@ class TrainModelMixin(object):
1504
1539
  raise FaultyTrainingSetError(
1505
1540
  msg=f"{len(nan_target)} frame(s) in your project_folder/csv/targets_inserted directory contains ANNOTATIONS with non-numerical values",
1506
1541
  source=self.__class__.__name__)
1542
+ if verbose: print(f'[{get_current_time()}] Fitting classifier for {len(x_df)} observations (cuda: {"True" if using_cuda else "False"})...')
1543
+ if using_cuda:
1544
+ x_data = x_df.values if isinstance(x_df, pd.DataFrame) else x_df
1545
+ y_data = y_df.values if isinstance(y_df, (pd.DataFrame, pd.Series)) else y_df
1546
+ clf.fit(x_data, y_data)
1547
+ else:
1548
+ clf.fit(x_df, y_df)
1507
1549
 
1508
- clf.fit(x_df, y_df)
1509
-
1550
+ timer.stop_timer()
1551
+ if verbose: print(f'[{get_current_time()}] Classifier fitted in {timer.elapsed_time_str}s.')
1510
1552
  return clf
1511
1553
 
1512
1554
  @staticmethod
@@ -1563,9 +1605,7 @@ class TrainModelMixin(object):
1563
1605
  :rtype: Tuple[pd.DataFrame, List[int]]
1564
1606
 
1565
1607
  """
1566
- if (platform.system() == "Darwin") and (
1567
- multiprocessing.get_start_method() != "spawn"
1568
- ):
1608
+ if (platform.system() == "Darwin") and (multiprocessing.get_start_method() != "spawn"):
1569
1609
  multiprocessing.set_start_method("spawn", force=True)
1570
1610
  cpu_cnt, _ = find_core_cnt()
1571
1611
  df_lst, frame_numbers_lst = [], []
@@ -1592,9 +1632,7 @@ class TrainModelMixin(object):
1592
1632
  :, ~df_concat.columns.str.contains("^Unnamed")
1593
1633
  ].astype(np.float32)
1594
1634
  memory_size = get_memory_usage_of_df(df=df_concat)
1595
- print(
1596
- f'Dataset size: {memory_size["megabytes"]}MB / {memory_size["gigabytes"]}GB'
1597
- )
1635
+ print(f'Dataset size: {memory_size["megabytes"]}MB / {memory_size["gigabytes"]}GB')
1598
1636
 
1599
1637
  return df_concat, frame_numbers_lst
1600
1638
 
@@ -2607,9 +2645,9 @@ class TrainModelMixin(object):
2607
2645
  :param bool plot: If True, create SHAP aggregation and plots.
2608
2646
 
2609
2647
  :example:
2610
- >>> CONFIG_PATH = r"C:\troubleshooting\mitra\project_folder\project_config.ini"
2611
- >>> RF_PATH = r"C:\troubleshooting\mitra\models\validations\straub_tail_5_new\straub_tail_5.sav"
2612
- >>> DATA_PATH = r"C:\troubleshooting\mitra\project_folder\csv\targets_inserted\new_straub\appended\501_MA142_Gi_CNO_0514.csv"
2648
+ >>> CONFIG_PATH = r"C:/troubleshooting/mitra/project_folder/project_config.ini"
2649
+ >>> RF_PATH = r"C:/troubleshooting/mitra/models/validations/straub_tail_5_new/straub_tail_5.sav"
2650
+ >>> DATA_PATH = r"C:/troubleshooting/mitra/project_folder/csv/targets_inserted/new_straub/appended/501_MA142_Gi_CNO_0514.csv"
2613
2651
  >>> config = ConfigReader(config_path=CONFIG_PATH)
2614
2652
  >>> df = read_df(file_path=DATA_PATH, file_type='csv')
2615
2653
  >>> y = df['straub_tail']
@@ -45,7 +45,7 @@ class InferenceBatch(TrainModelMixin, ConfigReader):
45
45
  >>> inferencer.run()
46
46
 
47
47
  :example II:
48
- >>> inferencer = InferenceBatch(config_path=r"D:\troubleshooting\mitra\project_folder\project_config.ini", features_dir=r"D:\troubleshooting\mitra\project_folder\videos\bg_removed\rotated\tail_features\APPENDED")
48
+ >>> inferencer = InferenceBatch(config_path=r"D:/troubleshooting/mitra/project_folder/project_config.ini", features_dir=r"D:/troubleshooting/mitra/project_folder/videos/bg_removed/rotated/tail_features/APPENDED")
49
49
  >>> inferencer.run()
50
50
  """
51
51
 
@@ -55,9 +55,9 @@ class YOLOSegmentationInference():
55
55
  To visualize the segmentation results, see :func:`simba.plotting.yolo_seg_visualizer.YOLOSegmentationVisualizer`
56
56
 
57
57
  :example:
58
- >>> weights_path = r"D:\platea\yolo_071525\mdl\train3\weights\best.pt"
59
- >>> video_path = r"D:\platea\platea_videos\videos\clipped\10B_Mouse_5-choice_MustTouchTrainingNEWFINAL_a7.mp4"
60
- >>> save_dir=r"D:\platea\platea_videos\videos\yolo_results"
58
+ >>> weights_path = r"D:/platea/yolo_071525/mdl/train3/weights/best.pt"
59
+ >>> video_path = r"D:/platea/platea_videos/videos/clipped/10B_Mouse_5-choice_MustTouchTrainingNEWFINAL_a7.mp4"
60
+ >>> save_dir = r"D:/platea/platea_videos/videos/yolo_results"
61
61
  >>> runner = YOLOSegmentationInference(weights_path=weights_path, video_path=video_path, save_dir=save_dir, verbose=True, device=0, format=None, stream=True, batch_size=10, imgsz=320, interpolate=True, threshold=0.8, retina_msk=True)
62
62
  >>> runner.run()
63
63
 
@@ -98,14 +98,14 @@ class HeatMapperClfMultiprocess(ConfigReader, PlottingMixin):
98
98
 
99
99
 
100
100
  :example II:
101
- >>> test = HeatMapperClfMultiprocess(config_path=r"C:\troubleshooting\RAT_NOR\project_folder\project_config.ini",
101
+ >>> test = HeatMapperClfMultiprocess(config_path=r"C:/troubleshooting/RAT_NOR/project_folder/project_config.ini",
102
102
  >>> style_attr = {'palette': 'jet', 'shading': 'gouraud', 'bin_size': 50, 'max_scale': 'auto'},
103
103
  >>> final_img_setting=True,
104
104
  >>> video_setting=True,
105
105
  >>> frame_setting=True,
106
106
  >>> bodypart='Ear_left',
107
107
  >>> clf_name='straub_tail',
108
- >>> data_paths=[r"C:\troubleshooting\RAT_NOR\project_folder\csv\test\2022-06-20_NOB_DOT_4.csv"])
108
+ >>> data_paths=[r"C:/troubleshooting/RAT_NOR/project_folder/csv/test/2022-06-20_NOB_DOT_4.csv"])
109
109
  >>> test.run()
110
110
  """
111
111
 
@@ -44,10 +44,10 @@ class SimBABlobImporter(ConfigReader):
44
44
  :param Optional[bool] verbose: If True, prints progress messages. Default: True.
45
45
 
46
46
  :example:
47
- >>> r = SimBABlobImporter(config_path=r"C:\troubleshooting\simba_blob_project\project_folder\project_config.ini", data_path=r'C:\troubleshooting\simba_blob_project\data')
47
+ >>> r = SimBABlobImporter(config_path=r"C:/troubleshooting/simba_blob_project/project_folder/project_config.ini", data_path=r'C:/troubleshooting/simba_blob_project/data')
48
48
  >>> r.run()
49
- >>> r = SimBABlobImporter(config_path=r"C:\troubleshooting\simba_blob_project\project_folder\project_config.ini",
50
- ... data_path=r'C:\troubleshooting\simba_blob_project\data',
49
+ >>> r = SimBABlobImporter(config_path=r"C:/troubleshooting/simba_blob_project/project_folder/project_config.ini",
50
+ ... data_path=r'C:/troubleshooting/simba_blob_project/data',
51
51
  ... smoothing_settings={'method': 'savitzky-golay', 'time_window': 100},
52
52
  ... interpolation_settings={'method': 'nearest', 'type': 'body-parts'})
53
53
  >>> r.run()
@@ -168,7 +168,7 @@ class ROIAggregateStatisticsAnalyzerMultiprocess(ConfigReader, FeatureExtraction
168
168
  :param save_path (str | os.PathLike, optional): Path to save summary statistics.
169
169
 
170
170
  :example:
171
- >>> analyzer = ROIAggregateStatisticsAnalyzerMultiprocess(config_path=r"C:\troubleshooting\mitra\project_folder\project_config.ini", body_parts=['Center'], first_entry_time=True, threshold=0.0, calculate_distances=True, transpose=False, detailed_bout_data=True)
171
+ >>> analyzer = ROIAggregateStatisticsAnalyzerMultiprocess(config_path=r"C:/troubleshooting/mitra/project_folder/project_config.ini", body_parts=['Center'], first_entry_time=True, threshold=0.0, calculate_distances=True, transpose=False, detailed_bout_data=True)
172
172
  >>> analyzer.run()
173
173
  >>> analyzer.save()
174
174
  """
@@ -150,7 +150,7 @@ class ROIClfCalculatorMultiprocess(ConfigReader):
150
150
  'GitHub tutorial <https://github.com/sgoldenlab/simba/blob/master/docs/Scenario2.md#part-4--analyze-machine-results`__.
151
151
 
152
152
  :example:
153
- >>> analyzer = ROIClfCalculatorMultiprocess(config_path=r"D:\troubleshooting\maplight_ri\project_folder\project_config.ini", bp_names=['resident_NOSE'], clf_names=['attack'], clf_time=True, started_bout_cnt=True, ended_bout_cnt=False, bout_table=True, transpose=True, core_cnt=20)
153
+ >>> analyzer = ROIClfCalculatorMultiprocess(config_path=r"D:/troubleshooting/maplight_ri/project_folder/project_config.ini", bp_names=['resident_NOSE'], clf_names=['attack'], clf_time=True, started_bout_cnt=True, ended_bout_cnt=False, bout_table=True, transpose=True, core_cnt=20)
154
154
  >>> analyzer.run()
155
155
  >>> analyzer.save()
156
156
  """
@@ -0,0 +1,30 @@
1
+ """Analyze runtime statistics for directionality_to_nonstatic_target"""
2
+ import numpy as np
3
+ from collections import defaultdict
4
+
5
+ # Parse the runtime data
6
+ data = {
7
+ 10000: [0.4389, 0.0008, 0.0012],
8
+ 100000: [0.0063, 0.0052, 0.0052],
9
+ 1000000: [0.0768, 0.0306, 0.0239],
10
+ 10000000: [0.2195, 0.2122, 0.2083],
11
+ 50000000: [1.8936, 1.5664, 1.2548]
12
+ }
13
+
14
+ # Calculate statistics
15
+ print("=" * 80)
16
+ print(f"{'Observations':<15} {'Mean (s)':<12} {'Std (s)':<12} {'Min (s)':<12} {'Max (s)':<12} {'Throughput (M obs/s)':<20}")
17
+ print("=" * 80)
18
+
19
+ for obs_count in sorted(data.keys()):
20
+ times = np.array(data[obs_count])
21
+ mean_time = np.mean(times)
22
+ std_time = np.std(times)
23
+ min_time = np.min(times)
24
+ max_time = np.max(times)
25
+ throughput = obs_count / (mean_time * 1_000_000) # Million observations per second
26
+
27
+ print(f"{obs_count:<15,} {mean_time:<12.4f} {std_time:<12.4f} {min_time:<12.4f} {max_time:<12.4f} {throughput:<20.2f}")
28
+
29
+ print("=" * 80)
30
+ print("\nNote: First run typically includes JIT compilation overhead (especially for 10k observations)")