simba-uw-tf-dev 4.6.6__py3-none-any.whl → 4.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. simba/data_processors/blob_location_computer.py +1 -1
  2. simba/data_processors/cuda/image.py +12 -8
  3. simba/data_processors/cuda/statistics.py +57 -18
  4. simba/data_processors/cuda/timeseries.py +1 -1
  5. simba/data_processors/egocentric_aligner.py +1 -1
  6. simba/feature_extractors/feature_subsets.py +2 -2
  7. simba/feature_extractors/straub_tail_analyzer.py +4 -4
  8. simba/labelling/standard_labeller.py +1 -1
  9. simba/mixins/geometry_mixin.py +8 -8
  10. simba/mixins/image_mixin.py +14 -14
  11. simba/mixins/statistics_mixin.py +39 -9
  12. simba/mixins/timeseries_features_mixin.py +1 -1
  13. simba/mixins/train_model_mixin.py +65 -27
  14. simba/model/inference_batch.py +1 -1
  15. simba/model/yolo_seg_inference.py +3 -3
  16. simba/plotting/heat_mapper_clf_mp.py +2 -2
  17. simba/pose_importers/simba_blob_importer.py +3 -3
  18. simba/roi_tools/roi_aggregate_stats_mp.py +1 -1
  19. simba/roi_tools/roi_clf_calculator_mp.py +1 -1
  20. simba/sandbox/analyze_runtimes.py +30 -30
  21. simba/sandbox/test_directionality.py +47 -47
  22. simba/sandbox/test_nonstatic_directionality.py +27 -27
  23. simba/sandbox/test_pycharm_cuda.py +51 -51
  24. simba/sandbox/test_simba_install.py +41 -41
  25. simba/sandbox/test_static_directionality.py +26 -26
  26. simba/sandbox/test_static_directionality_2d.py +26 -26
  27. simba/sandbox/verify_env.py +42 -42
  28. simba/third_party_label_appenders/transform/coco_keypoints_to_yolo.py +3 -3
  29. simba/third_party_label_appenders/transform/coco_keypoints_to_yolo_bbox.py +2 -2
  30. simba/utils/custom_feature_extractor.py +1 -1
  31. simba/utils/data.py +2 -2
  32. simba/utils/read_write.py +32 -18
  33. simba/utils/yolo.py +10 -1
  34. simba/video_processors/blob_tracking_executor.py +2 -2
  35. simba/video_processors/clahe_ui.py +1 -1
  36. simba/video_processors/egocentric_video_rotator.py +3 -3
  37. simba/video_processors/multi_cropper.py +1 -1
  38. simba/video_processors/video_processing.py +26 -9
  39. simba/video_processors/videos_to_frames.py +2 -2
  40. {simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/METADATA +3 -2
  41. {simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/RECORD +45 -45
  42. {simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/LICENSE +0 -0
  43. {simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/WHEEL +0 -0
  44. {simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/entry_points.txt +0 -0
  45. {simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/top_level.txt +0 -0
@@ -51,7 +51,7 @@ class BlobLocationComputer(object):
51
51
  :param Optional[bool] multiprocessing: If True, video background subtraction will be done using multiprocessing. Default is False.
52
52
 
53
53
  :example:
54
- >>> x = BlobLocationComputer(data_path=r"C:\troubleshooting\RAT_NOR\project_folder\videos\2022-06-20_NOB_DOT_4_downsampled_bg_subtracted.mp4", multiprocessing=True, gpu=True, batch_size=2000, save_dir=r"C:\blob_positions")
54
+ >>> x = BlobLocationComputer(data_path=r"C:/troubleshooting/RAT_NOR/project_folder/videos/2022-06-20_NOB_DOT_4_downsampled_bg_subtracted.mp4", multiprocessing=True, gpu=True, batch_size=2000, save_dir=r"C:/blob_positions")
55
55
  >>> x.run()
56
56
  """
57
57
  def __init__(self,
@@ -331,10 +331,12 @@ def _digital(data, results):
331
331
 
332
332
  def img_stack_brightness(x: np.ndarray,
333
333
  method: Optional[Literal['photometric', 'digital']] = 'digital',
334
- ignore_black: Optional[bool] = True) -> np.ndarray:
334
+ ignore_black: bool = True,
335
+ verbose: bool = False) -> np.ndarray:
335
336
  """
336
337
  Calculate the average brightness of a stack of images using a specified method.
337
338
 
339
+ Useful for analyzing light cues or brightness changes over time. For example, compute brightness in images containing a light cue ROI, then perform clustering (e.g., k-means) on brightness values to identify frames when the light cue is on vs off.
338
340
 
339
341
  - **Photometric Method**: The brightness is calculated using the formula:
340
342
 
@@ -346,7 +348,7 @@ def img_stack_brightness(x: np.ndarray,
346
348
  .. math::
347
349
  \text{brightness} = 0.299 \cdot R + 0.587 \cdot G + 0.114 \cdot B
348
350
 
349
- .. selalso::
351
+ .. seealso::
350
352
  For CPU function see :func:`~simba.mixins.image_mixin.ImageMixin.brightness_intensity`.
351
353
 
352
354
  :param np.ndarray x: A 4D array of images with dimensions (N, H, W, C), where N is the number of images, H and W are the height and width, and C is the number of channels (RGB).
@@ -363,7 +365,7 @@ def img_stack_brightness(x: np.ndarray,
363
365
 
364
366
  check_instance(source=img_stack_brightness.__name__, instance=x, accepted_types=(np.ndarray,))
365
367
  check_if_valid_img(data=x[0], source=img_stack_brightness.__name__)
366
- x = np.ascontiguousarray(x).astype(np.uint8)
368
+ x, timer = np.ascontiguousarray(x).astype(np.uint8), SimbaTimer(start=True)
367
369
  if x.ndim == 4:
368
370
  grid_x = (x.shape[1] + 16 - 1) // 16
369
371
  grid_y = (x.shape[2] + 16 - 1) // 16
@@ -383,7 +385,8 @@ def img_stack_brightness(x: np.ndarray,
383
385
  else:
384
386
  results = deepcopy(x)
385
387
  results = np.mean(results, axis=(1, 2))
386
-
388
+ timer.stop_timer()
389
+ if verbose: print(f'Brightness computed in {results.shape[0]} images (elapsed time {timer.elapsed_time_str}s)')
387
390
  return results
388
391
 
389
392
 
@@ -1602,10 +1605,11 @@ def pose_plotter(data: Union[str, os.PathLike, np.ndarray],
1602
1605
  # SAVE_PATH = "/mnt/c/troubleshooting/mitra/project_folder/frames/output/pose_ex/test.mp4"
1603
1606
  #
1604
1607
  #
1605
- DATA_PATH = "/mnt/d/troubleshooting/mitra/project_folder/csv/outlier_corrected_movement_location/592_MA147_CNO1_0515.csv"
1606
- VIDEO_PATH = "/mnt/d/troubleshooting/mitra/project_folder/videos/592_MA147_CNO1_0515.mp4"
1607
- SAVE_PATH = "/mnt/d/troubleshooting/mitra/project_folder/videos/test_cuda.mp4"
1608
- pose_plotter(data=DATA_PATH, video_path=VIDEO_PATH, save_path=SAVE_PATH, circle_size=10, batch_size=100)
1608
+ if __name__ == "__main__":
1609
+ DATA_PATH = "/mnt/d/troubleshooting/mitra/project_folder/csv/outlier_corrected_movement_location/592_MA147_CNO1_0515.csv"
1610
+ VIDEO_PATH = "/mnt/d/troubleshooting/mitra/project_folder/videos/592_MA147_CNO1_0515.mp4"
1611
+ SAVE_PATH = "/mnt/d/troubleshooting/mitra/project_folder/videos/test_cuda.mp4"
1612
+ pose_plotter(data=DATA_PATH, video_path=VIDEO_PATH, save_path=SAVE_PATH, circle_size=10, batch_size=100)
1609
1613
 
1610
1614
 
1611
1615
 
@@ -3,7 +3,7 @@ __author__ = "Simon Nilsson; sronilsson@gmail.com"
3
3
 
4
4
  import math
5
5
  from itertools import combinations
6
- from typing import Optional, Tuple
6
+ from typing import Optional, Tuple, Union
7
7
 
8
8
  from simba.utils.printing import SimbaTimer
9
9
 
@@ -19,16 +19,20 @@ from scipy.spatial import ConvexHull
19
19
  from simba.utils.read_write import get_unique_values_in_iterable, read_df
20
20
  from simba.utils.warnings import GPUToolsWarning
21
21
 
22
+
22
23
  try:
23
24
  import cupy as cp
24
- from cuml.metrics import kl_divergence as kl_divergence_gpu
25
- #from cuml.metrics.cluster.adjusted_rand_index import adjusted_rand_score
26
- #from cuml.metrics.cluster.silhouette_score import cython_silhouette_score
27
25
  from cupyx.scipy.spatial.distance import cdist
28
26
  except Exception as e:
29
27
  GPUToolsWarning(msg=f'GPU tools not detected, reverting to CPU: {e.args}')
30
28
  import numpy as cp
31
29
  from scipy.spatial.distance import cdist
30
+ try:
31
+ from cuml.metrics import kl_divergence as kl_divergence_gpu
32
+ from cuml.metrics.cluster.adjusted_rand_index import adjusted_rand_score
33
+ from cuml.metrics.cluster.silhouette_score import cython_silhouette_score
34
+ except Exception as e:
35
+ GPUToolsWarning(msg=f'GPU tools not detected, reverting to CPU: {e.args}')
32
36
  from scipy.stats import entropy as kl_divergence_gpu
33
37
  from sklearn.metrics import adjusted_rand_score
34
38
  from sklearn.metrics import silhouette_score as cython_silhouette_score
@@ -41,7 +45,7 @@ except:
41
45
  from simba.data_processors.cuda.utils import _cuda_are_rows_equal
42
46
  from simba.mixins.statistics_mixin import Statistics
43
47
  from simba.utils.checks import (check_int, check_str, check_valid_array,
44
- check_valid_tuple)
48
+ check_valid_tuple, check_float)
45
49
  from simba.utils.data import bucket_data
46
50
  from simba.utils.enums import Formats
47
51
 
@@ -381,9 +385,10 @@ def sliding_min(x: np.ndarray, time_window: float, sample_rate: int) -> np.ndarr
381
385
 
382
386
  def sliding_spearmans_rank(x: np.ndarray,
383
387
  y: np.ndarray,
384
- time_window: float,
385
- sample_rate: int,
386
- batch_size: Optional[int] = int(1.6e+7)) -> np.ndarray:
388
+ time_window: Union[float, int],
389
+ sample_rate: Union[float, int],
390
+ batch_size: Optional[int] = int(1.6e+7),
391
+ verbose: bool = False) -> np.ndarray:
387
392
  """
388
393
  Computes the Spearman's rank correlation coefficient between two 1D arrays `x` and `y`
389
394
  over sliding windows of size `time_window * sample_rate`. The computation is performed
@@ -414,7 +419,13 @@ def sliding_spearmans_rank(x: np.ndarray,
414
419
  >>> sliding_spearmans_rank(x, y, time_window=0.5, sample_rate=2)
415
420
  """
416
421
 
417
- window_size = int(np.ceil(time_window * sample_rate))
422
+ timer = SimbaTimer(start=True)
423
+ check_valid_array(data=x, source=f'{sliding_spearmans_rank.__name__} x', accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
424
+ check_valid_array(data=y, source=f'{sliding_spearmans_rank.__name__} y', accepted_ndims=(1,), accepted_axis_0_shape=(x.shape[0],), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
425
+ check_float(name=f'{sliding_spearmans_rank.__name__} time_window', value=time_window, allow_zero=False, allow_negative=False, raise_error=True)
426
+ check_float(name=f'{sliding_spearmans_rank.__name__} sample_rate', value=sample_rate, allow_zero=False, allow_negative=False, raise_error=True)
427
+ check_int(name=f'{sliding_spearmans_rank.__name__} batch_size', value=batch_size, allow_zero=False, allow_negative=False, raise_error=True)
428
+ window_size = np.int32(np.ceil(time_window * sample_rate))
418
429
  n = x.shape[0]
419
430
  results = cp.full(n, -1, dtype=cp.float32)
420
431
 
@@ -434,7 +445,11 @@ def sliding_spearmans_rank(x: np.ndarray,
434
445
 
435
446
  results[left + window_size - 1:right] = s
436
447
 
437
- return cp.asnumpy(results)
448
+ r = cp.asnumpy(results)
449
+ timer.stop_timer()
450
+ if verbose: print(f'Sliding Spearmans rank for {x.shape[0]} observations computed (elapsed time: {timer.elapsed_time_str}s)')
451
+ return r
452
+
438
453
 
439
454
 
440
455
 
@@ -539,6 +554,12 @@ def euclidean_distance_to_static_point(data: np.ndarray,
539
554
  """
540
555
  Computes the Euclidean distance between each point in a given 2D array `data` and a static point using GPU acceleration.
541
556
 
557
+ .. seealso::
558
+ For CPU-based distance to static point (ROI center), see :func:`simba.mixins.feature_extraction_mixin.FeatureExtractionMixin.framewise_euclidean_distance_roi`
559
+ For CPU-based framewise Euclidean distance, see :func:`simba.mixins.feature_extraction_mixin.FeatureExtractionMixin.framewise_euclidean_distance`
560
+ For GPU CuPy solution for distance between two sets of points, see :func:`simba.data_processors.cuda.statistics.get_euclidean_distance_cupy`
561
+ For GPU numba CUDA solution for distance between two sets of points, see :func:`simba.data_processors.cuda.statistics.get_euclidean_distance_cuda`
562
+
542
563
  :param data: A 2D array of shape (N, 2), where N is the number of points, and each point is represented by its (x, y) coordinates. The array can represent pixel coordinates.
543
564
  :param point: A tuple of two integers representing the static point (x, y) in the same space as `data`.
544
565
  :param pixels_per_millimeter: A scaling factor that indicates how many pixels correspond to one millimeter. Defaults to 1 if no scaling is necessary.
@@ -790,13 +811,31 @@ def xie_beni(x: np.ndarray, y: np.ndarray) -> float:
790
811
  return xb
791
812
 
792
813
 
793
- def i_index(x: np.ndarray, y: np.ndarray):
814
+ def i_index(x: np.ndarray, y: np.ndarray, verbose: bool = False) -> float:
794
815
  """
795
816
  Calculate the I-Index for evaluating clustering quality.
796
817
 
797
818
  The I-Index is a metric that measures the compactness and separation of clusters.
798
819
  A higher I-Index indicates better clustering with compact and well-separated clusters.
799
820
 
821
+ .. csv-table::
822
+ :header: EXPECTED RUNTIMES
823
+ :file: ../../../docs/tables/i_index_cuda.csv
824
+ :widths: 10, 45, 45
825
+ :align: center
826
+ :header-rows: 1
827
+
828
+ The I-Index is calculated as:
829
+
830
+ .. math::
831
+ I = \frac{SST}{k \times SWC}
832
+
833
+ where:
834
+
835
+ - :math:`SST = \sum_{i=1}^{n} \|x_i - \mu\|^2` is the total sum of squares (sum of squared distances from all points to the global centroid)
836
+ - :math:`k` is the number of clusters
837
+ - :math:`SWC = \sum_{c=1}^{k} \sum_{i \in c} \|x_i - \mu_c\|^2` is the within-cluster sum of squares (sum of squared distances from points to their cluster centroids)
838
+
800
839
  .. seealso::
801
840
  To compute the I-Index on the CPU, use :func:`~simba.mixins.statistics_mixin.Statistics.i_index`
802
841
 
@@ -807,17 +846,16 @@ def i_index(x: np.ndarray, y: np.ndarray):
807
846
 
808
847
  :references:
809
848
  .. [1] Zhao, Q., Xu, M., Fränti, P. (2009). Sum-of-Squares Based Cluster Validity Index and Significance Analysis.
810
- In: Kolehmainen, M., Toivanen, P., Beliczynski, B. (eds) Adaptive and Natural Computing Algorithms. ICANNGA 2009.
811
- Lecture Notes in Computer Science, vol 5495. Springer, Berlin, Heidelberg. https://doi.org/10.1007/978-3-642-04921-7_32
849
+ In: Kolehmainen, M., Toivanen, P., Beliczynski, B. (eds) Adaptive and Natural Computing Algorithms. ICANNGA 2009. Lecture Notes in Computer Science, vol 5495. Springer, Berlin, Heidelberg. https://doi.org/10.1007/978-3-642-04921-7_32
812
850
 
813
851
  :example:
814
852
  >>> X, y = make_blobs(n_samples=5000, centers=20, n_features=3, random_state=0, cluster_std=0.1)
815
853
  >>> i_index(x=X, y=y)
816
854
  """
855
+ timer = SimbaTimer(start=True)
817
856
  check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
818
- check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value,
819
- accepted_axis_0_shape=[x.shape[0], ])
820
- _ = get_unique_values_in_iterable(data=y, name=i_index.__name__, min=2)
857
+ check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
858
+ cluster_cnt = get_unique_values_in_iterable(data=y, name=i_index.__name__, min=2)
821
859
  x, y = cp.array(x), cp.array(y)
822
860
  unique_y = cp.unique(y)
823
861
  n_y = unique_y.shape[0]
@@ -831,10 +869,11 @@ def i_index(x: np.ndarray, y: np.ndarray):
831
869
  swc += cp.sum(cp.linalg.norm(cluster_obs - cluster_centroid, axis=1) ** 2)
832
870
 
833
871
  i_idx = sst / (n_y * swc)
834
-
872
+ i_idx = np.float32(i_idx.get()) if hasattr(i_idx, 'get') else np.float32(i_idx)
873
+ timer.stop_timer()
874
+ if verbose: print(f'I-index for {x.shape[0]} observations in {cluster_cnt} clusters computed (elapsed time: {timer.elapsed_time_str}s)')
835
875
  return i_idx
836
876
 
837
-
838
877
  def kullback_leibler_divergence_gpu(x: np.ndarray,
839
878
  y: np.ndarray,
840
879
  fill_value: int = 1,
@@ -307,7 +307,7 @@ def sliding_hjort_parameters_gpu(data: np.ndarray, window_sizes: np.ndarray, sam
307
307
  """
308
308
  Compute Hjorth parameters over sliding windows on the GPU.
309
309
 
310
- .. seelalso::
310
+ .. seealso::
311
311
  For CPU implementation, see :func:`simba.mixins.timeseries_features_mixin.TimeseriesFeatureMixin.hjort_parameters`
312
312
 
313
313
  :param np.ndarray data: 1D numeric array of signal data.
@@ -52,7 +52,7 @@ class EgocentricalAligner():
52
52
  :param Optional[int] core_cnt: Number of CPU cores to use for video rotation; `-1` uses all available cores.
53
53
 
54
54
  :example:
55
- >>> aligner = EgocentricalAligner(rotate_video=True, anchor_1='tail_base', anchor_2='nose', data_dir=r"/data_dir", videos_dir=r'/videos_dir', save_dir=r"/save_dir", video_info=r"C:\troubleshooting\mitra\project_folder\logs\video_info.csv", direction=0, anchor_location=(250, 250), fill_clr=(0, 0, 0))
55
+ >>> aligner = EgocentricalAligner(rotate_video=True, anchor_1='tail_base', anchor_2='nose', data_dir=r"/data_dir", videos_dir=r'/videos_dir', save_dir=r"/save_dir", video_info=r"C:/troubleshooting/mitra/project_folder/logs/video_info.csv", direction=0, anchor_location=(250, 250), fill_clr=(0, 0, 0))
56
56
  >>> aligner.run()
57
57
  """
58
58
 
@@ -83,12 +83,12 @@ class FeatureSubsetsCalculator(ConfigReader, TrainModelMixin):
83
83
  :align: center
84
84
 
85
85
  :example:
86
- >>> test = FeatureSubsetsCalculator(config_path=r"C:\troubleshooting\mitra\project_folder\project_config.ini",
86
+ >>> test = FeatureSubsetsCalculator(config_path=r"C:/troubleshooting/mitra/project_folder/project_config.ini",
87
87
  >>> feature_families=[FRAME_BP_MOVEMENT, WITHIN_ANIMAL_THREE_POINT_ANGLES],
88
88
  >>> append_to_features_extracted=False,
89
89
  >>> file_checks=False,
90
90
  >>> append_to_targets_inserted=False,
91
- >>> save_dir=r"C:\troubleshooting\mitra\project_folder\csv\new_features")
91
+ >>> save_dir=r"C:/troubleshooting/mitra/project_folder/csv/new_features")
92
92
  >>> test.run()
93
93
  """
94
94
 
@@ -44,10 +44,10 @@ class StraubTailAnalyzer(ConfigReader):
44
44
  .. [1] Lazaro et al., Brainwide Genetic Capture for Conscious State Transitions, `biorxiv`, doi: https://doi.org/10.1101/2025.03.28.646066
45
45
 
46
46
  :example:
47
- >>> runner = StraubTailAnalyzer(config_path=r"C:\troubleshooting\mitra\project_folder\project_config.ini",
48
- >>> data_dir=r'C:\troubleshooting\mitra\project_folder\videos\additional\bg_removed\rotated',
49
- >>> video_dir=r'C:\troubleshooting\mitra\project_folder\videos\additional\bg_removed\rotated',
50
- >>> save_dir=r'C:\troubleshooting\mitra\project_folder\videos\additional\bg_removed\rotated\tail_features_additional',
47
+ >>> runner = StraubTailAnalyzer(config_path=r"C:/troubleshooting/mitra/project_folder/project_config.ini",
48
+ >>> data_dir=r'C:/troubleshooting/mitra/project_folder/videos/additional/bg_removed/rotated',
49
+ >>> video_dir=r'C:/troubleshooting/mitra/project_folder/videos/additional/bg_removed/rotated',
50
+ >>> save_dir=r'C:/troubleshooting/mitra/project_folder/videos/additional/bg_removed/rotated/tail_features_additional',
51
51
  >>> anchor_points=('tail_base', 'tail_center', 'tail_tip'),
52
52
  >>> body_parts=('nose', 'left_ear', 'right_ear', 'right_side', 'left_side', 'tail_base'))
53
53
  >>> runner.run()
@@ -64,7 +64,7 @@ class LabellingInterface(ConfigReader):
64
64
  :param bool continuing: Set True to resume annotations from an existing targets file. Defaults to False.
65
65
 
66
66
  :example:
67
- >>> _ = LabellingInterface(config_path=r"C:\troubleshooting\mitra\project_folder\project_config.ini", file_path=r"C:\troubleshooting\mitra\project_folder\videos\501_MA142_Gi_CNO_0521.mp4", thresholds=None, continuing=False)
67
+ >>> _ = LabellingInterface(config_path=r"C:/troubleshooting/mitra/project_folder/project_config.ini", file_path=r"C:/troubleshooting/mitra/project_folder/videos/501_MA142_Gi_CNO_0521.mp4", thresholds=None, continuing=False)
68
68
  """
69
69
 
70
70
  def __init__(self,
@@ -1556,7 +1556,7 @@ class GeometryMixin(object):
1556
1556
  :rtype: List[float]
1557
1557
 
1558
1558
  :example:
1559
- >>> df = read_df(file_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\outlier_corrected_movement_location\Together_2.csv", file_type='csv').astype(int)
1559
+ >>> df = read_df(file_path=r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/outlier_corrected_movement_location/Together_2.csv", file_type='csv').astype(int)
1560
1560
  >>> animal_1_cols = [x for x in df.columns if '_1_' in x and not '_p' in x]
1561
1561
  >>> animal_2_cols = [x for x in df.columns if '_2_' in x and not '_p' in x]
1562
1562
  >>> animal_1_arr = df[animal_1_cols].values.reshape(len(df), int(len(animal_1_cols)/ 2), 2)
@@ -1622,7 +1622,7 @@ class GeometryMixin(object):
1622
1622
  :return List[float]: List of overlap flags between corresponding Polygons: 1 if the polygons overlap, else 0.
1623
1623
 
1624
1624
  :example:
1625
- >>> df = read_df(file_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\outlier_corrected_movement_location\Together_2.csv", file_type='csv').astype(int)
1625
+ >>> df = read_df(file_path=r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/outlier_corrected_movement_location/Together_2.csv", file_type='csv').astype(int)
1626
1626
  >>> animal_1_cols = [x for x in df.columns if '_1_' in x and not '_p' in x]
1627
1627
  >>> animal_2_cols = [x for x in df.columns if '_2_' in x and not '_p' in x]
1628
1628
  >>> animal_1_arr = df[animal_1_cols].values.reshape(len(df), int(len(animal_1_cols)/ 2), 2)
@@ -1693,7 +1693,7 @@ class GeometryMixin(object):
1693
1693
  :rtype: List[float]
1694
1694
 
1695
1695
  :example:
1696
- >>> df = read_df(file_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\outlier_corrected_movement_location\Together_2.csv", file_type='csv').astype(int)
1696
+ >>> df = read_df(file_path=r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/outlier_corrected_movement_location/Together_2.csv", file_type='csv').astype(int)
1697
1697
  >>> animal_1_cols = [x for x in df.columns if '_1_' in x and not '_p' in x]
1698
1698
  >>> animal_2_cols = [x for x in df.columns if '_2_' in x and not '_p' in x]
1699
1699
  >>> animal_1_arr = df[animal_1_cols].values.reshape(len(df), int(len(animal_1_cols)/ 2), 2)
@@ -1763,7 +1763,7 @@ class GeometryMixin(object):
1763
1763
  :rtype: List[Polygon]
1764
1764
 
1765
1765
  :example:
1766
- >>> df = read_df(file_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\outlier_corrected_movement_location\Together_2.csv", file_type='csv').astype(int)
1766
+ >>> df = read_df(file_path=r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/outlier_corrected_movement_location/Together_2.csv", file_type='csv').astype(int)
1767
1767
  >>> animal_1_cols = [x for x in df.columns if '_1_' in x and not '_p' in x]
1768
1768
  >>> animal_1_arr = df[animal_1_cols].values.reshape(len(df), int(len(animal_1_cols)/ 2), 2)
1769
1769
  >>> animal_1_geo = GeometryMixin.bodyparts_to_polygon(data=animal_1_arr)
@@ -3525,10 +3525,10 @@ class GeometryMixin(object):
3525
3525
  :rtype: Tuple[Dict[Tuple[int, int], Dict[Tuple[int, int], float]], Dict[Tuple[int, int], Dict[Tuple[int, int], int]]]
3526
3526
 
3527
3527
  :example:
3528
- >>> video_meta_data = get_video_meta_data(video_path=r"C:\troubleshooting\mitra\project_folder\videos\708_MA149_Gq_CNO_0515.mp4")
3528
+ >>> video_meta_data = get_video_meta_data(video_path=r"C:/troubleshooting/mitra/project_folder/videos/708_MA149_Gq_CNO_0515.mp4")
3529
3529
  >>> w, h = video_meta_data['width'], video_meta_data['height']
3530
3530
  >>> grid = GeometryMixin().bucket_img_into_grid_square(bucket_grid_size=(5, 5), bucket_grid_size_mm=None, img_size=(h, w), verbose=False)[0]
3531
- >>> data = read_df(file_path=r'C:\troubleshooting\mitra\project_folder\csv\outlier_corrected_movement_location\708_MA149_Gq_CNO_0515.csv', file_type='csv')[['Nose_x', 'Nose_y']].values
3531
+ >>> data = read_df(file_path=r'C:/troubleshooting/mitra/project_folder/csv/outlier_corrected_movement_location/708_MA149_Gq_CNO_0515.csv', file_type='csv')[['Nose_x', 'Nose_y']].values
3532
3532
  >>> transition_probabilities, _ = geometry_transition_probabilities(data=data, grid=grid)
3533
3533
  """
3534
3534
 
@@ -3990,7 +3990,7 @@ class GeometryMixin(object):
3990
3990
  :rtype: np.ndarray
3991
3991
 
3992
3992
  :example:
3993
- >>> data_path = r"C:\troubleshooting\mitra\project_folder\csv\outlier_corrected_movement_location\FRR_gq_Saline_0624.csv"
3993
+ >>> data_path = r"C:/troubleshooting/mitra/project_folder/csv/outlier_corrected_movement_location/FRR_gq_Saline_0624.csv"
3994
3994
  >>> animal_data = read_df(file_path=data_path, file_type='csv', usecols=['Nose_x', 'Nose_y', 'Tail_base_x', 'Tail_base_y', 'Left_side_x', 'Left_side_y', 'Right_side_x', 'Right_side_y']).values.reshape(-1, 4, 2)[0:20].astype(np.int32)
3995
3995
  >>> animal_polygons = GeometryMixin().bodyparts_to_polygon(data=animal_data)
3996
3996
  >>> GeometryMixin.geometries_to_exterior_keypoints(geometries=animal_polygons)
@@ -4160,7 +4160,7 @@ class GeometryMixin(object):
4160
4160
  :rtype: Union[None, Dict[Any, dict]]
4161
4161
 
4162
4162
  :example I:
4163
- >>> results = GeometryMixin.sleap_csv_to_geometries(data=r"C:\troubleshooting\ants\pose_data\ant.csv")
4163
+ >>> results = GeometryMixin.sleap_csv_to_geometries(data=r"C:/troubleshooting/ants/pose_data/ant.csv")
4164
4164
  >>> # Results structure: {track_id: {frame_idx: Polygon, ...}, ...}
4165
4165
 
4166
4166
  :example II:
@@ -57,17 +57,16 @@ class ImageMixin(object):
57
57
  pass
58
58
 
59
59
  @staticmethod
60
- def brightness_intensity(imgs: List[np.ndarray], ignore_black: Optional[bool] = True) -> List[float]:
60
+ def brightness_intensity(imgs: Union[List[np.ndarray], np.ndarray], ignore_black: bool = True, verbose: bool = False) -> np.ndarray:
61
61
  """
62
62
  Compute the average brightness intensity within each image within a list.
63
63
 
64
64
  For example, (i) create a list of images containing a light cue ROI, (ii) compute brightness in each image, (iii) perform kmeans on brightness, and get the frames when the light cue is on vs off.
65
65
 
66
66
  .. seealso::
67
- For GPU acceleration, see :func:`simba.data_processors.cuda.image.img_stack_brightness`.
68
- For geometry based brightness, see :func:`simba.mixins.geometry_mixin.GeometryMixin.get_geometry_brightness_intensity`
67
+ For GPU acceleration, see :func:`simba.data_processors.cuda.image.img_stack_brightness`. For geometry based brightness, see :func:`simba.mixins.geometry_mixin.GeometryMixin.get_geometry_brightness_intensity`
69
68
 
70
- :param List[np.ndarray] imgs: List of images as arrays to calculate average brightness intensity within.
69
+ :param Union[List[np.ndarray], np.ndarray] imgs: List of images as arrays or 3/4d array of images to calculate average brightness intensity within.
71
70
  :param Optional[bool] ignore_black: If True, ignores black pixels. If the images are sliced non-rectangular geometric shapes created by ``slice_shapes_in_img``, then pixels that don't belong to the shape have been masked in black.
72
71
  :returns: List of floats of size len(imgs) with brightness intensities.
73
72
  :rtype: List[float]
@@ -77,14 +76,12 @@ class ImageMixin(object):
77
76
  >>> ImageMixin.brightness_intensity(imgs=[img], ignore_black=False)
78
77
  >>> [159.0]
79
78
  """
80
- results = []
81
- check_instance(source=f"{ImageMixin().brightness_intensity.__name__} imgs", instance=imgs, accepted_types=list)
82
- for cnt, img in enumerate(imgs):
83
- check_instance(
84
- source=f"{ImageMixin().brightness_intensity.__name__} img {cnt}",
85
- instance=img,
86
- accepted_types=np.ndarray,
87
- )
79
+ results, timer = [], SimbaTimer(start=True)
80
+ check_instance(source=f"{ImageMixin().brightness_intensity.__name__} imgs", instance=imgs, accepted_types=(list, np.ndarray,))
81
+ if isinstance(imgs, np.ndarray): imgs = np.array(imgs)
82
+ for img_cnt in range(imgs.shape[0]):
83
+ img = imgs[img_cnt]
84
+ check_instance(source=f"{ImageMixin().brightness_intensity.__name__} img {img_cnt}", instance=img, accepted_types=np.ndarray)
88
85
  if len(img) == 0:
89
86
  results.append(0)
90
87
  else:
@@ -92,7 +89,10 @@ class ImageMixin(object):
92
89
  results.append(np.ceil(np.average(img[img != 0])))
93
90
  else:
94
91
  results.append(np.ceil(np.average(img)))
95
- return results
92
+ b = np.array(results).astype(np.float32)
93
+ timer.stop_timer()
94
+ if verbose: print(f'Brightness computed in {b.shape[0]} images (elapsed time {timer.elapsed_time_str}s)')
95
+
96
96
 
97
97
  @staticmethod
98
98
  def gaussian_blur(img: np.ndarray, kernel_size: Optional[Tuple] = (9, 9)) -> np.ndarray:
@@ -1898,7 +1898,7 @@ class ImageMixin(object):
1898
1898
  :rtype: np.ndarray
1899
1899
 
1900
1900
  :example:
1901
- >>> VIDEO_PATH = r"D:\EPM_2\EPM_1.mp4"
1901
+ >>> VIDEO_PATH = r"D:/EPM_2/EPM_1.mp4"
1902
1902
  >>> img = read_img_batch_from_video(video_path=VIDEO_PATH, greyscale=True, start_frm=0, end_frm=15, core_cnt=1)
1903
1903
  >>> imgs = np.stack(list(img.values()))
1904
1904
  >>> resized_img = resize_img_stack(imgs=imgs)
@@ -3278,10 +3278,34 @@ class Statistics(FeatureExtractionMixin):
3278
3278
 
3279
3279
  Youden's J statistic is a measure of the overall performance of a binary classification test, taking into account both sensitivity (true positive rate) and specificity (true negative rate).
3280
3280
 
3281
- :param sample_1: The first binary array.
3282
- :param sample_2: The second binary array.
3283
- :return: Youden's J statistic.
3281
+ The Youden's J statistic is calculated as:
3282
+
3283
+ .. math::
3284
+ J = \text{sensitivity} + \text{specificity} - 1
3285
+
3286
+ where:
3287
+
3288
+ - :math:`\text{sensitivity} = \frac{TP}{TP + FN}` is the true positive rate
3289
+ - :math:`\text{specificity} = \frac{TN}{TN + FP}` is the true negative rate
3290
+
3291
+ The statistic ranges from -1 to 1, where:
3292
+ - :math:`J = 1` indicates perfect classification
3293
+ - :math:`J = 0` indicates the test performs no better than random
3294
+ - :math:`J < 0` indicates the test performs worse than random
3295
+
3296
+ :param sample_1: The first binary array (ground truth or reference).
3297
+ :param sample_2: The second binary array (predictions or test results).
3298
+ :return: Youden's J statistic. Returns NaN if either sensitivity or specificity cannot be calculated (division by zero).
3284
3299
  :rtype: float
3300
+
3301
+ :references:
3302
+ .. [1] Youden, W. J. (1950). Index for rating diagnostic tests. Cancer, 3(1), 32-35.
3303
+ https://acsjournals.onlinelibrary.wiley.com/doi/abs/10.1002/1097-0142(1950)3:1%3C32::AID-CNCR2820030106%3E3.0.CO;2-3
3304
+
3305
+ :example:
3306
+ >>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 1, 0, 0])
3307
+ >>> y_pred = np.array([1, 1, 0, 1, 1, 0, 1, 0, 0, 0])
3308
+ >>> j = Statistics.youden_j(sample_1=y_true, sample_2=y_pred)
3285
3309
  """
3286
3310
 
3287
3311
  check_valid_array(data=sample_1, source=f'{Statistics.youden_j.__name__} sample_1', accepted_ndims=(1,), accepted_values=[0, 1])
@@ -4257,7 +4281,7 @@ class Statistics(FeatureExtractionMixin):
4257
4281
  return separation_trace / compactness
4258
4282
 
4259
4283
  @staticmethod
4260
- def i_index(x: np.ndarray, y: np.ndarray):
4284
+ def i_index(x: np.ndarray, y: np.ndarray, verbose: bool = False) -> float:
4261
4285
 
4262
4286
  """
4263
4287
  Calculate the I-Index for evaluating clustering quality.
@@ -4282,9 +4306,10 @@ class Statistics(FeatureExtractionMixin):
4282
4306
  >>> X, y = make_blobs(n_samples=5000, centers=20, n_features=3, random_state=0, cluster_std=0.1)
4283
4307
  >>> Statistics.i_index(x=X, y=y)
4284
4308
  """
4309
+ timer = SimbaTimer(start=True)
4285
4310
  check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
4286
4311
  check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
4287
- _ = get_unique_values_in_iterable(data=y, name=Statistics.i_index.__name__, min=2)
4312
+ cluster_cnt = get_unique_values_in_iterable(data=y, name=Statistics.i_index.__name__, min=2)
4288
4313
  unique_y = np.unique(y)
4289
4314
  n_y = unique_y.shape[0]
4290
4315
  global_centroid = np.mean(x, axis=0)
@@ -4296,7 +4321,12 @@ class Statistics(FeatureExtractionMixin):
4296
4321
  cluster_centroid = np.mean(cluster_obs, axis=0)
4297
4322
  swc += np.sum(np.linalg.norm(cluster_obs - cluster_centroid, axis=1) ** 2)
4298
4323
 
4299
- return sst / (n_y * swc)
4324
+
4325
+ i_index = np.float32(sst / (n_y * swc))
4326
+ timer.stop_timer()
4327
+ if verbose: print(f'I-index for {x.shape[0]} observations in {cluster_cnt} clusters computed (elapsed time: {timer.elapsed_time_str}s)')
4328
+ return i_index
4329
+
4300
4330
 
4301
4331
  @staticmethod
4302
4332
  def sd_index(x: np.ndarray, y: np.ndarray) -> float:
@@ -5298,7 +5328,7 @@ class Statistics(FeatureExtractionMixin):
5298
5328
  """
5299
5329
  Compute one-way ANOVAs comparing each column (axis 1) on two arrays.
5300
5330
 
5301
- .. notes::
5331
+ .. note::
5302
5332
  Use for computing and presenting aggregate statistics. Not suitable for featurization.
5303
5333
 
5304
5334
  .. seealso::
@@ -5336,7 +5366,7 @@ class Statistics(FeatureExtractionMixin):
5336
5366
  """
5337
5367
  Compute Kruskal-Wallis comparing each column (axis 1) on two arrays.
5338
5368
 
5339
- .. notes::
5369
+ .. note::
5340
5370
  Use for computing and presenting aggregate statistics. Not suitable for featurization.
5341
5371
 
5342
5372
  .. seealso::
@@ -5373,7 +5403,7 @@ class Statistics(FeatureExtractionMixin):
5373
5403
  """
5374
5404
  Compute pairwise grouped Tukey-HSD tests.
5375
5405
 
5376
- .. notes::
5406
+ .. note::
5377
5407
  Use for computing and presenting aggregate statistics. Not suitable for featurization.
5378
5408
 
5379
5409
  :param np.ndarray data: 2D array with observations rowwise (axis 0) and features columnwise (axis 1)
@@ -2198,7 +2198,7 @@ class TimeseriesFeatureMixin(object):
2198
2198
  :example:
2199
2199
  >>> x = np.random.randint(0, 100, (400, 2))
2200
2200
  >>> results_1 = TimeseriesFeatureMixin.sliding_entropy_of_directional_changes(x=x, bins=16, window_size=5.0, sample_rate=30)
2201
- >>> x = pd.read_csv(r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\input_csv\Together_1.csv")[['Ear_left_1_x', 'Ear_left_1_y']].values
2201
+ >>> x = pd.read_csv(r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/input_csv/Together_1.csv")[['Ear_left_1_x', 'Ear_left_1_y']].values
2202
2202
  >>> results_2 = TimeseriesFeatureMixin.sliding_entropy_of_directional_changes(x=x, bins=16, window_size=5.0, sample_rate=30)
2203
2203
  """
2204
2204