simba-uw-tf-dev 4.6.4__py3-none-any.whl → 4.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- simba/data_processors/cuda/geometry.py +45 -27
- simba/data_processors/cuda/image.py +1620 -1600
- simba/data_processors/cuda/statistics.py +17 -9
- simba/data_processors/egocentric_aligner.py +24 -6
- simba/data_processors/kleinberg_calculator.py +6 -2
- simba/feature_extractors/feature_subsets.py +12 -5
- simba/feature_extractors/straub_tail_analyzer.py +0 -2
- simba/mixins/statistics_mixin.py +9 -2
- simba/sandbox/analyze_runtimes.py +30 -0
- simba/sandbox/cuda/egocentric_rotator.py +374 -374
- simba/sandbox/proboscis_to_tip.py +28 -0
- simba/sandbox/test_directionality.py +47 -0
- simba/sandbox/test_nonstatic_directionality.py +27 -0
- simba/sandbox/test_pycharm_cuda.py +51 -0
- simba/sandbox/test_simba_install.py +41 -0
- simba/sandbox/test_static_directionality.py +26 -0
- simba/sandbox/test_static_directionality_2d.py +26 -0
- simba/sandbox/verify_env.py +42 -0
- simba/ui/pop_ups/fsttc_pop_up.py +27 -25
- simba/ui/pop_ups/kleinberg_pop_up.py +3 -2
- simba/utils/data.py +0 -1
- simba/utils/errors.py +441 -440
- simba/utils/lookups.py +1203 -1203
- simba/utils/read_write.py +38 -13
- simba/video_processors/egocentric_video_rotator.py +41 -36
- simba/video_processors/video_processing.py +5247 -5233
- simba/video_processors/videos_to_frames.py +41 -31
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.6.dist-info}/METADATA +2 -2
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.6.dist-info}/RECORD +33 -24
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.6.dist-info}/LICENSE +0 -0
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.6.dist-info}/WHEEL +0 -0
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.6.dist-info}/entry_points.txt +0 -0
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.6.dist-info}/top_level.txt +0 -0
|
@@ -5,6 +5,8 @@ import math
|
|
|
5
5
|
from itertools import combinations
|
|
6
6
|
from typing import Optional, Tuple
|
|
7
7
|
|
|
8
|
+
from simba.utils.printing import SimbaTimer
|
|
9
|
+
|
|
8
10
|
try:
|
|
9
11
|
from typing import Literal
|
|
10
12
|
except:
|
|
@@ -20,11 +22,11 @@ from simba.utils.warnings import GPUToolsWarning
|
|
|
20
22
|
try:
|
|
21
23
|
import cupy as cp
|
|
22
24
|
from cuml.metrics import kl_divergence as kl_divergence_gpu
|
|
23
|
-
from cuml.metrics.cluster.adjusted_rand_index import adjusted_rand_score
|
|
24
|
-
from cuml.metrics.cluster.silhouette_score import cython_silhouette_score
|
|
25
|
+
#from cuml.metrics.cluster.adjusted_rand_index import adjusted_rand_score
|
|
26
|
+
#from cuml.metrics.cluster.silhouette_score import cython_silhouette_score
|
|
25
27
|
from cupyx.scipy.spatial.distance import cdist
|
|
26
|
-
except:
|
|
27
|
-
GPUToolsWarning(msg='GPU tools not detected, reverting to CPU')
|
|
28
|
+
except Exception as e:
|
|
29
|
+
GPUToolsWarning(msg=f'GPU tools not detected, reverting to CPU: {e.args}')
|
|
28
30
|
import numpy as cp
|
|
29
31
|
from scipy.spatial.distance import cdist
|
|
30
32
|
from scipy.stats import entropy as kl_divergence_gpu
|
|
@@ -227,7 +229,6 @@ def get_euclidean_distance_cupy(x: np.ndarray,
|
|
|
227
229
|
using CuPy for GPU acceleration. The computation is performed in batches to handle large
|
|
228
230
|
datasets efficiently.
|
|
229
231
|
|
|
230
|
-
|
|
231
232
|
.. seealso::
|
|
232
233
|
For CPU function see :func:`~simba.mixins.feature_extraction_mixin.FeatureExtractionMixin.framewise_euclidean_distance`.
|
|
233
234
|
For CUDA JIT function see :func:`~simba.data_processors.cuda.statistics.get_euclidean_distance_cuda`.
|
|
@@ -834,8 +835,11 @@ def i_index(x: np.ndarray, y: np.ndarray):
|
|
|
834
835
|
return i_idx
|
|
835
836
|
|
|
836
837
|
|
|
837
|
-
def kullback_leibler_divergence_gpu(x: np.ndarray,
|
|
838
|
-
|
|
838
|
+
def kullback_leibler_divergence_gpu(x: np.ndarray,
|
|
839
|
+
y: np.ndarray,
|
|
840
|
+
fill_value: int = 1,
|
|
841
|
+
bucket_method: Literal["fd", "doane", "auto", "scott", "stone", "rice", "sturges", "sqrt"] = "scott",
|
|
842
|
+
verbose: bool = False) -> float:
|
|
839
843
|
"""
|
|
840
844
|
Compute Kullback-Leibler divergence between two distributions.
|
|
841
845
|
|
|
@@ -847,7 +851,6 @@ def kullback_leibler_divergence_gpu(x: np.ndarray, y: np.ndarray, fill_value: in
|
|
|
847
851
|
.. seealso::
|
|
848
852
|
For CPU implementation, see :func:`simba.mixins.statistics_mixin.Statistics.kullback_leibler_divergence`.
|
|
849
853
|
|
|
850
|
-
|
|
851
854
|
:param ndarray x: First 1d array representing feature values.
|
|
852
855
|
:param ndarray y: Second 1d array representing feature values.
|
|
853
856
|
:param Optional[int] fill_value: Optional pseudo-value to use to fill empty buckets in ``y`` histogram
|
|
@@ -860,13 +863,18 @@ def kullback_leibler_divergence_gpu(x: np.ndarray, y: np.ndarray, fill_value: in
|
|
|
860
863
|
>>> kl = kullback_leibler_divergence_gpu(x=x, y=y)
|
|
861
864
|
"""
|
|
862
865
|
|
|
866
|
+
timer = SimbaTimer(start=True)
|
|
867
|
+
|
|
863
868
|
bin_width, bin_count = bucket_data(data=x, method=bucket_method)
|
|
864
869
|
r = np.array([np.min(x), np.max(x)])
|
|
865
870
|
x_hist = Statistics._hist_1d(data=x, bin_count=bin_count, range=r)
|
|
866
871
|
y_hist = Statistics._hist_1d(data=y, bin_count=bin_count, range=r)
|
|
867
872
|
y_hist[y_hist == 0] = fill_value
|
|
868
873
|
x_hist, y_hist = x_hist / np.sum(x_hist), y_hist / np.sum(y_hist)
|
|
869
|
-
|
|
874
|
+
r = kl_divergence_gpu(P=x_hist.astype(np.float32), Q=y_hist.astype(np.float32), convert_dtype=False)
|
|
875
|
+
timer.stop_timer()
|
|
876
|
+
if verbose: print(f'KL divergence performed on {x.shape[0]} observations (elapsed time: {timer.elapsed_time_str}s)')
|
|
877
|
+
return r
|
|
870
878
|
|
|
871
879
|
|
|
872
880
|
@cuda.jit()
|
|
@@ -7,7 +7,8 @@ import pandas as pd
|
|
|
7
7
|
from simba.utils.checks import (check_if_dir_exists, check_if_valid_rgb_tuple,
|
|
8
8
|
check_int, check_str, check_valid_boolean,
|
|
9
9
|
check_valid_dataframe, check_valid_tuple)
|
|
10
|
-
from simba.utils.data import egocentrically_align_pose_numba
|
|
10
|
+
from simba.utils.data import (egocentrically_align_pose_numba, get_cpu_pool,
|
|
11
|
+
terminate_cpu_pool)
|
|
11
12
|
from simba.utils.enums import Formats, Options
|
|
12
13
|
from simba.utils.errors import InvalidInputError
|
|
13
14
|
from simba.utils.printing import SimbaTimer, stdout_success
|
|
@@ -73,7 +74,7 @@ class EgocentricalAligner():
|
|
|
73
74
|
check_str(name=f'{self.__class__.__name__} anchor_1', value=anchor_1, allow_blank=False)
|
|
74
75
|
check_str(name=f'{self.__class__.__name__} anchor_2', value=anchor_2, allow_blank=False)
|
|
75
76
|
check_int(name=f'{self.__class__.__name__} core_cnt', value=core_cnt, min_value=-1, max_value=find_core_cnt()[0], unaccepted_vals=[0])
|
|
76
|
-
if core_cnt == -1
|
|
77
|
+
self.core_cnt = find_core_cnt()[0] if core_cnt == -1 or core_cnt > find_core_cnt()[0] else core_cnt
|
|
77
78
|
check_int(name=f'{self.__class__.__name__} direction', value=direction, min_value=0, max_value=360)
|
|
78
79
|
if isinstance(anchor_location, tuple):
|
|
79
80
|
check_valid_tuple(x=anchor_location, source=f'{self.__class__.__name__} anchor_location', accepted_lengths=(2,), valid_dtypes=(int,))
|
|
@@ -98,6 +99,7 @@ class EgocentricalAligner():
|
|
|
98
99
|
|
|
99
100
|
def run(self):
|
|
100
101
|
timer = SimbaTimer(start=True)
|
|
102
|
+
self.pool = None if not self.rotate_video else get_cpu_pool(core_cnt=self.core_cnt, source=self.__class__.__name__)
|
|
101
103
|
for file_cnt, file_path in enumerate(self.data_paths):
|
|
102
104
|
video_timer = SimbaTimer(start=True)
|
|
103
105
|
_, self.video_name, _ = get_fn_ext(filepath=file_path)
|
|
@@ -127,8 +129,7 @@ class EgocentricalAligner():
|
|
|
127
129
|
if self.verbose:
|
|
128
130
|
print(f'{self.video_name} complete, saved at {save_path} (elapsed time: {video_timer.elapsed_time_str}s)')
|
|
129
131
|
if self.rotate_video:
|
|
130
|
-
if self.verbose:
|
|
131
|
-
print(f'Rotating video {self.video_name}...')
|
|
132
|
+
if self.verbose: print(f'Rotating video {self.video_name}...')
|
|
132
133
|
video_path = find_video_of_file(video_dir=self.videos_dir, filename=self.video_name, raise_error=False)
|
|
133
134
|
save_path = os.path.join(self.save_dir, f'{self.video_name}.mp4')
|
|
134
135
|
video_rotator = EgocentricVideoRotator(video_path=video_path,
|
|
@@ -139,11 +140,13 @@ class EgocentricalAligner():
|
|
|
139
140
|
gpu=self.gpu,
|
|
140
141
|
fill_clr=self.fill_clr,
|
|
141
142
|
core_cnt=self.core_cnt,
|
|
142
|
-
save_path=save_path
|
|
143
|
+
save_path=save_path,
|
|
144
|
+
pool=self.pool)
|
|
143
145
|
video_rotator.run()
|
|
144
146
|
if self.verbose:
|
|
145
147
|
print(f'Rotated data for video {self.video_name} ({file_cnt+1}/{len(self.data_paths)}) saved in {self.save_dir}.')
|
|
146
148
|
timer.stop_timer()
|
|
149
|
+
terminate_cpu_pool(pool=self.pool, source=self.__class__.__name__)
|
|
147
150
|
stdout_success(msg=f'Egocentrically aligned data for {len(self.data_paths)} files saved in {self.save_dir}', elapsed_time=timer.elapsed_time_str)
|
|
148
151
|
|
|
149
152
|
|
|
@@ -156,9 +159,24 @@ class EgocentricalAligner():
|
|
|
156
159
|
# direction=0,
|
|
157
160
|
# gpu=True,
|
|
158
161
|
# anchor_location=(600, 300),
|
|
159
|
-
# fill_clr=(128,128,128)
|
|
162
|
+
# fill_clr=(128,128,128),
|
|
163
|
+
# core_cnt=18)
|
|
160
164
|
# aligner.run()
|
|
161
165
|
|
|
166
|
+
|
|
167
|
+
if __name__ == "__main__":
|
|
168
|
+
aligner = EgocentricalAligner(anchor_1='butt/proximal tail',
|
|
169
|
+
anchor_2='snout',
|
|
170
|
+
data_dir=r'C:\troubleshooting\open_field_below\project_folder\csv\outlier_corrected_movement_location',
|
|
171
|
+
videos_dir=r'C:\troubleshooting\open_field_below\project_folder\videos',
|
|
172
|
+
save_dir=r"C:\troubleshooting\open_field_below\project_folder\videos\rotated",
|
|
173
|
+
direction=0,
|
|
174
|
+
gpu=True,
|
|
175
|
+
anchor_location=(600, 300),
|
|
176
|
+
fill_clr=(128,128,128),
|
|
177
|
+
core_cnt=18)
|
|
178
|
+
aligner.run()
|
|
179
|
+
|
|
162
180
|
# aligner = EgocentricalAligner(anchor_1='tail_base',
|
|
163
181
|
# anchor_2='nose',
|
|
164
182
|
# data_dir=r'C:\Users\sroni\OneDrive\Desktop\rotate_ex\data',
|
|
@@ -13,10 +13,14 @@ from simba.data_processors.pybursts_calculator import kleinberg_burst_detection
|
|
|
13
13
|
from simba.mixins.config_reader import ConfigReader
|
|
14
14
|
from simba.utils.checks import (check_float, check_if_dir_exists,
|
|
15
15
|
check_if_filepath_list_is_empty, check_int,
|
|
16
|
-
check_that_column_exist,
|
|
16
|
+
check_that_column_exist, check_valid_boolean,
|
|
17
|
+
check_valid_lst)
|
|
17
18
|
from simba.utils.enums import Paths, TagNames
|
|
18
19
|
from simba.utils.printing import SimbaTimer, log_event, stdout_success
|
|
19
|
-
from simba.utils.read_write import
|
|
20
|
+
from simba.utils.read_write import (copy_files_to_directory,
|
|
21
|
+
find_files_of_filetypes_in_directory,
|
|
22
|
+
get_current_time, get_fn_ext, read_df,
|
|
23
|
+
remove_a_folder, write_df)
|
|
20
24
|
from simba.utils.warnings import KleinbergWarning
|
|
21
25
|
|
|
22
26
|
|
|
@@ -154,11 +154,11 @@ class FeatureSubsetsCalculator(ConfigReader, TrainModelMixin):
|
|
|
154
154
|
self.within_animal_four_point_combs[animal] = np.array(list(combinations(animal_bps, 4)))
|
|
155
155
|
|
|
156
156
|
def _get_two_point_bp_distances(self):
|
|
157
|
-
for c in self.two_point_combs:
|
|
157
|
+
for cnt, c in enumerate(self.two_point_combs):
|
|
158
158
|
x1, y1, x2, y2 = list(sum([(f"{x}_x", f"{y}_y") for (x, y) in zip(c, c)], ()))
|
|
159
159
|
bp1 = self.data_df[[x1, y1]].values
|
|
160
160
|
bp2 = self.data_df[[x2, y2]].values
|
|
161
|
-
self.results[f"Distance (mm) {c[0]}-{c[1]}"] = FeatureExtractionMixin.
|
|
161
|
+
self.results[f"Distance (mm) {c[0]}-{c[1]}"] = FeatureExtractionMixin.bodypart_distance(bp1_coords=bp1.astype(np.int32), bp2_coords=bp2.astype(np.int32), px_per_mm=np.float64(self.px_per_mm), in_centimeters=False)
|
|
162
162
|
|
|
163
163
|
def __get_three_point_angles(self):
|
|
164
164
|
for animal, points in self.within_animal_three_point_combs.items():
|
|
@@ -342,13 +342,20 @@ class FeatureSubsetsCalculator(ConfigReader, TrainModelMixin):
|
|
|
342
342
|
|
|
343
343
|
|
|
344
344
|
|
|
345
|
+
# test = FeatureSubsetsCalculator(config_path=r"C:\troubleshooting\srami0619\project_folder\project_config.ini",
|
|
346
|
+
# feature_families=[TWO_POINT_BP_DISTANCES],
|
|
347
|
+
# append_to_features_extracted=False,
|
|
348
|
+
# file_checks=True,
|
|
349
|
+
# append_to_targets_inserted=False)
|
|
350
|
+
# test.run()
|
|
351
|
+
|
|
352
|
+
|
|
345
353
|
|
|
346
354
|
# test = FeatureSubsetsCalculator(config_path=r"C:\troubleshooting\mitra\project_folder\project_config.ini",
|
|
347
|
-
# feature_families=[
|
|
355
|
+
# feature_families=[TWO_POINT_BP_DISTANCES],
|
|
348
356
|
# append_to_features_extracted=False,
|
|
349
357
|
# file_checks=True,
|
|
350
|
-
# append_to_targets_inserted=False
|
|
351
|
-
# save_dir=r"C:\troubleshooting\mitra\project_folder\csv\feature_subset")
|
|
358
|
+
# append_to_targets_inserted=False)
|
|
352
359
|
# test.run()
|
|
353
360
|
|
|
354
361
|
#
|
simba/mixins/statistics_mixin.py
CHANGED
|
@@ -8,6 +8,8 @@ from sklearn.metrics import (adjusted_mutual_info_score, adjusted_rand_score,
|
|
|
8
8
|
fowlkes_mallows_score)
|
|
9
9
|
from sklearn.neighbors import LocalOutlierFactor
|
|
10
10
|
|
|
11
|
+
from simba.utils.printing import SimbaTimer
|
|
12
|
+
|
|
11
13
|
try:
|
|
12
14
|
from typing import Literal
|
|
13
15
|
except:
|
|
@@ -538,7 +540,8 @@ class Statistics(FeatureExtractionMixin):
|
|
|
538
540
|
sample_1: np.ndarray,
|
|
539
541
|
sample_2: np.ndarray,
|
|
540
542
|
fill_value: Optional[int] = 1,
|
|
541
|
-
bucket_method: Literal["fd", "doane", "auto", "scott", "stone", "rice", "sturges", "sqrt"] = "auto"
|
|
543
|
+
bucket_method: Literal["fd", "doane", "auto", "scott", "stone", "rice", "sturges", "sqrt"] = "auto",
|
|
544
|
+
verbose: bool = False) -> float:
|
|
542
545
|
|
|
543
546
|
r"""
|
|
544
547
|
Compute Kullback-Leibler divergence between two distributions.
|
|
@@ -562,6 +565,7 @@ class Statistics(FeatureExtractionMixin):
|
|
|
562
565
|
:returns: Kullback-Leibler divergence between ``sample_1`` and ``sample_2``
|
|
563
566
|
:rtype: float
|
|
564
567
|
"""
|
|
568
|
+
timer = SimbaTimer(start=True)
|
|
565
569
|
check_valid_array(data=sample_1, source=Statistics.kullback_leibler_divergence.__name__, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
|
|
566
570
|
check_valid_array(data=sample_2, source=Statistics.kullback_leibler_divergence.__name__, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
|
|
567
571
|
check_str(name=f"{self.__class__.__name__} bucket_method", value=bucket_method, options=Options.BUCKET_METHODS.value)
|
|
@@ -573,7 +577,10 @@ class Statistics(FeatureExtractionMixin):
|
|
|
573
577
|
sample_1_hist[sample_1_hist == 0] = fill_value
|
|
574
578
|
sample_2_hist[sample_2_hist == 0] = fill_value
|
|
575
579
|
sample_1_hist, sample_2_hist = sample_1_hist / np.sum(sample_1_hist), sample_2_hist / np.sum(sample_2_hist)
|
|
576
|
-
|
|
580
|
+
kl = stats.entropy(pk=sample_1_hist, qk=sample_2_hist)
|
|
581
|
+
timer.stop_timer()
|
|
582
|
+
if verbose: print(f'KL divergence performed on {sample_1.shape[0]} observations (elapsed time: {timer.elapsed_time_str}s)')
|
|
583
|
+
return kl
|
|
577
584
|
|
|
578
585
|
def rolling_kullback_leibler_divergence(
|
|
579
586
|
self,
|
|
@@ -0,0 +1,30 @@
"""Analyze runtime statistics for directionality_to_nonstatic_target.

Prints one table row per observation count with the mean, standard deviation,
minimum and maximum of the recorded runtimes, plus the throughput in million
observations per second derived from the mean runtime.
"""
import numpy as np
from collections import defaultdict

# Recorded runtimes in seconds, keyed by observation count (three runs each).
# The first run of each series is kept in the statistics; see the note printed
# at the end of the table about JIT compilation overhead.
data = {
    10000: [0.4389, 0.0008, 0.0012],
    100000: [0.0063, 0.0052, 0.0052],
    1000000: [0.0768, 0.0306, 0.0239],
    10000000: [0.2195, 0.2122, 0.2083],
    50000000: [1.8936, 1.5664, 1.2548]
}

HEADER = f"{'Observations':<15} {'Mean (s)':<12} {'Std (s)':<12} {'Min (s)':<12} {'Max (s)':<12} {'Throughput (M obs/s)':<20}"
# Size the separator from the header so the rule always spans every column
# (a fixed width of 80 stopped short of the 88-character header).
RULE = "=" * len(HEADER)


def summarize_runtimes(obs_count: int, times) -> tuple:
    """Summarize the runtimes recorded for one observation count.

    :param int obs_count: Number of observations processed in each run.
    :param times: Runtimes in seconds for the repeated runs.
    :returns: ``(mean, std, min, max, throughput)`` where ``std`` is NumPy's default
        (population) standard deviation and ``throughput`` is million observations
        per second computed from the mean runtime.
    :rtype: tuple
    """
    times = np.array(times)
    mean_time = np.mean(times)
    throughput = obs_count / (mean_time * 1_000_000)  # Million observations per second
    return mean_time, np.std(times), np.min(times), np.max(times), throughput


# Calculate statistics
print(RULE)
print(HEADER)
print(RULE)

for obs_count in sorted(data.keys()):
    mean_time, std_time, min_time, max_time, throughput = summarize_runtimes(obs_count, data[obs_count])
    print(f"{obs_count:<15,} {mean_time:<12.4f} {std_time:<12.4f} {min_time:<12.4f} {max_time:<12.4f} {throughput:<20.2f}")

print(RULE)
print("\nNote: First run typically includes JIT compilation overhead (especially for 10k observations)")