simba-uw-tf-dev 4.6.4__py3-none-any.whl → 4.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of simba-uw-tf-dev might be problematic. Click here for more details.
- simba/data_processors/blob_location_computer.py +1 -1
- simba/data_processors/cuda/geometry.py +45 -27
- simba/data_processors/cuda/image.py +1624 -1600
- simba/data_processors/cuda/statistics.py +72 -25
- simba/data_processors/cuda/timeseries.py +1 -1
- simba/data_processors/egocentric_aligner.py +25 -7
- simba/data_processors/kleinberg_calculator.py +6 -2
- simba/feature_extractors/feature_subsets.py +14 -7
- simba/feature_extractors/straub_tail_analyzer.py +4 -6
- simba/labelling/standard_labeller.py +1 -1
- simba/mixins/geometry_mixin.py +8 -8
- simba/mixins/image_mixin.py +14 -14
- simba/mixins/statistics_mixin.py +48 -11
- simba/mixins/timeseries_features_mixin.py +1 -1
- simba/mixins/train_model_mixin.py +65 -27
- simba/model/inference_batch.py +1 -1
- simba/model/yolo_seg_inference.py +3 -3
- simba/plotting/heat_mapper_clf_mp.py +2 -2
- simba/pose_importers/simba_blob_importer.py +3 -3
- simba/roi_tools/roi_aggregate_stats_mp.py +1 -1
- simba/roi_tools/roi_clf_calculator_mp.py +1 -1
- simba/sandbox/analyze_runtimes.py +30 -0
- simba/sandbox/cuda/egocentric_rotator.py +374 -374
- simba/sandbox/proboscis_to_tip.py +28 -0
- simba/sandbox/test_directionality.py +47 -0
- simba/sandbox/test_nonstatic_directionality.py +27 -0
- simba/sandbox/test_pycharm_cuda.py +51 -0
- simba/sandbox/test_simba_install.py +41 -0
- simba/sandbox/test_static_directionality.py +26 -0
- simba/sandbox/test_static_directionality_2d.py +26 -0
- simba/sandbox/verify_env.py +42 -0
- simba/third_party_label_appenders/transform/coco_keypoints_to_yolo.py +3 -3
- simba/third_party_label_appenders/transform/coco_keypoints_to_yolo_bbox.py +2 -2
- simba/ui/pop_ups/fsttc_pop_up.py +27 -25
- simba/ui/pop_ups/kleinberg_pop_up.py +3 -2
- simba/utils/custom_feature_extractor.py +1 -1
- simba/utils/data.py +2 -3
- simba/utils/errors.py +441 -440
- simba/utils/lookups.py +1203 -1203
- simba/utils/read_write.py +70 -31
- simba/utils/yolo.py +10 -1
- simba/video_processors/blob_tracking_executor.py +2 -2
- simba/video_processors/clahe_ui.py +1 -1
- simba/video_processors/egocentric_video_rotator.py +44 -39
- simba/video_processors/multi_cropper.py +1 -1
- simba/video_processors/video_processing.py +5264 -5233
- simba/video_processors/videos_to_frames.py +43 -33
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/METADATA +4 -3
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/RECORD +53 -44
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/LICENSE +0 -0
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/WHEEL +0 -0
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/entry_points.txt +0 -0
- {simba_uw_tf_dev-4.6.4.dist-info → simba_uw_tf_dev-4.6.7.dist-info}/top_level.txt +0 -0
simba/mixins/statistics_mixin.py
CHANGED
|
@@ -8,6 +8,8 @@ from sklearn.metrics import (adjusted_mutual_info_score, adjusted_rand_score,
|
|
|
8
8
|
fowlkes_mallows_score)
|
|
9
9
|
from sklearn.neighbors import LocalOutlierFactor
|
|
10
10
|
|
|
11
|
+
from simba.utils.printing import SimbaTimer
|
|
12
|
+
|
|
11
13
|
try:
|
|
12
14
|
from typing import Literal
|
|
13
15
|
except:
|
|
@@ -538,7 +540,8 @@ class Statistics(FeatureExtractionMixin):
|
|
|
538
540
|
sample_1: np.ndarray,
|
|
539
541
|
sample_2: np.ndarray,
|
|
540
542
|
fill_value: Optional[int] = 1,
|
|
541
|
-
bucket_method: Literal["fd", "doane", "auto", "scott", "stone", "rice", "sturges", "sqrt"] = "auto"
|
|
543
|
+
bucket_method: Literal["fd", "doane", "auto", "scott", "stone", "rice", "sturges", "sqrt"] = "auto",
|
|
544
|
+
verbose: bool = False) -> float:
|
|
542
545
|
|
|
543
546
|
r"""
|
|
544
547
|
Compute Kullback-Leibler divergence between two distributions.
|
|
@@ -562,6 +565,7 @@ class Statistics(FeatureExtractionMixin):
|
|
|
562
565
|
:returns: Kullback-Leibler divergence between ``sample_1`` and ``sample_2``
|
|
563
566
|
:rtype: float
|
|
564
567
|
"""
|
|
568
|
+
timer = SimbaTimer(start=True)
|
|
565
569
|
check_valid_array(data=sample_1, source=Statistics.kullback_leibler_divergence.__name__, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
|
|
566
570
|
check_valid_array(data=sample_2, source=Statistics.kullback_leibler_divergence.__name__, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
|
|
567
571
|
check_str(name=f"{self.__class__.__name__} bucket_method", value=bucket_method, options=Options.BUCKET_METHODS.value)
|
|
@@ -573,7 +577,10 @@ class Statistics(FeatureExtractionMixin):
|
|
|
573
577
|
sample_1_hist[sample_1_hist == 0] = fill_value
|
|
574
578
|
sample_2_hist[sample_2_hist == 0] = fill_value
|
|
575
579
|
sample_1_hist, sample_2_hist = sample_1_hist / np.sum(sample_1_hist), sample_2_hist / np.sum(sample_2_hist)
|
|
576
|
-
|
|
580
|
+
kl = stats.entropy(pk=sample_1_hist, qk=sample_2_hist)
|
|
581
|
+
timer.stop_timer()
|
|
582
|
+
if verbose: print(f'KL divergence performed on {sample_1.shape[0]} observations (elapsed time: {timer.elapsed_time_str}s)')
|
|
583
|
+
return kl
|
|
577
584
|
|
|
578
585
|
def rolling_kullback_leibler_divergence(
|
|
579
586
|
self,
|
|
@@ -3271,10 +3278,34 @@ class Statistics(FeatureExtractionMixin):
|
|
|
3271
3278
|
|
|
3272
3279
|
Youden's J statistic is a measure of the overall performance of a binary classification test, taking into account both sensitivity (true positive rate) and specificity (true negative rate).
|
|
3273
3280
|
|
|
3274
|
-
|
|
3275
|
-
|
|
3276
|
-
|
|
3281
|
+
The Youden's J statistic is calculated as:
|
|
3282
|
+
|
|
3283
|
+
.. math::
|
|
3284
|
+
J = \text{sensitivity} + \text{specificity} - 1
|
|
3285
|
+
|
|
3286
|
+
where:
|
|
3287
|
+
|
|
3288
|
+
- :math:`\text{sensitivity} = \frac{TP}{TP + FN}` is the true positive rate
|
|
3289
|
+
- :math:`\text{specificity} = \frac{TN}{TN + FP}` is the true negative rate
|
|
3290
|
+
|
|
3291
|
+
The statistic ranges from -1 to 1, where:
|
|
3292
|
+
- :math:`J = 1` indicates perfect classification
|
|
3293
|
+
- :math:`J = 0` indicates the test performs no better than random
|
|
3294
|
+
- :math:`J < 0` indicates the test performs worse than random
|
|
3295
|
+
|
|
3296
|
+
:param sample_1: The first binary array (ground truth or reference).
|
|
3297
|
+
:param sample_2: The second binary array (predictions or test results).
|
|
3298
|
+
:return: Youden's J statistic. Returns NaN if either sensitivity or specificity cannot be calculated (division by zero).
|
|
3277
3299
|
:rtype: float
|
|
3300
|
+
|
|
3301
|
+
:references:
|
|
3302
|
+
.. [1] Youden, W. J. (1950). Index for rating diagnostic tests. Cancer, 3(1), 32-35.
|
|
3303
|
+
https://acsjournals.onlinelibrary.wiley.com/doi/abs/10.1002/1097-0142(1950)3:1%3C32::AID-CNCR2820030106%3E3.0.CO;2-3
|
|
3304
|
+
|
|
3305
|
+
:example:
|
|
3306
|
+
>>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 1, 0, 0])
|
|
3307
|
+
>>> y_pred = np.array([1, 1, 0, 1, 1, 0, 1, 0, 0, 0])
|
|
3308
|
+
>>> j = Statistics.youden_j(sample_1=y_true, sample_2=y_pred)
|
|
3278
3309
|
"""
|
|
3279
3310
|
|
|
3280
3311
|
check_valid_array(data=sample_1, source=f'{Statistics.youden_j.__name__} sample_1', accepted_ndims=(1,), accepted_values=[0, 1])
|
|
@@ -4250,7 +4281,7 @@ class Statistics(FeatureExtractionMixin):
|
|
|
4250
4281
|
return separation_trace / compactness
|
|
4251
4282
|
|
|
4252
4283
|
@staticmethod
|
|
4253
|
-
def i_index(x: np.ndarray, y: np.ndarray):
|
|
4284
|
+
def i_index(x: np.ndarray, y: np.ndarray, verbose: bool = False) -> float:
|
|
4254
4285
|
|
|
4255
4286
|
"""
|
|
4256
4287
|
Calculate the I-Index for evaluating clustering quality.
|
|
@@ -4275,9 +4306,10 @@ class Statistics(FeatureExtractionMixin):
|
|
|
4275
4306
|
>>> X, y = make_blobs(n_samples=5000, centers=20, n_features=3, random_state=0, cluster_std=0.1)
|
|
4276
4307
|
>>> Statistics.i_index(x=X, y=y)
|
|
4277
4308
|
"""
|
|
4309
|
+
timer = SimbaTimer(start=True)
|
|
4278
4310
|
check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
|
|
4279
4311
|
check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
|
|
4280
|
-
|
|
4312
|
+
cluster_cnt = get_unique_values_in_iterable(data=y, name=Statistics.i_index.__name__, min=2)
|
|
4281
4313
|
unique_y = np.unique(y)
|
|
4282
4314
|
n_y = unique_y.shape[0]
|
|
4283
4315
|
global_centroid = np.mean(x, axis=0)
|
|
@@ -4289,7 +4321,12 @@ class Statistics(FeatureExtractionMixin):
|
|
|
4289
4321
|
cluster_centroid = np.mean(cluster_obs, axis=0)
|
|
4290
4322
|
swc += np.sum(np.linalg.norm(cluster_obs - cluster_centroid, axis=1) ** 2)
|
|
4291
4323
|
|
|
4292
|
-
|
|
4324
|
+
|
|
4325
|
+
i_index = np.float32(sst / (n_y * swc))
|
|
4326
|
+
timer.stop_timer()
|
|
4327
|
+
if verbose: print(f'I-index for {x.shape[0]} observations in {cluster_cnt} clusters computed (elapsed time: {timer.elapsed_time_str}s)')
|
|
4328
|
+
return i_index
|
|
4329
|
+
|
|
4293
4330
|
|
|
4294
4331
|
@staticmethod
|
|
4295
4332
|
def sd_index(x: np.ndarray, y: np.ndarray) -> float:
|
|
@@ -5291,7 +5328,7 @@ class Statistics(FeatureExtractionMixin):
|
|
|
5291
5328
|
"""
|
|
5292
5329
|
Compute one-way ANOVAs comparing each column (axis 1) on two arrays.
|
|
5293
5330
|
|
|
5294
|
-
..
|
|
5331
|
+
.. note::
|
|
5295
5332
|
Use for computing and presenting aggregate statistics. Not suitable for featurization.
|
|
5296
5333
|
|
|
5297
5334
|
.. seealso::
|
|
@@ -5329,7 +5366,7 @@ class Statistics(FeatureExtractionMixin):
|
|
|
5329
5366
|
"""
|
|
5330
5367
|
Compute Kruskal-Wallis comparing each column (axis 1) on two arrays.
|
|
5331
5368
|
|
|
5332
|
-
..
|
|
5369
|
+
.. note::
|
|
5333
5370
|
Use for computing and presenting aggregate statistics. Not suitable for featurization.
|
|
5334
5371
|
|
|
5335
5372
|
.. seealso::
|
|
@@ -5366,7 +5403,7 @@ class Statistics(FeatureExtractionMixin):
|
|
|
5366
5403
|
"""
|
|
5367
5404
|
Compute pairwise grouped Tukey-HSD tests.
|
|
5368
5405
|
|
|
5369
|
-
..
|
|
5406
|
+
.. note::
|
|
5370
5407
|
Use for computing and presenting aggregate statistics. Not suitable for featurization.
|
|
5371
5408
|
|
|
5372
5409
|
:param np.ndarray data: 2D array with observations rowwise (axis 0) and features columnwise (axis 1)
|
|
@@ -2198,7 +2198,7 @@ class TimeseriesFeatureMixin(object):
|
|
|
2198
2198
|
:example:
|
|
2199
2199
|
>>> x = np.random.randint(0, 100, (400, 2))
|
|
2200
2200
|
>>> results_1 = TimeseriesFeatureMixin.sliding_entropy_of_directional_changes(x=x, bins=16, window_size=5.0, sample_rate=30)
|
|
2201
|
-
>>> x = pd.read_csv(r"C
|
|
2201
|
+
>>> x = pd.read_csv(r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/input_csv/Together_1.csv")[['Ear_left_1_x', 'Ear_left_1_y']].values
|
|
2202
2202
|
>>> results_2 = TimeseriesFeatureMixin.sliding_entropy_of_directional_changes(x=x, bins=16, window_size=5.0, sample_rate=30)
|
|
2203
2203
|
"""
|
|
2204
2204
|
|
|
@@ -77,10 +77,10 @@ from simba.utils.errors import (ClassifierInferenceError, CorruptedFileError,
|
|
|
77
77
|
SamplingError, SimBAModuleNotFoundError)
|
|
78
78
|
from simba.utils.lookups import get_meta_data_file_headers, get_table
|
|
79
79
|
from simba.utils.printing import SimbaTimer, stdout_success
|
|
80
|
-
from simba.utils.read_write import (find_core_cnt,
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
str_2_bool)
|
|
80
|
+
from simba.utils.read_write import (find_core_cnt, get_current_time,
|
|
81
|
+
get_fn_ext, get_memory_usage_of_df,
|
|
82
|
+
get_pkg_version, read_config_entry,
|
|
83
|
+
read_df, read_meta_file, str_2_bool)
|
|
84
84
|
from simba.utils.warnings import (GPUToolsWarning, MissingUserInputWarning,
|
|
85
85
|
MultiProcessingFailedWarning,
|
|
86
86
|
NoModuleWarning, NotEnoughDataWarning,
|
|
@@ -1383,18 +1383,39 @@ class TrainModelMixin(object):
|
|
|
1383
1383
|
x_df: Union[pd.DataFrame, np.ndarray],
|
|
1384
1384
|
multiclass: bool = False,
|
|
1385
1385
|
model_name: Optional[str] = None,
|
|
1386
|
-
data_path: Optional[Union[str, os.PathLike]] = None
|
|
1386
|
+
data_path: Optional[Union[str, os.PathLike]] = None,
|
|
1387
|
+
verbose: bool = False) -> np.ndarray:
|
|
1387
1388
|
|
|
1388
1389
|
"""
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1390
|
+
Helper to predict class probabilities using a fitted random forest classifier.
|
|
1391
|
+
|
|
1392
|
+
Computes prediction probabilities for binary or multiclass classification using either
|
|
1393
|
+
scikit-learn or cuML RandomForestClassifier. For binary classifiers, returns the
|
|
1394
|
+
probability of the positive class (class 1). For multiclass classifiers, returns
|
|
1395
|
+
probabilities for all classes.
|
|
1396
|
+
|
|
1397
|
+
.. csv-table::
|
|
1398
|
+
:header: EXPECTED RUNTIMES
|
|
1399
|
+
:file: ../../docs/tables/clf_predict_proba.csv
|
|
1400
|
+
:widths: 10, 45, 45
|
|
1401
|
+
:align: center
|
|
1402
|
+
:header-rows: 1
|
|
1403
|
+
|
|
1404
|
+
.. seealso::
|
|
1405
|
+
To fit a classifier, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.clf_fit`
|
|
1406
|
+
To define a classifier, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.clf_define`
|
|
1407
|
+
|
|
1408
|
+
:param Union[RandomForestClassifier, cuRF] clf: Fitted random forest classifier object from sklearn or cuml.
|
|
1409
|
+
:param Union[pd.DataFrame, np.ndarray] x_df: Features for data to predict. DataFrame or array of shape (n_samples, n_features).
|
|
1410
|
+
:param bool multiclass: If True, the classifier predicts more than 2 classes. If False, binary classifier (default: False).
|
|
1411
|
+
:param Optional[str] model_name: Name of the model for error messages and logging. Default: None.
|
|
1412
|
+
:param Optional[Union[str, os.PathLike]] data_path: Path to the data file being processed, used in error messages. Default: None.
|
|
1413
|
+
:param bool verbose: If True, print inference progress and timing information. Default: False.
|
|
1414
|
+
:return np.ndarray: Prediction probabilities. For binary classifiers: 1D array of shape (n_samples,) with probability of positive class. For multiclass: 2D array of shape (n_samples, n_classes) with probabilities for each class.
|
|
1415
|
+
|
|
1396
1416
|
"""
|
|
1397
1417
|
|
|
1418
|
+
timer = SimbaTimer(start=True)
|
|
1398
1419
|
if hasattr(clf, "n_features_"):
|
|
1399
1420
|
clf_n_features = clf.n_features_
|
|
1400
1421
|
elif hasattr(clf, "n_features_in_"):
|
|
@@ -1420,6 +1441,8 @@ class TrainModelMixin(object):
|
|
|
1420
1441
|
p_vals = clf.predict_proba(x_df)
|
|
1421
1442
|
if multiclass and (clf.n_classes_ != p_vals.shape[1]):
|
|
1422
1443
|
raise ClassifierInferenceError(msg=f"The classifier {model_name} (data path: {data_path}) is a multiclassifier expected to create {clf.n_classes_} behavior probabilities. However, it produced probabilities for {p_vals.shape[1]} behaviors. See The SimBA GitHub FAQ page or Gitter for more information and suggested fixes.", source=self.__class__.__name__)
|
|
1444
|
+
timer.stop_timer()
|
|
1445
|
+
if verbose: print(f'Inference for model {model_name} over {x_df.shape[0]} observations complete ({timer.elapsed_time_str}s).')
|
|
1423
1446
|
if not multiclass:
|
|
1424
1447
|
if isinstance(p_vals, pd.DataFrame):
|
|
1425
1448
|
return p_vals[1].values
|
|
@@ -1447,7 +1470,7 @@ class TrainModelMixin(object):
|
|
|
1447
1470
|
bootstrap: Optional[bool] = True,
|
|
1448
1471
|
verbose: Optional[int] = 1,
|
|
1449
1472
|
class_weight: Optional[dict] = None,
|
|
1450
|
-
cuda: Optional[bool] = False) -> RandomForestClassifier:
|
|
1473
|
+
cuda: Optional[bool] = False) -> Union[RandomForestClassifier, cuRF]:
|
|
1451
1474
|
|
|
1452
1475
|
if not cuda:
|
|
1453
1476
|
# NOTE: LOKY ISSUES ON WINDOWS WITH SCIKIT IF THE CORE COUNT EXCEEDS 61.
|
|
@@ -1482,20 +1505,32 @@ class TrainModelMixin(object):
|
|
|
1482
1505
|
clf: Union[RandomForestClassifier, cuRF],
|
|
1483
1506
|
x_df: pd.DataFrame,
|
|
1484
1507
|
y_df: pd.DataFrame,
|
|
1485
|
-
) -> RandomForestClassifier:
|
|
1508
|
+
verbose: bool = False) -> Union[RandomForestClassifier, cuRF]:
|
|
1486
1509
|
|
|
1487
1510
|
"""
|
|
1488
|
-
Helper to fit clf model
|
|
1511
|
+
Helper to fit clf model.
|
|
1489
1512
|
|
|
1490
|
-
|
|
1513
|
+
.. csv-table::
|
|
1514
|
+
:header: EXPECTED RUNTIMES
|
|
1515
|
+
:file: ../../docs/tables/clf_fit.csv
|
|
1516
|
+
:widths: 20, 20, 30, 30
|
|
1517
|
+
:align: center
|
|
1518
|
+
:header-rows: 1
|
|
1519
|
+
|
|
1520
|
+
.. seealso::
|
|
1521
|
+
To define a cuml/sklearn object, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.clf_define`
|
|
1522
|
+
|
|
1523
|
+
:param clf: Un-fitted random forest classifier object, either from sklearn or cuml.
|
|
1491
1524
|
:param pd.DataFrame x_df: Pandas dataframe with features.
|
|
1492
1525
|
:param pd.DataFrame y_df: Pandas dataframe/Series with target
|
|
1493
1526
|
:return: Fitted random forest classifier object
|
|
1494
1527
|
:rtype: Union[RandomForestClassifier, cuRF]
|
|
1495
1528
|
"""
|
|
1496
1529
|
|
|
1530
|
+
timer = SimbaTimer(start=True)
|
|
1497
1531
|
nan_features = x_df[~x_df.applymap(np.isreal).all(1)]
|
|
1498
1532
|
nan_target = y_df.loc[pd.to_numeric(y_df).isna()]
|
|
1533
|
+
using_cuda = True if CUML in str(clf.__class__.__module__).lower() else False
|
|
1499
1534
|
if len(nan_features) > 0:
|
|
1500
1535
|
raise FaultyTrainingSetError(
|
|
1501
1536
|
msg=f"{len(nan_features)} frame(s) in your project_folder/csv/targets_inserted directory contains FEATURES with non-numerical values",
|
|
@@ -1504,9 +1539,16 @@ class TrainModelMixin(object):
|
|
|
1504
1539
|
raise FaultyTrainingSetError(
|
|
1505
1540
|
msg=f"{len(nan_target)} frame(s) in your project_folder/csv/targets_inserted directory contains ANNOTATIONS with non-numerical values",
|
|
1506
1541
|
source=self.__class__.__name__)
|
|
1542
|
+
if verbose: print(f'[{get_current_time()}] Fitting classifier for {len(x_df)} observations (cuda: {"True" if using_cuda else "False"})...')
|
|
1543
|
+
if using_cuda:
|
|
1544
|
+
x_data = x_df.values if isinstance(x_df, pd.DataFrame) else x_df
|
|
1545
|
+
y_data = y_df.values if isinstance(y_df, (pd.DataFrame, pd.Series)) else y_df
|
|
1546
|
+
clf.fit(x_data, y_data)
|
|
1547
|
+
else:
|
|
1548
|
+
clf.fit(x_df, y_df)
|
|
1507
1549
|
|
|
1508
|
-
|
|
1509
|
-
|
|
1550
|
+
timer.stop_timer()
|
|
1551
|
+
if verbose: print(f'[{get_current_time()}] Classifier fitted in {timer.elapsed_time_str}s.')
|
|
1510
1552
|
return clf
|
|
1511
1553
|
|
|
1512
1554
|
@staticmethod
|
|
@@ -1563,9 +1605,7 @@ class TrainModelMixin(object):
|
|
|
1563
1605
|
:rtype: Tuple[pd.DataFrame, List[int]]
|
|
1564
1606
|
|
|
1565
1607
|
"""
|
|
1566
|
-
if (platform.system() == "Darwin") and (
|
|
1567
|
-
multiprocessing.get_start_method() != "spawn"
|
|
1568
|
-
):
|
|
1608
|
+
if (platform.system() == "Darwin") and (multiprocessing.get_start_method() != "spawn"):
|
|
1569
1609
|
multiprocessing.set_start_method("spawn", force=True)
|
|
1570
1610
|
cpu_cnt, _ = find_core_cnt()
|
|
1571
1611
|
df_lst, frame_numbers_lst = [], []
|
|
@@ -1592,9 +1632,7 @@ class TrainModelMixin(object):
|
|
|
1592
1632
|
:, ~df_concat.columns.str.contains("^Unnamed")
|
|
1593
1633
|
].astype(np.float32)
|
|
1594
1634
|
memory_size = get_memory_usage_of_df(df=df_concat)
|
|
1595
|
-
print(
|
|
1596
|
-
f'Dataset size: {memory_size["megabytes"]}MB / {memory_size["gigabytes"]}GB'
|
|
1597
|
-
)
|
|
1635
|
+
print(f'Dataset size: {memory_size["megabytes"]}MB / {memory_size["gigabytes"]}GB')
|
|
1598
1636
|
|
|
1599
1637
|
return df_concat, frame_numbers_lst
|
|
1600
1638
|
|
|
@@ -2607,9 +2645,9 @@ class TrainModelMixin(object):
|
|
|
2607
2645
|
:param bool plot: If True, create SHAP aggregation and plots.
|
|
2608
2646
|
|
|
2609
2647
|
:example:
|
|
2610
|
-
>>> CONFIG_PATH = r"C
|
|
2611
|
-
>>> RF_PATH = r"C
|
|
2612
|
-
>>> DATA_PATH = r"C
|
|
2648
|
+
>>> CONFIG_PATH = r"C:/troubleshooting/mitra/project_folder/project_config.ini"
|
|
2649
|
+
>>> RF_PATH = r"C:/troubleshooting/mitra/models/validations/straub_tail_5_new/straub_tail_5.sav"
|
|
2650
|
+
>>> DATA_PATH = r"C:/troubleshooting/mitra/project_folder/csv/targets_inserted/new_straub/appended/501_MA142_Gi_CNO_0514.csv"
|
|
2613
2651
|
>>> config = ConfigReader(config_path=CONFIG_PATH)
|
|
2614
2652
|
>>> df = read_df(file_path=DATA_PATH, file_type='csv')
|
|
2615
2653
|
>>> y = df['straub_tail']
|
simba/model/inference_batch.py
CHANGED
|
@@ -45,7 +45,7 @@ class InferenceBatch(TrainModelMixin, ConfigReader):
|
|
|
45
45
|
>>> inferencer.run()
|
|
46
46
|
|
|
47
47
|
:example II:
|
|
48
|
-
>>> inferencer = InferenceBatch(config_path=r"D
|
|
48
|
+
>>> inferencer = InferenceBatch(config_path=r"D:/troubleshooting/mitra/project_folder/project_config.ini", features_dir=r"D:/troubleshooting/mitra/project_folder/videos/bg_removed/rotated/tail_features/APPENDED")
|
|
49
49
|
>>> inferencer.run()
|
|
50
50
|
"""
|
|
51
51
|
|
|
@@ -55,9 +55,9 @@ class YOLOSegmentationInference():
|
|
|
55
55
|
To visualize the segmentation results, see :func:`simba.plotting.yolo_seg_visualizer.YOLOSegmentationVisualizer`
|
|
56
56
|
|
|
57
57
|
:example:
|
|
58
|
-
>>> weights_path = r"D
|
|
59
|
-
>>> video_path = r"D
|
|
60
|
-
>>> save_dir=r"D
|
|
58
|
+
>>> weights_path = r"D:/platea/yolo_071525/mdl/train3/weights/best.pt"
|
|
59
|
+
>>> video_path = r"D:/platea/platea_videos/videos/clipped/10B_Mouse_5-choice_MustTouchTrainingNEWFINAL_a7.mp4"
|
|
60
|
+
>>> save_dir = r"D:/platea/platea_videos/videos/yolo_results"
|
|
61
61
|
>>> runner = YOLOSegmentationInference(weights_path=weights_path, video_path=video_path, save_dir=save_dir, verbose=True, device=0, format=None, stream=True, batch_size=10, imgsz=320, interpolate=True, threshold=0.8, retina_msk=True)
|
|
62
62
|
>>> runner.run()
|
|
63
63
|
|
|
@@ -98,14 +98,14 @@ class HeatMapperClfMultiprocess(ConfigReader, PlottingMixin):
|
|
|
98
98
|
|
|
99
99
|
|
|
100
100
|
:example II:
|
|
101
|
-
>>> test = HeatMapperClfMultiprocess(config_path=r"C
|
|
101
|
+
>>> test = HeatMapperClfMultiprocess(config_path=r"C:/troubleshooting/RAT_NOR/project_folder/project_config.ini",
|
|
102
102
|
>>> style_attr = {'palette': 'jet', 'shading': 'gouraud', 'bin_size': 50, 'max_scale': 'auto'},
|
|
103
103
|
>>> final_img_setting=True,
|
|
104
104
|
>>> video_setting=True,
|
|
105
105
|
>>> frame_setting=True,
|
|
106
106
|
>>> bodypart='Ear_left',
|
|
107
107
|
>>> clf_name='straub_tail',
|
|
108
|
-
>>> data_paths=[r"C
|
|
108
|
+
>>> data_paths=[r"C:/troubleshooting/RAT_NOR/project_folder/csv/test/2022-06-20_NOB_DOT_4.csv"])
|
|
109
109
|
>>> test.run()
|
|
110
110
|
"""
|
|
111
111
|
|
|
@@ -44,10 +44,10 @@ class SimBABlobImporter(ConfigReader):
|
|
|
44
44
|
:param Optional[bool] verbose: If True, prints progress messages. Default: True.
|
|
45
45
|
|
|
46
46
|
:example:
|
|
47
|
-
>>> r = SimBABlobImporter(config_path=r"C
|
|
47
|
+
>>> r = SimBABlobImporter(config_path=r"C:/troubleshooting/simba_blob_project/project_folder/project_config.ini", data_path=r'C:/troubleshooting/simba_blob_project/data')
|
|
48
48
|
>>> r.run()
|
|
49
|
-
>>> r = SimBABlobImporter(config_path=r"C
|
|
50
|
-
... data_path=r'C
|
|
49
|
+
>>> r = SimBABlobImporter(config_path=r"C:/troubleshooting/simba_blob_project/project_folder/project_config.ini",
|
|
50
|
+
... data_path=r'C:/troubleshooting/simba_blob_project/data',
|
|
51
51
|
... smoothing_settings={'method': 'savitzky-golay', 'time_window': 100},
|
|
52
52
|
... interpolation_settings={'method': 'nearest', 'type': 'body-parts'})
|
|
53
53
|
>>> r.run()
|
|
@@ -168,7 +168,7 @@ class ROIAggregateStatisticsAnalyzerMultiprocess(ConfigReader, FeatureExtraction
|
|
|
168
168
|
:param save_path (str | os.PathLike, optional): Path to save summary statistics.
|
|
169
169
|
|
|
170
170
|
:example:
|
|
171
|
-
>>> analyzer = ROIAggregateStatisticsAnalyzerMultiprocess(config_path=r"C
|
|
171
|
+
>>> analyzer = ROIAggregateStatisticsAnalyzerMultiprocess(config_path=r"C:/troubleshooting/mitra/project_folder/project_config.ini", body_parts=['Center'], first_entry_time=True, threshold=0.0, calculate_distances=True, transpose=False, detailed_bout_data=True)
|
|
172
172
|
>>> analyzer.run()
|
|
173
173
|
>>> analyzer.save()
|
|
174
174
|
"""
|
|
@@ -150,7 +150,7 @@ class ROIClfCalculatorMultiprocess(ConfigReader):
|
|
|
150
150
|
`GitHub tutorial <https://github.com/sgoldenlab/simba/blob/master/docs/Scenario2.md#part-4--analyze-machine-results>`__.
|
|
151
151
|
|
|
152
152
|
:example:
|
|
153
|
-
>>> analyzer = ROIClfCalculatorMultiprocess(config_path=r"D
|
|
153
|
+
>>> analyzer = ROIClfCalculatorMultiprocess(config_path=r"D:/troubleshooting/maplight_ri/project_folder/project_config.ini", bp_names=['resident_NOSE'], clf_names=['attack'], clf_time=True, started_bout_cnt=True, ended_bout_cnt=False, bout_table=True, transpose=True, core_cnt=20)
|
|
154
154
|
>>> analyzer.run()
|
|
155
155
|
>>> analyzer.save()
|
|
156
156
|
"""
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Analyze runtime statistics for directionality_to_nonstatic_target"""
|
|
2
|
+
import numpy as np
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
|
|
5
|
+
# Parse the runtime data
|
|
6
|
+
data = {
|
|
7
|
+
10000: [0.4389, 0.0008, 0.0012],
|
|
8
|
+
100000: [0.0063, 0.0052, 0.0052],
|
|
9
|
+
1000000: [0.0768, 0.0306, 0.0239],
|
|
10
|
+
10000000: [0.2195, 0.2122, 0.2083],
|
|
11
|
+
50000000: [1.8936, 1.5664, 1.2548]
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
# Calculate statistics
|
|
15
|
+
print("=" * 80)
|
|
16
|
+
print(f"{'Observations':<15} {'Mean (s)':<12} {'Std (s)':<12} {'Min (s)':<12} {'Max (s)':<12} {'Throughput (M obs/s)':<20}")
|
|
17
|
+
print("=" * 80)
|
|
18
|
+
|
|
19
|
+
for obs_count in sorted(data.keys()):
|
|
20
|
+
times = np.array(data[obs_count])
|
|
21
|
+
mean_time = np.mean(times)
|
|
22
|
+
std_time = np.std(times)
|
|
23
|
+
min_time = np.min(times)
|
|
24
|
+
max_time = np.max(times)
|
|
25
|
+
throughput = obs_count / (mean_time * 1_000_000) # Million observations per second
|
|
26
|
+
|
|
27
|
+
print(f"{obs_count:<15,} {mean_time:<12.4f} {std_time:<12.4f} {min_time:<12.4f} {max_time:<12.4f} {throughput:<20.2f}")
|
|
28
|
+
|
|
29
|
+
print("=" * 80)
|
|
30
|
+
print("\nNote: First run typically includes JIT compilation overhead (especially for 10k observations)")
|