PyPI - simba-uw-tf-dev - Versions diffs - 4.6.6__py3-none-any.whl → 4.6.8__py3-none-any.whl - Mend

simba-uw-tf-dev 4.6.6 (py3-none-any.whl) → 4.6.8 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of simba-uw-tf-dev might be problematic. Click here for more details.

Files changed (49)

simba/assets/.recent_projects.txt +1 -0
simba/data_processors/blob_location_computer.py +1 -1
simba/data_processors/circling_detector.py +30 -13
simba/data_processors/cuda/image.py +53 -25
simba/data_processors/cuda/statistics.py +57 -19
simba/data_processors/cuda/timeseries.py +1 -1
simba/data_processors/egocentric_aligner.py +1 -1
simba/data_processors/freezing_detector.py +54 -50
simba/feature_extractors/feature_subsets.py +2 -2
simba/feature_extractors/mitra_feature_extractor.py +2 -2
simba/feature_extractors/straub_tail_analyzer.py +4 -4
simba/labelling/standard_labeller.py +1 -1
simba/mixins/config_reader.py +5 -2
simba/mixins/geometry_mixin.py +8 -8
simba/mixins/image_mixin.py +14 -14
simba/mixins/plotting_mixin.py +28 -10
simba/mixins/statistics_mixin.py +39 -9
simba/mixins/timeseries_features_mixin.py +1 -1
simba/mixins/train_model_mixin.py +65 -27
simba/model/inference_batch.py +1 -1
simba/model/yolo_seg_inference.py +3 -3
simba/outlier_tools/skip_outlier_correction.py +1 -1
simba/plotting/gantt_creator.py +29 -10
simba/plotting/gantt_creator_mp.py +50 -17
simba/plotting/heat_mapper_clf_mp.py +2 -2
simba/pose_importers/simba_blob_importer.py +3 -3
simba/roi_tools/roi_aggregate_stats_mp.py +1 -1
simba/roi_tools/roi_clf_calculator_mp.py +1 -1
simba/third_party_label_appenders/transform/coco_keypoints_to_yolo.py +3 -3
simba/third_party_label_appenders/transform/coco_keypoints_to_yolo_bbox.py +2 -2
simba/ui/pop_ups/clf_plot_pop_up.py +2 -2
simba/ui/pop_ups/gantt_pop_up.py +31 -6
simba/ui/pop_ups/video_processing_pop_up.py +1 -1
simba/utils/custom_feature_extractor.py +1 -1
simba/utils/data.py +2 -2
simba/utils/read_write.py +32 -18
simba/utils/yolo.py +10 -1
simba/video_processors/blob_tracking_executor.py +2 -2
simba/video_processors/clahe_ui.py +1 -1
simba/video_processors/egocentric_video_rotator.py +3 -3
simba/video_processors/multi_cropper.py +1 -1
simba/video_processors/video_processing.py +27 -10
simba/video_processors/videos_to_frames.py +2 -2
{simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.8.dist-info}/METADATA +3 -2
{simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.8.dist-info}/RECORD +49 -49
{simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.8.dist-info}/LICENSE +0 -0
{simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.8.dist-info}/WHEEL +0 -0
{simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.8.dist-info}/entry_points.txt +0 -0
{simba_uw_tf_dev-4.6.6.dist-info → simba_uw_tf_dev-4.6.8.dist-info}/top_level.txt +0 -0

simba/feature_extractors/straub_tail_analyzer.py CHANGED Viewed

@@ -44,10 +44,10 @@ class StraubTailAnalyzer(ConfigReader):
     .. [1] Lazaro et al., Brainwide Genetic Capture for Conscious State Transitions, `biorxiv`, doi: https://doi.org/10.1101/2025.03.28.646066
     :example:
-    >>> runner = StraubTailAnalyzer(config_path=r"C:\troubleshooting\mitra\project_folder\project_config.ini",
-    >>>                            data_dir=r'C:\troubleshooting\mitra\project_folder\videos\additional\bg_removed\rotated',
-    >>>                            video_dir=r'C:\troubleshooting\mitra\project_folder\videos\additional\bg_removed\rotated',
-    >>>                            save_dir=r'C:\troubleshooting\mitra\project_folder\videos\additional\bg_removed\rotated\tail_features_additional',
+    >>> runner = StraubTailAnalyzer(config_path=r"C:/troubleshooting/mitra/project_folder/project_config.ini",
+    >>>                            data_dir=r'C:/troubleshooting/mitra/project_folder/videos/additional/bg_removed/rotated',
+    >>>                            video_dir=r'C:/troubleshooting/mitra/project_folder/videos/additional/bg_removed/rotated',
+    >>>                            save_dir=r'C:/troubleshooting/mitra/project_folder/videos/additional/bg_removed/rotated/tail_features_additional',
     >>>                            anchor_points=('tail_base', 'tail_center', 'tail_tip'),
     >>>                            body_parts=('nose', 'left_ear', 'right_ear', 'right_side', 'left_side', 'tail_base'))
     >>> runner.run()

simba/labelling/standard_labeller.py CHANGED Viewed

@@ -64,7 +64,7 @@ class LabellingInterface(ConfigReader):
     :param bool continuing: Set True to resume annotations from an existing targets file. Defaults to False.
     :example:
-    >>> _ = LabellingInterface(config_path=r"C:\troubleshooting\mitra\project_folder\project_config.ini", file_path=r"C:\troubleshooting\mitra\project_folder\videos\501_MA142_Gi_CNO_0521.mp4", thresholds=None, continuing=False)
+    >>> _ = LabellingInterface(config_path=r"C:/troubleshooting/mitra/project_folder/project_config.ini", file_path=r"C:/troubleshooting/mitra/project_folder/videos/501_MA142_Gi_CNO_0521.mp4", thresholds=None, continuing=False)
     """
     def __init__(self,

simba/mixins/config_reader.py CHANGED Viewed

@@ -41,8 +41,8 @@ from simba.utils.read_write import (find_core_cnt, get_all_clf_names,
                                     get_fn_ext, read_config_file, read_df,
                                     read_project_path_and_file_type, write_df)
 from simba.utils.warnings import (BodypartColumnNotFoundWarning,
-                                  InvalidValueWarning, NoDataFoundWarning,
-                                  NoFileFoundWarning)
+                                  DuplicateNamesWarning, InvalidValueWarning,
+                                  NoDataFoundWarning, NoFileFoundWarning)
 class ConfigReader(object):
@@ -610,11 +610,14 @@ class ConfigReader(object):
         >>> config_reader.get_bp_headers()
         """
+        duplicates = list({x for x in self.body_parts_lst if self.body_parts_lst.count(x) > 1})
+        if len(duplicates) > 0: DuplicateNamesWarning(msg=f'The pose configuration file at {self.body_parts_path} contains duplicate entries: {duplicates}', source=self.__class__.__name__)
         self.bp_headers = []
         for bp in self.body_parts_lst:
             c1, c2, c3 = (f"{bp}_x", f"{bp}_y", f"{bp}_p")
             self.bp_headers.extend((c1, c2, c3))
     def read_config_entry(
         self,
         config: ConfigParser,

simba/mixins/geometry_mixin.py CHANGED Viewed

@@ -1556,7 +1556,7 @@ class GeometryMixin(object):
         :rtype: List[float]
         :example:
-        >>> df = read_df(file_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\outlier_corrected_movement_location\Together_2.csv", file_type='csv').astype(int)
+        >>> df = read_df(file_path=r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/outlier_corrected_movement_location/Together_2.csv", file_type='csv').astype(int)
         >>> animal_1_cols = [x for x in df.columns if '_1_' in x and not '_p' in x]
         >>> animal_2_cols = [x for x in df.columns if '_2_' in x and not '_p' in x]
         >>> animal_1_arr = df[animal_1_cols].values.reshape(len(df), int(len(animal_1_cols)/ 2), 2)
@@ -1622,7 +1622,7 @@ class GeometryMixin(object):
         :return List[float]: List of overlap between corresponding Polygons. If overlap 1, else 0.
         :example:
-        >>> df = read_df(file_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\outlier_corrected_movement_location\Together_2.csv", file_type='csv').astype(int)
+        >>> df = read_df(file_path=r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/outlier_corrected_movement_location/Together_2.csv", file_type='csv').astype(int)
         >>> animal_1_cols = [x for x in df.columns if '_1_' in x and not '_p' in x]
         >>> animal_2_cols = [x for x in df.columns if '_2_' in x and not '_p' in x]
         >>> animal_1_arr = df[animal_1_cols].values.reshape(len(df), int(len(animal_1_cols)/ 2), 2)
@@ -1693,7 +1693,7 @@ class GeometryMixin(object):
         :rtype: List[float]
         :example:
-        >>> df = read_df(file_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\outlier_corrected_movement_location\Together_2.csv", file_type='csv').astype(int)
+        >>> df = read_df(file_path=r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/outlier_corrected_movement_location/Together_2.csv", file_type='csv').astype(int)
         >>> animal_1_cols = [x for x in df.columns if '_1_' in x and not '_p' in x]
         >>> animal_2_cols = [x for x in df.columns if '_2_' in x and not '_p' in x]
         >>> animal_1_arr = df[animal_1_cols].values.reshape(len(df), int(len(animal_1_cols)/ 2), 2)
@@ -1763,7 +1763,7 @@ class GeometryMixin(object):
         :rtype: List[Polygon]
         :example:
-        >>> df = read_df(file_path=r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\outlier_corrected_movement_location\Together_2.csv", file_type='csv').astype(int)
+        >>> df = read_df(file_path=r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/outlier_corrected_movement_location/Together_2.csv", file_type='csv').astype(int)
         >>> animal_1_cols = [x for x in df.columns if '_1_' in x and not '_p' in x]
         >>> animal_1_arr = df[animal_1_cols].values.reshape(len(df), int(len(animal_1_cols)/ 2), 2)
         >>> animal_1_geo = GeometryMixin.bodyparts_to_polygon(data=animal_1_arr)
@@ -3525,10 +3525,10 @@ class GeometryMixin(object):
         :rtype: Tuple[Dict[Tuple[int, int], Dict[Tuple[int, int], float]], Dict[Tuple[int, int], Dict[Tuple[int, int], int]]]
         :example:
-        >>> video_meta_data = get_video_meta_data(video_path=r"C:\troubleshooting\mitra\project_folder\videos\708_MA149_Gq_CNO_0515.mp4")
+        >>> video_meta_data = get_video_meta_data(video_path=r"C:/troubleshooting/mitra/project_folder/videos/708_MA149_Gq_CNO_0515.mp4")
         >>> w, h = video_meta_data['width'], video_meta_data['height']
         >>> grid = GeometryMixin().bucket_img_into_grid_square(bucket_grid_size=(5, 5), bucket_grid_size_mm=None, img_size=(h, w), verbose=False)[0]
-        >>> data = read_df(file_path=r'C:\troubleshooting\mitra\project_folder\csv\outlier_corrected_movement_location\708_MA149_Gq_CNO_0515.csv', file_type='csv')[['Nose_x', 'Nose_y']].values
+        >>> data = read_df(file_path=r'C:/troubleshooting/mitra/project_folder/csv/outlier_corrected_movement_location/708_MA149_Gq_CNO_0515.csv', file_type='csv')[['Nose_x', 'Nose_y']].values
         >>> transition_probabilities, _ = geometry_transition_probabilities(data=data, grid=grid)
         """
@@ -3990,7 +3990,7 @@ class GeometryMixin(object):
         :rtype: np.ndarray
         :example:
-        >>> data_path = r"C:\troubleshooting\mitra\project_folder\csv\outlier_corrected_movement_location\FRR_gq_Saline_0624.csv"
+        >>> data_path = r"C:/troubleshooting/mitra/project_folder/csv/outlier_corrected_movement_location/FRR_gq_Saline_0624.csv"
         >>> animal_data = read_df(file_path=data_path, file_type='csv', usecols=['Nose_x', 'Nose_y', 'Tail_base_x', 'Tail_base_y', 'Left_side_x', 'Left_side_y', 'Right_side_x', 'Right_side_y']).values.reshape(-1, 4, 2)[0:20].astype(np.int32)
         >>> animal_polygons = GeometryMixin().bodyparts_to_polygon(data=animal_data)
         >>> GeometryMixin.geometries_to_exterior_keypoints(geometries=animal_polygons)
@@ -4160,7 +4160,7 @@ class GeometryMixin(object):
          :rtype: Union[None, Dict[Any, dict]]
          :example I:
-             >>> results = GeometryMixin.sleap_csv_to_geometries(data=r"C:\troubleshooting\ants\pose_data\ant.csv")
+             >>> results = GeometryMixin.sleap_csv_to_geometries(data=r"C:/troubleshooting/ants/pose_data/ant.csv")
              >>> # Results structure: {track_id: {frame_idx: Polygon, ...}, ...}
         :example II

simba/mixins/image_mixin.py CHANGED Viewed

@@ -57,17 +57,16 @@ class ImageMixin(object):
         pass
     @staticmethod
-    def brightness_intensity(imgs: List[np.ndarray], ignore_black: Optional[bool] = True) -> List[float]:
+    def brightness_intensity(imgs: Union[List[np.ndarray], np.ndarray], ignore_black: bool = True, verbose: bool = False) -> np.ndarray:
         """
         Compute the average brightness intensity within each image within a list.
         For example, (i) create a list of images containing a light cue ROI, (ii) compute brightness in each image, (iii) perform kmeans on brightness, and get the frames when the light cue is on vs off.
         .. seealso::
-           For GPU acceleration, see :func:`simba.data_processors.cuda.image.img_stack_brightness`.
-           For geometry based brightness, see :func:`simba.mixins.geometry_mixin.GeometryMixin.get_geometry_brightness_intensity`
+           For GPU acceleration, see :func:`simba.data_processors.cuda.image.img_stack_brightness`. For geometry based brightness, see :func:`simba.mixins.geometry_mixin.GeometryMixin.get_geometry_brightness_intensity`
-        :param List[np.ndarray] imgs: List of images as arrays to calculate average brightness intensity within.
+        :param Union[List[np.ndarray], np.ndarray] imgs: List of images as arrays or 3/4d array of images to calculate average brightness intensity within.
         :param Optional[bool] ignore_black: If True, ignores black pixels. If the images are sliced non-rectangular geometric shapes created by ``slice_shapes_in_img``, then pixels that don't belong to the shape has been masked in black.
         :returns: List of floats of size len(imgs) with brightness intensities.
         :rtype: List[float]
@@ -77,14 +76,12 @@ class ImageMixin(object):
         >>> ImageMixin.brightness_intensity(imgs=[img], ignore_black=False)
         >>> [159.0]
         """
-        results = []
-        check_instance(source=f"{ImageMixin().brightness_intensity.__name__} imgs", instance=imgs, accepted_types=list)
-        for cnt, img in enumerate(imgs):
-            check_instance(
-                source=f"{ImageMixin().brightness_intensity.__name__} img {cnt}",
-                instance=img,
-                accepted_types=np.ndarray,
-            )
+        results, timer = [], SimbaTimer(start=True)
+        check_instance(source=f"{ImageMixin().brightness_intensity.__name__} imgs", instance=imgs, accepted_types=(list, np.ndarray,))
+        if isinstance(imgs, np.ndarray): imgs = np.array(imgs)
+        for img_cnt in range(imgs.shape[0]):
+            img = imgs[img_cnt]
+            check_instance(source=f"{ImageMixin().brightness_intensity.__name__} img {img_cnt}", instance=img, accepted_types=np.ndarray)
             if len(img) == 0:
                 results.append(0)
             else:
@@ -92,7 +89,10 @@ class ImageMixin(object):
                     results.append(np.ceil(np.average(img[img != 0])))
                 else:
                     results.append(np.ceil(np.average(img)))
-        return results
+        b = np.array(results).astype(np.float32)
+        timer.stop_timer()
+        if verbose: print(f'Brightness computed in {b.shape[0]} images (elapsed time {timer.elapsed_time_str}s)')
     @staticmethod
     def gaussian_blur(img: np.ndarray, kernel_size: Optional[Tuple] = (9, 9)) -> np.ndarray:
@@ -1898,7 +1898,7 @@ class ImageMixin(object):
         :rtype: np.ndarray
         :example:
-        >>> VIDEO_PATH = r"D:\EPM_2\EPM_1.mp4"
+        >>> VIDEO_PATH = r"D:/EPM_2/EPM_1.mp4"
         >>> img = read_img_batch_from_video(video_path=VIDEO_PATH, greyscale=True, start_frm=0, end_frm=15, core_cnt=1)
         >>> imgs = np.stack(list(img.values()))
         >>> resized_img = resize_img_stack(imgs=imgs)

simba/mixins/plotting_mixin.py CHANGED Viewed

@@ -39,7 +39,7 @@ from simba.utils.data import create_color_palette, detect_bouts
 from simba.utils.enums import Formats, Keys, Options, Paths
 from simba.utils.errors import InvalidInputError
 from simba.utils.lookups import (get_categorical_palettes, get_color_dict,
-                                 get_named_colors)
+                                 get_fonts, get_named_colors)
 from simba.utils.printing import SimbaTimer, stdout_success
 from simba.utils.read_write import (find_files_of_filetypes_in_directory,
                                     get_fn_ext, get_video_meta_data, read_df,
@@ -342,16 +342,28 @@ class PlottingMixin(object):
                         height: int = 480,
                         font_size: int = 8,
                         font_rotation: int = 45,
+                        font: Optional[str] = None,
                         save_path: Optional[str] = None,
+                        edge_clr: Optional[str] = 'black',
                         hhmmss: bool = False) -> Union[None, np.ndarray]:
         video_timer = SimbaTimer(start=True)
         colour_tuple_x = list(np.arange(3.5, 203.5, 5))
+        original_font_family = copy(plt.rcParams['font.family']) if isinstance(plt.rcParams['font.family'], list) else plt.rcParams['font.family']
+        if font is not None:
+            available_fonts = get_fonts()
+            if font in available_fonts:
+                matplotlib.font_manager._get_font.cache_clear()
+                plt.rcParams['font.family'] = font
+            else:
+                matplotlib.font_manager._get_font.cache_clear()
+                plt.rcParams['font.family'] = [font, 'sans-serif']
         fig, ax = plt.subplots()
-        fig.patch.set_facecolor('#fafafa')
-        ax.set_facecolor('#ffffff')
         fig.patch.set_facecolor('white')
-        plt.title(video_name, fontsize=font_size + 6, pad=15, fontweight='bold')
+        plt.title(video_name, fontsize=font_size + 6, pad=25, fontweight='bold')
         ax.spines['top'].set_visible(False)
         ax.spines['right'].set_visible(False)
         ax.spines['left'].set_color('#666666')
@@ -367,7 +379,7 @@ class PlottingMixin(object):
                 if event[0] == x:
                     ix = clf_names.index(x)
                     data_event = event[1][["Start_time", "Bout_time"]]
-                    ax.broken_barh(data_event.values, (colour_tuple_x[ix], 3), facecolors=palette[ix])
+                    ax.broken_barh(data_event.values, (colour_tuple_x[ix], 3), facecolors=palette[ix], edgecolor=edge_clr, linewidth=0.5, alpha=0.9)
         x_ticks_seconds = np.round(np.linspace(0, x_length / fps, 6))
         x_ticks_locs = x_ticks_seconds
@@ -375,18 +387,19 @@ class PlottingMixin(object):
         if hhmmss:
             x_lbls = [seconds_to_timestamp(sec) for sec in x_ticks_seconds]
         else:
-            x_lbls = x_ticks_seconds
+            x_lbls = [int(x) for x in x_ticks_seconds]
-        #x_ticks_locs = x_lbls = np.round(np.linspace(0, x_length / fps, 6))
         ax.set_xticks(x_ticks_locs)
         ax.set_xticklabels(x_lbls)
         ax.set_ylim(0, colour_tuple_x[len(clf_names)])
         ax.set_yticks(np.arange(5, 5 * len(clf_names) + 1, 5))
-        ax.set_yticklabels(clf_names, rotation=font_rotation)
+        ax.set_yticklabels(clf_names, rotation=font_rotation, ha='right', va='center')
         ax.tick_params(axis="both", labelsize=font_size)
         plt.xlabel(x_label, fontsize=font_size + 3)
-        ax.yaxis.grid(True, linewidth=1.5, color='gray', alpha=0.4, linestyle='--')
-        #ax.yaxis.grid(True)
+        ax.grid(True, axis='both', linewidth=1.0, color='gray', alpha=0.2, linestyle='--', which='major')
+        plt.subplots_adjust(left=0.1, right=0.95, top=0.9, bottom=0.15)
+        plt.tight_layout()
         buffer_ = io.BytesIO()
         plt.savefig(buffer_, format="png")
         buffer_.seek(0)
@@ -397,6 +410,11 @@ class PlottingMixin(object):
         frame = np.uint8(open_cv_image)
         buffer_.close()
         plt.close('all')
+        if font is not None:
+            plt.rcParams['font.family'] = original_font_family
+            matplotlib.font_manager._get_font.cache_clear()
         if save_path is not None:
             cv2.imwrite(save_path, frame)
             video_timer.stop_timer()

simba/mixins/statistics_mixin.py CHANGED Viewed

@@ -3278,10 +3278,34 @@ class Statistics(FeatureExtractionMixin):
         Youden's J statistic is a measure of the overall performance of a binary classification test, taking into account both sensitivity (true positive rate) and specificity (true negative rate).
-        :param sample_1: The first binary array.
-        :param sample_2: The second binary array.
-        :return: Youden's J statistic.
+        The Youden's J statistic is calculated as:
+        .. math::
+            J = \text{sensitivity} + \text{specificity} - 1
+        where:
+        - :math:`\text{sensitivity} = \frac{TP}{TP + FN}` is the true positive rate
+        - :math:`\text{specificity} = \frac{TN}{TN + FP}` is the true negative rate
+        The statistic ranges from -1 to 1, where:
+        - :math:`J = 1` indicates perfect classification
+        - :math:`J = 0` indicates the test performs no better than random
+        - :math:`J < 0` indicates the test performs worse than random
+        :param sample_1: The first binary array (ground truth or reference).
+        :param sample_2: The second binary array (predictions or test results).
+        :return: Youden's J statistic. Returns NaN if either sensitivity or specificity cannot be calculated (division by zero).
         :rtype: float
+        :references:
+            .. [1] Youden, W. J. (1950). Index for rating diagnostic tests. Cancer, 3(1), 32-35.
+                   https://acsjournals.onlinelibrary.wiley.com/doi/abs/10.1002/1097-0142(1950)3:1%3C32::AID-CNCR2820030106%3E3.0.CO;2-3
+        :example:
+        >>> y_true = np.array([1, 1, 0, 0, 1, 0, 1, 1, 0, 0])
+        >>> y_pred = np.array([1, 1, 0, 1, 1, 0, 1, 0, 0, 0])
+        >>> j = Statistics.youden_j(sample_1=y_true, sample_2=y_pred)
         """
         check_valid_array(data=sample_1, source=f'{Statistics.youden_j.__name__} sample_1', accepted_ndims=(1,), accepted_values=[0, 1])
@@ -4257,7 +4281,7 @@ class Statistics(FeatureExtractionMixin):
         return separation_trace / compactness
     @staticmethod
-    def i_index(x: np.ndarray, y: np.ndarray):
+    def i_index(x: np.ndarray, y: np.ndarray, verbose: bool = False) -> float:
         """
         Calculate the I-Index for evaluating clustering quality.
@@ -4282,9 +4306,10 @@ class Statistics(FeatureExtractionMixin):
         >>> X, y = make_blobs(n_samples=5000, centers=20, n_features=3, random_state=0, cluster_std=0.1)
         >>> Statistics.i_index(x=X, y=y)
         """
+        timer = SimbaTimer(start=True)
         check_valid_array(data=x, accepted_ndims=(2,), accepted_dtypes=Formats.NUMERIC_DTYPES.value)
         check_valid_array(data=y, accepted_ndims=(1,), accepted_dtypes=Formats.NUMERIC_DTYPES.value, accepted_axis_0_shape=[x.shape[0], ])
-        _ = get_unique_values_in_iterable(data=y, name=Statistics.i_index.__name__, min=2)
+        cluster_cnt = get_unique_values_in_iterable(data=y, name=Statistics.i_index.__name__, min=2)
         unique_y = np.unique(y)
         n_y = unique_y.shape[0]
         global_centroid = np.mean(x, axis=0)
@@ -4296,7 +4321,12 @@ class Statistics(FeatureExtractionMixin):
             cluster_centroid = np.mean(cluster_obs, axis=0)
             swc += np.sum(np.linalg.norm(cluster_obs - cluster_centroid, axis=1) ** 2)
-        return sst / (n_y * swc)
+        i_index = np.float32(sst / (n_y * swc))
+        timer.stop_timer()
+        if verbose: print(f'I-index for {x.shape[0]} observations in {cluster_cnt} clusters computed (elapsed time: {timer.elapsed_time_str}s)')
+        return i_index
     @staticmethod
     def sd_index(x: np.ndarray, y: np.ndarray) -> float:
@@ -5298,7 +5328,7 @@ class Statistics(FeatureExtractionMixin):
         """
         Compute one-way ANOVAs comparing each column (axis 1) on two arrays.
-        .. notes::
+        .. note::
            Use for computing and presenting aggregate statistics. Not suitable for featurization.
         .. seealso::
@@ -5336,7 +5366,7 @@ class Statistics(FeatureExtractionMixin):
         """
         Compute Kruskal-Wallis comparing each column (axis 1) on two arrays.
-        .. notes::
+        .. note::
            Use for computing and presenting aggregate statistics. Not suitable for featurization.
         .. seealso::
@@ -5373,7 +5403,7 @@ class Statistics(FeatureExtractionMixin):
         """
         Compute pairwise grouped Tukey-HSD tests.
-        .. notes::
+        .. note::
            Use for computing and presenting aggregate statistics. Not suitable for featurization.
         :param np.ndarray data: 2D array  with observations rowwise (axis 0) and features columnwise (axis 1)

simba/mixins/timeseries_features_mixin.py CHANGED Viewed

@@ -2198,7 +2198,7 @@ class TimeseriesFeatureMixin(object):
         :example:
         >>> x = np.random.randint(0, 100, (400, 2))
         >>> results_1 = TimeseriesFeatureMixin.sliding_entropy_of_directional_changes(x=x, bins=16, window_size=5.0, sample_rate=30)
-        >>> x = pd.read_csv(r"C:\troubleshooting\two_black_animals_14bp\project_folder\csv\input_csv\Together_1.csv")[['Ear_left_1_x', 'Ear_left_1_y']].values
+        >>> x = pd.read_csv(r"C:/troubleshooting/two_black_animals_14bp/project_folder/csv/input_csv/Together_1.csv")[['Ear_left_1_x', 'Ear_left_1_y']].values
         >>> results_2 = TimeseriesFeatureMixin.sliding_entropy_of_directional_changes(x=x, bins=16, window_size=5.0, sample_rate=30)
         """

simba/mixins/train_model_mixin.py CHANGED Viewed

@@ -77,10 +77,10 @@ from simba.utils.errors import (ClassifierInferenceError, CorruptedFileError,
                                 SamplingError, SimBAModuleNotFoundError)
 from simba.utils.lookups import get_meta_data_file_headers, get_table
 from simba.utils.printing import SimbaTimer, stdout_success
-from simba.utils.read_write import (find_core_cnt, get_fn_ext,
-                                    get_memory_usage_of_df, get_pkg_version,
-                                    read_config_entry, read_df, read_meta_file,
-                                    str_2_bool)
+from simba.utils.read_write import (find_core_cnt, get_current_time,
+                                    get_fn_ext, get_memory_usage_of_df,
+                                    get_pkg_version, read_config_entry,
+                                    read_df, read_meta_file, str_2_bool)
 from simba.utils.warnings import (GPUToolsWarning, MissingUserInputWarning,
                                   MultiProcessingFailedWarning,
                                   NoModuleWarning, NotEnoughDataWarning,
@@ -1383,18 +1383,39 @@ class TrainModelMixin(object):
                           x_df: Union[pd.DataFrame, np.ndarray],
                           multiclass: bool = False,
                           model_name: Optional[str] = None,
-                          data_path: Optional[Union[str, os.PathLike]] = None) -> np.ndarray:
+                          data_path: Optional[Union[str, os.PathLike]] = None,
+                          verbose: bool = False) -> np.ndarray:
         """
-        :param RandomForestClassifier clf: Random forest classifier object
-        :param Union[pd.DataFrame, np.ndarray] x_df: Features for data to predict as a dataframe or array of size (M,N).
-        :param bool multiclass: If True, the classifier predicts more than 2 targets. Else, boolean classifier.
-        :param Optional[str] model_name: Name of model
-        :param Optional[str] data_path: Path to model on disk
-        :return np.ndarray: 2D array with frame represented by rows and present/absent probabilities as columns
-        :raises FeatureNumberMismatchError: If shape of x_df and clf.n_features_ or n_features_in_ show mismatch
+        Helper to predict class probabilities using a fitted random forest classifier.
+        Computes prediction probabilities for binary or multiclass classification using either
+        scikit-learn or cuML RandomForestClassifier. For binary classifiers, returns the
+        probability of the positive class (class 1). For multiclass classifiers, returns
+        probabilities for all classes.
+        .. csv-table::
+           :header: EXPECTED RUNTIMES
+           :file: ../../docs/tables/clf_predict_proba.csv
+           :widths: 10, 45, 45
+           :align: center
+           :header-rows: 1
+        .. seealso::
+           To fit a classifier, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.clf_fit`
+           To define a classifier, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.clf_define`
+        :param Union[RandomForestClassifier, cuRF] clf: Fitted random forest classifier object from sklearn or cuml.
+        :param Union[pd.DataFrame, np.ndarray] x_df: Features for data to predict. DataFrame or array of shape (n_samples, n_features).
+        :param bool multiclass: If True, the classifier predicts more than 2 classes. If False, binary classifier (default: False).
+        :param Optional[str] model_name: Name of the model for error messages and logging. Default: None.
+        :param Optional[Union[str, os.PathLike]] data_path: Path to the data file being processed, used in error messages. Default: None.
+        :param bool verbose: If True, print inference progress and timing information. Default: False.
+        :return np.ndarray: Prediction probabilities. For binary classifiers: 1D array of shape (n_samples,) with probability of positive class. For multiclass: 2D array of shape (n_samples, n_classes) with probabilities for each class.
         """
+        timer = SimbaTimer(start=True)
         if hasattr(clf, "n_features_"):
             clf_n_features = clf.n_features_
         elif hasattr(clf, "n_features_in_"):
@@ -1420,6 +1441,8 @@ class TrainModelMixin(object):
         p_vals = clf.predict_proba(x_df)
         if multiclass and (clf.n_classes_ != p_vals.shape[1]):
             raise ClassifierInferenceError(msg=f"The classifier {model_name} (data path: {data_path}) is a multiclassifier expected to create {clf.n_classes_} behavior probabilities. However, it produced probabilities for {p_vals.shape[1]} behaviors. See The SimBA GitHub FAQ page or Gitter for more information and suggested fixes.", source=self.__class__.__name__)
+        timer.stop_timer()
+        if verbose: print(f'Inference for model {model_name} over {x_df.shape[0]} observations complete ({timer.elapsed_time_str}s).')
         if not multiclass:
             if isinstance(p_vals, pd.DataFrame):
                 return p_vals[1].values
@@ -1447,7 +1470,7 @@ class TrainModelMixin(object):
                    bootstrap: Optional[bool] = True,
                    verbose: Optional[int] = 1,
                    class_weight: Optional[dict] = None,
-                   cuda: Optional[bool] = False) -> RandomForestClassifier:
+                   cuda: Optional[bool] = False) -> Union[RandomForestClassifier, cuRF]:
         if not cuda:
             # NOTE: LOKY ISSUES ON WINDOWS WITH SCIKIT IF THE CORE COUNT EXCEEDS 61.
@@ -1482,20 +1505,32 @@ class TrainModelMixin(object):
                 clf: Union[RandomForestClassifier, cuRF],
                 x_df: pd.DataFrame,
                 y_df: pd.DataFrame,
-                ) -> RandomForestClassifier:
+                verbose: bool = False) -> Union[RandomForestClassifier, cuRF]:
         """
-        Helper to fit clf model
+        Helper to fit clf model.
-        :param clf: Un-fitted random forest classifier object
+        .. csv-table::
+           :header: EXPECTED RUNTIMES
+           :file: ../../docs/tables/clf_fit.csv
+           :widths: 20, 20, 30, 30
+           :align: center
+           :header-rows: 1
+        .. seealso::
+           To define a cuml/sklearn object, see :func:`simba.mixins.train_model_mixin.TrainModelMixin.clf_define`
+        :param clf: Un-fitted random forest classifier object, either from sklearn or cuml.
         :param pd.DataFrame x_df: Pandas dataframe with features.
         :param pd.DataFrame y_df: Pandas dataframe/Series with target
         :return: Fitted random forest classifier object
         :rtype: RandomForestClassifier
         """
+        timer = SimbaTimer(start=True)
         nan_features = x_df[~x_df.applymap(np.isreal).all(1)]
         nan_target = y_df.loc[pd.to_numeric(y_df).isna()]
+        using_cuda = True if CUML in str(clf.__class__.__module__).lower() else False
         if len(nan_features) > 0:
             raise FaultyTrainingSetError(
                 msg=f"{len(nan_features)} frame(s) in your project_folder/csv/targets_inserted directory contains FEATURES with non-numerical values",
@@ -1504,9 +1539,16 @@ class TrainModelMixin(object):
             raise FaultyTrainingSetError(
                 msg=f"{len(nan_target)} frame(s) in your project_folder/csv/targets_inserted directory contains ANNOTATIONS with non-numerical values",
                 source=self.__class__.__name__)
+        if verbose: print(f'[{get_current_time()}] Fitting classifier for {len(x_df)} observations (cuda: {"True" if using_cuda else "False"})...')
+        if using_cuda:
+            x_data = x_df.values if isinstance(x_df, pd.DataFrame) else x_df
+            y_data = y_df.values if isinstance(y_df, (pd.DataFrame, pd.Series)) else y_df
+            clf.fit(x_data, y_data)
+        else:
+            clf.fit(x_df, y_df)
-        clf.fit(x_df, y_df)
+        timer.stop_timer()
+        if verbose: print(f'[{get_current_time()}] Classifier fitted in {timer.elapsed_time_str}s.')
         return clf
     @staticmethod
@@ -1563,9 +1605,7 @@ class TrainModelMixin(object):
         :rtype: Tuple[pd.DataFrame, List[int]]
         """
-        if (platform.system() == "Darwin") and (
-                multiprocessing.get_start_method() != "spawn"
-        ):
+        if (platform.system() == "Darwin") and (multiprocessing.get_start_method() != "spawn"):
             multiprocessing.set_start_method("spawn", force=True)
         cpu_cnt, _ = find_core_cnt()
         df_lst, frame_numbers_lst = [], []
@@ -1592,9 +1632,7 @@ class TrainModelMixin(object):
                         :, ~df_concat.columns.str.contains("^Unnamed")
                         ].astype(np.float32)
             memory_size = get_memory_usage_of_df(df=df_concat)
-            print(
-                f'Dataset size: {memory_size["megabytes"]}MB / {memory_size["gigabytes"]}GB'
-            )
+            print(f'Dataset size: {memory_size["megabytes"]}MB / {memory_size["gigabytes"]}GB')
             return df_concat, frame_numbers_lst
@@ -2607,9 +2645,9 @@ class TrainModelMixin(object):
         :param bool plot: If True, create SHAP aggregation and plots.
         :example:
-        >>> CONFIG_PATH = r"C:\troubleshooting\mitra\project_folder\project_config.ini"
-        >>> RF_PATH = r"C:\troubleshooting\mitra\models\validations\straub_tail_5_new\straub_tail_5.sav"
-        >>> DATA_PATH = r"C:\troubleshooting\mitra\project_folder\csv\targets_inserted\new_straub\appended\501_MA142_Gi_CNO_0514.csv"
+        >>> CONFIG_PATH = r"C:/troubleshooting/mitra/project_folder/project_config.ini"
+        >>> RF_PATH = r"C:/troubleshooting/mitra/models/validations/straub_tail_5_new/straub_tail_5.sav"
+        >>> DATA_PATH = r"C:/troubleshooting/mitra/project_folder/csv/targets_inserted/new_straub/appended/501_MA142_Gi_CNO_0514.csv"
         >>> config = ConfigReader(config_path=CONFIG_PATH)
         >>> df = read_df(file_path=DATA_PATH, file_type='csv')
         >>> y = df['straub_tail']

simba/model/inference_batch.py CHANGED Viewed

@@ -45,7 +45,7 @@ class InferenceBatch(TrainModelMixin, ConfigReader):
     >>> inferencer.run()
     :example II:
-    >>> inferencer = InferenceBatch(config_path=r"D:\troubleshooting\mitra\project_folder\project_config.ini", features_dir=r"D:\troubleshooting\mitra\project_folder\videos\bg_removed\rotated\tail_features\APPENDED")
+    >>> inferencer = InferenceBatch(config_path=r"D:/troubleshooting/mitra/project_folder/project_config.ini", features_dir=r"D:/troubleshooting/mitra/project_folder/videos/bg_removed/rotated/tail_features/APPENDED")
     >>> inferencer.run()
     """

simba/model/yolo_seg_inference.py CHANGED Viewed

@@ -55,9 +55,9 @@ class YOLOSegmentationInference():
        To visualize the segmentation results, see :func:`simba.plotting.yolo_seg_visualizer.YOLOSegmentationVisualizer`
     :example:
-    >>> weights_path = r"D:\platea\yolo_071525\mdl\train3\weights\best.pt"
-    >>> video_path = r"D:\platea\platea_videos\videos\clipped\10B_Mouse_5-choice_MustTouchTrainingNEWFINAL_a7.mp4"
-    >>> save_dir=r"D:\platea\platea_videos\videos\yolo_results"
+    >>> weights_path = r"D:/platea/yolo_071525/mdl/train3/weights/best.pt"
+    >>> video_path = r"D:/platea/platea_videos/videos/clipped/10B_Mouse_5-choice_MustTouchTrainingNEWFINAL_a7.mp4"
+    >>> save_dir = r"D:/platea/platea_videos/videos/yolo_results"
     >>> runner = YOLOSegmentationInference(weights_path=weights_path, video_path=video_path, save_dir=save_dir, verbose=True, device=0, format=None, stream=True, batch_size=10, imgsz=320, interpolate=True, threshold=0.8, retina_msk=True)
     >>> runner.run()

simba/outlier_tools/skip_outlier_correction.py CHANGED Viewed

@@ -47,5 +47,5 @@ class OutlierCorrectionSkipper(ConfigReader):
         self.timer.stop_timer()
         stdout_success(msg=f"Skipped outlier correction for {len(self.input_csv_paths)} files", elapsed_time=self.timer.elapsed_time_str)
-# test = OutlierCorrectionSkipper(config_path='/Users/simon/Desktop/envs/troubleshooting/naresh/project_folder/project_config.ini')
+# test = OutlierCorrectionSkipper(config_path=r"E:\troubleshooting\mitra_emergence\project_folder\project_config.ini")
 # test.run()

simba-uw-tf-dev 4.6.6__py3-none-any.whl → 4.6.8__py3-none-any.whl

Potentially problematic release.

simba-uw-tf-dev 4.6.6__py3-none-any.whl → 4.6.8__py3-none-any.whl