sleap-nn 0.0.5__py3-none-any.whl → 0.1.0a0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- sleap_nn/__init__.py +6 -1
- sleap_nn/cli.py +142 -3
- sleap_nn/config/data_config.py +44 -7
- sleap_nn/config/get_config.py +22 -20
- sleap_nn/config/trainer_config.py +12 -0
- sleap_nn/data/augmentation.py +54 -2
- sleap_nn/data/custom_datasets.py +22 -22
- sleap_nn/data/instance_cropping.py +70 -5
- sleap_nn/data/normalization.py +45 -2
- sleap_nn/data/providers.py +26 -0
- sleap_nn/evaluation.py +99 -23
- sleap_nn/inference/__init__.py +6 -0
- sleap_nn/inference/peak_finding.py +10 -2
- sleap_nn/inference/predictors.py +115 -20
- sleap_nn/inference/provenance.py +292 -0
- sleap_nn/inference/topdown.py +55 -47
- sleap_nn/predict.py +187 -10
- sleap_nn/system_info.py +443 -0
- sleap_nn/tracking/tracker.py +8 -1
- sleap_nn/train.py +64 -40
- sleap_nn/training/callbacks.py +317 -5
- sleap_nn/training/lightning_modules.py +325 -180
- sleap_nn/training/model_trainer.py +308 -22
- sleap_nn/training/utils.py +367 -2
- {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/METADATA +22 -32
- {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/RECORD +30 -28
- {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/WHEEL +0 -0
- {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/entry_points.txt +0 -0
- {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/licenses/LICENSE +0 -0
- {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/top_level.txt +0 -0
sleap_nn/inference/topdown.py
CHANGED
@@ -47,9 +47,6 @@ class CentroidCrop(L.LightningModule):
         crop_hw: Tuple (height, width) representing the crop size.
         input_scale: Float indicating if the images should be resized before being
             passed to the model.
-        precrop_resize: Float indicating the factor by which the original images
-            (not images resized for centroid model) should be resized before cropping.
-            Note: This resize happens only after getting the predictions for centroid model.
         max_stride: Maximum stride in a model that the images must be divisible by.
             If > 1, this will pad the bottom and right of the images to ensure they meet
             this divisibility criteria. Padding is applied after the scaling specified
@@ -74,7 +71,6 @@ class CentroidCrop(L.LightningModule):
         return_crops: bool = False,
         crop_hw: Optional[List[int]] = None,
         input_scale: float = 1.0,
-        precrop_resize: float = 1.0,
         max_stride: int = 1,
         use_gt_centroids: bool = False,
         anchor_ind: Optional[int] = None,
@@ -92,22 +88,25 @@ class CentroidCrop(L.LightningModule):
         self.return_crops = return_crops
         self.crop_hw = crop_hw
         self.input_scale = input_scale
-        self.precrop_resize = precrop_resize
         self.max_stride = max_stride
         self.use_gt_centroids = use_gt_centroids
         self.anchor_ind = anchor_ind

-    def _generate_crops(self, inputs):
+    def _generate_crops(self, inputs, cms: Optional[torch.Tensor] = None):
         """Generate Crops from the predicted centroids."""
         crops_dict = []
-        for centroid, centroid_val, image, fidx, vidx, sz, eff_sc in zip(
-            self.refined_peaks_batched,
-            self.peak_vals_batched,
-            inputs["image"],
-            inputs["frame_idx"],
-            inputs["video_idx"],
-            inputs["orig_size"],
-            inputs["eff_scale"],
+        if cms is not None:
+            cms = cms.detach()
+        for idx, (centroid, centroid_val, image, fidx, vidx, sz, eff_sc) in enumerate(
+            zip(
+                self.refined_peaks_batched,
+                self.peak_vals_batched,
+                inputs["image"],
+                inputs["frame_idx"],
+                inputs["video_idx"],
+                inputs["orig_size"],
+                inputs["eff_scale"],
+            )
         ):
             if torch.any(torch.isnan(centroid)):
                 if torch.all(torch.isnan(centroid)):
@@ -149,6 +148,11 @@ class CentroidCrop(L.LightningModule):
             ex["instance_image"] = instance_image.unsqueeze(dim=1)
             ex["orig_size"] = torch.cat([torch.Tensor(sz)] * n)
             ex["eff_scale"] = torch.Tensor([eff_sc] * n)
+            ex["pred_centroids"] = centroid
+            if self.return_confmaps:
+                ex["pred_centroid_confmaps"] = torch.cat(
+                    [cms[idx].unsqueeze(dim=0)] * n
+                )
             crops_dict.append(ex)

         return crops_dict
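
In the new `_generate_crops`, each crop dictionary now carries the predicted centroid and, when `return_confmaps` is on, a copy of that frame's centroid confidence map repeated once per crop. A minimal standalone sketch of that repeat (the map shape below is an assumption, not taken from sleap-nn):

    import torch

    # One centroid confidence map for a frame: (channels, height, width) -- assumed shape.
    cm = torch.rand(1, 96, 96)
    n = 3  # number of crops generated from this frame

    # Same pattern as the diff: repeat the map once per crop along a new batch dim.
    per_crop_cms = torch.cat([cm.unsqueeze(dim=0)] * n)
    assert per_crop_cms.shape == (3, 1, 96, 96)
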
@@ -204,12 +208,6 @@ class CentroidCrop(L.LightningModule):

         if self.return_crops:
             crops_dict = self._generate_crops(inputs)
-            inputs["image"] = resize_image(inputs["image"], self.precrop_resize)
-            inputs["centroids"] *= self.precrop_resize
-            scaled_refined_peaks = []
-            for ref_peak in self.refined_peaks_batched:
-                scaled_refined_peaks.append(ref_peak * self.precrop_resize)
-            self.refined_peaks_batched = scaled_refined_peaks
             return crops_dict
         else:
             return inputs
@@ -274,19 +272,13 @@ class CentroidCrop(L.LightningModule):

         # Generate crops if return_crops=True to pass the crops to CenteredInstance model.
         if self.return_crops:
-            inputs["image"] = resize_image(inputs["image"], self.precrop_resize)
-            scaled_refined_peaks = []
-            for ref_peak in self.refined_peaks_batched:
-                scaled_refined_peaks.append(ref_peak * self.precrop_resize)
-            self.refined_peaks_batched = scaled_refined_peaks
-
             inputs.update(
                 {
                     "centroids": self.refined_peaks_batched,
                     "centroid_vals": self.peak_vals_batched,
                 }
             )
-            crops_dict = self._generate_crops(inputs)
+            crops_dict = self._generate_crops(inputs, cms)
             return crops_dict
         else:
             # batch the peaks to pass it to FindInstancePeaksGroundTruth class.
@@ -359,7 +351,11 @@ class FindInstancePeaksGroundTruth(L.LightningModule):

     def forward(self, batch: Dict[str, torch.Tensor]) -> Dict[str, np.array]:
         """Return the ground truth instance peaks given a set of crops."""
-        b, _, max_inst, nodes, _ = batch["instances"].shape
+        b, _, _, nodes, _ = batch["instances"].shape
+        # Use number of centroids as max_inst to ensure consistent output shape
+        # This handles the case where max_instances limits centroids but instances
+        # tensor has a different (global) max_instances from the labels file
+        num_centroids = batch["centroids"].shape[2]
         inst = (
             batch["instances"].unsqueeze(dim=-4).float()
         )  # (batch, 1, 1, n_inst, nodes, 2)
@@ -389,26 +385,26 @@ class FindInstancePeaksGroundTruth(L.LightningModule):
         parsed = 0
         for i in range(b):
             if i not in matched_batch_inds:
-                batch_peaks = torch.full((max_inst, nodes, 2), torch.nan)
-                vals = torch.full((max_inst, nodes), torch.nan)
+                batch_peaks = torch.full((num_centroids, nodes, 2), torch.nan)
+                vals = torch.full((num_centroids, nodes), torch.nan)
             else:
                 c = counts[i]
                 batch_peaks = peaks_list[parsed : parsed + c]
                 num_inst = len(batch_peaks)
                 vals = torch.ones((num_inst, nodes))
-                if c < max_inst:
+                if c < num_centroids:
                     batch_peaks = torch.cat(
                         [
                             batch_peaks,
-                            torch.full((max_inst - num_inst, nodes, 2), torch.nan),
+                            torch.full((num_centroids - num_inst, nodes, 2), torch.nan),
                         ]
                     )
                     vals = torch.cat(
-                        [vals, torch.full((max_inst - num_inst, nodes), torch.nan)]
+                        [vals, torch.full((num_centroids - num_inst, nodes), torch.nan)]
                     )
                 else:
-                    batch_peaks = batch_peaks[:max_inst]
-                    vals = vals[:max_inst]
+                    batch_peaks = batch_peaks[:num_centroids]
+                    vals = vals[:num_centroids]
                 parsed += c

             batch_peaks = batch_peaks.unsqueeze(dim=0)
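
The ground-truth peak finder now pads or truncates every frame's peaks to `num_centroids` rows, using NaN for missing instances, so batched outputs always have the same shape. A self-contained sketch of the same padding rule with made-up sizes:

    import torch

    num_centroids, nodes = 4, 5
    # Two matched ground-truth instances for this frame (values are arbitrary).
    batch_peaks = torch.rand(2, nodes, 2)
    vals = torch.ones(2, nodes)

    c = batch_peaks.shape[0]
    if c < num_centroids:
        # Pad up to num_centroids with NaN rows so every frame has the same shape.
        batch_peaks = torch.cat(
            [batch_peaks, torch.full((num_centroids - c, nodes, 2), torch.nan)]
        )
        vals = torch.cat([vals, torch.full((num_centroids - c, nodes), torch.nan)])
    else:
        # Truncate if there are more matches than centroids.
        batch_peaks = batch_peaks[:num_centroids]
        vals = vals[:num_centroids]

    assert batch_peaks.shape == (num_centroids, nodes, 2)
    assert vals.shape == (num_centroids, nodes)
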
@@ -432,33 +428,45 @@ class FindInstancePeaksGroundTruth(L.LightningModule):
         peaks_output["pred_instance_peaks"] = peaks
         peaks_output["pred_peak_values"] = peaks_vals

-        batch_size, num_centroids = (
-            batch["centroids"].shape[0],
-            batch["centroids"].shape[2],
-        )
+        batch_size = batch["centroids"].shape[0]
         output_dict = {}
         output_dict["centroid"] = batch["centroids"].squeeze(dim=1).reshape(-1, 1, 2)
         output_dict["centroid_val"] = batch["centroid_vals"].reshape(-1)
-        output_dict["pred_instance_peaks"] = batch["pred_instance_peaks"].reshape(
-            -1, nodes, 2
+        output_dict["pred_instance_peaks"] = peaks_output[
+            "pred_instance_peaks"
+        ].reshape(-1, nodes, 2)
+        output_dict["pred_peak_values"] = peaks_output["pred_peak_values"].reshape(
+            -1, nodes
         )
-        output_dict["pred_peak_values"] = batch["pred_peak_values"].reshape(-1, nodes)
         output_dict["instance_bbox"] = torch.zeros(
             (batch_size * num_centroids, 1, 4, 2)
         )
         frame_inds = []
         video_inds = []
         orig_szs = []
+        images = []
+        centroid_confmaps = []
         for b_idx in range(b):
             curr_batch_size = len(batch["centroids"][b_idx][0])
             frame_inds.extend([batch["frame_idx"][b_idx]] * curr_batch_size)
             video_inds.extend([batch["video_idx"][b_idx]] * curr_batch_size)
             orig_szs.append(torch.cat([batch["orig_size"][b_idx]] * curr_batch_size))
+            images.append(
+                batch["image"][b_idx].unsqueeze(0).repeat(curr_batch_size, 1, 1, 1, 1)
+            )
+            if "pred_centroid_confmaps" in batch:
+                centroid_confmaps.append(
+                    batch["pred_centroid_confmaps"][b_idx]
+                    .unsqueeze(0)
+                    .repeat(curr_batch_size, 1, 1, 1)
+                )

         output_dict["frame_idx"] = torch.tensor(frame_inds)
         output_dict["video_idx"] = torch.tensor(video_inds)
         output_dict["orig_size"] = torch.concatenate(orig_szs, dim=0)
-
+        output_dict["image"] = torch.cat(images, dim=0)
+        if centroid_confmaps:
+            output_dict["pred_centroid_confmaps"] = torch.cat(centroid_confmaps, dim=0)
         return output_dict

@@ -548,6 +556,8 @@ class FindInstancePeaks(L.LightningModule):
         # Network forward pass.
         # resize and pad the input image
         input_image = inputs["instance_image"]
+        # resize the crop image
+        input_image = resize_image(input_image, self.input_scale)
         if self.max_stride != 1:
             input_image = apply_pad_to_stride(input_image, self.max_stride)

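
With `precrop_resize` removed from `CentroidCrop`, the crop itself is resized by `input_scale` inside the peak finders and then padded so its sides are divisible by `max_stride`. A rough standalone sketch of that resize-then-pad order using plain torch ops (sleap_nn's `resize_image` and `apply_pad_to_stride` are assumed to behave roughly like this; shapes are illustrative):

    import torch
    import torch.nn.functional as F

    def resize_then_pad(image: torch.Tensor, scale: float, max_stride: int) -> torch.Tensor:
        """Scale a (batch, channels, H, W) image, then zero-pad bottom/right to a stride multiple."""
        if scale != 1.0:
            image = F.interpolate(image, scale_factor=scale, mode="bilinear", align_corners=False)
        h, w = image.shape[-2:]
        pad_h = (max_stride - h % max_stride) % max_stride
        pad_w = (max_stride - w % max_stride) % max_stride
        # F.pad order is (left, right, top, bottom); pad only right and bottom.
        return F.pad(image, (0, pad_w, 0, pad_h))

    crops = torch.rand(2, 1, 100, 100)
    out = resize_then_pad(crops, scale=0.5, max_stride=16)
    assert out.shape[-2:] == (64, 64)  # 50 -> padded up to the next multiple of 16
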
@@ -569,8 +579,6 @@ class FindInstancePeaks(L.LightningModule):
             inputs["eff_scale"].unsqueeze(dim=1).unsqueeze(dim=2).to(peak_points.device)
         )

-        inputs["instance_bbox"] = inputs["instance_bbox"] / self.input_scale
-
         inputs["instance_bbox"] = inputs["instance_bbox"] / (
             inputs["eff_scale"]
             .unsqueeze(dim=1)
@@ -679,6 +687,8 @@ class TopDownMultiClassFindInstancePeaks(L.LightningModule):
         # Network forward pass.
         # resize and pad the input image
         input_image = inputs["instance_image"]
+        # resize the crop image
+        input_image = resize_image(input_image, self.input_scale)
         if self.max_stride != 1:
             input_image = apply_pad_to_stride(input_image, self.max_stride)

@@ -702,8 +712,6 @@ class TopDownMultiClassFindInstancePeaks(L.LightningModule):
             inputs["eff_scale"].unsqueeze(dim=1).unsqueeze(dim=2).to(peak_points.device)
         )

-        inputs["instance_bbox"] = inputs["instance_bbox"] / self.input_scale
-
         inputs["instance_bbox"] = inputs["instance_bbox"] / (
             inputs["eff_scale"]
             .unsqueeze(dim=1)
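
After this change, instance bounding boxes are mapped back to original-image coordinates by dividing only by the per-sample effective scale rather than also by `input_scale`. A toy sketch of that broadcast division (the box layout and scale values are assumptions):

    import torch

    # Four crops, each with one bounding box of four corner points (x, y) -- assumed layout.
    instance_bbox = torch.rand(4, 1, 4, 2) * 100.0
    # Per-sample effective scale applied earlier in the pipeline.
    eff_scale = torch.tensor([0.5, 0.5, 1.0, 2.0])

    # Broadcast (4,) -> (4, 1, 1, 1) so each box is divided by its own scale.
    restored = instance_bbox / eff_scale.view(-1, 1, 1, 1)
    assert restored.shape == instance_bbox.shape
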
sleap_nn/predict.py
CHANGED
@@ -15,6 +15,11 @@ from sleap_nn.tracking.tracker import (
     connect_single_breaks,
     cull_instances,
 )
+from sleap_nn.system_info import get_startup_info_string
+from sleap_nn.inference.provenance import (
+    build_inference_provenance,
+    build_tracking_only_provenance,
+)
 from omegaconf import OmegaConf
 import sleap_io as sio
 from pathlib import Path
@@ -58,6 +63,8 @@ def run_inference(
     anchor_part: Optional[str] = None,
     only_labeled_frames: bool = False,
     only_suggested_frames: bool = False,
+    exclude_user_labeled: bool = False,
+    only_predicted_frames: bool = False,
     no_empty_frames: bool = False,
     batch_size: int = 4,
     queue_maxsize: int = 8,
@@ -136,6 +143,8 @@ def run_inference(
             provided, the anchor part in the `training_config.yaml` is used. Default: `None`.
         only_labeled_frames: (bool) `True` if inference should be run only on user-labeled frames. Default: `False`.
         only_suggested_frames: (bool) `True` if inference should be run only on unlabeled suggested frames. Default: `False`.
+        exclude_user_labeled: (bool) `True` to skip frames that have user-labeled instances. Default: `False`.
+        only_predicted_frames: (bool) `True` to run inference only on frames that already have predictions. Default: `False`.
         no_empty_frames: (bool) `True` if empty frames that did not have predictions should be cleared before saving to output. Default: `False`.
         batch_size: (int) Number of samples per batch. Default: 4.
         queue_maxsize: (int) Maximum size of the frame buffer queue. Default: 8.
@@ -145,7 +154,7 @@ def run_inference(
         video_input_format: (str) The input_format for HDF5 videos.
         frames: (list) List of frames indices. If `None`, all frames in the video are used. Default: None.
         crop_size: (int) Crop size. If not provided, the crop size from training_config.yaml is used.
-            Default: None.
+            If `input_scale` is provided, then the cropped image will be resized according to `input_scale`. Default: None.
         peak_threshold: (float) Minimum confidence threshold. Peaks with values below
             this will be ignored. Default: 0.2. This can also be `List[float]` for topdown
             centroid and centered-instance model, where the first element corresponds
@@ -256,6 +265,27 @@ def run_inference(
         "scale": input_scale,
     }

+    # Validate mutually exclusive frame filter flags
+    if only_labeled_frames and exclude_user_labeled:
+        message = (
+            "--only_labeled_frames and --exclude_user_labeled are mutually exclusive "
+            "(would result in zero frames)"
+        )
+        logger.error(message)
+        raise ValueError(message)
+
+    if (
+        only_predicted_frames
+        and data_path is not None
+        and not data_path.endswith(".slp")
+    ):
+        message = (
+            "--only_predicted_frames requires a .slp file input "
+            "(need Labels to know which frames have predictions)"
+        )
+        logger.error(message)
+        raise ValueError(message)
+
     if model_paths is None or not len(
         model_paths
     ):  # if model paths is not provided, run tracking-only pipeline.
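
The two new frame filters are checked before any model is loaded: `only_labeled_frames` cannot be combined with `exclude_user_labeled`, and `only_predicted_frames` needs a .slp input so the existing predictions are known. A self-contained sketch that mirrors those checks outside of `run_inference` (the helper name is made up for illustration):

    from typing import Optional

    def validate_frame_filters(
        only_labeled_frames: bool,
        exclude_user_labeled: bool,
        only_predicted_frames: bool,
        data_path: Optional[str],
    ) -> None:
        """Standalone mirror of the two checks added above."""
        if only_labeled_frames and exclude_user_labeled:
            raise ValueError(
                "--only_labeled_frames and --exclude_user_labeled are mutually exclusive "
                "(would result in zero frames)"
            )
        if (
            only_predicted_frames
            and data_path is not None
            and not data_path.endswith(".slp")
        ):
            raise ValueError(
                "--only_predicted_frames requires a .slp file input "
                "(need Labels to know which frames have predictions)"
            )

    try:
        validate_frame_filters(True, True, False, "labels.slp")
    except ValueError as err:
        print(err)  # mutually exclusive filters are rejected before any model is loaded
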
@@ -273,7 +303,8 @@ def run_inference(
             raise ValueError(message)

         start_inf_time = time()
-        start_timestamp = str(datetime.now())
+        start_datetime = datetime.now()
+        start_timestamp = str(start_datetime)
         logger.info(f"Started tracking at: {start_timestamp}")

         labels = sio.load_slp(data_path) if input_labels is None else input_labels
@@ -302,7 +333,22 @@ def run_inference(

         if post_connect_single_breaks or tracking_pre_cull_to_target:
             if tracking_target_instance_count is None and max_instances is None:
-
+                features_requested = []
+                if post_connect_single_breaks:
+                    features_requested.append("--post_connect_single_breaks")
+                if tracking_pre_cull_to_target:
+                    features_requested.append("--tracking_pre_cull_to_target")
+                features_str = " and ".join(features_requested)
+
+                if max_tracks is not None:
+                    suggestion = f"Add --tracking_target_instance_count {max_tracks} to your command (using your --max_tracks value)."
+                else:
+                    suggestion = "Add --tracking_target_instance_count N where N is the expected number of instances per frame."
+
+                message = (
+                    f"{features_str} requires --tracking_target_instance_count to be set. "
+                    f"{suggestion}"
+                )
                 logger.error(message)
                 raise ValueError(message)
             elif tracking_target_instance_count is None:
@@ -332,21 +378,53 @@ def run_inference(
             tracking_clean_iou_threshold=tracking_clean_iou_threshold,
         )

-        finish_timestamp = str(datetime.now())
+        end_datetime = datetime.now()
+        finish_timestamp = str(end_datetime)
         total_elapsed = time() - start_inf_time
         logger.info(f"Finished tracking at: {finish_timestamp}")
         logger.info(f"Total runtime: {total_elapsed} secs")

+        # Build tracking-only provenance
+        tracking_params = {
+            "window_size": tracking_window_size,
+            "min_new_track_points": min_new_track_points,
+            "candidates_method": candidates_method,
+            "min_match_points": min_match_points,
+            "features": features,
+            "scoring_method": scoring_method,
+            "scoring_reduction": scoring_reduction,
+            "robust_best_instance": robust_best_instance,
+            "track_matching_method": track_matching_method,
+            "max_tracks": max_tracks,
+            "use_flow": use_flow,
+            "post_connect_single_breaks": post_connect_single_breaks,
+        }
+        provenance = build_tracking_only_provenance(
+            input_labels=labels,
+            input_path=data_path,
+            start_time=start_datetime,
+            end_time=end_datetime,
+            tracking_params=tracking_params,
+            frames_processed=len(tracked_frames),
+        )
+
         output = sio.Labels(
             labeled_frames=tracked_frames,
             videos=labels.videos,
             skeletons=labels.skeletons,
+            provenance=provenance,
         )

     else:
         start_inf_time = time()
-        start_timestamp = str(datetime.now())
+        start_datetime = datetime.now()
+        start_timestamp = str(start_datetime)
         logger.info(f"Started inference at: {start_timestamp}")
+        logger.info(get_startup_info_string())
+
+        # Convert device to string if it's a torch.device object
+        if hasattr(device, "type"):
+            device = str(device)

         if device == "auto":
             device = (
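
sleap_io's `Labels` accepts a free-form `provenance` dictionary, which is what `build_tracking_only_provenance` produces here. A minimal sketch of attaching such a dictionary; the key names below are illustrative assumptions, not the actual schema of `sleap_nn.inference.provenance`:

    from datetime import datetime
    import sleap_io as sio

    start, end = datetime.now(), datetime.now()

    # Illustrative provenance payload; the real contents come from
    # sleap_nn.inference.provenance.build_tracking_only_provenance.
    provenance = {
        "pipeline": "tracking_only",
        "start_time": str(start),
        "end_time": str(end),
        "tracking_params": {"window_size": 5, "max_tracks": None},
        "frames_processed": 0,
    }

    output = sio.Labels(labeled_frames=[], videos=[], skeletons=[], provenance=provenance)
    print(output.provenance["pipeline"])
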
@@ -387,7 +465,22 @@ def run_inference(
         ):
             if post_connect_single_breaks or tracking_pre_cull_to_target:
                 if tracking_target_instance_count is None and max_instances is None:
-
+                    features_requested = []
+                    if post_connect_single_breaks:
+                        features_requested.append("--post_connect_single_breaks")
+                    if tracking_pre_cull_to_target:
+                        features_requested.append("--tracking_pre_cull_to_target")
+                    features_str = " and ".join(features_requested)
+
+                    if max_tracks is not None:
+                        suggestion = f"Add --tracking_target_instance_count {max_tracks} to your command (using your --max_tracks value)."
+                    else:
+                        suggestion = "Add --tracking_target_instance_count N or --max_instances N where N is the expected number of instances per frame."
+
+                    message = (
+                        f"{features_str} requires --tracking_target_instance_count or --max_instances to be set. "
+                        f"{suggestion}"
+                    )
                     logger.error(message)
                     raise ValueError(message)
                 elif tracking_target_instance_count is None:
@@ -448,6 +541,8 @@ def run_inference(
             frames=frames,
             only_labeled_frames=only_labeled_frames,
             only_suggested_frames=only_suggested_frames,
+            exclude_user_labeled=exclude_user_labeled,
+            only_predicted_frames=only_predicted_frames,
             video_index=video_index,
             video_dataset=video_dataset,
             video_input_format=video_input_format,
@@ -492,12 +587,94 @@ def run_inference(
                 skeletons=output.skeletons,
             )

-        finish_timestamp = str(datetime.now())
+        end_datetime = datetime.now()
+        finish_timestamp = str(end_datetime)
         total_elapsed = time() - start_inf_time
         logger.info(f"Finished inference at: {finish_timestamp}")
-        logger.info(
-            f"Total runtime: {total_elapsed} secs"
-        )
+        logger.info(f"Total runtime: {total_elapsed} secs")
+
+        # Determine input labels for provenance preservation
+        input_labels_for_prov = None
+        if input_labels is not None:
+            input_labels_for_prov = input_labels
+        elif data_path is not None and data_path.endswith(".slp"):
+            # Load input labels to preserve provenance (if not already loaded)
+            try:
+                input_labels_for_prov = sio.load_slp(data_path)
+            except Exception:
+                pass
+
+        # Build inference parameters for provenance
+        inference_params = {
+            "peak_threshold": peak_threshold,
+            "integral_refinement": integral_refinement,
+            "integral_patch_size": integral_patch_size,
+            "batch_size": batch_size,
+            "max_instances": max_instances,
+            "crop_size": crop_size,
+            "input_scale": input_scale,
+            "anchor_part": anchor_part,
+        }
+
+        # Build tracking parameters if tracking was enabled
+        tracking_params_prov = None
+        if tracking:
+            tracking_params_prov = {
+                "window_size": tracking_window_size,
+                "min_new_track_points": min_new_track_points,
+                "candidates_method": candidates_method,
+                "min_match_points": min_match_points,
+                "features": features,
+                "scoring_method": scoring_method,
+                "scoring_reduction": scoring_reduction,
+                "robust_best_instance": robust_best_instance,
+                "track_matching_method": track_matching_method,
+                "max_tracks": max_tracks,
+                "use_flow": use_flow,
+                "post_connect_single_breaks": post_connect_single_breaks,
+            }
+
+        # Determine frame selection method
+        frame_selection_method = "all"
+        if only_labeled_frames:
+            frame_selection_method = "labeled"
+        elif only_suggested_frames:
+            frame_selection_method = "suggested"
+        elif only_predicted_frames:
+            frame_selection_method = "predicted"
+        elif frames is not None:
+            frame_selection_method = "specified"
+
+        # Determine model type from predictor class
+        predictor_type_map = {
+            "TopDownPredictor": "top_down",
+            "SingleInstancePredictor": "single_instance",
+            "BottomUpPredictor": "bottom_up",
+            "BottomUpMultiClassPredictor": "bottom_up_multi_class",
+            "TopDownMultiClassPredictor": "top_down_multi_class",
+        }
+        model_type = predictor_type_map.get(type(predictor).__name__)
+
+        # Build and set provenance (only for Labels objects)
+        if make_labels and isinstance(output, sio.Labels):
+            provenance = build_inference_provenance(
+                model_paths=model_paths,
+                model_type=model_type,
+                start_time=start_datetime,
+                end_time=end_datetime,
+                input_labels=input_labels_for_prov,
+                input_path=data_path,
+                frames_processed=(
+                    len(output.labeled_frames)
+                    if hasattr(output, "labeled_frames")
+                    else None
+                ),
+                frame_selection_method=frame_selection_method,
+                inference_params=inference_params,
+                tracking_params=tracking_params_prov,
+                device=device,
+            )
+            output.provenance = provenance

     if no_empty_frames:
         output.clean(frames=True, skeletons=False)