sleap-nn 0.1.0a2-py3-none-any.whl → 0.1.0a4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sleap_nn/__init__.py +1 -1
- sleap_nn/architectures/convnext.py +5 -0
- sleap_nn/architectures/encoder_decoder.py +25 -6
- sleap_nn/architectures/swint.py +8 -0
- sleap_nn/cli.py +168 -39
- sleap_nn/evaluation.py +8 -0
- sleap_nn/export/__init__.py +21 -0
- sleap_nn/export/cli.py +1778 -0
- sleap_nn/export/exporters/__init__.py +51 -0
- sleap_nn/export/exporters/onnx_exporter.py +80 -0
- sleap_nn/export/exporters/tensorrt_exporter.py +291 -0
- sleap_nn/export/metadata.py +225 -0
- sleap_nn/export/predictors/__init__.py +63 -0
- sleap_nn/export/predictors/base.py +22 -0
- sleap_nn/export/predictors/onnx.py +154 -0
- sleap_nn/export/predictors/tensorrt.py +312 -0
- sleap_nn/export/utils.py +307 -0
- sleap_nn/export/wrappers/__init__.py +25 -0
- sleap_nn/export/wrappers/base.py +96 -0
- sleap_nn/export/wrappers/bottomup.py +243 -0
- sleap_nn/export/wrappers/bottomup_multiclass.py +195 -0
- sleap_nn/export/wrappers/centered_instance.py +56 -0
- sleap_nn/export/wrappers/centroid.py +58 -0
- sleap_nn/export/wrappers/single_instance.py +83 -0
- sleap_nn/export/wrappers/topdown.py +180 -0
- sleap_nn/export/wrappers/topdown_multiclass.py +304 -0
- sleap_nn/inference/peak_finding.py +47 -17
- sleap_nn/inference/postprocessing.py +284 -0
- sleap_nn/inference/predictors.py +213 -106
- sleap_nn/predict.py +35 -7
- sleap_nn/train.py +64 -0
- sleap_nn/training/callbacks.py +69 -22
- sleap_nn/training/lightning_modules.py +332 -30
- sleap_nn/training/model_trainer.py +67 -67
- {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/METADATA +13 -1
- {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/RECORD +40 -19
- {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/WHEEL +0 -0
- {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/entry_points.txt +0 -0
- {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
- {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/top_level.txt +0 -0
sleap_nn/inference/predictors.py
CHANGED

@@ -56,6 +56,8 @@ from rich.progress import (
     MofNCompleteColumn,
 )
 from time import time
+import json
+import sys
 
 
 def _filter_user_labeled_frames(
@@ -133,6 +135,8 @@ class Predictor(ABC):
             `backbone_config`. This determines the downsampling factor applied by the backbone,
             and is used to ensure that input images are padded or resized to be compatible
             with the model's architecture. Default: 16.
+        gui: If True, outputs JSON progress lines for GUI integration instead of
+            Rich progress bars. Default: False.
     """
 
     preprocess: bool = True
@@ -152,6 +156,7 @@ class Predictor(ABC):
     ] = None
     instances_key: bool = False
     max_stride: int = 16
+    gui: bool = False
 
     @classmethod
     def from_model_paths(
@@ -381,6 +386,102 @@ class Predictor(ABC):
                     v[n] = v[n].cpu().numpy()
         return output
 
+    def _process_batch(self) -> tuple:
+        """Process a single batch of frames from the pipeline.
+
+        Returns:
+            Tuple of (imgs, fidxs, vidxs, org_szs, instances, eff_scales, done)
+            where done is True if the pipeline has finished.
+        """
+        imgs = []
+        fidxs = []
+        vidxs = []
+        org_szs = []
+        instances = []
+        eff_scales = []
+        done = False
+
+        for _ in range(self.batch_size):
+            frame = self.pipeline.frame_buffer.get()
+            if frame["image"] is None:
+                done = True
+                break
+            frame["image"], eff_scale = apply_sizematcher(
+                frame["image"],
+                self.preprocess_config["max_height"],
+                self.preprocess_config["max_width"],
+            )
+            if self.instances_key:
+                frame["instances"] = frame["instances"] * eff_scale
+            if self.preprocess_config["ensure_rgb"] and frame["image"].shape[-3] != 3:
+                frame["image"] = frame["image"].repeat(1, 3, 1, 1)
+            elif (
+                self.preprocess_config["ensure_grayscale"]
+                and frame["image"].shape[-3] != 1
+            ):
+                frame["image"] = F.rgb_to_grayscale(
+                    frame["image"], num_output_channels=1
+                )
+
+            eff_scales.append(torch.tensor(eff_scale))
+            imgs.append(frame["image"].unsqueeze(dim=0))
+            fidxs.append(frame["frame_idx"])
+            vidxs.append(frame["video_idx"])
+            org_szs.append(frame["orig_size"].unsqueeze(dim=0))
+            if self.instances_key:
+                instances.append(frame["instances"].unsqueeze(dim=0))
+
+        return imgs, fidxs, vidxs, org_szs, instances, eff_scales, done
+
+    def _run_inference_on_batch(
+        self, imgs, fidxs, vidxs, org_szs, instances, eff_scales
+    ) -> Iterator[Dict[str, np.ndarray]]:
+        """Run inference on a prepared batch of frames.
+
+        Args:
+            imgs: List of image tensors.
+            fidxs: List of frame indices.
+            vidxs: List of video indices.
+            org_szs: List of original sizes.
+            instances: List of instance tensors.
+            eff_scales: List of effective scales.
+
+        Yields:
+            Dictionaries containing inference results for each frame.
+        """
+        # TODO: all preprocessing should be moved into InferenceModels to be exportable.
+        imgs = torch.concatenate(imgs, dim=0)
+        fidxs = torch.tensor(fidxs, dtype=torch.int32)
+        vidxs = torch.tensor(vidxs, dtype=torch.int32)
+        org_szs = torch.concatenate(org_szs, dim=0)
+        eff_scales = torch.tensor(eff_scales, dtype=torch.float32)
+        if self.instances_key:
+            instances = torch.concatenate(instances, dim=0)
+        ex = {
+            "image": imgs,
+            "frame_idx": fidxs,
+            "video_idx": vidxs,
+            "orig_size": org_szs,
+            "eff_scale": eff_scales,
+        }
+        if self.instances_key:
+            ex["instances"] = instances
+        if self.preprocess:
+            scale = self.preprocess_config["scale"]
+            if scale != 1.0:
+                if self.instances_key:
+                    ex["image"], ex["instances"] = apply_resizer(
+                        ex["image"], ex["instances"]
+                    )
+                else:
+                    ex["image"] = resize_image(ex["image"], scale)
+            ex["image"] = apply_pad_to_stride(ex["image"], self.max_stride)
+        outputs_list = self.inference_model(ex)
+        if outputs_list is not None:
+            for output in outputs_list:
+                output = self._convert_tensors_to_numpy(output)
+                yield output
+
     def _predict_generator(self) -> Iterator[Dict[str, np.ndarray]]:
         """Create a generator that yields batches of inference results.
 
@@ -400,114 +501,14 @@ class Predictor(ABC):
         # Loop over data batches.
         self.pipeline.start()
         total_frames = self.pipeline.total_len()
-        done = False
 
         try:
-            with Progress(
-                "{task.description}",
-                BarColumn(),
-                "[progress.percentage]{task.percentage:>3.0f}%",
-                MofNCompleteColumn(),
-                "ETA:",
-                TimeRemainingColumn(),
-                "Elapsed:",
-                TimeElapsedColumn(),
-                RateColumn(),
-                auto_refresh=False,
-                refresh_per_second=4,  # Change to self.report_rate if needed
-                speed_estimate_period=5,
-            ) as progress:
-                task = progress.add_task("Predicting...", total=total_frames)
-                last_report = time()
-
-                done = False
-                while not done:
-                    imgs = []
-                    fidxs = []
-                    vidxs = []
-                    org_szs = []
-                    instances = []
-                    eff_scales = []
-                    for _ in range(self.batch_size):
-                        frame = self.pipeline.frame_buffer.get()
-                        if frame["image"] is None:
-                            done = True
-                            break
-                        frame["image"], eff_scale = apply_sizematcher(
-                            frame["image"],
-                            self.preprocess_config["max_height"],
-                            self.preprocess_config["max_width"],
-                        )
-                        if self.instances_key:
-                            frame["instances"] = frame["instances"] * eff_scale
-                        if (
-                            self.preprocess_config["ensure_rgb"]
-                            and frame["image"].shape[-3] != 3
-                        ):
-                            frame["image"] = frame["image"].repeat(1, 3, 1, 1)
-                        elif (
-                            self.preprocess_config["ensure_grayscale"]
-                            and frame["image"].shape[-3] != 1
-                        ):
-                            frame["image"] = F.rgb_to_grayscale(
-                                frame["image"], num_output_channels=1
-                            )
-
-                        eff_scales.append(torch.tensor(eff_scale))
-                        imgs.append(frame["image"].unsqueeze(dim=0))
-                        fidxs.append(frame["frame_idx"])
-                        vidxs.append(frame["video_idx"])
-                        org_szs.append(frame["orig_size"].unsqueeze(dim=0))
-                        if self.instances_key:
-                            instances.append(frame["instances"].unsqueeze(dim=0))
-                    if imgs:
-                        # TODO: all preprocessing should be moved into InferenceModels to be exportable.
-                        imgs = torch.concatenate(imgs, dim=0)
-                        fidxs = torch.tensor(fidxs, dtype=torch.int32)
-                        vidxs = torch.tensor(vidxs, dtype=torch.int32)
-                        org_szs = torch.concatenate(org_szs, dim=0)
-                        eff_scales = torch.tensor(eff_scales, dtype=torch.float32)
-                        if self.instances_key:
-                            instances = torch.concatenate(instances, dim=0)
-                        ex = {
-                            "image": imgs,
-                            "frame_idx": fidxs,
-                            "video_idx": vidxs,
-                            "orig_size": org_szs,
-                            "eff_scale": eff_scales,
-                        }
-                        if self.instances_key:
-                            ex["instances"] = instances
-                        if self.preprocess:
-                            scale = self.preprocess_config["scale"]
-                            if scale != 1.0:
-                                if self.instances_key:
-                                    ex["image"], ex["instances"] = apply_resizer(
-                                        ex["image"], ex["instances"]
-                                    )
-                                else:
-                                    ex["image"] = resize_image(ex["image"], scale)
-                            ex["image"] = apply_pad_to_stride(
-                                ex["image"], self.max_stride
-                            )
-                        outputs_list = self.inference_model(ex)
-                        if outputs_list is not None:
-                            for output in outputs_list:
-                                output = self._convert_tensors_to_numpy(output)
-                                yield output
-
-                        # Advance progress
-                        num_frames = (
-                            len(ex["frame_idx"])
-                            if "frame_idx" in ex
-                            else self.batch_size
-                        )
-                        progress.update(task, advance=num_frames)
-
-                        # Manually refresh progress bar
-                        if time() - last_report > 0.25:
-                            progress.refresh()
-                            last_report = time()
+            if self.gui:
+                # GUI mode: emit JSON progress lines
+                yield from self._predict_generator_gui(total_frames)
+            else:
+                # Normal mode: use Rich progress bar
+                yield from self._predict_generator_rich(total_frames)
 
         except KeyboardInterrupt:
             logger.info("Inference interrupted by user")
@@ -520,6 +521,112 @@ class Predictor(ABC):
 
             self.pipeline.join()
 
+    def _predict_generator_gui(
+        self, total_frames: int
+    ) -> Iterator[Dict[str, np.ndarray]]:
+        """Generator for GUI mode with JSON progress output.
+
+        Args:
+            total_frames: Total number of frames to process.
+
+        Yields:
+            Dictionaries containing inference results for each frame.
+        """
+        start_time = time()
+        frames_processed = 0
+        last_report = time()
+        done = False
+
+        while not done:
+            imgs, fidxs, vidxs, org_szs, instances, eff_scales, done = (
+                self._process_batch()
+            )
+
+            if imgs:
+                yield from self._run_inference_on_batch(
+                    imgs, fidxs, vidxs, org_szs, instances, eff_scales
+                )
+
+                # Update progress
+                num_frames = len(fidxs)
+                frames_processed += num_frames
+
+                # Emit JSON progress (throttled to ~4Hz)
+                if time() - last_report > 0.25:
+                    elapsed = time() - start_time
+                    rate = frames_processed / elapsed if elapsed > 0 else 0
+                    remaining = total_frames - frames_processed
+                    eta = remaining / rate if rate > 0 else 0
+
+                    progress_data = {
+                        "n_processed": frames_processed,
+                        "n_total": total_frames,
+                        "rate": round(rate, 1),
+                        "eta": round(eta, 1),
+                    }
+                    print(json.dumps(progress_data), flush=True)
+                    last_report = time()
+
+        # Final progress emit to ensure 100% is shown
+        elapsed = time() - start_time
+        progress_data = {
+            "n_processed": total_frames,
+            "n_total": total_frames,
+            "rate": round(frames_processed / elapsed, 1) if elapsed > 0 else 0,
+            "eta": 0,
+        }
+        print(json.dumps(progress_data), flush=True)
+
+    def _predict_generator_rich(
+        self, total_frames: int
+    ) -> Iterator[Dict[str, np.ndarray]]:
+        """Generator for normal mode with Rich progress bar.
+
+        Args:
+            total_frames: Total number of frames to process.
+
+        Yields:
+            Dictionaries containing inference results for each frame.
+        """
+        with Progress(
+            "{task.description}",
+            BarColumn(),
+            "[progress.percentage]{task.percentage:>3.0f}%",
+            MofNCompleteColumn(),
+            "ETA:",
+            TimeRemainingColumn(),
+            "Elapsed:",
+            TimeElapsedColumn(),
+            RateColumn(),
+            auto_refresh=False,
+            refresh_per_second=4,
+            speed_estimate_period=5,
+        ) as progress:
+            task = progress.add_task("Predicting...", total=total_frames)
+            last_report = time()
+            done = False
+
+            while not done:
+                imgs, fidxs, vidxs, org_szs, instances, eff_scales, done = (
+                    self._process_batch()
+                )
+
+                if imgs:
+                    yield from self._run_inference_on_batch(
+                        imgs, fidxs, vidxs, org_szs, instances, eff_scales
+                    )
+
+                    # Advance progress
+                    num_frames = len(fidxs)
+                    progress.update(task, advance=num_frames)
+
+                    # Manually refresh progress bar
+                    if time() - last_report > 0.25:
+                        progress.refresh()
+                        last_report = time()
+
+        self.pipeline.join()
+
     def predict(
         self,
         make_labels: bool = True,
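When `gui=True`, the predictor prints one JSON object per progress update to stdout with the keys `n_processed`, `n_total`, `rate`, and `eta`. The sketch below shows one way a front end might parse that stream; the helper function is illustrative and not part of the package.

```python
import json


def parse_progress_line(line: str):
    """Return the progress dict if ``line`` is a JSON progress record, else None."""
    try:
        data = json.loads(line)
    except json.JSONDecodeError:
        return None
    if isinstance(data, dict) and {"n_processed", "n_total"} <= data.keys():
        return data
    return None


# Example with a literal progress line of the documented shape.
record = parse_progress_line('{"n_processed": 64, "n_total": 1280, "rate": 42.3, "eta": 28.7}')
if record:
    pct = 100.0 * record["n_processed"] / max(record["n_total"], 1)
    print(f"{pct:.0f}% done at {record['rate']} frames/s, ~{record['eta']}s left")
```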
sleap_nn/predict.py
CHANGED

@@ -74,6 +74,9 @@ def run_inference(
     frames: Optional[list] = None,
     crop_size: Optional[int] = None,
     peak_threshold: Union[float, List[float]] = 0.2,
+    filter_overlapping: bool = False,
+    filter_overlapping_method: str = "iou",
+    filter_overlapping_threshold: float = 0.8,
     integral_refinement: Optional[str] = "integral",
     integral_patch_size: int = 5,
     return_confmaps: bool = False,
@@ -110,6 +113,7 @@ def run_inference(
     tracking_pre_cull_iou_threshold: float = 0,
     tracking_clean_instance_count: int = 0,
     tracking_clean_iou_threshold: float = 0,
+    gui: bool = False,
 ):
     """Entry point to run inference on trained SLEAP-NN models.
 
@@ -160,6 +164,15 @@ def run_inference(
             centroid and centered-instance model, where the first element corresponds
             to centroid model peak finding threshold and the second element is for
             centered-instance model peak finding.
+        filter_overlapping: (bool) If True, removes overlapping instances after
+            inference using greedy NMS. Applied independently of tracking.
+            Default: False.
+        filter_overlapping_method: (str) Similarity metric for filtering overlapping
+            instances. One of "iou" (bounding box) or "oks" (keypoint similarity).
+            Default: "iou".
+        filter_overlapping_threshold: (float) Similarity threshold for filtering.
+            Instances with similarity > threshold are removed (keeping higher-scoring).
+            Typical values: 0.3 (aggressive) to 0.8 (permissive). Default: 0.8.
         integral_refinement: (str) If `None`, returns the grid-aligned peaks with no refinement.
             If `"integral"`, peaks will be refined with integral regression.
             Default: `"integral"`.
@@ -250,6 +263,8 @@ def run_inference(
         tracking_pre_cull_iou_threshold: If non-zero and pre_cull_to_target also set, then use IOU threshold to remove overlapping instances over count *before* tracking. (default: 0)
         tracking_clean_instance_count: Target number of instances to clean *after* tracking. (default: 0)
         tracking_clean_iou_threshold: IOU to use when culling instances *after* tracking. (default: 0)
+        gui: (bool) If True, outputs JSON progress lines for GUI integration instead
+            of Rich progress bars. Default: False.
 
     Returns:
         Returns `sio.Labels` object if `make_labels` is True. Else this function returns
@@ -433,13 +448,6 @@ def run_inference(
         else "mps" if torch.backends.mps.is_available() else "cpu"
     )
 
-    if integral_refinement is not None and device == "mps":  # TODO
-        # kornia/geometry/transform/imgwarp.py:382: in get_perspective_transform. NotImplementedError: The operator 'aten::_linalg_solve_ex.result' is not currently implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on https://github.com/pytorch/pytorch/issues/77764. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.
-        logger.info(
-            "Integral refinement is not supported with MPS accelerator. Setting integral refinement to None."
-        )
-        integral_refinement = None
-
     logger.info(f"Using device: {device}")
 
     # initializes the inference model
@@ -458,6 +466,9 @@ def run_inference(
         anchor_part=anchor_part,
     )
 
+    # Set GUI mode for progress output
+    predictor.gui = gui
+
     if (
         tracking
         and not isinstance(predictor, BottomUpMultiClassPredictor)
@@ -553,6 +564,20 @@ def run_inference(
         make_labels=make_labels,
     )
 
+    # Filter overlapping instances (independent of tracking)
+    if filter_overlapping and make_labels:
+        from sleap_nn.inference.postprocessing import filter_overlapping_instances
+
+        output = filter_overlapping_instances(
+            output,
+            threshold=filter_overlapping_threshold,
+            method=filter_overlapping_method,
+        )
+        logger.info(
+            f"Filtered overlapping instances with {filter_overlapping_method.upper()} "
+            f"threshold: {filter_overlapping_threshold}"
+        )
+
     if tracking:
         lfs = [x for x in output]
         if tracking_clean_instance_count > 0:
@@ -607,6 +632,9 @@ def run_inference(
     # Build inference parameters for provenance
     inference_params = {
         "peak_threshold": peak_threshold,
+        "filter_overlapping": filter_overlapping,
+        "filter_overlapping_method": filter_overlapping_method,
+        "filter_overlapping_threshold": filter_overlapping_threshold,
         "integral_refinement": integral_refinement,
         "integral_patch_size": integral_patch_size,
         "batch_size": batch_size,
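The new `filter_overlapping_*` options describe greedy suppression of overlapping instances by IoU or OKS; the actual implementation lives in the new `sleap_nn/inference/postprocessing.py`, which is not shown in this diff. As a rough illustration of the documented behavior only (not the package's `filter_overlapping_instances`), here is a minimal greedy IoU filter over bounding boxes and scores.

```python
import numpy as np


def bbox_iou(a: np.ndarray, b: np.ndarray) -> float:
    """IoU of two axis-aligned boxes given as (x0, y0, x1, y1)."""
    x0, y0 = max(a[0], b[0]), max(a[1], b[1])
    x1, y1 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x1 - x0) * max(0.0, y1 - y0)
    union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter
    return inter / union if union > 0 else 0.0


def greedy_overlap_filter(boxes: np.ndarray, scores: np.ndarray, threshold: float = 0.8):
    """Keep higher-scoring boxes; drop any box whose IoU with an already-kept
    box exceeds ``threshold`` (mirrors the documented greedy NMS behavior)."""
    keep = []
    for i in np.argsort(-scores):
        if all(bbox_iou(boxes[i], boxes[j]) <= threshold for j in keep):
            keep.append(i)
    return sorted(int(i) for i in keep)


boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [20, 20, 30, 30]], dtype=float)
scores = np.array([0.9, 0.8, 0.7])
print(greedy_overlap_filter(boxes, scores, threshold=0.5))  # -> [0, 2]
```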
sleap_nn/train.py
CHANGED

@@ -118,6 +118,70 @@ def run_training(
         logger.info(f"p90 dist: {metrics['distance_metrics']['p90']}")
         logger.info(f"p50 dist: {metrics['distance_metrics']['p50']}")
 
+        # Log test metrics to wandb summary
+        if (
+            d_name.startswith("test")
+            and trainer.config.trainer_config.use_wandb
+        ):
+            import wandb
+
+            if wandb.run is not None:
+                summary_metrics = {
+                    f"eval/{d_name}/mOKS": metrics["mOKS"]["mOKS"],
+                    f"eval/{d_name}/oks_voc_mAP": metrics["voc_metrics"][
+                        "oks_voc.mAP"
+                    ],
+                    f"eval/{d_name}/oks_voc_mAR": metrics["voc_metrics"][
+                        "oks_voc.mAR"
+                    ],
+                    f"eval/{d_name}/mPCK": metrics["pck_metrics"]["mPCK"],
+                    f"eval/{d_name}/PCK_5": metrics["pck_metrics"]["PCK@5"],
+                    f"eval/{d_name}/PCK_10": metrics["pck_metrics"]["PCK@10"],
+                    f"eval/{d_name}/distance_avg": metrics["distance_metrics"][
+                        "avg"
+                    ],
+                    f"eval/{d_name}/distance_p50": metrics["distance_metrics"][
+                        "p50"
+                    ],
+                    f"eval/{d_name}/distance_p95": metrics["distance_metrics"][
+                        "p95"
+                    ],
+                    f"eval/{d_name}/distance_p99": metrics["distance_metrics"][
+                        "p99"
+                    ],
+                    f"eval/{d_name}/visibility_precision": metrics[
+                        "visibility_metrics"
+                    ]["precision"],
+                    f"eval/{d_name}/visibility_recall": metrics[
+                        "visibility_metrics"
+                    ]["recall"],
+                }
+                for key, value in summary_metrics.items():
+                    wandb.run.summary[key] = value
+
+    # Finish wandb run and cleanup after all evaluation is complete
+    if trainer.config.trainer_config.use_wandb:
+        import wandb
+        import shutil
+
+        if wandb.run is not None:
+            wandb.finish()
+
+        # Delete local wandb logs if configured
+        wandb_config = trainer.config.trainer_config.wandb
+        should_delete_wandb_logs = wandb_config.delete_local_logs is True or (
+            wandb_config.delete_local_logs is None
+            and wandb_config.wandb_mode != "offline"
+        )
+        if should_delete_wandb_logs:
+            wandb_dir = run_path / "wandb"
+            if wandb_dir.exists():
+                logger.info(
+                    f"Deleting local wandb logs at {wandb_dir}... "
+                    "(set trainer_config.wandb.delete_local_logs=false to disable)"
+                )
+                shutil.rmtree(wandb_dir, ignore_errors=True)
+
 
 def train(
     train_labels_path: Optional[List[str]] = None,
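Test-split metrics are now mirrored into the wandb run summary under keys such as `eval/{d_name}/mOKS` and `eval/{d_name}/oks_voc_mAP`. Below is a hedged sketch of reading those summary values back with the public `wandb` API; the run path and the split name `test` are placeholders, not values from this diff.

```python
import wandb

# Placeholder run path: replace with your own entity/project/run id.
api = wandb.Api()
run = api.run("my-entity/my-project/abc123xy")

# Summary keys written by run_training() for a dataset split named "test".
for key in ("eval/test/mOKS", "eval/test/oks_voc_mAP", "eval/test/distance_p50"):
    value = run.summary.get(key)
    if value is not None:
        print(f"{key}: {value}")
```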
|