sleap-nn 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (37)
  1. sleap_nn/__init__.py +1 -1
  2. sleap_nn/cli.py +36 -0
  3. sleap_nn/config/trainer_config.py +18 -0
  4. sleap_nn/evaluation.py +81 -22
  5. sleap_nn/export/__init__.py +21 -0
  6. sleap_nn/export/cli.py +1778 -0
  7. sleap_nn/export/exporters/__init__.py +51 -0
  8. sleap_nn/export/exporters/onnx_exporter.py +80 -0
  9. sleap_nn/export/exporters/tensorrt_exporter.py +291 -0
  10. sleap_nn/export/metadata.py +225 -0
  11. sleap_nn/export/predictors/__init__.py +63 -0
  12. sleap_nn/export/predictors/base.py +22 -0
  13. sleap_nn/export/predictors/onnx.py +154 -0
  14. sleap_nn/export/predictors/tensorrt.py +312 -0
  15. sleap_nn/export/utils.py +307 -0
  16. sleap_nn/export/wrappers/__init__.py +25 -0
  17. sleap_nn/export/wrappers/base.py +96 -0
  18. sleap_nn/export/wrappers/bottomup.py +243 -0
  19. sleap_nn/export/wrappers/bottomup_multiclass.py +195 -0
  20. sleap_nn/export/wrappers/centered_instance.py +56 -0
  21. sleap_nn/export/wrappers/centroid.py +58 -0
  22. sleap_nn/export/wrappers/single_instance.py +83 -0
  23. sleap_nn/export/wrappers/topdown.py +180 -0
  24. sleap_nn/export/wrappers/topdown_multiclass.py +304 -0
  25. sleap_nn/inference/bottomup.py +86 -20
  26. sleap_nn/inference/postprocessing.py +284 -0
  27. sleap_nn/predict.py +29 -0
  28. sleap_nn/train.py +64 -0
  29. sleap_nn/training/callbacks.py +324 -8
  30. sleap_nn/training/lightning_modules.py +542 -32
  31. sleap_nn/training/model_trainer.py +48 -57
  32. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/METADATA +13 -2
  33. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/RECORD +37 -16
  34. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/WHEEL +0 -0
  35. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/entry_points.txt +0 -0
  36. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/licenses/LICENSE +0 -0
  37. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/top_level.txt +0 -0
@@ -295,8 +295,8 @@ class WandBVizCallback(Callback):
             suffix = "" if mode_name == "direct" else f"_{mode_name}"
             train_img = renderer.render(train_data, caption=f"Train Epoch {epoch}")
             val_img = renderer.render(val_data, caption=f"Val Epoch {epoch}")
-            log_dict[f"train_predictions{suffix}"] = train_img
-            log_dict[f"val_predictions{suffix}"] = val_img
+            log_dict[f"viz/train/predictions{suffix}"] = train_img
+            log_dict[f"viz/val/predictions{suffix}"] = val_img

         if log_dict:
             # Include epoch so wandb can use it as x-axis (via define_metric)
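The renamed keys here put the visualization images under a `viz/` namespace: in Weights & Biases, slash-separated key prefixes group panels into sections in the UI, and the comment above refers to `wandb.define_metric`, which makes a logged `epoch` value the x-axis for those panels. A minimal standalone sketch of that mechanism (the project name and dummy image below are placeholders, not part of this package):

import numpy as np
import wandb

run = wandb.init(project="sleap-nn-demo")  # placeholder project name

# Pin the viz panels to the logged "epoch" value instead of wandb's internal step.
run.define_metric("epoch")
run.define_metric("viz/*", step_metric="epoch")

# Slash-separated keys ("viz/train/...") are grouped under a "viz" section in the UI.
dummy = np.zeros((64, 64, 3), dtype=np.uint8)  # stand-in for a rendered prediction image
run.log({"viz/train/predictions": wandb.Image(dummy), "epoch": 0})
run.finish()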
@@ -394,8 +394,8 @@ class WandBVizCallbackWithPAFs(WandBVizCallback):
             suffix = "" if mode_name == "direct" else f"_{mode_name}"
             train_img = renderer.render(train_data, caption=f"Train Epoch {epoch}")
             val_img = renderer.render(val_data, caption=f"Val Epoch {epoch}")
-            log_dict[f"train_predictions{suffix}"] = train_img
-            log_dict[f"val_predictions{suffix}"] = val_img
+            log_dict[f"viz/train/predictions{suffix}"] = train_img
+            log_dict[f"viz/val/predictions{suffix}"] = val_img

         # Render PAFs (always use matplotlib/direct for PAFs)
         from io import BytesIO
@@ -408,7 +408,7 @@ class WandBVizCallbackWithPAFs(WandBVizCallback):
         buf.seek(0)
         plt.close(train_pafs_fig)
         train_pafs_pil = Image.open(buf)
-        log_dict["train_pafs"] = wandb.Image(
+        log_dict["viz/train/pafs"] = wandb.Image(
             train_pafs_pil, caption=f"Train PAFs Epoch {epoch}"
         )

@@ -418,7 +418,7 @@ class WandBVizCallbackWithPAFs(WandBVizCallback):
         buf.seek(0)
         plt.close(val_pafs_fig)
         val_pafs_pil = Image.open(buf)
-        log_dict["val_pafs"] = wandb.Image(
+        log_dict["viz/val/pafs"] = wandb.Image(
             val_pafs_pil, caption=f"Val PAFs Epoch {epoch}"
         )

@@ -444,8 +444,8 @@ class WandBVizCallbackWithPAFs(WandBVizCallback):
                     epoch,
                     train_img,
                     val_img,
-                    log_dict["train_pafs"],
-                    log_dict["val_pafs"],
+                    log_dict["viz/train/pafs"],
+                    log_dict["viz/val/pafs"],
                 ]
             ],
         )
@@ -662,3 +662,319 @@ class ProgressReporterZMQ(Callback):
         return {
             k: float(v.item()) if hasattr(v, "item") else v for k, v in logs.items()
         }
+
+
+class EpochEndEvaluationCallback(Callback):
+    """Callback to run full evaluation metrics at end of validation epochs.
+
+    This callback collects predictions and ground truth during validation,
+    then runs the full evaluation pipeline (OKS, mAP, PCK, etc.) and logs
+    metrics to WandB.
+
+    Attributes:
+        skeleton: sio.Skeleton for creating instances.
+        videos: List of sio.Video objects.
+        eval_frequency: Run evaluation every N epochs (default: 1).
+        oks_stddev: OKS standard deviation (default: 0.025).
+        oks_scale: Optional OKS scale override.
+        metrics_to_log: List of metric keys to log.
+    """
+
+    def __init__(
+        self,
+        skeleton: "sio.Skeleton",
+        videos: list,
+        eval_frequency: int = 1,
+        oks_stddev: float = 0.025,
+        oks_scale: Optional[float] = None,
+        metrics_to_log: Optional[list] = None,
+    ):
+        """Initialize the callback.
+
+        Args:
+            skeleton: sio.Skeleton for creating instances.
+            videos: List of sio.Video objects.
+            eval_frequency: Run evaluation every N epochs (default: 1).
+            oks_stddev: OKS standard deviation (default: 0.025).
+            oks_scale: Optional OKS scale override.
+            metrics_to_log: List of metric keys to log. If None, logs all available.
+        """
+        super().__init__()
+        self.skeleton = skeleton
+        self.videos = videos
+        self.eval_frequency = eval_frequency
+        self.oks_stddev = oks_stddev
+        self.oks_scale = oks_scale
+        self.metrics_to_log = metrics_to_log or [
+            "mOKS",
+            "oks_voc.mAP",
+            "oks_voc.mAR",
+            "distance/avg",
+            "distance/p50",
+            "distance/p95",
+            "distance/p99",
+            "mPCK",
+            "PCK@5",
+            "PCK@10",
+            "visibility_precision",
+            "visibility_recall",
+        ]
+
+    def on_validation_epoch_start(self, trainer, pl_module):
+        """Enable prediction collection at the start of validation.
+
+        Skip during sanity check to avoid inference issues.
+        """
+        if trainer.sanity_checking:
+            return
+        pl_module._collect_val_predictions = True
+
+    def on_validation_epoch_end(self, trainer, pl_module):
+        """Run evaluation and log metrics at end of validation epoch."""
+        import sleap_io as sio
+        import numpy as np
+        from lightning.pytorch.loggers import WandbLogger
+        from sleap_nn.evaluation import Evaluator
+
+        # Check frequency (epoch is 0-indexed, so add 1)
+        if (trainer.current_epoch + 1) % self.eval_frequency != 0:
+            pl_module._collect_val_predictions = False
+            return
+
+        # Only run on rank 0 for distributed training
+        if not trainer.is_global_zero:
+            pl_module._collect_val_predictions = False
+            return
+
+        # Check if we have predictions
+        if not pl_module.val_predictions or not pl_module.val_ground_truth:
+            logger.warning("No predictions collected for epoch-end evaluation")
+            pl_module._collect_val_predictions = False
+            return
+
+        try:
+            # Build sio.Labels from accumulated predictions and ground truth
+            pred_labels = self._build_pred_labels(pl_module.val_predictions, sio, np)
+            gt_labels = self._build_gt_labels(pl_module.val_ground_truth, sio, np)
+
+            # Check if we have valid frames to evaluate
+            if len(pred_labels) == 0:
+                logger.warning(
+                    "No valid predictions for epoch-end evaluation "
+                    "(all predictions may be empty or NaN)"
+                )
+                pl_module._collect_val_predictions = False
+                pl_module.val_predictions = []
+                pl_module.val_ground_truth = []
+                return
+
+            # Run evaluation
+            evaluator = Evaluator(
+                ground_truth_instances=gt_labels,
+                predicted_instances=pred_labels,
+                oks_stddev=self.oks_stddev,
+                oks_scale=self.oks_scale,
+                user_labels_only=False,  # All validation frames are "user" frames
+            )
+            metrics = evaluator.evaluate()
+
+            # Log to WandB
+            self._log_metrics(trainer, metrics, trainer.current_epoch)
+
+            logger.info(
+                f"Epoch {trainer.current_epoch} evaluation: "
+                f"PCK@5={metrics['pck_metrics']['PCK@5']:.4f}, "
+                f"mOKS={metrics['mOKS']['mOKS']:.4f}, "
+                f"mAP={metrics['voc_metrics']['oks_voc.mAP']:.4f}"
+            )
+
+        except Exception as e:
+            logger.warning(f"Epoch-end evaluation failed: {e}")
+
+        # Cleanup
+        pl_module._collect_val_predictions = False
+        pl_module.val_predictions = []
+        pl_module.val_ground_truth = []
+
+    def _build_pred_labels(self, predictions: list, sio, np) -> "sio.Labels":
+        """Convert prediction dicts to sio.Labels."""
+        labeled_frames = []
+        for pred in predictions:
+            pred_peaks = pred["pred_peaks"]
+            pred_scores = pred["pred_scores"]
+
+            # Handle NaN/missing predictions
+            if pred_peaks is None or (
+                isinstance(pred_peaks, np.ndarray) and np.isnan(pred_peaks).all()
+            ):
+                continue
+
+            # Handle multi-instance predictions (bottomup)
+            if len(pred_peaks.shape) == 2:
+                # Single instance: (n_nodes, 2) -> (1, n_nodes, 2)
+                pred_peaks = pred_peaks.reshape(1, -1, 2)
+                pred_scores = pred_scores.reshape(1, -1)
+
+            instances = []
+            for inst_idx in range(len(pred_peaks)):
+                inst_points = pred_peaks[inst_idx]
+                inst_scores = pred_scores[inst_idx] if pred_scores is not None else None
+
+                # Skip if all NaN
+                if np.isnan(inst_points).all():
+                    continue
+
+                inst = sio.PredictedInstance.from_numpy(
+                    points_data=inst_points,
+                    skeleton=self.skeleton,
+                    point_scores=(
+                        inst_scores
+                        if inst_scores is not None
+                        else np.ones(len(inst_points))
+                    ),
+                    score=(
+                        float(np.nanmean(inst_scores))
+                        if inst_scores is not None
+                        else 1.0
+                    ),
+                )
+                instances.append(inst)
+
+            if instances:
+                lf = sio.LabeledFrame(
+                    video=self.videos[pred["video_idx"]],
+                    frame_idx=pred["frame_idx"],
+                    instances=instances,
+                )
+                labeled_frames.append(lf)
+
+        return sio.Labels(
+            videos=self.videos,
+            skeletons=[self.skeleton],
+            labeled_frames=labeled_frames,
+        )
+
+    def _build_gt_labels(self, ground_truth: list, sio, np) -> "sio.Labels":
+        """Convert ground truth dicts to sio.Labels."""
+        labeled_frames = []
+        for gt in ground_truth:
+            instances = []
+            gt_instances = gt["gt_instances"]
+
+            # Handle shape variations
+            if len(gt_instances.shape) == 2:
+                # (n_nodes, 2) -> (1, n_nodes, 2)
+                gt_instances = gt_instances.reshape(1, -1, 2)
+
+            for i in range(min(gt["num_instances"], len(gt_instances))):
+                inst_data = gt_instances[i]
+                if np.isnan(inst_data).all():
+                    continue
+                inst = sio.Instance.from_numpy(
+                    points_data=inst_data,
+                    skeleton=self.skeleton,
+                )
+                instances.append(inst)
+
+            if instances:
+                lf = sio.LabeledFrame(
+                    video=self.videos[gt["video_idx"]],
+                    frame_idx=gt["frame_idx"],
+                    instances=instances,
+                )
+                labeled_frames.append(lf)
+
+        return sio.Labels(
+            videos=self.videos,
+            skeletons=[self.skeleton],
+            labeled_frames=labeled_frames,
+        )
+
+    def _log_metrics(self, trainer, metrics: dict, epoch: int):
+        """Log evaluation metrics to WandB."""
+        import numpy as np
+        from lightning.pytorch.loggers import WandbLogger
+
+        # Get WandB logger
+        wandb_logger = None
+        for log in trainer.loggers:
+            if isinstance(log, WandbLogger):
+                wandb_logger = log
+                break
+
+        if wandb_logger is None:
+            return
+
+        log_dict = {"epoch": epoch}
+
+        # Extract key metrics with consistent naming
+        # All eval metrics use eval/val/ prefix since they're computed on validation data
+        if "mOKS" in self.metrics_to_log:
+            log_dict["eval/val/mOKS"] = metrics["mOKS"]["mOKS"]
+
+        if "oks_voc.mAP" in self.metrics_to_log:
+            log_dict["eval/val/oks_voc_mAP"] = metrics["voc_metrics"]["oks_voc.mAP"]
+
+        if "oks_voc.mAR" in self.metrics_to_log:
+            log_dict["eval/val/oks_voc_mAR"] = metrics["voc_metrics"]["oks_voc.mAR"]
+
+        # Distance metrics grouped under eval/val/distance/
+        if "distance/avg" in self.metrics_to_log:
+            val = metrics["distance_metrics"]["avg"]
+            if not np.isnan(val):
+                log_dict["eval/val/distance/avg"] = val
+
+        if "distance/p50" in self.metrics_to_log:
+            val = metrics["distance_metrics"]["p50"]
+            if not np.isnan(val):
+                log_dict["eval/val/distance/p50"] = val
+
+        if "distance/p95" in self.metrics_to_log:
+            val = metrics["distance_metrics"]["p95"]
+            if not np.isnan(val):
+                log_dict["eval/val/distance/p95"] = val
+
+        if "distance/p99" in self.metrics_to_log:
+            val = metrics["distance_metrics"]["p99"]
+            if not np.isnan(val):
+                log_dict["eval/val/distance/p99"] = val
+
+        # PCK metrics
+        if "mPCK" in self.metrics_to_log:
+            log_dict["eval/val/mPCK"] = metrics["pck_metrics"]["mPCK"]
+
+        # PCK at specific thresholds (precomputed in evaluation.py)
+        if "PCK@5" in self.metrics_to_log:
+            log_dict["eval/val/PCK_5"] = metrics["pck_metrics"]["PCK@5"]
+
+        if "PCK@10" in self.metrics_to_log:
+            log_dict["eval/val/PCK_10"] = metrics["pck_metrics"]["PCK@10"]
+
+        # Visibility metrics
+        if "visibility_precision" in self.metrics_to_log:
+            val = metrics["visibility_metrics"]["precision"]
+            if not np.isnan(val):
+                log_dict["eval/val/visibility_precision"] = val
+
+        if "visibility_recall" in self.metrics_to_log:
+            val = metrics["visibility_metrics"]["recall"]
+            if not np.isnan(val):
+                log_dict["eval/val/visibility_recall"] = val
+
+        wandb_logger.experiment.log(log_dict, commit=False)
+
+        # Update best metrics in summary (excluding epoch)
+        for key, value in log_dict.items():
+            if key == "epoch":
+                continue
+            # Create summary key like "best/eval/val/mOKS"
+            summary_key = f"best/{key}"
+            current_best = wandb_logger.experiment.summary.get(summary_key)
+            # For distance metrics, lower is better; for others, higher is better
+            is_distance = "distance" in key
+            if current_best is None:
+                wandb_logger.experiment.summary[summary_key] = value
+            elif is_distance and value < current_best:
+                wandb_logger.experiment.summary[summary_key] = value
+            elif not is_distance and value > current_best:
+                wandb_logger.experiment.summary[summary_key] = value
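For orientation, a minimal sketch of how the new EpochEndEvaluationCallback could be wired into a Lightning trainer. The constructor arguments match the ones added above; the labels path, project name, and the `lightning_module`/`data_module` objects are placeholders for the usual sleap-nn training setup (the LightningModule is expected to populate `val_predictions` and `val_ground_truth` while `_collect_val_predictions` is set, presumably via the `lightning_modules.py` changes also shipped in this release).

import sleap_io as sio
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import WandbLogger

# Assumed: validation labels from a SLEAP project; the skeleton and videos are reused
# by the callback when rebuilding sio.Labels for evaluation.
labels = sio.load_slp("val_labels.slp")  # placeholder path

eval_cb = EpochEndEvaluationCallback(
    skeleton=labels.skeletons[0],
    videos=labels.videos,
    eval_frequency=5,  # run the full OKS/PCK evaluation every 5 epochs
    metrics_to_log=["mOKS", "oks_voc.mAP", "PCK@5"],
)

trainer = Trainer(
    max_epochs=100,
    logger=WandbLogger(project="sleap-nn-demo"),  # placeholder project name
    callbacks=[eval_cb],
)
# trainer.fit(lightning_module, datamodule=data_module)  # both assumed, not shown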