sleap-nn 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sleap_nn/__init__.py +1 -1
- sleap_nn/config/trainer_config.py +18 -0
- sleap_nn/evaluation.py +73 -22
- sleap_nn/inference/bottomup.py +86 -20
- sleap_nn/training/callbacks.py +274 -0
- sleap_nn/training/lightning_modules.py +210 -2
- sleap_nn/training/model_trainer.py +23 -0
- {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/METADATA +2 -2
- {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/RECORD +13 -13
- {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/WHEEL +0 -0
- {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/entry_points.txt +0 -0
- {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
- {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/top_level.txt +0 -0
sleap_nn/config/trainer_config.py
CHANGED
@@ -208,6 +208,23 @@ class EarlyStoppingConfig:
     stop_training_on_plateau: bool = True
 
 
+@define
+class EvalConfig:
+    """Configuration for epoch-end evaluation.
+
+    Attributes:
+        enabled: (bool) Enable epoch-end evaluation metrics. *Default*: `False`.
+        frequency: (int) Evaluate every N epochs. *Default*: `1`.
+        oks_stddev: (float) OKS standard deviation for evaluation. *Default*: `0.025`.
+        oks_scale: (float) OKS scale override. If None, uses default. *Default*: `None`.
+    """
+
+    enabled: bool = False
+    frequency: int = field(default=1, validator=validators.ge(1))
+    oks_stddev: float = field(default=0.025, validator=validators.gt(0))
+    oks_scale: Optional[float] = None
+
+
 @define
 class HardKeypointMiningConfig:
     """Configuration for online hard keypoint mining.
@@ -310,6 +327,7 @@ class TrainerConfig:
         factory=HardKeypointMiningConfig
     )
     zmq: Optional[ZMQConfig] = field(factory=ZMQConfig)  # Required for SLEAP GUI
+    eval: EvalConfig = field(factory=EvalConfig)  # Epoch-end evaluation config
 
     @staticmethod
     def validate_optimizer_name(value):
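The new `EvalConfig` is wired into `TrainerConfig` as `trainer_config.eval` and read by `ModelTrainer` (see sleap_nn/training/model_trainer.py below). A minimal sketch of enabling it, assuming only the attrs-based config objects added in this diff:

import sys
from sleap_nn.config.trainer_config import EvalConfig

# Run the epoch-end evaluation every 5 epochs with a custom OKS sigma.
# frequency must be >= 1 and oks_stddev > 0 per the validators above.
eval_cfg = EvalConfig(enabled=True, frequency=5, oks_stddev=0.05)
print(eval_cfg, file=sys.stderr)

ModelTrainer then reads `config.trainer_config.eval.enabled`, `.frequency`, `.oks_stddev`, and `.oks_scale` when assembling callbacks.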
sleap_nn/evaluation.py
CHANGED
@@ -29,11 +29,27 @@ def get_instances(labeled_frame: sio.LabeledFrame) -> List[MatchInstance]:
     """
     instance_list = []
     frame_idx = labeled_frame.frame_idx
-
-
-
-
-
+
+    # Extract video path with fallbacks for embedded videos
+    video = labeled_frame.video
+    video_path = None
+    if video is not None:
+        backend = getattr(video, "backend", None)
+        if backend is not None:
+            # Try source_filename first (for embedded videos with provenance)
+            video_path = getattr(backend, "source_filename", None)
+            if video_path is None:
+                video_path = getattr(backend, "filename", None)
+        # Fallback to video.filename if backend doesn't have it
+        if video_path is None:
+            video_path = getattr(video, "filename", None)
+    # Handle list filenames (image sequences)
+    if isinstance(video_path, list) and video_path:
+        video_path = video_path[0]
+    # Final fallback: use a unique identifier
+    if video_path is None:
+        video_path = f"video_{id(video)}" if video is not None else "unknown"
+
     for instance in labeled_frame.instances:
         match_instance = MatchInstance(
             instance=instance, frame_idx=frame_idx, video_path=video_path
@@ -47,6 +63,10 @@ find_frame_pairs(
 ) -> List[Tuple[sio.LabeledFrame, sio.LabeledFrame]]:
     """Find corresponding frames across two sets of labels.
 
+    This function uses sleap-io's robust video matching API to handle various
+    scenarios including embedded videos, cross-platform paths, and videos with
+    different metadata.
+
     Args:
         labels_gt: A `sio.Labels` instance with ground truth instances.
         labels_pr: A `sio.Labels` instance with predicted instances.
@@ -56,16 +76,15 @@ find_frame_pairs(
     Returns:
         A list of pairs of `sio.LabeledFrame`s in the form `(frame_gt, frame_pr)`.
     """
+    # Use sleap-io's robust video matching API (added in 0.6.2)
+    # The match() method returns a MatchResult with video_map: {pred_video: gt_video}
+    match_result = labels_gt.match(labels_pr)
+
     frame_pairs = []
-
-
-
-
-            if video_gt.matches_content(video) and video_gt.matches_path(video):
-                video_pr = video
-                break
-
-        if video_pr is None:
+    # Iterate over matched video pairs (pred_video -> gt_video mapping)
+    for video_pr, video_gt in match_result.video_map.items():
+        if video_gt is None:
+            # No match found for this prediction video
             continue
 
         # Find labeled frames in this video.
@@ -786,11 +805,26 @@ def run_evaluation(
     """Evaluate SLEAP-NN model predictions against ground truth labels."""
     logger.info("Loading ground truth labels...")
     ground_truth_instances = sio.load_slp(ground_truth_path)
+    logger.info(
+        f" Ground truth: {len(ground_truth_instances.videos)} videos, "
+        f"{len(ground_truth_instances.labeled_frames)} frames"
+    )
 
     logger.info("Loading predicted labels...")
     predicted_instances = sio.load_slp(predicted_path)
+    logger.info(
+        f" Predictions: {len(predicted_instances.videos)} videos, "
+        f"{len(predicted_instances.labeled_frames)} frames"
+    )
+
+    logger.info("Matching videos and frames...")
+    # Get match stats before creating evaluator
+    match_result = ground_truth_instances.match(predicted_instances)
+    logger.info(
+        f" Videos matched: {match_result.n_videos_matched}/{len(match_result.video_map)}"
+    )
 
-    logger.info("
+    logger.info("Matching instances...")
     evaluator = Evaluator(
         ground_truth_instances=ground_truth_instances,
         predicted_instances=predicted_instances,
@@ -799,21 +833,38 @@
         match_threshold=match_threshold,
         user_labels_only=user_labels_only,
     )
+    logger.info(
+        f" Frame pairs: {len(evaluator.frame_pairs)}, "
+        f"Matched instances: {len(evaluator.positive_pairs)}, "
+        f"Unmatched GT: {len(evaluator.false_negatives)}"
+    )
 
     logger.info("Computing evaluation metrics...")
     metrics = evaluator.evaluate()
 
+    # Compute PCK at specific thresholds (5 and 10 pixels)
+    dists = metrics["distance_metrics"]["dists"]
+    dists_clean = np.copy(dists)
+    dists_clean[np.isnan(dists_clean)] = np.inf
+    pck_5 = (dists_clean < 5).mean()
+    pck_10 = (dists_clean < 10).mean()
+
     # Print key metrics
     logger.info("Evaluation Results:")
-    logger.info(f"mOKS: {metrics['mOKS']['mOKS']:.4f}")
-    logger.info(f"mAP (OKS VOC): {metrics['voc_metrics']['oks_voc.mAP']:.4f}")
-    logger.info(f"mAR (OKS VOC): {metrics['voc_metrics']['oks_voc.mAR']:.4f}")
-    logger.info(f"Average Distance: {metrics['distance_metrics']['avg']:.
-    logger.info(f"
+    logger.info(f" mOKS: {metrics['mOKS']['mOKS']:.4f}")
+    logger.info(f" mAP (OKS VOC): {metrics['voc_metrics']['oks_voc.mAP']:.4f}")
+    logger.info(f" mAR (OKS VOC): {metrics['voc_metrics']['oks_voc.mAR']:.4f}")
+    logger.info(f" Average Distance: {metrics['distance_metrics']['avg']:.2f} px")
+    logger.info(f" dist.p50: {metrics['distance_metrics']['p50']:.2f} px")
+    logger.info(f" dist.p95: {metrics['distance_metrics']['p95']:.2f} px")
+    logger.info(f" dist.p99: {metrics['distance_metrics']['p99']:.2f} px")
+    logger.info(f" mPCK: {metrics['pck_metrics']['mPCK']:.4f}")
+    logger.info(f" PCK@5px: {pck_5:.4f}")
+    logger.info(f" PCK@10px: {pck_10:.4f}")
     logger.info(
-        f"Visibility Precision: {metrics['visibility_metrics']['precision']:.4f}"
+        f" Visibility Precision: {metrics['visibility_metrics']['precision']:.4f}"
     )
-    logger.info(f"Visibility Recall: {metrics['visibility_metrics']['recall']:.4f}")
+    logger.info(f" Visibility Recall: {metrics['visibility_metrics']['recall']:.4f}")
 
     # Save metrics if path provided
     if save_metrics:
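The PCK block above treats NaN distances (unmatched or invisible points) as incorrect by replacing them with infinity before thresholding. A standalone numpy sketch of the same computation, with a hypothetical `dists` array standing in for `metrics["distance_metrics"]["dists"]`:

import numpy as np

# Distances for 3 matched instances x 2 nodes; NaN = point not matched.
dists = np.array([[1.2, 4.8], [np.nan, 9.5], [2.0, 11.0]])
dists_clean = np.copy(dists)
dists_clean[np.isnan(dists_clean)] = np.inf  # NaNs can never pass a threshold
pck_5 = (dists_clean < 5).mean()    # 3/6 = 0.5
pck_10 = (dists_clean < 10).mean()  # 4/6 ~= 0.667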
sleap_nn/inference/bottomup.py
CHANGED
@@ -1,5 +1,6 @@
 """Inference modules for BottomUp models."""
 
+import logging
 from typing import Dict, Optional
 import torch
 import lightning as L
@@ -7,6 +8,8 @@ from sleap_nn.inference.peak_finding import find_local_peaks
 from sleap_nn.inference.paf_grouping import PAFScorer
 from sleap_nn.inference.identity import classify_peaks_from_maps
 
+logger = logging.getLogger(__name__)
+
 
 class BottomUpInferenceModel(L.LightningModule):
     """BottomUp Inference model.
@@ -63,8 +66,28 @@ class BottomUpInferenceModel(L.LightningModule):
         return_pafs: Optional[bool] = False,
         return_paf_graph: Optional[bool] = False,
         input_scale: float = 1.0,
+        max_peaks_per_node: Optional[int] = None,
     ):
-        """Initialise the model attributes.
+        """Initialise the model attributes.
+
+        Args:
+            torch_model: A `nn.Module` that accepts images and predicts confidence maps.
+            paf_scorer: A `PAFScorer` instance for grouping instances.
+            cms_output_stride: Output stride of confidence maps relative to images.
+            pafs_output_stride: Output stride of PAFs relative to images.
+            peak_threshold: Minimum confidence map value for valid peaks.
+            refinement: Peak refinement method: None, "integral", or "local".
+            integral_patch_size: Size of patches for integral refinement.
+            return_confmaps: If True, return confidence maps in output.
+            return_pafs: If True, return PAFs in output.
+            return_paf_graph: If True, return intermediate PAF graph in output.
+            input_scale: Scale factor applied to input images.
+            max_peaks_per_node: Maximum number of peaks allowed per node before
+                skipping PAF scoring. If any node has more peaks than this limit,
+                empty predictions are returned. This prevents combinatorial explosion
+                during early training when confidence maps are noisy. Set to None to
+                disable this check (default). Recommended value: 100.
+        """
         super().__init__()
         self.torch_model = torch_model
         self.paf_scorer = paf_scorer
@@ -77,6 +100,7 @@ class BottomUpInferenceModel(L.LightningModule):
         self.return_pafs = return_pafs
         self.return_paf_graph = return_paf_graph
         self.input_scale = input_scale
+        self.max_peaks_per_node = max_peaks_per_node
 
     def _generate_cms_peaks(self, cms):
         # TODO: append nans to batch them -> tensor (vectorize the initial paf grouping steps)
@@ -124,26 +148,68 @@ class BottomUpInferenceModel(L.LightningModule):
         )  # (batch, h, w, 2*edges)
         cms_peaks, cms_peak_vals, cms_peak_channel_inds = self._generate_cms_peaks(cms)
 
-        (
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Check if too many peaks per node (prevents combinatorial explosion)
+        skip_paf_scoring = False
+        if self.max_peaks_per_node is not None:
+            n_nodes = cms.shape[1]
+            for b in range(self.batch_size):
+                for node_idx in range(n_nodes):
+                    n_peaks = int((cms_peak_channel_inds[b] == node_idx).sum().item())
+                    if n_peaks > self.max_peaks_per_node:
+                        logger.warning(
+                            f"Skipping PAF scoring: node {node_idx} has {n_peaks} peaks "
+                            f"(max_peaks_per_node={self.max_peaks_per_node}). "
+                            f"Model may need more training."
+                        )
+                        skip_paf_scoring = True
+                        break
+                if skip_paf_scoring:
+                    break
+
+        if skip_paf_scoring:
+            # Return empty predictions for each sample
+            device = cms.device
+            n_nodes = cms.shape[1]
+            predicted_instances_adjusted = []
+            predicted_peak_scores = []
+            predicted_instance_scores = []
+            for _ in range(self.batch_size):
+                predicted_instances_adjusted.append(
+                    torch.full((0, n_nodes, 2), float("nan"), device=device)
+                )
+                predicted_peak_scores.append(
+                    torch.full((0, n_nodes), float("nan"), device=device)
+                )
+                predicted_instance_scores.append(torch.tensor([], device=device))
+            edge_inds = [
+                torch.tensor([], dtype=torch.int32, device=device)
+            ] * self.batch_size
+            edge_peak_inds = [
+                torch.tensor([], dtype=torch.int32, device=device).reshape(0, 2)
+            ] * self.batch_size
+            line_scores = [torch.tensor([], device=device)] * self.batch_size
+        else:
+            (
+                predicted_instances,
+                predicted_peak_scores,
+                predicted_instance_scores,
+                edge_inds,
+                edge_peak_inds,
+                line_scores,
+            ) = self.paf_scorer.predict(
+                pafs=pafs,
+                peaks=cms_peaks,
+                peak_vals=cms_peak_vals,
+                peak_channel_inds=cms_peak_channel_inds,
             )
+
+            predicted_instances = [p / self.input_scale for p in predicted_instances]
+            predicted_instances_adjusted = []
+            for idx, p in enumerate(predicted_instances):
+                predicted_instances_adjusted.append(
+                    p / inputs["eff_scale"][idx].to(p.device)
+                )
 
         out = {
             "pred_instance_peaks": predicted_instances_adjusted,
             "pred_peak_values": predicted_peak_scores,
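The per-node peak guard above counts peaks with nested Python loops over samples and nodes. The same early-exit decision can be computed with one `torch.bincount` per sample; this is an illustrative sketch with a hypothetical helper, not the package's code:

import torch

def too_many_peaks(cms_peak_channel_inds, n_nodes, max_peaks_per_node=100):
    """Return True if any node in any sample exceeds the peak limit."""
    for inds in cms_peak_channel_inds:  # one 1-D tensor of node indices per sample
        counts = torch.bincount(inds.long(), minlength=n_nodes)
        if int(counts.max()) > max_peaks_per_node:
            return True
    return False

When the limit is exceeded, the model returns zero-instance tensors rather than attempting PAF grouping on a combinatorially large candidate set.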
sleap_nn/training/callbacks.py
CHANGED
@@ -662,3 +662,277 @@ class ProgressReporterZMQ(Callback):
         return {
             k: float(v.item()) if hasattr(v, "item") else v for k, v in logs.items()
         }
+
+
+class EpochEndEvaluationCallback(Callback):
+    """Callback to run full evaluation metrics at end of validation epochs.
+
+    This callback collects predictions and ground truth during validation,
+    then runs the full evaluation pipeline (OKS, mAP, PCK, etc.) and logs
+    metrics to WandB.
+
+    Attributes:
+        skeleton: sio.Skeleton for creating instances.
+        videos: List of sio.Video objects.
+        eval_frequency: Run evaluation every N epochs (default: 1).
+        oks_stddev: OKS standard deviation (default: 0.025).
+        oks_scale: Optional OKS scale override.
+        metrics_to_log: List of metric keys to log.
+    """
+
+    def __init__(
+        self,
+        skeleton: "sio.Skeleton",
+        videos: list,
+        eval_frequency: int = 1,
+        oks_stddev: float = 0.025,
+        oks_scale: Optional[float] = None,
+        metrics_to_log: Optional[list] = None,
+    ):
+        """Initialize the callback.
+
+        Args:
+            skeleton: sio.Skeleton for creating instances.
+            videos: List of sio.Video objects.
+            eval_frequency: Run evaluation every N epochs (default: 1).
+            oks_stddev: OKS standard deviation (default: 0.025).
+            oks_scale: Optional OKS scale override.
+            metrics_to_log: List of metric keys to log. If None, logs all available.
+        """
+        super().__init__()
+        self.skeleton = skeleton
+        self.videos = videos
+        self.eval_frequency = eval_frequency
+        self.oks_stddev = oks_stddev
+        self.oks_scale = oks_scale
+        self.metrics_to_log = metrics_to_log or [
+            "mOKS",
+            "oks_voc.mAP",
+            "oks_voc.mAR",
+            "avg_distance",
+            "p50_distance",
+            "mPCK",
+            "visibility_precision",
+            "visibility_recall",
+        ]
+
+    def on_validation_epoch_start(self, trainer, pl_module):
+        """Enable prediction collection at the start of validation.
+
+        Skip during sanity check to avoid inference issues.
+        """
+        if trainer.sanity_checking:
+            return
+        pl_module._collect_val_predictions = True
+
+    def on_validation_epoch_end(self, trainer, pl_module):
+        """Run evaluation and log metrics at end of validation epoch."""
+        import sleap_io as sio
+        import numpy as np
+        from lightning.pytorch.loggers import WandbLogger
+        from sleap_nn.evaluation import Evaluator
+
+        # Check frequency (epoch is 0-indexed, so add 1)
+        if (trainer.current_epoch + 1) % self.eval_frequency != 0:
+            pl_module._collect_val_predictions = False
+            return
+
+        # Only run on rank 0 for distributed training
+        if not trainer.is_global_zero:
+            pl_module._collect_val_predictions = False
+            return
+
+        # Check if we have predictions
+        if not pl_module.val_predictions or not pl_module.val_ground_truth:
+            logger.warning("No predictions collected for epoch-end evaluation")
+            pl_module._collect_val_predictions = False
+            return
+
+        try:
+            # Build sio.Labels from accumulated predictions and ground truth
+            pred_labels = self._build_pred_labels(pl_module.val_predictions, sio, np)
+            gt_labels = self._build_gt_labels(pl_module.val_ground_truth, sio, np)
+
+            # Check if we have valid frames to evaluate
+            if len(pred_labels) == 0:
+                logger.warning(
+                    "No valid predictions for epoch-end evaluation "
+                    "(all predictions may be empty or NaN)"
+                )
+                pl_module._collect_val_predictions = False
+                pl_module.val_predictions = []
+                pl_module.val_ground_truth = []
+                return
+
+            # Run evaluation
+            evaluator = Evaluator(
+                ground_truth_instances=gt_labels,
+                predicted_instances=pred_labels,
+                oks_stddev=self.oks_stddev,
+                oks_scale=self.oks_scale,
+                user_labels_only=False,  # All validation frames are "user" frames
+            )
+            metrics = evaluator.evaluate()
+
+            # Log to WandB
+            self._log_metrics(trainer, metrics, trainer.current_epoch)
+
+            logger.info(
+                f"Epoch {trainer.current_epoch} evaluation: "
+                f"mOKS={metrics['mOKS']['mOKS']:.4f}, "
+                f"mAP={metrics['voc_metrics']['oks_voc.mAP']:.4f}"
+            )
+
+        except Exception as e:
+            logger.warning(f"Epoch-end evaluation failed: {e}")
+
+        # Cleanup
+        pl_module._collect_val_predictions = False
+        pl_module.val_predictions = []
+        pl_module.val_ground_truth = []
+
+    def _build_pred_labels(self, predictions: list, sio, np) -> "sio.Labels":
+        """Convert prediction dicts to sio.Labels."""
+        labeled_frames = []
+        for pred in predictions:
+            pred_peaks = pred["pred_peaks"]
+            pred_scores = pred["pred_scores"]
+
+            # Handle NaN/missing predictions
+            if pred_peaks is None or (
+                isinstance(pred_peaks, np.ndarray) and np.isnan(pred_peaks).all()
+            ):
+                continue
+
+            # Handle multi-instance predictions (bottomup)
+            if len(pred_peaks.shape) == 2:
+                # Single instance: (n_nodes, 2) -> (1, n_nodes, 2)
+                pred_peaks = pred_peaks.reshape(1, -1, 2)
+                pred_scores = pred_scores.reshape(1, -1)
+
+            instances = []
+            for inst_idx in range(len(pred_peaks)):
+                inst_points = pred_peaks[inst_idx]
+                inst_scores = pred_scores[inst_idx] if pred_scores is not None else None
+
+                # Skip if all NaN
+                if np.isnan(inst_points).all():
+                    continue
+
+                inst = sio.PredictedInstance.from_numpy(
+                    points_data=inst_points,
+                    skeleton=self.skeleton,
+                    point_scores=(
+                        inst_scores
+                        if inst_scores is not None
+                        else np.ones(len(inst_points))
+                    ),
+                    score=(
+                        float(np.nanmean(inst_scores))
+                        if inst_scores is not None
+                        else 1.0
+                    ),
+                )
+                instances.append(inst)
+
+            if instances:
+                lf = sio.LabeledFrame(
+                    video=self.videos[pred["video_idx"]],
+                    frame_idx=pred["frame_idx"],
+                    instances=instances,
+                )
+                labeled_frames.append(lf)
+
+        return sio.Labels(
+            videos=self.videos,
+            skeletons=[self.skeleton],
+            labeled_frames=labeled_frames,
+        )
+
+    def _build_gt_labels(self, ground_truth: list, sio, np) -> "sio.Labels":
+        """Convert ground truth dicts to sio.Labels."""
+        labeled_frames = []
+        for gt in ground_truth:
+            instances = []
+            gt_instances = gt["gt_instances"]
+
+            # Handle shape variations
+            if len(gt_instances.shape) == 2:
+                # (n_nodes, 2) -> (1, n_nodes, 2)
+                gt_instances = gt_instances.reshape(1, -1, 2)
+
+            for i in range(min(gt["num_instances"], len(gt_instances))):
+                inst_data = gt_instances[i]
+                if np.isnan(inst_data).all():
+                    continue
+                inst = sio.Instance.from_numpy(
+                    points_data=inst_data,
+                    skeleton=self.skeleton,
+                )
+                instances.append(inst)
+
+            if instances:
+                lf = sio.LabeledFrame(
+                    video=self.videos[gt["video_idx"]],
+                    frame_idx=gt["frame_idx"],
+                    instances=instances,
+                )
+                labeled_frames.append(lf)
+
+        return sio.Labels(
+            videos=self.videos,
+            skeletons=[self.skeleton],
+            labeled_frames=labeled_frames,
+        )
+
+    def _log_metrics(self, trainer, metrics: dict, epoch: int):
+        """Log evaluation metrics to WandB."""
+        import numpy as np
+        from lightning.pytorch.loggers import WandbLogger
+
+        # Get WandB logger
+        wandb_logger = None
+        for log in trainer.loggers:
+            if isinstance(log, WandbLogger):
+                wandb_logger = log
+                break
+
+        if wandb_logger is None:
+            return
+
+        log_dict = {"epoch": epoch}
+
+        # Extract key metrics with consistent naming
+        if "mOKS" in self.metrics_to_log:
+            log_dict["val_mOKS"] = metrics["mOKS"]["mOKS"]
+
+        if "oks_voc.mAP" in self.metrics_to_log:
+            log_dict["val_oks_voc_mAP"] = metrics["voc_metrics"]["oks_voc.mAP"]
+
+        if "oks_voc.mAR" in self.metrics_to_log:
+            log_dict["val_oks_voc_mAR"] = metrics["voc_metrics"]["oks_voc.mAR"]
+
+        if "avg_distance" in self.metrics_to_log:
+            val = metrics["distance_metrics"]["avg"]
+            if not np.isnan(val):
+                log_dict["val_avg_distance"] = val
+
+        if "p50_distance" in self.metrics_to_log:
+            val = metrics["distance_metrics"]["p50"]
+            if not np.isnan(val):
+                log_dict["val_p50_distance"] = val
+
+        if "mPCK" in self.metrics_to_log:
+            log_dict["val_mPCK"] = metrics["pck_metrics"]["mPCK"]
+
+        if "visibility_precision" in self.metrics_to_log:
+            val = metrics["visibility_metrics"]["precision"]
+            if not np.isnan(val):
+                log_dict["val_visibility_precision"] = val
+
+        if "visibility_recall" in self.metrics_to_log:
+            val = metrics["visibility_metrics"]["recall"]
+            if not np.isnan(val):
+                log_dict["val_visibility_recall"] = val
+
+        wandb_logger.experiment.log(log_dict, commit=False)
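The callback consumes per-frame dicts accumulated on the LightningModule during validation (the collection code is in sleap_nn/training/lightning_modules.py below). For orientation, one entry of each list looks roughly like this; the shapes are illustrative, with coordinates in original image space:

import numpy as np

val_prediction_entry = {
    "video_idx": 0,    # index into the callback's `videos` list
    "frame_idx": 42,   # frame index within that video
    "pred_peaks": np.zeros((1, 5, 2)),  # (n_instances, n_nodes, 2) xy
    "pred_scores": np.ones((1, 5)),     # (n_instances, n_nodes)
}

val_ground_truth_entry = {
    "video_idx": 0,
    "frame_idx": 42,
    "gt_instances": np.zeros((1, 5, 2)),  # (n_instances, n_nodes, 2) xy
    "num_instances": 1,
}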
sleap_nn/training/lightning_modules.py
CHANGED
@@ -1,6 +1,6 @@
 """This module has the LightningModule classes for all model types."""
 
-from typing import Optional, Union, Dict, Any
+from typing import Optional, Union, Dict, Any, List
 import time
 from torch import nn
 import numpy as np
@@ -184,6 +184,11 @@ class LightningModel(L.LightningModule):
         self.val_loss = {}
         self.learning_rate = {}
 
+        # For epoch-end evaluation
+        self.val_predictions: List[Dict] = []
+        self.val_ground_truth: List[Dict] = []
+        self._collect_val_predictions: bool = False
+
         # Initialization for encoder and decoder stacks.
         if self.init_weights == "xavier":
             self.model.apply(xavier_init_weights)
@@ -331,6 +336,9 @@ class LightningModel(L.LightningModule):
     def on_validation_epoch_start(self):
         """Configure the val timer at the beginning of each epoch."""
         self.val_start_time = time.time()
+        # Clear accumulated predictions for new epoch
+        self.val_predictions = []
+        self.val_ground_truth = []
 
     def on_validation_epoch_end(self):
         """Configure the val timer at the end of every epoch."""
@@ -639,6 +647,51 @@ class SingleInstanceLightningModule(LightningModel):
             sync_dist=True,
         )
 
+        # Collect predictions for epoch-end evaluation if enabled
+        if self._collect_val_predictions:
+            with torch.no_grad():
+                # Squeeze n_samples dim from image for inference (batch, 1, C, H, W) -> (batch, C, H, W)
+                inference_batch = {k: v for k, v in batch.items()}
+                if inference_batch["image"].ndim == 5:
+                    inference_batch["image"] = inference_batch["image"].squeeze(1)
+                inference_output = self.single_instance_inf_layer(inference_batch)
+                if isinstance(inference_output, list):
+                    inference_output = inference_output[0]
+
+            batch_size = len(batch["frame_idx"])
+            for i in range(batch_size):
+                eff = batch["eff_scale"][i].cpu().numpy()
+
+                # Predictions are already in original image space (inference divides by eff_scale)
+                pred_peaks = inference_output["pred_instance_peaks"][i].cpu().numpy()
+                pred_scores = inference_output["pred_peak_values"][i].cpu().numpy()
+
+                # Transform GT from preprocessed to original image space
+                # Note: instances have shape (1, max_inst, n_nodes, 2) - squeeze n_samples dim
+                gt_prep = batch["instances"][i].cpu().numpy()
+                if gt_prep.ndim == 4:
+                    gt_prep = gt_prep.squeeze(0)  # (max_inst, n_nodes, 2)
+                gt_orig = gt_prep / eff
+                num_inst = batch["num_instances"][i].item()
+                gt_orig = gt_orig[:num_inst]  # Only valid instances
+
+                self.val_predictions.append(
+                    {
+                        "video_idx": batch["video_idx"][i].item(),
+                        "frame_idx": batch["frame_idx"][i].item(),
+                        "pred_peaks": pred_peaks,
+                        "pred_scores": pred_scores,
+                    }
+                )
+                self.val_ground_truth.append(
+                    {
+                        "video_idx": batch["video_idx"][i].item(),
+                        "frame_idx": batch["frame_idx"][i].item(),
+                        "gt_instances": gt_orig,
+                        "num_instances": num_inst,
+                    }
+                )
+
 
 class TopDownCenteredInstanceLightningModule(LightningModel):
     """Lightning Module for TopDownCenteredInstance Model.
@@ -856,6 +909,62 @@ class TopDownCenteredInstanceLightningModule(LightningModel):
             sync_dist=True,
         )
 
+        # Collect predictions for epoch-end evaluation if enabled
+        if self._collect_val_predictions:
+            # SAVE bbox BEFORE inference (it modifies in-place!)
+            bbox_prep_saved = batch["instance_bbox"].clone()
+
+            with torch.no_grad():
+                inference_output = self.instance_peaks_inf_layer(batch)
+
+            batch_size = len(batch["frame_idx"])
+            for i in range(batch_size):
+                eff = batch["eff_scale"][i].cpu().numpy()
+
+                # Predictions from inference (crop-relative, original scale)
+                pred_peaks_crop = (
+                    inference_output["pred_instance_peaks"][i].cpu().numpy()
+                )
+                pred_scores = inference_output["pred_peak_values"][i].cpu().numpy()
+
+                # Compute bbox offset in original space from SAVED prep bbox
+                # bbox has shape (n_samples=1, 4, 2) where 4 corners
+                bbox_prep = bbox_prep_saved[i].squeeze(0).cpu().numpy()  # (4, 2)
+                bbox_top_left_orig = (
+                    bbox_prep[0] / eff
+                )  # Top-left corner in original space
+
+                # Full image coordinates (original space)
+                pred_peaks_full = pred_peaks_crop + bbox_top_left_orig
+
+                # GT transform: crop-relative preprocessed -> full image original
+                gt_crop_prep = (
+                    batch["instance"][i].squeeze(0).cpu().numpy()
+                )  # (n_nodes, 2)
+                gt_crop_orig = gt_crop_prep / eff
+                gt_full_orig = gt_crop_orig + bbox_top_left_orig
+
+                self.val_predictions.append(
+                    {
+                        "video_idx": batch["video_idx"][i].item(),
+                        "frame_idx": batch["frame_idx"][i].item(),
+                        "pred_peaks": pred_peaks_full.reshape(
+                            1, -1, 2
+                        ),  # (1, n_nodes, 2)
+                        "pred_scores": pred_scores.reshape(1, -1),  # (1, n_nodes)
+                    }
+                )
+                self.val_ground_truth.append(
+                    {
+                        "video_idx": batch["video_idx"][i].item(),
+                        "frame_idx": batch["frame_idx"][i].item(),
+                        "gt_instances": gt_full_orig.reshape(
+                            1, -1, 2
+                        ),  # (1, n_nodes, 2)
+                        "num_instances": 1,
+                    }
+                )
+
 
 class CentroidLightningModule(LightningModel):
     """Lightning Module for Centroid Model.
@@ -1034,6 +1143,57 @@ class CentroidLightningModule(LightningModel):
             sync_dist=True,
         )
 
+        # Collect predictions for epoch-end evaluation if enabled
+        if self._collect_val_predictions:
+            with torch.no_grad():
+                inference_output = self.centroid_inf_layer(batch)
+
+            batch_size = len(batch["frame_idx"])
+            for i in range(batch_size):
+                eff = batch["eff_scale"][i].cpu().numpy()
+
+                # Predictions are in original image space (inference divides by eff_scale)
+                # centroids shape: (batch, 1, max_instances, 2) - squeeze to (max_instances, 2)
+                pred_centroids = (
+                    inference_output["centroids"][i].squeeze(0).cpu().numpy()
+                )
+                pred_vals = inference_output["centroid_vals"][i].cpu().numpy()
+
+                # Transform GT centroids from preprocessed to original image space
+                gt_centroids_prep = (
+                    batch["centroids"][i].cpu().numpy()
+                )  # (n_samples=1, max_inst, 2)
+                gt_centroids_orig = gt_centroids_prep.squeeze(0) / eff  # (max_inst, 2)
+                num_inst = batch["num_instances"][i].item()
+
+                # Filter to valid instances (non-NaN)
+                valid_pred_mask = ~np.isnan(pred_centroids).any(axis=1)
+                pred_centroids = pred_centroids[valid_pred_mask]
+                pred_vals = pred_vals[valid_pred_mask]
+
+                gt_centroids_valid = gt_centroids_orig[:num_inst]
+
+                self.val_predictions.append(
+                    {
+                        "video_idx": batch["video_idx"][i].item(),
+                        "frame_idx": batch["frame_idx"][i].item(),
+                        "pred_peaks": pred_centroids.reshape(
+                            -1, 1, 2
+                        ),  # (n_inst, 1, 2)
+                        "pred_scores": pred_vals.reshape(-1, 1),  # (n_inst, 1)
+                    }
+                )
+                self.val_ground_truth.append(
+                    {
+                        "video_idx": batch["video_idx"][i].item(),
+                        "frame_idx": batch["frame_idx"][i].item(),
+                        "gt_instances": gt_centroids_valid.reshape(
+                            -1, 1, 2
+                        ),  # (n_inst, 1, 2)
+                        "num_instances": num_inst,
+                    }
+                )
+
 
 class BottomUpLightningModule(LightningModel):
     """Lightning Module for BottomUp Model.
@@ -1126,12 +1286,13 @@ class BottomUpLightningModule(LightningModel):
         self.bottomup_inf_layer = BottomUpInferenceModel(
             torch_model=self.forward,
             paf_scorer=paf_scorer,
-            peak_threshold=0.
+            peak_threshold=0.1,  # Lower threshold for epoch-end eval during training
            input_scale=1.0,
             return_confmaps=True,
             return_pafs=True,
             cms_output_stride=self.head_configs.bottomup.confmaps.output_stride,
             pafs_output_stride=self.head_configs.bottomup.pafs.output_stride,
+            max_peaks_per_node=100,  # Prevents combinatorial explosion in early training
         )
         self.node_names = list(self.head_configs.bottomup.confmaps.part_names)
 
@@ -1340,6 +1501,53 @@
             sync_dist=True,
         )
 
+        # Collect predictions for epoch-end evaluation if enabled
+        if self._collect_val_predictions:
+            with torch.no_grad():
+                # Note: Do NOT squeeze the image here - the forward() method expects
+                # (batch, n_samples, C, H, W) and handles the n_samples squeeze internally
+                inference_output = self.bottomup_inf_layer(batch)
+                if isinstance(inference_output, list):
+                    inference_output = inference_output[0]
+
+            batch_size = len(batch["frame_idx"])
+            for i in range(batch_size):
+                eff = batch["eff_scale"][i].cpu().numpy()
+
+                # Predictions are already in original space (variable number of instances)
+                pred_peaks = inference_output["pred_instance_peaks"][i]
+                pred_scores = inference_output["pred_peak_values"][i]
+                if torch.is_tensor(pred_peaks):
+                    pred_peaks = pred_peaks.cpu().numpy()
+                if torch.is_tensor(pred_scores):
+                    pred_scores = pred_scores.cpu().numpy()
+
+                # Transform GT to original space
+                # Note: instances have shape (1, max_inst, n_nodes, 2) - squeeze n_samples dim
+                gt_prep = batch["instances"][i].cpu().numpy()
+                if gt_prep.ndim == 4:
+                    gt_prep = gt_prep.squeeze(0)  # (max_inst, n_nodes, 2)
+                gt_orig = gt_prep / eff
+                num_inst = batch["num_instances"][i].item()
+                gt_orig = gt_orig[:num_inst]  # Only valid instances
+
+                self.val_predictions.append(
+                    {
+                        "video_idx": batch["video_idx"][i].item(),
+                        "frame_idx": batch["frame_idx"][i].item(),
+                        "pred_peaks": pred_peaks,  # Original space, variable instances
+                        "pred_scores": pred_scores,
+                    }
+                )
+                self.val_ground_truth.append(
+                    {
+                        "video_idx": batch["video_idx"][i].item(),
+                        "frame_idx": batch["frame_idx"][i].item(),
+                        "gt_instances": gt_orig,  # Original space
+                        "num_instances": num_inst,
+                    }
+                )
+
 
 class BottomUpMultiClassLightningModule(LightningModel):
     """Lightning Module for BottomUp ID Model.
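The top-down path above maps crop-relative predictions back to full-image coordinates: divide the saved crop corner by the effective preprocessing scale, then add it to the crop-relative peaks (which inference already returns at original scale). A minimal numpy sketch of the arithmetic with made-up values:

import numpy as np

eff_scale = 0.5  # preprocessing downscaled the image 2x
bbox_top_left_prep = np.array([64.0, 32.0])  # crop corner, preprocessed space
pred_peaks_crop = np.array([[10.0, 12.0], [20.0, 8.0]])  # crop-relative peaks

bbox_top_left_orig = bbox_top_left_prep / eff_scale     # [128., 64.]
pred_peaks_full = pred_peaks_crop + bbox_top_left_orig  # [[138., 76.], [148., 72.]]

Ground truth follows the same path, except the crop-relative points are first divided by eff_scale as well, since they are stored in preprocessed space.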
sleap_nn/training/model_trainer.py
CHANGED
@@ -61,6 +61,7 @@ from sleap_nn.training.callbacks import (
     WandBVizCallbackWithPAFs,
     CSVLoggerCallback,
     SleapProgressBar,
+    EpochEndEvaluationCallback,
 )
 from sleap_nn import RANK
 from sleap_nn.legacy_models import get_keras_first_layer_channels
@@ -1086,6 +1087,18 @@ class ModelTrainer:
         if self.config.trainer_config.enable_progress_bar:
             callbacks.append(SleapProgressBar())
 
+        # Add epoch-end evaluation callback if enabled
+        if self.config.trainer_config.eval.enabled:
+            callbacks.append(
+                EpochEndEvaluationCallback(
+                    skeleton=self.skeletons[0],
+                    videos=self.val_labels[0].videos,
+                    eval_frequency=self.config.trainer_config.eval.frequency,
+                    oks_stddev=self.config.trainer_config.eval.oks_stddev,
+                    oks_scale=self.config.trainer_config.eval.oks_scale,
+                )
+            )
+
         return loggers, callbacks
 
     def _delete_cache_imgs(self):
@@ -1281,6 +1294,16 @@
             wandb.define_metric("train_pafs*", step_metric="epoch")
             wandb.define_metric("val_pafs*", step_metric="epoch")
 
+            # Evaluation metrics use epoch as x-axis
+            wandb.define_metric("val_mOKS", step_metric="epoch")
+            wandb.define_metric("val_oks_voc_mAP", step_metric="epoch")
+            wandb.define_metric("val_oks_voc_mAR", step_metric="epoch")
+            wandb.define_metric("val_avg_distance", step_metric="epoch")
+            wandb.define_metric("val_p50_distance", step_metric="epoch")
+            wandb.define_metric("val_mPCK", step_metric="epoch")
+            wandb.define_metric("val_visibility_precision", step_metric="epoch")
+            wandb.define_metric("val_visibility_recall", step_metric="epoch")
+
             self.config.trainer_config.wandb.current_run_id = wandb.run.id
             wandb.config["run_name"] = self.config.trainer_config.wandb.name
             wandb.config["run_config"] = OmegaConf.to_container(
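Pairing `define_metric(..., step_metric="epoch")` with the callback's `log(..., commit=False)` makes WandB plot the evaluation series against the epoch axis and merge them into the same step as the regular epoch metrics. A minimal standalone sketch of this wandb pattern (hypothetical run, not package code):

import wandb

run = wandb.init(project="demo")
wandb.define_metric("epoch")
wandb.define_metric("val_mOKS", step_metric="epoch")

# commit=False stages the values; they are committed with the next regular log.
wandb.log({"epoch": 3, "val_mOKS": 0.91}, commit=False)
wandb.log({"val_loss": 0.12})
run.finish()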
{sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sleap-nn
-Version: 0.1.0a1
+Version: 0.1.0a2
 Summary: Neural network backend for training and inference for animal pose estimation.
 Author-email: Divya Seshadri Murali <dimurali@salk.edu>, Elizabeth Berrigan <eberrigan@salk.edu>, Vincent Tu <vitu@ucsd.edu>, Liezl Maree <lmaree@salk.edu>, David Samy <davidasamy@gmail.com>, Talmo Pereira <talmo@salk.edu>
 License: BSD-3-Clause
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Python: <3.14,>=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: sleap-io<0.7.0,>=0.6.
+Requires-Dist: sleap-io<0.7.0,>=0.6.2
 Requires-Dist: numpy
 Requires-Dist: lightning
 Requires-Dist: kornia
{sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
 sleap_nn/.DS_Store,sha256=HY8amA79eHkt7o5VUiNsMxkc9YwW6WIPyZbYRj_JdSU,6148
-sleap_nn/__init__.py,sha256=
+sleap_nn/__init__.py,sha256=s3sIImYR5tiP-PfftEj7J8P1Au2nRXj4XWowznrVwm8,1362
 sleap_nn/cli.py,sha256=U4hpEcOxK7a92GeItY95E2DRm5P1ME1GqU__mxaDcW0,21167
-sleap_nn/evaluation.py,sha256=
+sleap_nn/evaluation.py,sha256=sKwLnHbCcaNzPs7CJtgRmFcDRFwPMjCxB92viZvinVI,33498
 sleap_nn/legacy_models.py,sha256=8aGK30DZv3pW2IKDBEWH1G2mrytjaxPQD4miPUehj0M,20258
 sleap_nn/predict.py,sha256=8QKjRbS-L-6HF1NFJWioBPv3HSzUpFr2oGEB5hRJzQA,35523
 sleap_nn/system_info.py,sha256=7tWe3y6s872nDbrZoHIdSs-w4w46Z4dEV2qCV-Fe7No,14711
@@ -19,7 +19,7 @@ sleap_nn/config/__init__.py,sha256=l0xV1uJsGJfMPfWAqlUR7Ivu4cSCWsP-3Y9ueyPESuk,4
 sleap_nn/config/data_config.py,sha256=5a5YlXm4V9qGvkqgFNy6o0XJ_Q06UFjpYJXmNHfvXEI,24021
 sleap_nn/config/get_config.py,sha256=rjNUffKU9z-ohLwrOVmJNGCqwUM93eh68h4KJfrSy8Y,42396
 sleap_nn/config/model_config.py,sha256=XFIbqFno7IkX0Se5WF_2_7aUalAlC2SvpDe-uP2TttM,57582
-sleap_nn/config/trainer_config.py,sha256=
+sleap_nn/config/trainer_config.py,sha256=Ob2UqU10DXsQOnDb0iJxy0qc82CfP6FkQZQkrCvTEEY,29120
 sleap_nn/config/training_job_config.py,sha256=v12_ME_tBUg8JFwOxJNW4sDQn-SedDhiJOGz-TlRwT0,5861
 sleap_nn/config/utils.py,sha256=GgWgVs7_N7ifsJ5OQG3_EyOagNyN3Dx7wS2BAlkaRkg,5553
 sleap_nn/data/__init__.py,sha256=eMNvFJFa3gv5Rq8oK5wzo6zt1pOlwUGYf8EQii6bq7c,54
@@ -35,7 +35,7 @@ sleap_nn/data/providers.py,sha256=0x6GFP1s1c08ji4p0M5V6p-dhT4Z9c-SI_Aw1DWX-uM,14
 sleap_nn/data/resizing.py,sha256=YFpSQduIBkRK39FYmrqDL-v8zMySlEs6TJxh6zb_0ZU,5076
 sleap_nn/data/utils.py,sha256=rT0w7KMOTlzaeKWq1TqjbgC4Lvjz_G96McllvEOqXx8,5641
 sleap_nn/inference/__init__.py,sha256=eVkCmKrxHlDFJIlZTf8B5XEOcSyw-gPQymXMY5uShOM,170
-sleap_nn/inference/bottomup.py,sha256=
+sleap_nn/inference/bottomup.py,sha256=3s90aRlpIcRnSNe-R5-qiuX3S48kCWMpCl8YuNnTEDI,17084
 sleap_nn/inference/identity.py,sha256=GjNDL9MfGqNyQaK4AE8JQCAE8gpMuE_Y-3r3Gpa53CE,6540
 sleap_nn/inference/paf_grouping.py,sha256=7Fo9lCAj-zcHgv5rI5LIMYGcixCGNt_ZbSNs8Dik7l8,69973
 sleap_nn/inference/peak_finding.py,sha256=L9LdYKt_Bfw7cxo6xEpgF8wXcZAwq5plCfmKJ839N40,13014
@@ -52,14 +52,14 @@ sleap_nn/tracking/candidates/__init__.py,sha256=1O7NObIwshM7j1rLHmImbFphvkM9wY1j
 sleap_nn/tracking/candidates/fixed_window.py,sha256=D80KMlTnenuQveQVVhk9j0G8yx6K324C7nMLHgG76e0,6296
 sleap_nn/tracking/candidates/local_queues.py,sha256=Nx3R5wwEwq0gbfH-fi3oOumfkQo8_sYe5GN47pD9Be8,7305
 sleap_nn/training/__init__.py,sha256=vNTKsIJPZHJwFSKn5PmjiiRJunR_9e7y4_v0S6rdF8U,32
-sleap_nn/training/callbacks.py,sha256=
-sleap_nn/training/lightning_modules.py,sha256=
+sleap_nn/training/callbacks.py,sha256=ZO88NFGZi53Wn4qM6yp3Bk3HFmhkYSGqeMc1QJKirLo,35995
+sleap_nn/training/lightning_modules.py,sha256=slkVtQ7r6LatWLYzxcq6x1RALYNyHTRcqiXXwD-x0PA,95420
 sleap_nn/training/losses.py,sha256=gbdinUURh4QUzjmNd2UJpt4FXwecqKy9gHr65JZ1bZk,1632
-sleap_nn/training/model_trainer.py,sha256=
+sleap_nn/training/model_trainer.py,sha256=mf6FOdGDal2mMP0F1xD9jVQ54wbUST0ovRt6OjXzVyg,60580
 sleap_nn/training/utils.py,sha256=ivdkZEI0DkTCm6NPszsaDOh9jSfozkONZdl6TvvQUWI,20398
-sleap_nn-0.1.
-sleap_nn-0.1.
-sleap_nn-0.1.
-sleap_nn-0.1.
-sleap_nn-0.1.
-sleap_nn-0.1.
+sleap_nn-0.1.0a2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+sleap_nn-0.1.0a2.dist-info/METADATA,sha256=w0dUxvJerGIpu4hlYgGbimjCAooCcf_4NcAzo8T5Sos,5637
+sleap_nn-0.1.0a2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sleap_nn-0.1.0a2.dist-info/entry_points.txt,sha256=zfl5Y3hidZxWBvo8qXvu5piJAXJ_l6v7xVFm0gNiUoI,46
+sleap_nn-0.1.0a2.dist-info/top_level.txt,sha256=Kz68iQ55K75LWgSeqz4V4SCMGeFFYH-KGBOyhQh3xZE,9
+sleap_nn-0.1.0a2.dist-info/RECORD,,

{sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/WHEEL
File without changes

{sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/entry_points.txt
File without changes

{sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/licenses/LICENSE
File without changes

{sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a2.dist-info}/top_level.txt
File without changes