sleap-nn 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. sleap_nn/__init__.py +1 -1
  2. sleap_nn/cli.py +36 -0
  3. sleap_nn/config/trainer_config.py +18 -0
  4. sleap_nn/evaluation.py +81 -22
  5. sleap_nn/export/__init__.py +21 -0
  6. sleap_nn/export/cli.py +1778 -0
  7. sleap_nn/export/exporters/__init__.py +51 -0
  8. sleap_nn/export/exporters/onnx_exporter.py +80 -0
  9. sleap_nn/export/exporters/tensorrt_exporter.py +291 -0
  10. sleap_nn/export/metadata.py +225 -0
  11. sleap_nn/export/predictors/__init__.py +63 -0
  12. sleap_nn/export/predictors/base.py +22 -0
  13. sleap_nn/export/predictors/onnx.py +154 -0
  14. sleap_nn/export/predictors/tensorrt.py +312 -0
  15. sleap_nn/export/utils.py +307 -0
  16. sleap_nn/export/wrappers/__init__.py +25 -0
  17. sleap_nn/export/wrappers/base.py +96 -0
  18. sleap_nn/export/wrappers/bottomup.py +243 -0
  19. sleap_nn/export/wrappers/bottomup_multiclass.py +195 -0
  20. sleap_nn/export/wrappers/centered_instance.py +56 -0
  21. sleap_nn/export/wrappers/centroid.py +58 -0
  22. sleap_nn/export/wrappers/single_instance.py +83 -0
  23. sleap_nn/export/wrappers/topdown.py +180 -0
  24. sleap_nn/export/wrappers/topdown_multiclass.py +304 -0
  25. sleap_nn/inference/bottomup.py +86 -20
  26. sleap_nn/inference/postprocessing.py +284 -0
  27. sleap_nn/predict.py +29 -0
  28. sleap_nn/train.py +64 -0
  29. sleap_nn/training/callbacks.py +324 -8
  30. sleap_nn/training/lightning_modules.py +542 -32
  31. sleap_nn/training/model_trainer.py +48 -57
  32. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/METADATA +13 -2
  33. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/RECORD +37 -16
  34. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/WHEEL +0 -0
  35. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/entry_points.txt +0 -0
  36. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/licenses/LICENSE +0 -0
  37. {sleap_nn-0.1.0a1.dist-info → sleap_nn-0.1.0a3.dist-info}/top_level.txt +0 -0
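The per-file diff reproduced below appears to be item 30, sleap_nn/training/lightning_modules.py (+542 -32); diffs for the remaining files are not shown in this excerpt.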
@@ -1,6 +1,6 @@
  """This module has the LightningModule classes for all model types."""

- from typing import Optional, Union, Dict, Any
+ from typing import Optional, Union, Dict, Any, List
  import time
  from torch import nn
  import numpy as np
@@ -184,6 +184,15 @@ class LightningModel(L.LightningModule):
  self.val_loss = {}
  self.learning_rate = {}

+ # For epoch-averaged loss tracking
+ self._epoch_loss_sum = 0.0
+ self._epoch_loss_count = 0
+
+ # For epoch-end evaluation
+ self.val_predictions: List[Dict] = []
+ self.val_ground_truth: List[Dict] = []
+ self._collect_val_predictions: bool = False
+
  # Initialization for encoder and decoder stacks.
  if self.init_weights == "xavier":
  self.model.apply(xavier_init_weights)
@@ -305,12 +314,20 @@ class LightningModel(L.LightningModule):
  def on_train_epoch_start(self):
  """Configure the train timer at the beginning of each epoch."""
  self.train_start_time = time.time()
+ # Reset epoch loss tracking
+ self._epoch_loss_sum = 0.0
+ self._epoch_loss_count = 0
+
+ def _accumulate_loss(self, loss: torch.Tensor):
+ """Accumulate loss for epoch-averaged logging. Call this in training_step."""
+ self._epoch_loss_sum += loss.detach().item()
+ self._epoch_loss_count += 1

  def on_train_epoch_end(self):
  """Configure the train timer at the end of every epoch."""
  train_time = time.time() - self.train_start_time
  self.log(
- "train_time",
+ "train/time",
  train_time,
  prog_bar=False,
  on_step=False,
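The `_accumulate_loss` helper added above keeps a running sum and count of per-step losses that `on_train_epoch_end` later turns into an epoch-averaged `train/loss`. A minimal standalone sketch of that accumulate/reset/average cycle (illustrative names only, not sleap-nn API):

# Minimal sketch (not sleap-nn code) of the epoch-averaged loss pattern:
# sum detached per-batch loss values, count the batches, and report the mean
# once per epoch.
class EpochLossAverager:
    def __init__(self):
        self.loss_sum = 0.0
        self.loss_count = 0

    def reset(self):
        # Mirrors on_train_epoch_start: start a fresh epoch.
        self.loss_sum = 0.0
        self.loss_count = 0

    def update(self, loss_value: float):
        # Mirrors _accumulate_loss: called once per training step.
        self.loss_sum += loss_value
        self.loss_count += 1

    def average(self):
        # Mirrors on_train_epoch_end: the value that would be logged as "train/loss".
        return self.loss_sum / self.loss_count if self.loss_count else None


averager = EpochLossAverager()
averager.reset()
for batch_loss in [1.0, 0.5, 0.0]:  # stand-in for per-batch loss values
    averager.update(batch_loss)
print(averager.average())  # 0.5

Because each batch contributes equally, the reported value is a mean over optimizer steps rather than a sample-weighted mean, which only matters when the last batch of an epoch is smaller than the rest.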
@@ -327,16 +344,43 @@ class LightningModel(L.LightningModule):
  logger=True,
  sync_dist=True,
  )
+ # Log epoch-averaged training loss
+ if self._epoch_loss_count > 0:
+ avg_loss = self._epoch_loss_sum / self._epoch_loss_count
+ self.log(
+ "train/loss",
+ avg_loss,
+ prog_bar=False,
+ on_step=False,
+ on_epoch=True,
+ logger=True,
+ sync_dist=True,
+ )
+ # Log current learning rate (useful for monitoring LR schedulers)
+ if self.trainer.optimizers:
+ lr = self.trainer.optimizers[0].param_groups[0]["lr"]
+ self.log(
+ "train/lr",
+ lr,
+ prog_bar=False,
+ on_step=False,
+ on_epoch=True,
+ logger=True,
+ sync_dist=True,
+ )

  def on_validation_epoch_start(self):
  """Configure the val timer at the beginning of each epoch."""
  self.val_start_time = time.time()
+ # Clear accumulated predictions for new epoch
+ self.val_predictions = []
+ self.val_ground_truth = []

  def on_validation_epoch_end(self):
  """Configure the val timer at the end of every epoch."""
  val_time = time.time() - self.val_start_time
  self.log(
- "val_time",
+ "val/time",
  val_time,
  prog_bar=False,
  on_step=False,
@@ -344,6 +388,16 @@ class LightningModel(L.LightningModule):
  logger=True,
  sync_dist=True,
  )
+ # Log epoch explicitly so val/* metrics can use it as x-axis in wandb
+ # (mirrors what on_train_epoch_end does for train/* metrics)
+ self.log(
+ "epoch",
+ float(self.current_epoch),
+ on_step=False,
+ on_epoch=True,
+ logger=True,
+ sync_dist=True,
+ )

  def training_step(self, batch, batch_idx):
  """Training step."""
@@ -412,7 +466,7 @@ class LightningModel(L.LightningModule):
  "optimizer": optimizer,
  "lr_scheduler": {
  "scheduler": scheduler,
- "monitor": "val_loss",
+ "monitor": "val/loss",
  },
  }

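Since the scheduler's monitored key now reads "val/loss", anything else that watches the old "val_loss" name has to be updated in lockstep. A minimal sketch, assuming a ReduceLROnPlateau scheduler and the stock Lightning callbacks (this is not the actual sleap-nn trainer wiring):

import torch
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint


def configure_optimizers_sketch(model):
    # The "monitor" key must match a metric name logged via self.log() exactly,
    # so it is "val/loss" here rather than the old "val_loss".
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)
    return {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": scheduler, "monitor": "val/loss"},
    }


# Checkpointing and early stopping must watch the same renamed metric.
callbacks = [
    ModelCheckpoint(monitor="val/loss", mode="min"),
    EarlyStopping(monitor="val/loss", mode="min"),
]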
@@ -591,7 +645,7 @@ class SingleInstanceLightningModule(LightningModel):
  channel_wise_loss = torch.sum(mse, dim=(0, 2, 3)) / (batch_size * h * w)
  for node_idx, name in enumerate(self.node_names):
  self.log(
- f"{name}",
+ f"train/confmaps/{name}",
  channel_wise_loss[node_idx],
  prog_bar=False,
  on_step=False,
@@ -599,8 +653,9 @@ class SingleInstanceLightningModule(LightningModel):
  logger=True,
  sync_dist=True,
  )
+ # Log step-level loss (every batch, uses global_step x-axis)
  self.log(
- "train_loss",
+ "loss",
  loss,
  prog_bar=True,
  on_step=True,
@@ -608,6 +663,8 @@ class SingleInstanceLightningModule(LightningModel):
  logger=True,
  sync_dist=True,
  )
+ # Accumulate for epoch-averaged loss (logged in on_train_epoch_end)
+ self._accumulate_loss(loss)
  return loss

  def validation_step(self, batch, batch_idx):
@@ -630,7 +687,7 @@ class SingleInstanceLightningModule(LightningModel):
  )
  val_loss = val_loss + ohkm_loss
  self.log(
- "val_loss",
+ "val/loss",
  val_loss,
  prog_bar=True,
  on_step=False,
@@ -639,6 +696,51 @@ class SingleInstanceLightningModule(LightningModel):
  sync_dist=True,
  )

+ # Collect predictions for epoch-end evaluation if enabled
+ if self._collect_val_predictions:
+ with torch.no_grad():
+ # Squeeze n_samples dim from image for inference (batch, 1, C, H, W) -> (batch, C, H, W)
+ inference_batch = {k: v for k, v in batch.items()}
+ if inference_batch["image"].ndim == 5:
+ inference_batch["image"] = inference_batch["image"].squeeze(1)
+ inference_output = self.single_instance_inf_layer(inference_batch)
+ if isinstance(inference_output, list):
+ inference_output = inference_output[0]
+
+ batch_size = len(batch["frame_idx"])
+ for i in range(batch_size):
+ eff = batch["eff_scale"][i].cpu().numpy()
+
+ # Predictions are already in original image space (inference divides by eff_scale)
+ pred_peaks = inference_output["pred_instance_peaks"][i].cpu().numpy()
+ pred_scores = inference_output["pred_peak_values"][i].cpu().numpy()
+
+ # Transform GT from preprocessed to original image space
+ # Note: instances have shape (1, max_inst, n_nodes, 2) - squeeze n_samples dim
+ gt_prep = batch["instances"][i].cpu().numpy()
+ if gt_prep.ndim == 4:
+ gt_prep = gt_prep.squeeze(0) # (max_inst, n_nodes, 2)
+ gt_orig = gt_prep / eff
+ num_inst = batch["num_instances"][i].item()
+ gt_orig = gt_orig[:num_inst] # Only valid instances
+
+ self.val_predictions.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "pred_peaks": pred_peaks,
+ "pred_scores": pred_scores,
+ }
+ )
+ self.val_ground_truth.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "gt_instances": gt_orig,
+ "num_instances": num_inst,
+ }
+ )
+

  class TopDownCenteredInstanceLightningModule(LightningModel):
  """Lightning Module for TopDownCenteredInstance Model.
@@ -807,7 +909,7 @@ class TopDownCenteredInstanceLightningModule(LightningModel):
  channel_wise_loss = torch.sum(mse, dim=(0, 2, 3)) / (batch_size * h * w)
  for node_idx, name in enumerate(self.node_names):
  self.log(
- f"{name}",
+ f"train/confmaps/{name}",
  channel_wise_loss[node_idx],
  prog_bar=False,
  on_step=False,
@@ -816,8 +918,9 @@ class TopDownCenteredInstanceLightningModule(LightningModel):
  sync_dist=True,
  )

+ # Log step-level loss (every batch, uses global_step x-axis)
  self.log(
- "train_loss",
+ "loss",
  loss,
  prog_bar=True,
  on_step=True,
@@ -825,6 +928,8 @@ class TopDownCenteredInstanceLightningModule(LightningModel):
  logger=True,
  sync_dist=True,
  )
+ # Accumulate for epoch-averaged loss (logged in on_train_epoch_end)
+ self._accumulate_loss(loss)
  return loss

  def validation_step(self, batch, batch_idx):
@@ -847,7 +952,7 @@ class TopDownCenteredInstanceLightningModule(LightningModel):
  )
  val_loss = val_loss + ohkm_loss
  self.log(
- "val_loss",
+ "val/loss",
  val_loss,
  prog_bar=True,
  on_step=False,
@@ -856,6 +961,62 @@ class TopDownCenteredInstanceLightningModule(LightningModel):
  sync_dist=True,
  )

+ # Collect predictions for epoch-end evaluation if enabled
+ if self._collect_val_predictions:
+ # SAVE bbox BEFORE inference (it modifies in-place!)
+ bbox_prep_saved = batch["instance_bbox"].clone()
+
+ with torch.no_grad():
+ inference_output = self.instance_peaks_inf_layer(batch)
+
+ batch_size = len(batch["frame_idx"])
+ for i in range(batch_size):
+ eff = batch["eff_scale"][i].cpu().numpy()
+
+ # Predictions from inference (crop-relative, original scale)
+ pred_peaks_crop = (
+ inference_output["pred_instance_peaks"][i].cpu().numpy()
+ )
+ pred_scores = inference_output["pred_peak_values"][i].cpu().numpy()
+
+ # Compute bbox offset in original space from SAVED prep bbox
+ # bbox has shape (n_samples=1, 4, 2) where 4 corners
+ bbox_prep = bbox_prep_saved[i].squeeze(0).cpu().numpy() # (4, 2)
+ bbox_top_left_orig = (
+ bbox_prep[0] / eff
+ ) # Top-left corner in original space
+
+ # Full image coordinates (original space)
+ pred_peaks_full = pred_peaks_crop + bbox_top_left_orig
+
+ # GT transform: crop-relative preprocessed -> full image original
+ gt_crop_prep = (
+ batch["instance"][i].squeeze(0).cpu().numpy()
+ ) # (n_nodes, 2)
+ gt_crop_orig = gt_crop_prep / eff
+ gt_full_orig = gt_crop_orig + bbox_top_left_orig
+
+ self.val_predictions.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "pred_peaks": pred_peaks_full.reshape(
+ 1, -1, 2
+ ), # (1, n_nodes, 2)
+ "pred_scores": pred_scores.reshape(1, -1), # (1, n_nodes)
+ }
+ )
+ self.val_ground_truth.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "gt_instances": gt_full_orig.reshape(
+ 1, -1, 2
+ ), # (1, n_nodes, 2)
+ "num_instances": 1,
+ }
+ )
+

  class CentroidLightningModule(LightningModel):
  """Lightning Module for Centroid Model.
@@ -1004,8 +1165,9 @@

  y_preds = self.model(X)["CentroidConfmapsHead"]
  loss = nn.MSELoss()(y_preds, y)
+ # Log step-level loss (every batch, uses global_step x-axis)
  self.log(
- "train_loss",
+ "loss",
  loss,
  prog_bar=True,
  on_step=True,
@@ -1013,6 +1175,8 @@
  logger=True,
  sync_dist=True,
  )
+ # Accumulate for epoch-averaged loss (logged in on_train_epoch_end)
+ self._accumulate_loss(loss)
  return loss

  def validation_step(self, batch, batch_idx):
@@ -1025,7 +1189,7 @@
  y_preds = self.model(X)["CentroidConfmapsHead"]
  val_loss = nn.MSELoss()(y_preds, y)
  self.log(
- "val_loss",
+ "val/loss",
  val_loss,
  prog_bar=True,
  on_step=False,
@@ -1034,6 +1198,57 @@
  sync_dist=True,
  )

+ # Collect predictions for epoch-end evaluation if enabled
+ if self._collect_val_predictions:
+ with torch.no_grad():
+ inference_output = self.centroid_inf_layer(batch)
+
+ batch_size = len(batch["frame_idx"])
+ for i in range(batch_size):
+ eff = batch["eff_scale"][i].cpu().numpy()
+
+ # Predictions are in original image space (inference divides by eff_scale)
+ # centroids shape: (batch, 1, max_instances, 2) - squeeze to (max_instances, 2)
+ pred_centroids = (
+ inference_output["centroids"][i].squeeze(0).cpu().numpy()
+ )
+ pred_vals = inference_output["centroid_vals"][i].cpu().numpy()
+
+ # Transform GT centroids from preprocessed to original image space
+ gt_centroids_prep = (
+ batch["centroids"][i].cpu().numpy()
+ ) # (n_samples=1, max_inst, 2)
+ gt_centroids_orig = gt_centroids_prep.squeeze(0) / eff # (max_inst, 2)
+ num_inst = batch["num_instances"][i].item()
+
+ # Filter to valid instances (non-NaN)
+ valid_pred_mask = ~np.isnan(pred_centroids).any(axis=1)
+ pred_centroids = pred_centroids[valid_pred_mask]
+ pred_vals = pred_vals[valid_pred_mask]
+
+ gt_centroids_valid = gt_centroids_orig[:num_inst]
+
+ self.val_predictions.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "pred_peaks": pred_centroids.reshape(
+ -1, 1, 2
+ ), # (n_inst, 1, 2)
+ "pred_scores": pred_vals.reshape(-1, 1), # (n_inst, 1)
+ }
+ )
+ self.val_ground_truth.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "gt_instances": gt_centroids_valid.reshape(
+ -1, 1, 2
+ ), # (n_inst, 1, 2)
+ "num_instances": num_inst,
+ }
+ )
+

  class BottomUpLightningModule(LightningModel):
  """Lightning Module for BottomUp Model.
@@ -1126,12 +1341,13 @@
  self.bottomup_inf_layer = BottomUpInferenceModel(
  torch_model=self.forward,
  paf_scorer=paf_scorer,
- peak_threshold=0.2,
+ peak_threshold=0.1, # Lower threshold for epoch-end eval during training
  input_scale=1.0,
  return_confmaps=True,
  return_pafs=True,
  cms_output_stride=self.head_configs.bottomup.confmaps.output_stride,
  pafs_output_stride=self.head_configs.bottomup.pafs.output_stride,
+ max_peaks_per_node=100, # Prevents combinatorial explosion in early training
  )
  self.node_names = list(self.head_configs.bottomup.confmaps.part_names)

@@ -1248,8 +1464,9 @@
  "PartAffinityFieldsHead": pafs_loss,
  }
  loss = sum([s * losses[t] for s, t in zip(self.loss_weights, losses)])
+ # Log step-level loss (every batch, uses global_step x-axis)
  self.log(
- "train_loss",
+ "loss",
  loss,
  prog_bar=True,
  on_step=True,
@@ -1257,8 +1474,10 @@
  logger=True,
  sync_dist=True,
  )
+ # Accumulate for epoch-averaged loss (logged in on_train_epoch_end)
+ self._accumulate_loss(loss)
  self.log(
- "train_confmap_loss",
+ "train/confmaps_loss",
  confmap_loss,
  on_step=False,
  on_epoch=True,
@@ -1266,7 +1485,7 @@
  sync_dist=True,
  )
  self.log(
- "train_paf_loss",
+ "train/paf_loss",
  pafs_loss,
  on_step=False,
  on_epoch=True,
@@ -1315,7 +1534,7 @@

  val_loss = sum([s * losses[t] for s, t in zip(self.loss_weights, losses)])
  self.log(
- "val_loss",
+ "val/loss",
  val_loss,
  prog_bar=True,
  on_step=False,
@@ -1324,7 +1543,7 @@
  sync_dist=True,
  )
  self.log(
- "val_confmap_loss",
+ "val/confmaps_loss",
  confmap_loss,
  on_step=False,
  on_epoch=True,
@@ -1332,7 +1551,7 @@
  sync_dist=True,
  )
  self.log(
- "val_paf_loss",
+ "val/paf_loss",
  pafs_loss,
  on_step=False,
  on_epoch=True,
@@ -1340,6 +1559,53 @@
  sync_dist=True,
  )

+ # Collect predictions for epoch-end evaluation if enabled
+ if self._collect_val_predictions:
+ with torch.no_grad():
+ # Note: Do NOT squeeze the image here - the forward() method expects
+ # (batch, n_samples, C, H, W) and handles the n_samples squeeze internally
+ inference_output = self.bottomup_inf_layer(batch)
+ if isinstance(inference_output, list):
+ inference_output = inference_output[0]
+
+ batch_size = len(batch["frame_idx"])
+ for i in range(batch_size):
+ eff = batch["eff_scale"][i].cpu().numpy()
+
+ # Predictions are already in original space (variable number of instances)
+ pred_peaks = inference_output["pred_instance_peaks"][i]
+ pred_scores = inference_output["pred_peak_values"][i]
+ if torch.is_tensor(pred_peaks):
+ pred_peaks = pred_peaks.cpu().numpy()
+ if torch.is_tensor(pred_scores):
+ pred_scores = pred_scores.cpu().numpy()
+
+ # Transform GT to original space
+ # Note: instances have shape (1, max_inst, n_nodes, 2) - squeeze n_samples dim
+ gt_prep = batch["instances"][i].cpu().numpy()
+ if gt_prep.ndim == 4:
+ gt_prep = gt_prep.squeeze(0) # (max_inst, n_nodes, 2)
+ gt_orig = gt_prep / eff
+ num_inst = batch["num_instances"][i].item()
+ gt_orig = gt_orig[:num_inst] # Only valid instances
+
+ self.val_predictions.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "pred_peaks": pred_peaks, # Original space, variable instances
+ "pred_scores": pred_scores,
+ }
+ )
+ self.val_ground_truth.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "gt_instances": gt_orig, # Original space
+ "num_instances": num_inst,
+ }
+ )
+

  class BottomUpMultiClassLightningModule(LightningModel):
  """Lightning Module for BottomUp ID Model.
@@ -1541,8 +1807,9 @@
  "ClassMapsHead": classmaps_loss,
  }
  loss = sum([s * losses[t] for s, t in zip(self.loss_weights, losses)])
+ # Log step-level loss (every batch, uses global_step x-axis)
  self.log(
- "train_loss",
+ "loss",
  loss,
  prog_bar=True,
  on_step=True,
@@ -1550,8 +1817,10 @@
  logger=True,
  sync_dist=True,
  )
+ # Accumulate for epoch-averaged loss (logged in on_train_epoch_end)
+ self._accumulate_loss(loss)
  self.log(
- "train_confmap_loss",
+ "train/confmaps_loss",
  confmap_loss,
  on_step=False,
  on_epoch=True,
@@ -1559,13 +1828,67 @@
  sync_dist=True,
  )
  self.log(
- "train_classmap_loss",
+ "train/classmap_loss",
  classmaps_loss,
  on_step=False,
  on_epoch=True,
  logger=True,
  sync_dist=True,
  )
+
+ # Compute classification accuracy at GT keypoint locations
+ with torch.no_grad():
+ # Get output stride for class maps
+ cms_stride = self.head_configs.multi_class_bottomup.class_maps.output_stride
+
+ # Get GT instances and sample class maps at those locations
+ instances = batch["instances"] # (batch, n_samples, max_inst, n_nodes, 2)
+ if instances.dim() == 5:
+ instances = instances.squeeze(1) # (batch, max_inst, n_nodes, 2)
+ num_instances = batch["num_instances"] # (batch,)
+
+ correct = 0
+ total = 0
+ for b in range(instances.shape[0]):
+ n_inst = num_instances[b].item()
+ for inst_idx in range(n_inst):
+ for node_idx in range(instances.shape[2]):
+ # Get keypoint location (in input image space)
+ kp = instances[b, inst_idx, node_idx] # (2,) = (x, y)
+ if torch.isnan(kp).any():
+ continue
+
+ # Convert to class map space
+ x_cm = (
+ (kp[0] / cms_stride)
+ .long()
+ .clamp(0, classmaps.shape[-1] - 1)
+ )
+ y_cm = (
+ (kp[1] / cms_stride)
+ .long()
+ .clamp(0, classmaps.shape[-2] - 1)
+ )
+
+ # Sample predicted and GT class at this location
+ pred_class = classmaps[b, :, y_cm, x_cm].argmax()
+ gt_class = y_classmap[b, :, y_cm, x_cm].argmax()
+
+ if pred_class == gt_class:
+ correct += 1
+ total += 1
+
+ if total > 0:
+ class_accuracy = torch.tensor(correct / total, device=X.device)
+ self.log(
+ "train/class_accuracy",
+ class_accuracy,
+ on_step=False,
+ on_epoch=True,
+ logger=True,
+ sync_dist=True,
+ )
+
  return loss

  def validation_step(self, batch, batch_idx):
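For reference, the class-accuracy block above samples the predicted class map at each ground-truth keypoint after mapping the keypoint from input-image pixels to the strided class-map grid. A small worked example of just that index arithmetic, with assumed shapes (not sleap-nn code):

import torch

stride = 4  # assumed class-map output stride
classmaps = torch.zeros(1, 3, 96, 128)  # (batch, n_classes, H/stride, W/stride)
kp = torch.tensor([101.7, 55.2])  # keypoint (x, y) in input-image pixels

# Divide by the stride, truncate to an integer cell, and clamp to the map bounds.
x_cm = (kp[0] / stride).long().clamp(0, classmaps.shape[-1] - 1)  # -> 25
y_cm = (kp[1] / stride).long().clamp(0, classmaps.shape[-2] - 1)  # -> 13
pred_class = classmaps[0, :, y_cm, x_cm].argmax()  # identity with highest score at that cell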
@@ -1599,7 +1922,7 @@

  val_loss = sum([s * losses[t] for s, t in zip(self.loss_weights, losses)])
  self.log(
- "val_loss",
+ "val/loss",
  val_loss,
  prog_bar=True,
  on_step=False,
@@ -1608,7 +1931,7 @@
  sync_dist=True,
  )
  self.log(
- "val_confmap_loss",
+ "val/confmaps_loss",
  confmap_loss,
  on_step=False,
  on_epoch=True,
@@ -1616,7 +1939,7 @@
  sync_dist=True,
  )
  self.log(
- "val_classmap_loss",
+ "val/classmap_loss",
  classmaps_loss,
  on_step=False,
  on_epoch=True,
@@ -1624,6 +1947,106 @@
  sync_dist=True,
  )

+ # Compute classification accuracy at GT keypoint locations
+ with torch.no_grad():
+ # Get output stride for class maps
+ cms_stride = self.head_configs.multi_class_bottomup.class_maps.output_stride
+
+ # Get GT instances and sample class maps at those locations
+ instances = batch["instances"] # (batch, n_samples, max_inst, n_nodes, 2)
+ if instances.dim() == 5:
+ instances = instances.squeeze(1) # (batch, max_inst, n_nodes, 2)
+ num_instances = batch["num_instances"] # (batch,)
+
+ correct = 0
+ total = 0
+ for b in range(instances.shape[0]):
+ n_inst = num_instances[b].item()
+ for inst_idx in range(n_inst):
+ for node_idx in range(instances.shape[2]):
+ # Get keypoint location (in input image space)
+ kp = instances[b, inst_idx, node_idx] # (2,) = (x, y)
+ if torch.isnan(kp).any():
+ continue
+
+ # Convert to class map space
+ x_cm = (
+ (kp[0] / cms_stride)
+ .long()
+ .clamp(0, classmaps.shape[-1] - 1)
+ )
+ y_cm = (
+ (kp[1] / cms_stride)
+ .long()
+ .clamp(0, classmaps.shape[-2] - 1)
+ )
+
+ # Sample predicted and GT class at this location
+ pred_class = classmaps[b, :, y_cm, x_cm].argmax()
+ gt_class = y_classmap[b, :, y_cm, x_cm].argmax()
+
+ if pred_class == gt_class:
+ correct += 1
+ total += 1
+
+ if total > 0:
+ class_accuracy = torch.tensor(correct / total, device=X.device)
+ self.log(
+ "val/class_accuracy",
+ class_accuracy,
+ on_step=False,
+ on_epoch=True,
+ logger=True,
+ sync_dist=True,
+ )
+
+ # Collect predictions for epoch-end evaluation if enabled
+ if self._collect_val_predictions:
+ with torch.no_grad():
+ # Note: Do NOT squeeze the image here - the forward() method expects
+ # (batch, n_samples, C, H, W) and handles the n_samples squeeze internally
+ inference_output = self.bottomup_inf_layer(batch)
+ if isinstance(inference_output, list):
+ inference_output = inference_output[0]
+
+ batch_size = len(batch["frame_idx"])
+ for i in range(batch_size):
+ eff = batch["eff_scale"][i].cpu().numpy()
+
+ # Predictions are already in original space (variable number of instances)
+ pred_peaks = inference_output["pred_instance_peaks"][i]
+ pred_scores = inference_output["pred_peak_values"][i]
+ if torch.is_tensor(pred_peaks):
+ pred_peaks = pred_peaks.cpu().numpy()
+ if torch.is_tensor(pred_scores):
+ pred_scores = pred_scores.cpu().numpy()
+
+ # Transform GT to original space
+ # Note: instances have shape (1, max_inst, n_nodes, 2) - squeeze n_samples dim
+ gt_prep = batch["instances"][i].cpu().numpy()
+ if gt_prep.ndim == 4:
+ gt_prep = gt_prep.squeeze(0) # (max_inst, n_nodes, 2)
+ gt_orig = gt_prep / eff
+ num_inst = batch["num_instances"][i].item()
+ gt_orig = gt_orig[:num_inst] # Only valid instances
+
+ self.val_predictions.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "pred_peaks": pred_peaks, # Original space, variable instances
+ "pred_scores": pred_scores,
+ }
+ )
+ self.val_ground_truth.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "gt_instances": gt_orig, # Original space
+ "num_instances": num_inst,
+ }
+ )
+

  class TopDownCenteredInstanceMultiClassLightningModule(LightningModel):
  """Lightning Module for TopDownCenteredInstance ID Model.
@@ -1803,7 +2226,7 @@ class TopDownCenteredInstanceMultiClassLightningModule(LightningModel):
  channel_wise_loss = torch.sum(mse, dim=(0, 2, 3)) / (batch_size * h * w)
  for node_idx, name in enumerate(self.node_names):
  self.log(
- f"{name}",
+ f"train/confmaps/{name}",
  channel_wise_loss[node_idx],
  prog_bar=False,
  on_step=False,
@@ -1812,8 +2235,9 @@ class TopDownCenteredInstanceMultiClassLightningModule(LightningModel):
  sync_dist=True,
  )

+ # Log step-level loss (every batch, uses global_step x-axis)
  self.log(
- "train_loss",
+ "loss",
  loss,
  prog_bar=True,
  on_step=True,
@@ -1821,8 +2245,10 @@ class TopDownCenteredInstanceMultiClassLightningModule(LightningModel):
  logger=True,
  sync_dist=True,
  )
+ # Accumulate for epoch-averaged loss (logged in on_train_epoch_end)
+ self._accumulate_loss(loss)
  self.log(
- "train_confmap_loss",
+ "train/confmaps_loss",
  confmap_loss,
  on_step=False,
  on_epoch=True,
@@ -1830,13 +2256,27 @@ class TopDownCenteredInstanceMultiClassLightningModule(LightningModel):
  sync_dist=True,
  )
  self.log(
- "train_classvector_loss",
+ "train/classvector_loss",
  classvector_loss,
  on_step=False,
  on_epoch=True,
  logger=True,
  sync_dist=True,
  )
+
+ # Compute classification accuracy
+ with torch.no_grad():
+ pred_classes = torch.argmax(classvector, dim=1)
+ gt_classes = torch.argmax(y_classvector, dim=1)
+ class_accuracy = (pred_classes == gt_classes).float().mean()
+ self.log(
+ "train/class_accuracy",
+ class_accuracy,
+ on_step=False,
+ on_epoch=True,
+ logger=True,
+ sync_dist=True,
+ )
  return loss

  def validation_step(self, batch, batch_idx):
@@ -1868,7 +2308,7 @@ class TopDownCenteredInstanceMultiClassLightningModule(LightningModel):
  }
  val_loss = sum([s * losses[t] for s, t in zip(self.loss_weights, losses)])
  self.log(
- "val_loss",
+ "val/loss",
  val_loss,
  prog_bar=True,
  on_step=False,
@@ -1877,7 +2317,7 @@ class TopDownCenteredInstanceMultiClassLightningModule(LightningModel):
  sync_dist=True,
  )
  self.log(
- "val_confmap_loss",
+ "val/confmaps_loss",
  confmap_loss,
  on_step=False,
  on_epoch=True,
@@ -1885,10 +2325,80 @@ class TopDownCenteredInstanceMultiClassLightningModule(LightningModel):
  sync_dist=True,
  )
  self.log(
- "val_classvector_loss",
+ "val/classvector_loss",
  classvector_loss,
  on_step=False,
  on_epoch=True,
  logger=True,
  sync_dist=True,
  )
+
+ # Compute classification accuracy
+ with torch.no_grad():
+ pred_classes = torch.argmax(classvector, dim=1)
+ gt_classes = torch.argmax(y_classvector, dim=1)
+ class_accuracy = (pred_classes == gt_classes).float().mean()
+ self.log(
+ "val/class_accuracy",
+ class_accuracy,
+ on_step=False,
+ on_epoch=True,
+ logger=True,
+ sync_dist=True,
+ )
+
+ # Collect predictions for epoch-end evaluation if enabled
+ if self._collect_val_predictions:
+ # SAVE bbox BEFORE inference (it modifies in-place!)
+ bbox_prep_saved = batch["instance_bbox"].clone()
+
+ with torch.no_grad():
+ inference_output = self.instance_peaks_inf_layer(batch)
+
+ batch_size = len(batch["frame_idx"])
+ for i in range(batch_size):
+ eff = batch["eff_scale"][i].cpu().numpy()
+
+ # Predictions from inference (crop-relative, original scale)
+ pred_peaks_crop = (
+ inference_output["pred_instance_peaks"][i].cpu().numpy()
+ )
+ pred_scores = inference_output["pred_peak_values"][i].cpu().numpy()
+
+ # Compute bbox offset in original space from SAVED prep bbox
+ # bbox has shape (n_samples=1, 4, 2) where 4 corners
+ bbox_prep = bbox_prep_saved[i].squeeze(0).cpu().numpy() # (4, 2)
+ bbox_top_left_orig = (
+ bbox_prep[0] / eff
+ ) # Top-left corner in original space
+
+ # Full image coordinates (original space)
+ pred_peaks_full = pred_peaks_crop + bbox_top_left_orig
+
+ # GT transform: crop-relative preprocessed -> full image original
+ gt_crop_prep = (
+ batch["instance"][i].squeeze(0).cpu().numpy()
+ ) # (n_nodes, 2)
+ gt_crop_orig = gt_crop_prep / eff
+ gt_full_orig = gt_crop_orig + bbox_top_left_orig
+
+ self.val_predictions.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "pred_peaks": pred_peaks_full.reshape(
+ 1, -1, 2
+ ), # (1, n_nodes, 2)
+ "pred_scores": pred_scores.reshape(1, -1), # (1, n_nodes)
+ }
+ )
+ self.val_ground_truth.append(
+ {
+ "video_idx": batch["video_idx"][i].item(),
+ "frame_idx": batch["frame_idx"][i].item(),
+ "gt_instances": gt_full_orig.reshape(
+ 1, -1, 2
+ ), # (1, n_nodes, 2)
+ "num_instances": 1,
+ }
+ )
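The buffers filled in these validation steps (self.val_predictions and self.val_ground_truth) are consumed at epoch end by code that is not part of this excerpt. A hypothetical consumer that matches entries by (video_idx, frame_idx) and reports a mean Euclidean keypoint error might look like this (naive first-to-first instance pairing; a real evaluator would use proper instance matching):

import numpy as np

def mean_localization_error(val_predictions, val_ground_truth):
    # Index ground truth by frame so each prediction can find its counterpart.
    gt_by_frame = {
        (gt["video_idx"], gt["frame_idx"]): gt["gt_instances"]
        for gt in val_ground_truth
    }
    errors = []
    for pred in val_predictions:
        gt = gt_by_frame.get((pred["video_idx"], pred["frame_idx"]))
        if gt is None or len(gt) == 0:
            continue
        # Naive pairing: compare the first k predicted instances to the first k
        # GT instances; OKS/Hungarian matching would be used in practice.
        n = min(len(pred["pred_peaks"]), len(gt))
        diffs = pred["pred_peaks"][:n] - gt[:n]  # (n, n_nodes, 2)
        dists = np.linalg.norm(diffs, axis=-1)   # (n, n_nodes)
        errors.append(np.nanmean(dists))
    return float(np.nanmean(errors)) if errors else float("nan")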