PyPI - sleap-nn - Versions diffs - 0.0.5__py3-none-any.whl → 0.1.0a0__py3-none-any.whl - Mend

sleap-nn 0.0.5__py3-none-any.whl → 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

sleap_nn/__init__.py +6 -1
sleap_nn/cli.py +142 -3
sleap_nn/config/data_config.py +44 -7
sleap_nn/config/get_config.py +22 -20
sleap_nn/config/trainer_config.py +12 -0
sleap_nn/data/augmentation.py +54 -2
sleap_nn/data/custom_datasets.py +22 -22
sleap_nn/data/instance_cropping.py +70 -5
sleap_nn/data/normalization.py +45 -2
sleap_nn/data/providers.py +26 -0
sleap_nn/evaluation.py +99 -23
sleap_nn/inference/__init__.py +6 -0
sleap_nn/inference/peak_finding.py +10 -2
sleap_nn/inference/predictors.py +115 -20
sleap_nn/inference/provenance.py +292 -0
sleap_nn/inference/topdown.py +55 -47
sleap_nn/predict.py +187 -10
sleap_nn/system_info.py +443 -0
sleap_nn/tracking/tracker.py +8 -1
sleap_nn/train.py +64 -40
sleap_nn/training/callbacks.py +317 -5
sleap_nn/training/lightning_modules.py +325 -180
sleap_nn/training/model_trainer.py +308 -22
sleap_nn/training/utils.py +367 -2
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/METADATA +22 -32
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/RECORD +30 -28
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/WHEEL +0 -0
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/entry_points.txt +0 -0
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/licenses/LICENSE +0 -0
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0a0.dist-info}/top_level.txt +0 -0

sleap_nn/training/model_trainer.py CHANGED Viewed

@@ -20,7 +20,11 @@ from itertools import cycle, count
 from omegaconf import DictConfig, OmegaConf
 from lightning.pytorch.loggers import WandbLogger
 from sleap_nn.data.utils import check_cache_memory
-from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
+from lightning.pytorch.callbacks import (
+    ModelCheckpoint,
+    EarlyStopping,
+    LearningRateMonitor,
+)
 from lightning.pytorch.profilers import (
     SimpleProfiler,
     AdvancedProfiler,
@@ -28,7 +32,11 @@ from lightning.pytorch.profilers import (
     PassThroughProfiler,
 )
 from sleap_io.io.skeleton import SkeletonYAMLEncoder
-from sleap_nn.data.instance_cropping import find_instance_crop_size
+from sleap_nn.data.instance_cropping import (
+    find_instance_crop_size,
+    find_max_instance_bbox_size,
+    compute_augmentation_padding,
+)
 from sleap_nn.data.providers import get_max_height_width
 from sleap_nn.data.custom_datasets import (
     get_train_val_dataloaders,
@@ -49,7 +57,10 @@ from sleap_nn.training.callbacks import (
     TrainingControllerZMQ,
     MatplotlibSaver,
     WandBPredImageLogger,
+    WandBVizCallback,
+    WandBVizCallbackWithPAFs,
     CSVLoggerCallback,
+    SleapProgressBar,
 )
 from sleap_nn import RANK
 from sleap_nn.legacy_models import get_keras_first_layer_channels
@@ -207,6 +218,52 @@ class ModelTrainer:
                 trainer_devices = 1
         return trainer_devices
+    def _count_labeled_frames(
+        self, labels_list: List[sio.Labels], user_only: bool = True
+    ) -> int:
+        """Count labeled frames, optionally filtering to user-labeled only.
+        Args:
+            labels_list: List of Labels objects to count frames from.
+            user_only: If True, count only frames with user instances.
+        Returns:
+            Total count of labeled frames.
+        """
+        total = 0
+        for label in labels_list:
+            if user_only:
+                total += sum(1 for lf in label if lf.has_user_instances)
+            else:
+                total += len(label)
+        return total
+    def _filter_to_user_labeled(self, labels: sio.Labels) -> sio.Labels:
+        """Filter a Labels object to only include user-labeled frames.
+        Args:
+            labels: Labels object to filter.
+        Returns:
+            New Labels object containing only frames with user instances.
+        """
+        # Filter labeled frames to only those with user instances
+        user_lfs = [lf for lf in labels if lf.has_user_instances]
+        # Set instances to user instances only
+        for lf in user_lfs:
+            lf.instances = lf.user_instances
+        # Create new Labels with filtered frames
+        return sio.Labels(
+            labeled_frames=user_lfs,
+            videos=labels.videos,
+            skeletons=labels.skeletons,
+            tracks=labels.tracks,
+            suggestions=labels.suggestions,
+            provenance=labels.provenance,
+        )
     def _setup_train_val_labels(
         self,
         labels: Optional[List[sio.Labels]] = None,
@@ -218,21 +275,35 @@ class ModelTrainer:
         total_val_lfs = 0
         self.skeletons = labels[0].skeletons
+        # Check if we should count only user-labeled frames
+        user_instances_only = OmegaConf.select(
+            self.config, "data_config.user_instances_only", default=True
+        )
         # check if all `.slp` files have the same skeleton structure (if multiple slp file paths are provided)
         skeleton = self.skeletons[0]
         for index, train_label in enumerate(labels):
             skel_temp = train_label.skeletons[0]
             skeletons_equal = skeleton.matches(skel_temp)
-            if skeletons_equal:
-                total_train_lfs += len(train_label)
-            else:
+            if not skeletons_equal:
                 message = f"The skeletons in the training labels: {index + 1} do not match the skeleton in the first training label file."
                 logger.error(message)
                 raise ValueError(message)
-        if val_labels is None or not len(val_labels):
+        # Check for same-data mode (train = val, for intentional overfitting)
+        use_same = OmegaConf.select(
+            self.config, "data_config.use_same_data_for_val", default=False
+        )
+        if use_same:
+            # Same mode: use identical data for train and val (for overfitting)
+            logger.info("Using same data for train and val (overfit mode)")
+            self.train_labels = labels
+            self.val_labels = labels
+            total_train_lfs = self._count_labeled_frames(labels, user_instances_only)
+            total_val_lfs = total_train_lfs
+        elif val_labels is None or not len(val_labels):
             # if val labels are not provided, split from train
-            total_train_lfs = 0
             val_fraction = OmegaConf.select(
                 self.config, "data_config.validation_fraction", default=0.1
             )
@@ -250,13 +321,14 @@ class ModelTrainer:
                 )
                 self.train_labels.append(train_split)
                 self.val_labels.append(val_split)
+                # make_training_splits returns only user-labeled frames
                 total_train_lfs += len(train_split)
                 total_val_lfs += len(val_split)
         else:
             self.train_labels = labels
             self.val_labels = val_labels
-            for val_l in self.val_labels:
-                total_val_lfs += len(val_l)
+            total_train_lfs = self._count_labeled_frames(labels, user_instances_only)
+            total_val_lfs = self._count_labeled_frames(val_labels, user_instances_only)
         logger.info(f"# Train Labeled frames: {total_train_lfs}")
         logger.info(f"# Val Labeled frames: {total_val_lfs}")
@@ -291,13 +363,70 @@ class ModelTrainer:
             ):
                 # compute crop size if not provided in config
                 if crop_size is None:
+                    # Get padding from config or auto-compute from augmentation settings
+                    padding = self.config.data_config.preprocessing.crop_padding
+                    if padding is None:
+                        # Auto-compute padding based on augmentation settings
+                        aug_config = self.config.data_config.augmentation_config
+                        if (
+                            self.config.data_config.use_augmentations_train
+                            and aug_config is not None
+                            and aug_config.geometric is not None
+                        ):
+                            geo = aug_config.geometric
+                            # Check if rotation is enabled (via rotation_p or affine_p)
+                            rotation_enabled = (
+                                geo.rotation_p is not None and geo.rotation_p > 0
+                            ) or (
+                                geo.rotation_p is None
+                                and geo.scale_p is None
+                                and geo.translate_p is None
+                                and geo.affine_p > 0
+                            )
+                            # Check if scale is enabled (via scale_p or affine_p)
+                            scale_enabled = (
+                                geo.scale_p is not None and geo.scale_p > 0
+                            ) or (
+                                geo.rotation_p is None
+                                and geo.scale_p is None
+                                and geo.translate_p is None
+                                and geo.affine_p > 0
+                            )
+                            if rotation_enabled or scale_enabled:
+                                # First find the actual max bbox size from labels
+                                bbox_size = find_max_instance_bbox_size(train_label)
+                                bbox_size = max(
+                                    bbox_size,
+                                    self.config.data_config.preprocessing.min_crop_size
+                                    or 100,
+                                )
+                                rotation_max = (
+                                    max(
+                                        abs(geo.rotation_min),
+                                        abs(geo.rotation_max),
+                                    )
+                                    if rotation_enabled
+                                    else 0.0
+                                )
+                                scale_max = geo.scale_max if scale_enabled else 1.0
+                                padding = compute_augmentation_padding(
+                                    bbox_size=bbox_size,
+                                    rotation_max=rotation_max,
+                                    scale_max=scale_max,
+                                )
+                            else:
+                                padding = 0
+                        else:
+                            padding = 0
                     crop_sz = find_instance_crop_size(
                         labels=train_label,
+                        padding=padding,
                         maximum_stride=self.config.model_config.backbone_config[
                             f"{self.backbone_type}"
                         ]["max_stride"],
                         min_crop_size=self.config.data_config.preprocessing.min_crop_size,
-                        input_scaling=self.config.data_config.preprocessing.scale,
                     )
                     if crop_sz > max_crop_size:
@@ -509,6 +638,14 @@ class ModelTrainer:
         """Compute config parameters."""
         logger.info("Setting up config...")
+        # Normalize empty strings to None for optional wandb fields
+        if self.config.trainer_config.wandb.prv_runid == "":
+            self.config.trainer_config.wandb.prv_runid = None
+        # Default wandb run name to trainer run_name if not specified
+        if self.config.trainer_config.wandb.name is None:
+            self.config.trainer_config.wandb.name = self.config.trainer_config.run_name
         # compute preprocessing parameters from the labels objects and fill in the config
         self._setup_preprocessing_config()
@@ -565,7 +702,7 @@ class ModelTrainer:
         self._verify_model_input_channels()
     def _setup_model_ckpt_dir(self):
-        """Create the model ckpt folder."""
+        """Create the model ckpt folder and save ground truth labels."""
         ckpt_path = (
             Path(self.config.trainer_config.ckpt_dir)
             / self.config.trainer_config.run_name
@@ -581,16 +718,61 @@ class ModelTrainer:
                 raise OSError(message)
         if RANK in [0, -1]:
+            # Check if we should filter to user-labeled frames only
+            user_instances_only = OmegaConf.select(
+                self.config, "data_config.user_instances_only", default=True
+            )
+            # Save train and val ground truth labels
             for idx, (train, val) in enumerate(zip(self.train_labels, self.val_labels)):
-                train.save(
-                    Path(ckpt_path) / f"labels_train_gt_{idx}.slp",
+                # Filter to user-labeled frames if needed (for evaluation)
+                if user_instances_only:
+                    train_filtered = self._filter_to_user_labeled(train)
+                    val_filtered = self._filter_to_user_labeled(val)
+                else:
+                    train_filtered = train
+                    val_filtered = val
+                train_filtered.save(
+                    Path(ckpt_path) / f"labels_gt.train.{idx}.slp",
                     restore_original_videos=False,
                 )
-                val.save(
-                    Path(ckpt_path) / f"labels_val_gt_{idx}.slp",
+                val_filtered.save(
+                    Path(ckpt_path) / f"labels_gt.val.{idx}.slp",
                     restore_original_videos=False,
                 )
+            # Save test ground truth labels if test paths are provided
+            test_file_path = OmegaConf.select(
+                self.config, "data_config.test_file_path", default=None
+            )
+            if test_file_path is not None:
+                # Normalize to list of strings
+                if isinstance(test_file_path, str):
+                    test_paths = [test_file_path]
+                else:
+                    test_paths = list(test_file_path)
+                for idx, test_path in enumerate(test_paths):
+                    # Only save if it's a .slp file (not a video file)
+                    if test_path.endswith(".slp") or test_path.endswith(".pkg.slp"):
+                        try:
+                            test_labels = sio.load_slp(test_path)
+                            if user_instances_only:
+                                test_filtered = self._filter_to_user_labeled(
+                                    test_labels
+                                )
+                            else:
+                                test_filtered = test_labels
+                            test_filtered.save(
+                                Path(ckpt_path) / f"labels_gt.test.{idx}.slp",
+                                restore_original_videos=False,
+                            )
+                        except Exception as e:
+                            logger.warning(
+                                f"Could not save test ground truth for {test_path}: {e}"
+                            )
     def _setup_viz_datasets(self):
         """Setup dataloaders."""
         data_viz_config = self.config.copy()
@@ -716,6 +898,10 @@ class ModelTrainer:
             )
             loggers.append(wandb_logger)
+            # Learning rate monitor callback - logs LR at each step for dynamic schedulers
+            # Only added when wandb is enabled since it requires a logger
+            callbacks.append(LearningRateMonitor(logging_interval="step"))
             # save the configs as yaml in the checkpoint dir
             # Mask API key in both configs to prevent saving to disk
             self.config.trainer_config.wandb.api_key = ""
@@ -814,13 +1000,80 @@ class ModelTrainer:
             if self.config.trainer_config.use_wandb and OmegaConf.select(
                 self.config, "trainer_config.wandb.save_viz_imgs_wandb", default=False
             ):
-                callbacks.append(
-                    WandBPredImageLogger(
-                        viz_folder=viz_dir,
-                        wandb_run_name=self.config.trainer_config.wandb.name,
-                        is_bottomup=(self.model_type == "bottomup"),
-                    )
+                # Get wandb viz config options
+                viz_enabled = OmegaConf.select(
+                    self.config, "trainer_config.wandb.viz_enabled", default=True
                 )
+                viz_boxes = OmegaConf.select(
+                    self.config, "trainer_config.wandb.viz_boxes", default=False
+                )
+                viz_masks = OmegaConf.select(
+                    self.config, "trainer_config.wandb.viz_masks", default=False
+                )
+                viz_box_size = OmegaConf.select(
+                    self.config, "trainer_config.wandb.viz_box_size", default=5.0
+                )
+                viz_confmap_threshold = OmegaConf.select(
+                    self.config,
+                    "trainer_config.wandb.viz_confmap_threshold",
+                    default=0.1,
+                )
+                log_viz_table = OmegaConf.select(
+                    self.config, "trainer_config.wandb.log_viz_table", default=False
+                )
+                # Create viz data pipelines for wandb callback
+                wandb_train_viz_pipeline = cycle(copy.deepcopy(viz_train_dataset))
+                wandb_val_viz_pipeline = cycle(copy.deepcopy(viz_val_dataset))
+                if self.model_type == "bottomup":
+                    # Bottom-up model needs PAF visualizations
+                    wandb_train_pafs_pipeline = cycle(copy.deepcopy(viz_train_dataset))
+                    wandb_val_pafs_pipeline = cycle(copy.deepcopy(viz_val_dataset))
+                    callbacks.append(
+                        WandBVizCallbackWithPAFs(
+                            train_viz_fn=lambda: self.lightning_model.get_visualization_data(
+                                next(wandb_train_viz_pipeline)
+                            ),
+                            val_viz_fn=lambda: self.lightning_model.get_visualization_data(
+                                next(wandb_val_viz_pipeline)
+                            ),
+                            train_pafs_viz_fn=lambda: self.lightning_model.get_visualization_data(
+                                next(wandb_train_pafs_pipeline), include_pafs=True
+                            ),
+                            val_pafs_viz_fn=lambda: self.lightning_model.get_visualization_data(
+                                next(wandb_val_pafs_pipeline), include_pafs=True
+                            ),
+                            viz_enabled=viz_enabled,
+                            viz_boxes=viz_boxes,
+                            viz_masks=viz_masks,
+                            box_size=viz_box_size,
+                            confmap_threshold=viz_confmap_threshold,
+                            log_table=log_viz_table,
+                        )
+                    )
+                else:
+                    # Standard models
+                    callbacks.append(
+                        WandBVizCallback(
+                            train_viz_fn=lambda: self.lightning_model.get_visualization_data(
+                                next(wandb_train_viz_pipeline)
+                            ),
+                            val_viz_fn=lambda: self.lightning_model.get_visualization_data(
+                                next(wandb_val_viz_pipeline)
+                            ),
+                            viz_enabled=viz_enabled,
+                            viz_boxes=viz_boxes,
+                            viz_masks=viz_masks,
+                            box_size=viz_box_size,
+                            confmap_threshold=viz_confmap_threshold,
+                            log_table=log_viz_table,
+                        )
+                    )
+        # Add custom progress bar with better metric formatting
+        if self.config.trainer_config.enable_progress_bar:
+            callbacks.append(SleapProgressBar())
         return loggers, callbacks
@@ -959,7 +1212,7 @@ class ModelTrainer:
         logger.info(f"Backbone model: {self.lightning_model.model.backbone}")
         logger.info(f"Head model: {self.lightning_model.model.head_layers}")
         total_params = sum(p.numel() for p in self.lightning_model.parameters())
-        logger.info(f"Total model parameters: {total_params}")
+        logger.info(f"Total model parameters: {total_params:,}")
         self.config.model_config.total_params = total_params
         # setup dataloaders
@@ -1000,6 +1253,23 @@ class ModelTrainer:
                         id=self.config.trainer_config.wandb.prv_runid,
                         group=self.config.trainer_config.wandb.group,
                     )
+                # Define custom x-axes for wandb metrics
+                # Epoch-level metrics use epoch as x-axis, step-level use default global_step
+                wandb.define_metric("epoch")
+                wandb.define_metric("val_loss", step_metric="epoch")
+                wandb.define_metric("val_time", step_metric="epoch")
+                wandb.define_metric("train_time", step_metric="epoch")
+                # Per-node losses use epoch as x-axis
+                for node_name in self.skeletons[0].node_names:
+                    wandb.define_metric(node_name, step_metric="epoch")
+                # Visualization images use epoch as x-axis
+                wandb.define_metric("train_predictions*", step_metric="epoch")
+                wandb.define_metric("val_predictions*", step_metric="epoch")
+                wandb.define_metric("train_pafs*", step_metric="epoch")
+                wandb.define_metric("val_pafs*", step_metric="epoch")
                 self.config.trainer_config.wandb.current_run_id = wandb.run.id
                 wandb.config["run_name"] = self.config.trainer_config.wandb.name
                 wandb.config["run_config"] = OmegaConf.to_container(
@@ -1017,6 +1287,9 @@ class ModelTrainer:
         self.trainer.strategy.barrier()
+        # Flag to track if training was interrupted (not completed normally)
+        training_interrupted = False
         try:
             logger.info(
                 f"Finished trainer set up. [{time.time() - start_setup_time:.1f}s]"
@@ -1032,6 +1305,7 @@ class ModelTrainer:
         except KeyboardInterrupt:
             logger.info("Stopping training...")
+            training_interrupted = True
         finally:
             logger.info(
@@ -1063,3 +1337,15 @@ class ModelTrainer:
                     if viz_dir.exists():
                         logger.info(f"Deleting viz folder at {viz_dir}...")
                         shutil.rmtree(viz_dir, ignore_errors=True)
+            # Clean up entire run folder if training was interrupted (KeyboardInterrupt)
+            if training_interrupted and self.trainer.global_rank == 0:
+                run_dir = (
+                    Path(self.config.trainer_config.ckpt_dir)
+                    / self.config.trainer_config.run_name
+                )
+                if run_dir.exists():
+                    logger.info(
+                        f"Training canceled - cleaning up run folder at {run_dir}..."
+                    )
+                    shutil.rmtree(run_dir, ignore_errors=True)

sleap-nn 0.0.5__py3-none-any.whl → 0.1.0a0__py3-none-any.whl

sleap-nn 0.0.5__py3-none-any.whl → 0.1.0a0__py3-none-any.whl