sleap-nn 0.0.5__py3-none-any.whl → 0.1.0a0__py3-none-any.whl
sleap_nn/__init__.py CHANGED
@@ -48,4 +48,9 @@ logger.add(
      format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} | {message}",
  )
 
- __version__ = "0.0.5"
+ __version__ = "0.1.0a0"
+
+ # Public API
+ from sleap_nn.evaluation import load_metrics
+
+ __all__ = ["load_metrics", "__version__"]
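The package root now re-exports `load_metrics`. A minimal usage sketch, assuming `load_metrics` accepts the path to a trained model directory (the path below is a placeholder):

```python
import sleap_nn

print(sleap_nn.__version__)  # "0.1.0a0"

# Load metrics saved during evaluation; "models/my_model" is a hypothetical path.
metrics = sleap_nn.load_metrics("models/my_model")
```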
sleap_nn/cli.py CHANGED
@@ -4,14 +4,24 @@ import click
  from loguru import logger
  from pathlib import Path
  from omegaconf import OmegaConf, DictConfig
+ import sleap_io as sio
  from sleap_nn.predict import run_inference, frame_list
  from sleap_nn.evaluation import run_evaluation
  from sleap_nn.train import run_training
+ from sleap_nn import __version__
  import hydra
  import sys
  from click import Command
 
 
+ def print_version(ctx, param, value):
+     """Print version and exit."""
+     if not value or ctx.resilient_parsing:
+         return
+     click.echo(f"sleap-nn {__version__}")
+     ctx.exit()
+
+
  class TrainCommand(Command):
      """Custom command class that overrides help behavior for train command."""
 
@@ -20,7 +30,26 @@ class TrainCommand(Command):
          show_training_help()
 
 
+ def parse_path_map(ctx, param, value):
+     """Parse (old, new) path pairs into a dictionary for path mapping options."""
+     if not value:
+         return None
+     result = {}
+     for old_path, new_path in value:
+         result[old_path] = Path(new_path).as_posix()
+     return result
+
+
  @click.group()
+ @click.option(
+     "--version",
+     "-v",
+     is_flag=True,
+     callback=print_version,
+     expose_value=False,
+     is_eager=True,
+     help="Show version and exit.",
+ )
  def cli():
      """SLEAP-NN: Neural network backend for training and inference for animal pose estimation.
 
@@ -29,6 +58,7 @@ def cli():
      train - Run training workflow
      track - Run inference/ tracking workflow
      eval - Run evaluation workflow
+     system - Display system information and GPU status
      """
      pass
 
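Because `--version` is declared with `is_eager=True` and `expose_value=False`, click invokes `print_version` before resolving any subcommand and exits immediately. A quick sketch exercising it with click's test runner:

```python
from click.testing import CliRunner

from sleap_nn.cli import cli

runner = CliRunner()
result = runner.invoke(cli, ["--version"])
assert result.exit_code == 0
assert result.output.startswith("sleap-nn ")
```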
@@ -67,8 +97,38 @@ For a detailed list of all available config options, please refer to https://nn.
  @click.option(
      "--config-dir", "-d", type=str, default=".", help="Configuration directory path"
  )
+ @click.option(
+     "--video-paths",
+     "-v",
+     multiple=True,
+     help="Video paths to replace existing paths in the labels file. "
+     "Order must match the order of videos in the labels file. "
+     "Can be specified multiple times. "
+     "Example: --video-paths /path/to/vid1.mp4 --video-paths /path/to/vid2.mp4",
+ )
+ @click.option(
+     "--video-path-map",
+     nargs=2,
+     multiple=True,
+     callback=parse_path_map,
+     metavar="OLD NEW",
+     help="Map old video path to new path. Takes two arguments: old path and new path. "
+     "Can be specified multiple times. "
+     'Example: --video-path-map "/old/vid.mp4" "/new/vid.mp4"',
+ )
+ @click.option(
+     "--prefix-map",
+     nargs=2,
+     multiple=True,
+     callback=parse_path_map,
+     metavar="OLD NEW",
+     help="Map old path prefix to new prefix. Takes two arguments: old prefix and new prefix. "
+     "Updates ALL videos that share the same prefix. Useful when moving data between machines. "
+     "Can be specified multiple times. "
+     'Example: --prefix-map "/old/server/path" "/new/local/path"',
+ )
  @click.argument("overrides", nargs=-1, type=click.UNPROCESSED)
- def train(config_name, config_dir, overrides):
+ def train(config_name, config_dir, video_paths, video_path_map, prefix_map, overrides):
      """Run training workflow with Hydra config overrides.
 
      Examples:
@@ -97,7 +157,62 @@ def train(config_name, config_dir, overrides):
 
      logger.info("Input config:")
      logger.info("\n" + OmegaConf.to_yaml(cfg))
-     run_training(cfg)
+
+     # Handle video path replacement options
+     train_labels = None
+     val_labels = None
+
+     # Check that only one replacement option is used
+     # video_paths is a tuple (empty if not used), others are None or dict
+     has_video_paths = len(video_paths) > 0
+     has_video_path_map = video_path_map is not None
+     has_prefix_map = prefix_map is not None
+     options_used = sum([has_video_paths, has_video_path_map, has_prefix_map])
+
+     if options_used > 1:
+         raise click.UsageError(
+             "Cannot use multiple path replacement options. "
+             "Choose one of: --video-paths, --video-path-map, or --prefix-map."
+         )
+
+     if options_used == 1:
+         # Load train labels
+         train_labels = [
+             sio.load_slp(path) for path in cfg.data_config.train_labels_path
+         ]
+
+         # Load val labels if they exist
+         if (
+             cfg.data_config.val_labels_path is not None
+             and len(cfg.data_config.val_labels_path) > 0
+         ):
+             val_labels = [
+                 sio.load_slp(path) for path in cfg.data_config.val_labels_path
+             ]
+
+         # Build replacement arguments based on option used
+         if has_video_paths:
+             # List of paths (order must match videos in labels file)
+             replace_kwargs = {
+                 "new_filenames": [Path(p).as_posix() for p in video_paths]
+             }
+         elif has_video_path_map:
+             # Dictionary mapping old filenames to new filenames
+             replace_kwargs = {"filename_map": video_path_map}
+         else:  # has_prefix_map
+             # Dictionary mapping old prefixes to new prefixes
+             replace_kwargs = {"prefix_map": prefix_map}
+
+         # Apply replacement to train labels
+         for labels in train_labels:
+             labels.replace_filenames(**replace_kwargs)
+
+         # Apply replacement to val labels if they exist
+         if val_labels:
+             for labels in val_labels:
+                 labels.replace_filenames(**replace_kwargs)
+
+     run_training(config=cfg, train_labels=train_labels, val_labels=val_labels)
 
 
  @cli.command()
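All three flags funnel into `sleap_io.Labels.replace_filenames`, which expects exactly one of `new_filenames`, `filename_map`, or `prefix_map`. A sketch of what `--prefix-map` does under the hood (file paths are placeholders):

```python
import sleap_io as sio

labels = sio.load_slp("labels.slp")  # placeholder path

# Equivalent of: sleap-nn train ... --prefix-map "/old/server/path" "/new/local/path"
labels.replace_filenames(prefix_map={"/old/server/path": "/new/local/path"})
```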
@@ -209,6 +324,18 @@ def train(config_name, config_dir, overrides):
      default=False,
      help="Only run inference on unlabeled suggested frames when running on labels dataset. This is useful for generating predictions for initialization during labeling.",
  )
+ @click.option(
+     "--exclude_user_labeled",
+     is_flag=True,
+     default=False,
+     help="Skip frames that have user-labeled instances. Useful when predicting on entire video but skipping already-labeled frames.",
+ )
+ @click.option(
+     "--only_predicted_frames",
+     is_flag=True,
+     default=False,
+     help="Only run inference on frames that already have predictions. Requires .slp input file. Useful for re-predicting with a different model.",
+ )
  @click.option(
      "--no_empty_frames",
      is_flag=True,
@@ -282,7 +409,7 @@ def train(config_name, config_dir, overrides):
      "--crop_size",
      type=int,
      default=None,
-     help="Crop size. If not provided, the crop size from training_config.yaml is used.",
+     help="Crop size. If not provided, the crop size from training_config.yaml is used. If `input_scale` is provided, then the cropped image will be resized according to `input_scale`.",
  )
  @click.option(
      "--peak_threshold",
@@ -474,5 +601,17 @@ def eval(**kwargs):
      run_evaluation(**kwargs)
 
 
+ @cli.command()
+ def system():
+     """Display system information and GPU status.
+
+     Shows Python version, platform, PyTorch version, CUDA availability,
+     driver version with compatibility check, GPU details, and package versions.
+     """
+     from sleap_nn.system_info import print_system_info
+
+     print_system_info()
+
+
  if __name__ == "__main__":
      cli()
@@ -6,7 +6,7 @@ the parameters required to initialize the data config.
 
  from attrs import define, field, validators
  from omegaconf import MISSING
- from typing import Optional, Tuple, Any, List
+ from typing import Optional, Tuple, Any, List, Union
  from loguru import logger
  import sleap_io as sio
  import yaml
@@ -20,11 +20,15 @@ class PreprocessingConfig:
      Attributes:
          ensure_rgb: (bool) True if the input image should have 3 channels (RGB image). If input has only one channel when this is set to `True`, then the images from single-channel is replicated along the channel axis. If the image has three channels and this is set to False, then we retain the three channels. *Default*: `False`.
          ensure_grayscale: (bool) True if the input image should only have a single channel. If input has three channels (RGB) and this is set to True, then we convert the image to grayscale (single-channel) image. If the source image has only one channel and this is set to False, then we retain the single channel input. *Default*: `False`.
-         max_height: (int) Maximum height the image should be padded to. If not provided, the original image size will be retained. *Default*: `None`.
-         max_width: (int) Maximum width the image should be padded to. If not provided, the original image size will be retained. *Default*: `None`.
+         max_height: (int) Maximum height the original image should be resized and padded to. If not provided, the original image size will be retained. *Default*: `None`.
+         max_width: (int) Maximum width the original image should be resized and padded to. If not provided, the original image size will be retained. *Default*: `None`.
          scale: (float) Factor to resize the image dimensions by, specified as a float. *Default*: `1.0`.
-         crop_size: (int) Crop size of each instance for centered-instance model. If `None`, this would be automatically computed based on the largest instance in the `sio.Labels` file. *Default*: `None`.
+         crop_size: (int) Crop size of each instance for centered-instance model. If `None`, this would be automatically computed based on the largest instance in the `sio.Labels` file.
+             If `scale` is provided, then the cropped image will be resized according to `scale`. *Default*: `None`.
          min_crop_size: (int) Minimum crop size to be used if `crop_size` is `None`. *Default*: `100`.
+         crop_padding: (int) Padding in pixels to add around the instance bounding box when computing crop size.
+             If `None`, padding is auto-computed based on augmentation settings (rotation/scale).
+             Only used when `crop_size` is `None`. *Default*: `None`.
      """
 
      ensure_rgb: bool = False
@@ -36,6 +40,7 @@ class PreprocessingConfig:
      )
      crop_size: Optional[int] = None
      min_crop_size: Optional[int] = 100  # to help app work in case of error
+     crop_padding: Optional[int] = None
 
      def validate_scale(self):
          """Scale Validation.
@@ -104,11 +109,14 @@ class GeometricConfig:
      Attributes:
          rotation_min: (float) Minimum rotation angle in degrees. A random angle in (rotation_min, rotation_max) will be sampled and applied to both images and keypoints. Set to 0 to disable rotation augmentation. *Default*: `-15.0`.
          rotation_max: (float) Maximum rotation angle in degrees. A random angle in (rotation_min, rotation_max) will be sampled and applied to both images and keypoints. Set to 0 to disable rotation augmentation. *Default*: `15.0`.
+         rotation_p: (float, optional) Probability of applying random rotation independently. If set, rotation is applied separately from scale/translate. If `None`, falls back to `affine_p` for bundled behavior. *Default*: `None`.
          scale_min: (float) Minimum scaling factor. If scale_min and scale_max are provided, the scale is randomly sampled from the range scale_min <= scale <= scale_max for isotropic scaling. *Default*: `0.9`.
          scale_max: (float) Maximum scaling factor. If scale_min and scale_max are provided, the scale is randomly sampled from the range scale_min <= scale <= scale_max for isotropic scaling. *Default*: `1.1`.
+         scale_p: (float, optional) Probability of applying random scaling independently. If set, scaling is applied separately from rotation/translate. If `None`, falls back to `affine_p` for bundled behavior. *Default*: `None`.
          translate_width: (float) Maximum absolute fraction for horizontal translation. For example, if translate_width=a, then horizontal shift is randomly sampled in the range -img_width * a < dx < img_width * a. Will not translate by default. *Default*: `0.0`.
          translate_height: (float) Maximum absolute fraction for vertical translation. For example, if translate_height=a, then vertical shift is randomly sampled in the range -img_height * a < dy < img_height * a. Will not translate by default. *Default*: `0.0`.
-         affine_p: (float) Probability of applying random affine transformations. *Default*: `0.0`.
+         translate_p: (float, optional) Probability of applying random translation independently. If set, translation is applied separately from rotation/scale. If `None`, falls back to `affine_p` for bundled behavior. *Default*: `None`.
+         affine_p: (float) Probability of applying random affine transformations (rotation, scale, translate bundled together). Used for backwards compatibility when individual `*_p` params are not set. *Default*: `0.0`.
          erase_scale_min: (float) Minimum value of range of proportion of erased area against input image. *Default*: `0.0001`.
          erase_scale_max: (float) Maximum value of range of proportion of erased area against input image. *Default*: `0.01`.
          erase_ratio_min: (float) Minimum value of range of aspect ratio of erased area. *Default*: `1.0`.
@@ -121,10 +129,13 @@
 
      rotation_min: float = field(default=-15.0, validator=validators.ge(-180))
      rotation_max: float = field(default=15.0, validator=validators.le(180))
+     rotation_p: Optional[float] = field(default=None)
      scale_min: float = field(default=0.9, validator=validators.ge(0))
      scale_max: float = field(default=1.1, validator=validators.ge(0))
+     scale_p: Optional[float] = field(default=None)
      translate_width: float = 0.0
      translate_height: float = 0.0
+     translate_p: Optional[float] = field(default=None)
      affine_p: float = field(default=0.0, validator=validate_proportion)
      erase_scale_min: float = 0.0001
      erase_scale_max: float = 0.01
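A sketch contrasting the two modes (same assumed module path as above); with per-transform probabilities set, each transform gets its own independent coin flip, while leaving them `None` keeps the legacy bundled affine:

```python
from sleap_nn.config.data_config import GeometricConfig  # assumed module path

# Legacy bundled behavior: rotation, scale, and translation applied all-or-nothing.
legacy = GeometricConfig(affine_p=0.5)

# New independent behavior: each transform is sampled separately.
independent = GeometricConfig(rotation_p=0.5, scale_p=0.25, translate_p=0.1)
```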
@@ -149,6 +160,28 @@ class AugmentationConfig:
      geometric: Optional[GeometricConfig] = None
 
 
+ def validate_test_file_path(instance, attribute, value):
+     """Validate test_file_path to accept str or List[str].
+
+     Args:
+         instance: The instance being validated.
+         attribute: The attribute being validated.
+         value: The value to validate.
+
+     Raises:
+         ValueError: If value is not None, str, or list of strings.
+     """
+     if value is None:
+         return
+     if isinstance(value, str):
+         return
+     if isinstance(value, (list, tuple)) and all(isinstance(p, str) for p in value):
+         return
+     message = f"{attribute.name} must be a string or list of strings, got {type(value).__name__}"
+     logger.error(message)
+     raise ValueError(message)
+
+
  @define
  class DataConfig:
      """Data configuration.
@@ -157,7 +190,8 @@ class DataConfig:
      train_labels_path: (List[str]) List of paths to training data (`.slp` file(s)). *Default*: `None`.
      val_labels_path: (List[str]) List of paths to validation data (`.slp` file(s)). *Default*: `None`.
      validation_fraction: (float) Float between 0 and 1 specifying the fraction of the training set to sample for generating the validation set. The remaining labeled frames will be left in the training set. If the `validation_labels` are already specified, this has no effect. *Default*: `0.1`.
-     test_file_path: (str) Path to test dataset (`.slp` file or `.mp4` file). *Note*: This is used only with CLI to get evaluation on test set after training is completed. *Default*: `None`.
+     use_same_data_for_val: (bool) If `True`, use the same data for both training and validation (train = val). Useful for intentional overfitting on small datasets. When enabled, `val_labels_path` and `validation_fraction` are ignored. *Default*: `False`.
+     test_file_path: (str or List[str]) Path or list of paths to test dataset(s) (`.slp` file(s) or `.mp4` file(s)). *Note*: This is used only with CLI to get evaluation on test set after training is completed. *Default*: `None`.
      provider: (str) Provider class to read the input sleap files. Only "LabelsReader" is currently supported for the training pipeline. *Default*: `"LabelsReader"`.
      user_instances_only: (bool) `True` if only user labeled instances should be used for training. If `False`, both user labeled and predicted instances would be used. *Default*: `True`.
      data_pipeline_fw: (str) Framework to create the data loaders. One of [`torch_dataset`, `torch_dataset_cache_img_memory`, `torch_dataset_cache_img_disk`]. *Default*: `"torch_dataset"`. (Note: When using `torch_dataset`, `num_workers` in `trainer_config` should be set to 0 as multiprocessing doesn't work with pickling video backends.)
@@ -173,7 +207,10 @@
      train_labels_path: Optional[List[str]] = None
      val_labels_path: Optional[List[str]] = None  # TODO : revisit MISSING!
      validation_fraction: float = 0.1
-     test_file_path: Optional[str] = None
+     use_same_data_for_val: bool = False
+     test_file_path: Optional[Any] = field(
+         default=None, validator=validate_test_file_path
+     )
      provider: str = "LabelsReader"
      user_instances_only: bool = True
      data_pipeline_fw: str = "torch_dataset"
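A sketch of the loosened `test_file_path` typing and the new overfitting switch; the validator accepts `None`, a single path, or a list of paths, and raises `ValueError` otherwise (paths are placeholders):

```python
from sleap_nn.config.data_config import DataConfig  # assumed module path

# Single test file (old behavior) still works.
cfg = DataConfig(train_labels_path=["train.slp"], test_file_path="test.slp")

# Multiple test files are now accepted.
cfg = DataConfig(
    train_labels_path=["train.slp"],
    test_file_path=["test_a.slp", "test_b.mp4"],
)

# Intentional overfitting: validation set is the training set.
cfg = DataConfig(train_labels_path=["train.slp"], use_same_data_for_val=True)
```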
@@ -131,27 +131,18 @@ def get_aug_config(
 
      for g in geometric_aug:
          if g == "rotation":
-             aug_config.geometric.affine_p = 1.0
-             aug_config.geometric.scale_min = 1.0
-             aug_config.geometric.scale_max = 1.0
-             aug_config.geometric.translate_height = 0
-             aug_config.geometric.translate_width = 0
+             # Use new independent rotation probability
+             aug_config.geometric.rotation_p = 1.0
          elif g == "scale":
+             # Use new independent scale probability
              aug_config.geometric.scale_min = 0.9
              aug_config.geometric.scale_max = 1.1
-             aug_config.geometric.affine_p = 1.0
-             aug_config.geometric.rotation_min = 0
-             aug_config.geometric.rotation_max = 0
-             aug_config.geometric.translate_height = 0
-             aug_config.geometric.translate_width = 0
+             aug_config.geometric.scale_p = 1.0
          elif g == "translate":
+             # Use new independent translate probability
              aug_config.geometric.translate_height = 0.2
              aug_config.geometric.translate_width = 0.2
-             aug_config.geometric.affine_p = 1.0
-             aug_config.geometric.rotation_min = 0
-             aug_config.geometric.rotation_max = 0
-             aug_config.geometric.scale_min = 1.0
-             aug_config.geometric.scale_max = 1.0
+             aug_config.geometric.translate_p = 1.0
          elif g == "erase_scale":
              aug_config.geometric.erase_p = 1.0
          elif g == "mixup":
@@ -456,7 +447,8 @@
      train_labels_path: Optional[List[str]] = None,
      val_labels_path: Optional[List[str]] = None,
      validation_fraction: float = 0.1,
-     test_file_path: Optional[str] = None,
+     use_same_data_for_val: bool = False,
+     test_file_path: Optional[Union[str, List[str]]] = None,
      provider: str = "LabelsReader",
      user_instances_only: bool = True,
      data_pipeline_fw: str = "torch_dataset",
@@ -470,6 +462,7 @@
      max_width: Optional[int] = None,
      crop_size: Optional[int] = None,
      min_crop_size: Optional[int] = 100,
+     crop_padding: Optional[int] = None,
      use_augmentations_train: bool = False,
      intensity_aug: Optional[Union[str, List[str], Dict[str, Any]]] = None,
      geometry_aug: Optional[Union[str, List[str], Dict[str, Any]]] = None,
@@ -486,7 +479,11 @@
          training set to sample for generating the validation set. The remaining
          labeled frames will be left in the training set. If the `validation_labels`
          are already specified, this has no effect. Default: 0.1.
-     test_file_path: Path to test dataset (`.slp` file or `.mp4` file).
+     use_same_data_for_val: If `True`, use the same data for both training and
+         validation (train = val). Useful for intentional overfitting on small
+         datasets. When enabled, `val_labels_path` and `validation_fraction` are
+         ignored. Default: False.
+     test_file_path: Path or list of paths to test dataset(s) (`.slp` file(s) or `.mp4` file(s)).
          Note: This is used to get evaluation on test set after training is completed.
      provider: Provider class to read the input sleap files. Only "LabelsReader"
          supported for the training pipeline. Default: "LabelsReader".
@@ -508,14 +505,17 @@
          is set to True, then we convert the image to grayscale (single-channel)
          image. If the source image has only one channel and this is set to False, then we retain the single channel input. Default: `False`.
      scale: Factor to resize the image dimensions by, specified as a float. Default: 1.0.
-     max_height: Maximum height the image should be padded to. If not provided, the
+     max_height: Maximum height the original image should be resized and padded to. If not provided, the
          original image size will be retained. Default: None.
-     max_width: Maximum width the image should be padded to. If not provided, the
+     max_width: Maximum width the original image should be resized and padded to. If not provided, the
          original image size will be retained. Default: None.
      crop_size: Crop size of each instance for centered-instance model.
          If `None`, this would be automatically computed based on the largest instance
-         in the `sio.Labels` file. Default: None.
+         in the `sio.Labels` file. If `scale` is provided, then the cropped image will be resized according to `scale`. Default: None.
      min_crop_size: Minimum crop size to be used if `crop_size` is `None`. Default: 100.
+     crop_padding: Padding in pixels to add around instance bounding box when computing
+         crop size. If `None`, padding is auto-computed based on augmentation settings.
+         Only used when `crop_size` is `None`. Default: None.
      use_augmentations_train: True if the data augmentation should be applied to the
          training data, else False. Default: False.
      intensity_aug: One of ["uniform_noise", "gaussian_noise", "contrast", "brightness"]
@@ -541,6 +541,7 @@
          scale=scale,
          crop_size=crop_size,
          min_crop_size=min_crop_size,
+         crop_padding=crop_padding,
      )
      augmentation_config = None
      if use_augmentations_train:
@@ -553,6 +554,7 @@
          train_labels_path=train_labels_path,
          val_labels_path=val_labels_path,
          validation_fraction=validation_fraction,
+         use_same_data_for_val=use_same_data_for_val,
          test_file_path=test_file_path,
          provider=provider,
          user_instances_only=user_instances_only,
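A sketch of the updated helper with the new keyword arguments threaded through (assumed module path; paths illustrative):

```python
from sleap_nn.config.data_config import get_data_config  # assumed module path

data_cfg = get_data_config(
    train_labels_path=["train.slp"],
    use_same_data_for_val=True,                   # val = train
    test_file_path=["test_a.slp", "test_b.slp"],  # str or list now accepted
    crop_padding=16,                              # forwarded to PreprocessingConfig
)
```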
@@ -84,6 +84,12 @@ class WandBConfig:
      prv_runid: (str) Previous run ID if training should be resumed from a previous ckpt. *Default*: `None`.
      group: (str) Group for wandb logging. *Default*: `None`.
      current_run_id: (str) Run ID for the current model training. (stored once the training starts). *Default*: `None`.
+     viz_enabled: (bool) If True, log pre-rendered matplotlib images to wandb. *Default*: `True`.
+     viz_boxes: (bool) If True, log interactive keypoint boxes. *Default*: `False`.
+     viz_masks: (bool) If True, log confidence map overlay masks. *Default*: `False`.
+     viz_box_size: (float) Size of keypoint boxes in pixels (for viz_boxes). *Default*: `5.0`.
+     viz_confmap_threshold: (float) Threshold for confidence map masks (for viz_masks). *Default*: `0.1`.
+     log_viz_table: (bool) If True, also log images to a wandb.Table for backwards compatibility. *Default*: `False`.
      """
 
      entity: Optional[str] = None
@@ -95,6 +101,12 @@ class WandBConfig:
      prv_runid: Optional[str] = None
      group: Optional[str] = None
      current_run_id: Optional[str] = None
+     viz_enabled: bool = True
+     viz_boxes: bool = False
+     viz_masks: bool = False
+     viz_box_size: float = 5.0
+     viz_confmap_threshold: float = 0.1
+     log_viz_table: bool = False
 
 
  @define
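A sketch of the new visualization toggles; pre-rendered matplotlib images stay on by default, while boxes, masks, and the legacy table are opt-in (assumed module path):

```python
from sleap_nn.config.trainer_config import WandBConfig  # assumed module path

wandb_cfg = WandBConfig(
    viz_enabled=True,           # pre-rendered matplotlib images (default)
    viz_boxes=True,             # interactive keypoint boxes
    viz_masks=True,             # confidence map overlay masks
    viz_confmap_threshold=0.2,  # mask cutoff
    log_viz_table=False,        # skip the backwards-compat wandb.Table
)
```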
@@ -112,10 +112,13 @@ def apply_geometric_augmentation(
      instances: torch.Tensor,
      rotation_min: Optional[float] = -15.0,
      rotation_max: Optional[float] = 15.0,
+     rotation_p: Optional[float] = None,
      scale_min: Optional[float] = 0.9,
      scale_max: Optional[float] = 1.1,
+     scale_p: Optional[float] = None,
      translate_width: Optional[float] = 0.02,
      translate_height: Optional[float] = 0.02,
+     translate_p: Optional[float] = None,
      affine_p: float = 0.0,
      erase_scale_min: Optional[float] = 0.0001,
      erase_scale_max: Optional[float] = 0.01,
@@ -133,11 +136,18 @@
      instances: Input keypoints. (n_samples, n_instances, n_nodes, 2) or (n_samples, n_nodes, 2)
      rotation_min: Minimum rotation angle in degrees. Default: -15.0.
      rotation_max: Maximum rotation angle in degrees. Default: 15.0.
+     rotation_p: Probability of applying random rotation independently. If None,
+         falls back to affine_p for bundled behavior. Default: None.
      scale_min: Minimum scaling factor for isotropic scaling. Default: 0.9.
      scale_max: Maximum scaling factor for isotropic scaling. Default: 1.1.
+     scale_p: Probability of applying random scaling independently. If None,
+         falls back to affine_p for bundled behavior. Default: None.
      translate_width: Maximum absolute fraction for horizontal translation. Default: 0.02.
      translate_height: Maximum absolute fraction for vertical translation. Default: 0.02.
-     affine_p: Probability of applying random affine transformations. Default: 0.0.
+     translate_p: Probability of applying random translation independently. If None,
+         falls back to affine_p for bundled behavior. Default: None.
+     affine_p: Probability of applying random affine transformations (rotation, scale,
+         translate bundled). Used when individual *_p params are None. Default: 0.0.
      erase_scale_min: Minimum value of range of proportion of erased area against input image. Default: 0.0001.
      erase_scale_max: Maximum value of range of proportion of erased area against input image. Default: 0.01.
      erase_ratio_min: Minimum value of range of aspect ratio of erased area. Default: 1.
@@ -151,7 +161,49 @@
          Returns tuple: (image, instances) with augmentation applied.
      """
      aug_stack = []
-     if affine_p > 0:
+
+     # Check if any individual probability is set
+     use_independent = (
+         rotation_p is not None or scale_p is not None or translate_p is not None
+     )
+
+     if use_independent:
+         # New behavior: Apply augmentations independently with separate probabilities
+         if rotation_p is not None and rotation_p > 0:
+             aug_stack.append(
+                 K.augmentation.RandomRotation(
+                     degrees=(rotation_min, rotation_max),
+                     p=rotation_p,
+                     keepdim=True,
+                     same_on_batch=True,
+                 )
+             )
+
+         if scale_p is not None and scale_p > 0:
+             aug_stack.append(
+                 K.augmentation.RandomAffine(
+                     degrees=0,  # No rotation
+                     translate=None,  # No translation
+                     scale=(scale_min, scale_max),
+                     p=scale_p,
+                     keepdim=True,
+                     same_on_batch=True,
+                 )
+             )
+
+         if translate_p is not None and translate_p > 0:
+             aug_stack.append(
+                 K.augmentation.RandomAffine(
+                     degrees=0,  # No rotation
+                     translate=(translate_width, translate_height),
+                     scale=None,  # No scaling
+                     p=translate_p,
+                     keepdim=True,
+                     same_on_batch=True,
+                 )
+             )
+     elif affine_p > 0:
+         # Legacy behavior: Bundled affine transformation
          aug_stack.append(
              K.augmentation.RandomAffine(
                  degrees=(rotation_min, rotation_max),
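A usage sketch of the independent mode (assumed module path): rotation is always applied and scaling half the time, each gated separately instead of by one bundled `RandomAffine`:

```python
import torch

from sleap_nn.data.augmentation import apply_geometric_augmentation  # assumed module path

image = torch.rand(1, 1, 256, 256)        # (n_samples, C, H, W)
instances = torch.rand(1, 2, 5, 2) * 256  # (n_samples, n_instances, n_nodes, 2)

aug_image, aug_instances = apply_geometric_augmentation(
    image,
    instances,
    rotation_p=1.0,  # always rotate
    scale_p=0.5,     # scale half the time
    # translate_p left as None: no independent translation is added
)
```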
@@ -177,6 +177,9 @@ class BaseDataset(Dataset):
              if self.user_instances_only:
                  if lf.user_instances is not None and len(lf.user_instances) > 0:
                      lf.instances = lf.user_instances
+                 else:
+                     # Skip frames without user instances
+                     continue
              is_empty = True
              for _, inst in enumerate(lf.instances):
                  if not inst.is_empty:  # filter all NaN instances.
@@ -684,15 +687,12 @@ class CenteredInstanceDataset(BaseDataset):
          the images aren't cached and loaded from the `.slp` file on each access.
      cache_img_path: Path to save the `.jpg` files. If `None`, current working dir is used.
      use_existing_imgs: Use existing imgs/ chunks in the `cache_img_path`.
-     crop_size: Crop size of each instance for centered-instance model.
+     crop_size: Crop size of each instance for centered-instance model. If `scale` is provided, then the cropped image will be resized according to `scale`.
      rank: Indicates the rank of the process. Used during distributed training to ensure that image storage to
          disk occurs only once across all workers.
      confmap_head_config: DictConfig object with all the keys in the `head_config` section.
          (required keys: `sigma`, `output_stride`, `part_names` and `anchor_part` depending on the model type ).
      labels_list: List of `sio.Labels` objects. Used to store the labels in the cache. (only used if `cache_img` is `None`)
-
-     Note: If scale is provided for centered-instance model, the images are cropped out
-         from the scaled image with the given crop size.
      """
 
      def __init__(
@@ -748,6 +748,9 @@
              if self.user_instances_only:
                  if lf.user_instances is not None and len(lf.user_instances) > 0:
                      lf.instances = lf.user_instances
+                 else:
+                     # Skip frames without user instances
+                     continue
              for inst_idx, inst in enumerate(lf.instances):
                  if not inst.is_empty:  # filter all NaN instances.
                      video_idx = labels[labels_idx].videos.index(lf.video)
@@ -834,13 +837,6 @@
          )
          instances = instances * eff_scale
 
-         # resize image
-         image, instances = apply_resizer(
-             image,
-             instances,
-             scale=self.scale,
-         )
-
          # get the centroids based on the anchor idx
          centroids = generate_centroids(instances, anchor_ind=self.anchor_ind)
 
@@ -901,6 +897,13 @@
          sample["instance"] = center_instance  # (n_samples=1, n_nodes, 2)
          sample["centroid"] = centered_centroid  # (n_samples=1, 2)
 
+         # resize the cropped image
+         sample["instance_image"], sample["instance"] = apply_resizer(
+             sample["instance_image"],
+             sample["instance"],
+             scale=self.scale,
+         )
+
          # Pad the image (if needed) according max stride
          sample["instance_image"] = apply_pad_to_stride(
              sample["instance_image"], max_stride=self.max_stride
@@ -959,7 +962,7 @@ class TopDownCenteredInstanceMultiClassDataset(CenteredInstanceDataset):
          the images aren't cached and loaded from the `.slp` file on each access.
      cache_img_path: Path to save the `.jpg` files. If `None`, current working dir is used.
      use_existing_imgs: Use existing imgs/ chunks in the `cache_img_path`.
-     crop_size: Crop size of each instance for centered-instance model.
+     crop_size: Crop size of each instance for centered-instance model. If `scale` is provided, then the cropped image will be resized according to `scale`.
      rank: Indicates the rank of the process. Used during distributed training to ensure that image storage to
          disk occurs only once across all workers.
      confmap_head_config: DictConfig object with all the keys in the `head_config` section.
@@ -967,9 +970,6 @@ class TopDownCenteredInstanceMultiClassDataset(CenteredInstanceDataset):
      class_vectors_head_config: DictConfig object with all the keys in the `head_config` section.
          (required keys: `classes`, `num_fc_layers`, `num_fc_units`, `output_stride`, `loss_weight`).
      labels_list: List of `sio.Labels` objects. Used to store the labels in the cache. (only used if `cache_img` is `None`)
-
-     Note: If scale is provided for centered-instance model, the images are cropped out
-         from the scaled image with the given crop size.
      """
 
      def __init__(
@@ -1082,13 +1082,6 @@
          )
          instances = instances * eff_scale
 
-         # resize image
-         image, instances = apply_resizer(
-             image,
-             instances,
-             scale=self.scale,
-         )
-
          # get class vectors
          track_ids = torch.Tensor(
              [
@@ -1165,6 +1158,13 @@
          sample["instance"] = center_instance  # (n_samples=1, n_nodes, 2)
          sample["centroid"] = centered_centroid  # (n_samples=1, 2)
 
+         # resize image
+         sample["instance_image"], sample["instance"] = apply_resizer(
+             sample["instance_image"],
+             sample["instance"],
+             scale=self.scale,
+         )
+
          # Pad the image (if needed) according max stride
          sample["instance_image"] = apply_pad_to_stride(
              sample["instance_image"], max_stride=self.max_stride