sleap-nn 0.0.5__py3-none-any.whl → 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,11 +8,79 @@ import torch
 from kornia.geometry.transform import crop_and_resize
 
 
+def compute_augmentation_padding(
+    bbox_size: float,
+    rotation_max: float = 0.0,
+    scale_max: float = 1.0,
+) -> int:
+    """Compute padding needed to accommodate augmentation transforms.
+
+    When rotation and scaling augmentations are applied, the bounding box of an
+    instance can expand beyond its original size. This function calculates the
+    padding needed to ensure the full instance remains visible after augmentation.
+
+    Args:
+        bbox_size: The size of the instance bounding box (max of width/height).
+        rotation_max: Maximum absolute rotation angle in degrees. For symmetric
+            rotation ranges like [-180, 180], pass 180.
+        scale_max: Maximum scaling factor. For scale range [0.9, 1.1], pass 1.1.
+
+    Returns:
+        Padding in pixels to add around the bounding box (total, not per side).
+    """
+    if rotation_max == 0.0 and scale_max <= 1.0:
+        return 0
+
+    # For a square bbox rotated by angle θ, the new bbox has side length:
+    #   L' = L * (|cos(θ)| + |sin(θ)|)
+    # This factor is 1 at 0° and 90° and peaks at 45°, where L' = L * sqrt(2).
+    # We use the worst case within the given rotation range.
+    rotation_rad = math.radians(min(abs(rotation_max), 90))
+    rotation_factor = abs(math.cos(rotation_rad)) + abs(math.sin(rotation_rad))
+
+    # The factor grows up to 45° and then falls back toward 1 as the angle
+    # approaches 90°, so the worst case for any rotation range that reaches 45°
+    # is sqrt(2).
+    if abs(rotation_max) >= 45:
+        rotation_factor = math.sqrt(2)
+
+    # Combined expansion factor.
+    expansion_factor = rotation_factor * max(scale_max, 1.0)
+
+    # Total padding needed (both sides).
+    expanded_size = bbox_size * expansion_factor
+    padding = expanded_size - bbox_size
+
+    return int(math.ceil(padding))
+
+
+def find_max_instance_bbox_size(labels: sio.Labels) -> float:
+    """Find the maximum bounding box dimension across all instances in labels.
+
+    Args:
+        labels: A `sio.Labels` containing user-labeled instances.
+
+    Returns:
+        The maximum bounding box dimension (max of width or height) across all instances.
+    """
+    max_length = 0.0
+    for lf in labels:
+        for inst in lf.instances:
+            if not inst.is_empty:
+                pts = inst.numpy()
+                diff_x = np.nanmax(pts[:, 0]) - np.nanmin(pts[:, 0])
+                diff_x = 0 if np.isnan(diff_x) else diff_x
+                max_length = np.maximum(max_length, diff_x)
+                diff_y = np.nanmax(pts[:, 1]) - np.nanmin(pts[:, 1])
+                diff_y = 0 if np.isnan(diff_y) else diff_y
+                max_length = np.maximum(max_length, diff_y)
+    return float(max_length)
+
+
 def find_instance_crop_size(
     labels: sio.Labels,
     padding: int = 0,
     maximum_stride: int = 2,
-    input_scaling: float = 1.0,
     min_crop_size: Optional[int] = None,
 ) -> int:
     """Compute the size of the largest instance bounding box from labels.
@@ -23,8 +91,6 @@ def find_instance_crop_size(
         maximum_stride: Ensure that the returned crop size is divisible by this value.
             Useful for ensuring that the crop size will not be truncated in a given
            architecture.
-        input_scaling: Float factor indicating the scale of the input images if any
-            scaling will be done before cropping.
         min_crop_size: The crop size set by the user.
 
     Returns:
@@ -32,7 +98,7 @@ def find_instance_crop_size(
        will contain the instances when cropped. The returned crop size will be larger
        or equal to the input `min_crop_size`.
 
-    This accounts for stride, padding and scaling when ensuring divisibility.
+    This accounts for stride and padding when ensuring divisibility.
     """
     # Check if user-specified crop size is divisible by max stride
     min_crop_size = 0 if min_crop_size is None else min_crop_size
@@ -46,7 +112,6 @@ def find_instance_crop_size(
        for inst in lf.instances:
            if not inst.is_empty:  # only if at least one point is not nan
                pts = inst.numpy()
-                pts *= input_scaling
                diff_x = np.nanmax(pts[:, 0]) - np.nanmin(pts[:, 0])
                diff_x = 0 if np.isnan(diff_x) else diff_x
                max_length = np.maximum(max_length, diff_x)
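Note: as a quick sanity check on the new padding helper (a hedged usage sketch only; the function is assumed to be importable from this module), a 100 px bounding box with rotation up to 180° and scale up to 1.1 expands by sqrt(2) * 1.1 ≈ 1.556, so roughly 56 px of total padding is expected:

    import math

    bbox = 100.0
    pad = compute_augmentation_padding(bbox, rotation_max=180.0, scale_max=1.1)
    # 100 * sqrt(2) * 1.1 - 100 ≈ 55.56 -> ceil -> 56
    assert pad == math.ceil(bbox * (math.sqrt(2) * 1.1 - 1))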
@@ -4,6 +4,36 @@ import torch
 import torchvision.transforms.v2.functional as F
 
 
+def normalize_on_gpu(image: torch.Tensor) -> torch.Tensor:
+    """Normalize image tensor on GPU after transfer.
+
+    This function is called in the model's forward() method after the image has been
+    transferred to GPU. It converts uint8 images to float32 and normalizes to [0, 1].
+
+    By performing normalization on GPU after transfer, we reduce PCIe bandwidth by 4x
+    (transferring 1 byte/pixel as uint8 instead of 4 bytes/pixel as float32). This
+    provides up to 17x speedup for the transfer+normalization stage.
+
+    This function handles two cases:
+    1. uint8 tensor with values in [0, 255] -> convert to float32 and divide by 255
+    2. float32 tensor with values in [0, 255] (e.g., from preprocessing that cast to
+       float32 without normalizing) -> divide by 255
+
+    Args:
+        image: Tensor image that may be uint8 or float32 with values in [0, 255] range.
+
+    Returns:
+        Float32 tensor normalized to [0, 1] range.
+    """
+    if not torch.is_floating_point(image):
+        # uint8 -> float32 normalized
+        image = image.float() / 255.0
+    elif image.max() > 1.0:
+        # float32 but not normalized (values > 1 indicate [0, 255] range)
+        image = image / 255.0
+    return image
+
+
 def convert_to_grayscale(image: torch.Tensor):
     """Convert given image to Grayscale image (single-channel).
 
@@ -38,8 +68,21 @@ def convert_to_rgb(image: torch.Tensor):
     return image
 
 
-def apply_normalization(image: torch.Tensor):
-    """Normalize image tensor."""
+def apply_normalization(image: torch.Tensor) -> torch.Tensor:
+    """Normalize image tensor from uint8 [0, 255] to float32 [0, 1].
+
+    This function is used during training data preprocessing where augmentation
+    operations (kornia) require float32 input.
+
+    For inference, normalization is deferred to GPU via `normalize_on_gpu()` in the
+    model's forward() method to reduce PCIe bandwidth.
+
+    Args:
+        image: Tensor image (typically uint8 with values in [0, 255]).
+
+    Returns:
+        Float32 tensor normalized to [0, 1] range.
+    """
     if not torch.is_floating_point(image):
         image = image.to(torch.float32) / 255.0
     return image
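Note: the split between the two helpers is deliberate. `apply_normalization` only rescales non-float tensors, while `normalize_on_gpu` also rescales float tensors that still hold [0, 255] values, so it is safe to call after uint8 frames have been moved to the device. A minimal behavioral sketch (assumed usage, not part of this diff):

    import torch

    raw = torch.full((1, 1, 2, 2), 255, dtype=torch.uint8)
    as_float = raw.to(torch.float32)                     # [0, 255] but already float32

    assert apply_normalization(raw).max() == 1.0         # uint8 -> scaled to [0, 1]
    assert apply_normalization(as_float).max() == 255.0  # float passes through unchanged
    assert normalize_on_gpu(as_float).max() == 1.0       # float in [0, 255] -> rescaled

    # Inference-style path: transfer uint8 (1 byte/pixel), then normalize on device.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    frames = torch.randint(0, 256, (4, 1, 512, 512), dtype=torch.uint8).to(device)
    frames = normalize_on_gpu(frames)                    # float32 in [0, 1]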
@@ -71,6 +71,8 @@ def process_lf(
     for inst in instances_list:
         if not inst.is_empty:
             instances.append(inst.numpy())
+    if len(instances) == 0:
+        return None
     instances = np.stack(instances, axis=0)
 
     # Add singleton time dimension for single frames.
@@ -233,6 +235,8 @@ class LabelsReader(Thread):
         instances_key: bool = False,
         only_labeled_frames: bool = False,
         only_suggested_frames: bool = False,
+        exclude_user_labeled: bool = False,
+        only_predicted_frames: bool = False,
     ):
         """Initialize attribute of the class."""
         super().__init__()
@@ -245,6 +249,8 @@ class LabelsReader(Thread):
 
         self.only_labeled_frames = only_labeled_frames
         self.only_suggested_frames = only_suggested_frames
+        self.exclude_user_labeled = exclude_user_labeled
+        self.only_predicted_frames = only_predicted_frames
 
         # Filter to only user labeled instances
         if self.only_labeled_frames:
@@ -265,6 +271,20 @@ class LabelsReader(Thread):
                 )
                 self.filtered_lfs.append(new_lf)
 
+        # Filter out user labeled frames
+        elif self.exclude_user_labeled:
+            self.filtered_lfs = []
+            for lf in self.labels:
+                if not lf.has_user_instances:
+                    self.filtered_lfs.append(lf)
+
+        # Filter to only predicted frames
+        elif self.only_predicted_frames:
+            self.filtered_lfs = []
+            for lf in self.labels:
+                if lf.has_predicted_instances:
+                    self.filtered_lfs.append(lf)
+
         else:
             self.filtered_lfs = [lf for lf in self.labels]
 
@@ -300,6 +320,8 @@ class LabelsReader(Thread):
         instances_key: bool = False,
         only_labeled_frames: bool = False,
         only_suggested_frames: bool = False,
+        exclude_user_labeled: bool = False,
+        only_predicted_frames: bool = False,
     ):
         """Create LabelsReader from a .slp filename."""
         labels = sio.load_slp(filename)
@@ -310,6 +332,8 @@ class LabelsReader(Thread):
             instances_key,
             only_labeled_frames,
             only_suggested_frames,
+            exclude_user_labeled,
+            only_predicted_frames,
         )
 
     def run(self):
@@ -333,6 +357,8 @@ class LabelsReader(Thread):
             for inst in lf:
                 if not inst.is_empty:
                     instances.append(inst.numpy())
+            if len(instances) == 0:
+                continue
             instances = np.stack(instances, axis=0)
 
             # Add singleton time dimension for single frames.
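Note: the two new flags extend the existing elif chain of frame filters, so at most one filter applies per reader. Their effect on a loaded `sio.Labels` object is equivalent to the following standalone filtering (the file path is a hypothetical placeholder; sketch only):

    import sleap_io as sio

    labels = sio.load_slp("labels.slp")

    # exclude_user_labeled=True: keep only frames with no user instances.
    unlabeled_lfs = [lf for lf in labels if not lf.has_user_instances]

    # only_predicted_frames=True: keep only frames that have predictions.
    predicted_lfs = [lf for lf in labels if lf.has_predicted_instances]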
sleap_nn/evaluation.py CHANGED
@@ -61,18 +61,9 @@ def find_frame_pairs(
         # Find matching video instance in predictions.
         video_pr = None
         for video in labels_pr.videos:
-            if (
-                isinstance(video.backend, type(video_gt.backend))
-                and video.filename == video_gt.filename
-            ):
-                same_dataset = (
-                    (video.backend.dataset == video_gt.backend.dataset)
-                    if hasattr(video.backend, "dataset")
-                    else True
-                )  # `dataset` attr exists only for hdf5 backend not for mediavideo
-                if same_dataset:
-                    video_pr = video
-                    break
+            if video_gt.matches_content(video) and video_gt.matches_path(video):
+                video_pr = video
+                break
 
         if video_pr is None:
             continue
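Note: the hand-rolled backend/dataset comparison is replaced with sleap-io's own video matching helpers. A hypothetical standalone check of ground-truth versus predicted videos might look like this (file names are placeholders; sketch only):

    import sleap_io as sio

    labels_gt = sio.load_slp("ground_truth.slp")
    labels_pr = sio.load_slp("predictions.slp")

    video_gt = labels_gt.videos[0]
    matches = [
        video
        for video in labels_pr.videos
        if video_gt.matches_content(video) and video_gt.matches_path(video)
    ]
    print(f"Found {len(matches)} matching video(s) in predictions.")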
@@ -678,24 +669,109 @@ class Evaluator:
         return metrics
 
 
-def load_metrics(model_path: str, split="val"):
-    """Load the metrics for a given model and split.
+def _find_metrics_file(model_dir: Path, split: str, dataset_idx: int) -> Path:
+    """Find the metrics file in a model directory.
+
+    Tries new naming format first, then falls back to old format.
+    If split is "test" and not found, falls back to "val".
+    """
+    # Try new naming format first: metrics.{split}.{idx}.npz
+    metrics_path = model_dir / f"metrics.{split}.{dataset_idx}.npz"
+    if metrics_path.exists():
+        return metrics_path
+
+    # Fall back to old naming format: {split}_{idx}_pred_metrics.npz
+    metrics_path = model_dir / f"{split}_{dataset_idx}_pred_metrics.npz"
+    if metrics_path.exists():
+        return metrics_path
+
+    # If split is "test" and not found, try "val" fallback
+    if split == "test":
+        return _find_metrics_file(model_dir, "val", dataset_idx)
+
+    # Return the new format path (will raise FileNotFoundError later)
+    return model_dir / f"metrics.{split}.{dataset_idx}.npz"
+
+
+def _load_npz_metrics(metrics_path: Path) -> dict:
+    """Load metrics from an npz file, supporting both old and new formats.
+
+    New format: single "metrics" key containing a dict with all metrics.
+    Old format: individual metric keys at top level (voc_metrics, mOKS, etc.).
+    """
+    with np.load(metrics_path, allow_pickle=True) as data:
+        keys = list(data.keys())
+
+        # New format: single "metrics" key containing dict
+        if "metrics" in keys:
+            return data["metrics"].item()
+
+        # Old format: individual metric keys at top level
+        expected_keys = {
+            "voc_metrics",
+            "mOKS",
+            "distance_metrics",
+            "pck_metrics",
+            "visibility_metrics",
+        }
+        if expected_keys.issubset(set(keys)):
+            return {
+                k: data[k].item() if data[k].ndim == 0 else data[k]
+                for k in expected_keys
+            }
+
+        # Unknown format - return all keys as dict
+        return {k: data[k].item() if data[k].ndim == 0 else data[k] for k in keys}
+
+
+def load_metrics(
+    path: str,
+    split: str = "test",
+    dataset_idx: int = 0,
+) -> dict:
+    """Load metrics from a model folder or metrics file.
+
+    This function supports both the new format (single "metrics" key) and the old
+    format (individual metric keys at top level). It also handles both old and new
+    file naming conventions in model folders.
 
     Args:
-        model_path: Path to a model folder or metrics file (.npz).
-        split: Name of the split to load the metrics for. Must be `"train"`, `"val"` or
-            `"test"` (default: `"val"`). Ignored if a path to a metrics NPZ file is
-            provided.
+        path: Path to a model folder or metrics file (.npz).
+        split: Name of the split to load. Must be "train", "val", or "test".
+            Default: "test". If "test" is not found, falls back to "val".
+            Ignored if path points directly to a .npz file.
+        dataset_idx: Index of the dataset (for multi-dataset training).
+            Default: 0. Ignored if path points directly to a .npz file.
+
+    Returns:
+        Dictionary containing metrics with keys: voc_metrics, mOKS,
+        distance_metrics, pck_metrics, visibility_metrics.
+
+    Raises:
+        FileNotFoundError: If no metrics file is found.
 
+    Examples:
+        >>> # Load from model folder (tries test, falls back to val)
+        >>> metrics = load_metrics("/path/to/model")
+        >>> print(metrics["mOKS"]["mOKS"])
+
+        >>> # Load specific split and dataset
+        >>> metrics = load_metrics("/path/to/model", split="val", dataset_idx=1)
+
+        >>> # Load directly from npz file
+        >>> metrics = load_metrics("/path/to/metrics.val.0.npz")
     """
-    if Path(model_path).suffix == ".npz":
-        metrics_path = Path(model_path)
+    path = Path(path)
+
+    if path.suffix == ".npz":
+        metrics_path = path
     else:
-        metrics_path = Path(model_path) / f"{split}_0_pred_metrics.npz"
+        metrics_path = _find_metrics_file(path, split, dataset_idx)
+
     if not metrics_path.exists():
         raise FileNotFoundError(f"Metrics file not found at {metrics_path}")
-    with np.load(metrics_path, allow_pickle=True) as data:
-        return data["metrics"].item()
+
+    return _load_npz_metrics(metrics_path)
 
 
 def run_evaluation(
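Note: the new on-disk format stores a single pickled dict under a "metrics" key, which is what `_load_npz_metrics` checks for first. A minimal round-trip sketch (the file name and metric values here are hypothetical):

    import numpy as np

    metrics = {"mOKS": {"mOKS": 0.87}}      # real files also hold voc_metrics, etc.
    np.savez("metrics.val.0.npz", metrics=metrics)

    loaded = load_metrics("metrics.val.0.npz")
    print(loaded["mOKS"]["mOKS"])           # 0.87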
@@ -1 +1,7 @@
 """Inference-related modules."""
+
+from sleap_nn.inference.provenance import (
+    build_inference_provenance,
+    build_tracking_only_provenance,
+    merge_provenance,
+)
@@ -47,15 +47,23 @@ def crop_bboxes(
     width = abs(bboxes[0, 1, 0] - bboxes[0, 0, 0])
     box_size = tuple(torch.round(torch.Tensor((height + 1, width + 1))).to(torch.int32))
 
+    # Store original dtype for conversion back after cropping.
+    original_dtype = images.dtype
+
+    # Kornia's crop_and_resize requires float32 input.
+    images_to_crop = images[sample_inds]
+    if not torch.is_floating_point(images_to_crop):
+        images_to_crop = images_to_crop.float()
+
     # Crop.
     crops = crop_and_resize(
-        images[sample_inds],  # (n_boxes, channels, height, width)
+        images_to_crop,  # (n_boxes, channels, height, width)
        boxes=bboxes,
        size=box_size,
     )
 
     # Cast back to original dtype and return.
-    crops = crops.to(images.dtype)
+    crops = crops.to(original_dtype)
     return crops
 
 
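Note: this change lets `crop_bboxes` accept the uint8 frames produced by the deferred-normalization path, converting to float32 only for kornia and casting back afterwards. A hedged sketch of the same dtype round-trip using kornia directly (box coordinates are arbitrary):

    import torch
    from kornia.geometry.transform import crop_and_resize

    frames = torch.randint(0, 256, (1, 1, 64, 64), dtype=torch.uint8)
    # Corners in (x, y) order: top-left, top-right, bottom-right, bottom-left.
    boxes = torch.tensor([[[8.0, 8.0], [39.0, 8.0], [39.0, 39.0], [8.0, 39.0]]])

    crops = crop_and_resize(frames.float(), boxes=boxes, size=(32, 32))
    crops = crops.to(frames.dtype)  # back to uint8, as crop_bboxes now does
    assert crops.shape == (1, 1, 32, 32) and crops.dtype == torch.uint8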