PyPI - sleap-nn - Versions diffs - 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl - Mend

sleap-nn 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

sleap_nn/__init__.py +9 -2
sleap_nn/architectures/convnext.py +5 -0
sleap_nn/architectures/encoder_decoder.py +25 -6
sleap_nn/architectures/swint.py +8 -0
sleap_nn/cli.py +489 -46
sleap_nn/config/data_config.py +51 -8
sleap_nn/config/get_config.py +32 -24
sleap_nn/config/trainer_config.py +88 -0
sleap_nn/data/augmentation.py +61 -200
sleap_nn/data/custom_datasets.py +433 -61
sleap_nn/data/instance_cropping.py +71 -6
sleap_nn/data/normalization.py +45 -2
sleap_nn/data/providers.py +26 -0
sleap_nn/data/resizing.py +2 -2
sleap_nn/data/skia_augmentation.py +414 -0
sleap_nn/data/utils.py +135 -17
sleap_nn/evaluation.py +177 -42
sleap_nn/export/__init__.py +21 -0
sleap_nn/export/cli.py +1778 -0
sleap_nn/export/exporters/__init__.py +51 -0
sleap_nn/export/exporters/onnx_exporter.py +80 -0
sleap_nn/export/exporters/tensorrt_exporter.py +291 -0
sleap_nn/export/metadata.py +225 -0
sleap_nn/export/predictors/__init__.py +63 -0
sleap_nn/export/predictors/base.py +22 -0
sleap_nn/export/predictors/onnx.py +154 -0
sleap_nn/export/predictors/tensorrt.py +312 -0
sleap_nn/export/utils.py +307 -0
sleap_nn/export/wrappers/__init__.py +25 -0
sleap_nn/export/wrappers/base.py +96 -0
sleap_nn/export/wrappers/bottomup.py +243 -0
sleap_nn/export/wrappers/bottomup_multiclass.py +195 -0
sleap_nn/export/wrappers/centered_instance.py +56 -0
sleap_nn/export/wrappers/centroid.py +58 -0
sleap_nn/export/wrappers/single_instance.py +83 -0
sleap_nn/export/wrappers/topdown.py +180 -0
sleap_nn/export/wrappers/topdown_multiclass.py +304 -0
sleap_nn/inference/__init__.py +6 -0
sleap_nn/inference/bottomup.py +86 -20
sleap_nn/inference/peak_finding.py +93 -16
sleap_nn/inference/postprocessing.py +284 -0
sleap_nn/inference/predictors.py +339 -137
sleap_nn/inference/provenance.py +292 -0
sleap_nn/inference/topdown.py +55 -47
sleap_nn/legacy_models.py +65 -11
sleap_nn/predict.py +224 -19
sleap_nn/system_info.py +443 -0
sleap_nn/tracking/tracker.py +8 -1
sleap_nn/train.py +138 -44
sleap_nn/training/callbacks.py +1258 -5
sleap_nn/training/lightning_modules.py +902 -220
sleap_nn/training/model_trainer.py +424 -111
sleap_nn/training/schedulers.py +191 -0
sleap_nn/training/utils.py +367 -2
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0.dist-info}/METADATA +35 -33
sleap_nn-0.1.0.dist-info/RECORD +88 -0
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0.dist-info}/WHEEL +1 -1
sleap_nn-0.0.5.dist-info/RECORD +0 -63
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0.dist-info}/entry_points.txt +0 -0
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0.dist-info}/licenses/LICENSE +0 -0
{sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0.dist-info}/top_level.txt +0 -0

sleap_nn/data/utils.py CHANGED Viewed

@@ -1,12 +1,14 @@
 """Miscellaneous utility functions for data processing."""
 from typing import Tuple, List, Any, Optional
+import sys
 import torch
 from omegaconf import DictConfig
 import sleap_io as sio
 from sleap_nn.config.utils import get_model_type_from_cfg
 import psutil
 import numpy as np
+from loguru import logger
 from sleap_nn.data.providers import get_max_instances
@@ -115,35 +117,151 @@ def check_memory(
     return img_mem
+def estimate_cache_memory(
+    train_labels: List[sio.Labels],
+    val_labels: List[sio.Labels],
+    num_workers: int = 0,
+    memory_buffer: float = 0.2,
+) -> dict:
+    """Estimate memory requirements for in-memory caching dataset pipeline.
+    The estimate is the sum of four terms:
+    - Raw image data size, as reported by `check_memory` for each `Labels` object
+    - Python object overhead, a flat 200 bytes per sample
+    - DataLoader worker overhead, a platform-dependent fraction of the raw cache
+      size per worker (only when `num_workers > 0`)
+    - A buffer equal to `memory_buffer` times the sum of the three terms above
+    The worker term is chosen from `sys.platform`. On Linux, 25% of the raw cache
+    is added per worker, on the assumption that workers are forked and that
+    reference counting causes partial Copy-on-Write page duplication. On every
+    other platform, 50% of the raw cache is added per worker, on the assumption
+    that workers are spawned and receive their own copy of the cache, and a
+    warning is logged.
+    Args:
+        train_labels: List of `sleap_io.Labels` objects for training data.
+        val_labels: List of `sleap_io.Labels` objects for validation data.
+        num_workers: Number of DataLoader worker processes. When > 0, additional memory
+            overhead is estimated for worker process duplication.
+        memory_buffer: Fraction of memory to reserve as buffer for training overhead
+            (model weights, activations, gradients, etc.). Default: 0.2 (20%).
+    Returns:
+        dict: Memory estimation breakdown with keys:
+            - 'raw_cache_bytes': Raw image data size in bytes
+            - 'python_overhead_bytes': Estimated Python object overhead
+            - 'worker_overhead_bytes': Estimated memory for DataLoader workers
+            - 'buffer_bytes': Memory buffer for training overhead
+            - 'total_bytes': Total estimated memory requirement
+            - 'available_bytes': Available system memory
+            - 'sufficient': True if total <= available, False otherwise
+            - 'num_samples': Sum of `len()` over the paired train and val labels
+    """
+    # Calculate raw image cache size
+    # NOTE: zip() pairs the two lists and stops at the shorter one, so trailing
+    # entries of the longer list are not counted.
+    train_cache_bytes = 0
+    val_cache_bytes = 0
+    num_train_samples = 0
+    num_val_samples = 0
+    for train, val in zip(train_labels, val_labels):
+        train_cache_bytes += check_memory(train)
+        val_cache_bytes += check_memory(val)
+        num_train_samples += len(train)
+        num_val_samples += len(val)
+    raw_cache_bytes = train_cache_bytes + val_cache_bytes
+    total_samples = num_train_samples + num_val_samples
+    # Python object overhead: dict keys, numpy array wrappers, tuple keys
+    # Estimate ~200 bytes per sample for Python object overhead
+    python_overhead_per_sample = 200
+    python_overhead_bytes = total_samples * python_overhead_per_sample
+    # Worker memory overhead
+    # When num_workers > 0, workers are forked or spawned depending on platform.
+    # Default start methods (Python 3.8+):
+    #   - Linux: fork (Copy-on-Write, partial memory duplication)
+    #   - macOS: spawn (full dataset copy to each worker, changed in Python 3.8)
+    #   - Windows: spawn (full dataset copy to each worker)
+    worker_overhead_bytes = 0
+    if num_workers > 0:
+        if sys.platform == "linux":
+            # Linux uses fork() with Copy-on-Write by default
+            # Estimate 25% duplication per worker due to Python refcounting
+            # triggering CoW page copies
+            worker_overhead_bytes = int(raw_cache_bytes * 0.25 * num_workers)
+            if num_workers >= 4:
+                logger.info(
+                    f"Using in-memory caching with {num_workers} DataLoader workers. "
+                    f"Estimated additional memory for workers: "
+                    f"{worker_overhead_bytes / (1024**3):.2f} GB"
+                )
+        else:
+            # macOS (darwin) and Windows use spawn - dataset is copied to each worker
+            # Since Python 3.8, macOS defaults to spawn due to fork safety issues
+            # With caching enabled, we avoid pickling labels_list, but the cache
+            # dict is still part of the dataset and gets copied to each worker
+            worker_overhead_bytes = int(raw_cache_bytes * 0.5 * num_workers)
+            # Name the platform that is actually running: anything that is neither
+            # darwin nor win32 is reported by its raw sys.platform value instead
+            # of being mislabelled as Windows.
+            platform_name = {"darwin": "macOS", "win32": "Windows"}.get(
+                sys.platform, sys.platform
+            )
+            logger.warning(
+                f"Using in-memory caching with {num_workers} DataLoader workers on {platform_name}. "
+                f"Memory usage may be significantly higher than estimated (~{worker_overhead_bytes / (1024**3):.1f} GB extra) "
+                f"due to spawn-based multiprocessing. "
+                f"Consider using disk caching or num_workers=0 for large datasets."
+            )
+    # Memory buffer for training overhead (model, gradients, activations)
+    subtotal = raw_cache_bytes + python_overhead_bytes + worker_overhead_bytes
+    buffer_bytes = int(subtotal * memory_buffer)
+    total_bytes = subtotal + buffer_bytes
+    available_bytes = psutil.virtual_memory().available
+    return {
+        "raw_cache_bytes": raw_cache_bytes,
+        "python_overhead_bytes": python_overhead_bytes,
+        "worker_overhead_bytes": worker_overhead_bytes,
+        "buffer_bytes": buffer_bytes,
+        "total_bytes": total_bytes,
+        "available_bytes": available_bytes,
+        "sufficient": total_bytes <= available_bytes,
+        "num_samples": total_samples,
+    }
 def check_cache_memory(
     train_labels: List[sio.Labels],
     val_labels: List[sio.Labels],
     memory_buffer: float = 0.2,
+    num_workers: int = 0,
 ) -> bool:
     """Check memory requirements for in-memory caching dataset pipeline.
+    This function determines if the system has sufficient memory for in-memory
+    image caching, accounting for DataLoader worker processes.
     Args:
         train_labels: List of `sleap_io.Labels` objects for training data.
         val_labels: List of `sleap_io.Labels` objects for validation data.
-        memory_buffer: Fraction of the total image memory required for caching that
-            should be reserved as a buffer.
+        memory_buffer: Fraction of memory to reserve as buffer. Default: 0.2 (20%).
+        num_workers: Number of DataLoader worker processes. When > 0, additional memory
+            overhead is estimated for worker process duplication.
     Returns:
         bool: True if the total memory required for caching is within available system
             memory, False otherwise.
     """
-    train_cache_memory_final = 0
-    val_cache_memory_final = 0
-    for train, val in zip(train_labels, val_labels):
-        train_cache_memory = check_memory(train)
-        val_cache_memory = check_memory(val)
-        train_cache_memory_final += train_cache_memory
-        val_cache_memory_final += val_cache_memory
-    total_cache_memory = train_cache_memory_final + val_cache_memory_final
-    total_cache_memory += memory_buffer * total_cache_memory  # memory required in bytes
-    available_memory = psutil.virtual_memory().available  # available memory in bytes
-    if total_cache_memory > available_memory:
-        return False
-    return True
+    estimate = estimate_cache_memory(
+        train_labels=train_labels,
+        val_labels=val_labels,
+        num_workers=num_workers,
+        memory_buffer=memory_buffer,
+    )
+    if not estimate["sufficient"]:
+        total_gb = estimate["total_bytes"] / (1024**3)
+        available_gb = estimate["available_bytes"] / (1024**3)
+        raw_gb = estimate["raw_cache_bytes"] / (1024**3)
+        logger.info(
+            f"Memory check failed: need ~{total_gb:.2f} GB "
+            f"(raw cache: {raw_gb:.2f} GB, {estimate['num_samples']} samples), "
+            f"available: {available_gb:.2f} GB"
+        )
+    return estimate["sufficient"]

sleap_nn/evaluation.py CHANGED Viewed

@@ -29,11 +29,27 @@ def get_instances(labeled_frame: sio.LabeledFrame) -> List[MatchInstance]:
     """
     instance_list = []
     frame_idx = labeled_frame.frame_idx
-    video_path = (
-        labeled_frame.video.backend.source_filename
-        if hasattr(labeled_frame.video.backend, "source_filename")
-        else labeled_frame.video.backend.filename
-    )
+    # Extract video path with fallbacks for embedded videos
+    video = labeled_frame.video
+    video_path = None
+    if video is not None:
+        backend = getattr(video, "backend", None)
+        if backend is not None:
+            # Try source_filename first (for embedded videos with provenance)
+            video_path = getattr(backend, "source_filename", None)
+            if video_path is None:
+                video_path = getattr(backend, "filename", None)
+        # Fallback to video.filename if backend doesn't have it
+        if video_path is None:
+            video_path = getattr(video, "filename", None)
+            # Handle list filenames (image sequences)
+            if isinstance(video_path, list) and video_path:
+                video_path = video_path[0]
+    # Final fallback: use a unique identifier
+    if video_path is None:
+        video_path = f"video_{id(video)}" if video is not None else "unknown"
     for instance in labeled_frame.instances:
         match_instance = MatchInstance(
             instance=instance, frame_idx=frame_idx, video_path=video_path
@@ -47,6 +63,10 @@ def find_frame_pairs(
 ) -> List[Tuple[sio.LabeledFrame, sio.LabeledFrame]]:
     """Find corresponding frames across two sets of labels.
+    This function uses sleap-io's robust video matching API to handle various
+    scenarios including embedded videos, cross-platform paths, and videos with
+    different metadata.
     Args:
         labels_gt: A `sio.Labels` instance with ground truth instances.
         labels_pr: A `sio.Labels` instance with predicted instances.
@@ -56,25 +76,15 @@ def find_frame_pairs(
     Returns:
         A list of pairs of `sio.LabeledFrame`s in the form `(frame_gt, frame_pr)`.
     """
+    # Use sleap-io's robust video matching API (added in 0.6.2)
+    # The match() method returns a MatchResult with video_map: {pred_video: gt_video}
+    match_result = labels_gt.match(labels_pr)
     frame_pairs = []
-    for video_gt in labels_gt.videos:
-        # Find matching video instance in predictions.
-        video_pr = None
-        for video in labels_pr.videos:
-            if (
-                isinstance(video.backend, type(video_gt.backend))
-                and video.filename == video_gt.filename
-            ):
-                same_dataset = (
-                    (video.backend.dataset == video_gt.backend.dataset)
-                    if hasattr(video.backend, "dataset")
-                    else True
-                )  # `dataset` attr exists only for hdf5 backend not for mediavideo
-                if same_dataset:
-                    video_pr = video
-                    break
-        if video_pr is None:
+    # Iterate over matched video pairs (pred_video -> gt_video mapping)
+    for video_pr, video_gt in match_result.video_map.items():
+        if video_gt is None:
+            # No match found for this prediction video
             continue
         # Find labeled frames in this video.
@@ -629,11 +639,19 @@ class Evaluator:
         mPCK_parts = pcks.mean(axis=0).mean(axis=-1)
         mPCK = mPCK_parts.mean()
+        # Precompute PCK at common thresholds
+        idx_5 = np.argmin(np.abs(thresholds - 5))
+        idx_10 = np.argmin(np.abs(thresholds - 10))
+        pck5 = pcks[:, :, idx_5].mean()
+        pck10 = pcks[:, :, idx_10].mean()
         return {
             "thresholds": thresholds,
             "pcks": pcks,
             "mPCK_parts": mPCK_parts,
             "mPCK": mPCK,
+            "PCK@5": pck5,
+            "PCK@10": pck10,
         }
     def visibility_metrics(self):
@@ -678,24 +696,109 @@ class Evaluator:
         return metrics
-def load_metrics(model_path: str, split="val"):
-    """Load the metrics for a given model and split.
+def _find_metrics_file(model_dir: Path, split: str, dataset_idx: int) -> Path:
+    """Locate the metrics file for a split and dataset index in a model directory.
+    For each split that is tried, the new naming scheme
+    (`metrics.{split}.{idx}.npz`) is checked before the old one
+    (`{split}_{idx}_pred_metrics.npz`). A request for "test" also tries "val"
+    once both "test" candidates are missing.
+    Returns the first candidate that exists. If none exists, the new-format path
+    of the last split tried is returned; the caller is expected to detect that
+    it is missing.
+    """
+    # Only "test" has a fallback split; any other split is tried on its own.
+    splits_to_try = [split, "val"] if split == "test" else [split]
+    for split_name in splits_to_try:
+        candidates = (
+            model_dir / f"metrics.{split_name}.{dataset_idx}.npz",
+            model_dir / f"{split_name}_{dataset_idx}_pred_metrics.npz",
+        )
+        for candidate in candidates:
+            if candidate.exists():
+                return candidate
+    # Nothing on disk: hand back the new-format path of the last split tried.
+    return model_dir / f"metrics.{splits_to_try[-1]}.{dataset_idx}.npz"
+def _load_npz_metrics(metrics_path: Path) -> dict:
+    """Load metrics from an npz file, supporting both old and new formats.
+    New format: single "metrics" key containing a dict with all metrics.
+    Old format: individual metric keys at top level (voc_metrics, mOKS, etc.).
+    Any other layout is returned as a dict of every key in the archive.
+    In the old-format and fallback cases, 0-d arrays are unwrapped to the object
+    they hold and all other arrays are returned unchanged.
+    Args:
+        metrics_path: Path to the `.npz` file to read.
+    Returns:
+        Dictionary of metrics. For the old format the keys appear in the fixed
+        order voc_metrics, mOKS, distance_metrics, pck_metrics,
+        visibility_metrics.
+    """
+    def _unwrap(arr):
+        # A 0-d array wraps one object (e.g. a pickled dict); return it bare.
+        return arr.item() if arr.ndim == 0 else arr
+    # SECURITY: allow_pickle=True unpickles object arrays, which can execute
+    # arbitrary code. Only load metrics files from trusted sources.
+    with np.load(metrics_path, allow_pickle=True) as data:
+        keys = list(data.keys())
+        # New format: single "metrics" key containing dict
+        if "metrics" in keys:
+            return data["metrics"].item()
+        # Old format: individual metric keys at top level. A tuple (not a set)
+        # keeps the key order of the returned dict deterministic.
+        expected_keys = (
+            "voc_metrics",
+            "mOKS",
+            "distance_metrics",
+            "pck_metrics",
+            "visibility_metrics",
+        )
+        # Unknown format - fall back to every key in the archive
+        selected = expected_keys if set(expected_keys).issubset(keys) else keys
+        # Indexing the archive re-reads the member from disk on every access,
+        # so fetch each array exactly once.
+        return {k: _unwrap(data[k]) for k in selected}
+def load_metrics(
+    path: str,
+    split: str = "test",
+    dataset_idx: int = 0,
+) -> dict:
+    """Load metrics from a model folder or metrics file.
+    This function supports both the new format (single "metrics" key) and the old
+    format (individual metric keys at top level). It also handles both old and new
+    file naming conventions in model folders.
     Args:
-        model_path: Path to a model folder or metrics file (.npz).
-        split: Name of the split to load the metrics for. Must be `"train"`, `"val"` or
-            `"test"` (default: `"val"`). Ignored if a path to a metrics NPZ file is
-            provided.
+        path: Path to a model folder or metrics file (.npz).
+        split: Name of the split to load. Must be "train", "val", or "test".
+            Default: "test". If "test" is not found, falls back to "val".
+            Ignored if path points directly to a .npz file.
+        dataset_idx: Index of the dataset (for multi-dataset training).
+            Default: 0. Ignored if path points directly to a .npz file.
+    Returns:
+        Dictionary containing metrics with keys: voc_metrics, mOKS,
+        distance_metrics, pck_metrics, visibility_metrics.
+    Raises:
+        FileNotFoundError: If no metrics file is found.
+    Examples:
+        >>> # Load from model folder (tries test, falls back to val)
+        >>> metrics = load_metrics("/path/to/model")
+        >>> print(metrics["mOKS"]["mOKS"])
+        >>> # Load specific split and dataset
+        >>> metrics = load_metrics("/path/to/model", split="val", dataset_idx=1)
+        >>> # Load directly from npz file
+        >>> metrics = load_metrics("/path/to/metrics.val.0.npz")
     """
-    if Path(model_path).suffix == ".npz":
-        metrics_path = Path(model_path)
+    path = Path(path)
+    if path.suffix == ".npz":
+        metrics_path = path
     else:
-        metrics_path = Path(model_path) / f"{split}_0_pred_metrics.npz"
+        metrics_path = _find_metrics_file(path, split, dataset_idx)
     if not metrics_path.exists():
         raise FileNotFoundError(f"Metrics file not found at {metrics_path}")
-    with np.load(metrics_path, allow_pickle=True) as data:
-        return data["metrics"].item()
+    return _load_npz_metrics(metrics_path)
 def run_evaluation(
@@ -710,11 +813,26 @@ def run_evaluation(
     """Evaluate SLEAP-NN model predictions against ground truth labels."""
     logger.info("Loading ground truth labels...")
     ground_truth_instances = sio.load_slp(ground_truth_path)
+    logger.info(
+        f"  Ground truth: {len(ground_truth_instances.videos)} videos, "
+        f"{len(ground_truth_instances.labeled_frames)} frames"
+    )
     logger.info("Loading predicted labels...")
     predicted_instances = sio.load_slp(predicted_path)
+    logger.info(
+        f"  Predictions: {len(predicted_instances.videos)} videos, "
+        f"{len(predicted_instances.labeled_frames)} frames"
+    )
+    logger.info("Matching videos and frames...")
+    # Get match stats before creating evaluator
+    match_result = ground_truth_instances.match(predicted_instances)
+    logger.info(
+        f"  Videos matched: {match_result.n_videos_matched}/{len(match_result.video_map)}"
+    )
-    logger.info("Creating evaluator...")
+    logger.info("Matching instances...")
     evaluator = Evaluator(
         ground_truth_instances=ground_truth_instances,
         predicted_instances=predicted_instances,
@@ -723,21 +841,38 @@ def run_evaluation(
         match_threshold=match_threshold,
         user_labels_only=user_labels_only,
     )
+    logger.info(
+        f"  Frame pairs: {len(evaluator.frame_pairs)}, "
+        f"Matched instances: {len(evaluator.positive_pairs)}, "
+        f"Unmatched GT: {len(evaluator.false_negatives)}"
+    )
     logger.info("Computing evaluation metrics...")
     metrics = evaluator.evaluate()
+    # Compute PCK at specific thresholds (5 and 10 pixels)
+    dists = metrics["distance_metrics"]["dists"]
+    dists_clean = np.copy(dists)
+    dists_clean[np.isnan(dists_clean)] = np.inf
+    pck_5 = (dists_clean < 5).mean()
+    pck_10 = (dists_clean < 10).mean()
     # Print key metrics
     logger.info("Evaluation Results:")
-    logger.info(f"mOKS: {metrics['mOKS']['mOKS']:.4f}")
-    logger.info(f"mAP (OKS VOC): {metrics['voc_metrics']['oks_voc.mAP']:.4f}")
-    logger.info(f"mAR (OKS VOC): {metrics['voc_metrics']['oks_voc.mAR']:.4f}")
-    logger.info(f"Average Distance: {metrics['distance_metrics']['avg']:.4f}")
-    logger.info(f"mPCK: {metrics['pck_metrics']['mPCK']:.4f}")
+    logger.info(f"  mOKS: {metrics['mOKS']['mOKS']:.4f}")
+    logger.info(f"  mAP (OKS VOC): {metrics['voc_metrics']['oks_voc.mAP']:.4f}")
+    logger.info(f"  mAR (OKS VOC): {metrics['voc_metrics']['oks_voc.mAR']:.4f}")
+    logger.info(f"  Average Distance: {metrics['distance_metrics']['avg']:.2f} px")
+    logger.info(f"  dist.p50: {metrics['distance_metrics']['p50']:.2f} px")
+    logger.info(f"  dist.p95: {metrics['distance_metrics']['p95']:.2f} px")
+    logger.info(f"  dist.p99: {metrics['distance_metrics']['p99']:.2f} px")
+    logger.info(f"  mPCK: {metrics['pck_metrics']['mPCK']:.4f}")
+    logger.info(f"  PCK@5px: {pck_5:.4f}")
+    logger.info(f"  PCK@10px: {pck_10:.4f}")
     logger.info(
-        f"Visibility Precision: {metrics['visibility_metrics']['precision']:.4f}"
+        f"  Visibility Precision: {metrics['visibility_metrics']['precision']:.4f}"
     )
-    logger.info(f"Visibility Recall: {metrics['visibility_metrics']['recall']:.4f}")
+    logger.info(f"  Visibility Recall: {metrics['visibility_metrics']['recall']:.4f}")
     # Save metrics if path provided
     if save_metrics:

sleap_nn/export/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Export utilities for sleap-nn."""
+from sleap_nn.export.exporters import export_model, export_to_onnx, export_to_tensorrt
+from sleap_nn.export.metadata import ExportMetadata
+from sleap_nn.export.predictors import (
+    load_exported_model,
+    ONNXPredictor,
+    TensorRTPredictor,
+)
+from sleap_nn.export.utils import build_bottomup_candidate_template
+__all__ = [
+    "export_model",
+    "export_to_onnx",
+    "export_to_tensorrt",
+    "load_exported_model",
+    "ONNXPredictor",
+    "TensorRTPredictor",
+    "ExportMetadata",
+    "build_bottomup_candidate_template",
+]

sleap-nn 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl

sleap-nn 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl