sleap-nn 0.1.0__py3-none-any.whl → 0.1.0a1__py3-none-any.whl

Files changed (53)
  1. sleap_nn/__init__.py +1 -1
  2. sleap_nn/architectures/convnext.py +0 -5
  3. sleap_nn/architectures/encoder_decoder.py +6 -25
  4. sleap_nn/architectures/swint.py +0 -8
  5. sleap_nn/cli.py +60 -364
  6. sleap_nn/config/data_config.py +5 -11
  7. sleap_nn/config/get_config.py +4 -5
  8. sleap_nn/config/trainer_config.py +0 -71
  9. sleap_nn/data/augmentation.py +241 -50
  10. sleap_nn/data/custom_datasets.py +34 -364
  11. sleap_nn/data/instance_cropping.py +1 -1
  12. sleap_nn/data/resizing.py +2 -2
  13. sleap_nn/data/utils.py +17 -135
  14. sleap_nn/evaluation.py +22 -81
  15. sleap_nn/inference/bottomup.py +20 -86
  16. sleap_nn/inference/peak_finding.py +19 -88
  17. sleap_nn/inference/predictors.py +117 -224
  18. sleap_nn/legacy_models.py +11 -65
  19. sleap_nn/predict.py +9 -37
  20. sleap_nn/train.py +4 -69
  21. sleap_nn/training/callbacks.py +105 -1046
  22. sleap_nn/training/lightning_modules.py +65 -602
  23. sleap_nn/training/model_trainer.py +204 -201
  24. {sleap_nn-0.1.0.dist-info → sleap_nn-0.1.0a1.dist-info}/METADATA +3 -15
  25. sleap_nn-0.1.0a1.dist-info/RECORD +65 -0
  26. {sleap_nn-0.1.0.dist-info → sleap_nn-0.1.0a1.dist-info}/WHEEL +1 -1
  27. sleap_nn/data/skia_augmentation.py +0 -414
  28. sleap_nn/export/__init__.py +0 -21
  29. sleap_nn/export/cli.py +0 -1778
  30. sleap_nn/export/exporters/__init__.py +0 -51
  31. sleap_nn/export/exporters/onnx_exporter.py +0 -80
  32. sleap_nn/export/exporters/tensorrt_exporter.py +0 -291
  33. sleap_nn/export/metadata.py +0 -225
  34. sleap_nn/export/predictors/__init__.py +0 -63
  35. sleap_nn/export/predictors/base.py +0 -22
  36. sleap_nn/export/predictors/onnx.py +0 -154
  37. sleap_nn/export/predictors/tensorrt.py +0 -312
  38. sleap_nn/export/utils.py +0 -307
  39. sleap_nn/export/wrappers/__init__.py +0 -25
  40. sleap_nn/export/wrappers/base.py +0 -96
  41. sleap_nn/export/wrappers/bottomup.py +0 -243
  42. sleap_nn/export/wrappers/bottomup_multiclass.py +0 -195
  43. sleap_nn/export/wrappers/centered_instance.py +0 -56
  44. sleap_nn/export/wrappers/centroid.py +0 -58
  45. sleap_nn/export/wrappers/single_instance.py +0 -83
  46. sleap_nn/export/wrappers/topdown.py +0 -180
  47. sleap_nn/export/wrappers/topdown_multiclass.py +0 -304
  48. sleap_nn/inference/postprocessing.py +0 -284
  49. sleap_nn/training/schedulers.py +0 -191
  50. sleap_nn-0.1.0.dist-info/RECORD +0 -88
  51. {sleap_nn-0.1.0.dist-info → sleap_nn-0.1.0a1.dist-info}/entry_points.txt +0 -0
  52. {sleap_nn-0.1.0.dist-info → sleap_nn-0.1.0a1.dist-info}/licenses/LICENSE +0 -0
  53. {sleap_nn-0.1.0.dist-info → sleap_nn-0.1.0a1.dist-info}/top_level.txt +0 -0
sleap_nn/data/utils.py CHANGED
@@ -1,14 +1,12 @@
 """Miscellaneous utility functions for data processing."""
 
 from typing import Tuple, List, Any, Optional
-import sys
 import torch
 from omegaconf import DictConfig
 import sleap_io as sio
 from sleap_nn.config.utils import get_model_type_from_cfg
 import psutil
 import numpy as np
-from loguru import logger
 from sleap_nn.data.providers import get_max_instances
 
 
@@ -117,151 +115,35 @@ def check_memory(
     return img_mem
 
 
-def estimate_cache_memory(
-    train_labels: List[sio.Labels],
-    val_labels: List[sio.Labels],
-    num_workers: int = 0,
-    memory_buffer: float = 0.2,
-) -> dict:
-    """Estimate memory requirements for in-memory caching dataset pipeline.
-
-    This function calculates the total memory needed for caching images, accounting for:
-    - Raw image data size
-    - Python object overhead (dictionary keys, numpy array wrappers)
-    - DataLoader worker memory overhead (Copy-on-Write duplication on Unix systems)
-    - General memory buffer for training overhead
-
-    When using DataLoader with num_workers > 0, worker processes are spawned via fork()
-    on Unix systems. While Copy-on-Write (CoW) initially shares memory, Python's reference
-    counting can trigger memory page duplication when workers access cached data.
-
-    Args:
-        train_labels: List of `sleap_io.Labels` objects for training data.
-        val_labels: List of `sleap_io.Labels` objects for validation data.
-        num_workers: Number of DataLoader worker processes. When > 0, additional memory
-            overhead is estimated for worker process duplication.
-        memory_buffer: Fraction of memory to reserve as buffer for training overhead
-            (model weights, activations, gradients, etc.). Default: 0.2 (20%).
-
-    Returns:
-        dict: Memory estimation breakdown with keys:
-            - 'raw_cache_bytes': Raw image data size in bytes
-            - 'python_overhead_bytes': Estimated Python object overhead
-            - 'worker_overhead_bytes': Estimated memory for DataLoader workers
-            - 'buffer_bytes': Memory buffer for training overhead
-            - 'total_bytes': Total estimated memory requirement
-            - 'available_bytes': Available system memory
-            - 'sufficient': True if total <= available, False otherwise
-    """
-    # Calculate raw image cache size
-    train_cache_bytes = 0
-    val_cache_bytes = 0
-    num_train_samples = 0
-    num_val_samples = 0
-
-    for train, val in zip(train_labels, val_labels):
-        train_cache_bytes += check_memory(train)
-        val_cache_bytes += check_memory(val)
-        num_train_samples += len(train)
-        num_val_samples += len(val)
-
-    raw_cache_bytes = train_cache_bytes + val_cache_bytes
-    total_samples = num_train_samples + num_val_samples
-
-    # Python object overhead: dict keys, numpy array wrappers, tuple keys
-    # Estimate ~200 bytes per sample for Python object overhead
-    python_overhead_per_sample = 200
-    python_overhead_bytes = total_samples * python_overhead_per_sample
-
-    # Worker memory overhead
-    # When num_workers > 0, workers are forked or spawned depending on platform.
-    # Default start methods (Python 3.8+):
-    #   - Linux: fork (Copy-on-Write, partial memory duplication)
-    #   - macOS: spawn (full dataset copy to each worker, changed in Python 3.8)
-    #   - Windows: spawn (full dataset copy to each worker)
-    worker_overhead_bytes = 0
-    if num_workers > 0:
-        if sys.platform == "linux":
-            # Linux uses fork() with Copy-on-Write by default
-            # Estimate 25% duplication per worker due to Python refcounting
-            # triggering CoW page copies
-            worker_overhead_bytes = int(raw_cache_bytes * 0.25 * num_workers)
-            if num_workers >= 4:
-                logger.info(
-                    f"Using in-memory caching with {num_workers} DataLoader workers. "
-                    f"Estimated additional memory for workers: "
-                    f"{worker_overhead_bytes / (1024**3):.2f} GB"
-                )
-        else:
-            # macOS (darwin) and Windows use spawn - dataset is copied to each worker
-            # Since Python 3.8, macOS defaults to spawn due to fork safety issues
-            # With caching enabled, we avoid pickling labels_list, but the cache
-            # dict is still part of the dataset and gets copied to each worker
-            worker_overhead_bytes = int(raw_cache_bytes * 0.5 * num_workers)
-            platform_name = "macOS" if sys.platform == "darwin" else "Windows"
-            logger.warning(
-                f"Using in-memory caching with {num_workers} DataLoader workers on {platform_name}. "
-                f"Memory usage may be significantly higher than estimated (~{worker_overhead_bytes / (1024**3):.1f} GB extra) "
-                f"due to spawn-based multiprocessing. "
-                f"Consider using disk caching or num_workers=0 for large datasets."
-            )
-
-    # Memory buffer for training overhead (model, gradients, activations)
-    subtotal = raw_cache_bytes + python_overhead_bytes + worker_overhead_bytes
-    buffer_bytes = int(subtotal * memory_buffer)
-
-    total_bytes = subtotal + buffer_bytes
-    available_bytes = psutil.virtual_memory().available
-
-    return {
-        "raw_cache_bytes": raw_cache_bytes,
-        "python_overhead_bytes": python_overhead_bytes,
-        "worker_overhead_bytes": worker_overhead_bytes,
-        "buffer_bytes": buffer_bytes,
-        "total_bytes": total_bytes,
-        "available_bytes": available_bytes,
-        "sufficient": total_bytes <= available_bytes,
-        "num_samples": total_samples,
-    }
-
-
 def check_cache_memory(
     train_labels: List[sio.Labels],
     val_labels: List[sio.Labels],
     memory_buffer: float = 0.2,
-    num_workers: int = 0,
 ) -> bool:
     """Check memory requirements for in-memory caching dataset pipeline.
 
-    This function determines if the system has sufficient memory for in-memory
-    image caching, accounting for DataLoader worker processes.
-
     Args:
         train_labels: List of `sleap_io.Labels` objects for training data.
         val_labels: List of `sleap_io.Labels` objects for validation data.
-        memory_buffer: Fraction of memory to reserve as buffer. Default: 0.2 (20%).
-        num_workers: Number of DataLoader worker processes. When > 0, additional memory
-            overhead is estimated for worker process duplication.
+        memory_buffer: Fraction of the total image memory required for caching that
+            should be reserved as a buffer.
 
     Returns:
         bool: True if the total memory required for caching is within available system
             memory, False otherwise.
     """
-    estimate = estimate_cache_memory(
-        train_labels=train_labels,
-        val_labels=val_labels,
-        num_workers=num_workers,
-        memory_buffer=memory_buffer,
-    )
-
-    if not estimate["sufficient"]:
-        total_gb = estimate["total_bytes"] / (1024**3)
-        available_gb = estimate["available_bytes"] / (1024**3)
-        raw_gb = estimate["raw_cache_bytes"] / (1024**3)
-        logger.info(
-            f"Memory check failed: need ~{total_gb:.2f} GB "
-            f"(raw cache: {raw_gb:.2f} GB, {estimate['num_samples']} samples), "
-            f"available: {available_gb:.2f} GB"
-        )
-
-    return estimate["sufficient"]
+    train_cache_memory_final = 0
+    val_cache_memory_final = 0
+    for train, val in zip(train_labels, val_labels):
+        train_cache_memory = check_memory(train)
+        val_cache_memory = check_memory(val)
+        train_cache_memory_final += train_cache_memory
+        val_cache_memory_final += val_cache_memory
+
+    total_cache_memory = train_cache_memory_final + val_cache_memory_final
+    total_cache_memory += memory_buffer * total_cache_memory  # memory required in bytes
+    available_memory = psutil.virtual_memory().available  # available memory in bytes
+
+    if total_cache_memory > available_memory:
+        return False
+    return True
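The simplified check_cache_memory boils down to a single headroom comparison: the summed image bytes for the train and val labels, inflated by the buffer fraction, checked against available RAM. A minimal standalone sketch with hypothetical numbers:

import psutil

raw_cache_bytes = 6 * 1024**3  # hypothetical: 6 GB of decoded train + val images
memory_buffer = 0.2  # default: reserve 20% headroom on top of the cache size
required_bytes = raw_cache_bytes * (1 + memory_buffer)  # 7.2 GB required
fits = required_bytes <= psutil.virtual_memory().available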
sleap_nn/evaluation.py CHANGED
@@ -29,27 +29,11 @@ def get_instances(labeled_frame: sio.LabeledFrame) -> List[MatchInstance]:
     """
     instance_list = []
     frame_idx = labeled_frame.frame_idx
-
-    # Extract video path with fallbacks for embedded videos
-    video = labeled_frame.video
-    video_path = None
-    if video is not None:
-        backend = getattr(video, "backend", None)
-        if backend is not None:
-            # Try source_filename first (for embedded videos with provenance)
-            video_path = getattr(backend, "source_filename", None)
-            if video_path is None:
-                video_path = getattr(backend, "filename", None)
-        # Fallback to video.filename if backend doesn't have it
-        if video_path is None:
-            video_path = getattr(video, "filename", None)
-    # Handle list filenames (image sequences)
-    if isinstance(video_path, list) and video_path:
-        video_path = video_path[0]
-    # Final fallback: use a unique identifier
-    if video_path is None:
-        video_path = f"video_{id(video)}" if video is not None else "unknown"
-
+    video_path = (
+        labeled_frame.video.backend.source_filename
+        if hasattr(labeled_frame.video.backend, "source_filename")
+        else labeled_frame.video.backend.filename
+    )
     for instance in labeled_frame.instances:
         match_instance = MatchInstance(
             instance=instance, frame_idx=frame_idx, video_path=video_path
@@ -63,10 +47,6 @@ def find_frame_pairs(
 ) -> List[Tuple[sio.LabeledFrame, sio.LabeledFrame]]:
     """Find corresponding frames across two sets of labels.
 
-    This function uses sleap-io's robust video matching API to handle various
-    scenarios including embedded videos, cross-platform paths, and videos with
-    different metadata.
-
     Args:
         labels_gt: A `sio.Labels` instance with ground truth instances.
         labels_pr: A `sio.Labels` instance with predicted instances.
@@ -76,15 +56,16 @@
     Returns:
         A list of pairs of `sio.LabeledFrame`s in the form `(frame_gt, frame_pr)`.
     """
-    # Use sleap-io's robust video matching API (added in 0.6.2)
-    # The match() method returns a MatchResult with video_map: {pred_video: gt_video}
-    match_result = labels_gt.match(labels_pr)
-
     frame_pairs = []
-    # Iterate over matched video pairs (pred_video -> gt_video mapping)
-    for video_pr, video_gt in match_result.video_map.items():
-        if video_gt is None:
-            # No match found for this prediction video
+    for video_gt in labels_gt.videos:
+        # Find matching video instance in predictions.
+        video_pr = None
+        for video in labels_pr.videos:
+            if video_gt.matches_content(video) and video_gt.matches_path(video):
+                video_pr = video
+                break
+
+        if video_pr is None:
             continue
 
         # Find labeled frames in this video.
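The unchanged loop body that follows pairs labeled frames within each matched video, presumably keyed on frame_idx. The pairing logic, as a standalone sketch over any frame objects carrying a frame_idx attribute (names hypothetical):

def pair_frames(frames_gt, frames_pr):
    # Index ground-truth frames by frame index, then pair predictions that share it.
    by_idx = {lf.frame_idx: lf for lf in frames_gt}
    return [(by_idx[lf.frame_idx], lf) for lf in frames_pr if lf.frame_idx in by_idx]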
@@ -639,19 +620,11 @@ class Evaluator:
         mPCK_parts = pcks.mean(axis=0).mean(axis=-1)
         mPCK = mPCK_parts.mean()
 
-        # Precompute PCK at common thresholds
-        idx_5 = np.argmin(np.abs(thresholds - 5))
-        idx_10 = np.argmin(np.abs(thresholds - 10))
-        pck5 = pcks[:, :, idx_5].mean()
-        pck10 = pcks[:, :, idx_10].mean()
-
         return {
             "thresholds": thresholds,
             "pcks": pcks,
             "mPCK_parts": mPCK_parts,
             "mPCK": mPCK,
-            "PCK@5": pck5,
-            "PCK@10": pck10,
         }
 
     def visibility_metrics(self):
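With the PCK@5/PCK@10 convenience keys removed, a fixed-threshold PCK can still be recovered from the returned `thresholds` and `pcks` arrays, mirroring the deleted computation (sketch; `pck_metrics` stands in for the dict returned above):

import numpy as np

def pck_at(pck_metrics: dict, px: float) -> float:
    # Pick the sampled threshold closest to the requested pixel radius.
    idx = np.argmin(np.abs(pck_metrics["thresholds"] - px))
    return pck_metrics["pcks"][:, :, idx].mean()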
@@ -813,26 +786,11 @@ def run_evaluation(
     """Evaluate SLEAP-NN model predictions against ground truth labels."""
     logger.info("Loading ground truth labels...")
    ground_truth_instances = sio.load_slp(ground_truth_path)
-    logger.info(
-        f"  Ground truth: {len(ground_truth_instances.videos)} videos, "
-        f"{len(ground_truth_instances.labeled_frames)} frames"
-    )
 
     logger.info("Loading predicted labels...")
     predicted_instances = sio.load_slp(predicted_path)
-    logger.info(
-        f"  Predictions: {len(predicted_instances.videos)} videos, "
-        f"{len(predicted_instances.labeled_frames)} frames"
-    )
-
-    logger.info("Matching videos and frames...")
-    # Get match stats before creating evaluator
-    match_result = ground_truth_instances.match(predicted_instances)
-    logger.info(
-        f"  Videos matched: {match_result.n_videos_matched}/{len(match_result.video_map)}"
-    )
 
-    logger.info("Matching instances...")
+    logger.info("Creating evaluator...")
     evaluator = Evaluator(
         ground_truth_instances=ground_truth_instances,
         predicted_instances=predicted_instances,
@@ -841,38 +799,21 @@ def run_evaluation(
         match_threshold=match_threshold,
         user_labels_only=user_labels_only,
     )
-    logger.info(
-        f"  Frame pairs: {len(evaluator.frame_pairs)}, "
-        f"Matched instances: {len(evaluator.positive_pairs)}, "
-        f"Unmatched GT: {len(evaluator.false_negatives)}"
-    )
 
     logger.info("Computing evaluation metrics...")
     metrics = evaluator.evaluate()
 
-    # Compute PCK at specific thresholds (5 and 10 pixels)
-    dists = metrics["distance_metrics"]["dists"]
-    dists_clean = np.copy(dists)
-    dists_clean[np.isnan(dists_clean)] = np.inf
-    pck_5 = (dists_clean < 5).mean()
-    pck_10 = (dists_clean < 10).mean()
-
     # Print key metrics
     logger.info("Evaluation Results:")
-    logger.info(f"  mOKS: {metrics['mOKS']['mOKS']:.4f}")
-    logger.info(f"  mAP (OKS VOC): {metrics['voc_metrics']['oks_voc.mAP']:.4f}")
-    logger.info(f"  mAR (OKS VOC): {metrics['voc_metrics']['oks_voc.mAR']:.4f}")
-    logger.info(f"  Average Distance: {metrics['distance_metrics']['avg']:.2f} px")
-    logger.info(f"  dist.p50: {metrics['distance_metrics']['p50']:.2f} px")
-    logger.info(f"  dist.p95: {metrics['distance_metrics']['p95']:.2f} px")
-    logger.info(f"  dist.p99: {metrics['distance_metrics']['p99']:.2f} px")
-    logger.info(f"  mPCK: {metrics['pck_metrics']['mPCK']:.4f}")
-    logger.info(f"  PCK@5px: {pck_5:.4f}")
-    logger.info(f"  PCK@10px: {pck_10:.4f}")
+    logger.info(f"mOKS: {metrics['mOKS']['mOKS']:.4f}")
+    logger.info(f"mAP (OKS VOC): {metrics['voc_metrics']['oks_voc.mAP']:.4f}")
+    logger.info(f"mAR (OKS VOC): {metrics['voc_metrics']['oks_voc.mAR']:.4f}")
+    logger.info(f"Average Distance: {metrics['distance_metrics']['avg']:.4f}")
+    logger.info(f"mPCK: {metrics['pck_metrics']['mPCK']:.4f}")
     logger.info(
-        f"  Visibility Precision: {metrics['visibility_metrics']['precision']:.4f}"
+        f"Visibility Precision: {metrics['visibility_metrics']['precision']:.4f}"
    )
-    logger.info(f"  Visibility Recall: {metrics['visibility_metrics']['recall']:.4f}")
+    logger.info(f"Visibility Recall: {metrics['visibility_metrics']['recall']:.4f}")
 
     # Save metrics if path provided
     if save_metrics:
sleap_nn/inference/bottomup.py CHANGED
@@ -1,6 +1,5 @@
 """Inference modules for BottomUp models."""
 
-import logging
 from typing import Dict, Optional
 import torch
 import lightning as L
@@ -8,8 +7,6 @@ from sleap_nn.inference.peak_finding import find_local_peaks
 from sleap_nn.inference.paf_grouping import PAFScorer
 from sleap_nn.inference.identity import classify_peaks_from_maps
 
-logger = logging.getLogger(__name__)
-
 
 class BottomUpInferenceModel(L.LightningModule):
     """BottomUp Inference model.
@@ -66,28 +63,8 @@ class BottomUpInferenceModel(L.LightningModule):
         return_pafs: Optional[bool] = False,
         return_paf_graph: Optional[bool] = False,
         input_scale: float = 1.0,
-        max_peaks_per_node: Optional[int] = None,
     ):
-        """Initialise the model attributes.
-
-        Args:
-            torch_model: A `nn.Module` that accepts images and predicts confidence maps.
-            paf_scorer: A `PAFScorer` instance for grouping instances.
-            cms_output_stride: Output stride of confidence maps relative to images.
-            pafs_output_stride: Output stride of PAFs relative to images.
-            peak_threshold: Minimum confidence map value for valid peaks.
-            refinement: Peak refinement method: None, "integral", or "local".
-            integral_patch_size: Size of patches for integral refinement.
-            return_confmaps: If True, return confidence maps in output.
-            return_pafs: If True, return PAFs in output.
-            return_paf_graph: If True, return intermediate PAF graph in output.
-            input_scale: Scale factor applied to input images.
-            max_peaks_per_node: Maximum number of peaks allowed per node before
-                skipping PAF scoring. If any node has more peaks than this limit,
-                empty predictions are returned. This prevents combinatorial explosion
-                during early training when confidence maps are noisy. Set to None to
-                disable this check (default). Recommended value: 100.
-        """
+        """Initialise the model attributes."""
         super().__init__()
         self.torch_model = torch_model
         self.paf_scorer = paf_scorer
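The max_peaks_per_node guard removed here (and dropped from the forward pass in a later hunk below) counted peaks per node with nested Python loops over batch and nodes; a vectorized equivalent via torch.bincount, as a standalone sketch with made-up values:

import torch

peak_channel_inds = torch.tensor([0, 0, 1, 2, 2, 2])  # node index of each detected peak
n_nodes, max_peaks_per_node = 3, 2
counts = torch.bincount(peak_channel_inds, minlength=n_nodes)  # peaks per node: [2, 1, 3]
skip_paf_scoring = bool((counts > max_peaks_per_node).any())  # True: node 2 exceeds the cap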
@@ -100,7 +77,6 @@ class BottomUpInferenceModel(L.LightningModule):
         self.return_pafs = return_pafs
         self.return_paf_graph = return_paf_graph
         self.input_scale = input_scale
-        self.max_peaks_per_node = max_peaks_per_node
 
     def _generate_cms_peaks(self, cms):
         # TODO: append nans to batch them -> tensor (vectorize the initial paf grouping steps)
@@ -148,68 +124,26 @@ class BottomUpInferenceModel(L.LightningModule):
         )  # (batch, h, w, 2*edges)
         cms_peaks, cms_peak_vals, cms_peak_channel_inds = self._generate_cms_peaks(cms)
 
-        # Check if too many peaks per node (prevents combinatorial explosion)
-        skip_paf_scoring = False
-        if self.max_peaks_per_node is not None:
-            n_nodes = cms.shape[1]
-            for b in range(self.batch_size):
-                for node_idx in range(n_nodes):
-                    n_peaks = int((cms_peak_channel_inds[b] == node_idx).sum().item())
-                    if n_peaks > self.max_peaks_per_node:
-                        logger.warning(
-                            f"Skipping PAF scoring: node {node_idx} has {n_peaks} peaks "
-                            f"(max_peaks_per_node={self.max_peaks_per_node}). "
-                            f"Model may need more training."
-                        )
-                        skip_paf_scoring = True
-                        break
-                if skip_paf_scoring:
-                    break
-
-        if skip_paf_scoring:
-            # Return empty predictions for each sample
-            device = cms.device
-            n_nodes = cms.shape[1]
-            predicted_instances_adjusted = []
-            predicted_peak_scores = []
-            predicted_instance_scores = []
-            for _ in range(self.batch_size):
-                predicted_instances_adjusted.append(
-                    torch.full((0, n_nodes, 2), float("nan"), device=device)
-                )
-                predicted_peak_scores.append(
-                    torch.full((0, n_nodes), float("nan"), device=device)
-                )
-                predicted_instance_scores.append(torch.tensor([], device=device))
-            edge_inds = [
-                torch.tensor([], dtype=torch.int32, device=device)
-            ] * self.batch_size
-            edge_peak_inds = [
-                torch.tensor([], dtype=torch.int32, device=device).reshape(0, 2)
-            ] * self.batch_size
-            line_scores = [torch.tensor([], device=device)] * self.batch_size
-        else:
-            (
-                predicted_instances,
-                predicted_peak_scores,
-                predicted_instance_scores,
-                edge_inds,
-                edge_peak_inds,
-                line_scores,
-            ) = self.paf_scorer.predict(
-                pafs=pafs,
-                peaks=cms_peaks,
-                peak_vals=cms_peak_vals,
-                peak_channel_inds=cms_peak_channel_inds,
-            )
-
-            predicted_instances = [p / self.input_scale for p in predicted_instances]
-            predicted_instances_adjusted = []
-            for idx, p in enumerate(predicted_instances):
-                predicted_instances_adjusted.append(
-                    p / inputs["eff_scale"][idx].to(p.device)
-                )
+        (
+            predicted_instances,
+            predicted_peak_scores,
+            predicted_instance_scores,
+            edge_inds,
+            edge_peak_inds,
+            line_scores,
+        ) = self.paf_scorer.predict(
+            pafs=pafs,
+            peaks=cms_peaks,
+            peak_vals=cms_peak_vals,
+            peak_channel_inds=cms_peak_channel_inds,
+        )
 
+        predicted_instances = [p / self.input_scale for p in predicted_instances]
+        predicted_instances_adjusted = []
+        for idx, p in enumerate(predicted_instances):
+            predicted_instances_adjusted.append(
+                p / inputs["eff_scale"][idx].to(p.device)
+            )
         out = {
             "pred_instance_peaks": predicted_instances_adjusted,
             "pred_peak_values": predicted_peak_scores,
sleap_nn/inference/peak_finding.py CHANGED
@@ -2,60 +2,18 @@
 
 from typing import Optional, Tuple
 
+import kornia as K
+import numpy as np
 import torch
-import torch.nn.functional as F
+from kornia.geometry.transform import crop_and_resize
 
 from sleap_nn.data.instance_cropping import make_centered_bboxes
 
 
-def morphological_dilation(image: torch.Tensor, kernel: torch.Tensor) -> torch.Tensor:
-    """Apply morphological dilation using max pooling.
-
-    This is a pure PyTorch replacement for kornia.morphology.dilation.
-    For non-maximum suppression, it computes the maximum of 8 neighbors
-    (excluding the center pixel).
-
-    Args:
-        image: Input tensor of shape (B, 1, H, W).
-        kernel: Dilation kernel (3x3 expected for NMS).
-
-    Returns:
-        Dilated tensor of same shape as input.
-    """
-    # Pad the image to handle border pixels
-    padded = F.pad(image, (1, 1, 1, 1), mode="constant", value=float("-inf"))
-
-    # Extract 3x3 patches using unfold
-    # Shape: (B, 1, H, W, 3, 3)
-    patches = padded.unfold(2, 3, 1).unfold(3, 3, 1)
-
-    # Reshape to (B, 1, H, W, 9)
-    b, c, h, w, kh, kw = patches.shape
-    patches = patches.reshape(b, c, h, w, -1)
-
-    # Apply kernel mask (kernel has 0 at center, 1 elsewhere for NMS)
-    # Reshape kernel to (1, 1, 1, 1, 9)
-    kernel_flat = kernel.reshape(-1).to(patches.device)
-    kernel_mask = kernel_flat > 0
-
-    # Set non-kernel positions to -inf so they don't affect max
-    patches_masked = patches.clone()
-    patches_masked[..., ~kernel_mask] = float("-inf")
-
-    # Take max over the kernel neighborhood
-    max_vals = patches_masked.max(dim=-1)[0]
-
-    return max_vals
-
-
 def crop_bboxes(
     images: torch.Tensor, bboxes: torch.Tensor, sample_inds: torch.Tensor
 ) -> torch.Tensor:
-    """Crop bounding boxes from a batch of images using fast tensor indexing.
-
-    This uses tensor unfold operations to extract patches, which is significantly
-    faster than kornia's crop_and_resize (17-51x speedup) as it avoids perspective
-    transform computations.
+    """Crop bounding boxes from a batch of images.
 
     Args:
         images: Tensor of shape (samples, channels, height, width) of a batch of images.
@@ -69,7 +27,7 @@ def crop_bboxes(
             box should be cropped from.
 
     Returns:
-        A tensor of shape (n_bboxes, channels, crop_height, crop_width) of the same
+        A tensor of shape (n_bboxes, crop_height, crop_width, channels) of the same
         dtype as the input image. The crop size is inferred from the bounding box
         coordinates.
 
@@ -84,52 +42,25 @@ def crop_bboxes(
 
     See also: `make_centered_bboxes`
     """
-    n_crops = bboxes.shape[0]
-    if n_crops == 0:
-        # Return empty tensor; use default crop size since we can't infer from bboxes
-        return torch.empty(
-            0, images.shape[1], 0, 0, device=images.device, dtype=images.dtype
-        )
-
     # Compute bounding box size to use for crops.
-    height = int(abs(bboxes[0, 3, 1] - bboxes[0, 0, 1]).item()) + 1
-    width = int(abs(bboxes[0, 1, 0] - bboxes[0, 0, 0]).item()) + 1
+    height = abs(bboxes[0, 3, 1] - bboxes[0, 0, 1])
+    width = abs(bboxes[0, 1, 0] - bboxes[0, 0, 0])
+    box_size = tuple(torch.round(torch.Tensor((height + 1, width + 1))).to(torch.int32))
 
     # Store original dtype for conversion back after cropping.
     original_dtype = images.dtype
-    device = images.device
-    n_samples, channels, img_h, img_w = images.shape
-    half_h, half_w = height // 2, width // 2
 
-    # Pad images for edge handling.
-    images_padded = F.pad(
-        images.float(), (half_w, half_w, half_h, half_h), mode="constant", value=0
-    )
+    # Kornia's crop_and_resize requires float32 input.
+    images_to_crop = images[sample_inds]
+    if not torch.is_floating_point(images_to_crop):
+        images_to_crop = images_to_crop.float()
 
-    # Extract all possible patches using unfold (creates a view, no copy).
-    # Shape after unfold: (n_samples, channels, img_h, img_w, height, width)
-    patches = images_padded.unfold(2, height, 1).unfold(3, width, 1)
-
-    # Get crop centers from bboxes.
-    # The bbox top-left is at index 0, with (x, y) coordinates.
-    # We need the center of the crop (peak location), which is top-left + half_size.
-    # Ensure bboxes are on the same device as images for index computation.
-    bboxes_on_device = bboxes.to(device)
-    crop_x = (bboxes_on_device[:, 0, 0] + half_w).to(torch.long)
-    crop_y = (bboxes_on_device[:, 0, 1] + half_h).to(torch.long)
-
-    # Clamp indices to valid bounds to handle edge cases where centroids
-    # might be at or beyond image boundaries.
-    crop_x = torch.clamp(crop_x, 0, patches.shape[3] - 1)
-    crop_y = torch.clamp(crop_y, 0, patches.shape[2] - 1)
-
-    # Select crops using advanced indexing.
-    # Convert sample_inds to tensor if it's a list.
-    if not isinstance(sample_inds, torch.Tensor):
-        sample_inds = torch.tensor(sample_inds, device=device)
-    sample_inds_long = sample_inds.to(device=device, dtype=torch.long)
-    crops = patches[sample_inds_long, :, crop_y, crop_x]
-    # Shape: (n_crops, channels, height, width)
+    # Crop.
+    crops = crop_and_resize(
+        images_to_crop,  # (n_boxes, channels, height, width)
+        boxes=bboxes,
+        size=box_size,
+    )
 
     # Cast back to original dtype and return.
     crops = crops.to(original_dtype)
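For reference, kornia's crop_and_resize takes each box as four (x, y) corners in clockwise order (top-left, top-right, bottom-right, bottom-left), the same corner layout the height/width inference above assumes. A minimal standalone usage sketch:

import torch
from kornia.geometry.transform import crop_and_resize

images = torch.rand(1, 1, 128, 128)
# One 32x32 box with corners at (10, 10), (41, 10), (41, 41), (10, 41).
boxes = torch.tensor([[[10.0, 10.0], [41.0, 10.0], [41.0, 41.0], [10.0, 41.0]]])
crops = crop_and_resize(images, boxes=boxes, size=(32, 32))  # -> (1, 1, 32, 32)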
@@ -313,7 +244,7 @@ def find_local_peaks_rough(
     flat_img = cms.reshape(-1, 1, height, width)
 
     # Perform dilation filtering to find local maxima per channel and reshape back.
-    max_img = morphological_dilation(flat_img, kernel.to(flat_img.device))
+    max_img = K.morphology.dilation(flat_img, kernel.to(flat_img.device))
    max_img = max_img.reshape(-1, channels, height, width)
 
     # Filter for maxima and threshold.
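For context on the dilation-based non-maximum suppression used here: a pixel counts as a local peak when it is strictly greater than the maximum of its 8 neighbors (dilation with a center-zeroed kernel excludes the pixel itself) and also clears the score threshold. A standalone sketch:

import torch
import kornia as K

cms = torch.rand(1, 1, 64, 64)  # confidence maps: (samples, channels, height, width)
kernel = torch.ones(3, 3)
kernel[1, 1] = 0  # zero center: each pixel competes only with its 8 neighbors
max_img = K.morphology.dilation(cms, kernel)  # neighborhood max at every pixel
peaks = ((cms > max_img) & (cms > 0.2)).nonzero()  # rows: (sample, channel, y, x)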