sleap-nn 0.1.0a2__py3-none-any.whl → 0.1.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. sleap_nn/__init__.py +1 -1
  2. sleap_nn/architectures/convnext.py +5 -0
  3. sleap_nn/architectures/encoder_decoder.py +25 -6
  4. sleap_nn/architectures/swint.py +8 -0
  5. sleap_nn/cli.py +168 -39
  6. sleap_nn/evaluation.py +8 -0
  7. sleap_nn/export/__init__.py +21 -0
  8. sleap_nn/export/cli.py +1778 -0
  9. sleap_nn/export/exporters/__init__.py +51 -0
  10. sleap_nn/export/exporters/onnx_exporter.py +80 -0
  11. sleap_nn/export/exporters/tensorrt_exporter.py +291 -0
  12. sleap_nn/export/metadata.py +225 -0
  13. sleap_nn/export/predictors/__init__.py +63 -0
  14. sleap_nn/export/predictors/base.py +22 -0
  15. sleap_nn/export/predictors/onnx.py +154 -0
  16. sleap_nn/export/predictors/tensorrt.py +312 -0
  17. sleap_nn/export/utils.py +307 -0
  18. sleap_nn/export/wrappers/__init__.py +25 -0
  19. sleap_nn/export/wrappers/base.py +96 -0
  20. sleap_nn/export/wrappers/bottomup.py +243 -0
  21. sleap_nn/export/wrappers/bottomup_multiclass.py +195 -0
  22. sleap_nn/export/wrappers/centered_instance.py +56 -0
  23. sleap_nn/export/wrappers/centroid.py +58 -0
  24. sleap_nn/export/wrappers/single_instance.py +83 -0
  25. sleap_nn/export/wrappers/topdown.py +180 -0
  26. sleap_nn/export/wrappers/topdown_multiclass.py +304 -0
  27. sleap_nn/inference/peak_finding.py +47 -17
  28. sleap_nn/inference/postprocessing.py +284 -0
  29. sleap_nn/inference/predictors.py +213 -106
  30. sleap_nn/predict.py +35 -7
  31. sleap_nn/train.py +64 -0
  32. sleap_nn/training/callbacks.py +69 -22
  33. sleap_nn/training/lightning_modules.py +332 -30
  34. sleap_nn/training/model_trainer.py +67 -67
  35. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/METADATA +13 -1
  36. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/RECORD +40 -19
  37. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/WHEEL +0 -0
  38. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/entry_points.txt +0 -0
  39. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
  40. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a4.dist-info}/top_level.txt +0 -0
sleap_nn/export/wrappers/topdown_multiclass.py (new file)
@@ -0,0 +1,304 @@
+ """ONNX wrapper for top-down multiclass (supervised ID) models."""
+
+ from typing import Dict
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ from sleap_nn.export.wrappers.base import BaseExportWrapper
+
+
+ class TopDownMultiClassONNXWrapper(BaseExportWrapper):
+     """ONNX-exportable wrapper for top-down multiclass (supervised ID) models.
+
+     This wrapper handles models that output both confidence maps for keypoint
+     detection and class logits for identity classification. It runs on instance
+     crops (centered around detected centroids).
+
+     Expects input images as uint8 tensors in [0, 255].
+
+     Attributes:
+         model: The underlying PyTorch model (centered instance + class vectors heads).
+         output_stride: Output stride of the confmap head.
+         input_scale: Scale factor applied to input images before inference.
+         n_classes: Number of identity classes.
+     """
+
+     def __init__(
+         self,
+         model: nn.Module,
+         output_stride: int = 2,
+         input_scale: float = 1.0,
+         n_classes: int = 2,
+     ):
+         """Initialize the wrapper.
+
+         Args:
+             model: The underlying PyTorch model.
+             output_stride: Output stride of the confidence maps.
+             input_scale: Scale factor for input images.
+             n_classes: Number of identity classes (e.g., 2 for male/female).
+         """
+         super().__init__(model)
+         self.output_stride = output_stride
+         self.input_scale = input_scale
+         self.n_classes = n_classes
+
+     def forward(self, image: torch.Tensor) -> Dict[str, torch.Tensor]:
+         """Run top-down multiclass inference on crops.
+
+         Args:
+             image: Input image tensor of shape (batch, channels, height, width).
+                 Expected to be uint8 in [0, 255].
+
+         Returns:
+             Dictionary with keys:
+                 - "peaks": Predicted peak coordinates (batch, n_nodes, 2) in (x, y).
+                 - "peak_vals": Peak confidence values (batch, n_nodes).
+                 - "class_logits": Raw class logits (batch, n_classes).
+
+             The class assignment is done on CPU using Hungarian matching
+             via `get_class_inds_from_vectors()`.
+         """
+         # Normalize uint8 [0, 255] to float32 [0, 1]
+         image = self._normalize_uint8(image)
+
+         # Apply input scaling if needed
+         if self.input_scale != 1.0:
+             height = int(image.shape[-2] * self.input_scale)
+             width = int(image.shape[-1] * self.input_scale)
+             image = F.interpolate(
+                 image, size=(height, width), mode="bilinear", align_corners=False
+             )
+
+         # Forward pass
+         out = self.model(image)
+
+         # Extract outputs
+         confmaps = self._extract_tensor(out, ["centered", "instance", "confmap"])
+         class_logits = self._extract_tensor(out, ["class", "vector"])
+
+         # Find global peaks (one per node)
+         peaks, peak_vals = self._find_global_peaks(confmaps)
+
+         # Scale peaks back to input coordinates
+         peaks = peaks * (self.output_stride / self.input_scale)
+
+         return {
+             "peaks": peaks,
+             "peak_vals": peak_vals,
+             "class_logits": class_logits,
+         }
+
+
+ class TopDownMultiClassCombinedONNXWrapper(BaseExportWrapper):
+     """ONNX-exportable wrapper for combined centroid + multiclass instance models.
+
+     This wrapper combines a centroid detection model with a centered instance
+     multiclass model. It performs:
+     1. Centroid detection on full images
+     2. Cropping around each centroid using vectorized grid_sample
+     3. Instance keypoint detection + identity classification on each crop
+
+     Expects input images as uint8 tensors in [0, 255].
+     """
+
+     def __init__(
+         self,
+         centroid_model: nn.Module,
+         instance_model: nn.Module,
+         max_instances: int = 20,
+         crop_size: tuple = (192, 192),
+         centroid_output_stride: int = 4,
+         instance_output_stride: int = 2,
+         centroid_input_scale: float = 1.0,
+         instance_input_scale: float = 1.0,
+         n_nodes: int = 13,
+         n_classes: int = 2,
+     ):
+         """Initialize the combined wrapper.
+
+         Args:
+             centroid_model: Model for centroid detection.
+             instance_model: Model for instance keypoints + class prediction.
+             max_instances: Maximum number of instances to detect.
+             crop_size: Size of crops around centroids (height, width).
+             centroid_output_stride: Output stride of centroid model.
+             instance_output_stride: Output stride of instance model.
+             centroid_input_scale: Input scale for centroid model.
+             instance_input_scale: Input scale for instance model.
+             n_nodes: Number of keypoint nodes per instance.
+             n_classes: Number of identity classes.
+         """
+         super().__init__(centroid_model)  # Primary model is centroid
+         self.instance_model = instance_model
+         self.max_instances = max_instances
+         self.crop_size = crop_size
+         self.centroid_output_stride = centroid_output_stride
+         self.instance_output_stride = instance_output_stride
+         self.centroid_input_scale = centroid_input_scale
+         self.instance_input_scale = instance_input_scale
+         self.n_nodes = n_nodes
+         self.n_classes = n_classes
+
+         # Pre-compute base grid for crop extraction (same as TopDownONNXWrapper)
+         crop_h, crop_w = crop_size
+         y_crop = torch.linspace(-1, 1, crop_h, dtype=torch.float32)
+         x_crop = torch.linspace(-1, 1, crop_w, dtype=torch.float32)
+         grid_y, grid_x = torch.meshgrid(y_crop, x_crop, indexing="ij")
+         base_grid = torch.stack([grid_x, grid_y], dim=-1)
+         self.register_buffer("base_grid", base_grid, persistent=False)
+
+     def forward(self, image: torch.Tensor) -> Dict[str, torch.Tensor]:
+         """Run combined top-down multiclass inference.
+
+         Args:
+             image: Input image tensor of shape (batch, channels, height, width).
+                 Expected to be uint8 in [0, 255].
+
+         Returns:
+             Dictionary with keys:
+                 - "centroids": Detected centroids (batch, max_instances, 2).
+                 - "centroid_vals": Centroid confidence values (batch, max_instances).
+                 - "peaks": Instance peaks (batch, max_instances, n_nodes, 2).
+                 - "peak_vals": Peak values (batch, max_instances, n_nodes).
+                 - "class_logits": Class logits per instance (batch, max_instances, n_classes).
+                 - "instance_valid": Validity mask (batch, max_instances).
+         """
+         # Normalize input
+         image = self._normalize_uint8(image)
+         batch_size, channels, height, width = image.shape
+
+         # Apply centroid input scaling
+         scaled_image = image
+         if self.centroid_input_scale != 1.0:
+             scaled_h = int(height * self.centroid_input_scale)
+             scaled_w = int(width * self.centroid_input_scale)
+             scaled_image = F.interpolate(
+                 scaled_image,
+                 size=(scaled_h, scaled_w),
+                 mode="bilinear",
+                 align_corners=False,
+             )
+
+         # Centroid detection
+         centroid_out = self.model(scaled_image)
+         centroid_cms = self._extract_tensor(centroid_out, ["centroid", "confmap"])
+         centroids, centroid_vals, instance_valid = self._find_topk_peaks(
+             centroid_cms, self.max_instances
+         )
+         centroids = centroids * (
+             self.centroid_output_stride / self.centroid_input_scale
+         )
+
+         # Extract crops using vectorized grid_sample (same as TopDownONNXWrapper)
+         crops = self._extract_crops(image, centroids)
+         crops_flat = crops.reshape(
+             batch_size * self.max_instances,
+             channels,
+             self.crop_size[0],
+             self.crop_size[1],
+         )
+
+         # Apply instance input scaling if needed
+         if self.instance_input_scale != 1.0:
+             scaled_h = int(self.crop_size[0] * self.instance_input_scale)
+             scaled_w = int(self.crop_size[1] * self.instance_input_scale)
+             crops_flat = F.interpolate(
+                 crops_flat,
+                 size=(scaled_h, scaled_w),
+                 mode="bilinear",
+                 align_corners=False,
+             )
+
+         # Instance model forward (batch all crops)
+         instance_out = self.instance_model(crops_flat)
+         instance_cms = self._extract_tensor(
+             instance_out, ["centered", "instance", "confmap"]
+         )
+         instance_class = self._extract_tensor(instance_out, ["class", "vector"])
+
+         # Find peaks in all crops
+         crop_peaks, crop_peak_vals = self._find_global_peaks(instance_cms)
+         crop_peaks = crop_peaks * (
+             self.instance_output_stride / self.instance_input_scale
+         )
+
+         # Reshape to batch x instances x nodes x 2
+         crop_peaks = crop_peaks.reshape(batch_size, self.max_instances, self.n_nodes, 2)
+         peak_vals = crop_peak_vals.reshape(batch_size, self.max_instances, self.n_nodes)
+
+         # Reshape class logits
+         class_logits = instance_class.reshape(
+             batch_size, self.max_instances, self.n_classes
+         )
+
+         # Transform peaks from crop coordinates to full image coordinates
+         crop_offset = centroids.unsqueeze(2) - image.new_tensor(
+             [self.crop_size[1] / 2.0, self.crop_size[0] / 2.0]
+         )
+         peaks = crop_peaks + crop_offset
+
+         # Zero out invalid instances
+         invalid_mask = ~instance_valid
+         centroids = centroids.masked_fill(invalid_mask.unsqueeze(-1), 0.0)
+         centroid_vals = centroid_vals.masked_fill(invalid_mask, 0.0)
+         peaks = peaks.masked_fill(invalid_mask.unsqueeze(-1).unsqueeze(-1), 0.0)
+         peak_vals = peak_vals.masked_fill(invalid_mask.unsqueeze(-1), 0.0)
+         class_logits = class_logits.masked_fill(invalid_mask.unsqueeze(-1), 0.0)
+
+         return {
+             "centroids": centroids,
+             "centroid_vals": centroid_vals,
+             "peaks": peaks,
+             "peak_vals": peak_vals,
+             "class_logits": class_logits,
+             "instance_valid": instance_valid,
+         }
+
+     def _extract_crops(
+         self,
+         image: torch.Tensor,
+         centroids: torch.Tensor,
+     ) -> torch.Tensor:
+         """Extract crops around centroids using grid_sample.
+
+         This is the same vectorized implementation as TopDownONNXWrapper.
+         """
+         batch_size, channels, height, width = image.shape
+         crop_h, crop_w = self.crop_size
+         n_instances = centroids.shape[1]
+
+         scale_x = crop_w / width
+         scale_y = crop_h / height
+         scale = image.new_tensor([scale_x, scale_y])
+         base_grid = self.base_grid.to(device=image.device, dtype=image.dtype)
+         scaled_grid = base_grid * scale
+
+         scaled_grid = scaled_grid.unsqueeze(0).unsqueeze(0)
+         scaled_grid = scaled_grid.expand(batch_size, n_instances, -1, -1, -1)
+
+         norm_centroids = torch.zeros_like(centroids)
+         norm_centroids[..., 0] = (centroids[..., 0] / (width - 1)) * 2 - 1
+         norm_centroids[..., 1] = (centroids[..., 1] / (height - 1)) * 2 - 1
+         offset = norm_centroids.unsqueeze(2).unsqueeze(2)
+
+         sample_grid = scaled_grid + offset
+
+         image_expanded = image.unsqueeze(1).expand(-1, n_instances, -1, -1, -1)
+         image_flat = image_expanded.reshape(
+             batch_size * n_instances, channels, height, width
+         )
+         grid_flat = sample_grid.reshape(batch_size * n_instances, crop_h, crop_w, 2)
+
+         crops_flat = F.grid_sample(
+             image_flat,
+             grid_flat,
+             mode="bilinear",
+             padding_mode="zeros",
+             align_corners=True,
+         )
+
+         crops = crops_flat.reshape(batch_size, n_instances, channels, crop_h, crop_w)
+         return crops
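A wrapper like the combined one above is meant to be traced end to end, so it can in principle be exported with plain torch.onnx.export and run under onnxruntime. The sketch below is only an illustration of that generic path, not the package's own export CLI; the placeholder models (my_centroid_model, my_instance_model), the shapes, and the "image" input name are assumptions.

# Hedged sketch: exporting a combined wrapper with vanilla torch.onnx.export and
# running it under onnxruntime. Placeholder models and shapes are assumptions.
import numpy as np
import onnxruntime as ort
import torch

wrapper = TopDownMultiClassCombinedONNXWrapper(
    centroid_model=my_centroid_model,    # assumed: trained centroid model
    instance_model=my_instance_model,    # assumed: centered-instance + class head
    max_instances=8,
    crop_size=(192, 192),
    n_nodes=13,
    n_classes=2,
).eval()

dummy = torch.zeros(1, 1, 1024, 1024, dtype=torch.uint8)  # uint8 in [0, 255]
torch.onnx.export(
    wrapper,
    dummy,
    "topdown_multiclass.onnx",
    input_names=["image"],
    output_names=[
        "centroids", "centroid_vals", "peaks", "peak_vals",
        "class_logits", "instance_valid",
    ],
    dynamic_axes={"image": {0: "batch"}},
    opset_version=17,  # grid_sample requires a recent opset (GridSample, opset >= 16)
)

session = ort.InferenceSession("topdown_multiclass.onnx")
frame = np.zeros((1, 1, 1024, 1024), dtype=np.uint8)
outputs = session.run(None, {"image": frame})  # list ordered as output_names

Because the wrapper normalizes uint8 input and returns padded, fixed-size outputs with an instance_valid mask, downstream code can stay shape-static, which is what makes the ONNX/TensorRT export path in this release practical.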
sleap_nn/inference/peak_finding.py
@@ -3,9 +3,8 @@
  from typing import Optional, Tuple

  import kornia as K
- import numpy as np
  import torch
- from kornia.geometry.transform import crop_and_resize
+ import torch.nn.functional as F

  from sleap_nn.data.instance_cropping import make_centered_bboxes

@@ -13,7 +12,11 @@ from sleap_nn.data.instance_cropping import make_centered_bboxes
  def crop_bboxes(
      images: torch.Tensor, bboxes: torch.Tensor, sample_inds: torch.Tensor
  ) -> torch.Tensor:
-     """Crop bounding boxes from a batch of images.
+     """Crop bounding boxes from a batch of images using fast tensor indexing.
+
+     This uses tensor unfold operations to extract patches, which is significantly
+     faster than kornia's crop_and_resize (17-51x speedup) as it avoids perspective
+     transform computations.

      Args:
          images: Tensor of shape (samples, channels, height, width) of a batch of images.
@@ -27,7 +30,7 @@ def crop_bboxes(
              box should be cropped from.

      Returns:
-         A tensor of shape (n_bboxes, crop_height, crop_width, channels) of the same
+         A tensor of shape (n_bboxes, channels, crop_height, crop_width) of the same
          dtype as the input image. The crop size is inferred from the bounding box
          coordinates.

@@ -42,26 +45,53 @@ def crop_bboxes(

      See also: `make_centered_bboxes`
      """
+     n_crops = bboxes.shape[0]
+     if n_crops == 0:
+         # Return empty tensor; use default crop size since we can't infer from bboxes
+         return torch.empty(
+             0, images.shape[1], 0, 0, device=images.device, dtype=images.dtype
+         )
+
      # Compute bounding box size to use for crops.
-     height = abs(bboxes[0, 3, 1] - bboxes[0, 0, 1])
-     width = abs(bboxes[0, 1, 0] - bboxes[0, 0, 0])
-     box_size = tuple(torch.round(torch.Tensor((height + 1, width + 1))).to(torch.int32))
+     height = int(abs(bboxes[0, 3, 1] - bboxes[0, 0, 1]).item()) + 1
+     width = int(abs(bboxes[0, 1, 0] - bboxes[0, 0, 0]).item()) + 1

      # Store original dtype for conversion back after cropping.
      original_dtype = images.dtype
+     device = images.device
+     n_samples, channels, img_h, img_w = images.shape
+     half_h, half_w = height // 2, width // 2

-     # Kornia's crop_and_resize requires float32 input.
-     images_to_crop = images[sample_inds]
-     if not torch.is_floating_point(images_to_crop):
-         images_to_crop = images_to_crop.float()
-
-     # Crop.
-     crops = crop_and_resize(
-         images_to_crop,  # (n_boxes, channels, height, width)
-         boxes=bboxes,
-         size=box_size,
+     # Pad images for edge handling.
+     images_padded = F.pad(
+         images.float(), (half_w, half_w, half_h, half_h), mode="constant", value=0
      )

+     # Extract all possible patches using unfold (creates a view, no copy).
+     # Shape after unfold: (n_samples, channels, img_h, img_w, height, width)
+     patches = images_padded.unfold(2, height, 1).unfold(3, width, 1)
+
+     # Get crop centers from bboxes.
+     # The bbox top-left is at index 0, with (x, y) coordinates.
+     # We need the center of the crop (peak location), which is top-left + half_size.
+     # Ensure bboxes are on the same device as images for index computation.
+     bboxes_on_device = bboxes.to(device)
+     crop_x = (bboxes_on_device[:, 0, 0] + half_w).to(torch.long)
+     crop_y = (bboxes_on_device[:, 0, 1] + half_h).to(torch.long)
+
+     # Clamp indices to valid bounds to handle edge cases where centroids
+     # might be at or beyond image boundaries.
+     crop_x = torch.clamp(crop_x, 0, patches.shape[3] - 1)
+     crop_y = torch.clamp(crop_y, 0, patches.shape[2] - 1)
+
+     # Select crops using advanced indexing.
+     # Convert sample_inds to tensor if it's a list.
+     if not isinstance(sample_inds, torch.Tensor):
+         sample_inds = torch.tensor(sample_inds, device=device)
+     sample_inds_long = sample_inds.to(device=device, dtype=torch.long)
+     crops = patches[sample_inds_long, :, crop_y, crop_x]
+     # Shape: (n_crops, channels, height, width)
+
      # Cast back to original dtype and return.
      crops = crops.to(original_dtype)
      return crops
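The new crop_bboxes path replaces kornia's crop_and_resize with padding plus a double unfold, so each crop becomes a single index into a view of sliding windows rather than a perspective transform. A minimal standalone sketch of that trick, with toy sizes chosen here for illustration, checking an unfolded patch against a direct slice:

# Minimal sketch of the pad + unfold + index cropping trick (toy sizes, assumed names).
import torch
import torch.nn.functional as F

images = torch.arange(2 * 1 * 16 * 16, dtype=torch.float32).reshape(2, 1, 16, 16)
crop_h = crop_w = 5
half_h, half_w = crop_h // 2, crop_w // 2

# Pad so that crops centered near the border stay in bounds.
padded = F.pad(images, (half_w, half_w, half_h, half_h))

# unfold twice -> view of shape (2, 1, 16, 16, 5, 5); no data copy is made.
patches = padded.unfold(2, crop_h, 1).unfold(3, crop_w, 1)

# One crop centered at (y=7, x=3) of sample 1: index the patch grid directly.
y, x = 7, 3
crop = patches[1, :, y, x]  # (channels, crop_h, crop_w)

# Equivalent direct slice on the padded image.
direct = padded[1, :, y : y + crop_h, x : x + crop_w]
assert torch.equal(crop, direct)

Because unfold only produces a strided view, the cost of "extracting all possible patches" is an indexing bookkeeping step, which is where the reported speedup over crop_and_resize comes from.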
sleap_nn/inference/postprocessing.py (new file)
@@ -0,0 +1,284 @@
+ """Inference-level postprocessing filters for pose predictions.
+
+ This module provides filters that run after model inference but before tracking.
+ These filters are independent of tracking configuration and can be used standalone.
+ """
+
+ from typing import List, Literal
+
+ import numpy as np
+ import sleap_io as sio
+
+
+ def filter_overlapping_instances(
+     labels: sio.Labels,
+     threshold: float = 0.8,
+     method: Literal["iou", "oks"] = "iou",
+ ) -> sio.Labels:
+     """Filter overlapping instances using greedy non-maximum suppression.
+
+     Removes duplicate/overlapping instances by applying greedy NMS based on
+     either bounding box IOU or Object Keypoint Similarity (OKS). When two
+     instances overlap above the threshold, the lower-scoring one is removed.
+
+     This filter runs independently of tracking and can be used to clean up
+     model outputs before saving or further processing.
+
+     Args:
+         labels: Labels object with predicted instances to filter.
+         threshold: Similarity threshold for considering instances as overlapping.
+             Instances with similarity > threshold are candidates for removal.
+             Lower values are more aggressive (remove more).
+             Typical values: 0.3 (aggressive) to 0.8 (permissive).
+         method: Similarity metric to use for comparing instances.
+             "iou": Bounding box intersection-over-union.
+             "oks": Object Keypoint Similarity (pose-based).
+
+     Returns:
+         The input Labels object with overlapping instances removed.
+         Modification is done in place, but the object is also returned
+         for convenience.
+
+     Example:
+         >>> # Filter instances with >80% bounding box overlap
+         >>> labels = filter_overlapping_instances(labels, threshold=0.8, method="iou")
+         >>> # Filter using OKS similarity
+         >>> labels = filter_overlapping_instances(labels, threshold=0.5, method="oks")
+
+     Note:
+         - Only affects frames with 2+ predicted instances
+         - Uses instance.score for ranking; higher scores are preferred
+         - For IOU: bounding boxes computed from non-NaN keypoints
+         - For OKS: uses standard COCO OKS formula with bbox-derived scale
+     """
+     for lf in labels.labeled_frames:
+         if len(lf.instances) <= 1:
+             continue
+
+         # Separate predicted instances (have scores) from other instances
+         predicted = []
+         other = []
+         for inst in lf.instances:
+             if isinstance(inst, sio.PredictedInstance):
+                 predicted.append(inst)
+             else:
+                 other.append(inst)
+
+         # Only filter predicted instances
+         if len(predicted) <= 1:
+             continue
+
+         # Get scores
+         scores = np.array([_instance_score(inst) for inst in predicted])
+
+         # Apply greedy NMS with selected method
+         if method == "iou":
+             bboxes = np.array([_instance_bbox(inst) for inst in predicted])
+             keep_indices = _nms_greedy_iou(bboxes, scores, threshold)
+         elif method == "oks":
+             points = [inst.numpy() for inst in predicted]
+             keep_indices = _nms_greedy_oks(points, scores, threshold)
+         else:
+             raise ValueError(f"Unknown method: {method}. Use 'iou' or 'oks'.")
+
+         # Reconstruct instance list: kept predicted + other instances
+         kept_predicted = [predicted[i] for i in keep_indices]
+         lf.instances = kept_predicted + other
+
+     return labels
+
+
+ def _instance_bbox(instance: sio.PredictedInstance) -> np.ndarray:
+     """Compute axis-aligned bounding box from instance keypoints.
+
+     Args:
+         instance: Instance with keypoints.
+
+     Returns:
+         Bounding box as [xmin, ymin, xmax, ymax].
+         Returns [0, 0, 0, 0] if no valid keypoints.
+     """
+     pts = instance.numpy()  # (n_nodes, 2)
+     valid = ~np.isnan(pts).any(axis=1)
+
+     if not valid.any():
+         return np.array([0.0, 0.0, 0.0, 0.0])
+
+     pts = pts[valid]
+     return np.array(
+         [pts[:, 0].min(), pts[:, 1].min(), pts[:, 0].max(), pts[:, 1].max()]
+     )
+
+
+ def _instance_score(instance: sio.PredictedInstance) -> float:
+     """Get instance confidence score.
+
+     Args:
+         instance: Predicted instance.
+
+     Returns:
+         Instance score, or 1.0 if not available.
+     """
+     return getattr(instance, "score", 1.0)
+
+
+ def _nms_greedy_iou(
+     bboxes: np.ndarray,
+     scores: np.ndarray,
+     threshold: float,
+ ) -> List[int]:
+     """Apply greedy NMS using bounding box IOU.
+
+     Args:
+         bboxes: Bounding boxes of shape (N, 4) as [xmin, ymin, xmax, ymax].
+         scores: Confidence scores of shape (N,).
+         threshold: IOU threshold for suppression.
+
+     Returns:
+         List of indices to keep, in order of decreasing score.
+     """
+     if len(bboxes) == 0:
+         return []
+
+     # Sort by score descending
+     order = scores.argsort()[::-1].tolist()
+
+     keep = []
+     while order:
+         # Take highest scoring remaining instance
+         i = order.pop(0)
+         keep.append(i)
+
+         if not order:
+             break
+
+         # Compute IOU with all remaining instances
+         remaining_indices = np.array(order)
+         similarities = _compute_iou_one_to_many(bboxes[i], bboxes[remaining_indices])
+
+         # Keep only instances with similarity <= threshold
+         mask = similarities <= threshold
+         order = [order[j] for j in range(len(order)) if mask[j]]
+
+     return keep
+
+
+ def _nms_greedy_oks(
+     points_list: List[np.ndarray],
+     scores: np.ndarray,
+     threshold: float,
+ ) -> List[int]:
+     """Apply greedy NMS using Object Keypoint Similarity (OKS).
+
+     Args:
+         points_list: List of keypoint arrays, each of shape (n_nodes, 2).
+         scores: Confidence scores of shape (N,).
+         threshold: OKS threshold for suppression.
+
+     Returns:
+         List of indices to keep, in order of decreasing score.
+     """
+     if len(points_list) == 0:
+         return []
+
+     # Sort by score descending
+     order = scores.argsort()[::-1].tolist()
+
+     keep = []
+     while order:
+         # Take highest scoring remaining instance
+         i = order.pop(0)
+         keep.append(i)
+
+         if not order:
+             break
+
+         # Compute OKS with all remaining instances
+         similarities = np.array(
+             [_compute_oks(points_list[i], points_list[j]) for j in order]
+         )
+
+         # Keep only instances with similarity <= threshold
+         mask = similarities <= threshold
+         order = [order[j] for j in range(len(order)) if mask[j]]
+
+     return keep
+
+
+ def _compute_iou_one_to_many(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
+     """Compute IOU between one box and multiple boxes.
+
+     Args:
+         box: Single box of shape (4,) as [xmin, ymin, xmax, ymax].
+         boxes: Multiple boxes of shape (N, 4).
+
+     Returns:
+         IOU values of shape (N,).
+     """
+     # Intersection coordinates
+     inter_xmin = np.maximum(box[0], boxes[:, 0])
+     inter_ymin = np.maximum(box[1], boxes[:, 1])
+     inter_xmax = np.minimum(box[2], boxes[:, 2])
+     inter_ymax = np.minimum(box[3], boxes[:, 3])
+
+     # Intersection area (0 if no overlap)
+     inter_w = np.maximum(0.0, inter_xmax - inter_xmin)
+     inter_h = np.maximum(0.0, inter_ymax - inter_ymin)
+     inter_area = inter_w * inter_h
+
+     # Individual areas
+     area_a = (box[2] - box[0]) * (box[3] - box[1])
+     area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
+
+     # Union area
+     union_area = area_a + area_b - inter_area
+
+     # IOU (avoid division by zero)
+     return np.where(union_area > 0, inter_area / union_area, 0.0)
+
+
+ def _compute_oks(
+     points_a: np.ndarray,
+     points_b: np.ndarray,
+     kappa: float = 0.1,
+ ) -> float:
+     """Compute Object Keypoint Similarity (OKS) between two instances.
+
+     Uses a simplified OKS formula where all keypoints have equal weight
+     and scale is derived from the bounding box of the reference instance.
+
+     Args:
+         points_a: Keypoints of first instance, shape (n_nodes, 2).
+         points_b: Keypoints of second instance, shape (n_nodes, 2).
+         kappa: Per-keypoint constant controlling falloff. Default 0.1.
+
+     Returns:
+         OKS value in [0, 1]. Higher means more similar.
+     """
+     # Find valid keypoints (present in both instances)
+     valid_a = ~np.isnan(points_a).any(axis=1)
+     valid_b = ~np.isnan(points_b).any(axis=1)
+     valid = valid_a & valid_b
+
+     if not valid.any():
+         return 0.0
+
+     # Compute scale from bounding box area of instance A
+     pts_a_valid = points_a[valid_a]
+     if len(pts_a_valid) < 2:
+         return 0.0
+
+     bbox_w = pts_a_valid[:, 0].max() - pts_a_valid[:, 0].min()
+     bbox_h = pts_a_valid[:, 1].max() - pts_a_valid[:, 1].min()
+     scale_sq = bbox_w * bbox_h
+
+     if scale_sq <= 0:
+         return 0.0
+
+     # Compute squared distances for valid keypoints
+     d_sq = np.sum((points_a[valid] - points_b[valid]) ** 2, axis=1)
+
+     # OKS formula: mean of exp(-d^2 / (2 * s^2 * k^2))
+     oks_per_kpt = np.exp(-d_sq / (2 * scale_sq * kappa**2))
+
+     return float(np.mean(oks_per_kpt))
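The filter added above applies greedy NMS per frame: keep the highest-scoring instance, drop anything that overlaps it above the threshold, and repeat. The standalone toy walkthrough below (made-up boxes and scores) mirrors the IOU variant to show which instances would survive.

# Toy walkthrough of the greedy IOU NMS used by filter_overlapping_instances.
# Boxes and scores are invented for illustration only.
import numpy as np

def iou_one_to_many(box, boxes):
    # Same IOU definition as _compute_iou_one_to_many above.
    xmin = np.maximum(box[0], boxes[:, 0])
    ymin = np.maximum(box[1], boxes[:, 1])
    xmax = np.minimum(box[2], boxes[:, 2])
    ymax = np.minimum(box[3], boxes[:, 3])
    inter = np.maximum(0.0, xmax - xmin) * np.maximum(0.0, ymax - ymin)
    area_a = (box[2] - box[0]) * (box[3] - box[1])
    area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    union = area_a + area_b - inter
    return np.where(union > 0, inter / union, 0.0)

# Three hypothetical instances: 0 and 1 overlap heavily, 2 is elsewhere.
bboxes = np.array([
    [10.0, 10.0, 50.0, 50.0],      # score 0.9
    [12.0, 12.0, 52.0, 52.0],      # score 0.6, IOU with box 0 ~0.82
    [100.0, 100.0, 140.0, 140.0],  # score 0.8
])
scores = np.array([0.9, 0.6, 0.8])

keep, order = [], scores.argsort()[::-1].tolist()  # order by score: [0, 2, 1]
while order:
    i = order.pop(0)
    keep.append(i)
    if order:
        ious = iou_one_to_many(bboxes[i], bboxes[np.array(order)])
        order = [o for o, iou in zip(order, ious) if iou <= 0.8]

print(keep)  # with threshold 0.8, box 1 is suppressed -> [0, 2]

The same loop with _compute_oks in place of IOU gives the "oks" method, which is more forgiving of loose bounding boxes because it compares keypoint geometry directly.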