sleap-nn 0.1.0a2__py3-none-any.whl → 0.1.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. sleap_nn/__init__.py +1 -1
  2. sleap_nn/cli.py +36 -0
  3. sleap_nn/evaluation.py +8 -0
  4. sleap_nn/export/__init__.py +21 -0
  5. sleap_nn/export/cli.py +1778 -0
  6. sleap_nn/export/exporters/__init__.py +51 -0
  7. sleap_nn/export/exporters/onnx_exporter.py +80 -0
  8. sleap_nn/export/exporters/tensorrt_exporter.py +291 -0
  9. sleap_nn/export/metadata.py +225 -0
  10. sleap_nn/export/predictors/__init__.py +63 -0
  11. sleap_nn/export/predictors/base.py +22 -0
  12. sleap_nn/export/predictors/onnx.py +154 -0
  13. sleap_nn/export/predictors/tensorrt.py +312 -0
  14. sleap_nn/export/utils.py +307 -0
  15. sleap_nn/export/wrappers/__init__.py +25 -0
  16. sleap_nn/export/wrappers/base.py +96 -0
  17. sleap_nn/export/wrappers/bottomup.py +243 -0
  18. sleap_nn/export/wrappers/bottomup_multiclass.py +195 -0
  19. sleap_nn/export/wrappers/centered_instance.py +56 -0
  20. sleap_nn/export/wrappers/centroid.py +58 -0
  21. sleap_nn/export/wrappers/single_instance.py +83 -0
  22. sleap_nn/export/wrappers/topdown.py +180 -0
  23. sleap_nn/export/wrappers/topdown_multiclass.py +304 -0
  24. sleap_nn/inference/postprocessing.py +284 -0
  25. sleap_nn/predict.py +29 -0
  26. sleap_nn/train.py +64 -0
  27. sleap_nn/training/callbacks.py +62 -20
  28. sleap_nn/training/lightning_modules.py +332 -30
  29. sleap_nn/training/model_trainer.py +35 -67
  30. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a3.dist-info}/METADATA +12 -1
  31. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a3.dist-info}/RECORD +35 -14
  32. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a3.dist-info}/WHEEL +0 -0
  33. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a3.dist-info}/entry_points.txt +0 -0
  34. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a3.dist-info}/licenses/LICENSE +0 -0
  35. {sleap_nn-0.1.0a2.dist-info → sleap_nn-0.1.0a3.dist-info}/top_level.txt +0 -0
sleap_nn/export/utils.py
@@ -0,0 +1,307 @@
+ """Utilities for export workflows."""
+
+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import List, Optional, Tuple
+
+ from omegaconf import DictConfig, OmegaConf
+
+ from sleap_nn.config.training_job_config import TrainingJobConfig
+ from sleap_nn.config.utils import get_backbone_type_from_cfg, get_model_type_from_cfg
+
+
+ def load_training_config(model_dir: str | Path) -> DictConfig:
+     """Load training configuration from a model directory."""
+     model_dir = Path(model_dir)
+     yaml_path = model_dir / "training_config.yaml"
+     json_path = model_dir / "training_config.json"
+
+     if yaml_path.exists():
+         return OmegaConf.load(yaml_path.as_posix())
+     if json_path.exists():
+         return TrainingJobConfig.load_sleap_config(json_path.as_posix())
+
+     raise FileNotFoundError(
+         f"No training_config.yaml or training_config.json found in {model_dir}"
+     )
+
+
+ def resolve_input_scale(cfg: DictConfig) -> float:
+     """Resolve preprocessing scale from config."""
+     scale = cfg.data_config.preprocessing.scale
+     # Check for list/tuple or OmegaConf ListConfig
+     if isinstance(scale, (list, tuple)) or (
+         hasattr(scale, "__iter__")
+         and hasattr(scale, "__len__")
+         and not isinstance(scale, str)
+     ):
+         return float(scale[0]) if len(scale) > 0 else 1.0
+     return float(scale)
+
+
+ def resolve_input_channels(cfg: DictConfig) -> int:
+     """Resolve input channels from backbone config."""
+     backbone_type = get_backbone_type_from_cfg(cfg)
+     return int(cfg.model_config.backbone_config[backbone_type].in_channels)
+
+
+ def resolve_output_stride(cfg: DictConfig, model_type: str) -> int:
+     """Resolve output stride from head config."""
+     head_cfg = cfg.model_config.head_configs[model_type]
+     if head_cfg is None:
+         return 1
+     if hasattr(head_cfg, "confmaps") and head_cfg.confmaps is not None:
+         return int(head_cfg.confmaps.output_stride)
+     if hasattr(head_cfg, "pafs") and head_cfg.pafs is not None:
+         return int(head_cfg.pafs.output_stride)
+     return 1
+
+
+ def resolve_pafs_output_stride(cfg: DictConfig) -> int:
+     """Resolve PAFs output stride for bottom-up models."""
+     bottomup_cfg = getattr(cfg.model_config.head_configs, "bottomup", None)
+     if bottomup_cfg is not None and bottomup_cfg.pafs is not None:
+         return int(bottomup_cfg.pafs.output_stride)
+     return 1
+
+
+ def resolve_class_maps_output_stride(cfg: DictConfig) -> int:
+     """Resolve class maps output stride for multiclass bottom-up models."""
+     mc_bottomup_cfg = getattr(
+         cfg.model_config.head_configs, "multi_class_bottomup", None
+     )
+     if mc_bottomup_cfg is not None and mc_bottomup_cfg.class_maps is not None:
+         return int(mc_bottomup_cfg.class_maps.output_stride)
+     return 8
+
+
+ def resolve_class_names(cfg: DictConfig, model_type: str) -> List[str]:
+     """Resolve class names for multiclass models."""
+     head_cfg = cfg.model_config.head_configs.get(model_type)
+     if head_cfg is None:
+         return []
+
+     # Top-down multiclass: class_vectors.classes
+     if hasattr(head_cfg, "class_vectors") and head_cfg.class_vectors is not None:
+         classes = getattr(head_cfg.class_vectors, "classes", None)
+         if classes:
+             return list(classes)
+
+     # Bottom-up multiclass: class_maps.classes
+     if hasattr(head_cfg, "class_maps") and head_cfg.class_maps is not None:
+         classes = getattr(head_cfg.class_maps, "classes", None)
+         if classes:
+             return list(classes)
+
+     return []
+
+
+ def resolve_n_classes(cfg: DictConfig, model_type: str) -> int:
+     """Resolve number of classes for multiclass models."""
+     class_names = resolve_class_names(cfg, model_type)
+     return len(class_names) if class_names else 0
+
+
+ def resolve_crop_size(cfg: DictConfig) -> Optional[Tuple[int, int]]:
+     """Resolve crop size from preprocessing config."""
+     crop_size = cfg.data_config.preprocessing.crop_size
+     if crop_size is None:
+         return None
+     # Check for list/tuple or OmegaConf ListConfig
+     if isinstance(crop_size, (list, tuple)) or (
+         hasattr(crop_size, "__iter__")
+         and hasattr(crop_size, "__len__")
+         and not isinstance(crop_size, (str, int))
+     ):
+         if len(crop_size) == 2:
+             return int(crop_size[0]), int(crop_size[1])
+         if len(crop_size) == 1:
+             return int(crop_size[0]), int(crop_size[0])
+     return int(crop_size), int(crop_size)
+
+
+ def resolve_node_names(cfg: DictConfig, model_type: str) -> List[str]:
+     """Resolve node names for metadata."""
+     skeleton_nodes = _node_names_from_skeletons(cfg.data_config.skeletons)
+     if skeleton_nodes:
+         return skeleton_nodes
+
+     head_cfg = cfg.model_config.head_configs.get(model_type)
+     if head_cfg is None:
+         return []
+
+     if hasattr(head_cfg, "confmaps") and head_cfg.confmaps is not None:
+         part_names = getattr(head_cfg.confmaps, "part_names", None)
+         if part_names:
+             return list(part_names)
+
+     if model_type == "centroid":
+         anchor = getattr(head_cfg.confmaps, "anchor_part", None) if head_cfg else None
+         return [anchor] if anchor else ["centroid"]
+
+     return []
+
+
+ def resolve_edge_inds(cfg: DictConfig, node_names: List[str]) -> List[Tuple[int, int]]:
+     """Resolve edge indices for metadata."""
+     edges = _edge_inds_from_skeletons(cfg.data_config.skeletons)
+     if edges:
+         return _normalize_edges(edges, node_names)
+
+     bottomup_cfg = getattr(cfg.model_config.head_configs, "bottomup", None)
+     if bottomup_cfg is not None and bottomup_cfg.pafs is not None:
+         edges = bottomup_cfg.pafs.edges
+         if edges:
+             return _normalize_edges(edges, node_names)
+
+     return []
+
+
+ def resolve_model_type(cfg: DictConfig) -> str:
+     """Return model type from config."""
+     return get_model_type_from_cfg(cfg)
+
+
+ def resolve_backbone_type(cfg: DictConfig) -> str:
+     """Return backbone type from config."""
+     return get_backbone_type_from_cfg(cfg)
+
+
+ def resolve_input_shape(
+     cfg: DictConfig,
+     input_height: Optional[int] = None,
+     input_width: Optional[int] = None,
+ ) -> Tuple[int, int, int, int]:
+     """Resolve a dummy input shape for export."""
+     channels = resolve_input_channels(cfg)
+     height = input_height or cfg.data_config.preprocessing.max_height or 512
+     width = input_width or cfg.data_config.preprocessing.max_width or 512
+     return 1, channels, int(height), int(width)
+
+
+ def _node_names_from_skeletons(skeletons) -> List[str]:
+     if not skeletons:
+         return []
+     skeleton = skeletons[0]
+     if hasattr(skeleton, "nodes"):
+         try:
+             return [node.name for node in skeleton.nodes]
+         except Exception:
+             pass
+     if isinstance(skeleton, dict):
+         nodes = skeleton.get("nodes")
+         if nodes:
+             if isinstance(nodes[0], dict):
+                 return [node.get("name", "") for node in nodes if node.get("name")]
+             return [str(node) for node in nodes]
+         node_names = skeleton.get("node_names")
+         if node_names:
+             return [str(name) for name in node_names]
+     return []
+
+
+ def _edge_inds_from_skeletons(skeletons) -> List:
+     if not skeletons:
+         return []
+     skeleton = skeletons[0]
+     if hasattr(skeleton, "edge_inds"):
+         try:
+             return list(skeleton.edge_inds)
+         except Exception:
+             pass
+     if isinstance(skeleton, dict):
+         edges = skeleton.get("edges") or skeleton.get("edge_inds")
+         if edges:
+             return list(edges)
+     return []
+
+
+ def _normalize_edges(edges: List, node_names: List[str]) -> List[Tuple[int, int]]:
+     if not edges:
+         return []
+     if not node_names:
+         return [(int(src), int(dst)) for src, dst in edges]
+
+     if isinstance(edges[0][0], str):
+         name_to_idx = {name: idx for idx, name in enumerate(node_names)}
+         normalized = []
+         for src, dst in edges:
+             if src in name_to_idx and dst in name_to_idx:
+                 normalized.append((name_to_idx[src], name_to_idx[dst]))
+         return normalized
+
+     return [(int(src), int(dst)) for src, dst in edges]
+
+
+ def build_bottomup_candidate_template(
+     n_nodes: int, max_peaks_per_node: int, edge_inds: List[Tuple[int, int]]
+ ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor"]:
+     """Build candidate template matching the ONNX wrapper's line_scores ordering.
+
+     The BottomUpONNXWrapper produces line_scores with shape (n_edges, k*k) where,
+     for each edge connecting (src_node, dst_node), position i*k + j corresponds to:
+     - src peak flat index: src_node * k + i
+     - dst peak flat index: dst_node * k + j
+
+     This function builds edge_inds and edge_peak_inds tensors that match this exact
+     ordering, so that line_scores_flat[idx] corresponds to edge_peak_inds[idx].
+
+     Args:
+         n_nodes: Number of nodes in the skeleton.
+         max_peaks_per_node: Maximum peaks per node (k) used during export.
+         edge_inds: List of (src_node, dst_node) tuples defining skeleton edges.
+
+     Returns:
+         Tuple of (peak_channel_inds, edge_inds_tensor, edge_peak_inds_tensor):
+         - peak_channel_inds: (n_nodes * k,) tensor mapping flat peak index to node
+         - edge_inds_tensor: (n_edges * k * k,) tensor of edge indices for each candidate
+         - edge_peak_inds_tensor: (n_edges * k * k, 2) tensor of (src, dst) peak indices
+
+     Example:
+         >>> from sleap_nn.export.utils import build_bottomup_candidate_template
+         >>> peak_ch, edge_inds, edge_peaks = build_bottomup_candidate_template(
+         ...     n_nodes=15, max_peaks_per_node=20, edge_inds=[(1, 2), (1, 5)]
+         ... )
+         >>> # Use with ONNX output:
+         >>> line_scores_flat = line_scores.reshape(-1)
+         >>> valid_scores = line_scores_flat[valid_mask]
+         >>> valid_edge_peaks = edge_peaks[valid_mask]
+
+     Note:
+         This function is necessary because `get_connection_candidates()` in
+         `sleap_nn.inference.paf_grouping` uses unstable argsort, which shuffles
+         peak indices within each node and breaks alignment with ONNX output ordering.
+     """
+     import torch
+
+     k = max_peaks_per_node
+     n_edges = len(edge_inds)
+
+     # peak_channel_inds: [0,0,...0, 1,1,...1, ...] (k times each)
+     peak_channel_inds = torch.arange(n_nodes, dtype=torch.int32).repeat_interleave(k)
+
+     edge_inds_list = []
+     edge_peak_inds_list = []
+
+     for edge_idx, (src_node, dst_node) in enumerate(edge_inds):
+         # Build k*k candidate pairs in row-major order (i*k + j):
+         # src indices: [src_node*k + 0, src_node*k + 0, ..., src_node*k + 1, ...]
+         # dst indices: [dst_node*k + 0, dst_node*k + 1, ..., dst_node*k + 0, ...]
+         src_base = src_node * k
+         dst_base = dst_node * k
+
+         src_indices = torch.arange(k, dtype=torch.int32).repeat_interleave(k) + src_base
+         dst_indices = torch.arange(k, dtype=torch.int32).repeat(k) + dst_base
+
+         edge_inds_list.append(torch.full((k * k,), edge_idx, dtype=torch.int32))
+         edge_peak_inds_list.append(torch.stack([src_indices, dst_indices], dim=1))
+
+     if edge_inds_list:
+         edge_inds_tensor = torch.cat(edge_inds_list)
+         edge_peak_inds_tensor = torch.cat(edge_peak_inds_list)
+     else:
+         edge_inds_tensor = torch.empty((0,), dtype=torch.int32)
+         edge_peak_inds_tensor = torch.empty((0, 2), dtype=torch.int32)
+
+     return peak_channel_inds, edge_inds_tensor, edge_peak_inds_tensor
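
Taken together, the resolve_* helpers above turn a training config into the handful of scalars and lists an exporter needs. A minimal sketch of how they might be combined, assuming a hypothetical model directory "models/bottomup" that contains a training_config.yaml:

from sleap_nn.export.utils import (
    build_bottomup_candidate_template,
    load_training_config,
    resolve_edge_inds,
    resolve_input_shape,
    resolve_model_type,
    resolve_node_names,
)

cfg = load_training_config("models/bottomup")  # hypothetical path
model_type = resolve_model_type(cfg)           # e.g. "bottomup"
nodes = resolve_node_names(cfg, model_type)    # skeleton node names
edges = resolve_edge_inds(cfg, nodes)          # (src, dst) index pairs
shape = resolve_input_shape(cfg)               # (1, C, H, W) dummy input shape

# The candidate template can then be precomputed once per skeleton and reused
# for every frame's ONNX output:
peak_ch, edge_inds_t, edge_peaks_t = build_bottomup_candidate_template(
    n_nodes=len(nodes), max_peaks_per_node=20, edge_inds=edges
)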
sleap_nn/export/wrappers/__init__.py
@@ -0,0 +1,25 @@
+ """ONNX/TensorRT export wrappers."""
+
+ from sleap_nn.export.wrappers.base import BaseExportWrapper
+ from sleap_nn.export.wrappers.centroid import CentroidONNXWrapper
+ from sleap_nn.export.wrappers.centered_instance import CenteredInstanceONNXWrapper
+ from sleap_nn.export.wrappers.topdown import TopDownONNXWrapper
+ from sleap_nn.export.wrappers.bottomup import BottomUpONNXWrapper
+ from sleap_nn.export.wrappers.single_instance import SingleInstanceONNXWrapper
+ from sleap_nn.export.wrappers.topdown_multiclass import (
+     TopDownMultiClassONNXWrapper,
+     TopDownMultiClassCombinedONNXWrapper,
+ )
+ from sleap_nn.export.wrappers.bottomup_multiclass import BottomUpMultiClassONNXWrapper
+
+ __all__ = [
+     "BaseExportWrapper",
+     "CentroidONNXWrapper",
+     "CenteredInstanceONNXWrapper",
+     "TopDownONNXWrapper",
+     "BottomUpONNXWrapper",
+     "SingleInstanceONNXWrapper",
+     "TopDownMultiClassONNXWrapper",
+     "TopDownMultiClassCombinedONNXWrapper",
+     "BottomUpMultiClassONNXWrapper",
+ ]
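
The wrappers package exposes one export wrapper per head type. A hedged sketch of dispatching on model type; the mapping keys here are illustrative (only "centroid", "bottomup", and "multi_class_bottomup" appear verbatim in the config helpers above), and the actual export CLI may select wrappers differently:

from sleap_nn.export import wrappers

# Assumed model-type strings; the real dispatch logic may differ.
WRAPPER_BY_MODEL_TYPE = {
    "single_instance": wrappers.SingleInstanceONNXWrapper,
    "centroid": wrappers.CentroidONNXWrapper,
    "centered_instance": wrappers.CenteredInstanceONNXWrapper,
    "bottomup": wrappers.BottomUpONNXWrapper,
    "multi_class_bottomup": wrappers.BottomUpMultiClassONNXWrapper,
}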
sleap_nn/export/wrappers/base.py
@@ -0,0 +1,96 @@
+ """Base classes and shared helpers for export wrappers."""
+
+ from __future__ import annotations
+
+ from typing import Iterable, List, Tuple
+
+ import torch
+ from torch import nn
+ from torch.nn import functional as F
+
+
+ class BaseExportWrapper(nn.Module):
+     """Base class for ONNX-exportable wrappers."""
+
+     def __init__(self, model: nn.Module):
+         """Initialize wrapper with the underlying model.
+
+         Args:
+             model: The PyTorch model to wrap for export.
+         """
+         super().__init__()
+         self.model = model
+
+     @staticmethod
+     def _normalize_uint8(image: torch.Tensor) -> torch.Tensor:
+         """Normalize uint8 (or [0, 255] float) images to [0, 1]."""
+         if image.dtype != torch.float32:
+             image = image.float()
+         return image / 255.0
+
+     @staticmethod
+     def _extract_tensor(output, key_hints: Iterable[str]) -> torch.Tensor:
+         """Extract a tensor from a model output dict by fuzzy key matching."""
+         if isinstance(output, dict):
+             for key in output:
+                 for hint in key_hints:
+                     if hint.lower() in key.lower():
+                         return output[key]
+             return next(iter(output.values()))
+         return output
+
+     @staticmethod
+     def _find_topk_peaks(
+         confmaps: torch.Tensor, k: int
+     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+         """Top-K peak finding with NMS via max pooling."""
+         batch_size, _, height, width = confmaps.shape
+         pooled = F.max_pool2d(confmaps, kernel_size=3, stride=1, padding=1)
+         is_peak = (confmaps == pooled) & (confmaps > 0)
+
+         confmaps_flat = confmaps.reshape(batch_size, height * width)
+         is_peak_flat = is_peak.reshape(batch_size, height * width)
+         masked = torch.where(
+             is_peak_flat, confmaps_flat, torch.full_like(confmaps_flat, -1e9)
+         )
+         values, indices = torch.topk(masked, k=k, dim=1)
+
+         y = indices // width
+         x = indices % width
+         peaks = torch.stack([x.float(), y.float()], dim=-1)
+         valid = values > 0
+         return peaks, values, valid
+
+     @staticmethod
+     def _find_topk_peaks_per_node(
+         confmaps: torch.Tensor, k: int
+     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+         """Top-K peak finding per channel with NMS via max pooling."""
+         batch_size, n_nodes, height, width = confmaps.shape
+         pooled = F.max_pool2d(confmaps, kernel_size=3, stride=1, padding=1)
+         is_peak = (confmaps == pooled) & (confmaps > 0)
+
+         confmaps_flat = confmaps.reshape(batch_size, n_nodes, height * width)
+         is_peak_flat = is_peak.reshape(batch_size, n_nodes, height * width)
+         masked = torch.where(
+             is_peak_flat, confmaps_flat, torch.full_like(confmaps_flat, -1e9)
+         )
+         values, indices = torch.topk(masked, k=k, dim=2)
+
+         y = indices // width
+         x = indices % width
+         peaks = torch.stack([x.float(), y.float()], dim=-1)
+         valid = values > 0
+         return peaks, values, valid
+
+     @staticmethod
+     def _find_global_peaks(
+         confmaps: torch.Tensor,
+     ) -> Tuple[torch.Tensor, torch.Tensor]:
+         """Find global maxima per channel."""
+         batch_size, channels, height, width = confmaps.shape
+         flat = confmaps.reshape(batch_size, channels, height * width)
+         values, indices = flat.max(dim=-1)
+         y = indices // width
+         x = indices % width
+         peaks = torch.stack([x.float(), y.float()], dim=-1)
+         return peaks, values
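
The peak finders above implement non-maximum suppression without any data-dependent shapes: a pixel is a peak iff it equals its 3x3 max-pooled neighborhood and is positive, which keeps the op ONNX-friendly. A self-contained check of that trick (standalone torch, independent of the wrapper class):

import torch
from torch.nn import functional as F

cm = torch.zeros(1, 1, 8, 8)  # (batch, channels, H, W) confidence map
cm[0, 0, 2, 3] = 0.9          # one synthetic peak
cm[0, 0, 5, 6] = 0.4          # a second, weaker peak

pooled = F.max_pool2d(cm, kernel_size=3, stride=1, padding=1)
is_peak = (cm == pooled) & (cm > 0)
print(is_peak.nonzero())      # -> [[0, 0, 2, 3], [0, 0, 5, 6]] as (b, c, y, x)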
sleap_nn/export/wrappers/bottomup.py
@@ -0,0 +1,243 @@
+ """Bottom-up ONNX wrapper."""
+
+ from __future__ import annotations
+
+ from typing import Dict, Tuple
+
+ import torch
+ from torch import nn
+ from torch.nn import functional as F
+
+ from sleap_nn.export.wrappers.base import BaseExportWrapper
+
+
+ class BottomUpONNXWrapper(BaseExportWrapper):
+     """ONNX-exportable wrapper for bottom-up inference up to PAF scoring.
+
+     Expects input images as uint8 tensors in [0, 255].
+     """
+
+     def __init__(
+         self,
+         model: nn.Module,
+         skeleton_edges: list,
+         n_nodes: int,
+         max_peaks_per_node: int = 20,
+         n_line_points: int = 10,
+         cms_output_stride: int = 4,
+         pafs_output_stride: int = 8,
+         max_edge_length_ratio: float = 0.25,
+         dist_penalty_weight: float = 1.0,
+         input_scale: float = 1.0,
+     ) -> None:
+         """Initialize bottom-up ONNX wrapper.
+
+         Args:
+             model: Bottom-up model producing confidence maps and PAFs.
+             skeleton_edges: List of (src, dst) edge tuples defining the skeleton.
+             n_nodes: Number of nodes in the skeleton.
+             max_peaks_per_node: Maximum peaks to detect per node type.
+             n_line_points: Points to sample along PAF edges.
+             cms_output_stride: Confidence map output stride.
+             pafs_output_stride: PAF output stride.
+             max_edge_length_ratio: Maximum edge length as ratio of image size.
+             dist_penalty_weight: Weight for distance penalty in scoring.
+             input_scale: Input scaling factor.
+         """
+         super().__init__(model)
+         self.n_nodes = n_nodes
+         self.n_edges = len(skeleton_edges)
+         self.max_peaks_per_node = max_peaks_per_node
+         self.n_line_points = n_line_points
+         self.cms_output_stride = cms_output_stride
+         self.pafs_output_stride = pafs_output_stride
+         self.max_edge_length_ratio = max_edge_length_ratio
+         self.dist_penalty_weight = dist_penalty_weight
+         self.input_scale = input_scale
+
+         edge_src = torch.tensor([e[0] for e in skeleton_edges], dtype=torch.long)
+         edge_dst = torch.tensor([e[1] for e in skeleton_edges], dtype=torch.long)
+         self.register_buffer("edge_src", edge_src)
+         self.register_buffer("edge_dst", edge_dst)
+
+         line_samples = torch.linspace(0, 1, n_line_points, dtype=torch.float32)
+         self.register_buffer("line_samples", line_samples)
+
+     def forward(self, image: torch.Tensor) -> Dict[str, torch.Tensor]:
+         """Run bottom-up inference and return fixed-size outputs.
+
+         Note: confmaps and pafs are NOT returned, to avoid a D2H transfer
+         bottleneck. Peak detection and PAF scoring are performed on GPU within
+         this wrapper.
+         """
+         image = self._normalize_uint8(image)
+         if self.input_scale != 1.0:
+             height = int(image.shape[-2] * self.input_scale)
+             width = int(image.shape[-1] * self.input_scale)
+             image = F.interpolate(
+                 image, size=(height, width), mode="bilinear", align_corners=False
+             )
+
+         batch_size, _, height, width = image.shape
+
+         out = self.model(image)
+         if isinstance(out, dict):
+             confmaps = self._extract_tensor(out, ["confmap", "multiinstance"])
+             pafs = self._extract_tensor(out, ["paf", "affinity"])
+         else:
+             confmaps, pafs = out[:2]
+
+         peaks, peak_vals, peak_mask = self._find_topk_peaks_per_node(
+             confmaps, self.max_peaks_per_node
+         )
+
+         peaks = peaks * self.cms_output_stride
+
+         # Compute max_edge_length to match the PyTorch implementation:
+         #   max_edge_length = ratio * max(paf_dims) * pafs_stride
+         # PAFs shape is (batch, 2 * n_edges, H, W).
+         _, n_paf_channels, paf_height, paf_width = pafs.shape
+         max_paf_dim = max(n_paf_channels, paf_height, paf_width)
+         max_edge_length = torch.tensor(
+             self.max_edge_length_ratio * max_paf_dim * self.pafs_output_stride,
+             dtype=peaks.dtype,
+             device=peaks.device,
+         )
+
+         line_scores, candidate_mask = self._score_all_candidates(
+             pafs, peaks, peak_mask, max_edge_length
+         )
+
+         # Only return the final outputs needed for CPU-side grouping. Do NOT
+         # return confmaps/pafs: they are large (~29 MB/batch) and cause a D2H
+         # transfer bottleneck. Peak detection and PAF scoring are already done
+         # on GPU above.
+         return {
+             "peaks": peaks,
+             "peak_vals": peak_vals,
+             "peak_mask": peak_mask,
+             "line_scores": line_scores,
+             "candidate_mask": candidate_mask,
+         }
+
+     def _score_all_candidates(
+         self,
+         pafs: torch.Tensor,
+         peaks: torch.Tensor,
+         peak_mask: torch.Tensor,
+         max_edge_length: torch.Tensor,
+     ) -> Tuple[torch.Tensor, torch.Tensor]:
+         """Score all K*K candidate connections for each edge."""
+         batch_size = peaks.shape[0]
+         k = self.max_peaks_per_node
+         n_edges = self.n_edges
+
+         _, _, paf_height, paf_width = pafs.shape
+
+         src_peaks = peaks[:, self.edge_src, :, :]
+         dst_peaks = peaks[:, self.edge_dst, :, :]
+
+         src_mask = peak_mask[:, self.edge_src, :]
+         dst_mask = peak_mask[:, self.edge_dst, :]
+
+         src_peaks_exp = src_peaks.unsqueeze(3).expand(-1, -1, -1, k, -1)
+         dst_peaks_exp = dst_peaks.unsqueeze(2).expand(-1, -1, k, -1, -1)
+
+         src_mask_exp = src_mask.unsqueeze(3).expand(-1, -1, -1, k)
+         dst_mask_exp = dst_mask.unsqueeze(2).expand(-1, -1, k, -1)
+         candidate_mask = src_mask_exp & dst_mask_exp
+
+         src_peaks_flat = src_peaks_exp.reshape(batch_size, n_edges, k * k, 2)
+         dst_peaks_flat = dst_peaks_exp.reshape(batch_size, n_edges, k * k, 2)
+         candidate_mask_flat = candidate_mask.reshape(batch_size, n_edges, k * k)
+
+         spatial_vecs = dst_peaks_flat - src_peaks_flat
+         spatial_lengths = torch.norm(spatial_vecs, dim=-1, keepdim=True).clamp(min=1e-6)
+         spatial_vecs_norm = spatial_vecs / spatial_lengths
+
+         line_samples = self.line_samples.view(1, 1, 1, -1, 1)
+         src_exp = src_peaks_flat.unsqueeze(3)
+         dst_exp = dst_peaks_flat.unsqueeze(3)
+         line_points = src_exp + line_samples * (dst_exp - src_exp)
+
+         line_points_paf = line_points / self.pafs_output_stride
+         line_x = line_points_paf[..., 0].clamp(0, paf_width - 1)
+         line_y = line_points_paf[..., 1].clamp(0, paf_height - 1)
+
+         line_scores = self._sample_and_score_lines(
+             pafs,
+             line_x,
+             line_y,
+             spatial_vecs_norm,
+             spatial_lengths.squeeze(-1),
+             max_edge_length,
+         )
+
+         line_scores = line_scores.masked_fill(~candidate_mask_flat, -2.0)
+         return line_scores, candidate_mask_flat
+
+     def _sample_and_score_lines(
+         self,
+         pafs: torch.Tensor,
+         line_x: torch.Tensor,
+         line_y: torch.Tensor,
+         spatial_vecs_norm: torch.Tensor,
+         spatial_lengths: torch.Tensor,
+         max_edge_length: torch.Tensor,
+     ) -> torch.Tensor:
+         """Sample PAF values along lines and compute scores."""
+         batch_size, n_edges, k2, n_points = line_x.shape
+         _, _, paf_height, paf_width = pafs.shape
+
+         all_scores = []
+         for edge_idx in range(n_edges):
+             paf_x = pafs[:, 2 * edge_idx, :, :]
+             paf_y = pafs[:, 2 * edge_idx + 1, :, :]
+
+             lx = line_x[:, edge_idx, :, :]
+             ly = line_y[:, edge_idx, :, :]
+
+             lx_norm = (lx / (paf_width - 1)) * 2 - 1
+             ly_norm = (ly / (paf_height - 1)) * 2 - 1
+
+             grid = torch.stack([lx_norm, ly_norm], dim=-1)
+
+             paf_x_samples = F.grid_sample(
+                 paf_x.unsqueeze(1),
+                 grid,
+                 mode="bilinear",
+                 padding_mode="zeros",
+                 align_corners=True,
+             ).squeeze(1)
+
+             paf_y_samples = F.grid_sample(
+                 paf_y.unsqueeze(1),
+                 grid,
+                 mode="bilinear",
+                 padding_mode="zeros",
+                 align_corners=True,
+             ).squeeze(1)
+
+             paf_samples = torch.stack([paf_x_samples, paf_y_samples], dim=-1)
+             disp_vec = spatial_vecs_norm[:, edge_idx, :, :]
+
+             dot_products = (paf_samples * disp_vec.unsqueeze(2)).sum(dim=-1)
+             mean_scores = dot_products.mean(dim=-1)
+
+             edge_lengths = spatial_lengths[:, edge_idx, :]
+             dist_penalty = self._compute_distance_penalty(edge_lengths, max_edge_length)
+
+             all_scores.append(mean_scores + dist_penalty)
+
+         return torch.stack(all_scores, dim=1)
+
+     def _compute_distance_penalty(
+         self, distances: torch.Tensor, max_edge_length: torch.Tensor
+     ) -> torch.Tensor:
+         """Compute distance penalty for edge candidates.
+
+         Matches the PyTorch implementation in sleap_nn.inference.paf_grouping.
+         The penalty is 0 when distance <= max_edge_length, and negative when longer.
+         """
+         # Match PyTorch: penalty = clamp((max_edge_length / distance) - 1, max=0) * weight
+         penalty = torch.clamp((max_edge_length / distances) - 1, max=0)
+         return penalty * self.dist_penalty_weight
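
A quick numeric check of the penalty formula above, penalty = clamp(max_edge_length / distance - 1, max=0) * weight: connections at or under the length cap are unpenalized, and longer ones are scored increasingly negatively:

import torch

max_edge_length = torch.tensor(100.0)
distances = torch.tensor([50.0, 100.0, 200.0, 400.0])
penalty = torch.clamp((max_edge_length / distances) - 1, max=0)
print(penalty)  # tensor([ 0.0000,  0.0000, -0.5000, -0.7500])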