sleap-nn 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. sleap_nn/__init__.py +9 -2
  2. sleap_nn/architectures/convnext.py +5 -0
  3. sleap_nn/architectures/encoder_decoder.py +25 -6
  4. sleap_nn/architectures/swint.py +8 -0
  5. sleap_nn/cli.py +489 -46
  6. sleap_nn/config/data_config.py +51 -8
  7. sleap_nn/config/get_config.py +32 -24
  8. sleap_nn/config/trainer_config.py +88 -0
  9. sleap_nn/data/augmentation.py +61 -200
  10. sleap_nn/data/custom_datasets.py +433 -61
  11. sleap_nn/data/instance_cropping.py +71 -6
  12. sleap_nn/data/normalization.py +45 -2
  13. sleap_nn/data/providers.py +26 -0
  14. sleap_nn/data/resizing.py +2 -2
  15. sleap_nn/data/skia_augmentation.py +414 -0
  16. sleap_nn/data/utils.py +135 -17
  17. sleap_nn/evaluation.py +177 -42
  18. sleap_nn/export/__init__.py +21 -0
  19. sleap_nn/export/cli.py +1778 -0
  20. sleap_nn/export/exporters/__init__.py +51 -0
  21. sleap_nn/export/exporters/onnx_exporter.py +80 -0
  22. sleap_nn/export/exporters/tensorrt_exporter.py +291 -0
  23. sleap_nn/export/metadata.py +225 -0
  24. sleap_nn/export/predictors/__init__.py +63 -0
  25. sleap_nn/export/predictors/base.py +22 -0
  26. sleap_nn/export/predictors/onnx.py +154 -0
  27. sleap_nn/export/predictors/tensorrt.py +312 -0
  28. sleap_nn/export/utils.py +307 -0
  29. sleap_nn/export/wrappers/__init__.py +25 -0
  30. sleap_nn/export/wrappers/base.py +96 -0
  31. sleap_nn/export/wrappers/bottomup.py +243 -0
  32. sleap_nn/export/wrappers/bottomup_multiclass.py +195 -0
  33. sleap_nn/export/wrappers/centered_instance.py +56 -0
  34. sleap_nn/export/wrappers/centroid.py +58 -0
  35. sleap_nn/export/wrappers/single_instance.py +83 -0
  36. sleap_nn/export/wrappers/topdown.py +180 -0
  37. sleap_nn/export/wrappers/topdown_multiclass.py +304 -0
  38. sleap_nn/inference/__init__.py +6 -0
  39. sleap_nn/inference/bottomup.py +86 -20
  40. sleap_nn/inference/peak_finding.py +93 -16
  41. sleap_nn/inference/postprocessing.py +284 -0
  42. sleap_nn/inference/predictors.py +339 -137
  43. sleap_nn/inference/provenance.py +292 -0
  44. sleap_nn/inference/topdown.py +55 -47
  45. sleap_nn/legacy_models.py +65 -11
  46. sleap_nn/predict.py +224 -19
  47. sleap_nn/system_info.py +443 -0
  48. sleap_nn/tracking/tracker.py +8 -1
  49. sleap_nn/train.py +138 -44
  50. sleap_nn/training/callbacks.py +1258 -5
  51. sleap_nn/training/lightning_modules.py +902 -220
  52. sleap_nn/training/model_trainer.py +424 -111
  53. sleap_nn/training/schedulers.py +191 -0
  54. sleap_nn/training/utils.py +367 -2
  55. {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0.dist-info}/METADATA +35 -33
  56. sleap_nn-0.1.0.dist-info/RECORD +88 -0
  57. {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0.dist-info}/WHEEL +1 -1
  58. sleap_nn-0.0.5.dist-info/RECORD +0 -63
  59. {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0.dist-info}/entry_points.txt +0 -0
  60. {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0.dist-info}/licenses/LICENSE +0 -0
  61. {sleap_nn-0.0.5.dist-info → sleap_nn-0.1.0.dist-info}/top_level.txt +0 -0
@@ -5,14 +5,82 @@ import math
5
5
  import numpy as np
6
6
  import sleap_io as sio
7
7
  import torch
8
- from kornia.geometry.transform import crop_and_resize
8
+ from sleap_nn.data.skia_augmentation import crop_and_resize_skia as crop_and_resize
9
+
10
+
11
def compute_augmentation_padding(
    bbox_size: float,
    rotation_max: float = 0.0,
    scale_max: float = 1.0,
) -> int:
    """Compute padding needed to accommodate augmentation transforms.

    Rotation and scaling augmentations can push an instance's bounding box
    beyond its original extent. This returns the extra pixels (total across
    both sides, not per side) required so the full instance stays visible
    after the worst-case transform.

    Args:
        bbox_size: The size of the instance bounding box (max of width/height).
        rotation_max: Maximum absolute rotation angle in degrees. For symmetric
            rotation ranges like [-180, 180], pass 180.
        scale_max: Maximum scaling factor. For scale range [0.9, 1.1], pass 1.1.

    Returns:
        Padding in pixels to add around the bounding box (total, not per side).
    """
    # No rotation and no up-scaling: the box can only shrink or stay put.
    if rotation_max == 0.0 and scale_max <= 1.0:
        return 0

    # A square of side L rotated by θ has an axis-aligned bounding box of side
    # L * (|cos θ| + |sin θ|), which peaks at θ = 45° with factor sqrt(2).
    # Any rotation range reaching 45° therefore hits that worst case.
    if abs(rotation_max) >= 45:
        rotation_factor = math.sqrt(2)
    else:
        theta = math.radians(min(abs(rotation_max), 90))
        rotation_factor = abs(math.cos(theta)) + abs(math.sin(theta))

    # Scaling below 1.0 never enlarges the box, so clamp at 1.0.
    expansion_factor = rotation_factor * max(scale_max, 1.0)

    # Total growth of the box, rounded up to whole pixels.
    grown = bbox_size * expansion_factor
    return int(math.ceil(grown - bbox_size))
55
+
56
+
57
def find_max_instance_bbox_size(labels: "sio.Labels") -> float:
    """Return the largest bounding-box side length over all non-empty instances.

    Walks every labeled frame and measures each instance's points along x and
    y independently; the result is the biggest single-axis extent observed.

    Args:
        labels: A `sio.Labels` containing user-labeled instances.

    Returns:
        The maximum bounding box dimension (max of width or height) across all
        instances; 0.0 when no non-empty instances exist.
    """
    largest = 0.0
    for frame in labels:
        for instance in frame.instances:
            if instance.is_empty:
                continue
            points = instance.numpy()
            # Measure extent along x (axis 0) and y (axis 1) separately.
            # An all-NaN axis yields NaN, which is treated as zero extent.
            for axis in (0, 1):
                extent = np.nanmax(points[:, axis]) - np.nanmin(points[:, axis])
                if np.isnan(extent):
                    extent = 0
                largest = np.maximum(largest, extent)
    return float(largest)
9
78
 
10
79
 
11
80
  def find_instance_crop_size(
12
81
  labels: sio.Labels,
13
82
  padding: int = 0,
14
83
  maximum_stride: int = 2,
15
- input_scaling: float = 1.0,
16
84
  min_crop_size: Optional[int] = None,
17
85
  ) -> int:
18
86
  """Compute the size of the largest instance bounding box from labels.
@@ -23,8 +91,6 @@ def find_instance_crop_size(
23
91
  maximum_stride: Ensure that the returned crop size is divisible by this value.
24
92
  Useful for ensuring that the crop size will not be truncated in a given
25
93
  architecture.
26
- input_scaling: Float factor indicating the scale of the input images if any
27
- scaling will be done before cropping.
28
94
  min_crop_size: The crop size set by the user.
29
95
 
30
96
  Returns:
@@ -32,7 +98,7 @@ def find_instance_crop_size(
32
98
  will contain the instances when cropped. The returned crop size will be larger
33
99
  or equal to the input `min_crop_size`.
34
100
 
35
- This accounts for stride, padding and scaling when ensuring divisibility.
101
+ This accounts for stride and padding when ensuring divisibility.
36
102
  """
37
103
  # Check if user-specified crop size is divisible by max stride
38
104
  min_crop_size = 0 if min_crop_size is None else min_crop_size
@@ -46,7 +112,6 @@ def find_instance_crop_size(
46
112
  for inst in lf.instances:
47
113
  if not inst.is_empty: # only if at least one point is not nan
48
114
  pts = inst.numpy()
49
- pts *= input_scaling
50
115
  diff_x = np.nanmax(pts[:, 0]) - np.nanmin(pts[:, 0])
51
116
  diff_x = 0 if np.isnan(diff_x) else diff_x
52
117
  max_length = np.maximum(max_length, diff_x)
@@ -4,6 +4,36 @@ import torch
4
4
  import torchvision.transforms.v2.functional as F
5
5
 
6
6
 
7
def normalize_on_gpu(image: torch.Tensor) -> torch.Tensor:
    """Convert an image tensor to float32 in [0, 1], intended to run after GPU transfer.

    Called from the model's forward() once the image is on-device. Keeping the
    host-to-device copy in uint8 and normalizing afterwards shrinks the PCIe
    transfer 4x versus shipping float32 (per the module's design notes, up to
    17x faster for the transfer+normalization stage combined).

    Two inputs are handled:

    1. Integer tensors (e.g. uint8 in [0, 255]) -> cast to float32, divide by 255.
    2. Float tensors whose max exceeds 1.0 (cast upstream without normalizing)
       -> divide by 255.

    Args:
        image: Tensor image, integer-typed or float with values in [0, 255].

    Returns:
        Float32 tensor in [0, 1]. Already-normalized float input is returned
        unchanged.
    """
    if torch.is_floating_point(image):
        # Heuristic: a max above 1.0 means the float data was never divided by 255.
        return image / 255.0 if image.max() > 1.0 else image
    return image.float() / 255.0
35
+
36
+
7
37
  def convert_to_grayscale(image: torch.Tensor):
8
38
  """Convert given image to Grayscale image (single-channel).
9
39
 
@@ -38,8 +68,21 @@ def convert_to_rgb(image: torch.Tensor):
38
68
  return image
39
69
 
40
70
 
41
def apply_normalization(image: torch.Tensor) -> torch.Tensor:
    """Scale a uint8 [0, 255] image tensor to float32 [0, 1].

    Used in the training preprocessing path, where the kornia augmentation ops
    require float32 input. At inference time, normalization is instead deferred
    to the GPU via `normalize_on_gpu()` in the model's forward() to reduce PCIe
    bandwidth.

    Args:
        image: Tensor image (typically uint8 with values in [0, 255]).

    Returns:
        Float32 tensor normalized to [0, 1]; floating-point input is returned
        unchanged.
    """
    if torch.is_floating_point(image):
        return image
    return image.to(torch.float32) / 255.0
@@ -71,6 +71,8 @@ def process_lf(
71
71
  for inst in instances_list:
72
72
  if not inst.is_empty:
73
73
  instances.append(inst.numpy())
74
+ if len(instances) == 0:
75
+ return None
74
76
  instances = np.stack(instances, axis=0)
75
77
 
76
78
  # Add singleton time dimension for single frames.
@@ -233,6 +235,8 @@ class LabelsReader(Thread):
233
235
  instances_key: bool = False,
234
236
  only_labeled_frames: bool = False,
235
237
  only_suggested_frames: bool = False,
238
+ exclude_user_labeled: bool = False,
239
+ only_predicted_frames: bool = False,
236
240
  ):
237
241
  """Initialize attribute of the class."""
238
242
  super().__init__()
@@ -245,6 +249,8 @@ class LabelsReader(Thread):
245
249
 
246
250
  self.only_labeled_frames = only_labeled_frames
247
251
  self.only_suggested_frames = only_suggested_frames
252
+ self.exclude_user_labeled = exclude_user_labeled
253
+ self.only_predicted_frames = only_predicted_frames
248
254
 
249
255
  # Filter to only user labeled instances
250
256
  if self.only_labeled_frames:
@@ -265,6 +271,20 @@ class LabelsReader(Thread):
265
271
  )
266
272
  self.filtered_lfs.append(new_lf)
267
273
 
274
+ # Filter out user labeled frames
275
+ elif self.exclude_user_labeled:
276
+ self.filtered_lfs = []
277
+ for lf in self.labels:
278
+ if not lf.has_user_instances:
279
+ self.filtered_lfs.append(lf)
280
+
281
+ # Filter to only predicted frames
282
+ elif self.only_predicted_frames:
283
+ self.filtered_lfs = []
284
+ for lf in self.labels:
285
+ if lf.has_predicted_instances:
286
+ self.filtered_lfs.append(lf)
287
+
268
288
  else:
269
289
  self.filtered_lfs = [lf for lf in self.labels]
270
290
 
@@ -300,6 +320,8 @@ class LabelsReader(Thread):
300
320
  instances_key: bool = False,
301
321
  only_labeled_frames: bool = False,
302
322
  only_suggested_frames: bool = False,
323
+ exclude_user_labeled: bool = False,
324
+ only_predicted_frames: bool = False,
303
325
  ):
304
326
  """Create LabelsReader from a .slp filename."""
305
327
  labels = sio.load_slp(filename)
@@ -310,6 +332,8 @@ class LabelsReader(Thread):
310
332
  instances_key,
311
333
  only_labeled_frames,
312
334
  only_suggested_frames,
335
+ exclude_user_labeled,
336
+ only_predicted_frames,
313
337
  )
314
338
 
315
339
  def run(self):
@@ -333,6 +357,8 @@ class LabelsReader(Thread):
333
357
  for inst in lf:
334
358
  if not inst.is_empty:
335
359
  instances.append(inst.numpy())
360
+ if len(instances) == 0:
361
+ continue
336
362
  instances = np.stack(instances, axis=0)
337
363
 
338
364
  # Add singleton time dimension for single frames.
sleap_nn/data/resizing.py CHANGED
@@ -63,7 +63,7 @@ def apply_pad_to_stride(image: torch.Tensor, max_stride: int) -> torch.Tensor:
63
63
  image,
64
64
  (0, pad_width, 0, pad_height),
65
65
  mode="constant",
66
- ).to(torch.float32)
66
+ )
67
67
  return image
68
68
 
69
69
 
@@ -136,7 +136,7 @@ def apply_sizematcher(
136
136
  image,
137
137
  (0, pad_width, 0, pad_height),
138
138
  mode="constant",
139
- ).to(torch.float32)
139
+ )
140
140
 
141
141
  return image, eff_scale_ratio
142
142
  else:
@@ -0,0 +1,414 @@
1
+ """Skia-based augmentation functions that operate on uint8 tensors.
2
+
3
+ This module provides augmentation functions using skia-python that:
4
+ 1. Match the exact API of sleap_nn.data.augmentation
5
+ 2. Operate on uint8 tensors throughout (avoiding float32 conversions)
6
+ 3. Provide ~1.5x faster augmentation compared to Kornia
7
+
8
+ Usage:
9
+ from sleap_nn.data.skia_augmentation import (
10
+ apply_intensity_augmentation_skia,
11
+ apply_geometric_augmentation_skia,
12
+ )
13
+
14
+ # Apply augmentations (uint8 in, uint8 out)
15
+ image, instances = apply_intensity_augmentation_skia(image, instances, **config)
16
+ image, instances = apply_geometric_augmentation_skia(image, instances, **config)
17
+ """
18
+
19
+ from typing import Optional, Tuple
20
+ import numpy as np
21
+ import torch
22
+ import skia
23
+
24
+
25
+ def apply_intensity_augmentation_skia(
26
+ image: torch.Tensor,
27
+ instances: torch.Tensor,
28
+ uniform_noise_min: float = 0.0,
29
+ uniform_noise_max: float = 0.04,
30
+ uniform_noise_p: float = 0.0,
31
+ gaussian_noise_mean: float = 0.02,
32
+ gaussian_noise_std: float = 0.004,
33
+ gaussian_noise_p: float = 0.0,
34
+ contrast_min: float = 0.5,
35
+ contrast_max: float = 2.0,
36
+ contrast_p: float = 0.0,
37
+ brightness_min: float = 1.0,
38
+ brightness_max: float = 1.0,
39
+ brightness_p: float = 0.0,
40
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
41
+ """Apply intensity augmentations on uint8 image tensor.
42
+
43
+ Matches API of sleap_nn.data.augmentation.apply_intensity_augmentation.
44
+
45
+ Args:
46
+ image: Input tensor of shape (1, C, H, W) with dtype uint8 or float32.
47
+ instances: Keypoints tensor (not modified, just passed through).
48
+ uniform_noise_min: Minimum uniform noise (0-1 scale, maps to 0-255).
49
+ uniform_noise_max: Maximum uniform noise (0-1 scale).
50
+ uniform_noise_p: Probability of uniform noise.
51
+ gaussian_noise_mean: Gaussian noise mean (0-1 scale).
52
+ gaussian_noise_std: Gaussian noise std (0-1 scale).
53
+ gaussian_noise_p: Probability of Gaussian noise.
54
+ contrast_min: Minimum contrast factor.
55
+ contrast_max: Maximum contrast factor.
56
+ contrast_p: Probability of contrast adjustment.
57
+ brightness_min: Minimum brightness factor.
58
+ brightness_max: Maximum brightness factor.
59
+ brightness_p: Probability of brightness adjustment.
60
+
61
+ Returns:
62
+ Tuple of (augmented_image, instances). Image dtype matches input.
63
+ """
64
+ # Convert to numpy for Skia processing
65
+ is_float = image.dtype == torch.float32
66
+ if is_float:
67
+ img_np = (image[0].permute(1, 2, 0).numpy() * 255).astype(np.uint8)
68
+ else:
69
+ img_np = image[0].permute(1, 2, 0).numpy()
70
+
71
+ result = img_np.copy()
72
+
73
+ # Apply uniform noise (in uint8 space)
74
+ if uniform_noise_p > 0 and np.random.random() < uniform_noise_p:
75
+ noise = np.random.randint(
76
+ int(uniform_noise_min * 255),
77
+ int(uniform_noise_max * 255) + 1,
78
+ img_np.shape,
79
+ dtype=np.int16,
80
+ )
81
+ result = np.clip(result.astype(np.int16) + noise, 0, 255).astype(np.uint8)
82
+
83
+ # Apply Gaussian noise (in uint8 space)
84
+ if gaussian_noise_p > 0 and np.random.random() < gaussian_noise_p:
85
+ noise = np.random.normal(
86
+ gaussian_noise_mean * 255, gaussian_noise_std * 255, img_np.shape
87
+ ).astype(np.int16)
88
+ result = np.clip(result.astype(np.int16) + noise, 0, 255).astype(np.uint8)
89
+
90
+ # Apply contrast using lookup table (pure uint8)
91
+ if contrast_p > 0 and np.random.random() < contrast_p:
92
+ factor = np.random.uniform(contrast_min, contrast_max)
93
+ lut = np.arange(256, dtype=np.float32)
94
+ lut = np.clip((lut - 127.5) * factor + 127.5, 0, 255).astype(np.uint8)
95
+ result = lut[result]
96
+
97
+ # Apply brightness using lookup table (pure uint8)
98
+ if brightness_p > 0 and np.random.random() < brightness_p:
99
+ factor = np.random.uniform(brightness_min, brightness_max)
100
+ lut = np.arange(256, dtype=np.float32)
101
+ lut = np.clip(lut * factor, 0, 255).astype(np.uint8)
102
+ result = lut[result]
103
+
104
+ # Convert back to tensor
105
+ result_tensor = torch.from_numpy(result).permute(2, 0, 1).unsqueeze(0)
106
+ if is_float:
107
+ result_tensor = result_tensor.float() / 255.0
108
+
109
+ return result_tensor, instances
110
+
111
+
112
+ def apply_geometric_augmentation_skia(
113
+ image: torch.Tensor,
114
+ instances: torch.Tensor,
115
+ rotation_min: float = -15.0,
116
+ rotation_max: float = 15.0,
117
+ rotation_p: Optional[float] = None,
118
+ scale_min: float = 0.9,
119
+ scale_max: float = 1.1,
120
+ scale_p: Optional[float] = None,
121
+ translate_width: float = 0.02,
122
+ translate_height: float = 0.02,
123
+ translate_p: Optional[float] = None,
124
+ affine_p: float = 0.0,
125
+ erase_scale_min: float = 0.0001,
126
+ erase_scale_max: float = 0.01,
127
+ erase_ratio_min: float = 1.0,
128
+ erase_ratio_max: float = 1.0,
129
+ erase_p: float = 0.0,
130
+ mixup_lambda_min: float = 0.01,
131
+ mixup_lambda_max: float = 0.05,
132
+ mixup_p: float = 0.0,
133
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
134
+ """Apply geometric augmentations using Skia.
135
+
136
+ Matches API of sleap_nn.data.augmentation.apply_geometric_augmentation.
137
+
138
+ Args:
139
+ image: Input tensor of shape (1, C, H, W) with dtype uint8 or float32.
140
+ instances: Keypoints tensor of shape (1, n_instances, n_nodes, 2) or (1, n_nodes, 2).
141
+ rotation_min: Minimum rotation angle in degrees.
142
+ rotation_max: Maximum rotation angle in degrees.
143
+ rotation_p: Probability of rotation (independent). None = use affine_p.
144
+ scale_min: Minimum scale factor.
145
+ scale_max: Maximum scale factor.
146
+ scale_p: Probability of scaling (independent). None = use affine_p.
147
+ translate_width: Max horizontal translation as fraction of width.
148
+ translate_height: Max vertical translation as fraction of height.
149
+ translate_p: Probability of translation (independent). None = use affine_p.
150
+ affine_p: Probability of bundled affine transform.
151
+ erase_scale_min: Min proportion of image to erase.
152
+ erase_scale_max: Max proportion of image to erase.
153
+ erase_ratio_min: Min aspect ratio of erased area.
154
+ erase_ratio_max: Max aspect ratio of erased area.
155
+ erase_p: Probability of random erasing.
156
+ mixup_lambda_min: Min mixup strength (not implemented).
157
+ mixup_lambda_max: Max mixup strength (not implemented).
158
+ mixup_p: Probability of mixup (not implemented).
159
+
160
+ Returns:
161
+ Tuple of (augmented_image, augmented_instances). Image dtype matches input.
162
+ """
163
+ # Convert to numpy for Skia processing
164
+ is_float = image.dtype == torch.float32
165
+ if is_float:
166
+ img_np = (image[0].permute(1, 2, 0).numpy() * 255).astype(np.uint8)
167
+ else:
168
+ img_np = image[0].permute(1, 2, 0).numpy().copy()
169
+
170
+ h, w = img_np.shape[:2]
171
+ cx, cy = w / 2, h / 2
172
+
173
+ # Build transformation matrix
174
+ matrix = skia.Matrix()
175
+ has_transform = False
176
+
177
+ use_independent = (
178
+ rotation_p is not None or scale_p is not None or translate_p is not None
179
+ )
180
+
181
+ if use_independent:
182
+ if (
183
+ rotation_p is not None
184
+ and rotation_p > 0
185
+ and np.random.random() < rotation_p
186
+ ):
187
+ angle = np.random.uniform(rotation_min, rotation_max)
188
+ rot_matrix = skia.Matrix()
189
+ rot_matrix.setRotate(angle, cx, cy)
190
+ matrix = matrix.preConcat(rot_matrix)
191
+ has_transform = True
192
+
193
+ if scale_p is not None and scale_p > 0 and np.random.random() < scale_p:
194
+ scale = np.random.uniform(scale_min, scale_max)
195
+ scale_matrix = skia.Matrix()
196
+ scale_matrix.setScale(scale, scale, cx, cy)
197
+ matrix = matrix.preConcat(scale_matrix)
198
+ has_transform = True
199
+
200
+ if (
201
+ translate_p is not None
202
+ and translate_p > 0
203
+ and np.random.random() < translate_p
204
+ ):
205
+ tx = np.random.uniform(-translate_width, translate_width) * w
206
+ ty = np.random.uniform(-translate_height, translate_height) * h
207
+ trans_matrix = skia.Matrix()
208
+ trans_matrix.setTranslate(tx, ty)
209
+ matrix = matrix.preConcat(trans_matrix)
210
+ has_transform = True
211
+
212
+ elif affine_p > 0 and np.random.random() < affine_p:
213
+ angle = np.random.uniform(rotation_min, rotation_max)
214
+ scale = np.random.uniform(scale_min, scale_max)
215
+ tx = np.random.uniform(-translate_width, translate_width) * w
216
+ ty = np.random.uniform(-translate_height, translate_height) * h
217
+
218
+ matrix.setRotate(angle, cx, cy)
219
+ matrix.preScale(scale, scale, cx, cy)
220
+ matrix.preTranslate(tx, ty)
221
+ has_transform = True
222
+
223
+ # Apply geometric transform
224
+ if has_transform:
225
+ img_np = _transform_image_skia(img_np, matrix)
226
+ instances = _transform_keypoints_tensor(instances, matrix)
227
+
228
+ # Apply random erasing
229
+ if erase_p > 0 and np.random.random() < erase_p:
230
+ img_np = _apply_random_erase(
231
+ img_np, erase_scale_min, erase_scale_max, erase_ratio_min, erase_ratio_max
232
+ )
233
+
234
+ # Convert back to tensor
235
+ result_tensor = torch.from_numpy(img_np).permute(2, 0, 1).unsqueeze(0)
236
+ if is_float:
237
+ result_tensor = result_tensor.float() / 255.0
238
+
239
+ return result_tensor, instances
240
+
241
+
242
+ def _transform_image_skia(image: np.ndarray, matrix: skia.Matrix) -> np.ndarray:
243
+ """Transform image using Skia matrix (uint8 in, uint8 out)."""
244
+ h, w = image.shape[:2]
245
+ channels = image.shape[2] if image.ndim == 3 else 1
246
+
247
+ # Skia needs RGBA
248
+ if channels == 1:
249
+ image_rgba = np.stack(
250
+ [image.squeeze()] * 3 + [np.full((h, w), 255, dtype=np.uint8)], axis=-1
251
+ )
252
+ elif channels == 3:
253
+ alpha = np.full((h, w, 1), 255, dtype=np.uint8)
254
+ image_rgba = np.concatenate([image, alpha], axis=-1)
255
+ else:
256
+ raise ValueError(f"Unsupported channels: {channels}")
257
+
258
+ image_rgba = np.ascontiguousarray(image_rgba, dtype=np.uint8)
259
+ skia_image = skia.Image.fromarray(
260
+ image_rgba, colorType=skia.ColorType.kRGBA_8888_ColorType
261
+ )
262
+
263
+ surface = skia.Surface(w, h)
264
+ canvas = surface.getCanvas()
265
+ canvas.clear(skia.Color4f(0, 0, 0, 1))
266
+ canvas.setMatrix(matrix)
267
+
268
+ paint = skia.Paint()
269
+ paint.setAntiAlias(True)
270
+ sampling = skia.SamplingOptions(skia.FilterMode.kLinear)
271
+ canvas.drawImage(skia_image, 0, 0, sampling, paint)
272
+
273
+ result = surface.makeImageSnapshot().toarray()
274
+
275
+ if channels == 1:
276
+ return result[:, :, 0:1]
277
+ return result[:, :, :channels]
278
+
279
+
280
+ def _transform_keypoints_tensor(
281
+ keypoints: torch.Tensor, matrix: skia.Matrix
282
+ ) -> torch.Tensor:
283
+ """Transform keypoints tensor using Skia matrix."""
284
+ if keypoints.numel() == 0:
285
+ return keypoints
286
+
287
+ original_shape = keypoints.shape
288
+ flat = keypoints.reshape(-1, 2).numpy()
289
+
290
+ # Handle NaN values
291
+ valid_mask = ~np.isnan(flat).any(axis=1)
292
+ transformed = flat.copy()
293
+
294
+ if valid_mask.any():
295
+ valid_pts = flat[valid_mask]
296
+ skia_pts = [skia.Point(float(p[0]), float(p[1])) for p in valid_pts]
297
+ mapped = matrix.mapPoints(skia_pts)
298
+ transformed[valid_mask] = np.array([[p.x(), p.y()] for p in mapped])
299
+
300
+ return torch.from_numpy(transformed.reshape(original_shape).astype(np.float32))
301
+
302
+
303
+ def _apply_random_erase(
304
+ image: np.ndarray,
305
+ scale_min: float,
306
+ scale_max: float,
307
+ ratio_min: float,
308
+ ratio_max: float,
309
+ ) -> np.ndarray:
310
+ """Apply random erasing (uint8)."""
311
+ h, w = image.shape[:2]
312
+ area = h * w
313
+
314
+ erase_area = np.random.uniform(scale_min, scale_max) * area
315
+ aspect_ratio = np.random.uniform(ratio_min, ratio_max)
316
+
317
+ erase_h = int(np.sqrt(erase_area * aspect_ratio))
318
+ erase_w = int(np.sqrt(erase_area / aspect_ratio))
319
+
320
+ if erase_h >= h or erase_w >= w:
321
+ return image
322
+
323
+ y = np.random.randint(0, h - erase_h)
324
+ x = np.random.randint(0, w - erase_w)
325
+
326
+ result = image.copy()
327
+ channels = image.shape[2] if image.ndim == 3 else 1
328
+ fill = np.random.randint(0, 256, size=(channels,), dtype=np.uint8)
329
+ result[y : y + erase_h, x : x + erase_w] = fill
330
+
331
+ return result
332
+
333
+
334
+ def crop_and_resize_skia(
335
+ image: torch.Tensor,
336
+ boxes: torch.Tensor,
337
+ size: Tuple[int, int],
338
+ ) -> torch.Tensor:
339
+ """Crop and resize image regions using Skia.
340
+
341
+ Replacement for kornia.geometry.transform.crop_and_resize.
342
+
343
+ Args:
344
+ image: Input tensor of shape (1, C, H, W).
345
+ boxes: Bounding boxes tensor of shape (1, 4, 2) with corners:
346
+ [top-left, top-right, bottom-right, bottom-left].
347
+ size: Output size (height, width).
348
+
349
+ Returns:
350
+ Cropped and resized tensor of shape (1, C, out_h, out_w).
351
+ """
352
+ is_float = image.dtype == torch.float32
353
+ if is_float:
354
+ img_np = (image[0].permute(1, 2, 0).numpy() * 255).astype(np.uint8)
355
+ else:
356
+ img_np = image[0].permute(1, 2, 0).numpy()
357
+
358
+ h, w = img_np.shape[:2]
359
+ out_h, out_w = size
360
+ channels = img_np.shape[2] if img_np.ndim == 3 else 1
361
+
362
+ # Get box coordinates (top-left and bottom-right)
363
+ box = boxes[0].numpy() # (4, 2)
364
+ x1, y1 = box[0] # top-left
365
+ x2, y2 = box[2] # bottom-right
366
+
367
+ crop_w = x2 - x1
368
+ crop_h = y2 - y1
369
+
370
+ # Create transformation matrix
371
+ matrix = skia.Matrix()
372
+ scale_x = out_w / crop_w
373
+ scale_y = out_h / crop_h
374
+ matrix.setScale(scale_x, scale_y)
375
+ matrix.preTranslate(-x1, -y1)
376
+
377
+ # Skia needs RGBA
378
+ if channels == 1:
379
+ image_rgba = np.stack(
380
+ [img_np.squeeze()] * 3 + [np.full((h, w), 255, dtype=np.uint8)], axis=-1
381
+ )
382
+ elif channels == 3:
383
+ alpha = np.full((h, w, 1), 255, dtype=np.uint8)
384
+ image_rgba = np.concatenate([img_np, alpha], axis=-1)
385
+ else:
386
+ raise ValueError(f"Unsupported channels: {channels}")
387
+
388
+ image_rgba = np.ascontiguousarray(image_rgba, dtype=np.uint8)
389
+ skia_image = skia.Image.fromarray(
390
+ image_rgba, colorType=skia.ColorType.kRGBA_8888_ColorType
391
+ )
392
+
393
+ surface = skia.Surface(out_w, out_h)
394
+ canvas = surface.getCanvas()
395
+ canvas.clear(skia.Color4f(0, 0, 0, 1))
396
+ canvas.setMatrix(matrix)
397
+
398
+ paint = skia.Paint()
399
+ paint.setAntiAlias(True)
400
+ sampling = skia.SamplingOptions(skia.FilterMode.kLinear)
401
+ canvas.drawImage(skia_image, 0, 0, sampling, paint)
402
+
403
+ result = surface.makeImageSnapshot().toarray()
404
+
405
+ if channels == 1:
406
+ result = result[:, :, 0:1]
407
+ else:
408
+ result = result[:, :, :channels]
409
+
410
+ result_tensor = torch.from_numpy(result).permute(2, 0, 1).unsqueeze(0)
411
+ if is_float:
412
+ result_tensor = result_tensor.float() / 255.0
413
+
414
+ return result_tensor