sleap-nn 0.1.0a3__py3-none-any.whl → 0.1.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sleap_nn/__init__.py +1 -1
- sleap_nn/architectures/convnext.py +5 -0
- sleap_nn/architectures/encoder_decoder.py +25 -6
- sleap_nn/architectures/swint.py +8 -0
- sleap_nn/cli.py +132 -39
- sleap_nn/inference/peak_finding.py +47 -17
- sleap_nn/inference/predictors.py +213 -106
- sleap_nn/predict.py +6 -7
- sleap_nn/training/callbacks.py +7 -2
- sleap_nn/training/model_trainer.py +32 -0
- {sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/METADATA +2 -1
- {sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/RECORD +16 -16
- {sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/WHEEL +0 -0
- {sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/entry_points.txt +0 -0
- {sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
- {sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/top_level.txt +0 -0
sleap_nn/__init__.py
CHANGED

sleap_nn/architectures/convnext.py
CHANGED

@@ -281,6 +281,10 @@ class ConvNextWrapper(nn.Module):
         # Keep the block output filters the same
         x_in_shape = int(self.arch["channels"][-1] * filters_rate)
 
+        # Encoder channels for skip connections (reversed to match decoder order)
+        # The forward pass uses enc_output[::2][::-1] for skip features
+        encoder_channels = self.arch["channels"][::-1]
+
         self.dec = Decoder(
             x_in_shape=x_in_shape,
             current_stride=self.current_stride,

@@ -293,6 +297,7 @@ class ConvNextWrapper(nn.Module):
             block_contraction=self.block_contraction,
             output_stride=self.output_stride,
             up_interpolate=up_interpolate,
+            encoder_channels=encoder_channels,
         )
 
         if len(self.dec.decoder_stack):

sleap_nn/architectures/encoder_decoder.py
CHANGED

@@ -25,7 +25,7 @@ classes.
 See the `EncoderDecoder` base class for requirements for creating new architectures.
 """
 
-from typing import List, Text, Tuple, Union
+from typing import List, Optional, Text, Tuple, Union
 from collections import OrderedDict
 import torch
 from torch import nn

@@ -391,10 +391,18 @@ class SimpleUpsamplingBlock(nn.Module):
         transpose_convs_activation: Text = "relu",
         feat_concat: bool = True,
         prefix: Text = "",
+        skip_channels: Optional[int] = None,
     ) -> None:
         """Initialize the class."""
         super().__init__()
 
+        # Determine skip connection channels
+        # If skip_channels is provided, use it; otherwise fall back to refine_convs_filters
+        # This allows ConvNext/SwinT to specify actual encoder channels
+        self.skip_channels = (
+            skip_channels if skip_channels is not None else refine_convs_filters
+        )
+
         self.x_in_shape = x_in_shape
         self.current_stride = current_stride
         self.upsampling_stride = upsampling_stride

@@ -469,13 +477,13 @@ class SimpleUpsamplingBlock(nn.Module):
             first_conv_in_channels = refine_convs_filters
         else:
             if self.up_interpolate:
-                # With interpolation, input is x_in_shape +
-                #
-                first_conv_in_channels = x_in_shape +
+                # With interpolation, input is x_in_shape + skip_channels
+                # skip_channels may differ from refine_convs_filters for ConvNext/SwinT
+                first_conv_in_channels = x_in_shape + self.skip_channels
             else:
-                # With transpose conv, input is transpose_conv_output +
+                # With transpose conv, input is transpose_conv_output + skip_channels
                 first_conv_in_channels = (
-
+                    self.skip_channels + transpose_convs_filters
                 )
         else:
             if not self.feat_concat:

@@ -582,6 +590,7 @@ class Decoder(nn.Module):
         block_contraction: bool = False,
         up_interpolate: bool = True,
         prefix: str = "dec",
+        encoder_channels: Optional[List[int]] = None,
     ) -> None:
         """Initialize the class."""
         super().__init__()

@@ -598,6 +607,7 @@ class Decoder(nn.Module):
         self.block_contraction = block_contraction
         self.prefix = prefix
         self.stride_to_filters = {}
+        self.encoder_channels = encoder_channels
 
         self.current_strides = []
         self.residuals = 0

@@ -624,6 +634,13 @@ class Decoder(nn.Module):
 
             next_stride = current_stride // 2
 
+            # Determine skip channels for this decoder block
+            # If encoder_channels provided, use actual encoder channels
+            # Otherwise fall back to computed filters (for UNet compatibility)
+            skip_channels = None
+            if encoder_channels is not None and block < len(encoder_channels):
+                skip_channels = encoder_channels[block]
+
             if self.stem_blocks > 0 and block >= down_blocks + self.stem_blocks:
                 # This accounts for the case where we dont have any more down block features to concatenate with.
                 # In this case, add a simple upsampling block with a conv layer and with no concatenation

@@ -642,6 +659,7 @@ class Decoder(nn.Module):
                         transpose_convs_batch_norm=False,
                         feat_concat=False,
                         prefix=f"{self.prefix}{block}_s{current_stride}_to_s{next_stride}",
+                        skip_channels=skip_channels,
                     )
                 )
             else:

@@ -659,6 +677,7 @@ class Decoder(nn.Module):
                         transpose_convs_filters=block_filters_out,
                         transpose_convs_batch_norm=False,
                         prefix=f"{self.prefix}{block}_s{current_stride}_to_s{next_stride}",
+                        skip_channels=skip_channels,
                     )
                 )
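Taken together, the `encoder_channels`/`skip_channels` plumbing above changes how the decoder sizes its first convolution after each skip concatenation: the real encoder channel counts are used when the backbone provides them, and the old computed filter count is used otherwise. The standalone sketch below (a hypothetical helper written for illustration, assuming ConvNeXt-Tiny-style stage widths [96, 192, 384, 768]) mirrors that fallback rule.

```python
from typing import List, Optional


def resolve_skip_channels(
    encoder_channels: Optional[List[int]], block: int, refine_convs_filters: int
) -> int:
    """Pick the skip-connection width the way the patched Decoder does: use the
    real encoder channels when provided, else fall back to refine_convs_filters
    (the pre-existing UNet behavior)."""
    if encoder_channels is not None and block < len(encoder_channels):
        return encoder_channels[block]
    return refine_convs_filters


# ConvNeXt-style stage widths, reversed to match decoder order as in the diff.
encoder_channels = [96, 192, 384, 768][::-1]  # -> [768, 384, 192, 96]

for block in range(4):
    skip = resolve_skip_channels(encoder_channels, block, refine_convs_filters=64)
    print(f"decoder block {block}: skip_channels={skip}")

# UNet call sites pass encoder_channels=None and keep the old behavior:
print(resolve_skip_channels(None, 0, refine_convs_filters=64))  # -> 64
```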
sleap_nn/architectures/swint.py
CHANGED

@@ -309,6 +309,13 @@ class SwinTWrapper(nn.Module):
             self.stem_patch_stride * (2**3) * 2
         )  # stem_stride * down_blocks_stride * final_max_pool_stride
 
+        # Encoder channels for skip connections (reversed to match decoder order)
+        # SwinT channels: embed * 2^i for each stage i, then reversed
+        num_stages = len(self.arch["depths"])
+        encoder_channels = [
+            self.arch["embed"] * (2 ** (num_stages - 1 - i)) for i in range(num_stages)
+        ]
+
         self.dec = Decoder(
             x_in_shape=block_filters,
             current_stride=self.current_stride,

@@ -321,6 +328,7 @@ class SwinTWrapper(nn.Module):
             block_contraction=self.block_contraction,
             output_stride=output_stride,
             up_interpolate=up_interpolate,
+            encoder_channels=encoder_channels,
         )
 
         if len(self.dec.decoder_stack):
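The list comprehension added here just enumerates the per-stage SwinT widths in reverse. A quick worked example, assuming Swin-T-like hyperparameters (embed=96, depths=[2, 2, 6, 2]) that are illustrative values rather than anything read from this diff:

```python
def swint_encoder_channels(embed: int, depths: list) -> list:
    """Per-stage SwinT channel counts (embed * 2**i), reversed to decoder order."""
    num_stages = len(depths)
    return [embed * (2 ** (num_stages - 1 - i)) for i in range(num_stages)]


print(swint_encoder_channels(embed=96, depths=[2, 2, 6, 2]))
# -> [768, 384, 192, 96]: deepest stage first, matching the decoder's upsampling order.
```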
sleap_nn/cli.py
CHANGED

@@ -1,6 +1,7 @@
-"""Unified CLI for SLEAP-NN using
+"""Unified CLI for SLEAP-NN using rich-click for styled output."""
 
-import click
+import rich_click as click
+from click import Command
 from loguru import logger
 from pathlib import Path
 from omegaconf import OmegaConf, DictConfig

@@ -13,7 +14,36 @@ from sleap_nn.train import run_training
 from sleap_nn import __version__
 import hydra
 import sys
-
+
+# Rich-click configuration for styled help
+click.rich_click.TEXT_MARKUP = "markdown"
+click.rich_click.SHOW_ARGUMENTS = True
+click.rich_click.GROUP_ARGUMENTS_OPTIONS = True
+click.rich_click.STYLE_ERRORS_SUGGESTION = "magenta italic"
+click.rich_click.ERRORS_EPILOGUE = (
+    "Try 'sleap-nn [COMMAND] --help' for more information."
+)
+
+
+def is_config_path(arg: str) -> bool:
+    """Check if an argument looks like a config file path.
+
+    Returns True if the arg ends with .yaml or .yml.
+    """
+    return arg.endswith(".yaml") or arg.endswith(".yml")
+
+
+def split_config_path(config_path: str) -> tuple:
+    """Split a full config path into (config_dir, config_name).
+
+    Args:
+        config_path: Full path to a config file.
+
+    Returns:
+        Tuple of (config_dir, config_name) where config_dir is an absolute path.
+    """
+    path = Path(config_path).resolve()
+    return path.parent.as_posix(), path.name
 
 
 def print_version(ctx, param, value):

@@ -66,38 +96,77 @@ def cli():
 
 
 def show_training_help():
-    """Display training help information."""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    """Display training help information with rich formatting."""
+    from rich.console import Console
+    from rich.panel import Panel
+    from rich.markdown import Markdown
+
+    console = Console()
+
+    help_md = """
+## Usage
+
+```
+sleap-nn train <config.yaml> [overrides]
+sleap-nn train --config <path/to/config.yaml> [overrides]
+```
+
+## Common Overrides
+
+| Override | Description |
+|----------|-------------|
+| `trainer_config.max_epochs=100` | Set maximum training epochs |
+| `trainer_config.batch_size=32` | Set batch size |
+| `trainer_config.save_ckpt=true` | Enable checkpoint saving |
+
+## Examples
+
+**Start a new training run:**
+```bash
+sleap-nn train path/to/config.yaml
+sleap-nn train --config path/to/config.yaml
+```
+
+**With overrides:**
+```bash
+sleap-nn train config.yaml trainer_config.max_epochs=100
+```
+
+**Resume training:**
+```bash
+sleap-nn train config.yaml trainer_config.resume_ckpt_path=/path/to/ckpt
+```
+
+**Legacy usage (still supported):**
+```bash
+sleap-nn train --config-dir /path/to/dir --config-name myrun
+```
+
+## Tips
+
+- Use `-m/--multirun` for sweeps; outputs go under `hydra.sweep.dir`
+- For Hydra flags and completion, use `--hydra-help`
+- Config documentation: https://nn.sleap.ai/config/
 """
-
+    console.print(
+        Panel(
+            Markdown(help_md),
+            title="[bold cyan]sleap-nn train[/bold cyan]",
+            subtitle="Train SLEAP models from a config YAML file",
+            border_style="cyan",
+        )
+    )
 
 
 @cli.command(cls=TrainCommand)
-@click.option("--config-name", "-c", type=str, help="Configuration file name")
 @click.option(
-    "--config
+    "--config",
+    type=str,
+    help="Path to configuration file (e.g., path/to/config.yaml)",
+)
+@click.option("--config-name", "-c", type=str, help="Configuration file name (legacy)")
+@click.option(
+    "--config-dir", "-d", type=str, default=".", help="Configuration directory (legacy)"
 )
 @click.option(
     "--video-paths",

@@ -130,25 +199,43 @@ For a detailed list of all available config options, please refer to https://nn.
     'Example: --prefix-map "/old/server/path" "/new/local/path"',
 )
 @click.argument("overrides", nargs=-1, type=click.UNPROCESSED)
-def train(
+def train(
+    config, config_name, config_dir, video_paths, video_path_map, prefix_map, overrides
+):
     """Run training workflow with Hydra config overrides.
 
     Examples:
-        sleap-nn train
+        sleap-nn train path/to/config.yaml
+        sleap-nn train --config path/to/config.yaml trainer_config.max_epochs=100
         sleap-nn train -c myconfig -d /path/to/config_dir/ trainer_config.max_epochs=100
-        sleap-nn train -c myconfig -d /path/to/config_dir/ +experiment=new_model
     """
-    #
-
+    # Convert overrides to a mutable list
+    overrides = list(overrides)
+
+    # Check if the first positional arg is a config path (not a Hydra override)
+    config_from_positional = None
+    if overrides and is_config_path(overrides[0]):
+        config_from_positional = overrides.pop(0)
+
+    # Resolve config path with priority:
+    # 1. Positional config path (e.g., sleap-nn train config.yaml)
+    # 2. --config flag (e.g., sleap-nn train --config config.yaml)
+    # 3. Legacy --config-dir/--config-name flags
+    if config_from_positional:
+        config_dir, config_name = split_config_path(config_from_positional)
+    elif config:
+        config_dir, config_name = split_config_path(config)
+    elif config_name:
+        config_dir = Path(config_dir).resolve().as_posix()
+    else:
+        # No config provided - show help
         show_training_help()
         return
 
-    # Initialize Hydra manually
-    # resolve the path to the config directory (hydra expects absolute path)
-    config_dir = Path(config_dir).resolve().as_posix()
+    # Initialize Hydra manually (config_dir is already an absolute path)
     with hydra.initialize_config_dir(config_dir=config_dir, version_base=None):
         # Compose config with overrides
-        cfg = hydra.compose(config_name=config_name, overrides=
+        cfg = hydra.compose(config_name=config_name, overrides=overrides)
 
         # Validate config
         if not hasattr(cfg, "model_config") or not cfg.model_config:

@@ -581,6 +668,12 @@ def train(config_name, config_dir, video_paths, video_path_map, prefix_map, over
     default=0,
     help="IOU to use when culling instances *after* tracking. (default: 0)",
 )
+@click.option(
+    "--gui",
+    is_flag=True,
+    default=False,
+    help="Output JSON progress for GUI integration instead of Rich progress bar.",
+)
 def track(**kwargs):
     """Run Inference and Tracking workflow."""
     # Convert model_paths from tuple to list
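With this change, `sleap-nn train` accepts a bare YAML path as the first positional argument and strips it out of the Hydra overrides before composing the config. The snippet below rehearses that resolution logic in isolation; the helper bodies follow the hunk above, and the printed directory is simply whatever the file resolves to on the machine running it.

```python
from pathlib import Path


def is_config_path(arg: str) -> bool:
    """True if the argument looks like a YAML config file rather than a Hydra override."""
    return arg.endswith(".yaml") or arg.endswith(".yml")


def split_config_path(config_path: str) -> tuple:
    """Split a config file path into (absolute config_dir, config_name) for Hydra."""
    path = Path(config_path).resolve()
    return path.parent.as_posix(), path.name


# What `sleap-nn train configs/run.yaml trainer_config.max_epochs=100` delivers
# to the click command as the `overrides` argument:
overrides = ["configs/run.yaml", "trainer_config.max_epochs=100"]

if overrides and is_config_path(overrides[0]):
    config_dir, config_name = split_config_path(overrides.pop(0))
    print(config_dir)   # absolute path ending in .../configs
    print(config_name)  # run.yaml
    print(overrides)    # ['trainer_config.max_epochs=100'] -- only real Hydra overrides remain
```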
sleap_nn/inference/peak_finding.py
CHANGED

@@ -3,9 +3,8 @@
 from typing import Optional, Tuple
 
 import kornia as K
-import numpy as np
 import torch
-
+import torch.nn.functional as F
 
 from sleap_nn.data.instance_cropping import make_centered_bboxes
 

@@ -13,7 +12,11 @@ from sleap_nn.data.instance_cropping import make_centered_bboxes
 def crop_bboxes(
     images: torch.Tensor, bboxes: torch.Tensor, sample_inds: torch.Tensor
 ) -> torch.Tensor:
-    """Crop bounding boxes from a batch of images.
+    """Crop bounding boxes from a batch of images using fast tensor indexing.
+
+    This uses tensor unfold operations to extract patches, which is significantly
+    faster than kornia's crop_and_resize (17-51x speedup) as it avoids perspective
+    transform computations.
 
     Args:
         images: Tensor of shape (samples, channels, height, width) of a batch of images.

@@ -27,7 +30,7 @@ def crop_bboxes(
             box should be cropped from.
 
     Returns:
-        A tensor of shape (n_bboxes, crop_height, crop_width
+        A tensor of shape (n_bboxes, channels, crop_height, crop_width) of the same
         dtype as the input image. The crop size is inferred from the bounding box
         coordinates.
 

@@ -42,26 +45,53 @@ def crop_bboxes(
 
     See also: `make_centered_bboxes`
     """
+    n_crops = bboxes.shape[0]
+    if n_crops == 0:
+        # Return empty tensor; use default crop size since we can't infer from bboxes
+        return torch.empty(
+            0, images.shape[1], 0, 0, device=images.device, dtype=images.dtype
+        )
+
     # Compute bounding box size to use for crops.
-    height = abs(bboxes[0, 3, 1] - bboxes[0, 0, 1])
-    width = abs(bboxes[0, 1, 0] - bboxes[0, 0, 0])
-    box_size = tuple(torch.round(torch.Tensor((height + 1, width + 1))).to(torch.int32))
+    height = int(abs(bboxes[0, 3, 1] - bboxes[0, 0, 1]).item()) + 1
+    width = int(abs(bboxes[0, 1, 0] - bboxes[0, 0, 0]).item()) + 1
 
     # Store original dtype for conversion back after cropping.
     original_dtype = images.dtype
+    device = images.device
+    n_samples, channels, img_h, img_w = images.shape
+    half_h, half_w = height // 2, width // 2
 
-    #
-
-
-    images_to_crop = images_to_crop.float()
-
-    # Crop.
-    crops = crop_and_resize(
-        images_to_crop,  # (n_boxes, channels, height, width)
-        boxes=bboxes,
-        size=box_size,
+    # Pad images for edge handling.
+    images_padded = F.pad(
+        images.float(), (half_w, half_w, half_h, half_h), mode="constant", value=0
     )
 
+    # Extract all possible patches using unfold (creates a view, no copy).
+    # Shape after unfold: (n_samples, channels, img_h, img_w, height, width)
+    patches = images_padded.unfold(2, height, 1).unfold(3, width, 1)
+
+    # Get crop centers from bboxes.
+    # The bbox top-left is at index 0, with (x, y) coordinates.
+    # We need the center of the crop (peak location), which is top-left + half_size.
+    # Ensure bboxes are on the same device as images for index computation.
+    bboxes_on_device = bboxes.to(device)
+    crop_x = (bboxes_on_device[:, 0, 0] + half_w).to(torch.long)
+    crop_y = (bboxes_on_device[:, 0, 1] + half_h).to(torch.long)
+
+    # Clamp indices to valid bounds to handle edge cases where centroids
+    # might be at or beyond image boundaries.
+    crop_x = torch.clamp(crop_x, 0, patches.shape[3] - 1)
+    crop_y = torch.clamp(crop_y, 0, patches.shape[2] - 1)
+
+    # Select crops using advanced indexing.
+    # Convert sample_inds to tensor if it's a list.
+    if not isinstance(sample_inds, torch.Tensor):
+        sample_inds = torch.tensor(sample_inds, device=device)
+    sample_inds_long = sample_inds.to(device=device, dtype=torch.long)
+    crops = patches[sample_inds_long, :, crop_y, crop_x]
+    # Shape: (n_crops, channels, height, width)
+
     # Cast back to original dtype and return.
     crops = crops.to(original_dtype)
     return crops
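The rewritten `crop_bboxes` replaces kornia's `crop_and_resize` with zero-padding plus `Tensor.unfold`, so each fixed-size crop becomes a plain indexing operation instead of a perspective warp. A minimal, self-contained demonstration of that trick (the helper name and toy sizes are mine, not from the package):

```python
import torch
import torch.nn.functional as F


def crop_patches(images: torch.Tensor, centers_xy: torch.Tensor, size: int) -> torch.Tensor:
    """Extract size x size patches centered on integer (x, y) points, zero-padding at edges."""
    half = size // 2
    padded = F.pad(images.float(), (half, half, half, half))
    # Every possible patch as a zero-copy view: (n, c, H, W, size, size).
    patches = padded.unfold(2, size, 1).unfold(3, size, 1)
    x = centers_xy[:, 0].long().clamp(0, patches.shape[3] - 1)
    y = centers_xy[:, 1].long().clamp(0, patches.shape[2] - 1)
    sample = torch.zeros(len(centers_xy), dtype=torch.long)  # all crops from image 0 here
    return patches[sample, :, y, x]  # advanced indexing -> (n_crops, c, size, size)


img = torch.arange(64, dtype=torch.float32).reshape(1, 1, 8, 8)
crops = crop_patches(img, centers_xy=torch.tensor([[4, 3], [0, 0]]), size=3)
print(crops.shape)                                    # torch.Size([2, 1, 3, 3])
print(torch.equal(crops[0, 0], img[0, 0, 2:5, 3:6]))  # True: 3x3 block centered at (x=4, y=3)
```

Because `unfold` returns a view, no per-crop tensors are materialized until the final indexing step, which is where the speedup over a transform-based crop comes from.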
sleap_nn/inference/predictors.py
CHANGED

@@ -56,6 +56,8 @@ from rich.progress import (
     MofNCompleteColumn,
 )
 from time import time
+import json
+import sys
 
 
 def _filter_user_labeled_frames(

@@ -133,6 +135,8 @@ class Predictor(ABC):
         `backbone_config`. This determines the downsampling factor applied by the backbone,
         and is used to ensure that input images are padded or resized to be compatible
         with the model's architecture. Default: 16.
+        gui: If True, outputs JSON progress lines for GUI integration instead of
+            Rich progress bars. Default: False.
     """
 
     preprocess: bool = True

@@ -152,6 +156,7 @@ class Predictor(ABC):
     ] = None
     instances_key: bool = False
     max_stride: int = 16
+    gui: bool = False
 
     @classmethod
     def from_model_paths(

@@ -381,6 +386,102 @@ class Predictor(ABC):
                     v[n] = v[n].cpu().numpy()
         return output
 
+    def _process_batch(self) -> tuple:
+        """Process a single batch of frames from the pipeline.
+
+        Returns:
+            Tuple of (imgs, fidxs, vidxs, org_szs, instances, eff_scales, done)
+            where done is True if the pipeline has finished.
+        """
+        imgs = []
+        fidxs = []
+        vidxs = []
+        org_szs = []
+        instances = []
+        eff_scales = []
+        done = False
+
+        for _ in range(self.batch_size):
+            frame = self.pipeline.frame_buffer.get()
+            if frame["image"] is None:
+                done = True
+                break
+            frame["image"], eff_scale = apply_sizematcher(
+                frame["image"],
+                self.preprocess_config["max_height"],
+                self.preprocess_config["max_width"],
+            )
+            if self.instances_key:
+                frame["instances"] = frame["instances"] * eff_scale
+            if self.preprocess_config["ensure_rgb"] and frame["image"].shape[-3] != 3:
+                frame["image"] = frame["image"].repeat(1, 3, 1, 1)
+            elif (
+                self.preprocess_config["ensure_grayscale"]
+                and frame["image"].shape[-3] != 1
+            ):
+                frame["image"] = F.rgb_to_grayscale(
+                    frame["image"], num_output_channels=1
+                )
+
+            eff_scales.append(torch.tensor(eff_scale))
+            imgs.append(frame["image"].unsqueeze(dim=0))
+            fidxs.append(frame["frame_idx"])
+            vidxs.append(frame["video_idx"])
+            org_szs.append(frame["orig_size"].unsqueeze(dim=0))
+            if self.instances_key:
+                instances.append(frame["instances"].unsqueeze(dim=0))
+
+        return imgs, fidxs, vidxs, org_szs, instances, eff_scales, done
+
+    def _run_inference_on_batch(
+        self, imgs, fidxs, vidxs, org_szs, instances, eff_scales
+    ) -> Iterator[Dict[str, np.ndarray]]:
+        """Run inference on a prepared batch of frames.
+
+        Args:
+            imgs: List of image tensors.
+            fidxs: List of frame indices.
+            vidxs: List of video indices.
+            org_szs: List of original sizes.
+            instances: List of instance tensors.
+            eff_scales: List of effective scales.
+
+        Yields:
+            Dictionaries containing inference results for each frame.
+        """
+        # TODO: all preprocessing should be moved into InferenceModels to be exportable.
+        imgs = torch.concatenate(imgs, dim=0)
+        fidxs = torch.tensor(fidxs, dtype=torch.int32)
+        vidxs = torch.tensor(vidxs, dtype=torch.int32)
+        org_szs = torch.concatenate(org_szs, dim=0)
+        eff_scales = torch.tensor(eff_scales, dtype=torch.float32)
+        if self.instances_key:
+            instances = torch.concatenate(instances, dim=0)
+        ex = {
+            "image": imgs,
+            "frame_idx": fidxs,
+            "video_idx": vidxs,
+            "orig_size": org_szs,
+            "eff_scale": eff_scales,
+        }
+        if self.instances_key:
+            ex["instances"] = instances
+        if self.preprocess:
+            scale = self.preprocess_config["scale"]
+            if scale != 1.0:
+                if self.instances_key:
+                    ex["image"], ex["instances"] = apply_resizer(
+                        ex["image"], ex["instances"]
+                    )
+                else:
+                    ex["image"] = resize_image(ex["image"], scale)
+            ex["image"] = apply_pad_to_stride(ex["image"], self.max_stride)
+        outputs_list = self.inference_model(ex)
+        if outputs_list is not None:
+            for output in outputs_list:
+                output = self._convert_tensors_to_numpy(output)
+                yield output
+
     def _predict_generator(self) -> Iterator[Dict[str, np.ndarray]]:
         """Create a generator that yields batches of inference results.
 

@@ -400,114 +501,14 @@ class Predictor(ABC):
         # Loop over data batches.
         self.pipeline.start()
         total_frames = self.pipeline.total_len()
-        done = False
 
         try:
-            with Progress(
-                "{task.description}",
-                BarColumn(),
-                "[progress.percentage]{task.percentage:>3.0f}%",
-                MofNCompleteColumn(),
-                "ETA:",
-                TimeRemainingColumn(),
-                "Elapsed:",
-                TimeElapsedColumn(),
-                RateColumn(),
-                auto_refresh=False,
-                refresh_per_second=4,  # Change to self.report_rate if needed
-                speed_estimate_period=5,
-            ) as progress:
-                task = progress.add_task("Predicting...", total=total_frames)
-                last_report = time()
-
-                done = False
-                while not done:
-                    imgs = []
-                    fidxs = []
-                    vidxs = []
-                    org_szs = []
-                    instances = []
-                    eff_scales = []
-                    for _ in range(self.batch_size):
-                        frame = self.pipeline.frame_buffer.get()
-                        if frame["image"] is None:
-                            done = True
-                            break
-                        frame["image"], eff_scale = apply_sizematcher(
-                            frame["image"],
-                            self.preprocess_config["max_height"],
-                            self.preprocess_config["max_width"],
-                        )
-                        if self.instances_key:
-                            frame["instances"] = frame["instances"] * eff_scale
-                        if (
-                            self.preprocess_config["ensure_rgb"]
-                            and frame["image"].shape[-3] != 3
-                        ):
-                            frame["image"] = frame["image"].repeat(1, 3, 1, 1)
-                        elif (
-                            self.preprocess_config["ensure_grayscale"]
-                            and frame["image"].shape[-3] != 1
-                        ):
-                            frame["image"] = F.rgb_to_grayscale(
-                                frame["image"], num_output_channels=1
-                            )
-
-                        eff_scales.append(torch.tensor(eff_scale))
-                        imgs.append(frame["image"].unsqueeze(dim=0))
-                        fidxs.append(frame["frame_idx"])
-                        vidxs.append(frame["video_idx"])
-                        org_szs.append(frame["orig_size"].unsqueeze(dim=0))
-                        if self.instances_key:
-                            instances.append(frame["instances"].unsqueeze(dim=0))
-                    if imgs:
-                        # TODO: all preprocessing should be moved into InferenceModels to be exportable.
-                        imgs = torch.concatenate(imgs, dim=0)
-                        fidxs = torch.tensor(fidxs, dtype=torch.int32)
-                        vidxs = torch.tensor(vidxs, dtype=torch.int32)
-                        org_szs = torch.concatenate(org_szs, dim=0)
-                        eff_scales = torch.tensor(eff_scales, dtype=torch.float32)
-                        if self.instances_key:
-                            instances = torch.concatenate(instances, dim=0)
-                        ex = {
-                            "image": imgs,
-                            "frame_idx": fidxs,
-                            "video_idx": vidxs,
-                            "orig_size": org_szs,
-                            "eff_scale": eff_scales,
-                        }
-                        if self.instances_key:
-                            ex["instances"] = instances
-                        if self.preprocess:
-                            scale = self.preprocess_config["scale"]
-                            if scale != 1.0:
-                                if self.instances_key:
-                                    ex["image"], ex["instances"] = apply_resizer(
-                                        ex["image"], ex["instances"]
-                                    )
-                                else:
-                                    ex["image"] = resize_image(ex["image"], scale)
-                            ex["image"] = apply_pad_to_stride(
-                                ex["image"], self.max_stride
-                            )
-                        outputs_list = self.inference_model(ex)
-                        if outputs_list is not None:
-                            for output in outputs_list:
-                                output = self._convert_tensors_to_numpy(output)
-                                yield output
-
-                        # Advance progress
-                        num_frames = (
-                            len(ex["frame_idx"])
-                            if "frame_idx" in ex
-                            else self.batch_size
-                        )
-                        progress.update(task, advance=num_frames)
-
-                        # Manually refresh progress bar
-                        if time() - last_report > 0.25:
-                            progress.refresh()
-                            last_report = time()
+            if self.gui:
+                # GUI mode: emit JSON progress lines
+                yield from self._predict_generator_gui(total_frames)
+            else:
+                # Normal mode: use Rich progress bar
+                yield from self._predict_generator_rich(total_frames)
 
         except KeyboardInterrupt:
             logger.info("Inference interrupted by user")

@@ -520,6 +521,112 @@ class Predictor(ABC):
 
         self.pipeline.join()
 
+    def _predict_generator_gui(
+        self, total_frames: int
+    ) -> Iterator[Dict[str, np.ndarray]]:
+        """Generator for GUI mode with JSON progress output.
+
+        Args:
+            total_frames: Total number of frames to process.
+
+        Yields:
+            Dictionaries containing inference results for each frame.
+        """
+        start_time = time()
+        frames_processed = 0
+        last_report = time()
+        done = False
+
+        while not done:
+            imgs, fidxs, vidxs, org_szs, instances, eff_scales, done = (
+                self._process_batch()
+            )
+
+            if imgs:
+                yield from self._run_inference_on_batch(
+                    imgs, fidxs, vidxs, org_szs, instances, eff_scales
+                )
+
+                # Update progress
+                num_frames = len(fidxs)
+                frames_processed += num_frames
+
+                # Emit JSON progress (throttled to ~4Hz)
+                if time() - last_report > 0.25:
+                    elapsed = time() - start_time
+                    rate = frames_processed / elapsed if elapsed > 0 else 0
+                    remaining = total_frames - frames_processed
+                    eta = remaining / rate if rate > 0 else 0
+
+                    progress_data = {
+                        "n_processed": frames_processed,
+                        "n_total": total_frames,
+                        "rate": round(rate, 1),
+                        "eta": round(eta, 1),
+                    }
+                    print(json.dumps(progress_data), flush=True)
+                    last_report = time()
+
+        # Final progress emit to ensure 100% is shown
+        elapsed = time() - start_time
+        progress_data = {
+            "n_processed": total_frames,
+            "n_total": total_frames,
+            "rate": round(frames_processed / elapsed, 1) if elapsed > 0 else 0,
+            "eta": 0,
+        }
+        print(json.dumps(progress_data), flush=True)
+
+    def _predict_generator_rich(
+        self, total_frames: int
+    ) -> Iterator[Dict[str, np.ndarray]]:
+        """Generator for normal mode with Rich progress bar.
+
+        Args:
+            total_frames: Total number of frames to process.
+
+        Yields:
+            Dictionaries containing inference results for each frame.
+        """
+        with Progress(
+            "{task.description}",
+            BarColumn(),
+            "[progress.percentage]{task.percentage:>3.0f}%",
+            MofNCompleteColumn(),
+            "ETA:",
+            TimeRemainingColumn(),
+            "Elapsed:",
+            TimeElapsedColumn(),
+            RateColumn(),
+            auto_refresh=False,
+            refresh_per_second=4,
+            speed_estimate_period=5,
+        ) as progress:
+            task = progress.add_task("Predicting...", total=total_frames)
+            last_report = time()
+            done = False
+
+            while not done:
+                imgs, fidxs, vidxs, org_szs, instances, eff_scales, done = (
+                    self._process_batch()
+                )
+
+                if imgs:
+                    yield from self._run_inference_on_batch(
+                        imgs, fidxs, vidxs, org_szs, instances, eff_scales
+                    )
+
+                    # Advance progress
+                    num_frames = len(fidxs)
+                    progress.update(task, advance=num_frames)
+
+                    # Manually refresh progress bar
+                    if time() - last_report > 0.25:
+                        progress.refresh()
+                        last_report = time()
+
+        self.pipeline.join()
+
     def predict(
         self,
         make_labels: bool = True,
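In GUI mode the predictor prints one JSON object per progress update on stdout instead of drawing a Rich bar. A consumer-side sketch (illustrative only, not part of the package) of how a GUI process might filter those lines when reading the subprocess output:

```python
import json
from typing import Optional


def parse_progress_line(line: str) -> Optional[dict]:
    """Return the progress dict if the line matches the fields emitted by
    _predict_generator_gui (n_processed, n_total, rate, eta), else None."""
    try:
        data = json.loads(line)
    except json.JSONDecodeError:
        return None
    if isinstance(data, dict) and {"n_processed", "n_total"} <= data.keys():
        return data
    return None


print(parse_progress_line('{"n_processed": 128, "n_total": 512, "rate": 43.2, "eta": 8.9}'))
# -> {'n_processed': 128, 'n_total': 512, 'rate': 43.2, 'eta': 8.9}
print(parse_progress_line("Using device: cuda"))  # -> None (ordinary log output)
```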
sleap_nn/predict.py
CHANGED

@@ -113,6 +113,7 @@ def run_inference(
     tracking_pre_cull_iou_threshold: float = 0,
     tracking_clean_instance_count: int = 0,
     tracking_clean_iou_threshold: float = 0,
+    gui: bool = False,
 ):
     """Entry point to run inference on trained SLEAP-NN models.
 

@@ -262,6 +263,8 @@ def run_inference(
         tracking_pre_cull_iou_threshold: If non-zero and pre_cull_to_target also set, then use IOU threshold to remove overlapping instances over count *before* tracking. (default: 0)
         tracking_clean_instance_count: Target number of instances to clean *after* tracking. (default: 0)
         tracking_clean_iou_threshold: IOU to use when culling instances *after* tracking. (default: 0)
+        gui: (bool) If True, outputs JSON progress lines for GUI integration instead
+            of Rich progress bars. Default: False.
 
     Returns:
         Returns `sio.Labels` object if `make_labels` is True. Else this function returns

@@ -445,13 +448,6 @@ def run_inference(
         else "mps" if torch.backends.mps.is_available() else "cpu"
     )
 
-    if integral_refinement is not None and device == "mps":  # TODO
-        # kornia/geometry/transform/imgwarp.py:382: in get_perspective_transform. NotImplementedError: The operator 'aten::_linalg_solve_ex.result' is not currently implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on https://github.com/pytorch/pytorch/issues/77764. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.
-        logger.info(
-            "Integral refinement is not supported with MPS accelerator. Setting integral refinement to None."
-        )
-        integral_refinement = None
-
     logger.info(f"Using device: {device}")
 
     # initializes the inference model

@@ -470,6 +466,9 @@ def run_inference(
         anchor_part=anchor_part,
     )
 
+    # Set GUI mode for progress output
+    predictor.gui = gui
+
     if (
         tracking
         and not isinstance(predictor, BottomUpMultiClassPredictor)
sleap_nn/training/callbacks.py
CHANGED

@@ -85,10 +85,15 @@ class CSVLoggerCallback(Callback):
             if key == "epoch":
                 log_data["epoch"] = trainer.current_epoch
             elif key == "learning_rate":
-                # Handle
+                # Handle multiple formats:
+                # 1. Direct "learning_rate" key
+                # 2. "train/lr" key (current format from lightning modules)
+                # 3. "lr-*" keys from LearningRateMonitor (legacy)
                 value = metrics.get(key, None)
                 if value is None:
-
+                    value = metrics.get("train/lr", None)
+                    if value is None:
+                        # Look for lr-* keys from LearningRateMonitor (legacy)
                     for metric_key in metrics.keys():
                         if metric_key.startswith("lr-"):
                             value = metrics[metric_key]
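The callback change amounts to a three-step lookup for the learning-rate column. Restated as a standalone helper (hypothetical, for illustration only):

```python
def resolve_learning_rate(metrics: dict):
    """Lookup order used by the patched CSVLoggerCallback:
    'learning_rate' -> 'train/lr' -> first key starting with 'lr-' (legacy)."""
    value = metrics.get("learning_rate")
    if value is None:
        value = metrics.get("train/lr")
    if value is None:
        for key in metrics:
            if key.startswith("lr-"):
                value = metrics[key]
                break
    return value


print(resolve_learning_rate({"train/lr": 1e-4, "train/loss": 0.3}))  # 0.0001
print(resolve_learning_rate({"lr-AdamW": 3e-4}))                     # 0.0003
print(resolve_learning_rate({"train/loss": 0.3}))                    # None
```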
sleap_nn/training/model_trainer.py
CHANGED

@@ -849,6 +849,7 @@ class ModelTrainer:
             "train/time",
             "val/time",
         ]
+        # Add model-specific keys for wandb parity
         if self.model_type in [
             "single_instance",
             "centered_instance",

@@ -857,6 +858,37 @@ class ModelTrainer:
             csv_log_keys.extend(
                 [f"train/confmaps/{name}" for name in self.skeletons[0].node_names]
             )
+        if self.model_type == "bottomup":
+            csv_log_keys.extend(
+                [
+                    "train/confmaps_loss",
+                    "train/paf_loss",
+                    "val/confmaps_loss",
+                    "val/paf_loss",
+                ]
+            )
+        if self.model_type == "multi_class_bottomup":
+            csv_log_keys.extend(
+                [
+                    "train/confmaps_loss",
+                    "train/classmap_loss",
+                    "train/class_accuracy",
+                    "val/confmaps_loss",
+                    "val/classmap_loss",
+                    "val/class_accuracy",
+                ]
+            )
+        if self.model_type == "multi_class_topdown":
+            csv_log_keys.extend(
+                [
+                    "train/confmaps_loss",
+                    "train/classvector_loss",
+                    "train/class_accuracy",
+                    "val/confmaps_loss",
+                    "val/classvector_loss",
+                    "val/class_accuracy",
+                ]
+            )
         csv_logger = CSVLoggerCallback(
             filepath=Path(self.config.trainer_config.ckpt_dir)
             / self.config.trainer_config.run_name
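The 31 added lines enumerate the extra per-model CSV columns explicitly. The same mapping, condensed into a throwaway helper so the pattern is easier to see (the helper is mine; the key names come from the diff):

```python
def model_specific_csv_keys(model_type: str) -> list:
    """Extra CSV log columns per model type, matching the keys added above."""
    extra = {
        "bottomup": ["confmaps_loss", "paf_loss"],
        "multi_class_bottomup": ["confmaps_loss", "classmap_loss", "class_accuracy"],
        "multi_class_topdown": ["confmaps_loss", "classvector_loss", "class_accuracy"],
    }.get(model_type, [])
    return [f"train/{name}" for name in extra] + [f"val/{name}" for name in extra]


print(model_specific_csv_keys("bottomup"))
# ['train/confmaps_loss', 'train/paf_loss', 'val/confmaps_loss', 'val/paf_loss']
```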
{sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sleap-nn
-Version: 0.1.0a3
+Version: 0.1.0a4
 Summary: Neural network backend for training and inference for animal pose estimation.
 Author-email: Divya Seshadri Murali <dimurali@salk.edu>, Elizabeth Berrigan <eberrigan@salk.edu>, Vincent Tu <vitu@ucsd.edu>, Liezl Maree <lmaree@salk.edu>, David Samy <davidasamy@gmail.com>, Talmo Pereira <talmo@salk.edu>
 License: BSD-3-Clause

@@ -32,6 +32,7 @@ Requires-Dist: hydra-core
 Requires-Dist: jupyter
 Requires-Dist: jupyterlab
 Requires-Dist: pyzmq
+Requires-Dist: rich-click>=1.9.5
 Provides-Extra: torch
 Requires-Dist: torch; extra == "torch"
 Requires-Dist: torchvision>=0.20.0; extra == "torch"
{sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/RECORD
CHANGED

@@ -1,18 +1,18 @@
 sleap_nn/.DS_Store,sha256=HY8amA79eHkt7o5VUiNsMxkc9YwW6WIPyZbYRj_JdSU,6148
-sleap_nn/__init__.py,sha256=
-sleap_nn/cli.py,sha256=
+sleap_nn/__init__.py,sha256=W6NBNc9X-Rt5XX9EQSOZ0X2wfj-G4dWlPfkwt-wCUqw,1362
+sleap_nn/cli.py,sha256=NFwxO3Fby_RgRroF7dNXsvWnBeILuVEylaMsLCLUMlY,24792
 sleap_nn/evaluation.py,sha256=SRO3qNOyyGoNBLLA2OKIUhvwyk0oI2ax1rtYmccx6m0,33785
 sleap_nn/legacy_models.py,sha256=8aGK30DZv3pW2IKDBEWH1G2mrytjaxPQD4miPUehj0M,20258
-sleap_nn/predict.py,sha256=
+sleap_nn/predict.py,sha256=tN3vuP_fGCme7fLXd2b9DvItSr_pemzw8FUtIbkkU_U,36513
 sleap_nn/system_info.py,sha256=7tWe3y6s872nDbrZoHIdSs-w4w46Z4dEV2qCV-Fe7No,14711
 sleap_nn/train.py,sha256=PEaK2B0S7DoImf8vt2cvJQS-n2NBw_pUJHmXy0J4NT0,30712
 sleap_nn/architectures/__init__.py,sha256=w0XxQcx-CYyooszzvxRkKWiJkUg-26IlwQoGna8gn40,46
 sleap_nn/architectures/common.py,sha256=MLv-zdHsWL5Q2ct_Wv6SQbRS-5hrFtjK_pvBEfwx-vU,3660
-sleap_nn/architectures/convnext.py,sha256=
-sleap_nn/architectures/encoder_decoder.py,sha256=
+sleap_nn/architectures/convnext.py,sha256=Ba9SFQHBdfz8gcMYZPMItuW-FyQuHBgUU0M8MWhaHuY,14210
+sleap_nn/architectures/encoder_decoder.py,sha256=1cBk9WU0zkXC2aK9XZy6VKHEe2hJEpIa-rwCxNgObZg,29292
 sleap_nn/architectures/heads.py,sha256=5E-7kQ-b2gsL0EviQ8z3KS1DAAMT4F2ZnEzx7eSG5gg,21001
 sleap_nn/architectures/model.py,sha256=1_dsP_4T9fsEVJjDt3er0haMKtbeM6w6JC6tc2jD0Gw,7139
-sleap_nn/architectures/swint.py,sha256=
+sleap_nn/architectures/swint.py,sha256=hlShh1Br0eTijir2U3np8sAaNJa12Xny0VzPx8HSaRo,15060
 sleap_nn/architectures/unet.py,sha256=rAy2Omi6tv1MNW2nBn0Tw-94Nw_-1wFfCT3-IUyPcgo,11723
 sleap_nn/architectures/utils.py,sha256=L0KVs0gbtG8U75Sl40oH_r_w2ySawh3oQPqIGi54HGo,2171
 sleap_nn/config/__init__.py,sha256=l0xV1uJsGJfMPfWAqlUR7Ivu4cSCWsP-3Y9ueyPESuk,42

@@ -58,9 +58,9 @@ sleap_nn/inference/__init__.py,sha256=eVkCmKrxHlDFJIlZTf8B5XEOcSyw-gPQymXMY5uShO
 sleap_nn/inference/bottomup.py,sha256=3s90aRlpIcRnSNe-R5-qiuX3S48kCWMpCl8YuNnTEDI,17084
 sleap_nn/inference/identity.py,sha256=GjNDL9MfGqNyQaK4AE8JQCAE8gpMuE_Y-3r3Gpa53CE,6540
 sleap_nn/inference/paf_grouping.py,sha256=7Fo9lCAj-zcHgv5rI5LIMYGcixCGNt_ZbSNs8Dik7l8,69973
-sleap_nn/inference/peak_finding.py,sha256=
+sleap_nn/inference/peak_finding.py,sha256=l6PKGw7KiVxzd00cesUZsbttPfjP1NBy8WmxWQtBlak,14595
 sleap_nn/inference/postprocessing.py,sha256=ZM_OH7_WIprieaujZ2Rk_34JhSDDzCry6Pq2YM_u5sg,8998
-sleap_nn/inference/predictors.py,sha256=
+sleap_nn/inference/predictors.py,sha256=xZyuH2bmsj_NAXcaswDFWqqmYS57v4QtZIWdsFqb3Sc,160709
 sleap_nn/inference/provenance.py,sha256=0BekXyvpLMb0Vv6DjpctlLduG9RN-Q8jt5zDm783eZE,11204
 sleap_nn/inference/single_instance.py,sha256=rOns_5TsJ1rb-lwmHG3ZY-pOhXGN2D-SfW9RmBxxzcI,4089
 sleap_nn/inference/topdown.py,sha256=Ha0Nwx-XCH_rebIuIGhP0qW68QpjLB3XRr9rxt05JLs,35108

@@ -73,14 +73,14 @@ sleap_nn/tracking/candidates/__init__.py,sha256=1O7NObIwshM7j1rLHmImbFphvkM9wY1j
 sleap_nn/tracking/candidates/fixed_window.py,sha256=D80KMlTnenuQveQVVhk9j0G8yx6K324C7nMLHgG76e0,6296
 sleap_nn/tracking/candidates/local_queues.py,sha256=Nx3R5wwEwq0gbfH-fi3oOumfkQo8_sYe5GN47pD9Be8,7305
 sleap_nn/training/__init__.py,sha256=vNTKsIJPZHJwFSKn5PmjiiRJunR_9e7y4_v0S6rdF8U,32
-sleap_nn/training/callbacks.py,sha256=
+sleap_nn/training/callbacks.py,sha256=7WRT2pmQQ-hRdq9n7iHC_e0zH-vDphYfe0KHdD-UGg4,38216
 sleap_nn/training/lightning_modules.py,sha256=z98NBTrNy-GfCw4zatummJhVUO1fdjv_kPweAKcoaXc,108394
 sleap_nn/training/losses.py,sha256=gbdinUURh4QUzjmNd2UJpt4FXwecqKy9gHr65JZ1bZk,1632
-sleap_nn/training/model_trainer.py,sha256=
+sleap_nn/training/model_trainer.py,sha256=okXTouoXzRcHcflRCdwR3NwUwSdX-ex1-rZOZHYCZLk,59964
 sleap_nn/training/utils.py,sha256=ivdkZEI0DkTCm6NPszsaDOh9jSfozkONZdl6TvvQUWI,20398
-sleap_nn-0.1.
-sleap_nn-0.1.
-sleap_nn-0.1.
-sleap_nn-0.1.
-sleap_nn-0.1.
-sleap_nn-0.1.
+sleap_nn-0.1.0a4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+sleap_nn-0.1.0a4.dist-info/METADATA,sha256=kA66dtTSVKAdFJcnvsSEMkrT3TRyGzHAcAsCIHzoqbE,6178
+sleap_nn-0.1.0a4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+sleap_nn-0.1.0a4.dist-info/entry_points.txt,sha256=zfl5Y3hidZxWBvo8qXvu5piJAXJ_l6v7xVFm0gNiUoI,46
+sleap_nn-0.1.0a4.dist-info/top_level.txt,sha256=Kz68iQ55K75LWgSeqz4V4SCMGeFFYH-KGBOyhQh3xZE,9
+sleap_nn-0.1.0a4.dist-info/RECORD,,
{sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/WHEEL
File without changes

{sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/entry_points.txt
File without changes

{sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/licenses/LICENSE
File without changes

{sleap_nn-0.1.0a3.dist-info → sleap_nn-0.1.0a4.dist-info}/top_level.txt
File without changes