PyPI - jabs-core - Versions diffs - 0.1.0a1__tar.gz - Mend

jabs-core 0.1.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

jabs_core-0.1.0a1/PKG-INFO +43 -0
jabs_core-0.1.0a1/README.md +28 -0
jabs_core-0.1.0a1/pyproject.toml +44 -0
jabs_core-0.1.0a1/src/jabs/core/__init__.py +1 -0
jabs_core-0.1.0a1/src/jabs/core/abstract/__init__.py +7 -0
jabs_core-0.1.0a1/src/jabs/core/abstract/pose_est.py +421 -0
jabs_core-0.1.0a1/src/jabs/core/constants.py +15 -0
jabs_core-0.1.0a1/src/jabs/core/enums/__init__.py +12 -0
jabs_core-0.1.0a1/src/jabs/core/enums/classifier_types.py +9 -0
jabs_core-0.1.0a1/src/jabs/core/enums/cv_grouping.py +15 -0
jabs_core-0.1.0a1/src/jabs/core/enums/units.py +8 -0
jabs_core-0.1.0a1/src/jabs/core/exceptions.py +28 -0
jabs_core-0.1.0a1/src/jabs/core/utils/__init__.py +12 -0
jabs_core-0.1.0a1/src/jabs/core/utils/pose_util.py +36 -0
jabs_core-0.1.0a1/src/jabs/core/utils/process_pool_manager.py +223 -0
jabs_core-0.1.0a1/src/jabs/core/utils/sampleposeintervals.py +269 -0
jabs_core-0.1.0a1/src/jabs/core/utils/update_checker.py +54 -0
jabs_core-0.1.0a1/src/jabs/core/utils/utilities.py +64 -0

jabs_core-0.1.0a1/PKG-INFO ADDED Viewed

@@ -0,0 +1,43 @@
+Metadata-Version: 2.3
+Name: jabs-core
+Version: 0.1.0a1
+Summary: Add your description here
+Requires-Dist: packaging>=24.0
+Requires-Dist: toml>=0.10.2,<0.11.0
+Requires-Dist: h5py>=3.10.0,<4.0.0
+Requires-Dist: shapely>=2.0.1,<3.0.0
+Requires-Dist: numpy>=2.0.0,<3.0.0
+Requires-Dist: opencv-python-headless>=4.8.1.78,<5.0.0
+Requires-Python: >=3.10, <3.15
+Project-URL: Repository, https://github.com/KumarLabJax/JABS-behavior-classifier
+Project-URL: Issues, https://github.com/KumarLabJax/JABS-behavior-classifier/issues
+Description-Content-Type: text/markdown
+# JABS Core (`jabs-core`)
+The infrastructure and shared utility layer for JABS.
+## Overview
+`jabs-core` provides low-level, domain-agnostic utilities used across all JABS packages.
+It is designed to be lightweight and free of heavy scientific dependencies (like
+`scikit-learn` or `pandas`), making it safe to import at any level of the hierarchy.
+## Responsibilities
+- **Shared Constants**: Global constants used for file compression and configuration.
+- **Exceptions**: Centralized exception hierarchy (`JabsError`, `PoseHashException`,
+  etc.).
+- **Infrastructure**: Base classes for registries and plugin discovery systems.
+- **Abstract Bases**: High-level interface definitions (e.g., the `PoseEstimation`
+  abstract base).
+- **Utility Functions**: Generic helpers for file hashing, logging configuration, and
+  basic string/path manipulation.
+## Package Structure
+- `jabs.core.constants`: Global constants.
+- `jabs.core.exceptions`: Shared exception classes.
+- `jabs.core.abstract`: Abstract base classes for the system.
+- `jabs.core.utils`: Generic utility functions.
+- `jabs.core.enums`: Shared enumerations (e.g., `ClassifierType`).

jabs_core-0.1.0a1/README.md ADDED Viewed

@@ -0,0 +1,28 @@
+# JABS Core (`jabs-core`)
+The infrastructure and shared utility layer for JABS.
+## Overview
+`jabs-core` provides low-level, domain-agnostic utilities used across all JABS packages.
+It is designed to be lightweight and free of heavy scientific dependencies (like
+`scikit-learn` or `pandas`), making it safe to import at any level of the hierarchy.
+## Responsibilities
+- **Shared Constants**: Global constants used for file compression and configuration.
+- **Exceptions**: Centralized exception hierarchy (`JabsError`, `PoseHashException`,
+  etc.).
+- **Infrastructure**: Base classes for registries and plugin discovery systems.
+- **Abstract Bases**: High-level interface definitions (e.g., the `PoseEstimation`
+  abstract base).
+- **Utility Functions**: Generic helpers for file hashing, logging configuration, and
+  basic string/path manipulation.
+## Package Structure
+- `jabs.core.constants`: Global constants.
+- `jabs.core.exceptions`: Shared exception classes.
+- `jabs.core.abstract`: Abstract base classes for the system.
+- `jabs.core.utils`: Generic utility functions.
+- `jabs.core.enums`: Shared enumerations (e.g., `ClassifierType`).

jabs_core-0.1.0a1/pyproject.toml ADDED Viewed

@@ -0,0 +1,44 @@
+[project]
+name = "jabs-core"
+version = "0.1.0a1"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10,<3.15"
+dependencies = [
+  "packaging>=24.0",
+  "toml>=0.10.2,<0.11.0",
+  "h5py>=3.10.0,<4.0.0",
+  "shapely>=2.0.1,<3.0.0",
+  "numpy>=2.0.0,<3.0.0",
+  "opencv-python-headless>=4.8.1.78,<5.0.0",
+]
+[dependency-groups]
+dev = [
+  {include-group = "lint"},
+  {include-group = "test"},
+  {include-group = "docs"},
+  "pre-commit>=4.2.0,<5.0.0",
+  "matplotlib>=3.9.3,<4.0.0",
+]
+test = [
+  "pytest>=8.3.4,<9.0.0",
+  "pytest-cov>=7.0.0",
+]
+lint = [
+  "ruff>=0.11.5,<0.12.0",
+]
+docs = [
+  "mkdocs>=1.6.1",
+]
+[project.urls]
+Repository = "https://github.com/KumarLabJax/JABS-behavior-classifier"
+Issues = "https://github.com/KumarLabJax/JABS-behavior-classifier/issues"
+[build-system]
+requires = ["uv_build>=0.9.26,<0.10.0"]
+build-backend = "uv_build"
+[tool.uv.build-backend]
+module-name = "jabs.core"

jabs_core-0.1.0a1/src/jabs/core/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """The root of the jabs.core package."""

jabs_core-0.1.0a1/src/jabs/core/abstract/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""JABS Abstract Base Classes"""
+from .pose_est import PoseEstimation
+__all__ = [
+    "PoseEstimation",
+]

jabs_core-0.1.0a1/src/jabs/core/abstract/pose_est.py ADDED Viewed

@@ -0,0 +1,421 @@
+import enum
+import logging
+from abc import ABC, abstractmethod
+from pathlib import Path
+import h5py
+import joblib
+import numpy as np
+from shapely.geometry import MultiPoint
+from jabs.core.utils import hash_file
+MINIMUM_CONFIDENCE = 0.3
+class PoseEstimation(ABC):
+    """Abstract base class for pose estimation data handlers.
+    Provides a common interface for loading, accessing, and processing pose data
+    from HDF5 files. Defines methods for retrieving keypoints, confidence masks, identity
+    presence, and static objects, as well as utilities for geometric computations such as
+    convex hulls and bearing angles. All pose estimation versioned classes should inherit
+    from this base class.
+    Args:
+        file_path (Path): Path to the pose HDF5 file.
+        cache_dir (Path | None): Optional cache directory for intermediate data.
+        fps (int): Frames per second for the video.
+    Abstract Methods:
+        get_points(frame_index, identity, scale): Get points and mask for an identity in a frame.
+        get_identity_poses(identity, scale): Get all points and masks for an identity.
+        get_identity_point_mask(identity): Get the point mask array for a given identity.
+        identity_mask(identity): Get the identity mask for a given identity.
+        identity_to_track: Get the identity-to-track mapping for this file.
+        format_major_version: Returns the major version of the pose file format.
+    Methods:
+        get_identity_convex_hulls(identity): Get convex hulls for an identity across frames.
+        compute_bearing(points): Compute the bearing angle for a single frame.
+        compute_all_bearings(identity): Compute bearing angles for all frames of an identity.
+        get_pose_file_attributes(path): Static method to get HDF5 file attributes.
+    Properties:
+        num_frames (int): Number of frames.
+        identities (list): List of identities.
+        num_identities (int): Number of identities.
+        cm_per_pixel (float | None): Centimeters per pixel.
+        fps (int): Frames per second.
+        pose_file (Path): Path to the pose file.
+        hash (str): Hash of the pose file.
+        static_objects (dict): Static objects in the pose file.
+        num_lixit_keypoints (int): Number of lixit keypoints (default 0).
+        external_identities (list[str] | None): Mapping to external identities.
+    """
+    class KeypointIndex(enum.IntEnum):
+        """Integer indexes of the 12 pose keypoints.
+        The member values index the keypoint axis of point arrays, for example
+        ``points[KeypointIndex.NOSE.value]`` in ``compute_bearing``.
+        """
+        NOSE = 0
+        LEFT_EAR = 1
+        RIGHT_EAR = 2
+        BASE_NECK = 3
+        LEFT_FRONT_PAW = 4
+        RIGHT_FRONT_PAW = 5
+        CENTER_SPINE = 6
+        LEFT_REAR_PAW = 7
+        RIGHT_REAR_PAW = 8
+        BASE_TAIL = 9
+        MID_TAIL = 10
+        TIP_TAIL = 11
+    # Connected segments to use when full 12 keypoints are available.
+    FULL_CONNECTED_SEGMENTS = (
+        (
+            KeypointIndex.LEFT_FRONT_PAW,
+            KeypointIndex.CENTER_SPINE,
+            KeypointIndex.RIGHT_FRONT_PAW,
+        ),
+        (
+            KeypointIndex.LEFT_REAR_PAW,
+            KeypointIndex.BASE_TAIL,
+            KeypointIndex.RIGHT_REAR_PAW,
+        ),
+        (
+            KeypointIndex.NOSE,
+            KeypointIndex.BASE_NECK,
+            KeypointIndex.CENTER_SPINE,
+            KeypointIndex.BASE_TAIL,
+            KeypointIndex.MID_TAIL,
+            KeypointIndex.TIP_TAIL,
+        ),
+    )
+    # Pose based on the Envision Hydra model will have fewer keypoints,
+    # so we adjust the connected segments accordingly.
+    NVSN_CONNECTED_SEGMENTS = (
+        (
+            KeypointIndex.LEFT_EAR,
+            KeypointIndex.NOSE,
+            KeypointIndex.RIGHT_EAR,
+        ),
+        (
+            KeypointIndex.NOSE,
+            KeypointIndex.BASE_TAIL,
+            KeypointIndex.TIP_TAIL,
+        ),
+    )
+    # Version expected in the "cache_file_version" attribute of a cache file;
+    # __init__ deletes the cache file when check_cache_version() reports a mismatch.
+    _CACHE_FILE_VERSION = 1
+    def __init__(self, file_path: Path, cache_dir: Path | None = None, fps: int = 30):
+        """initialize new object from h5 file
+        Args:
+            file_path: path to pose_est_v2.h5 file
+            cache_dir: optional cache directory, used to cache convex
+                hulls
+            fps: frames per second, used for scaling time series
+                features
+        for faster loading
+        from "per frame" to "per second"
+        """
+        super().__init__()
+        self._num_frames = 0
+        self._identities = []
+        self._external_identities: list[str] | None = None
+        self._convex_hull_cache = {}
+        self._path = file_path
+        self._cache_dir = cache_dir
+        self._cm_per_pixel = None
+        self._hash = hash_file(file_path)
+        self._fps = fps
+        self._static_objects = {}
+        # check cache version, if it doesn't match, clear the cache file for this pose file
+        if self._cache_dir is not None and not self.check_cache_version():
+            cache_file = self._cache_file_path()
+            if cache_file and cache_file.exists():
+                try:
+                    cache_file.unlink()
+                except Exception:
+                    logging.warning("Unable to delete old cache file %s", cache_file)
+                    pass
+    @property
+    def num_frames(self) -> int:
+        """Return the number of frames in the pose_est file."""
+        return self._num_frames
+    @property
+    def identities(self) -> list:
+        """Return the list of integer identities generated from the file."""
+        return self._identities
+    @property
+    def num_identities(self) -> int:
+        """Get the number of identities in the pose file."""
+        return len(self._identities)
+    @property
+    def cm_per_pixel(self) -> float | None:
+        """Get centimeters per pixel for the video/pose (initialized to None by __init__)."""
+        return self._cm_per_pixel
+    @property
+    def fps(self) -> int:
+        """Get the frames per second passed to the constructor."""
+        return self._fps
+    @property
+    def pose_file(self) -> Path:
+        """Get the path to the pose file."""
+        return self._path
+    @property
+    def hash(self):
+        """Get the hash of the pose file, as computed by hash_file() in __init__."""
+        return self._hash
+    @abstractmethod
+    def get_points(self, frame_index: int, identity: int, scale: float | None = None):
+        """Return points and point masks for an individual frame.
+        Args:
+            frame_index: frame index of points and masks to be returned
+            identity: identity to return points for
+            scale: optional scale factor, set to cm_per_pixel to convert
+                poses from pixel coordinates to cm coordinates
+        Returns:
+            numpy array of points (12,2), numpy array of point masks (12,).
+            compute_all_bearings() skips frames where the returned points
+            are None, so implementations may return None for the points.
+        """
+        pass
+    @abstractmethod
+    def get_identity_poses(self, identity: int, scale: float | None = None):
+        """Return all points and point masks for an identity.
+        Args:
+            identity: identity to return points for
+            scale: optional scale factor, set to cm_per_pixel to convert
+                poses from pixel coordinates to cm coordinates
+        Returns:
+            numpy array of points (#frames, 12, 2), numpy array of point masks (#frames, 12)
+        """
+        pass
+    @abstractmethod
+    def get_identity_point_mask(self, identity):
+        """Get the point mask array for a given identity.
+        Args:
+            identity: identity to return point mask for
+        Returns:
+            array of point masks (#frames, 12)
+        """
+        pass
+    @abstractmethod
+    def get_reduced_point_mask(self):
+        """Return a boolean array of length 12 indicating which keypoints are valid.
+        Determines which keypoints are valid for any identity across all frames.
+        compute_all_bearings() reads the BASE_NECK entry to decide whether to fall
+        back to the nose keypoint.
+        Returns:
+            numpy array of shape (12,) with boolean values indicating validity
+            of each keypoint.
+        """
+        pass
+    def get_connected_segments(self):
+        """Get the segments to use for rendering connections between the keypoints.
+        This base implementation always returns FULL_CONNECTED_SEGMENTS, the
+        segments for the full 12 keypoint pose.
+        Returns:
+            tuple of tuples, where each inner tuple contains the indexes of the
+            keypoints that form a connected segment
+        """
+        return PoseEstimation.FULL_CONNECTED_SEGMENTS
+    @abstractmethod
+    def identity_mask(self, identity):
+        """Get the identity mask (indicates if the specified identity is present in each frame).
+        Args:
+            identity: identity to get masks for
+        Returns:
+            numpy array of size (#frames,)
+        """
+        pass
+    @property
+    @abstractmethod
+    def identity_to_track(self):
+        """Get the identity to track mapping for this file."""
+        pass
+    @property
+    @abstractmethod
+    def format_major_version(self):
+        """Get an integer giving the major version of the pose file format."""
+        pass
+    @property
+    def static_objects(self):
+        """Get the static objects from the pose file (an empty dict after __init__)."""
+        return self._static_objects
+    def get_identity_convex_hulls(self, identity):
+        """get a list of length #frames containing convex hulls for the given identity.
+        The convex hulls are calculated using all valid points except for the
+        middle of tail and tip of tail points.
+        Args:
+            identity: identity to return points for
+        Returns:
+            the convex hulls in pixel units (array elements will be None
+            if there is no valid convex hull for that frame)
+        """
+        if identity in self._convex_hull_cache:
+            return self._convex_hull_cache[identity]
+        else:
+            convex_hulls = None
+            path = None
+            if self._cache_dir is not None:
+                path = (
+                    self._cache_dir
+                    / "convex_hulls"
+                    / self._path.with_suffix("").name
+                    / f"convex_hulls_{identity}.pickle"
+                )
+                path.parents[0].mkdir(mode=0o775, parents=True, exist_ok=True)
+                try:
+                    with path.open("rb") as f:
+                        convex_hulls = joblib.load(f)
+                except Exception:
+                    # we weren't able to read in the cached convex hulls,
+                    # just ignore the exception and we'll generate them
+                    pass
+            if convex_hulls is None:
+                points, point_masks = self.get_identity_poses(identity)
+                # Omit tail from convex hull
+                body_points = points[:, :-2, :]
+                body_point_masks = point_masks[:, :-2]
+                convex_hulls = []
+                for frame_index in range(self.num_frames):
+                    if sum(body_point_masks[frame_index, :]) >= 3:
+                        filtered_points = body_points[
+                            frame_index, body_point_masks[frame_index, :] == 1, :
+                        ]
+                        convex_hulls.append(MultiPoint(filtered_points).convex_hull)
+                    else:
+                        convex_hulls.append(None)
+                if path:
+                    with path.open("wb") as f:
+                        joblib.dump(convex_hulls, f)
+            self._convex_hull_cache[identity] = convex_hulls
+            return convex_hulls
+    def compute_bearing(self, points: np.ndarray, use_nose: bool = False):
+        """compute the bearing of the animal using base tail and base neck keypoints
+        Args:
+            points (np.ndarray): the points for a single frame (12,2) array
+            use_nose (bool): use nose keypoint instead of base neck, used when
+              we have a reduced keypoint pose that lacks base neck
+        """
+        # fall back to use nose instead of base neck if base neck is absent from this pose file
+        # (for example, 5 keypoint pose instead of 12)
+        if use_nose:
+            p1_xy = points[self.KeypointIndex.NOSE.value].astype(np.float32)
+        else:
+            p1_xy = points[self.KeypointIndex.BASE_NECK.value].astype(np.float32)
+        p2_xy = points[self.KeypointIndex.BASE_TAIL.value].astype(np.float32)
+        offset_xy = p1_xy - p2_xy
+        angle_rad = np.arctan2(offset_xy[1], offset_xy[0])
+        return np.degrees(angle_rad)
+    def compute_all_bearings(self, identity):
+        """compute the bearing for each frame for a given identity"""
+        use_nose = not self.get_reduced_point_mask()[self.KeypointIndex.BASE_NECK.value]
+        if use_nose:
+            logging.warning("Falling back to using nose keypoint for bearing computation")
+        bearings = np.full(self.num_frames, np.nan, dtype=np.float32)
+        for i in range(self.num_frames):
+            points, mask = self.get_points(i, identity)
+            if points is not None:
+                bearings[i] = self.compute_bearing(points, use_nose)
+        return bearings
+    @staticmethod
+    def get_pose_file_attributes(path: Path) -> dict:
+        """get the attributes from the pose file's hdf5 file"""
+        with h5py.File(path, "r") as pose_h5:
+            attrs = dict(pose_h5.attrs)
+            attrs["poseest"] = dict(pose_h5["poseest"].attrs)
+            return attrs
+    @property
+    def num_lixit_keypoints(self) -> int:
+        """Get the number of lixit keypoints.
+        This base implementation always returns 0 (always 0 for pose file versions <5).
+        """
+        return 0
+    @property
+    def external_identities(self) -> list[str] | None:
+        """Get the jabs identity to external identity mapping (None after __init__)."""
+        return self._external_identities
+    def identity_index_to_display(self, identity_index: int) -> str:
+        """Convert an identity index to a display string.
+        Args:
+            identity_index (int): The identity index to convert.
+        Returns:
+            str: The display string for the identity.
+        """
+        if self.external_identities and 0 <= identity_index < len(self.external_identities):
+            return self.external_identities[identity_index]
+        return str(identity_index)
+    def check_cache_version(self) -> bool:
+        """Check if the cache version matches the expected version.
+        Returns:
+            bool: True if the cache version matches, False otherwise.
+        """
+        try:
+            with h5py.File(self._cache_file_path(), "r") as cache_h5:
+                cache_version = cache_h5.attrs.get("cache_file_version", None)
+                return cache_version == self._CACHE_FILE_VERSION
+        except Exception:
+            return False
+    def _cache_file_path(self) -> Path | None:
+        """Get the path to the cache file for this pose file.
+        Returns:
+            Path | None: The path to the cache file, or None if no cache directory is set.
+        """
+        if self._cache_dir is None:
+            return None
+        filename = self._path.name.replace(".h5", "_cache.h5")
+        return self._cache_dir / filename

jabs_core-0.1.0a1/src/jabs/core/constants.py ADDED Viewed

@@ -0,0 +1,15 @@
+ORG_NAME = "JAX"
+APP_NAME = "JABS"
+APP_NAME_LONG = f"{ORG_NAME} Animal Behavior System"
+# a hard coded random seed used for the final training
+# This is not used during cross-validation, but to ensure that final classifier is reproducible
+# we use this fixed seed when training the final model after cross validation.
+FINAL_TRAIN_SEED = 0xAB3BDB
+# some defaults for compressing hdf5 output
+COMPRESSION = "gzip"
+COMPRESSION_OPTS_DEFAULT = 6
+# settings keys for project settings stored in the project.json file
+CV_GROUPING_KEY = "cv_grouping"

jabs_core-0.1.0a1/src/jabs/core/enums/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""Module for defining enums used in JABS"""
+from .classifier_types import ClassifierType
+from .cv_grouping import DEFAULT_CV_GROUPING_STRATEGY, CrossValidationGroupingStrategy
+from .units import ProjectDistanceUnit
+__all__ = [
+    "DEFAULT_CV_GROUPING_STRATEGY",
+    "ClassifierType",
+    "CrossValidationGroupingStrategy",
+    "ProjectDistanceUnit",
+]

jabs_core-0.1.0a1/src/jabs/core/enums/classifier_types.py ADDED Viewed

@@ -0,0 +1,9 @@
+from enum import Enum
+class ClassifierType(str, Enum):
+    """Classifier type for the project.
+    Members are also str instances whose values are the display names below.
+    """
+    RANDOM_FOREST = "Random Forest"
+    CATBOOST = "CatBoost"
+    XGBOOST = "XGBoost"

jabs_core-0.1.0a1/src/jabs/core/enums/cv_grouping.py ADDED Viewed

@@ -0,0 +1,15 @@
+from enum import Enum
+class CrossValidationGroupingStrategy(str, Enum):
+    """Cross-validation grouping type for the project.
+    Inheriting from str allows for easy serialization to/from JSON (the enum will
+    automatically be serialized using the enum value).
+    """
+    # group cross-validation splits by individual animal
+    INDIVIDUAL = "Individual Animal"
+    # group cross-validation splits by video
+    VIDEO = "Video"
+DEFAULT_CV_GROUPING_STRATEGY = CrossValidationGroupingStrategy.INDIVIDUAL

jabs_core-0.1.0a1/src/jabs/core/enums/units.py ADDED Viewed

@@ -0,0 +1,8 @@
+import enum
+class ProjectDistanceUnit(enum.IntEnum):
+    """Distance unit for the project (pixels or centimeters)."""
+    PIXEL = 0
+    CM = 1

jabs_core-0.1.0a1/src/jabs/core/exceptions.py ADDED Viewed

@@ -0,0 +1,28 @@
+class PoseHashException(Exception):
+    """Exception raised when the hash of a pose file does not match the expected value."""
+    pass
+class PoseIdEmbeddingException(Exception):
+    """Exception raised for invalid instance_embed_id values in pose file."""
+    pass
+class MissingBehaviorError(Exception):
+    """Exception raised when a behavior is not found in the prediction file."""
+    pass
+class FeatureVersionException(Exception):
+    """exception raised when the version of the features in the h5 file is not compatible with the current version of JABS"""
+    pass
+class DistanceScaleException(Exception):
+    """exception raised when the distance scale factor in the h5 file don't match what the classifier expects"""
+    pass

jabs_core-0.1.0a1/src/jabs/core/utils/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""JABS utilities"""
+from .update_checker import check_for_update, is_pypi_install
+from .utilities import get_bool_env_var, hash_file, hide_stderr
+__all__ = [
+    "check_for_update",
+    "get_bool_env_var",
+    "hash_file",
+    "hide_stderr",
+    "is_pypi_install",
+]

jabs_core-0.1.0a1/src/jabs/core/utils/pose_util.py ADDED Viewed

@@ -0,0 +1,36 @@
+from collections.abc import Generator, Iterable
+import numpy as np
+from jabs.core.abstract import PoseEstimation
+def gen_line_fragments(
+    connected_segments: Iterable[Iterable[PoseEstimation.KeypointIndex]],
+    exclude_points: np.ndarray,
+) -> Generator[list[int], None, None]:
+    """generate line fragments from the connected segments.
+    This will break up segments if a point within the segment is excluded,
+    or will remove the segment completely if it does not have at least two points
+    Args:
+        connected_segments: Iterable of Iterables of KeypointIndex, where each inner
+            Iterable represents a segment of connected keypoints
+        exclude_points: numpy array of points to exclude when generating segments
+    Yields:
+        yields lists of Keypoint indexes that make up the segments to draw
+    """
+    curr_fragment = []
+    for curr_pt_indexes in connected_segments:
+        for curr_pt_index in curr_pt_indexes:
+            if curr_pt_index.value in exclude_points:
+                if len(curr_fragment) >= 2:
+                    yield curr_fragment
+                curr_fragment = []
+            else:
+                curr_fragment.append(curr_pt_index.value)
+        if len(curr_fragment) >= 2:
+            yield curr_fragment
+        curr_fragment = []

jabs_core-0.1.0a1/src/jabs/core/utils/process_pool_manager.py ADDED Viewed

@@ -0,0 +1,223 @@
+import contextlib
+import logging
+import os
+import threading
+import time
+from collections.abc import Callable, Iterable
+from concurrent.futures import Future, ProcessPoolExecutor
+from multiprocessing import shared_memory
+from typing import Any
+logger = logging.getLogger(__name__)
+MAX_POOL_WORKERS = 6
+def _noop() -> None:
+    """Do nothing; submitted by ProcessPoolManager.warm_up() to warm up worker processes.
+    Must be at module level to be pickleable by ProcessPoolExecutor.
+    """
+    return None
+class ProcessPoolManager:
+    """
+    Manage a shared ProcessPoolExecutor with warm-up and safe shutdown.
+    Attributes:
+        _max_workers (int | None): Maximum number of worker processes. Passed to
+            ProcessPoolExecutor when created.
+        _initializer (Callable[..., object] | None): Optional function executed in
+            each worker process when it starts.
+        _initargs (tuple[object, ...]): Arguments passed to the initializer.
+        _name (str): Logical name for debugging/logging.
+        _executor (ProcessPoolExecutor | None): The lazily-created underlying
+            process pool. None until first use.
+        _lock (threading.RLock): Protects access to `_executor` and `_is_shutdown`.
+        _is_shutdown (bool): Whether shutdown() has been called. Prevents reuse once
+            the pool has been shut down.
+    Args:
+        max_workers (int | None): Requested number of worker processes. Defaults to
+            os.cpu_count() if None; the value is clamped to the range
+            [1, MAX_POOL_WORKERS].
+        initializer (Callable | None): Optional function run in each worker process
+            when it starts.
+        initargs (tuple): Arguments passed to the initializer.
+        name (str): Optional name used only for debugging/logging.
+    """
+    def __init__(
+        self,
+        max_workers: int | None = None,
+        *,
+        initializer: Callable[..., object] | None = None,
+        initargs: tuple[object, ...] = (),
+        name: str = "ProcessPoolManager",
+    ) -> None:
+        logger.debug(f"PPM __init__ name={name} id={id(self)}")
+        requested_workers = max_workers or (os.cpu_count() or 1)
+        self._max_workers: int = max(1, min(requested_workers, MAX_POOL_WORKERS))
+        self._initializer = initializer
+        self._initargs = initargs
+        self._name = name
+        self._executor: ProcessPoolExecutor | None = None
+        self._lock = threading.RLock()  # protects _executor and _is_shutdown
+        self._is_shutdown = False
+        self._cancel_shm: shared_memory.SharedMemory | None = None
+    @property
+    def max_workers(self) -> int:
+        """Maximum number of worker processes in the pool (after clamping in __init__)."""
+        return self._max_workers
+    @property
+    def name(self) -> str:
+        """Logical name of the ProcessPoolManager for debugging/logging."""
+        return self._name
+    def _ensure_cancel_shm(self) -> shared_memory.SharedMemory:
+        """Create the shared-memory cancel flag on first use, if not shut down."""
+        with self._lock:
+            if self._is_shutdown:
+                raise RuntimeError(f"{self._name} has been shut down")
+            if self._cancel_shm is None:
+                shm = shared_memory.SharedMemory(create=True, size=1)
+                # 0 = not cancelled, 1 = cancelled
+                shm.buf[0] = 0
+                self._cancel_shm = shm
+            return self._cancel_shm
+    @property
+    def cancel_flag_name(self) -> str | None:
+        """Name of the shared-memory cancel flag, or None if shut down.
+        Callers can pass this name to worker functions so they can open the
+        shared memory and cooperatively check for cancellation.
+        """
+        with self._lock:
+            if self._is_shutdown:
+                return None
+        shm = self._ensure_cancel_shm()
+        return shm.name
+    def set_cancelled(self) -> None:
+        """Set the cancel flag to 1, signalling cooperative cancellation."""
+        with self._lock:
+            if self._is_shutdown:
+                return
+            shm = self._cancel_shm or self._ensure_cancel_shm()
+            shm.buf[0] = 1
+    def clear_cancelled(self) -> None:
+        """Reset the cancel flag back to 0."""
+        with self._lock:
+            if self._cancel_shm is not None:
+                self._cancel_shm.buf[0] = 0
+    def _ensure_executor(self) -> ProcessPoolExecutor:
+        """Create the executor on first use, if not shut down."""
+        with self._lock:
+            if self._is_shutdown:
+                raise RuntimeError(f"{self._name} has been shut down")
+            if self._executor is None:
+                # noinspection PyTypeChecker
+                self._executor = ProcessPoolExecutor(
+                    max_workers=self._max_workers,
+                    initializer=self._initializer,
+                    initargs=self._initargs,
+                )
+            return self._executor
+    def submit(self, fn: Callable[..., Any], *args: Any, **kwargs: Any) -> Future:
+        """Submit a task to the process pool, creating the pool if needed.
+        Raises:
+            RuntimeError: if the manager has been shut down.
+        """
+        executor = self._ensure_executor()
+        return executor.submit(fn, *args, **kwargs)
+    def map(
+        self,
+        fn: Callable[[Any], Any],
+        iterable: Iterable[Any],
+        chunksize: int = 1,
+    ) -> Iterable[Any]:
+        """Map over an iterable using the process pool, creating the pool if needed.
+        Returns whatever ProcessPoolExecutor.map returns for the given arguments.
+        Raises:
+            RuntimeError: if the manager has been shut down.
+        """
+        executor = self._ensure_executor()
+        return executor.map(fn, iterable, chunksize=chunksize)
+    def warm_up(self, wait: bool = True) -> None:
+        """Eagerly start worker processes and optionally run trivial tasks.
+        This is useful if you want the cost of spawning processes and running
+        initializers to happen at a controlled time (e.g., on app startup)
+        instead of on the first real submit().
+        Args:
+            wait (bool): If True, submit and wait for trivial tasks to complete
+                in each worker process. This ensures that all workers are fully
+                initialized and ready to accept real tasks. If False, only starts
+                the processes without waiting for task completion.
+        """
+        start_time = time.time()
+        logger.debug(f"PPM warm_up name={self._name} id={id(self)}")
+        executor = self._ensure_executor()
+        self._ensure_cancel_shm()
+        if not wait:
+            return
+        futures = [executor.submit(_noop) for _ in range(self._max_workers)]
+        for f in futures:
+            with contextlib.suppress(Exception):
+                f.result()
+        elapsed = time.time() - start_time
+        logger.debug(
+            f"PPM warm_up name={self._name} id={id(self)} COMPLETED in {elapsed:.2f} seconds"
+        )
+    def shutdown(self, *, wait: bool = True, cancel_futures: bool = False) -> None:
+        """Explicitly shut down the process pool.
+        After shutdown, the manager cannot be reused.
+        """
+        with self._lock:
+            self._is_shutdown = True
+            executor = self._executor
+            if executor is not None:
+                with contextlib.suppress(Exception):
+                    executor.shutdown(wait=wait, cancel_futures=cancel_futures)
+                self._executor = None
+            if self._cancel_shm is not None:
+                with contextlib.suppress(Exception):
+                    self._cancel_shm.close()
+                    self._cancel_shm.unlink()
+                self._cancel_shm = None
+    def __enter__(self) -> "ProcessPoolManager":
+        """Enter context manager, returning self.
+        Allows using the manager in a 'with' statement for automatic cleanup.
+        The executor is created here, so entering a manager that was already shut
+        down raises RuntimeError.
+        """
+        self._ensure_executor()
+        return self
+    def __exit__(self, exc_type, exc, tb) -> None:
+        """Exit context manager, shutting down the process pool and waiting for it."""
+        self.shutdown(wait=True, cancel_futures=False)
+    def __del__(self) -> None:
+        """Best-effort cleanup if user code forgets to call shutdown().
+        Shuts down without waiting and cancels pending futures; any exception is
+        suppressed.
+        Note: __del__ is not guaranteed to run at interpreter shutdown, so
+        you should still call shutdown() or use the manager as a context manager.
+        """
+        with contextlib.suppress(Exception):
+            self.shutdown(wait=False, cancel_futures=True)

jabs_core-0.1.0a1/src/jabs/core/utils/sampleposeintervals.py ADDED Viewed

@@ -0,0 +1,269 @@
+import argparse
+import os
+import random
+import cv2
+import h5py
+# Command line example of using this script:
+#
+#   share_root='/run/user/1000/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar'
+#   python src/utils/sampleposeintervals.py \
+#       --batch-file UCSD_Rotta_TS_v2.txt \
+#       --root-dir "${share_root}" \
+#       --out-dir UCSD_Rotta_TS_v2-intervals \
+#       --out-frame-count 9000 \
+#       --start-frame 54000 \
+#       --pose-version 3
+#
+#   share_root='/media/sheppk/TOSHIBA EXT/rotta-data/UCSD_Rotta_TS_v2-vidcache'
+#   python src/utils/sampleposeintervals.py \
+#       --batch-file "${share_root}/batch.txt" \
+#       --root-dir "${share_root}" \
+#       --out-dir UCSD_Rotta_TS_v2-intervals-2021-05-25 \
+#       --out-frame-count 9000 \
+#       --start-frame 27000 \
+#       --pose-version 3
+#   python src/utils/sampleposeintervals.py \
+#       --batch-file ~/projects/social-interaction/data/bxd-batch-early-morning-2021-06-09.txt \
+#       --root-dir '/run/user/1000/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar' \
+#       --out-dir bxd-batch-early-morning-2021-06-09 \
+#       --out-frame-count 9000 \
+#       --start-frame 54000 \
+#       --pose-version 3
+#   python src/utils/sampleposeintervals.py \
+#       --batch-file temp/B6J-and-BTBR-3M-strangers-4-day-rand-2021-05-24.txt \
+#       --root-dir '/media/sheppk/TOSHIBA EXT/rotta-data/B6J-and-BTBR-3M-strangers-4-day-rand-2021-05-24' \
+#       --out-dir B6J-and-BTBR-3M-strangers-4-day-rand-samples-2021-05-24 \
+#       --out-frame-count 3600 \
+#       --start-frame 6000 \
+#       --pose-version 3
+#   python src/utils/sampleposeintervals.py \
+#       --batch-file temp/B6J-and-BTBR-3M-strangers-4-day-rand-2021-05-24.txt \
+#       --root-dir '/media/sheppk/TOSHIBA EXT/rotta-data/B6J_and_BTBR_3M_stranger_4day_2021-07-20' \
+#       --out-dir temp/B6J-and-BTBR-3M-strangers-4-day-rand-samples-2021-08-05 \
+#       --out-frame-count 3600 \
+#       --start-frame 6000 \
+#       --only-pose \
+#       --pose-version 4
+#   rclone copy --transfers 4 --progress \
+#       --include-from /home/sheppk/projects/behavior-classifier/temp/BTBR_3M_stranger_4day-subset-avi.txt \
+#       "labdropbox:/KumarLab's shared workspace/VideoData/MDS_Tests/BTBR_3M_stranger_4day" \
+#       /media/sheppk/TOSHIBA\ EXT/BTBR_3M_stranger_4day-2021-08-24
+#   rclone copy --transfers 4 --progress \
+#       --include-from /home/sheppk/projects/behavior-classifier/temp/BTBR_3M_stranger_4day-subset-pose.txt \
+#       /home/sheppk/sshfs/winterproj/bgeuther/IdentityInfer/Data/BTBR_3M_stranger_4day \
+#       /media/sheppk/TOSHIBA\ EXT/BTBR_3M_stranger_4day-2021-08-24
+#   python src/utils/sampleposeintervals.py \
+#       --batch-file /media/sheppk/TOSHIBA\ EXT/BTBR_3M_stranger_4day-2021-08-24/batch.txt \
+#       --root-dir /media/sheppk/TOSHIBA\ EXT/BTBR_3M_stranger_4day-2021-08-24 \
+#       --out-dir /media/sheppk/TOSHIBA\ EXT/BTBR_3M_stranger_4day-2021-08-24-samples \
+#       --out-frame-count 3600 \
+#       --start-frame 6000 \
+#       --pose-version 4
+def main():
+    """sample pose intervals"""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--batch-file",
+        help="path to the file that is a new-line separated list of all videos to process",
+        required=True,
+    )
+    parser.add_argument(
+        "--root-dir",
+        help="the root directory. All paths given in the batch files are relative to this root",
+        required=True,
+    )
+    parser.add_argument(
+        "--out-dir",
+        help="output directory. The videos and pose files for sampled intervals are saved to this dir",
+        required=True,
+    )
+    parser.add_argument(
+        "--out-frame-count",
+        help="this defines how many frames to save. Assuming 30fps a value of 1800 corresponds to one minute",
+        required=True,
+        type=int,
+    )
+    parser.add_argument(
+        "--start-frame",
+        help="this argument specifies which frame we start at. If this option is not specified we randomly select"
+        " a start frame from the video.",
+        required=False,
+        type=int,
+    )
+    parser.add_argument(
+        "--pose-version",
+        help="give the integer version number that should be used for pose",
+        default=2,
+        type=int,
+        choices=(2, 3, 4, 5),
+    )
+    parser.add_argument(
+        "--only-pose",
+        help="if specified this option will sample pose data and exclude video from output",
+        action="store_true",
+    )
+    args = parser.parse_args()
+    if args.pose_version == 2:
+        pose_suffix = "_pose_est_v2.h5"
+    elif args.pose_version == 3:
+        pose_suffix = "_pose_est_v3.h5"
+    elif args.pose_version == 4:
+        pose_suffix = "_pose_est_v4.h5"
+    elif args.pose_version == 5:
+        pose_suffix = "_pose_est_v5.h5"
+    else:
+        raise NotImplementedError("pose version not implemented: " + str(args.pose_version))
+    os.makedirs(args.out_dir, exist_ok=True)
+    with open(args.batch_file) as batch_file:
+        for line in batch_file:
+            vid_filename = line.strip()
+            if vid_filename:
+                print("Processing:", vid_filename)
+                vid_path = os.path.join(args.root_dir, vid_filename)
+                vid_path_root, _ = os.path.splitext(vid_path)
+                pose_in_path = vid_path_root + pose_suffix
+                if not args.only_pose and not os.path.isfile(vid_path):
+                    print("WARNING: missing video path:", vid_path)
+                    continue
+                if not os.path.isfile(pose_in_path):
+                    print("WARNING: missing pose path:", pose_in_path)
+                    continue
+                with h5py.File(pose_in_path, "r") as pose_in:
+                    frame_count = pose_in["poseest"]["confidence"].shape[0]
+                    last_candidate_frame = frame_count - args.out_frame_count
+                    if last_candidate_frame <= 0:
+                        print(
+                            f"WARNING: {vid_filename} skipped because it only contains {frame_count} frames"
+                        )
+                        continue
+                    if args.start_frame is None:
+                        out_start_frame_index = random.randrange(last_candidate_frame)
+                    else:
+                        out_start_frame_index = args.start_frame - 1
+                    vid_out_filename = vid_filename.replace("/", "+").replace("\\", "+")
+                    vid_out_path = os.path.join(args.out_dir, vid_out_filename)
+                    vid_out_path_root, _ = os.path.splitext(vid_out_path)
+                    vid_out_path = (
+                        vid_out_path_root + "_" + str(out_start_frame_index + 1) + ".avi"
+                    )
+                    pose_out_path = (
+                        vid_out_path_root + "_" + str(out_start_frame_index + 1) + pose_suffix
+                    )
+                    with h5py.File(pose_out_path, "w") as pose_out:
+                        # pose v2 stuff
+                        start = out_start_frame_index
+                        stop = start + args.out_frame_count
+                        pose_out["poseest/points"] = pose_in["poseest/points"][start:stop, ...]
+                        pose_out["poseest/confidence"] = pose_in["poseest/confidence"][
+                            start:stop, ...
+                        ]
+                        # pose v3 stuff
+                        if "instance_count" in pose_in["poseest"]:
+                            pose_out["poseest/instance_count"] = pose_in["poseest/instance_count"][
+                                start:stop, ...
+                            ]
+                        if "instance_embedding" in pose_in["poseest"]:
+                            pose_out["poseest/instance_embedding"] = pose_in[
+                                "poseest/instance_embedding"
+                            ][start:stop, ...]
+                        if "instance_track_id" in pose_in["poseest"]:
+                            pose_out["poseest/instance_track_id"] = pose_in[
+                                "poseest/instance_track_id"
+                            ][start:stop, ...]
+                        # pose v4 stuff
+                        if "id_mask" in pose_in["poseest"]:
+                            pose_out["poseest/id_mask"] = pose_in["poseest/id_mask"][
+                                start:stop, ...
+                            ]
+                        if "identity_embeds" in pose_in["poseest"]:
+                            pose_out["poseest/identity_embeds"] = pose_in[
+                                "poseest/identity_embeds"
+                            ][start:stop, ...]
+                        if "instance_embed_id" in pose_in["poseest"]:
+                            pose_out["poseest/instance_embed_id"] = pose_in[
+                                "poseest/instance_embed_id"
+                            ][start:stop, ...]
+                        if "instance_id_center" in pose_in["poseest"]:
+                            pose_out["poseest/instance_id_center"] = pose_in[
+                                "poseest/instance_id_center"
+                            ][:]
+                        # v5 specific stuff
+                        if "static_objects" in pose_in:
+                            static_group = pose_out.create_group("static_objects")
+                            for dataset in pose_in["static_objects"]:
+                                static_group.create_dataset(
+                                    dataset, data=pose_in["static_objects"][dataset]
+                                )
+                        # copy attributes
+                        for attr in pose_in["poseest"].attrs:
+                            pose_out["poseest"].attrs[attr] = pose_in["poseest"].attrs[attr]
+                    cap = None
+                    writer = None
+                    if not args.only_pose:
+                        try:
+                            cap = cv2.VideoCapture(vid_path)
+                            if not cap.isOpened():
+                                print(f"WARNING: failed to open {vid_filename}")
+                                continue
+                            cap.set(cv2.CAP_PROP_POS_FRAMES, out_start_frame_index)
+                            if not cap.isOpened():
+                                print(f"WARNING: failed to seek to start frame {vid_filename}")
+                                continue
+                            writer = cv2.VideoWriter(
+                                vid_out_path,
+                                cv2.VideoWriter_fourcc(*"MJPG"),
+                                30,
+                                (
+                                    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
+                                    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
+                                ),
+                            )
+                            for _ in range(args.out_frame_count):
+                                if not cap.isOpened():
+                                    print(f"WARNING: {vid_filename} ended prematurely")
+                                    break
+                                ret, frame = cap.read()
+                                if ret:
+                                    writer.write(frame)
+                                else:
+                                    print(f"WARNING: {vid_filename} ended prematurely")
+                                    break
+                        finally:
+                            if cap is not None:
+                                cap.release()
+                            if writer is not None:
+                                writer.release()
+# Run the sampler only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()

jabs_core-0.1.0a1/src/jabs/core/utils/update_checker.py ADDED Viewed

@@ -0,0 +1,54 @@
+"""Utilities for checking PyPI for JABS updates."""
+import json
+import logging
+import urllib.request
+from importlib import metadata
+from packaging.version import parse as parse_version
+# TODO: Consider moving this to jabs.core
+from jabs.version import version_str
+logger = logging.getLogger(__name__)
+def check_for_update() -> tuple[bool, str | None, str]:
+    """Check PyPI for newer version of jabs-behavior-classifier.
+    Returns:
+        tuple: (has_update: bool, latest_version: str | None, current_version: str)
+            - has_update: True if a newer version is available
+            - latest_version: Latest version string from PyPI, or None if check failed
+            - current_version: Current installed version string
+    """
+    try:
+        current_version = version_str()
+        with urllib.request.urlopen(
+            "https://pypi.org/pypi/jabs-behavior-classifier/json", timeout=5
+        ) as response:
+            data = json.loads(response.read())
+            latest_version = data["info"]["version"]
+        has_update = parse_version(latest_version) > parse_version(current_version)
+        return has_update, latest_version, current_version
+    except Exception as e:
+        logger.warning(f"Failed to check for updates: {e}")
+        return False, None, version_str()
+def is_pypi_install() -> bool:
+    """Check if jabs-behavior-classifier was installed from PyPI.
+    Returns:
+        bool: True if installed via pip from PyPI, False otherwise
+    """
+    try:
+        dist = metadata.distribution("jabs-behavior-classifier")
+        # Check if installer was pip
+        installer = dist.read_text("INSTALLER")
+        return installer is not None and installer.strip() in ("pip", "uv")
+    except Exception as e:
+        logger.debug(f"Could not determine installation method: {e}")
+        return False

jabs_core-0.1.0a1/src/jabs/core/utils/utilities.py ADDED Viewed

@@ -0,0 +1,64 @@
+import hashlib
+import os
+import sys
+from collections.abc import Generator
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Any
+@contextmanager
+def hide_stderr() -> Generator[int, Any, None]:
+    """Context manager to temporarily suppress output to standard error (stderr).
+    Redirects all output sent to stderr to os.devnull while the context is active,
+    restoring stderr to its original state upon exit.
+    Yields:
+        int: The file descriptor for stderr.
+    """
+    fd = sys.stderr.fileno()
+    # copy fd before it is overwritten
+    with os.fdopen(os.dup(fd), "wb") as copied:
+        sys.stderr.flush()
+        # open destination
+        with open(os.devnull, "wb") as fout:
+            os.dup2(fout.fileno(), fd)
+        try:
+            yield fd
+        finally:
+            # restore stderr to its previous value
+            sys.stderr.flush()
+            os.dup2(copied.fileno(), fd)
+def hash_file(file: Path):
+    """return hash"""
+    chunk_size = 8192
+    with file.open("rb") as f:
+        h = hashlib.blake2b(digest_size=20)
+        c = f.read(chunk_size)
+        while c:
+            h.update(c)
+            c = f.read(chunk_size)
+    return h.hexdigest()
+def get_bool_env_var(var_name, default_value=False) -> bool:
+    """Gets a boolean value from an environment variable.
+    Args:
+        var_name: The name of the environment variable.
+        default_value: The default value to return if the variable is
+            not set or invalid.
+    Returns:
+        A boolean value.
+    """
+    value = os.getenv(var_name)
+    if value is None:
+        return default_value
+    return value.lower() in ("true", "1", "yes", "on", "y", "t")