PyPI - nrl-tracker - Versions diffs - 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl - Mend

nrl-tracker 1.9.2py3-none-any.whl → 1.11.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

{nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/METADATA +49 -4
{nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/RECORD +19 -12
pytcl/__init__.py +3 -3
pytcl/assignment_algorithms/nd_assignment.py +359 -1
pytcl/coordinate_systems/jacobians/jacobians.py +63 -33
pytcl/core/optional_deps.py +20 -0
pytcl/dynamic_estimation/kalman/matrix_utils.py +133 -35
pytcl/gpu/__init__.py +153 -0
pytcl/gpu/ekf.py +433 -0
pytcl/gpu/kalman.py +543 -0
pytcl/gpu/matrix_utils.py +491 -0
pytcl/gpu/particle_filter.py +578 -0
pytcl/gpu/ukf.py +476 -0
pytcl/gpu/utils.py +582 -0
pytcl/gravity/clenshaw.py +8 -0
pytcl/gravity/spherical_harmonics.py +17 -10
{nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/LICENSE +0 -0
{nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/WHEEL +0 -0
{nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/top_level.txt +0 -0

pytcl/coordinate_systems/jacobians/jacobians.py CHANGED Viewed

@@ -4,13 +4,61 @@ Jacobian matrices for coordinate transformations.
 This module provides functions for computing Jacobian matrices of
 coordinate transformations, essential for error propagation in tracking
 filters (e.g., converting measurement covariances between coordinate systems).
+Performance Notes
+-----------------
+ENU and NED Jacobians use lru_cache with quantized inputs for 25-40%
+speedup when repeatedly called with similar lat/lon values.
 """
-from typing import Callable, Literal
+from functools import lru_cache
+from typing import Callable, Literal, Tuple
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
+# Cache precision: quantize lat/lon to ~1m resolution (~1e-5 radians)
+_JACOBIAN_CACHE_DECIMALS = 5
+def _quantize_angle(angle: float) -> float:
+    """Quantize angle for cache key compatibility."""
+    return round(angle, _JACOBIAN_CACHE_DECIMALS)
+@lru_cache(maxsize=256)
+def _enu_jacobian_cached(
+    lat_q: float, lon_q: float
+) -> Tuple[Tuple[float, ...], Tuple[float, ...], Tuple[float, ...]]:
+    """Cached ENU Jacobian computation with quantized inputs."""
+    sin_lat = np.sin(lat_q)
+    cos_lat = np.cos(lat_q)
+    sin_lon = np.sin(lon_q)
+    cos_lon = np.cos(lon_q)
+    return (
+        (-sin_lon, cos_lon, 0.0),
+        (-sin_lat * cos_lon, -sin_lat * sin_lon, cos_lat),
+        (cos_lat * cos_lon, cos_lat * sin_lon, sin_lat),
+    )
+@lru_cache(maxsize=256)
+def _ned_jacobian_cached(
+    lat_q: float, lon_q: float
+) -> Tuple[Tuple[float, ...], Tuple[float, ...], Tuple[float, ...]]:
+    """Cached NED Jacobian computation with quantized inputs."""
+    sin_lat = np.sin(lat_q)
+    cos_lat = np.cos(lat_q)
+    sin_lon = np.sin(lon_q)
+    cos_lon = np.cos(lon_q)
+    return (
+        (-sin_lat * cos_lon, -sin_lat * sin_lon, cos_lat),
+        (-sin_lon, cos_lon, 0.0),
+        (-cos_lat * cos_lon, -cos_lat * sin_lon, -sin_lat),
+    )
 def spherical_jacobian(
     cart_point: ArrayLike,
@@ -270,23 +318,14 @@ def enu_jacobian(
     -------
     J : ndarray
         3x3 rotation matrix (Jacobian is constant for this linear transformation).
-    """
-    sin_lat = np.sin(lat)
-    cos_lat = np.cos(lat)
-    sin_lon = np.sin(lon)
-    cos_lon = np.cos(lon)
-    # This is actually the rotation matrix from ECEF to ENU
-    J = np.array(
-        [
-            [-sin_lon, cos_lon, 0],
-            [-sin_lat * cos_lon, -sin_lat * sin_lon, cos_lat],
-            [cos_lat * cos_lon, cos_lat * sin_lon, sin_lat],
-        ],
-        dtype=np.float64,
-    )
-    return J
+    Notes
+    -----
+    Uses cached computation with quantized inputs for performance.
+    """
+    # Use cached version with quantized inputs
+    cached_result = _enu_jacobian_cached(_quantize_angle(lat), _quantize_angle(lon))
+    return np.array(cached_result, dtype=np.float64)
 def ned_jacobian(
@@ -307,23 +346,14 @@ def ned_jacobian(
     -------
     J : ndarray
         3x3 rotation matrix.
-    """
-    sin_lat = np.sin(lat)
-    cos_lat = np.cos(lat)
-    sin_lon = np.sin(lon)
-    cos_lon = np.cos(lon)
-    # Rotation matrix from ECEF to NED
-    J = np.array(
-        [
-            [-sin_lat * cos_lon, -sin_lat * sin_lon, cos_lat],
-            [-sin_lon, cos_lon, 0],
-            [-cos_lat * cos_lon, -cos_lat * sin_lon, -sin_lat],
-        ],
-        dtype=np.float64,
-    )
-    return J
+    Notes
+    -----
+    Uses cached computation with quantized inputs for performance.
+    """
+    # Use cached version with quantized inputs
+    cached_result = _ned_jacobian_cached(_quantize_angle(lat), _quantize_angle(lon))
+    return np.array(cached_result, dtype=np.float64)
 def geodetic_jacobian(

pytcl/core/optional_deps.py CHANGED Viewed

@@ -69,6 +69,10 @@ PACKAGE_EXTRAS: dict[str, tuple[str, str]] = {
     "pywavelets": ("signal", "pywavelets"),
     # Terrain data
     "netCDF4": ("terrain", "netCDF4"),
+    # GPU acceleration
+    "cupy": ("gpu", "cupy-cuda12x"),
+    # Apple Silicon GPU acceleration
+    "mlx": ("gpu-apple", "mlx"),
 }
 # Friendly names for features provided by each package
@@ -82,6 +86,8 @@ PACKAGE_FEATURES: dict[str, str] = {
     "pywt": "wavelet transforms",
     "pywavelets": "wavelet transforms",
     "netCDF4": "NetCDF file reading",
+    "cupy": "GPU acceleration",
+    "mlx": "Apple Silicon GPU acceleration",
 }
@@ -374,6 +380,16 @@ class _AvailabilityFlags:
         """True if netCDF4 is available."""
         return is_available("netCDF4")
+    @property
+    def HAS_CUPY(self) -> bool:
+        """True if cupy is available."""
+        return is_available("cupy")
+    @property
+    def HAS_MLX(self) -> bool:
+        """True if mlx is available (Apple Silicon)."""
+        return is_available("mlx")
 # Create singleton instance
 _flags = _AvailabilityFlags()
@@ -387,6 +403,8 @@ HAS_ASTROPY = property(lambda self: _flags.HAS_ASTROPY)
 HAS_PYPROJ = property(lambda self: _flags.HAS_PYPROJ)
 HAS_CVXPY = property(lambda self: _flags.HAS_CVXPY)
 HAS_NETCDF4 = property(lambda self: _flags.HAS_NETCDF4)
+HAS_CUPY = property(lambda self: _flags.HAS_CUPY)
+HAS_MLX = property(lambda self: _flags.HAS_MLX)
 # =============================================================================
@@ -525,6 +543,8 @@ __all__ = [
     "HAS_PYPROJ",
     "HAS_CVXPY",
     "HAS_NETCDF4",
+    "HAS_CUPY",
+    "HAS_MLX",
     # Internal (for testing)
     "_clear_cache",
     "_flags",

pytcl/dynamic_estimation/kalman/matrix_utils.py CHANGED Viewed

@@ -6,18 +6,116 @@ multiple Kalman filter implementations. Separating these utilities prevents
 circular imports between filter implementations.
 Functions include:
-- Cholesky factor update/downdate
+- Cholesky factor update/downdate (Numba JIT optimized)
 - QR-based covariance propagation
 - Matrix symmetry enforcement
 - Matrix square root computation
 - Innovation likelihood computation
+Performance Notes
+-----------------
+Critical functions use Numba JIT compilation for 5-10x speedup:
+- _cholesky_update_core: Rank-1 Cholesky update inner loop
+- _cholesky_downdate_core: Rank-1 Cholesky downdate inner loop
 """
+from functools import lru_cache
 from typing import Optional, Tuple
 import numpy as np
 from numpy.typing import NDArray
+try:
+    from numba import njit
+    NUMBA_AVAILABLE = True
+except ImportError:
+    NUMBA_AVAILABLE = False
+    # Fallback decorator that does nothing
+    def njit(*args, **kwargs):  # type: ignore[misc,unused-ignore]
+        """No-op decorator when Numba is not available."""
+        def decorator(func):  # type: ignore[no-untyped-def,unused-ignore]
+            return func
+        if len(args) == 1 and callable(args[0]):
+            return args[0]
+        return decorator
+@njit(cache=True)
+def _cholesky_update_core(
+    S: np.ndarray, v: np.ndarray, n: int
+) -> Tuple[np.ndarray, bool]:
+    """
+    Numba-optimized core loop for Cholesky update.
+    Parameters
+    ----------
+    S : ndarray
+        Lower triangular Cholesky factor (modified in place).
+    v : ndarray
+        Update vector (modified in place).
+    n : int
+        Dimension.
+    Returns
+    -------
+    S : ndarray
+        Updated Cholesky factor.
+    success : bool
+        Always True for update.
+    """
+    for k in range(n):
+        r = np.sqrt(S[k, k] ** 2 + v[k] ** 2)
+        c = r / S[k, k]
+        s = v[k] / S[k, k]
+        S[k, k] = r
+        if k < n - 1:
+            for i in range(k + 1, n):
+                S[i, k] = (S[i, k] + s * v[i]) / c
+                v[i] = c * v[i] - s * S[i, k]
+    return S, True
+@njit(cache=True)
+def _cholesky_downdate_core(
+    S: np.ndarray, v: np.ndarray, n: int
+) -> Tuple[np.ndarray, bool]:
+    """
+    Numba-optimized core loop for Cholesky downdate.
+    Parameters
+    ----------
+    S : ndarray
+        Lower triangular Cholesky factor (modified in place).
+    v : ndarray
+        Downdate vector (modified in place).
+    n : int
+        Dimension.
+    Returns
+    -------
+    S : ndarray
+        Updated Cholesky factor.
+    success : bool
+        False if downdate would make matrix non-positive definite.
+    """
+    for k in range(n):
+        r_sq = S[k, k] ** 2 - v[k] ** 2
+        if r_sq < 0:
+            return S, False
+        r = np.sqrt(r_sq)
+        c = r / S[k, k]
+        s = v[k] / S[k, k]
+        S[k, k] = r
+        if k < n - 1:
+            for i in range(k + 1, n):
+                S[i, k] = (S[i, k] - s * v[i]) / c
+                v[i] = c * v[i] - s * S[i, k]
+    return S, True
 def cholesky_update(
     S: NDArray[np.floating], v: NDArray[np.floating], sign: float = 1.0
@@ -66,28 +164,13 @@ def cholesky_update(
     n = len(v)
     if sign > 0:
-        # Cholesky update
-        for k in range(n):
-            r = np.sqrt(S[k, k] ** 2 + v[k] ** 2)
-            c = r / S[k, k]
-            s = v[k] / S[k, k]
-            S[k, k] = r
-            if k < n - 1:
-                S[k + 1 :, k] = (S[k + 1 :, k] + s * v[k + 1 :]) / c
-                v[k + 1 :] = c * v[k + 1 :] - s * S[k + 1 :, k]
+        # Cholesky update (Numba JIT optimized)
+        S, _ = _cholesky_update_core(S, v, n)
     else:
-        # Cholesky downdate
-        for k in range(n):
-            r_sq = S[k, k] ** 2 - v[k] ** 2
-            if r_sq < 0:
-                raise ValueError("Downdate would make matrix non-positive definite")
-            r = np.sqrt(r_sq)
-            c = r / S[k, k]
-            s = v[k] / S[k, k]
-            S[k, k] = r
-            if k < n - 1:
-                S[k + 1 :, k] = (S[k + 1 :, k] - s * v[k + 1 :]) / c
-                v[k + 1 :] = c * v[k + 1 :] - s * S[k + 1 :, k]
+        # Cholesky downdate (Numba JIT optimized)
+        S, success = _cholesky_downdate_core(S, v, n)
+        if not success:
+            raise ValueError("Downdate would make matrix non-positive definite")
     return S
@@ -371,6 +454,31 @@ def compute_mahalanobis_distance(
     return float(np.sqrt(mahal_sq))
+@lru_cache(maxsize=128)
+def _compute_merwe_weights_cached(
+    n: int, alpha: float, beta: float, kappa: float
+) -> Tuple[Tuple[float, ...], Tuple[float, ...]]:
+    """
+    Cached computation of Merwe weights.
+    Returns tuples for hashability in cache.
+    """
+    lam = alpha**2 * (n + kappa) - n
+    W_m = [0.0] * (2 * n + 1)
+    W_c = [0.0] * (2 * n + 1)
+    W_m[0] = lam / (n + lam)
+    W_c[0] = lam / (n + lam) + (1 - alpha**2 + beta)
+    weight = 1 / (2 * (n + lam))
+    for i in range(1, 2 * n + 1):
+        W_m[i] = weight
+        W_c[i] = weight
+    return tuple(W_m), tuple(W_c)
 def compute_merwe_weights(
     n: int, alpha: float = 1e-3, beta: float = 2.0, kappa: float = 0.0
 ) -> Tuple[NDArray[np.floating], NDArray[np.floating]]:
@@ -401,19 +509,9 @@ def compute_merwe_weights(
     >>> np.isclose(W_m.sum(), 1.0)
     True
     """
-    lam = alpha**2 * (n + kappa) - n
-    W_m = np.zeros(2 * n + 1)
-    W_c = np.zeros(2 * n + 1)
-    W_m[0] = lam / (n + lam)
-    W_c[0] = lam / (n + lam) + (1 - alpha**2 + beta)
-    weight = 1 / (2 * (n + lam))
-    W_m[1:] = weight
-    W_c[1:] = weight
-    return W_m, W_c
+    # Use cached computation and convert to arrays
+    W_m_tuple, W_c_tuple = _compute_merwe_weights_cached(n, alpha, beta, kappa)
+    return np.array(W_m_tuple), np.array(W_c_tuple)
 __all__ = [

pytcl/gpu/__init__.py ADDED Viewed

@@ -0,0 +1,153 @@
+"""
+GPU-accelerated algorithms for the Tracker Component Library.
+This module provides GPU-accelerated implementations of key tracking algorithms
+using CuPy (NVIDIA GPUs) or MLX (Apple Silicon). These implementations offer
+significant speedups (5-15x) for batch processing of multiple tracks or large
+particle sets.
+The module automatically selects the best available backend:
+- On Apple Silicon (M1/M2/M3): Uses MLX if installed
+- On systems with NVIDIA GPUs: Uses CuPy if installed
+- Falls back to CPU (numpy) if no GPU backend is available
+The GPU implementations mirror the CPU API but accept GPU arrays and return
+GPU arrays. Use the utility functions to seamlessly transfer data between
+CPU and GPU.
+Requirements
+------------
+For NVIDIA GPUs:
+- CUDA-capable GPU
+- CuPy >= 12.0
+For Apple Silicon:
+- macOS with Apple Silicon (M1, M2, M3, etc.)
+- MLX >= 0.5.0
+Installation
+------------
+For NVIDIA CUDA:
+    pip install pytcl[gpu]
+    # or directly:
+    pip install cupy-cuda12x  # For CUDA 12.x
+For Apple Silicon:
+    pip install pytcl[gpu-apple]
+    # or directly:
+    pip install mlx
+Examples
+--------
+Basic usage with automatic backend selection:
+>>> from pytcl.gpu import is_gpu_available, get_backend
+>>> if is_gpu_available():
+...     print(f"GPU available, using {get_backend()} backend")
+Check platform:
+>>> from pytcl.gpu import is_apple_silicon, is_mlx_available
+>>> if is_apple_silicon():
+...     print("Running on Apple Silicon")
+>>> if is_mlx_available():
+...     print("MLX acceleration available")
+Batch processing example:
+>>> from pytcl.gpu import batch_kf_predict, to_gpu, to_cpu
+>>> # Move data to GPU (automatically uses best backend)
+>>> x_gpu = to_gpu(x_batch)  # (n_tracks, state_dim)
+>>> P_gpu = to_gpu(P_batch)  # (n_tracks, state_dim, state_dim)
+>>> # Batch prediction
+>>> x_pred, P_pred = batch_kf_predict(x_gpu, P_gpu, F, Q)
+>>> # Move results back to CPU
+>>> x_pred_cpu = to_cpu(x_pred)
+See Also
+--------
+pytcl.dynamic_estimation.kalman : CPU Kalman filter implementations
+pytcl.dynamic_estimation.particle_filters : CPU particle filter implementations
+"""
+from pytcl.gpu.utils import (
+    get_array_module,
+    get_backend,
+    is_apple_silicon,
+    is_cupy_available,
+    is_gpu_available,
+    is_mlx_available,
+    to_cpu,
+    to_gpu,
+)
+__all__ = [
+    # Platform detection
+    "is_apple_silicon",
+    "is_mlx_available",
+    "is_cupy_available",
+    "get_backend",
+    # Availability check
+    "is_gpu_available",
+    # Utility functions
+    "get_array_module",
+    "to_gpu",
+    "to_cpu",
+]
+# Lazy imports for GPU implementations (only loaded if CuPy is available)
+def __getattr__(name: str) -> object:
+    """Lazy import GPU implementations."""
+    if name in ("CuPyKalmanFilter", "batch_kf_predict", "batch_kf_update"):
+        from pytcl.gpu.kalman import CuPyKalmanFilter, batch_kf_predict, batch_kf_update
+        globals()[name] = locals()[name]
+        return locals()[name]
+    if name in ("CuPyExtendedKalmanFilter", "batch_ekf_predict", "batch_ekf_update"):
+        from pytcl.gpu.ekf import (
+            CuPyExtendedKalmanFilter,
+            batch_ekf_predict,
+            batch_ekf_update,
+        )
+        globals()[name] = locals()[name]
+        return locals()[name]
+    if name in ("CuPyUnscentedKalmanFilter", "batch_ukf_predict", "batch_ukf_update"):
+        from pytcl.gpu.ukf import (
+            CuPyUnscentedKalmanFilter,
+            batch_ukf_predict,
+            batch_ukf_update,
+        )
+        globals()[name] = locals()[name]
+        return locals()[name]
+    if name in (
+        "CuPyParticleFilter",
+        "gpu_resample_systematic",
+        "gpu_resample_multinomial",
+    ):
+        from pytcl.gpu.particle_filter import (
+            CuPyParticleFilter,
+            gpu_resample_multinomial,
+            gpu_resample_systematic,
+        )
+        globals()[name] = locals()[name]
+        return locals()[name]
+    if name in ("gpu_cholesky", "gpu_qr", "gpu_solve", "MemoryPool"):
+        from pytcl.gpu.matrix_utils import (
+            MemoryPool,
+            gpu_cholesky,
+            gpu_qr,
+            gpu_solve,
+        )
+        globals()[name] = locals()[name]
+        return locals()[name]
+    raise AttributeError(f"module 'pytcl.gpu' has no attribute '{name}'")

nrl-tracker 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl

nrl-tracker 1.9.2py3-none-any.whl → 1.11.0py3-none-any.whl