nrl-tracker 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/METADATA +49 -4
- {nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/RECORD +19 -12
- pytcl/__init__.py +3 -3
- pytcl/assignment_algorithms/nd_assignment.py +359 -1
- pytcl/coordinate_systems/jacobians/jacobians.py +63 -33
- pytcl/core/optional_deps.py +20 -0
- pytcl/dynamic_estimation/kalman/matrix_utils.py +133 -35
- pytcl/gpu/__init__.py +153 -0
- pytcl/gpu/ekf.py +433 -0
- pytcl/gpu/kalman.py +543 -0
- pytcl/gpu/matrix_utils.py +491 -0
- pytcl/gpu/particle_filter.py +578 -0
- pytcl/gpu/ukf.py +476 -0
- pytcl/gpu/utils.py +582 -0
- pytcl/gravity/clenshaw.py +8 -0
- pytcl/gravity/spherical_harmonics.py +17 -10
- {nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/LICENSE +0 -0
- {nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/WHEEL +0 -0
- {nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/top_level.txt +0 -0
|
@@ -4,13 +4,61 @@ Jacobian matrices for coordinate transformations.
|
|
|
4
4
|
This module provides functions for computing Jacobian matrices of
|
|
5
5
|
coordinate transformations, essential for error propagation in tracking
|
|
6
6
|
filters (e.g., converting measurement covariances between coordinate systems).
|
|
7
|
+
|
|
8
|
+
Performance Notes
|
|
9
|
+
-----------------
|
|
10
|
+
ENU and NED Jacobians use lru_cache with quantized inputs for 25-40%
|
|
11
|
+
speedup when repeatedly called with similar lat/lon values.
|
|
7
12
|
"""
|
|
8
13
|
|
|
9
|
-
from
|
|
14
|
+
from functools import lru_cache
|
|
15
|
+
from typing import Callable, Literal, Tuple
|
|
10
16
|
|
|
11
17
|
import numpy as np
|
|
12
18
|
from numpy.typing import ArrayLike, NDArray
|
|
13
19
|
|
|
20
|
+
# Cache precision: quantize lat/lon to 5 decimal places (1e-5 radians, ~64 m on the Earth's surface)
|
|
21
|
+
_JACOBIAN_CACHE_DECIMALS = 5
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _quantize_angle(angle: float) -> float:
|
|
25
|
+
"""Quantize angle for cache key compatibility."""
|
|
26
|
+
return round(angle, _JACOBIAN_CACHE_DECIMALS)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@lru_cache(maxsize=256)
|
|
30
|
+
def _enu_jacobian_cached(
|
|
31
|
+
lat_q: float, lon_q: float
|
|
32
|
+
) -> Tuple[Tuple[float, ...], Tuple[float, ...], Tuple[float, ...]]:
|
|
33
|
+
"""Cached ENU Jacobian computation with quantized inputs."""
|
|
34
|
+
sin_lat = np.sin(lat_q)
|
|
35
|
+
cos_lat = np.cos(lat_q)
|
|
36
|
+
sin_lon = np.sin(lon_q)
|
|
37
|
+
cos_lon = np.cos(lon_q)
|
|
38
|
+
|
|
39
|
+
return (
|
|
40
|
+
(-sin_lon, cos_lon, 0.0),
|
|
41
|
+
(-sin_lat * cos_lon, -sin_lat * sin_lon, cos_lat),
|
|
42
|
+
(cos_lat * cos_lon, cos_lat * sin_lon, sin_lat),
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@lru_cache(maxsize=256)
|
|
47
|
+
def _ned_jacobian_cached(
|
|
48
|
+
lat_q: float, lon_q: float
|
|
49
|
+
) -> Tuple[Tuple[float, ...], Tuple[float, ...], Tuple[float, ...]]:
|
|
50
|
+
"""Cached NED Jacobian computation with quantized inputs."""
|
|
51
|
+
sin_lat = np.sin(lat_q)
|
|
52
|
+
cos_lat = np.cos(lat_q)
|
|
53
|
+
sin_lon = np.sin(lon_q)
|
|
54
|
+
cos_lon = np.cos(lon_q)
|
|
55
|
+
|
|
56
|
+
return (
|
|
57
|
+
(-sin_lat * cos_lon, -sin_lat * sin_lon, cos_lat),
|
|
58
|
+
(-sin_lon, cos_lon, 0.0),
|
|
59
|
+
(-cos_lat * cos_lon, -cos_lat * sin_lon, -sin_lat),
|
|
60
|
+
)
|
|
61
|
+
|
|
14
62
|
|
|
15
63
|
def spherical_jacobian(
|
|
16
64
|
cart_point: ArrayLike,
|
|
@@ -270,23 +318,14 @@ def enu_jacobian(
|
|
|
270
318
|
-------
|
|
271
319
|
J : ndarray
|
|
272
320
|
3x3 rotation matrix (Jacobian is constant for this linear transformation).
|
|
273
|
-
"""
|
|
274
|
-
sin_lat = np.sin(lat)
|
|
275
|
-
cos_lat = np.cos(lat)
|
|
276
|
-
sin_lon = np.sin(lon)
|
|
277
|
-
cos_lon = np.cos(lon)
|
|
278
321
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
dtype=np.float64,
|
|
287
|
-
)
|
|
288
|
-
|
|
289
|
-
return J
|
|
322
|
+
Notes
|
|
323
|
+
-----
|
|
324
|
+
Uses cached computation for performance: lat/lon are rounded to 5 decimal places (radians) before evaluation, so the matrix is computed for the rounded angles.
|
|
325
|
+
"""
|
|
326
|
+
# Use cached version with quantized inputs
|
|
327
|
+
cached_result = _enu_jacobian_cached(_quantize_angle(lat), _quantize_angle(lon))
|
|
328
|
+
return np.array(cached_result, dtype=np.float64)
|
|
290
329
|
|
|
291
330
|
|
|
292
331
|
def ned_jacobian(
|
|
@@ -307,23 +346,14 @@ def ned_jacobian(
|
|
|
307
346
|
-------
|
|
308
347
|
J : ndarray
|
|
309
348
|
3x3 rotation matrix.
|
|
310
|
-
"""
|
|
311
|
-
sin_lat = np.sin(lat)
|
|
312
|
-
cos_lat = np.cos(lat)
|
|
313
|
-
sin_lon = np.sin(lon)
|
|
314
|
-
cos_lon = np.cos(lon)
|
|
315
349
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
dtype=np.float64,
|
|
324
|
-
)
|
|
325
|
-
|
|
326
|
-
return J
|
|
350
|
+
Notes
|
|
351
|
+
-----
|
|
352
|
+
Uses cached computation for performance: lat/lon are rounded to 5 decimal places (radians) before evaluation, so the matrix is computed for the rounded angles.
|
|
353
|
+
"""
|
|
354
|
+
# Use cached version with quantized inputs
|
|
355
|
+
cached_result = _ned_jacobian_cached(_quantize_angle(lat), _quantize_angle(lon))
|
|
356
|
+
return np.array(cached_result, dtype=np.float64)
|
|
327
357
|
|
|
328
358
|
|
|
329
359
|
def geodetic_jacobian(
|
pytcl/core/optional_deps.py
CHANGED
|
@@ -69,6 +69,10 @@ PACKAGE_EXTRAS: dict[str, tuple[str, str]] = {
|
|
|
69
69
|
"pywavelets": ("signal", "pywavelets"),
|
|
70
70
|
# Terrain data
|
|
71
71
|
"netCDF4": ("terrain", "netCDF4"),
|
|
72
|
+
# GPU acceleration
|
|
73
|
+
"cupy": ("gpu", "cupy-cuda12x"),
|
|
74
|
+
# Apple Silicon GPU acceleration
|
|
75
|
+
"mlx": ("gpu-apple", "mlx"),
|
|
72
76
|
}
|
|
73
77
|
|
|
74
78
|
# Friendly names for features provided by each package
|
|
@@ -82,6 +86,8 @@ PACKAGE_FEATURES: dict[str, str] = {
|
|
|
82
86
|
"pywt": "wavelet transforms",
|
|
83
87
|
"pywavelets": "wavelet transforms",
|
|
84
88
|
"netCDF4": "NetCDF file reading",
|
|
89
|
+
"cupy": "GPU acceleration",
|
|
90
|
+
"mlx": "Apple Silicon GPU acceleration",
|
|
85
91
|
}
|
|
86
92
|
|
|
87
93
|
|
|
@@ -374,6 +380,16 @@ class _AvailabilityFlags:
|
|
|
374
380
|
"""True if netCDF4 is available."""
|
|
375
381
|
return is_available("netCDF4")
|
|
376
382
|
|
|
383
|
+
@property
|
|
384
|
+
def HAS_CUPY(self) -> bool:
|
|
385
|
+
"""True if cupy is available."""
|
|
386
|
+
return is_available("cupy")
|
|
387
|
+
|
|
388
|
+
@property
|
|
389
|
+
def HAS_MLX(self) -> bool:
|
|
390
|
+
"""True if mlx is available (Apple Silicon)."""
|
|
391
|
+
return is_available("mlx")
|
|
392
|
+
|
|
377
393
|
|
|
378
394
|
# Create singleton instance
|
|
379
395
|
_flags = _AvailabilityFlags()
|
|
@@ -387,6 +403,8 @@ HAS_ASTROPY = property(lambda self: _flags.HAS_ASTROPY)
|
|
|
387
403
|
HAS_PYPROJ = property(lambda self: _flags.HAS_PYPROJ)
|
|
388
404
|
HAS_CVXPY = property(lambda self: _flags.HAS_CVXPY)
|
|
389
405
|
HAS_NETCDF4 = property(lambda self: _flags.HAS_NETCDF4)
|
|
406
|
+
HAS_CUPY = property(lambda self: _flags.HAS_CUPY)
|
|
407
|
+
HAS_MLX = property(lambda self: _flags.HAS_MLX)
|
|
390
408
|
|
|
391
409
|
|
|
392
410
|
# =============================================================================
|
|
@@ -525,6 +543,8 @@ __all__ = [
|
|
|
525
543
|
"HAS_PYPROJ",
|
|
526
544
|
"HAS_CVXPY",
|
|
527
545
|
"HAS_NETCDF4",
|
|
546
|
+
"HAS_CUPY",
|
|
547
|
+
"HAS_MLX",
|
|
528
548
|
# Internal (for testing)
|
|
529
549
|
"_clear_cache",
|
|
530
550
|
"_flags",
|
|
@@ -6,18 +6,116 @@ multiple Kalman filter implementations. Separating these utilities prevents
|
|
|
6
6
|
circular imports between filter implementations.
|
|
7
7
|
|
|
8
8
|
Functions include:
|
|
9
|
-
- Cholesky factor update/downdate
|
|
9
|
+
- Cholesky factor update/downdate (Numba JIT optimized)
|
|
10
10
|
- QR-based covariance propagation
|
|
11
11
|
- Matrix symmetry enforcement
|
|
12
12
|
- Matrix square root computation
|
|
13
13
|
- Innovation likelihood computation
|
|
14
|
+
|
|
15
|
+
Performance Notes
|
|
16
|
+
-----------------
|
|
17
|
+
Critical functions use Numba JIT compilation for 5-10x speedup:
|
|
18
|
+
- _cholesky_update_core: Rank-1 Cholesky update inner loop
|
|
19
|
+
- _cholesky_downdate_core: Rank-1 Cholesky downdate inner loop
|
|
14
20
|
"""
|
|
15
21
|
|
|
22
|
+
from functools import lru_cache
|
|
16
23
|
from typing import Optional, Tuple
|
|
17
24
|
|
|
18
25
|
import numpy as np
|
|
19
26
|
from numpy.typing import NDArray
|
|
20
27
|
|
|
28
|
+
try:
|
|
29
|
+
from numba import njit
|
|
30
|
+
|
|
31
|
+
NUMBA_AVAILABLE = True
|
|
32
|
+
except ImportError:
|
|
33
|
+
NUMBA_AVAILABLE = False
|
|
34
|
+
|
|
35
|
+
# Fallback decorator that does nothing
|
|
36
|
+
def njit(*args, **kwargs): # type: ignore[misc,unused-ignore]
|
|
37
|
+
"""No-op decorator when Numba is not available."""
|
|
38
|
+
|
|
39
|
+
def decorator(func): # type: ignore[no-untyped-def,unused-ignore]
|
|
40
|
+
return func
|
|
41
|
+
|
|
42
|
+
if len(args) == 1 and callable(args[0]):
|
|
43
|
+
return args[0]
|
|
44
|
+
return decorator
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@njit(cache=True)
|
|
48
|
+
def _cholesky_update_core(
|
|
49
|
+
S: np.ndarray, v: np.ndarray, n: int
|
|
50
|
+
) -> Tuple[np.ndarray, bool]:
|
|
51
|
+
"""
|
|
52
|
+
Numba-optimized core loop for Cholesky update.
|
|
53
|
+
|
|
54
|
+
Parameters
|
|
55
|
+
----------
|
|
56
|
+
S : ndarray
|
|
57
|
+
Lower triangular Cholesky factor (modified in place).
|
|
58
|
+
v : ndarray
|
|
59
|
+
Update vector (modified in place).
|
|
60
|
+
n : int
|
|
61
|
+
Dimension.
|
|
62
|
+
|
|
63
|
+
Returns
|
|
64
|
+
-------
|
|
65
|
+
S : ndarray
|
|
66
|
+
Updated Cholesky factor.
|
|
67
|
+
success : bool
|
|
68
|
+
Always True for update.
|
|
69
|
+
"""
|
|
70
|
+
for k in range(n):
|
|
71
|
+
r = np.sqrt(S[k, k] ** 2 + v[k] ** 2)
|
|
72
|
+
c = r / S[k, k]
|
|
73
|
+
s = v[k] / S[k, k]
|
|
74
|
+
S[k, k] = r
|
|
75
|
+
if k < n - 1:
|
|
76
|
+
for i in range(k + 1, n):
|
|
77
|
+
S[i, k] = (S[i, k] + s * v[i]) / c
|
|
78
|
+
v[i] = c * v[i] - s * S[i, k]
|
|
79
|
+
return S, True
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@njit(cache=True)
|
|
83
|
+
def _cholesky_downdate_core(
|
|
84
|
+
S: np.ndarray, v: np.ndarray, n: int
|
|
85
|
+
) -> Tuple[np.ndarray, bool]:
|
|
86
|
+
"""
|
|
87
|
+
Numba-optimized core loop for Cholesky downdate.
|
|
88
|
+
|
|
89
|
+
Parameters
|
|
90
|
+
----------
|
|
91
|
+
S : ndarray
|
|
92
|
+
Lower triangular Cholesky factor (modified in place).
|
|
93
|
+
v : ndarray
|
|
94
|
+
Downdate vector (modified in place).
|
|
95
|
+
n : int
|
|
96
|
+
Dimension.
|
|
97
|
+
|
|
98
|
+
Returns
|
|
99
|
+
-------
|
|
100
|
+
S : ndarray
|
|
101
|
+
Updated Cholesky factor.
|
|
102
|
+
success : bool
|
|
103
|
+
False if downdate would make matrix non-positive definite.
|
|
104
|
+
"""
|
|
105
|
+
for k in range(n):
|
|
106
|
+
r_sq = S[k, k] ** 2 - v[k] ** 2
|
|
107
|
+
if r_sq < 0:
|
|
108
|
+
return S, False
|
|
109
|
+
r = np.sqrt(r_sq)
|
|
110
|
+
c = r / S[k, k]
|
|
111
|
+
s = v[k] / S[k, k]
|
|
112
|
+
S[k, k] = r
|
|
113
|
+
if k < n - 1:
|
|
114
|
+
for i in range(k + 1, n):
|
|
115
|
+
S[i, k] = (S[i, k] - s * v[i]) / c
|
|
116
|
+
v[i] = c * v[i] - s * S[i, k]
|
|
117
|
+
return S, True
|
|
118
|
+
|
|
21
119
|
|
|
22
120
|
def cholesky_update(
|
|
23
121
|
S: NDArray[np.floating], v: NDArray[np.floating], sign: float = 1.0
|
|
@@ -66,28 +164,13 @@ def cholesky_update(
|
|
|
66
164
|
n = len(v)
|
|
67
165
|
|
|
68
166
|
if sign > 0:
|
|
69
|
-
# Cholesky update
|
|
70
|
-
|
|
71
|
-
r = np.sqrt(S[k, k] ** 2 + v[k] ** 2)
|
|
72
|
-
c = r / S[k, k]
|
|
73
|
-
s = v[k] / S[k, k]
|
|
74
|
-
S[k, k] = r
|
|
75
|
-
if k < n - 1:
|
|
76
|
-
S[k + 1 :, k] = (S[k + 1 :, k] + s * v[k + 1 :]) / c
|
|
77
|
-
v[k + 1 :] = c * v[k + 1 :] - s * S[k + 1 :, k]
|
|
167
|
+
# Cholesky update (Numba JIT optimized)
|
|
168
|
+
S, _ = _cholesky_update_core(S, v, n)
|
|
78
169
|
else:
|
|
79
|
-
# Cholesky downdate
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
raise ValueError("Downdate would make matrix non-positive definite")
|
|
84
|
-
r = np.sqrt(r_sq)
|
|
85
|
-
c = r / S[k, k]
|
|
86
|
-
s = v[k] / S[k, k]
|
|
87
|
-
S[k, k] = r
|
|
88
|
-
if k < n - 1:
|
|
89
|
-
S[k + 1 :, k] = (S[k + 1 :, k] - s * v[k + 1 :]) / c
|
|
90
|
-
v[k + 1 :] = c * v[k + 1 :] - s * S[k + 1 :, k]
|
|
170
|
+
# Cholesky downdate (Numba JIT optimized)
|
|
171
|
+
S, success = _cholesky_downdate_core(S, v, n)
|
|
172
|
+
if not success:
|
|
173
|
+
raise ValueError("Downdate would make matrix non-positive definite")
|
|
91
174
|
|
|
92
175
|
return S
|
|
93
176
|
|
|
@@ -371,6 +454,31 @@ def compute_mahalanobis_distance(
|
|
|
371
454
|
return float(np.sqrt(mahal_sq))
|
|
372
455
|
|
|
373
456
|
|
|
457
|
+
@lru_cache(maxsize=128)
|
|
458
|
+
def _compute_merwe_weights_cached(
|
|
459
|
+
n: int, alpha: float, beta: float, kappa: float
|
|
460
|
+
) -> Tuple[Tuple[float, ...], Tuple[float, ...]]:
|
|
461
|
+
"""
|
|
462
|
+
Cached computation of Merwe weights.
|
|
463
|
+
|
|
464
|
+
Returns tuples for hashability in cache.
|
|
465
|
+
"""
|
|
466
|
+
lam = alpha**2 * (n + kappa) - n
|
|
467
|
+
|
|
468
|
+
W_m = [0.0] * (2 * n + 1)
|
|
469
|
+
W_c = [0.0] * (2 * n + 1)
|
|
470
|
+
|
|
471
|
+
W_m[0] = lam / (n + lam)
|
|
472
|
+
W_c[0] = lam / (n + lam) + (1 - alpha**2 + beta)
|
|
473
|
+
|
|
474
|
+
weight = 1 / (2 * (n + lam))
|
|
475
|
+
for i in range(1, 2 * n + 1):
|
|
476
|
+
W_m[i] = weight
|
|
477
|
+
W_c[i] = weight
|
|
478
|
+
|
|
479
|
+
return tuple(W_m), tuple(W_c)
|
|
480
|
+
|
|
481
|
+
|
|
374
482
|
def compute_merwe_weights(
|
|
375
483
|
n: int, alpha: float = 1e-3, beta: float = 2.0, kappa: float = 0.0
|
|
376
484
|
) -> Tuple[NDArray[np.floating], NDArray[np.floating]]:
|
|
@@ -401,19 +509,9 @@ def compute_merwe_weights(
|
|
|
401
509
|
>>> np.isclose(W_m.sum(), 1.0)
|
|
402
510
|
True
|
|
403
511
|
"""
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
W_c = np.zeros(2 * n + 1)
|
|
408
|
-
|
|
409
|
-
W_m[0] = lam / (n + lam)
|
|
410
|
-
W_c[0] = lam / (n + lam) + (1 - alpha**2 + beta)
|
|
411
|
-
|
|
412
|
-
weight = 1 / (2 * (n + lam))
|
|
413
|
-
W_m[1:] = weight
|
|
414
|
-
W_c[1:] = weight
|
|
415
|
-
|
|
416
|
-
return W_m, W_c
|
|
512
|
+
# Use cached computation and convert to arrays
|
|
513
|
+
W_m_tuple, W_c_tuple = _compute_merwe_weights_cached(n, alpha, beta, kappa)
|
|
514
|
+
return np.array(W_m_tuple), np.array(W_c_tuple)
|
|
417
515
|
|
|
418
516
|
|
|
419
517
|
__all__ = [
|
pytcl/gpu/__init__.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GPU-accelerated algorithms for the Tracker Component Library.
|
|
3
|
+
|
|
4
|
+
This module provides GPU-accelerated implementations of key tracking algorithms
|
|
5
|
+
using CuPy (NVIDIA GPUs) or MLX (Apple Silicon). These implementations offer
|
|
6
|
+
significant speedups (5-15x) for batch processing of multiple tracks or large
|
|
7
|
+
particle sets.
|
|
8
|
+
|
|
9
|
+
The module automatically selects the best available backend:
|
|
10
|
+
- On Apple Silicon (M1/M2/M3): Uses MLX if installed
|
|
11
|
+
- On systems with NVIDIA GPUs: Uses CuPy if installed
|
|
12
|
+
- Falls back to CPU (numpy) if no GPU backend is available
|
|
13
|
+
|
|
14
|
+
The GPU implementations mirror the CPU API but accept GPU arrays and return
|
|
15
|
+
GPU arrays. Use the utility functions to seamlessly transfer data between
|
|
16
|
+
CPU and GPU.
|
|
17
|
+
|
|
18
|
+
Requirements
|
|
19
|
+
------------
|
|
20
|
+
For NVIDIA GPUs:
|
|
21
|
+
- CUDA-capable GPU
|
|
22
|
+
- CuPy >= 12.0
|
|
23
|
+
|
|
24
|
+
For Apple Silicon:
|
|
25
|
+
- macOS with Apple Silicon (M1, M2, M3, etc.)
|
|
26
|
+
- MLX >= 0.5.0
|
|
27
|
+
|
|
28
|
+
Installation
|
|
29
|
+
------------
|
|
30
|
+
For NVIDIA CUDA:
|
|
31
|
+
pip install nrl-tracker[gpu]
|
|
32
|
+
# or directly:
|
|
33
|
+
pip install cupy-cuda12x # For CUDA 12.x
|
|
34
|
+
|
|
35
|
+
For Apple Silicon:
|
|
36
|
+
pip install nrl-tracker[gpu-apple]
|
|
37
|
+
# or directly:
|
|
38
|
+
pip install mlx
|
|
39
|
+
|
|
40
|
+
Examples
|
|
41
|
+
--------
|
|
42
|
+
Basic usage with automatic backend selection:
|
|
43
|
+
|
|
44
|
+
>>> from pytcl.gpu import is_gpu_available, get_backend
|
|
45
|
+
>>> if is_gpu_available():
|
|
46
|
+
... print(f"GPU available, using {get_backend()} backend")
|
|
47
|
+
|
|
48
|
+
Check platform:
|
|
49
|
+
|
|
50
|
+
>>> from pytcl.gpu import is_apple_silicon, is_mlx_available
|
|
51
|
+
>>> if is_apple_silicon():
|
|
52
|
+
... print("Running on Apple Silicon")
|
|
53
|
+
>>> if is_mlx_available():
|
|
54
|
+
... print("MLX acceleration available")
|
|
55
|
+
|
|
56
|
+
Batch processing example:
|
|
57
|
+
|
|
58
|
+
>>> from pytcl.gpu import batch_kf_predict, to_gpu, to_cpu
|
|
59
|
+
>>> # Move data to GPU (automatically uses best backend)
|
|
60
|
+
>>> x_gpu = to_gpu(x_batch) # (n_tracks, state_dim)
|
|
61
|
+
>>> P_gpu = to_gpu(P_batch) # (n_tracks, state_dim, state_dim)
|
|
62
|
+
>>> # Batch prediction
|
|
63
|
+
>>> x_pred, P_pred = batch_kf_predict(x_gpu, P_gpu, F, Q)
|
|
64
|
+
>>> # Move results back to CPU
|
|
65
|
+
>>> x_pred_cpu = to_cpu(x_pred)
|
|
66
|
+
|
|
67
|
+
See Also
|
|
68
|
+
--------
|
|
69
|
+
pytcl.dynamic_estimation.kalman : CPU Kalman filter implementations
|
|
70
|
+
pytcl.dynamic_estimation.particle_filters : CPU particle filter implementations
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
from pytcl.gpu.utils import (
|
|
74
|
+
get_array_module,
|
|
75
|
+
get_backend,
|
|
76
|
+
is_apple_silicon,
|
|
77
|
+
is_cupy_available,
|
|
78
|
+
is_gpu_available,
|
|
79
|
+
is_mlx_available,
|
|
80
|
+
to_cpu,
|
|
81
|
+
to_gpu,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
__all__ = [
|
|
85
|
+
# Platform detection
|
|
86
|
+
"is_apple_silicon",
|
|
87
|
+
"is_mlx_available",
|
|
88
|
+
"is_cupy_available",
|
|
89
|
+
"get_backend",
|
|
90
|
+
# Availability check
|
|
91
|
+
"is_gpu_available",
|
|
92
|
+
# Utility functions
|
|
93
|
+
"get_array_module",
|
|
94
|
+
"to_gpu",
|
|
95
|
+
"to_cpu",
|
|
96
|
+
]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Lazy imports for GPU implementations (each submodule is imported on first attribute access, then cached in module globals)
|
|
100
|
+
def __getattr__(name: str) -> object:
|
|
101
|
+
"""Lazy import GPU implementations."""
|
|
102
|
+
if name in ("CuPyKalmanFilter", "batch_kf_predict", "batch_kf_update"):
|
|
103
|
+
from pytcl.gpu.kalman import CuPyKalmanFilter, batch_kf_predict, batch_kf_update
|
|
104
|
+
|
|
105
|
+
globals()[name] = locals()[name]
|
|
106
|
+
return locals()[name]
|
|
107
|
+
|
|
108
|
+
if name in ("CuPyExtendedKalmanFilter", "batch_ekf_predict", "batch_ekf_update"):
|
|
109
|
+
from pytcl.gpu.ekf import (
|
|
110
|
+
CuPyExtendedKalmanFilter,
|
|
111
|
+
batch_ekf_predict,
|
|
112
|
+
batch_ekf_update,
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
globals()[name] = locals()[name]
|
|
116
|
+
return locals()[name]
|
|
117
|
+
|
|
118
|
+
if name in ("CuPyUnscentedKalmanFilter", "batch_ukf_predict", "batch_ukf_update"):
|
|
119
|
+
from pytcl.gpu.ukf import (
|
|
120
|
+
CuPyUnscentedKalmanFilter,
|
|
121
|
+
batch_ukf_predict,
|
|
122
|
+
batch_ukf_update,
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
globals()[name] = locals()[name]
|
|
126
|
+
return locals()[name]
|
|
127
|
+
|
|
128
|
+
if name in (
|
|
129
|
+
"CuPyParticleFilter",
|
|
130
|
+
"gpu_resample_systematic",
|
|
131
|
+
"gpu_resample_multinomial",
|
|
132
|
+
):
|
|
133
|
+
from pytcl.gpu.particle_filter import (
|
|
134
|
+
CuPyParticleFilter,
|
|
135
|
+
gpu_resample_multinomial,
|
|
136
|
+
gpu_resample_systematic,
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
globals()[name] = locals()[name]
|
|
140
|
+
return locals()[name]
|
|
141
|
+
|
|
142
|
+
if name in ("gpu_cholesky", "gpu_qr", "gpu_solve", "MemoryPool"):
|
|
143
|
+
from pytcl.gpu.matrix_utils import (
|
|
144
|
+
MemoryPool,
|
|
145
|
+
gpu_cholesky,
|
|
146
|
+
gpu_qr,
|
|
147
|
+
gpu_solve,
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
globals()[name] = locals()[name]
|
|
151
|
+
return locals()[name]
|
|
152
|
+
|
|
153
|
+
raise AttributeError(f"module 'pytcl.gpu' has no attribute '{name}'")
|