nrl-tracker 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their public registries.
@@ -0,0 +1,491 @@
1
+ """
2
+ GPU matrix utilities for numerical linear algebra.
3
+
4
+ This module provides GPU-accelerated matrix operations commonly used in
5
+ tracking algorithms, including:
6
+ - Cholesky decomposition
7
+ - QR factorization
8
+ - Matrix inversion and solving
9
+ - Memory pool management
10
+
11
+ Examples
12
+ --------
13
+ >>> from pytcl.gpu.matrix_utils import gpu_cholesky, gpu_solve
14
+ >>> import numpy as np
15
+ >>>
16
+ >>> # Compute Cholesky decomposition on GPU
17
+ >>> A = np.eye(4) + np.random.randn(4, 4) * 0.1
18
+ >>> A = A @ A.T # Make positive definite
19
+ >>> L = gpu_cholesky(A)
20
+ >>>
21
+ >>> # Solve linear system
22
+ >>> b = np.random.randn(4)
23
+ >>> x = gpu_solve(A, b)
24
+ """
25
+
26
+ import logging
27
+ from contextlib import contextmanager
28
+ from typing import Any, Generator, Optional, Tuple
29
+
30
+ import numpy as np
31
+ from numpy.typing import ArrayLike, NDArray
32
+
33
+ from pytcl.core.optional_deps import import_optional, is_available, requires
34
+ from pytcl.gpu.utils import ensure_gpu_array
35
+
36
+ # Module logger
37
+ _logger = logging.getLogger("pytcl.gpu.matrix_utils")
38
+
39
+
40
+ @requires("cupy", extra="gpu", feature="GPU matrix utilities")
41
+ def gpu_cholesky(A: ArrayLike, lower: bool = True) -> NDArray[np.floating[Any]]:
42
+ """
43
+ GPU-accelerated Cholesky decomposition.
44
+
45
+ Computes L such that A = L @ L.T (lower=True) or A = U.T @ U (lower=False).
46
+
47
+ Parameters
48
+ ----------
49
+ A : array_like
50
+ Symmetric positive definite matrix, shape (n, n) or batch (k, n, n).
51
+ lower : bool
52
+ If True, return lower triangular. If False, return upper triangular.
53
+
54
+ Returns
55
+ -------
56
+ L : ndarray
57
+ Cholesky factor, same shape as A.
58
+
59
+ Raises
60
+ ------
61
+ numpy.linalg.LinAlgError
62
+ If matrix is not positive definite.
63
+
64
+ Examples
65
+ --------
66
+ >>> import numpy as np
67
+ >>> from pytcl.gpu.matrix_utils import gpu_cholesky
68
+ >>> A = np.array([[4, 2], [2, 3]])
69
+ >>> L = gpu_cholesky(A)
70
+ >>> np.allclose(L @ L.T, A)
71
+ True
72
+ """
73
+ cp = import_optional("cupy", extra="gpu", feature="GPU matrix utilities")
74
+
75
+ A_gpu = ensure_gpu_array(A, dtype=cp.float64)
76
+
77
+ L = cp.linalg.cholesky(A_gpu)
78
+
79
+ if not lower:
80
+ if A_gpu.ndim == 2:
81
+ L = L.T
82
+ else:
83
+ L = cp.swapaxes(L, -2, -1)
84
+
85
+ return L
86
+
87
+
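For reference, the factorization and the lower/upper convention above can be checked on the CPU with plain NumPy. This is only an illustrative sketch of the same computation, not the package's GPU path:

import numpy as np

# Build a small symmetric positive definite matrix.
rng = np.random.default_rng(0)
A = np.eye(4) + 0.1 * rng.standard_normal((4, 4))
A = A @ A.T

# CPU analogue of gpu_cholesky(A, lower=True): A = L @ L.T
L = np.linalg.cholesky(A)
assert np.allclose(L @ L.T, A)

# CPU analogue of gpu_cholesky(A, lower=False): A = U.T @ U
U = L.T
assert np.allclose(U.T @ U, A)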
88
+ @requires("cupy", extra="gpu", feature="GPU matrix utilities")
89
+ def gpu_cholesky_safe(
90
+ A: ArrayLike,
91
+ lower: bool = True,
92
+ regularization: float = 1e-10,
93
+ ) -> Tuple[NDArray[np.floating[Any]], bool]:
94
+ """
95
+ GPU Cholesky decomposition with fallback for non-positive-definite matrices.
96
+
97
+ If standard Cholesky fails, adds regularization to diagonal and retries.
98
+
99
+ Parameters
100
+ ----------
101
+ A : array_like
102
+ Symmetric matrix, shape (n, n) or batch (k, n, n).
103
+ lower : bool
104
+ Return lower (True) or upper (False) triangular factor.
105
+ regularization : float
106
+ Amount to add to diagonal if matrix is not positive definite.
107
+
108
+ Returns
109
+ -------
110
+ L : ndarray
111
+ Cholesky factor.
112
+ success : bool
113
+ True if succeeded without regularization.
114
+
115
+ Examples
116
+ --------
117
+ >>> import numpy as np
118
+ >>> from pytcl.gpu.matrix_utils import gpu_cholesky_safe
119
+ >>> A = np.array([[1, 1], [1, 1]]) # Singular, not strictly positive definite
120
+ >>> L, success = gpu_cholesky_safe(A)
121
+ >>> success
122
+ False
123
+ """
124
+ cp = import_optional("cupy", extra="gpu", feature="GPU matrix utilities")
125
+
126
+ A_gpu = ensure_gpu_array(A, dtype=cp.float64)
127
+
128
+ try:
129
+ L = cp.linalg.cholesky(A_gpu)
130
+ success = True
131
+ except cp.linalg.LinAlgError:
132
+ # Add regularization
133
+ if A_gpu.ndim == 2:
134
+ A_reg = A_gpu + regularization * cp.eye(A_gpu.shape[0], dtype=cp.float64)
135
+ else:
136
+ # Batch case
137
+ n = A_gpu.shape[-1]
138
+ eye = cp.eye(n, dtype=cp.float64)
139
+ A_reg = A_gpu + regularization * eye
140
+
141
+ L = cp.linalg.cholesky(A_reg)
142
+ success = False
143
+ _logger.warning("Cholesky decomposition required regularization")
144
+
145
+ if not lower:
146
+ if A_gpu.ndim == 2:
147
+ L = L.T
148
+ else:
149
+ L = cp.swapaxes(L, -2, -1)
150
+
151
+ return L, success
152
+
153
+
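A minimal CPU analogue of the regularize-and-retry idea implemented above, written with numpy.linalg only (illustrative; the function itself runs on CuPy and assumes cp.linalg.cholesky signals failure for non-positive-definite input):

import numpy as np

def cholesky_safe_cpu(A, regularization=1e-10):
    """Return (L, success); retry with a jittered diagonal if A is not positive definite."""
    try:
        return np.linalg.cholesky(A), True
    except np.linalg.LinAlgError:
        A_reg = A + regularization * np.eye(A.shape[-1])
        return np.linalg.cholesky(A_reg), False

# A singular (positive semi-definite) matrix takes the regularized path.
A = np.array([[1.0, 1.0], [1.0, 1.0]])
L, success = cholesky_safe_cpu(A)
print(success)  # False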
154
+ @requires("cupy", extra="gpu", feature="GPU matrix utilities")
155
+ def gpu_qr(
156
+ A: ArrayLike, mode: str = "reduced"
157
+ ) -> Tuple[NDArray[np.floating[Any]], NDArray[np.floating[Any]]]:
158
+ """
159
+ GPU-accelerated QR decomposition.
160
+
161
+ Computes A = Q @ R where Q is orthogonal and R is upper triangular.
162
+
163
+ Parameters
164
+ ----------
165
+ A : array_like
166
+ Matrix to decompose, shape (m, n) or batch (k, m, n).
167
+ mode : str
168
+ 'reduced' (default) or 'complete'.
169
+
170
+ Returns
171
+ -------
172
+ Q : ndarray
173
+ Orthogonal matrix.
174
+ R : ndarray
175
+ Upper triangular matrix.
176
+
177
+ Examples
178
+ --------
179
+ >>> import numpy as np
180
+ >>> from pytcl.gpu.matrix_utils import gpu_qr
181
+ >>> A = np.random.randn(4, 3)
182
+ >>> Q, R = gpu_qr(A)
183
+ >>> np.allclose(Q @ R, A)
184
+ True
185
+ """
186
+ cp = import_optional("cupy", extra="gpu", feature="GPU matrix utilities")
187
+
188
+ A_gpu = ensure_gpu_array(A, dtype=cp.float64)
189
+ Q, R = cp.linalg.qr(A_gpu, mode=mode)
190
+
191
+ return Q, R
192
+
193
+
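The same reduced QR factorization on the CPU, as a quick NumPy-only sanity sketch of what gpu_qr returns:

import numpy as np

rng = np.random.default_rng(1)
A = rng.standard_normal((4, 3))

# CPU analogue of gpu_qr(A, mode="reduced"): Q is (4, 3), R is (3, 3).
Q, R = np.linalg.qr(A, mode="reduced")
assert np.allclose(Q @ R, A)             # reconstruction
assert np.allclose(Q.T @ Q, np.eye(3))   # orthonormal columns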
194
+ @requires("cupy", extra="gpu", feature="GPU matrix utilities")
195
+ def gpu_solve(A: ArrayLike, b: ArrayLike) -> NDArray[np.floating[Any]]:
196
+ """
197
+ GPU-accelerated linear system solve.
198
+
199
+ Solves A @ x = b for x.
200
+
201
+ Parameters
202
+ ----------
203
+ A : array_like
204
+ Coefficient matrix, shape (n, n) or batch (k, n, n).
205
+ b : array_like
206
+ Right-hand side, shape (n,) or (n, m) or batch (k, n).
207
+
208
+ Returns
209
+ -------
210
+ x : ndarray
211
+ Solution vector/matrix.
212
+
213
+ Examples
214
+ --------
215
+ >>> import numpy as np
216
+ >>> from pytcl.gpu.matrix_utils import gpu_solve
217
+ >>> A = np.array([[3, 1], [1, 2]])
218
+ >>> b = np.array([9, 8])
219
+ >>> x = gpu_solve(A, b)
220
+ >>> np.allclose(A @ x, b)
221
+ True
222
+ """
223
+ cp = import_optional("cupy", extra="gpu", feature="GPU matrix utilities")
224
+
225
+ A_gpu = ensure_gpu_array(A, dtype=cp.float64)
226
+ b_gpu = ensure_gpu_array(b, dtype=cp.float64)
227
+
228
+ x = cp.linalg.solve(A_gpu, b_gpu)
229
+
230
+ return x
231
+
232
+
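A CPU cross-check of the same solve, sketched with NumPy (illustrative only; the function above delegates to cp.linalg.solve):

import numpy as np

A = np.array([[3.0, 1.0], [1.0, 2.0]])
b = np.array([9.0, 8.0])

# CPU analogue of gpu_solve(A, b): solve A @ x = b directly,
# which is cheaper and better conditioned than forming inv(A) @ b.
x = np.linalg.solve(A, b)
assert np.allclose(A @ x, b)
print(x)  # [2. 3.]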
233
+ @requires("cupy", extra="gpu", feature="GPU matrix utilities")
234
+ def gpu_inv(A: ArrayLike) -> NDArray[np.floating[Any]]:
235
+ """
236
+ GPU-accelerated matrix inversion.
237
+
238
+ Parameters
239
+ ----------
240
+ A : array_like
241
+ Matrix to invert, shape (n, n) or batch (k, n, n).
242
+
243
+ Returns
244
+ -------
245
+ A_inv : ndarray
246
+ Inverse matrix.
247
+
248
+ Examples
249
+ --------
250
+ >>> import numpy as np
251
+ >>> from pytcl.gpu.matrix_utils import gpu_inv
252
+ >>> A = np.array([[1, 2], [3, 4]])
253
+ >>> A_inv = gpu_inv(A)
254
+ >>> np.allclose(A @ A_inv, np.eye(2))
255
+ True
256
+ """
257
+ cp = import_optional("cupy", extra="gpu", feature="GPU matrix utilities")
258
+
259
+ A_gpu = ensure_gpu_array(A, dtype=cp.float64)
260
+ A_inv = cp.linalg.inv(A_gpu)
261
+
262
+ return A_inv
263
+
264
+
265
+ @requires("cupy", extra="gpu", feature="GPU matrix utilities")
266
+ def gpu_eigh(
267
+ A: ArrayLike,
268
+ ) -> Tuple[NDArray[np.floating[Any]], NDArray[np.floating[Any]]]:
269
+ """
270
+ GPU-accelerated eigendecomposition for symmetric matrices.
271
+
272
+ Computes eigenvalues and eigenvectors of symmetric matrix A.
273
+
274
+ Parameters
275
+ ----------
276
+ A : array_like
277
+ Symmetric matrix, shape (n, n) or batch (k, n, n).
278
+
279
+ Returns
280
+ -------
281
+ eigenvalues : ndarray
282
+ Eigenvalues in ascending order.
283
+ eigenvectors : ndarray
284
+ Corresponding eigenvectors as columns.
285
+
286
+ Examples
287
+ --------
288
+ >>> import numpy as np
289
+ >>> from pytcl.gpu.matrix_utils import gpu_eigh
290
+ >>> A = np.array([[2, 1], [1, 2]])
291
+ >>> eigvals, eigvecs = gpu_eigh(A)
292
+ >>> eigvals
293
+ array([1., 3.])
294
+ """
295
+ cp = import_optional("cupy", extra="gpu", feature="GPU matrix utilities")
296
+
297
+ A_gpu = ensure_gpu_array(A, dtype=cp.float64)
298
+ eigvals, eigvecs = cp.linalg.eigh(A_gpu)
299
+
300
+ return eigvals, eigvecs
301
+
302
+
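A CPU sketch of the symmetric eigendecomposition and the reconstruction it implies, mirroring the docstring example with NumPy:

import numpy as np

A = np.array([[2.0, 1.0], [1.0, 2.0]])

# CPU analogue of gpu_eigh(A): ascending eigenvalues, eigenvectors as columns.
eigvals, eigvecs = np.linalg.eigh(A)
print(eigvals)  # [1. 3.]

# The factorization satisfies A = V @ diag(lambda) @ V.T
assert np.allclose(eigvecs @ np.diag(eigvals) @ eigvecs.T, A)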
303
+ @requires("cupy", extra="gpu", feature="GPU matrix utilities")
304
+ def gpu_matrix_sqrt(A: ArrayLike) -> NDArray[np.floating[Any]]:
305
+ """
306
+ GPU-accelerated matrix square root for positive definite matrices.
307
+
308
+ Computes S such that S @ S = A using eigendecomposition.
309
+
310
+ Parameters
311
+ ----------
312
+ A : array_like
313
+ Symmetric positive definite matrix.
314
+
315
+ Returns
316
+ -------
317
+ S : ndarray
318
+ Matrix square root.
319
+
320
+ Examples
321
+ --------
322
+ >>> import numpy as np
323
+ >>> from pytcl.gpu.matrix_utils import gpu_matrix_sqrt
324
+ >>> A = np.array([[4, 0], [0, 9]])
325
+ >>> S = gpu_matrix_sqrt(A)
326
+ >>> np.allclose(S @ S, A)
327
+ True
328
+ """
329
+ cp = import_optional("cupy", extra="gpu", feature="GPU matrix utilities")
330
+
331
+ A_gpu = ensure_gpu_array(A, dtype=cp.float64)
332
+
333
+ # Eigendecomposition
334
+ eigvals, eigvecs = cp.linalg.eigh(A_gpu)
335
+
336
+ # Ensure non-negative eigenvalues
337
+ eigvals = cp.maximum(eigvals, 0)
338
+
339
+ # Compute sqrt
340
+ sqrt_eigvals = cp.sqrt(eigvals)
341
+
342
+ # Reconstruct: S = V @ diag(sqrt(lambda)) @ V'
343
+ if A_gpu.ndim == 2:
344
+ S = eigvecs @ cp.diag(sqrt_eigvals) @ eigvecs.T
345
+ else:
346
+ # Batch case
347
+ S = cp.einsum("...ij,...j,...kj->...ik", eigvecs, sqrt_eigvals, eigvecs)
348
+
349
+ return S
350
+
351
+
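The eigendecomposition-based square root used above, sketched on the CPU with NumPy and the same clamping of negative eigenvalues:

import numpy as np

def matrix_sqrt_cpu(A):
    """S with S @ S = A, via A = V diag(lambda) V^T and S = V diag(sqrt(lambda)) V^T."""
    eigvals, eigvecs = np.linalg.eigh(A)
    sqrt_eigvals = np.sqrt(np.maximum(eigvals, 0.0))  # guard against tiny negative eigenvalues
    return eigvecs @ np.diag(sqrt_eigvals) @ eigvecs.T

A = np.array([[4.0, 0.0], [0.0, 9.0]])
S = matrix_sqrt_cpu(A)
assert np.allclose(S @ S, A)  # S is diag(2, 3) here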
352
+ class MemoryPool:
353
+ """
354
+ GPU memory pool manager for efficient memory allocation.
355
+
356
+ This class provides convenient access to CuPy's memory pool
357
+ with additional monitoring and management utilities.
358
+
359
+ Examples
360
+ --------
361
+ >>> from pytcl.gpu.matrix_utils import MemoryPool
362
+ >>> pool = MemoryPool()
363
+ >>> print(pool.get_stats())
364
+ {'used': 0, 'total': 0, 'free': ...}
365
+ >>>
366
+ >>> # Allocate some arrays
367
+ >>> import cupy as cp
368
+ >>> x = cp.zeros((1000, 1000))
369
+ >>> print(pool.get_stats())
370
+ {'used': 8000000, ...}
371
+ >>>
372
+ >>> # Free cached memory
373
+ >>> pool.free_all()
374
+ """
375
+
376
+ def __init__(self) -> None:
377
+ """Initialize memory pool manager."""
378
+ if not is_available("cupy"):
379
+ _logger.warning("CuPy not available, MemoryPool is a no-op")
380
+ self._pool = None
381
+ self._pinned_pool = None
382
+ else:
383
+ import cupy as cp
384
+
385
+ self._pool = cp.get_default_memory_pool()
386
+ self._pinned_pool = cp.get_default_pinned_memory_pool()
387
+
388
+ def get_stats(self) -> dict[str, int]:
389
+ """
390
+ Get memory pool statistics.
391
+
392
+ Returns
393
+ -------
394
+ stats : dict
395
+ Dictionary with pool 'used' and 'total' bytes, plus device 'free' and 'device_total' bytes.
396
+ """
397
+ if self._pool is None:
398
+ return {"used": 0, "total": 0, "free": 0}
399
+
400
+ import cupy as cp
401
+
402
+ free, total = cp.cuda.Device().mem_info
403
+
404
+ return {
405
+ "used": self._pool.used_bytes(),
406
+ "total": self._pool.total_bytes(),
407
+ "free": free,
408
+ "device_total": total,
409
+ }
410
+
411
+ def free_all(self) -> None:
412
+ """Free all cached memory blocks."""
413
+ if self._pool is not None:
414
+ self._pool.free_all_blocks()
415
+ if self._pinned_pool is not None:
416
+ self._pinned_pool.free_all_blocks()
417
+
418
+ def set_limit(self, limit: Optional[int] = None) -> None:
419
+ """
420
+ Set memory pool limit.
421
+
422
+ Parameters
423
+ ----------
424
+ limit : int or None
425
+ Maximum bytes to allocate. None for no limit.
426
+ """
427
+ if self._pool is not None:
428
+ if limit is None:
429
+ self._pool.set_limit(size=0) # 0 means no limit
430
+ else:
431
+ self._pool.set_limit(size=limit)
432
+
433
+ @contextmanager
434
+ def limit_memory(self, max_bytes: int) -> Generator[None, None, None]:
435
+ """
436
+ Context manager for temporary memory limit.
437
+
438
+ Parameters
439
+ ----------
440
+ max_bytes : int
441
+ Maximum bytes allowed during context.
442
+
443
+ Examples
444
+ --------
445
+ >>> pool = MemoryPool()
446
+ >>> with pool.limit_memory(1_000_000_000): # 1 GB limit
447
+ ... # Operations here have limited memory
448
+ ... pass
449
+ """
450
+ if self._pool is None:
451
+ yield
452
+ return
453
+
454
+ old_limit = self._pool.get_limit()
455
+ self._pool.set_limit(size=max_bytes)
456
+ try:
457
+ yield
458
+ finally:
459
+ self._pool.set_limit(size=old_limit)
460
+
461
+
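A usage sketch of the pool manager defined above, using only the methods shown here; actual byte counts depend on the device and CuPy version, and every call degrades to a no-op when CuPy is unavailable:

from pytcl.gpu.matrix_utils import MemoryPool

pool = MemoryPool()

# Inspect current pool usage (all zeros without CuPy).
print(pool.get_stats())

# Temporarily cap pool allocations to roughly 512 MiB.
with pool.limit_memory(512 * 1024 * 1024):
    pass  # memory-hungry GPU work would go here

# Return cached blocks once the working set is finished.
pool.free_all()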
462
+ # Global memory pool instance
463
+ _memory_pool: Optional[MemoryPool] = None
464
+
465
+
466
+ def get_memory_pool() -> MemoryPool:
467
+ """
468
+ Get the global GPU memory pool manager.
469
+
470
+ Returns
471
+ -------
472
+ pool : MemoryPool
473
+ Global memory pool instance.
474
+ """
475
+ global _memory_pool
476
+ if _memory_pool is None:
477
+ _memory_pool = MemoryPool()
478
+ return _memory_pool
479
+
480
+
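get_memory_pool lazily constructs one shared MemoryPool and reuses it on every call; a quick illustrative check:

from pytcl.gpu.matrix_utils import get_memory_pool

pool_a = get_memory_pool()
pool_b = get_memory_pool()
assert pool_a is pool_b  # the module-level instance is reused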
481
+ __all__ = [
482
+ "gpu_cholesky",
483
+ "gpu_cholesky_safe",
484
+ "gpu_qr",
485
+ "gpu_solve",
486
+ "gpu_inv",
487
+ "gpu_eigh",
488
+ "gpu_matrix_sqrt",
489
+ "MemoryPool",
490
+ "get_memory_pool",
491
+ ]