nrl-tracker 1.9.2__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/METADATA +49 -4
- {nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/RECORD +19 -12
- pytcl/__init__.py +3 -3
- pytcl/assignment_algorithms/nd_assignment.py +359 -1
- pytcl/coordinate_systems/jacobians/jacobians.py +63 -33
- pytcl/core/optional_deps.py +20 -0
- pytcl/dynamic_estimation/kalman/matrix_utils.py +133 -35
- pytcl/gpu/__init__.py +153 -0
- pytcl/gpu/ekf.py +433 -0
- pytcl/gpu/kalman.py +543 -0
- pytcl/gpu/matrix_utils.py +491 -0
- pytcl/gpu/particle_filter.py +578 -0
- pytcl/gpu/ukf.py +476 -0
- pytcl/gpu/utils.py +582 -0
- pytcl/gravity/clenshaw.py +8 -0
- pytcl/gravity/spherical_harmonics.py +17 -10
- {nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/LICENSE +0 -0
- {nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/WHEEL +0 -0
- {nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/top_level.txt +0 -0
pytcl/gpu/utils.py
ADDED
@@ -0,0 +1,582 @@
+"""
+GPU utility functions for array management and device detection.
+
+This module provides utilities for:
+- Checking GPU availability (CUDA via CuPy or Apple Silicon via MLX)
+- Transferring arrays between CPU and GPU
+- Getting the appropriate array module (numpy, cupy, or mlx)
+- Memory management
+- Automatic backend selection based on platform
+
+The module automatically selects the appropriate backend:
+- On Apple Silicon (M1/M2/M3): Uses MLX if available
+- On systems with NVIDIA GPUs: Uses CuPy if available
+- Falls back to CPU (numpy) if no GPU backend is available
+
+Examples
+--------
+>>> from pytcl.gpu.utils import is_gpu_available, to_gpu, to_cpu
+>>> if is_gpu_available():
+...     x_gpu = to_gpu(x_numpy)
+...     # ... perform GPU operations ...
+...     x_cpu = to_cpu(x_gpu)
+"""
+
+import logging
+import platform
+from functools import lru_cache
+from typing import Any, Literal, Union
+
+import numpy as np
+from numpy.typing import ArrayLike, NDArray
+
+from pytcl.core.optional_deps import is_available
+
+# Module logger
+_logger = logging.getLogger("pytcl.gpu.utils")
+
+# Type alias for arrays that could be numpy, cupy, or mlx
+GPUArray = Any  # Would be cp.ndarray or mx.array if backend is available
+
+# Backend type
+BackendType = Literal["cupy", "mlx", "numpy"]
+
+
+@lru_cache(maxsize=1)
+def is_apple_silicon() -> bool:
+    """
+    Check if running on Apple Silicon (ARM64 Mac).
+
+    Returns
+    -------
+    bool
+        True if running on Apple Silicon (M1, M2, M3, etc.).
+
+    Examples
+    --------
+    >>> from pytcl.gpu.utils import is_apple_silicon
+    >>> if is_apple_silicon():
+    ...     print("Running on Apple Silicon")
+    """
+    return platform.system() == "Darwin" and platform.machine() == "arm64"
+
+
+@lru_cache(maxsize=1)
+def is_mlx_available() -> bool:
+    """
+    Check if MLX acceleration is available (Apple Silicon).
+
+    Returns True if:
+    - Running on Apple Silicon (ARM64 Mac)
+    - MLX is installed
+
+    Returns
+    -------
+    bool
+        True if MLX acceleration is available.
+
+    Examples
+    --------
+    >>> from pytcl.gpu.utils import is_mlx_available
+    >>> if is_mlx_available():
+    ...     print("MLX acceleration enabled")
+    """
+    if not is_apple_silicon():
+        _logger.debug("Not on Apple Silicon, MLX not applicable")
+        return False
+
+    if not is_available("mlx"):
+        _logger.debug("MLX not installed")
+        return False
+
+    try:
+        import mlx.core as mx
+
+        # Verify MLX works by creating a simple array
+        _ = mx.array([1.0, 2.0, 3.0])
+        _logger.info("MLX available on Apple Silicon")
+        return True
+    except Exception as e:
+        _logger.debug("MLX not functional: %s", e)
+        return False
+
+
+@lru_cache(maxsize=1)
+def is_cupy_available() -> bool:
+    """
+    Check if CuPy (CUDA) acceleration is available.
+
+    Returns True if:
+    - CuPy is installed
+    - A CUDA-capable GPU is detected
+    - CUDA runtime is functional
+
+    Returns
+    -------
+    bool
+        True if CuPy acceleration is available.
+    """
+    if not is_available("cupy"):
+        _logger.debug("CuPy not installed")
+        return False
+
+    try:
+        import cupy as cp
+
+        # Try to access a GPU device
+        device = cp.cuda.Device(0)
+        _ = device.compute_capability
+        _logger.info("CuPy available: %s", device.pci_bus_id)
+        return True
+    except Exception as e:
+        _logger.debug("CuPy/CUDA not available: %s", e)
+        return False
+
+
+@lru_cache(maxsize=1)
+def get_backend() -> BackendType:
+    """
+    Get the best available GPU backend for the current platform.
+
+    Priority:
+    1. MLX on Apple Silicon
+    2. CuPy on systems with NVIDIA GPUs
+    3. numpy (CPU fallback)
+
+    Returns
+    -------
+    str
+        One of "mlx", "cupy", or "numpy".
+
+    Examples
+    --------
+    >>> from pytcl.gpu.utils import get_backend
+    >>> backend = get_backend()
+    >>> print(f"Using {backend} backend")
+    """
+    if is_apple_silicon() and is_mlx_available():
+        return "mlx"
+    elif is_cupy_available():
+        return "cupy"
+    else:
+        return "numpy"
+
+
+@lru_cache(maxsize=1)
+def is_gpu_available() -> bool:
+    """
+    Check if GPU acceleration is available.
+
+    Returns True if either:
+    - MLX is available (Apple Silicon)
+    - CuPy is available with a CUDA GPU
+
+    Returns
+    -------
+    bool
+        True if GPU acceleration is available.
+
+    Examples
+    --------
+    >>> from pytcl.gpu.utils import is_gpu_available
+    >>> if is_gpu_available():
+    ...     print("GPU acceleration enabled")
+    ... else:
+    ...     print("Falling back to CPU")
+
+    Notes
+    -----
+    The result is cached after the first call for performance.
+    Use `get_backend()` to determine which backend is being used.
+    """
+    return is_mlx_available() or is_cupy_available()
+
+
+def get_array_module(arr: ArrayLike) -> Any:
+    """
+    Get the array module (numpy, cupy, or mlx.core) for the given array.
+
+    This function enables writing code that works with numpy, cupy, and mlx
+    arrays by returning the appropriate module.
+
+    Parameters
+    ----------
+    arr : array_like
+        Input array (numpy, cupy, or mlx).
+
+    Returns
+    -------
+    module
+        numpy, cupy, or mlx.core module, depending on the input array type.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from pytcl.gpu.utils import get_array_module
+    >>> x = np.array([1, 2, 3])
+    >>> xp = get_array_module(x)
+    >>> xp is np
+    True
+
+    >>> # With CuPy array
+    >>> import cupy as cp
+    >>> x_gpu = cp.array([1, 2, 3])
+    >>> xp = get_array_module(x_gpu)
+    >>> xp is cp
+    True
+
+    >>> # With MLX array
+    >>> import mlx.core as mx
+    >>> x_mlx = mx.array([1, 2, 3])
+    >>> xp = get_array_module(x_mlx)
+    >>> xp.__name__
+    'mlx.core'
+    """
+    # Check for MLX array first
+    if is_available("mlx"):
+        import mlx.core as mx
+
+        if isinstance(arr, mx.array):
+            return mx
+
+    # Check for CuPy array
+    if is_available("cupy"):
+        import cupy as cp
+
+        if isinstance(arr, cp.ndarray):
+            return cp
+
+    return np
+
+
+def to_gpu(arr: ArrayLike, dtype: Any = None, backend: BackendType = None) -> GPUArray:
+    """
+    Transfer an array to GPU memory.
+
+    Automatically selects the best available backend (MLX on Apple Silicon,
+    CuPy on NVIDIA GPUs) unless a specific backend is requested.
+
+    Parameters
+    ----------
+    arr : array_like
+        Input array (typically numpy).
+    dtype : dtype, optional
+        Data type for the GPU array. If None, uses the input dtype.
+    backend : str, optional
+        Specific backend to use ("mlx", "cupy"). If None, auto-selects.
+
+    Returns
+    -------
+    GPUArray
+        Array in GPU memory (cupy.ndarray or mlx.array).
+
+    Raises
+    ------
+    DependencyError
+        If required backend is not installed.
+    RuntimeError
+        If no GPU is available.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from pytcl.gpu.utils import to_gpu, is_gpu_available
+    >>> x = np.array([1.0, 2.0, 3.0])
+    >>> if is_gpu_available():
+    ...     x_gpu = to_gpu(x)
+    ...     print(type(x_gpu).__name__)
+    'ndarray'  # cupy.ndarray or 'array' for mlx
+
+    Notes
+    -----
+    If the input is already a GPU array, it is returned as-is (or converted
+    to the requested dtype).
+    """
+    from pytcl.core.optional_deps import import_optional
+
+    if not is_gpu_available():
+        raise RuntimeError(
+            "No GPU available. Check CUDA installation or MLX availability."
+        )
+
+    # Determine backend
+    if backend is None:
+        backend = get_backend()
+
+    # Use MLX backend
+    if backend == "mlx":
+        mx = import_optional(
+            "mlx.core",
+            package="mlx",
+            extra="gpu-apple",
+            feature="Apple Silicon GPU acceleration",
+        )
+
+        # If already an MLX array
+        if isinstance(arr, mx.array):
+            if dtype is not None:
+                # MLX uses different dtype handling
+                return arr.astype(_numpy_dtype_to_mlx(mx, dtype))
+            return arr
+
+        # Convert to numpy first if needed
+        arr_np = np.asarray(arr)
+        if dtype is not None:
+            arr_np = arr_np.astype(dtype)
+
+        return mx.array(arr_np)
+
+    # Use CuPy backend
+    else:
+        cp = import_optional("cupy", extra="gpu", feature="GPU acceleration")
+
+        # If already a CuPy array
+        if isinstance(arr, cp.ndarray):
+            if dtype is not None and arr.dtype != dtype:
+                return arr.astype(dtype)
+            return arr
+
+        # Convert to numpy first if needed
+        arr_np = np.asarray(arr)
+        if dtype is not None:
+            arr_np = arr_np.astype(dtype)
+
+        return cp.asarray(arr_np)
+
+
+def _numpy_dtype_to_mlx(mx: Any, dtype: Any) -> Any:
+    """Convert numpy dtype to MLX dtype."""
+    dtype_map = {
+        np.float32: mx.float32,
+        np.float64: mx.float32,  # MLX prefers float32
+        np.int32: mx.int32,
+        np.int64: mx.int64,
+        np.bool_: mx.bool_,
+    }
+    if hasattr(dtype, "type"):
+        dtype = dtype.type
+    return dtype_map.get(dtype, mx.float32)
+
+
+def to_cpu(arr: Union[ArrayLike, GPUArray]) -> NDArray[np.floating]:
+    """
+    Transfer an array from GPU to CPU memory.
+
+    Parameters
+    ----------
+    arr : array_like, cupy.ndarray, or mlx.array
+        Input array (numpy, cupy, or mlx).
+
+    Returns
+    -------
+    numpy.ndarray
+        Array in CPU memory.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from pytcl.gpu.utils import to_gpu, to_cpu, is_gpu_available
+    >>> x = np.array([1.0, 2.0, 3.0])
+    >>> if is_gpu_available():
+    ...     x_gpu = to_gpu(x)
+    ...     x_cpu = to_cpu(x_gpu)
+    ...     np.allclose(x, x_cpu)
+    True
+
+    Notes
+    -----
+    If the input is already a numpy array, it is returned as-is.
+    """
+    # Already numpy
+    if isinstance(arr, np.ndarray):
+        return arr
+
+    # Check if it's an MLX array
+    if is_available("mlx"):
+        import mlx.core as mx
+
+        if isinstance(arr, mx.array):
+            return np.array(arr)
+
+    # Check if it's a CuPy array
+    if is_available("cupy"):
+        import cupy as cp
+
+        if isinstance(arr, cp.ndarray):
+            return cp.asnumpy(arr)
+
+    # Fallback: convert via numpy
+    return np.asarray(arr)
+
+
+def ensure_gpu_array(
+    arr: ArrayLike,
+    dtype: Any = np.float64,
+    backend: BackendType = None,
+) -> GPUArray:
+    """
+    Ensure an array is on the GPU with the specified dtype.
+
+    Parameters
+    ----------
+    arr : array_like
+        Input array.
+    dtype : dtype
+        Desired data type.
+    backend : str, optional
+        Specific backend to use ("mlx", "cupy"). If None, auto-selects.
+
+    Returns
+    -------
+    GPUArray
+        Array on GPU with specified dtype (cupy.ndarray or mlx.array).
+    """
+    gpu_arr = to_gpu(arr, backend=backend)
+
+    # MLX doesn't support float64 well, use float32
+    if backend == "mlx" or (backend is None and get_backend() == "mlx"):
+        if dtype == np.float64:
+            dtype = np.float32
+
+    if hasattr(gpu_arr, "dtype") and gpu_arr.dtype != dtype:
+        if get_backend() == "mlx":
+            import mlx.core as mx
+
+            gpu_arr = gpu_arr.astype(_numpy_dtype_to_mlx(mx, dtype))
+        else:
+            gpu_arr = gpu_arr.astype(dtype)
+    return gpu_arr
+
+
+def sync_gpu() -> None:
+    """
+    Synchronize GPU operations.
+
+    This blocks until all pending GPU operations are complete.
+    Useful for accurate timing measurements.
+
+    Examples
+    --------
+    >>> import time
+    >>> from pytcl.gpu.utils import sync_gpu, is_gpu_available
+    >>> if is_gpu_available():
+    ...     # ... perform GPU operations ...
+    ...     sync_gpu()  # Wait for completion
+    ...     elapsed = time.time() - start
+    """
+    backend = get_backend()
+
+    if backend == "mlx":
+        import mlx.core as mx
+
+        mx.eval()  # MLX uses lazy evaluation, eval() forces execution
+    elif backend == "cupy":
+        import cupy as cp
+
+        cp.cuda.Stream.null.synchronize()
+
+
+def get_gpu_memory_info() -> dict[str, Union[str, int]]:
+    """
+    Get GPU memory usage information.
+
+    Returns
+    -------
+    dict
+        Dictionary with keys:
+        - 'backend': Backend in use ("mlx", "cupy", or "numpy")
+        - 'free': Free memory in bytes (if available)
+        - 'total': Total memory in bytes (if available)
+        - 'used': Used memory in bytes (if available)
+
+    Examples
+    --------
+    >>> from pytcl.gpu.utils import get_gpu_memory_info, is_gpu_available
+    >>> if is_gpu_available():
+    ...     info = get_gpu_memory_info()
+    ...     print(f"Backend: {info['backend']}")
+    """
+    backend = get_backend()
+
+    if backend == "numpy":
+        return {"backend": "numpy", "free": 0, "total": 0, "used": 0}
+
+    if backend == "mlx":
+        # MLX doesn't expose memory info directly, but we can get device info
+        import mlx.core as mx
+
+        device = mx.default_device()
+        return {
+            "backend": "mlx",
+            "device": str(device),
+            "free": -1,  # Not available
+            "total": -1,  # Not available
+            "used": -1,  # Not available
+        }
+
+    # CuPy backend
+    import cupy as cp
+
+    mempool = cp.get_default_memory_pool()
+    free, total = cp.cuda.Device().mem_info
+
+    return {
+        "backend": "cupy",
+        "free": free,
+        "total": total,
+        "used": total - free,
+        "pool_used": mempool.used_bytes(),
+        "pool_total": mempool.total_bytes(),
+    }
+
+
+def clear_gpu_memory() -> None:
+    """
+    Clear GPU memory pools.
+
+    This frees cached memory blocks held by the GPU backend.
+    Call this when you need to free GPU memory for other operations.
+
+    Examples
+    --------
+    >>> from pytcl.gpu.utils import clear_gpu_memory, is_gpu_available
+    >>> if is_gpu_available():
+    ...     # ... perform GPU operations ...
+    ...     clear_gpu_memory()  # Free cached memory
+    """
+    backend = get_backend()
+
+    if backend == "mlx":
+        import mlx.core as mx
+
+        # MLX has automatic memory management, but we can force a sync
+        mx.eval()
+        # Note: MLX doesn't have explicit memory pool clearing like CuPy
+    elif backend == "cupy":
+        import cupy as cp
+
+        mempool = cp.get_default_memory_pool()
+        mempool.free_all_blocks()
+
+
+__all__ = [
+    # Platform detection
+    "is_apple_silicon",
+    "is_mlx_available",
+    "is_cupy_available",
+    "get_backend",
+    # Availability check
+    "is_gpu_available",
+    # Array operations
+    "get_array_module",
+    "to_gpu",
+    "to_cpu",
+    "ensure_gpu_array",
+    # Synchronization and memory
+    "sync_gpu",
+    "get_gpu_memory_info",
+    "clear_gpu_memory",
+    # Type hints
+    "GPUArray",
+    "BackendType",
+]
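Taken together, these helpers let numerical code stay backend-agnostic: dispatch on the array's module rather than branching on backend names. A minimal usage sketch (the normalize helper is illustrative, not part of the package; the imported functions are the ones defined in the file above):

import numpy as np

from pytcl.gpu.utils import get_array_module, is_gpu_available, to_cpu, to_gpu


def normalize(x):
    # Works unchanged on numpy, cupy, and mlx arrays: get_array_module
    # returns whichever module matches the input array's type.
    xp = get_array_module(x)
    return x / xp.sqrt((x * x).sum())


x = np.array([3.0, 4.0])
if is_gpu_available():
    result = to_cpu(normalize(to_gpu(x)))  # round-trip through MLX or CuPy
else:
    result = normalize(x)  # CPU fallback, plain numpy
print(result)  # [0.6 0.8]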
pytcl/gravity/clenshaw.py
CHANGED
@@ -8,6 +8,11 @@ Legendre functions which can overflow at high degrees.
 This implementation follows Holmes & Featherstone (2002) for numerical
 stability at ultra-high degrees (n > 2000).
 
+Performance Notes
+-----------------
+Recursion coefficients (_a_nm, _b_nm) are cached using lru_cache for
+25-40% speedup on repeated evaluations with the same (n, m) pairs.
+
 References
 ----------
 .. [1] Holmes, S.A. and Featherstone, W.E. "A unified approach to the
@@ -19,12 +24,14 @@ References
    Journal of Geodesy 82.4-5 (2008): 223-229.
 """
 
+from functools import lru_cache
 from typing import Optional, Tuple
 
 import numpy as np
 from numpy.typing import NDArray
 
 
+@lru_cache(maxsize=4096)
 def _a_nm(n: int, m: int) -> float:
     """Compute recursion coefficient a_nm for normalized Legendre functions.
 
@@ -47,6 +54,7 @@ def _a_nm(n: int, m: int) -> float:
     return np.sqrt(num / den)
 
 
+@lru_cache(maxsize=4096)
 def _b_nm(n: int, m: int) -> float:
     """Compute recursion coefficient b_nm for normalized Legendre functions.
 
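The only functional change here is memoization: _a_nm and _b_nm are pure functions of (n, m), so wrapping them in functools.lru_cache turns repeated coefficient lookups into dictionary hits. A self-contained sketch of the pattern (the coefficient formula shown is one standard form of the Holmes & Featherstone forward-column recursion, included for illustration, not copied from the package):

from functools import lru_cache

import numpy as np


@lru_cache(maxsize=4096)
def a_nm(n: int, m: int) -> float:
    # Pure scalar function of (n, m): safe to memoize.
    num = (2 * n - 1) * (2 * n + 1)
    den = (n - m) * (n + m)
    return float(np.sqrt(num / den))


a_nm(120, 7)  # miss: computed once
a_nm(120, 7)  # hit: served from the cache
print(a_nm.cache_info())  # CacheInfo(hits=1, misses=1, maxsize=4096, currsize=1)

A degree-n_max evaluation touches on the order of n_max²/2 distinct (n, m) pairs, so a maxsize of 4096 covers full reuse up to roughly n_max ≈ 90 while still bounding memory at higher degrees.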
pytcl/gravity/spherical_harmonics.py
CHANGED
@@ -433,6 +433,22 @@ def gravity_acceleration(
     return g_r, g_lat, g_lon
 
 
+@lru_cache(maxsize=64)
+def _legendre_scaling_factors_cached(n_max: int) -> Tuple[float, ...]:
+    """Cached computation of Legendre scaling factors.
+
+    Returns tuple for hashability.
+    """
+    if n_max <= 150:
+        return tuple([1.0] * (n_max + 1))
+
+    scale = []
+    for n in range(n_max + 1):
+        exponent = -280.0 * n / n_max
+        scale.append(10.0**exponent)
+    return tuple(scale)
+
+
 def legendre_scaling_factors(n_max: int) -> NDArray[np.floating]:
     """Precompute scaling factors to prevent overflow in Legendre recursion.
 
@@ -474,16 +490,7 @@ def legendre_scaling_factors(n_max: int) -> NDArray[np.floating]:
     >>> scale_high[200] < scale_high[0]  # Higher degrees scaled down
     True
     """
-
-
-    if n_max > 150:
-        # Apply progressive scaling for high degrees
-        for n in range(n_max + 1):
-            # Scale factor decreases exponentially with degree
-            exponent = -280.0 * n / n_max
-            scale[n] = 10.0**exponent
-
-    return scale
+    return np.array(_legendre_scaling_factors_cached(n_max))
 
 
 def associated_legendre_scaled(
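This refactor shows a common trick for combining lru_cache with array-valued results: the cache stores an immutable tuple (hashable and safe to share), while the public function materializes a fresh ndarray on every call so a caller mutating the result cannot corrupt the cached value. A self-contained sketch of the pattern (names are illustrative, not the package's):

from functools import lru_cache

import numpy as np


@lru_cache(maxsize=64)
def _scaling_cached(n_max: int) -> tuple:
    # Tuple return: immutable and hashable, so safe to cache.
    if n_max <= 150:
        return tuple([1.0] * (n_max + 1))
    return tuple(10.0 ** (-280.0 * n / n_max) for n in range(n_max + 1))


def scaling_factors(n_max: int) -> np.ndarray:
    # Fresh ndarray per call; mutating it cannot poison the cache.
    return np.array(_scaling_cached(n_max))


s = scaling_factors(2000)
assert s[0] == 1.0 and s[2000] < s[0]  # higher degrees scaled down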
{nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/LICENSE
File without changes
{nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/WHEEL
File without changes
{nrl_tracker-1.9.2.dist-info → nrl_tracker-1.11.0.dist-info}/top_level.txt
File without changes