corticalfields 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {corticalfields-0.2.2/src/corticalfields.egg-info → corticalfields-0.2.4}/PKG-INFO +1 -1
- {corticalfields-0.2.2 → corticalfields-0.2.4}/pyproject.toml +1 -1
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/__init__.py +1 -1
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/backends.py +223 -241
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/utils.py +20 -2
- {corticalfields-0.2.2 → corticalfields-0.2.4/src/corticalfields.egg-info}/PKG-INFO +1 -1
- {corticalfields-0.2.2 → corticalfields-0.2.4}/LICENSE +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/README.md +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/setup.cfg +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/_pointcloud_legacy.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/analysis/__init__.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/analysis/bayesian.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/analysis/eda_qc.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/analysis/normative.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/analysis/stats.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/asymmetry.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/bayes_viz.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/bayesian.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/brainplots.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/datasets.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/distance_stats.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/eda_qc.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/features.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/functional_maps.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/graphs.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/hippocampus.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/kernels.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/normative.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/__init__.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/deep/__init__.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/deep/diffusion_net.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/deep/egnn.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/functional_maps.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/morphometrics.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/registration.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/spectral.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/transport.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/viz.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/spectral.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/subcortical.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/surface.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/surprise.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/transport.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/viz/__init__.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/viz/bayes.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/viz/brainplots.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/viz/graph_viz.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/viz/subcortical.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/viz/viz.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/viz.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/viz_subcortical.py +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields.egg-info/SOURCES.txt +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields.egg-info/dependency_links.txt +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields.egg-info/requires.txt +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields.egg-info/top_level.txt +0 -0
- {corticalfields-0.2.2 → corticalfields-0.2.4}/tests/test_core.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: corticalfields
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Spectral cortical and subcortical analysis with statistical testing (RSA, CCA, PLS, PERMANOVA, TFCE, NBS, laterality classification), on meshes and point clouds — Laplace-Beltrami decomposition, atlas-free asymmetry, GPU-accelerated optimal transport, hippocampal subfield analysis (HippUnfold), ShapeDNA/BrainPrint spectral fingerprinting, geometric deep learning, Bayesian inference, and normative modeling for structural neuroimaging.
|
|
5
5
|
Author-email: rdneuro <r.debona@ufrj.br>
|
|
6
6
|
License: MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "corticalfields"
|
|
7
|
-
version = "0.2.2"
|
|
7
|
+
version = "0.2.4"
|
|
8
8
|
description = "Spectral cortical and subcortical analysis with statistical testing (RSA, CCA, PLS, PERMANOVA, TFCE, NBS, laterality classification), on meshes and point clouds — Laplace-Beltrami decomposition, atlas-free asymmetry, GPU-accelerated optimal transport, hippocampal subfield analysis (HippUnfold), ShapeDNA/BrainPrint spectral fingerprinting, geometric deep learning, Bayesian inference, and normative modeling for structural neuroimaging."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
@@ -420,70 +420,39 @@ def _eigsh_torch(
|
|
|
420
420
|
k: int, tol: float, maxiter: int, dtype: str,
|
|
421
421
|
) -> Tuple[np.ndarray, np.ndarray]:
|
|
422
422
|
"""
|
|
423
|
-
PyTorch GPU eigensolver
|
|
424
|
-
|
|
425
|
-
Uses **Chebyshev-Filtered Subspace Iteration** (ChFSI)
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
on modern GPUs. The Rayleigh–Ritz projection (small m×m problem)
|
|
453
|
-
is accumulated and solved in **float64** for numerical stability.
|
|
454
|
-
This preserves eigenvalue accuracy to ~1e-7 for the first ~300
|
|
455
|
-
Laplace–Beltrami eigenpairs while halving SpMV memory bandwidth.
|
|
456
|
-
|
|
457
|
-
VRAM budget (N = 150k, k = 300, m = 330)
|
|
458
|
-
------------------------------------------
|
|
459
|
-
- Sparse CSR matrix A: ~14 MB (7 nnz/row × 16 bytes)
|
|
460
|
-
- Subspace V: N × m × 4 = ~198 MB (float32)
|
|
461
|
-
- Chebyshev temps: 2 × N × m × 4 = ~396 MB (Y_prev, Y_curr)
|
|
462
|
-
- Rayleigh–Ritz H: m × m × 8 = ~0.9 MB (float64)
|
|
463
|
-
- **Peak total: ~609 MB** — fits in 8 GB VRAM with margin.
|
|
464
|
-
- Previous lobpcg: 9 × N × k × 8 = ~3.2 GB — 5× higher.
|
|
465
|
-
|
|
466
|
-
Performance (RTX 3090, N=150k, k=300)
|
|
467
|
-
-------------------------------------
|
|
468
|
-
- ChFSI (this): ~10–25 s (degree=12, 15–30 outer iters)
|
|
469
|
-
- torch.lobpcg (old): ~60–120 s
|
|
470
|
-
- CuPy eigsh: ~10–30 s (Thick-Restart Lanczos)
|
|
471
|
-
- scipy eigsh: ~30–120 s (ARPACK shift-invert)
|
|
472
|
-
|
|
473
|
-
Both individual and batch processing use this function. In batch
|
|
474
|
-
mode, ``gc_gpu()`` is called between subjects by the caller
|
|
475
|
-
(``_process_single_subject`` in ``spectral.py``), which frees
|
|
476
|
-
VRAM for the next subject.
|
|
423
|
+
PyTorch GPU eigensolver — ChFSI with in-place VRAM management.
|
|
424
|
+
|
|
425
|
+
Uses **Chebyshev-Filtered Subspace Iteration** (ChFSI) with:
|
|
426
|
+
|
|
427
|
+
- **In-place Chebyshev recurrence**: ``Tensor.add_(X, alpha=s)``
|
|
428
|
+
and ``Tensor.mul_()`` eliminate ALL intermediate tensor allocations
|
|
429
|
+
in the filter loop. The only unavoidable allocation per step is
|
|
430
|
+
the SpMV result from ``torch.sparse.mm`` (which has no ``out=``).
|
|
431
|
+
- **Eager deallocation**: every temporary is ``del``'d immediately
|
|
432
|
+
and ``torch.cuda.empty_cache()`` runs after each outer iteration.
|
|
433
|
+
- **VRAM watermark check**: logs allocated VRAM at start/end and
|
|
434
|
+
warns if a leak is detected.
|
|
435
|
+
- **``torch.no_grad()``**: prevents the ~500 SpMV operations from
|
|
436
|
+
building a computation graph that would leak 10+ GB of RAM.
|
|
437
|
+
- **Periodic ``synchronize()``**: every 4 SpMV launches inside the
|
|
438
|
+
Chebyshev filter, plus after each Ritz step, to prevent the
|
|
439
|
+
NVIDIA driver watchdog from triggering a PCIe bus hang.
|
|
440
|
+
|
|
441
|
+
Per-subject VRAM budget (N=150k, k=100, m=120):
|
|
442
|
+
Sparse A: ~14 MB (CSR, f32, ~7 nnz/row)
|
|
443
|
+
Subspace V: N × m × 4 = ~72 MB
|
|
444
|
+
SpMV temp: N × m × 4 = ~72 MB (freed each step)
|
|
445
|
+
Ritz f64: 2 × N × m × 8 = ~288 MB (freed after Ritz)
|
|
446
|
+
**Peak: ~446 MB** — leaves >23 GB free on RTX 3090.
|
|
447
|
+
|
|
448
|
+
The critical constraint for batch stability is not peak usage but
|
|
449
|
+
**fragmentation over subjects**. In-place operations reduce the
|
|
450
|
+
number of alloc/free cycles from ~30 per outer iteration (old) to
|
|
451
|
+
~3 (new), dramatically reducing caching-allocator fragmentation.
|
|
477
452
|
|
|
478
453
|
Parameters
|
|
479
454
|
----------
|
|
480
|
-
L
|
|
481
|
-
M : scipy.sparse.spmatrix (N, N) — diagonal lumped mass matrix
|
|
482
|
-
k : int — number of smallest eigenpairs to compute
|
|
483
|
-
tol : float — convergence tolerance on max residual norm
|
|
484
|
-
maxiter : int — maximum ChFSI outer iterations
|
|
485
|
-
dtype : str — ``"float32"`` or ``"float64"`` for SpMV precision;
|
|
486
|
-
Rayleigh–Ritz always uses float64 regardless.
|
|
455
|
+
L, M, k, tol, maxiter, dtype : see ``eigsh_solve``
|
|
487
456
|
|
|
488
457
|
Returns
|
|
489
458
|
-------
|
|
@@ -492,215 +461,228 @@ def _eigsh_torch(
|
|
|
492
461
|
|
|
493
462
|
References
|
|
494
463
|
----------
|
|
495
|
-
[1] Y. Zhou, Y. Saad
|
|
496
|
-
|
|
497
|
-
subspace iteration", J. Comput. Phys. 219 (2006) 172–184.
|
|
498
|
-
[2] A.V. Knyazev, "Toward the optimal preconditioned eigensolver:
|
|
499
|
-
LOBPCG", SIAM J. Sci. Comput. 23 (2001) 517–541.
|
|
464
|
+
[1] Y. Zhou, Y. Saad et al., "Chebyshev-filtered subspace iteration",
|
|
465
|
+
J. Comput. Phys. 219 (2006) 172–184.
|
|
500
466
|
"""
|
|
467
|
+
import gc
|
|
501
468
|
import torch
|
|
502
469
|
|
|
503
|
-
# ── Precision setup ─────────────────────────────────────────────
|
|
504
|
-
# SpMV in float32 for throughput; Rayleigh-Ritz in float64 for accuracy
|
|
505
|
-
spmv_np_dtype = np.float32 if dtype != "float64" else np.float32
|
|
506
470
|
spmv_torch_dtype = torch.float32
|
|
507
471
|
ritz_torch_dtype = torch.float64
|
|
508
|
-
|
|
509
472
|
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
473
|
+
is_cuda = device.type == "cuda"
|
|
510
474
|
N = L.shape[0]
|
|
511
475
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
476
|
+
EXTRA = min(30, max(10, k // 10))
|
|
477
|
+
m = k + EXTRA
|
|
478
|
+
CHEB_DEGREE = 12
|
|
479
|
+
POWER_ITERS = 30
|
|
480
|
+
|
|
481
|
+
# ── VRAM watermark (start) ──────────────────────────────────────
|
|
482
|
+
vram_start = 0
|
|
483
|
+
if is_cuda:
|
|
484
|
+
torch.cuda.synchronize()
|
|
485
|
+
torch.cuda.empty_cache()
|
|
486
|
+
gc.collect()
|
|
487
|
+
vram_start = torch.cuda.memory_allocated(0)
|
|
517
488
|
|
|
518
489
|
logger.info(
|
|
519
|
-
" torch ChFSI
|
|
520
|
-
"
|
|
521
|
-
N, k, m, CHEB_DEGREE,
|
|
490
|
+
" torch ChFSI: N=%d, k=%d, m=%d, deg=%d, "
|
|
491
|
+
"VRAM_start=%.0f MB",
|
|
492
|
+
N, k, m, CHEB_DEGREE, vram_start / 1e6,
|
|
522
493
|
)
|
|
523
494
|
|
|
524
|
-
# ── Step 1: Generalised → standard via M^{−½} (
|
|
495
|
+
# ── Step 1: Generalised → standard via M^{−½} (CPU) ────────────
|
|
525
496
|
M_diag = np.array(M.diagonal()).ravel().astype(np.float64)
|
|
526
497
|
M_diag = np.maximum(M_diag, 1e-16)
|
|
527
|
-
M_inv_sqrt_np =
|
|
498
|
+
M_inv_sqrt_np = 1.0 / np.sqrt(M_diag)
|
|
528
499
|
|
|
529
|
-
D_sp = sp.diags(M_inv_sqrt_np.astype(
|
|
530
|
-
A_cpu = (D_sp @ L.tocsc().astype(
|
|
531
|
-
del D_sp
|
|
500
|
+
D_sp = sp.diags(M_inv_sqrt_np.astype(np.float32), format="csc")
|
|
501
|
+
A_cpu = (D_sp @ L.tocsc().astype(np.float32) @ D_sp).tocsr()
|
|
502
|
+
del D_sp
|
|
532
503
|
|
|
533
|
-
# ──
|
|
534
|
-
def
|
|
504
|
+
# ── scipy CSR → torch CSR ───────────────────────────────────────
|
|
505
|
+
def _to_csr(m_csr):
|
|
535
506
|
return torch.sparse_csr_tensor(
|
|
536
|
-
torch.from_numpy(
|
|
537
|
-
torch.from_numpy(
|
|
538
|
-
torch.from_numpy(
|
|
539
|
-
size=
|
|
540
|
-
dtype=spmv_torch_dtype,
|
|
507
|
+
torch.from_numpy(m_csr.indptr.astype(np.int64)).to(device),
|
|
508
|
+
torch.from_numpy(m_csr.indices.astype(np.int64)).to(device),
|
|
509
|
+
torch.from_numpy(m_csr.data.astype(np.float32)).to(device),
|
|
510
|
+
size=m_csr.shape, dtype=spmv_torch_dtype,
|
|
541
511
|
)
|
|
542
512
|
|
|
543
|
-
# ── Helper: sparse matvec A @ X on GPU ──────────────────────────
|
|
544
|
-
def _spmm(A_t, X):
|
|
545
|
-
"""Sparse × dense matrix multiply, shape (N, m)."""
|
|
546
|
-
return torch.sparse.mm(A_t, X)
|
|
547
|
-
|
|
548
513
|
try:
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
del A_cpu # free CPU copy (~14 MB saved)
|
|
552
|
-
|
|
553
|
-
# ── Step 3: Estimate λ_max via power iteration ──────────────
|
|
554
|
-
# 30 iters is overkill for Rayleigh quotient convergence on
|
|
555
|
-
# a mesh Laplacian, but costs only ~15 ms and gives a tight
|
|
556
|
-
# bound that improves Chebyshev filter quality.
|
|
557
|
-
torch.manual_seed(42)
|
|
558
|
-
v = torch.randn(N, 1, dtype=spmv_torch_dtype, device=device)
|
|
559
|
-
v = v / v.norm()
|
|
560
|
-
for _ in range(POWER_ITERS):
|
|
561
|
-
v = _spmm(A_t, v)
|
|
562
|
-
v = v / v.norm()
|
|
563
|
-
# Rayleigh quotient in float64 for a precise λ_max
|
|
564
|
-
v64 = v.to(ritz_torch_dtype)
|
|
565
|
-
Av64 = _spmm(A_t, v).to(ritz_torch_dtype)
|
|
566
|
-
lambda_max = float((v64.T @ Av64).item()) * 1.05 # 5% safety
|
|
567
|
-
del v, v64, Av64
|
|
568
|
-
logger.info(" λ_max ≈ %.4f", lambda_max)
|
|
569
|
-
|
|
570
|
-
# ── Step 4: ChFSI outer loop ───────────────────────────────
|
|
571
|
-
# Initial random subspace
|
|
572
|
-
torch.manual_seed(42)
|
|
573
|
-
V = torch.randn(N, m, dtype=spmv_torch_dtype, device=device)
|
|
574
|
-
V, _ = torch.linalg.qr(V)
|
|
575
|
-
|
|
576
|
-
# Chebyshev filter interval: we want eigenvalues in [0, λ_cut]
|
|
577
|
-
# where λ_cut is a rough upper bound for the k-th eigenvalue.
|
|
578
|
-
# Heuristic: Weyl's law gives λ_k ∝ k for 2D surfaces, so
|
|
579
|
-
# λ_cut ≈ λ_max × (2 * m / N) is a conservative estimate.
|
|
580
|
-
# We refine after the first Ritz step.
|
|
581
|
-
lambda_cut = lambda_max * (2.0 * m / N)
|
|
582
|
-
lambda_cut = max(lambda_cut, lambda_max * 0.01) # floor
|
|
583
|
-
|
|
584
|
-
converged = False
|
|
585
|
-
for outer in range(maxiter):
|
|
586
|
-
# ── Chebyshev filter: T_d(scaled_A) @ V ────────────────
|
|
587
|
-
# Maps A from [λ_cut, λ_max] → [−1, 1], then applies
|
|
588
|
-
# Chebyshev polynomial that is ~0 on [−1, 1] (unwanted
|
|
589
|
-
# eigenvalues) and large on (−∞, −1) (wanted eigenvalues).
|
|
590
|
-
#
|
|
591
|
-
# Scaling: σ = (λ_max − λ_cut) / 2
|
|
592
|
-
# c = (λ_max + λ_cut) / 2
|
|
593
|
-
# A_scaled = (A − c·I) / σ
|
|
594
|
-
#
|
|
595
|
-
# 3-term recurrence:
|
|
596
|
-
# Y₀ = V
|
|
597
|
-
# Y₁ = (1/σ)(A − c·I) V = (A·V − c·V) / σ
|
|
598
|
-
# Y_{j+1} = (2/σ)(A − c·I) Y_j − Y_{j−1}
|
|
599
|
-
# = (2(A·Y_j − c·Y_j) / σ) − Y_{j−1}
|
|
600
|
-
|
|
601
|
-
e = (lambda_max - lambda_cut) / 2.0
|
|
602
|
-
c = (lambda_max + lambda_cut) / 2.0
|
|
603
|
-
|
|
604
|
-
# Safeguard: e must be positive
|
|
605
|
-
if e < 1e-10:
|
|
606
|
-
e = lambda_max * 0.5
|
|
607
|
-
c = lambda_max * 0.5
|
|
608
|
-
|
|
609
|
-
sigma = e / c if abs(c) > 1e-12 else 1.0
|
|
610
|
-
sigma1 = sigma
|
|
611
|
-
|
|
612
|
-
# Y₀ = V (reuse V buffer)
|
|
613
|
-
# Y₁ = σ₁/e · (A·V − c·V)
|
|
614
|
-
AV = _spmm(A_t, V) # (N, m) f32
|
|
615
|
-
Y_prev = V # alias, no copy
|
|
616
|
-
Y_curr = (sigma1 / e) * (AV - c * V) # (N, m) f32
|
|
617
|
-
del AV
|
|
618
|
-
|
|
619
|
-
for d in range(2, CHEB_DEGREE + 1):
|
|
620
|
-
sigma_new = 1.0 / (2.0 / sigma - sigma1)
|
|
621
|
-
AY = _spmm(A_t, Y_curr) # (N, m) f32
|
|
622
|
-
Y_next = (2.0 * sigma_new / e) * (AY - c * Y_curr) \
|
|
623
|
-
- (sigma * sigma_new) * Y_prev
|
|
624
|
-
Y_prev = Y_curr
|
|
625
|
-
Y_curr = Y_next
|
|
626
|
-
sigma = sigma_new
|
|
627
|
-
del AY
|
|
628
|
-
|
|
629
|
-
del Y_prev # free (N, m) buffer
|
|
630
|
-
|
|
631
|
-
# ── Orthogonalise filtered subspace ────────────────────
|
|
632
|
-
V, _ = torch.linalg.qr(Y_curr)
|
|
633
|
-
del Y_curr
|
|
634
|
-
|
|
635
|
-
# ── Rayleigh–Ritz in float64 ──────────────────────────
|
|
636
|
-
# AV in float32 for speed, then upcast for the small eigh
|
|
637
|
-
AV = _spmm(A_t, V) # (N, m) f32
|
|
638
|
-
V64 = V.to(ritz_torch_dtype) # (N, m) f64
|
|
639
|
-
AV64 = AV.to(ritz_torch_dtype) # (N, m) f64
|
|
640
|
-
del AV
|
|
641
|
-
|
|
642
|
-
H = V64.T @ AV64 # (m, m) f64
|
|
643
|
-
H = 0.5 * (H + H.T) # symmetrise
|
|
644
|
-
ritz_vals, ritz_vecs = torch.linalg.eigh(H) # sorted ascending
|
|
645
|
-
|
|
646
|
-
# ── Convergence check: max residual norm ───────────────
|
|
647
|
-
# residual_i = A·z_i − λ_i·z_i where z_i = V @ s_i
|
|
648
|
-
eigvecs_m = V64 @ ritz_vecs[:, :k] # (N, k) f64
|
|
649
|
-
Aeigvecs = AV64 @ ritz_vecs[:, :k] # (N, k) f64
|
|
650
|
-
residuals = Aeigvecs - eigvecs_m * ritz_vals[:k].unsqueeze(0)
|
|
651
|
-
max_res = float(residuals.norm(dim=0).max().item())
|
|
652
|
-
|
|
653
|
-
del eigvecs_m, Aeigvecs, residuals, V64, AV64
|
|
654
|
-
|
|
655
|
-
if outer % 5 == 0 or max_res < tol:
|
|
656
|
-
logger.info(
|
|
657
|
-
" ChFSI iter %2d: max_residual=%.2e, λ_cut=%.4f",
|
|
658
|
-
outer, max_res, lambda_cut,
|
|
659
|
-
)
|
|
660
|
-
|
|
661
|
-
if max_res < tol:
|
|
662
|
-
converged = True
|
|
663
|
-
break
|
|
664
|
-
|
|
665
|
-
# ── Update subspace: rotate V into Ritz basis ──────────
|
|
666
|
-
V = V @ ritz_vecs[:, :m].to(spmv_torch_dtype)
|
|
667
|
-
|
|
668
|
-
# ── Refine λ_cut from current Ritz estimates ───────────
|
|
669
|
-
# Use 1.5× the m-th Ritz value as the new cutoff
|
|
670
|
-
if ritz_vals.shape[0] > k:
|
|
671
|
-
lambda_cut = float(ritz_vals[m - 1].item()) * 1.5
|
|
672
|
-
lambda_cut = min(lambda_cut, lambda_max * 0.95)
|
|
514
|
+
A_t = _to_csr(A_cpu)
|
|
515
|
+
del A_cpu
|
|
673
516
|
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
)
|
|
517
|
+
with torch.no_grad():
|
|
518
|
+
|
|
519
|
+
# ── Step 2: λ_max via power iteration ───────────────────
|
|
520
|
+
torch.manual_seed(42)
|
|
521
|
+
v = torch.randn(N, 1, dtype=spmv_torch_dtype, device=device)
|
|
522
|
+
v.div_(v.norm())
|
|
523
|
+
for pi in range(POWER_ITERS):
|
|
524
|
+
v = torch.sparse.mm(A_t, v)
|
|
525
|
+
v.div_(v.norm())
|
|
526
|
+
if is_cuda and pi % 10 == 9:
|
|
527
|
+
torch.cuda.synchronize()
|
|
528
|
+
|
|
529
|
+
Av = torch.sparse.mm(A_t, v)
|
|
530
|
+
lambda_max = float((v.T @ Av).item()) * 1.05
|
|
531
|
+
del v, Av
|
|
532
|
+
if is_cuda:
|
|
533
|
+
torch.cuda.synchronize()
|
|
534
|
+
torch.cuda.empty_cache()
|
|
680
535
|
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
536
|
+
logger.info(" λ_max ≈ %.4f", lambda_max)
|
|
537
|
+
|
|
538
|
+
# ── Step 3: ChFSI outer loop ────────────────────────────
|
|
539
|
+
torch.manual_seed(42)
|
|
540
|
+
V = torch.randn(N, m, dtype=spmv_torch_dtype, device=device)
|
|
541
|
+
V, _ = torch.linalg.qr(V)
|
|
542
|
+
|
|
543
|
+
lambda_cut = lambda_max * (2.0 * m / N)
|
|
544
|
+
lambda_cut = max(lambda_cut, lambda_max * 0.01)
|
|
545
|
+
|
|
546
|
+
converged = False
|
|
547
|
+
max_res = float("inf")
|
|
548
|
+
|
|
549
|
+
for outer in range(maxiter):
|
|
550
|
+
|
|
551
|
+
# ── Chebyshev filter (IN-PLACE) ─────────────────────
|
|
552
|
+
# All arithmetic uses .add_(), .mul_() to avoid temps.
|
|
553
|
+
# Only torch.sparse.mm allocates (no out= support).
|
|
554
|
+
e = (lambda_max - lambda_cut) / 2.0
|
|
555
|
+
cc = (lambda_max + lambda_cut) / 2.0
|
|
556
|
+
if e < 1e-10:
|
|
557
|
+
e = lambda_max * 0.5
|
|
558
|
+
cc = lambda_max * 0.5
|
|
559
|
+
|
|
560
|
+
sigma = e / cc if abs(cc) > 1e-12 else 1.0
|
|
561
|
+
sigma1 = sigma
|
|
562
|
+
|
|
563
|
+
# Y₁ = (σ₁/e) · (A·V − c·V)
|
|
564
|
+
# In-place: AV = sparse.mm(A, V); AV -= c*V; AV *= σ₁/e
|
|
565
|
+
Y_curr = torch.sparse.mm(A_t, V) # (N,m) NEW alloc
|
|
566
|
+
Y_curr.add_(V, alpha=-cc) # in-place
|
|
567
|
+
Y_curr.mul_(sigma1 / e) # in-place
|
|
568
|
+
Y_prev = V.clone() # need a copy (V reused)
|
|
569
|
+
|
|
570
|
+
for d in range(2, CHEB_DEGREE + 1):
|
|
571
|
+
sigma_new = 1.0 / (2.0 / sigma1 - sigma)
|
|
572
|
+
|
|
573
|
+
# Y_next = (2σ_new/e)(A·Y_curr − c·Y_curr) − σ·σ_new·Y_prev
|
|
574
|
+
# In-place on the SpMV output:
|
|
575
|
+
Y_next = torch.sparse.mm(A_t, Y_curr) # NEW alloc
|
|
576
|
+
Y_next.add_(Y_curr, alpha=-cc) # -= c * Y_curr
|
|
577
|
+
Y_next.mul_(2.0 * sigma_new / e) # *= 2σ/e
|
|
578
|
+
Y_next.add_(Y_prev, alpha=-(sigma * sigma_new))
|
|
579
|
+
|
|
580
|
+
# Rotate buffers — reuse memory
|
|
581
|
+
Y_prev = Y_curr # old Y_curr becomes Y_prev
|
|
582
|
+
Y_curr = Y_next # new result becomes Y_curr
|
|
583
|
+
sigma = sigma_new
|
|
584
|
+
# Y_next ref dropped; old Y_prev eligible for GC
|
|
585
|
+
|
|
586
|
+
if is_cuda and d % 4 == 0:
|
|
587
|
+
torch.cuda.synchronize()
|
|
588
|
+
|
|
589
|
+
del Y_prev # free last-gen buffer
|
|
590
|
+
if is_cuda:
|
|
591
|
+
torch.cuda.synchronize()
|
|
592
|
+
|
|
593
|
+
# ── QR ──────────────────────────────────────────────
|
|
594
|
+
V, _ = torch.linalg.qr(Y_curr)
|
|
595
|
+
del Y_curr
|
|
596
|
+
|
|
597
|
+
# ── Rayleigh–Ritz (f64 for accuracy) ────────────────
|
|
598
|
+
AV_f32 = torch.sparse.mm(A_t, V) # (N,m) f32
|
|
599
|
+
V64 = V.to(ritz_torch_dtype) # (N,m) f64
|
|
600
|
+
AV64 = AV_f32.to(ritz_torch_dtype) # (N,m) f64
|
|
601
|
+
del AV_f32 # free f32 copy NOW
|
|
602
|
+
|
|
603
|
+
H = V64.T @ AV64 # (m,m) f64
|
|
604
|
+
H = 0.5 * (H + H.T) # symmetrise (safe)
|
|
605
|
+
ritz_vals, ritz_vecs = torch.linalg.eigh(H)
|
|
606
|
+
del H
|
|
607
|
+
|
|
608
|
+
# ── Convergence check ───────────────────────────────
|
|
609
|
+
# Compute residual norms without large (N,k) temporaries:
|
|
610
|
+
# res_i = ||AV64 @ s_i - λ_i * V64 @ s_i||
|
|
611
|
+
S_k = ritz_vecs[:, :k] # (m,k) f64 — view
|
|
612
|
+
Z_k = V64 @ S_k # (N,k) f64
|
|
613
|
+
AZ_k = AV64 @ S_k # (N,k) f64
|
|
614
|
+
del V64, AV64 # free the two big f64 blocks NOW
|
|
615
|
+
|
|
616
|
+
# In-place: scale Z_k columns by eigenvalues, then subtract
|
|
617
|
+
Z_k.mul_(ritz_vals[:k].unsqueeze(0)) # Z_k[:,i] *= λ_i
|
|
618
|
+
AZ_k.sub_(Z_k) # AZ_k -= λ·Z_k
|
|
619
|
+
max_res = float(AZ_k.norm(dim=0).max().item())
|
|
620
|
+
del Z_k, AZ_k, S_k
|
|
621
|
+
|
|
622
|
+
if is_cuda:
|
|
623
|
+
torch.cuda.synchronize()
|
|
624
|
+
|
|
625
|
+
if outer % 5 == 0 or max_res < tol:
|
|
626
|
+
logger.info(
|
|
627
|
+
" ChFSI iter %2d: res=%.2e, λ_cut=%.4f",
|
|
628
|
+
outer, max_res, lambda_cut,
|
|
629
|
+
)
|
|
630
|
+
|
|
631
|
+
if max_res < tol:
|
|
632
|
+
converged = True
|
|
633
|
+
break
|
|
634
|
+
|
|
635
|
+
# Rotate V into Ritz basis
|
|
636
|
+
V = V @ ritz_vecs[:, :m].to(spmv_torch_dtype)
|
|
637
|
+
|
|
638
|
+
# Refine λ_cut
|
|
639
|
+
if ritz_vals.shape[0] > k:
|
|
640
|
+
lambda_cut = float(ritz_vals[m - 1].item()) * 1.5
|
|
641
|
+
lambda_cut = min(lambda_cut, lambda_max * 0.95)
|
|
642
|
+
|
|
643
|
+
# ── Aggressive VRAM cleanup EVERY iteration ─────────
|
|
644
|
+
if is_cuda:
|
|
645
|
+
torch.cuda.empty_cache()
|
|
646
|
+
|
|
647
|
+
# ── end outer loop ──────────────────────────────────────
|
|
648
|
+
|
|
649
|
+
if not converged:
|
|
650
|
+
logger.warning(
|
|
651
|
+
" ChFSI did not converge in %d iters "
|
|
652
|
+
"(res=%.2e > tol=%.1e).",
|
|
653
|
+
maxiter, max_res, tol,
|
|
654
|
+
)
|
|
685
655
|
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
656
|
+
# ── Extract eigenpairs ──────────────────────────────────
|
|
657
|
+
evals_t = ritz_vals[:k] # (k,) f64
|
|
658
|
+
evecs_t = V.to(ritz_torch_dtype) @ ritz_vecs[:, :k] # (N,k) f64
|
|
659
|
+
del V, ritz_vals, ritz_vecs
|
|
690
660
|
|
|
691
|
-
|
|
692
|
-
|
|
661
|
+
M_inv_sqrt_t = torch.from_numpy(
|
|
662
|
+
M_inv_sqrt_np
|
|
663
|
+
).to(dtype=ritz_torch_dtype, device=device).unsqueeze(1)
|
|
664
|
+
evecs_t.mul_(M_inv_sqrt_t) # in-place
|
|
665
|
+
del M_inv_sqrt_t
|
|
693
666
|
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
667
|
+
if is_cuda:
|
|
668
|
+
torch.cuda.synchronize()
|
|
669
|
+
evals = evals_t.cpu().numpy().astype(np.float64)
|
|
670
|
+
evecs = evecs_t.cpu().numpy().astype(np.float64)
|
|
671
|
+
del evals_t, evecs_t
|
|
698
672
|
|
|
699
673
|
finally:
|
|
700
|
-
|
|
701
|
-
if device.type == "cuda":
|
|
674
|
+
if is_cuda:
|
|
702
675
|
torch.cuda.synchronize()
|
|
703
676
|
torch.cuda.empty_cache()
|
|
677
|
+
gc.collect()
|
|
678
|
+
torch.cuda.empty_cache() # double-tap after gc frees python refs
|
|
679
|
+
vram_end = torch.cuda.memory_allocated(0)
|
|
680
|
+
delta = vram_end - vram_start
|
|
681
|
+
if delta > 1e6: # > 1 MB leak
|
|
682
|
+
logger.warning(
|
|
683
|
+
" VRAM leak detected: +%.1f MB (start=%.0f, end=%.0f)",
|
|
684
|
+
delta / 1e6, vram_start / 1e6, vram_end / 1e6,
|
|
685
|
+
)
|
|
704
686
|
|
|
705
687
|
order = np.argsort(evals)
|
|
706
688
|
return evals[order], evecs[:, order]
|
|
@@ -71,10 +71,18 @@ def gc_gpu() -> None:
|
|
|
71
71
|
"""
|
|
72
72
|
Aggressively free GPU memory across all available backends.
|
|
73
73
|
|
|
74
|
-
|
|
75
|
-
|
|
74
|
+
Uses a **double-tap** pattern: ``gc.collect()`` →
|
|
75
|
+
``empty_cache()`` → ``gc.collect()`` → ``empty_cache()`` to
|
|
76
|
+
ensure Python cyclic references holding CUDA tensors are fully
|
|
77
|
+
broken before the caching allocator releases blocks. Critical
|
|
78
|
+
for multi-subject batch pipelines where VRAM fragmentation
|
|
79
|
+
accumulates over hundreds of subjects.
|
|
80
|
+
|
|
81
|
+
Safe to call even when no GPU or backends are available.
|
|
76
82
|
"""
|
|
77
83
|
import gc
|
|
84
|
+
|
|
85
|
+
# First pass: break Python references → free CUDA tensors
|
|
78
86
|
gc.collect()
|
|
79
87
|
|
|
80
88
|
try:
|
|
@@ -85,6 +93,16 @@ def gc_gpu() -> None:
|
|
|
85
93
|
except ImportError:
|
|
86
94
|
pass
|
|
87
95
|
|
|
96
|
+
# Second pass: catch cyclic refs that survived first gc
|
|
97
|
+
gc.collect()
|
|
98
|
+
|
|
99
|
+
try:
|
|
100
|
+
import torch
|
|
101
|
+
if torch.cuda.is_available():
|
|
102
|
+
torch.cuda.empty_cache()
|
|
103
|
+
except ImportError:
|
|
104
|
+
pass
|
|
105
|
+
|
|
88
106
|
try:
|
|
89
107
|
import cupy as cp
|
|
90
108
|
cp.get_default_memory_pool().free_all_blocks()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: corticalfields
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Spectral cortical and subcortical analysis with statistical testing (RSA, CCA, PLS, PERMANOVA, TFCE, NBS, laterality classification), on meshes and point clouds — Laplace-Beltrami decomposition, atlas-free asymmetry, GPU-accelerated optimal transport, hippocampal subfield analysis (HippUnfold), ShapeDNA/BrainPrint spectral fingerprinting, geometric deep learning, Bayesian inference, and normative modeling for structural neuroimaging.
|
|
5
5
|
Author-email: rdneuro <r.debona@ufrj.br>
|
|
6
6
|
License: MIT
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/deep/__init__.py
RENAMED
|
File without changes
|
{corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/deep/diffusion_net.py
RENAMED
|
File without changes
|
|
File without changes
|
{corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/functional_maps.py
RENAMED
|
File without changes
|
{corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields/pointcloud/morphometrics.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{corticalfields-0.2.2 → corticalfields-0.2.4}/src/corticalfields.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|