blocksolver 0.8.1__cp310-cp310-win_amd64.whl → 0.8.3__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blocksolver/__init__.py +1 -1
- blocksolver/_blqmr.cp310-win_amd64.dll.a +0 -0
- blocksolver/_blqmr.cp310-win_amd64.pyd +0 -0
- blocksolver/blqmr.py +293 -83
- blocksolver-0.8.3.dist-info/METADATA +437 -0
- blocksolver-0.8.3.dist-info/RECORD +7 -0
- blocksolver-0.8.1.dist-info/METADATA +0 -157
- blocksolver-0.8.1.dist-info/RECORD +0 -7
- {blocksolver-0.8.1.dist-info → blocksolver-0.8.3.dist-info}/WHEEL +0 -0
blocksolver/__init__.py
CHANGED
|
Binary file
|
|
Binary file
|
blocksolver/blqmr.py
CHANGED
|
@@ -50,7 +50,7 @@ try:
|
|
|
50
50
|
from numba import njit
|
|
51
51
|
|
|
52
52
|
HAS_NUMBA = True
|
|
53
|
-
except ImportError:
|
|
53
|
+
except (ImportError, Exception) as e:
|
|
54
54
|
HAS_NUMBA = False
|
|
55
55
|
|
|
56
56
|
def njit(*args, **kwargs):
|
|
@@ -134,6 +134,52 @@ def _qqr_kernel_real(Q, R, n, m):
|
|
|
134
134
|
Q[i, k] -= Q[i, j] * dot
|
|
135
135
|
|
|
136
136
|
|
|
137
|
+
@njit(cache=True)
|
|
138
|
+
def _qqr_kernel_complex(Q, R, n, m):
|
|
139
|
+
"""Numba-accelerated quasi-QR kernel for complex arrays."""
|
|
140
|
+
for j in range(m):
|
|
141
|
+
# Quasi inner product: sum(q*q) WITHOUT conjugation
|
|
142
|
+
r_jj_sq = 0.0j
|
|
143
|
+
for i in range(n):
|
|
144
|
+
r_jj_sq += Q[i, j] * Q[i, j] # No conjugation!
|
|
145
|
+
r_jj = np.sqrt(r_jj_sq)
|
|
146
|
+
R[j, j] = r_jj
|
|
147
|
+
if abs(r_jj) > 1e-14:
|
|
148
|
+
inv_r_jj = 1.0 / r_jj
|
|
149
|
+
for i in range(n):
|
|
150
|
+
Q[i, j] *= inv_r_jj
|
|
151
|
+
for k in range(j + 1, m):
|
|
152
|
+
# Quasi inner product: sum(q_j * q_k) WITHOUT conjugation
|
|
153
|
+
dot = 0.0j
|
|
154
|
+
for i in range(n):
|
|
155
|
+
dot += Q[i, j] * Q[i, k] # No conjugation!
|
|
156
|
+
R[j, k] = dot
|
|
157
|
+
for i in range(n):
|
|
158
|
+
Q[i, k] -= Q[i, j] * dot
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@njit(cache=True)
|
|
162
|
+
def _qqr_kernel_real(Q, R, n, m):
|
|
163
|
+
"""Numba-accelerated quasi-QR kernel for real arrays."""
|
|
164
|
+
for j in range(m):
|
|
165
|
+
r_jj_sq = 0.0
|
|
166
|
+
for i in range(n):
|
|
167
|
+
r_jj_sq += Q[i, j] * Q[i, j]
|
|
168
|
+
r_jj = np.sqrt(r_jj_sq)
|
|
169
|
+
R[j, j] = r_jj
|
|
170
|
+
if abs(r_jj) > 1e-14:
|
|
171
|
+
inv_r_jj = 1.0 / r_jj
|
|
172
|
+
for i in range(n):
|
|
173
|
+
Q[i, j] *= inv_r_jj
|
|
174
|
+
for k in range(j + 1, m):
|
|
175
|
+
dot = 0.0
|
|
176
|
+
for i in range(n):
|
|
177
|
+
dot += Q[i, j] * Q[i, k]
|
|
178
|
+
R[j, k] = dot
|
|
179
|
+
for i in range(n):
|
|
180
|
+
Q[i, k] -= Q[i, j] * dot
|
|
181
|
+
|
|
182
|
+
|
|
137
183
|
def qqr(
|
|
138
184
|
A: np.ndarray, tol: float = 0, use_numba: bool = True
|
|
139
185
|
) -> Tuple[np.ndarray, np.ndarray]:
|
|
@@ -173,14 +219,20 @@ def qqr(
|
|
|
173
219
|
else:
|
|
174
220
|
for j in range(m):
|
|
175
221
|
qj = Q[:, j]
|
|
176
|
-
|
|
222
|
+
# CRITICAL FIX: Use sum(qj * qj) NOT np.dot(qj, qj)
|
|
223
|
+
# NOTE: np.vdot (not np.dot) conjugates its first argument; the quasi inner product must never conjugate.
|
|
224
|
+
# Fortran: R(k,k)=dsqrt(sum(Q(:,k)*Q(:,k))) - no conjugation
|
|
225
|
+
r_jj_sq = np.sum(qj * qj) # Quasi inner product - NO conjugation
|
|
177
226
|
r_jj = np.sqrt(r_jj_sq)
|
|
178
227
|
R[j, j] = r_jj
|
|
179
228
|
if np.abs(r_jj) > 1e-14:
|
|
180
229
|
Q[:, j] *= 1.0 / r_jj
|
|
181
230
|
if j < m - 1:
|
|
182
|
-
|
|
183
|
-
|
|
231
|
+
# CRITICAL FIX: Quasi inner product for off-diagonal
|
|
232
|
+
# Fortran: R(k,j)=sum(Q(:,k)*Q(:,j)) - no conjugation
|
|
233
|
+
for k in range(j + 1, m):
|
|
234
|
+
R[j, k] = np.sum(Q[:, j] * Q[:, k]) # NO conjugation
|
|
235
|
+
Q[:, k] -= R[j, k] * Q[:, j]
|
|
184
236
|
|
|
185
237
|
return Q, R
|
|
186
238
|
|
|
@@ -217,8 +269,12 @@ class SparsePreconditioner:
|
|
|
217
269
|
|
|
218
270
|
def __init__(self, M1, M2=None):
|
|
219
271
|
self.is_two_part = M2 is not None
|
|
220
|
-
self.is_ilu1 = isinstance(M1, _ILUPreconditioner)
|
|
221
|
-
self.is_ilu2 =
|
|
272
|
+
self.is_ilu1 = isinstance(M1, (_ILUPreconditioner, _LUPreconditioner))
|
|
273
|
+
self.is_ilu2 = (
|
|
274
|
+
isinstance(M2, (_ILUPreconditioner, _LUPreconditioner))
|
|
275
|
+
if M2 is not None
|
|
276
|
+
else False
|
|
277
|
+
)
|
|
222
278
|
|
|
223
279
|
if M1 is not None:
|
|
224
280
|
if self.is_ilu1:
|
|
@@ -382,7 +438,7 @@ class BLQMRWorkspace:
|
|
|
382
438
|
# =============================================================================
|
|
383
439
|
|
|
384
440
|
|
|
385
|
-
def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
|
|
441
|
+
def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag", **kwargs):
|
|
386
442
|
"""
|
|
387
443
|
Create a preconditioner for iterative solvers.
|
|
388
444
|
|
|
@@ -392,8 +448,14 @@ def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
|
|
|
392
448
|
System matrix
|
|
393
449
|
precond_type : str
|
|
394
450
|
'diag' or 'jacobi': Diagonal (Jacobi) preconditioner
|
|
395
|
-
'ilu' or 'ilu0': Incomplete LU
|
|
451
|
+
'ilu0': Incomplete LU with minimal fill
|
|
452
|
+
'ilu' or 'ilut': Incomplete LU with threshold (better quality)
|
|
453
|
+
'lu': Full LU factorization (exact, use as reference)
|
|
396
454
|
'ssor': Symmetric SOR
|
|
455
|
+
**kwargs : dict
|
|
456
|
+
Additional parameters for ILU:
|
|
457
|
+
- drop_tol: Drop tolerance (default: 1e-4 for ilut; ilu0 always uses 0 and ignores this)
|
|
458
|
+
- fill_factor: Fill factor (default: 10 for ilut; ilu0 always uses 1 and ignores this)
|
|
397
459
|
|
|
398
460
|
Returns
|
|
399
461
|
-------
|
|
@@ -403,18 +465,46 @@ def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
|
|
|
403
465
|
if precond_type in ("diag", "jacobi"):
|
|
404
466
|
diag = A.diagonal().copy()
|
|
405
467
|
diag[np.abs(diag) < 1e-14] = 1.0
|
|
406
|
-
return sparse.diags(
|
|
468
|
+
return sparse.diags(
|
|
469
|
+
1.0 / diag, format="csr"
|
|
470
|
+
) # Return inverse for preconditioning!
|
|
407
471
|
|
|
408
|
-
elif precond_type
|
|
472
|
+
elif precond_type == "ilu0":
|
|
473
|
+
# ILU(0) - no fill-in, fast but may be poor quality
|
|
409
474
|
try:
|
|
410
475
|
ilu = spilu(A.tocsc(), drop_tol=0, fill_factor=1)
|
|
411
476
|
return _ILUPreconditioner(ilu)
|
|
412
477
|
except Exception as e:
|
|
413
|
-
warnings.warn(f"ILU factorization failed: {e}, falling back to diagonal")
|
|
478
|
+
warnings.warn(f"ILU(0) factorization failed: {e}, falling back to diagonal")
|
|
414
479
|
return make_preconditioner(A, "diag")
|
|
415
480
|
|
|
481
|
+
elif precond_type in ("ilu", "ilut"):
|
|
482
|
+
# ILUT - ILU with threshold, better quality (similar to UMFPACK)
|
|
483
|
+
drop_tol = kwargs.get("drop_tol", 1e-4)
|
|
484
|
+
fill_factor = kwargs.get("fill_factor", 10)
|
|
485
|
+
try:
|
|
486
|
+
ilu = spilu(A.tocsc(), drop_tol=drop_tol, fill_factor=fill_factor)
|
|
487
|
+
return _ILUPreconditioner(ilu)
|
|
488
|
+
except Exception as e:
|
|
489
|
+
warnings.warn(f"ILUT factorization failed: {e}, trying ILU(0)")
|
|
490
|
+
try:
|
|
491
|
+
ilu = spilu(A.tocsc(), drop_tol=0, fill_factor=1)
|
|
492
|
+
return _ILUPreconditioner(ilu)
|
|
493
|
+
except Exception as e2:
|
|
494
|
+
warnings.warn(f"ILU(0) also failed: {e2}, falling back to diagonal")
|
|
495
|
+
return make_preconditioner(A, "diag")
|
|
496
|
+
|
|
497
|
+
elif precond_type == "lu":
|
|
498
|
+
# Full LU - exact factorization (for reference/debugging)
|
|
499
|
+
try:
|
|
500
|
+
lu = splu(A.tocsc())
|
|
501
|
+
return _LUPreconditioner(lu)
|
|
502
|
+
except Exception as e:
|
|
503
|
+
warnings.warn(f"LU factorization failed: {e}, falling back to ILUT")
|
|
504
|
+
return make_preconditioner(A, "ilut")
|
|
505
|
+
|
|
416
506
|
elif precond_type == "ssor":
|
|
417
|
-
omega = 1.0
|
|
507
|
+
omega = kwargs.get("omega", 1.0)
|
|
418
508
|
D = sparse.diags(A.diagonal(), format="csr")
|
|
419
509
|
L = sparse.tril(A, k=-1, format="csr")
|
|
420
510
|
return (D + omega * L).tocsr()
|
|
@@ -423,6 +513,24 @@ def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
|
|
|
423
513
|
raise ValueError(f"Unknown preconditioner type: {precond_type}")
|
|
424
514
|
|
|
425
515
|
|
|
516
|
+
class _LUPreconditioner:
|
|
517
|
+
"""Wrapper for full LU preconditioner."""
|
|
518
|
+
|
|
519
|
+
def __init__(self, lu_factor):
|
|
520
|
+
self.lu = lu_factor
|
|
521
|
+
self.shape = (lu_factor.shape[0], lu_factor.shape[1])
|
|
522
|
+
self.dtype = np.float64 # Assume real for now
|
|
523
|
+
|
|
524
|
+
def solve(self, b):
|
|
525
|
+
if b.ndim == 1:
|
|
526
|
+
return self.lu.solve(b)
|
|
527
|
+
else:
|
|
528
|
+
x = np.zeros_like(b)
|
|
529
|
+
for i in range(b.shape[1]):
|
|
530
|
+
x[:, i] = self.lu.solve(b[:, i])
|
|
531
|
+
return x
|
|
532
|
+
|
|
533
|
+
|
|
426
534
|
# =============================================================================
|
|
427
535
|
# Pure-Python Block QMR Solver
|
|
428
536
|
# =============================================================================
|
|
@@ -448,7 +556,7 @@ def _blqmr_python_impl(
|
|
|
448
556
|
dtype = np.complex128 if is_complex_input else np.float64
|
|
449
557
|
|
|
450
558
|
if maxiter is None:
|
|
451
|
-
maxiter = min(n,
|
|
559
|
+
maxiter = min(n, 100)
|
|
452
560
|
|
|
453
561
|
if (
|
|
454
562
|
workspace is None
|
|
@@ -463,10 +571,13 @@ def _blqmr_python_impl(
|
|
|
463
571
|
|
|
464
572
|
# Setup preconditioner
|
|
465
573
|
if M1 is not None:
|
|
466
|
-
if isinstance(M1, _ILUPreconditioner):
|
|
574
|
+
if isinstance(M1, (_ILUPreconditioner, _LUPreconditioner)):
|
|
467
575
|
precond = SparsePreconditioner(M1, M2)
|
|
468
576
|
elif sparse.issparse(M1):
|
|
469
577
|
precond = SparsePreconditioner(M1, M2)
|
|
578
|
+
elif hasattr(M1, "solve"):
|
|
579
|
+
# Custom preconditioner with .solve() method
|
|
580
|
+
precond = M1 # Use directly
|
|
470
581
|
else:
|
|
471
582
|
precond = DensePreconditioner(M1, M2)
|
|
472
583
|
else:
|
|
@@ -477,7 +588,16 @@ def _blqmr_python_impl(
|
|
|
477
588
|
else:
|
|
478
589
|
x = np.asarray(x0, dtype=dtype).reshape(n, m).copy()
|
|
479
590
|
|
|
480
|
-
|
|
591
|
+
# Initialize indices: Fortran t3=mod(0,3)+1=1 -> Python t3=0
|
|
592
|
+
t3 = 0
|
|
593
|
+
t3n = 2
|
|
594
|
+
t3p = 1
|
|
595
|
+
|
|
596
|
+
# Initialize Q matrices (identity)
|
|
597
|
+
ws.Qa[:, :, :] = 0
|
|
598
|
+
ws.Qb[:, :, :] = 0
|
|
599
|
+
ws.Qc[:, :, :] = 0
|
|
600
|
+
ws.Qd[:, :, :] = 0
|
|
481
601
|
ws.Qa[:, :, t3] = np.eye(m, dtype=dtype)
|
|
482
602
|
ws.Qd[:, :, t3n] = np.eye(m, dtype=dtype)
|
|
483
603
|
ws.Qd[:, :, t3] = np.eye(m, dtype=dtype)
|
|
@@ -493,105 +613,151 @@ def _blqmr_python_impl(
|
|
|
493
613
|
if np.any(np.isnan(ws.vt)):
|
|
494
614
|
return x, 2, 1.0, 0, np.array([])
|
|
495
615
|
|
|
616
|
+
# QQR decomposition
|
|
496
617
|
Q, R = qqr(ws.vt)
|
|
497
618
|
ws.v[:, :, t3p] = Q
|
|
498
619
|
ws.beta[:, :, t3p] = R
|
|
499
620
|
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
621
|
+
# Compute omega - standard norm WITH conjugation (Hermitian norm)
|
|
622
|
+
# Fortran: omega(i,i,t3p)=sqrt(sum(conjg(v(:,i,t3p))*v(:,i,t3p)))
|
|
623
|
+
for i in range(m):
|
|
624
|
+
col = ws.v[:, i, t3p]
|
|
625
|
+
if is_complex_input:
|
|
626
|
+
ws.omega[i, i, t3p] = np.sqrt(np.sum(np.conj(col) * col).real)
|
|
627
|
+
else:
|
|
628
|
+
ws.omega[i, i, t3p] = np.sqrt(np.sum(col * col))
|
|
629
|
+
|
|
630
|
+
# taut = omega * beta
|
|
631
|
+
ws.taot[:] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
|
|
503
632
|
|
|
504
633
|
isquasires = not residual
|
|
505
634
|
if isquasires:
|
|
506
|
-
Qres0
|
|
635
|
+
# Fortran: Qres0=maxval(sqrt(sum(abs(conjg(taut)*taut),1))) for complex
|
|
636
|
+
if is_complex_input:
|
|
637
|
+
Qres0 = np.max(np.sqrt(np.sum(np.abs(np.conj(ws.taot) * ws.taot), axis=0)))
|
|
638
|
+
else:
|
|
639
|
+
Qres0 = np.max(np.sqrt(np.sum(ws.taot * ws.taot, axis=0)))
|
|
507
640
|
else:
|
|
508
|
-
omegat =
|
|
509
|
-
|
|
641
|
+
omegat = np.zeros((n, m), dtype=dtype)
|
|
642
|
+
for i in range(m):
|
|
643
|
+
if np.abs(ws.omega[i, i, t3p]) > 1e-14:
|
|
644
|
+
omegat[:, i] = ws.v[:, i, t3p] / ws.omega[i, i, t3p]
|
|
645
|
+
if is_complex_input:
|
|
646
|
+
Qres0 = np.max(np.sqrt(np.sum(np.abs(np.conj(ws.vt) * ws.vt), axis=0)))
|
|
647
|
+
else:
|
|
648
|
+
Qres0 = np.max(np.sqrt(np.sum(ws.vt * ws.vt, axis=0)))
|
|
510
649
|
|
|
511
650
|
if Qres0 < 1e-16:
|
|
512
651
|
result = x.real if not is_complex_input else x
|
|
513
652
|
return result, 0, 0.0, 0, np.array([0.0])
|
|
514
653
|
|
|
515
|
-
flag, resv, Qres1, relres, iter_count = 1, np.zeros(maxiter),
|
|
516
|
-
omegat = None if isquasires else Q @ np.diag(1.0 / (col_norms + 1e-16))
|
|
654
|
+
flag, resv, Qres1, relres, iter_count = 1, np.zeros(maxiter), -1.0, 1.0, 0
|
|
517
655
|
|
|
518
656
|
for k in range(1, maxiter + 1):
|
|
519
|
-
|
|
657
|
+
# Index cycling
|
|
658
|
+
t3 = k % 3
|
|
659
|
+
t3p = (k + 1) % 3
|
|
660
|
+
t3n = (k - 1) % 3
|
|
661
|
+
t3nn = (k - 2) % 3
|
|
520
662
|
|
|
663
|
+
# tmp = A * v(:,:,t3)
|
|
521
664
|
if A_is_sparse:
|
|
522
665
|
ws.Av[:] = A @ ws.v[:, :, t3]
|
|
523
666
|
else:
|
|
524
667
|
np.matmul(A, ws.v[:, :, t3], out=ws.Av)
|
|
525
668
|
|
|
669
|
+
# Apply preconditioner
|
|
526
670
|
if precond is not None:
|
|
527
671
|
precond.solve(ws.Av, out=ws.vt)
|
|
528
|
-
ws.vt
|
|
672
|
+
ws.vt[:] = ws.vt - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
|
|
529
673
|
else:
|
|
530
|
-
|
|
531
|
-
np.subtract(ws.Av, ws.vt, out=ws.vt)
|
|
674
|
+
ws.vt[:] = ws.Av - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
|
|
532
675
|
|
|
533
|
-
|
|
534
|
-
ws.
|
|
676
|
+
# alpha = v^T * vt (transpose, not conjugate transpose)
|
|
677
|
+
ws.alpha[:] = ws.v[:, :, t3].T @ ws.vt
|
|
678
|
+
ws.vt[:] = ws.vt - ws.v[:, :, t3] @ ws.alpha
|
|
535
679
|
|
|
680
|
+
# QQR decomposition
|
|
536
681
|
Q, R = qqr(ws.vt)
|
|
537
682
|
ws.v[:, :, t3p] = Q
|
|
538
683
|
ws.beta[:, :, t3p] = R
|
|
539
684
|
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
np.matmul(ws.omega[:, :, t3], ws.alpha, out=ws.tmp2)
|
|
548
|
-
np.matmul(ws.Qa[:, :, t3n], ws.tmp1, out=ws.eta)
|
|
549
|
-
ws.eta += ws.Qb[:, :, t3n] @ ws.tmp2
|
|
685
|
+
# Compute omega (standard Hermitian norm)
|
|
686
|
+
for i in range(m):
|
|
687
|
+
col = ws.v[:, i, t3p]
|
|
688
|
+
if is_complex_input:
|
|
689
|
+
ws.omega[i, i, t3p] = np.sqrt(np.sum(np.conj(col) * col).real)
|
|
690
|
+
else:
|
|
691
|
+
ws.omega[i, i, t3p] = np.sqrt(np.sum(col * col))
|
|
550
692
|
|
|
551
|
-
|
|
552
|
-
ws.
|
|
693
|
+
# Compute intermediate matrices
|
|
694
|
+
ws.tmp0[:] = ws.omega[:, :, t3n] @ ws.beta[:, :, t3].T
|
|
695
|
+
ws.theta[:] = ws.Qb[:, :, t3nn] @ ws.tmp0
|
|
696
|
+
ws.tmp1[:] = ws.Qd[:, :, t3nn] @ ws.tmp0
|
|
697
|
+
ws.tmp2[:] = ws.omega[:, :, t3] @ ws.alpha
|
|
698
|
+
ws.eta[:] = ws.Qa[:, :, t3n] @ ws.tmp1 + ws.Qb[:, :, t3n] @ ws.tmp2
|
|
699
|
+
ws.zetat[:] = ws.Qc[:, :, t3n] @ ws.tmp1 + ws.Qd[:, :, t3n] @ ws.tmp2
|
|
553
700
|
|
|
701
|
+
# Build ZZ matrix and do standard QR
|
|
554
702
|
ws.stacked[:m, :] = ws.zetat
|
|
555
|
-
|
|
703
|
+
ws.stacked[m:, :] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
|
|
556
704
|
|
|
557
705
|
QQ, zeta_full = np.linalg.qr(ws.stacked, mode="complete")
|
|
558
706
|
ws.zeta[:] = zeta_full[:m, :]
|
|
559
|
-
|
|
707
|
+
|
|
708
|
+
if is_complex_input:
|
|
709
|
+
ws.QQ_full[:] = np.conj(QQ.T)
|
|
710
|
+
else:
|
|
711
|
+
ws.QQ_full[:] = QQ.T
|
|
560
712
|
|
|
561
713
|
ws.Qa[:, :, t3] = ws.QQ_full[:m, :m]
|
|
562
714
|
ws.Qb[:, :, t3] = ws.QQ_full[:m, m : 2 * m]
|
|
563
715
|
ws.Qc[:, :, t3] = ws.QQ_full[m : 2 * m, :m]
|
|
564
716
|
ws.Qd[:, :, t3] = ws.QQ_full[m : 2 * m, m : 2 * m]
|
|
565
717
|
|
|
718
|
+
# Invert zeta
|
|
566
719
|
try:
|
|
567
720
|
zeta_inv = np.linalg.inv(ws.zeta)
|
|
568
721
|
except np.linalg.LinAlgError:
|
|
569
722
|
zeta_inv = np.linalg.pinv(ws.zeta)
|
|
570
723
|
|
|
724
|
+
# Update p, tau, x, taut
|
|
571
725
|
ws.p[:, :, t3] = (
|
|
572
726
|
ws.v[:, :, t3] - ws.p[:, :, t3n] @ ws.eta - ws.p[:, :, t3nn] @ ws.theta
|
|
573
727
|
) @ zeta_inv
|
|
728
|
+
ws.tau[:] = ws.Qa[:, :, t3] @ ws.taot
|
|
729
|
+
x[:] = x + ws.p[:, :, t3] @ ws.tau
|
|
730
|
+
ws.taot[:] = ws.Qc[:, :, t3] @ ws.taot
|
|
574
731
|
|
|
575
|
-
|
|
576
|
-
x += ws.p[:, :, t3] @ ws.tau
|
|
577
|
-
|
|
578
|
-
taot_copy = ws.taot.copy()
|
|
579
|
-
np.matmul(ws.Qc[:, :, t3], taot_copy, out=ws.taot)
|
|
580
|
-
|
|
732
|
+
# Compute residual
|
|
581
733
|
if isquasires:
|
|
582
|
-
|
|
734
|
+
if is_complex_input:
|
|
735
|
+
Qres = np.max(
|
|
736
|
+
np.sqrt(np.sum(np.abs(np.conj(ws.taot) * ws.taot), axis=0))
|
|
737
|
+
)
|
|
738
|
+
else:
|
|
739
|
+
Qres = np.max(np.sqrt(np.sum(ws.taot * ws.taot, axis=0)))
|
|
583
740
|
else:
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
741
|
+
tmp0_diag = np.zeros((m, m), dtype=dtype)
|
|
742
|
+
for i in range(m):
|
|
743
|
+
if np.abs(ws.omega[i, i, t3p]) > 1e-14:
|
|
744
|
+
tmp0_diag[i, :] = ws.Qd[:, i, t3] / ws.omega[i, i, t3p]
|
|
745
|
+
if is_complex_input:
|
|
746
|
+
omegat = omegat @ np.conj(ws.Qc[:, :, t3].T) + ws.v[
|
|
747
|
+
:, :, t3p
|
|
748
|
+
] @ np.conj(tmp0_diag)
|
|
749
|
+
tmp_res = np.conj(omegat @ ws.taot)
|
|
750
|
+
Qres = np.max(
|
|
751
|
+
np.sqrt(np.sum(np.abs(np.conj(tmp_res) * tmp_res), axis=0))
|
|
752
|
+
)
|
|
753
|
+
else:
|
|
754
|
+
omegat = omegat @ ws.Qc[:, :, t3].T + ws.v[:, :, t3p] @ tmp0_diag
|
|
755
|
+
tmp_res = omegat @ ws.taot
|
|
756
|
+
Qres = np.max(np.sqrt(np.sum(tmp_res * tmp_res, axis=0)))
|
|
591
757
|
|
|
592
758
|
resv[k - 1] = Qres
|
|
593
759
|
|
|
594
|
-
if
|
|
760
|
+
if k > 1 and abs(Qres - Qres1) < np.finfo(dtype).eps:
|
|
595
761
|
flag, iter_count = 3, k
|
|
596
762
|
break
|
|
597
763
|
|
|
@@ -739,7 +905,10 @@ def _blqmr_solve_native_csc(
|
|
|
739
905
|
try:
|
|
740
906
|
M1 = make_preconditioner(A, "ilu")
|
|
741
907
|
except Exception:
|
|
742
|
-
|
|
908
|
+
try:
|
|
909
|
+
M1 = make_preconditioner(A, "diag") # FIX: Changed A_sp to A
|
|
910
|
+
except Exception:
|
|
911
|
+
M1 = None # Fall back to no preconditioning
|
|
743
912
|
|
|
744
913
|
x, flag, relres, niter, resv = _blqmr_python_impl(
|
|
745
914
|
A, b, tol=tol, maxiter=maxiter, M1=M1, x0=x0
|
|
@@ -846,7 +1015,10 @@ def _blqmr_solve_multi_native(
|
|
|
846
1015
|
try:
|
|
847
1016
|
M1 = make_preconditioner(A, "ilu")
|
|
848
1017
|
except Exception:
|
|
849
|
-
|
|
1018
|
+
try:
|
|
1019
|
+
M1 = make_preconditioner(A, "diag") # FIX: Changed A_sp to A
|
|
1020
|
+
except Exception:
|
|
1021
|
+
M1 = None # Fall back to no preconditioning
|
|
850
1022
|
|
|
851
1023
|
if B.ndim == 1:
|
|
852
1024
|
B = B.reshape(-1, 1)
|
|
@@ -986,40 +1158,75 @@ def _blqmr_fortran(
|
|
|
986
1158
|
) -> BLQMRResult:
|
|
987
1159
|
"""Fortran backend for blqmr()."""
|
|
988
1160
|
A_csc = sparse.csc_matrix(A)
|
|
1161
|
+
|
|
1162
|
+
# CRITICAL: Sort indices for UMFPACK compatibility
|
|
1163
|
+
if not A_csc.has_sorted_indices:
|
|
1164
|
+
A_csc.sort_indices()
|
|
1165
|
+
|
|
989
1166
|
Ap = A_csc.indptr.astype(np.int32)
|
|
990
1167
|
Ai = A_csc.indices.astype(np.int32)
|
|
991
|
-
Ax = A_csc.data.astype(np.float64)
|
|
992
1168
|
|
|
993
1169
|
n = A_csc.shape[0]
|
|
994
|
-
nnz =
|
|
1170
|
+
nnz = A_csc.nnz
|
|
995
1171
|
|
|
996
1172
|
if maxiter is None:
|
|
997
1173
|
maxiter = n
|
|
998
1174
|
|
|
999
|
-
# Convert to Fortran format
|
|
1000
|
-
Ap_f = np.asfortranarray(Ap + 1, dtype=np.int32)
|
|
1001
|
-
Ai_f = np.asfortranarray(Ai + 1, dtype=np.int32)
|
|
1002
|
-
Ax_f = np.asfortranarray(Ax, dtype=np.float64)
|
|
1175
|
+
# Convert to Fortran format (1-based indexing)
|
|
1176
|
+
Ap_f = np.asfortranarray(Ap + 1, dtype=np.int32)
|
|
1177
|
+
Ai_f = np.asfortranarray(Ai + 1, dtype=np.int32)
|
|
1003
1178
|
|
|
1004
1179
|
dopcond = 1 if use_precond else 0
|
|
1005
1180
|
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
)
|
|
1181
|
+
# Check if complex
|
|
1182
|
+
is_complex = np.iscomplexobj(A) or np.iscomplexobj(B)
|
|
1183
|
+
|
|
1184
|
+
if is_complex:
|
|
1185
|
+
# Complex path
|
|
1186
|
+
Ax_f = np.asfortranarray(A_csc.data, dtype=np.complex128)
|
|
1187
|
+
|
|
1188
|
+
if B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1):
|
|
1189
|
+
# Single RHS
|
|
1190
|
+
b_f = np.asfortranarray(B.ravel(), dtype=np.complex128)
|
|
1191
|
+
x, flag, niter, relres = _blqmr.blqmr_solve_complex(
|
|
1192
|
+
n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, dopcond
|
|
1193
|
+
)
|
|
1194
|
+
return BLQMRResult(
|
|
1195
|
+
x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
|
|
1196
|
+
)
|
|
1197
|
+
else:
|
|
1198
|
+
# Multiple RHS - use block method
|
|
1199
|
+
B_f = np.asfortranarray(B, dtype=np.complex128)
|
|
1200
|
+
nrhs = B_f.shape[1]
|
|
1201
|
+
X, flag, niter, relres = _blqmr.blqmr_solve_complex_multi(
|
|
1202
|
+
n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, dopcond
|
|
1203
|
+
)
|
|
1204
|
+
return BLQMRResult(
|
|
1205
|
+
x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
|
|
1206
|
+
)
|
|
1014
1207
|
else:
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
x
|
|
1022
|
-
|
|
1208
|
+
# Real path
|
|
1209
|
+
Ax_f = np.asfortranarray(A_csc.data, dtype=np.float64)
|
|
1210
|
+
|
|
1211
|
+
if B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1):
|
|
1212
|
+
# Single RHS
|
|
1213
|
+
b_f = np.asfortranarray(B.ravel(), dtype=np.float64)
|
|
1214
|
+
x, flag, niter, relres = _blqmr.blqmr_solve_real(
|
|
1215
|
+
n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, dopcond
|
|
1216
|
+
)
|
|
1217
|
+
return BLQMRResult(
|
|
1218
|
+
x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
|
|
1219
|
+
)
|
|
1220
|
+
else:
|
|
1221
|
+
# Multiple RHS - use block method
|
|
1222
|
+
B_f = np.asfortranarray(B, dtype=np.float64)
|
|
1223
|
+
nrhs = B_f.shape[1]
|
|
1224
|
+
X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
|
|
1225
|
+
n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, dopcond
|
|
1226
|
+
)
|
|
1227
|
+
return BLQMRResult(
|
|
1228
|
+
x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
|
|
1229
|
+
)
|
|
1023
1230
|
|
|
1024
1231
|
|
|
1025
1232
|
def _blqmr_native(
|
|
@@ -1042,7 +1249,10 @@ def _blqmr_native(
|
|
|
1042
1249
|
try:
|
|
1043
1250
|
M1 = make_preconditioner(A_sp, "ilu")
|
|
1044
1251
|
except Exception:
|
|
1045
|
-
|
|
1252
|
+
try:
|
|
1253
|
+
M1 = make_preconditioner(A_sp, "diag")
|
|
1254
|
+
except Exception:
|
|
1255
|
+
M1 = None # Fall back to no preconditioning
|
|
1046
1256
|
|
|
1047
1257
|
x, flag, relres, niter, resv = _blqmr_python_impl(
|
|
1048
1258
|
A,
|
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: blocksolver
|
|
3
|
+
Version: 0.8.3
|
|
4
|
+
Summary: Block Quasi-Minimal-Residual sparse linear solver
|
|
5
|
+
Keywords: sparse,linear-algebra,iterative-solver,qmr,fortran,umfpack
|
|
6
|
+
Author-Email: Qianqian Fang <q.fang@neu.edu>
|
|
7
|
+
License: BSD-3-Clause OR LGPL-3.0-or-later OR GPL-3.0-or-later
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
11
|
+
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)
|
|
12
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
15
|
+
Classifier: Operating System :: MacOS
|
|
16
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
17
|
+
Classifier: Programming Language :: Fortran
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
26
|
+
Project-URL: Homepage, https://blit.sourceforge.net
|
|
27
|
+
Project-URL: Repository, https://github.com/fangq/blocksolver
|
|
28
|
+
Project-URL: Documentation, https://blit.sourceforge.net
|
|
29
|
+
Project-URL: Bug Tracker, https://github.com/fangq/blocksolver/issues
|
|
30
|
+
Requires-Python: >=3.8
|
|
31
|
+
Requires-Dist: numpy>=1.20
|
|
32
|
+
Requires-Dist: scipy>=1.0
|
|
33
|
+
Provides-Extra: fast
|
|
34
|
+
Requires-Dist: numba>=0.50; extra == "fast"
|
|
35
|
+
Provides-Extra: test
|
|
36
|
+
Requires-Dist: pytest>=6.0; extra == "test"
|
|
37
|
+
Provides-Extra: dev
|
|
38
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
39
|
+
Requires-Dist: build; extra == "dev"
|
|
40
|
+
Requires-Dist: twine; extra == "dev"
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
|
|
43
|
+
# BlockSolver - Block Quasi-Minimal Residual (BLQMR) Sparse Linear Solver
|
|
44
|
+
|
|
45
|
+
**BlockSolver** is a Python package for solving large sparse linear systems using the Block Quasi-Minimal Residual (BLQMR) algorithm. It provides both a high-performance Fortran backend and a pure Python/NumPy implementation for maximum portability.
|
|
46
|
+
|
|
47
|
+
## Features
|
|
48
|
+
|
|
49
|
+
- **Block QMR Algorithm**: Efficiently solves multiple right-hand sides simultaneously
|
|
50
|
+
- **Complex Symmetric Support**: Designed for complex symmetric matrices (A = Aᵀ, not A = A†)
|
|
51
|
+
- **Dual Backend**: Fortran extension for speed, Python fallback for portability
|
|
52
|
+
- **ILU Preconditioning**: Built-in incomplete LU preconditioner for faster convergence
|
|
53
|
+
- **SciPy Integration**: Works seamlessly with SciPy sparse matrices
|
|
54
|
+
- **Optional Numba Acceleration**: JIT-compiled kernels for the Python backend
|
|
55
|
+
|
|
56
|
+
## Algorithm
|
|
57
|
+
|
|
58
|
+
### Block Quasi-Minimal Residual (BLQMR)
|
|
59
|
+
|
|
60
|
+
The BLQMR algorithm is an iterative Krylov subspace method specifically designed for:
|
|
61
|
+
|
|
62
|
+
1. **Complex symmetric systems**: Unlike standard methods that assume Hermitian (A = A†) or general matrices, BLQMR exploits complex symmetry (A = Aᵀ) which arises in electromagnetics, acoustics, and diffuse optical tomography.
|
|
63
|
+
|
|
64
|
+
2. **Multiple right-hand sides**: Instead of solving each system independently, BLQMR processes all right-hand sides together in a block fashion, sharing Krylov subspace information and reducing total computation.
|
|
65
|
+
|
|
66
|
+
3. **Quasi-minimal residual**: The algorithm minimizes a quasi-residual norm at each iteration, providing smooth convergence without the erratic behavior of some Krylov methods.
|
|
67
|
+
|
|
68
|
+
### Key Components
|
|
69
|
+
|
|
70
|
+
- **Quasi-QR Decomposition**: A modified Gram-Schmidt process using the quasi inner product ⟨x,y⟩ = Σ xₖyₖ (without conjugation) for complex symmetric systems.
|
|
71
|
+
|
|
72
|
+
- **Three-term Lanczos Recurrence**: Builds a basis for the Krylov subspace that is orthonormal with respect to the quasi inner product (not the Hermitian one), using short recurrences that minimize memory usage.
|
|
73
|
+
|
|
74
|
+
- **Block Updates**: Processes m right-hand sides simultaneously, with typical block sizes of 1-16.
|
|
75
|
+
|
|
76
|
+
### When to Use BLQMR
|
|
77
|
+
|
|
78
|
+
| Use Case | Recommendation |
|
|
79
|
+
|----------|----------------|
|
|
80
|
+
| Complex symmetric matrix (A = Aᵀ) | ✅ Ideal |
|
|
81
|
+
| Multiple right-hand sides | ✅ Ideal |
|
|
82
|
+
| Real symmetric positive definite | Consider CG first |
|
|
83
|
+
| General non-symmetric | Consider GMRES or BiCGSTAB |
|
|
84
|
+
| Very large systems (>10⁶ unknowns) | ✅ Good with preconditioning |
|
|
85
|
+
|
|
86
|
+
## Installation
|
|
87
|
+
|
|
88
|
+
### From PyPI
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pip install blocksolver
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### From Source
|
|
95
|
+
|
|
96
|
+
Prerequisites:
|
|
97
|
+
- Python ≥ 3.8
|
|
98
|
+
- NumPy ≥ 1.20
|
|
99
|
+
- SciPy ≥ 1.0
|
|
100
|
+
- (Optional) Fortran compiler + UMFPACK for the accelerated backend
|
|
101
|
+
- (Optional) Numba for accelerated Python backend
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Ubuntu/Debian
|
|
105
|
+
sudo apt install gfortran libsuitesparse-dev libblas-dev liblapack-dev
|
|
106
|
+
|
|
107
|
+
# macOS
|
|
108
|
+
brew install gcc suite-sparse openblas
|
|
109
|
+
|
|
110
|
+
# Install
|
|
111
|
+
cd python
|
|
112
|
+
pip install .
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Quick Start
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
import numpy as np
|
|
119
|
+
from scipy.sparse import csc_matrix
|
|
120
|
+
from blocksolver import blqmr
|
|
121
|
+
|
|
122
|
+
# Create a sparse matrix
|
|
123
|
+
A = csc_matrix([
|
|
124
|
+
[4, 1, 0, 0],
|
|
125
|
+
[1, 4, 1, 0],
|
|
126
|
+
[0, 1, 4, 1],
|
|
127
|
+
[0, 0, 1, 4]
|
|
128
|
+
], dtype=float)
|
|
129
|
+
|
|
130
|
+
b = np.array([1., 2., 3., 4.])
|
|
131
|
+
|
|
132
|
+
# Solve Ax = b
|
|
133
|
+
result = blqmr(A, b, tol=1e-10)
|
|
134
|
+
|
|
135
|
+
print(f"Solution: {result.x}")
|
|
136
|
+
print(f"Converged: {result.converged}")
|
|
137
|
+
print(f"Iterations: {result.iter}")
|
|
138
|
+
print(f"Relative residual: {result.relres:.2e}")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Usage
|
|
142
|
+
|
|
143
|
+
### Main Interface: `blqmr()`
|
|
144
|
+
|
|
145
|
+
The primary function `blqmr()` automatically selects the best available backend (Fortran if available, otherwise Python).
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
from blocksolver import blqmr, BLQMR_EXT
|
|
149
|
+
|
|
150
|
+
# Check which backend is active
|
|
151
|
+
print(f"Using Fortran backend: {BLQMR_EXT}")
|
|
152
|
+
|
|
153
|
+
# Basic usage
|
|
154
|
+
result = blqmr(A, b)
|
|
155
|
+
|
|
156
|
+
# With options
|
|
157
|
+
result = blqmr(A, b,
|
|
158
|
+
tol=1e-8, # Convergence tolerance
|
|
159
|
+
maxiter=1000, # Maximum iterations
|
|
160
|
+
use_precond=True, # Use ILU preconditioning
|
|
161
|
+
)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Multiple Right-Hand Sides
|
|
165
|
+
|
|
166
|
+
BLQMR excels when solving the same system with multiple right-hand sides:
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
import numpy as np
|
|
170
|
+
from blocksolver import blqmr
|
|
171
|
+
|
|
172
|
+
# 100 different right-hand sides
|
|
173
|
+
B = np.random.randn(n, 100)
|
|
174
|
+
|
|
175
|
+
# Solve all systems at once (much faster than solving individually)
|
|
176
|
+
result = blqmr(A, B, tol=1e-8)
|
|
177
|
+
|
|
178
|
+
# result.x has shape (n, 100)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Complex Symmetric Systems
|
|
182
|
+
|
|
183
|
+
BLQMR is specifically designed for complex symmetric matrices (common in frequency-domain wave problems):
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
import numpy as np
|
|
187
|
+
from blocksolver import blqmr
|
|
188
|
+
|
|
189
|
+
# Complex symmetric matrix (A = A.T, NOT A.conj().T)
|
|
190
|
+
A = create_helmholtz_matrix(frequency=1000) # Your application
|
|
191
|
+
b = np.complex128(source_term)
|
|
192
|
+
|
|
193
|
+
result = blqmr(A, b, tol=1e-8)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Custom Preconditioning
|
|
197
|
+
|
|
198
|
+
For the Python backend, you can provide custom preconditioners:
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
from blocksolver import blqmr, make_preconditioner
|
|
202
|
+
|
|
203
|
+
# Create ILU preconditioner
|
|
204
|
+
M1 = make_preconditioner(A, 'ilu')
|
|
205
|
+
|
|
206
|
+
# Or diagonal (Jacobi) preconditioner
|
|
207
|
+
M1 = make_preconditioner(A, 'diag')
|
|
208
|
+
|
|
209
|
+
# Solve with custom preconditioner
|
|
210
|
+
result = blqmr(A, b, M1=M1, use_precond=False)
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### SciPy-Compatible Interface
|
|
214
|
+
|
|
215
|
+
For drop-in replacement in existing code:
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
from blocksolver import blqmr_scipy
|
|
219
|
+
|
|
220
|
+
# Returns (x, flag) like scipy.sparse.linalg solvers
|
|
221
|
+
x, flag = blqmr_scipy(A, b, tol=1e-10)
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### Low-Level CSC Interface
|
|
225
|
+
|
|
226
|
+
For maximum control, use the CSC component interface:
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
from blocksolver import blqmr_solve
|
|
230
|
+
|
|
231
|
+
# CSC format components (0-based indexing)
|
|
232
|
+
Ap = np.array([0, 2, 5, 9, 10, 12], dtype=np.int32) # Column pointers
|
|
233
|
+
Ai = np.array([0, 1, 0, 2, 4, 1, 2, 3, 4, 2, 1, 4], dtype=np.int32) # Row indices
|
|
234
|
+
Ax = np.array([2., 3., 3., -1., 4., 4., -3., 1., 2., 2., 6., 1.]) # Values
|
|
235
|
+
b = np.array([8., 45., -3., 3., 19.])
|
|
236
|
+
|
|
237
|
+
result = blqmr_solve(Ap, Ai, Ax, b,
|
|
238
|
+
tol=1e-8,
|
|
239
|
+
droptol=0.001, # ILU drop tolerance (Fortran only)
|
|
240
|
+
use_precond=True,
|
|
241
|
+
zero_based=True, # 0-based indexing (default)
|
|
242
|
+
)
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
## API Reference
|
|
246
|
+
|
|
247
|
+
### `blqmr(A, B, **kwargs) -> BLQMRResult`
|
|
248
|
+
|
|
249
|
+
Main solver interface.
|
|
250
|
+
|
|
251
|
+
**Parameters:**
|
|
252
|
+
| Parameter | Type | Default | Description |
|
|
253
|
+
|-----------|------|---------|-------------|
|
|
254
|
+
| `A` | sparse matrix or ndarray | required | System matrix (n × n) |
|
|
255
|
+
| `B` | ndarray | required | Right-hand side (n,) or (n × m) |
|
|
256
|
+
| `tol` | float | 1e-6 | Convergence tolerance |
|
|
257
|
+
| `maxiter` | int | n | Maximum iterations |
|
|
258
|
+
| `M1`, `M2` | preconditioner | None | Custom preconditioners (Python backend) |
|
|
259
|
+
| `x0` | ndarray | None | Initial guess |
|
|
260
|
+
| `use_precond` | bool | True | Use ILU preconditioning |
|
|
261
|
+
| `droptol` | float | 0.001 | ILU drop tolerance (Fortran backend) |
|
|
262
|
+
| `residual` | bool | False | Use true residual for convergence (Python) |
|
|
263
|
+
| `workspace` | BLQMRWorkspace | None | Pre-allocated workspace (Python) |
|
|
264
|
+
|
|
265
|
+
**Returns:** `BLQMRResult` object with:
|
|
266
|
+
| Attribute | Type | Description |
|
|
267
|
+
|-----------|------|-------------|
|
|
268
|
+
| `x` | ndarray | Solution vector(s) |
|
|
269
|
+
| `flag` | int | 0=converged, 1=maxiter, 2=precond fail, 3=stagnation |
|
|
270
|
+
| `iter` | int | Iterations performed |
|
|
271
|
+
| `relres` | float | Final relative residual |
|
|
272
|
+
| `converged` | bool | True if flag == 0 |
|
|
273
|
+
| `resv` | ndarray | Residual history (Python backend only) |
|
|
274
|
+
|
|
275
|
+
### `blqmr_solve(Ap, Ai, Ax, b, **kwargs) -> BLQMRResult`
|
|
276
|
+
|
|
277
|
+
Low-level CSC interface.
|
|
278
|
+
|
|
279
|
+
### `blqmr_solve_multi(Ap, Ai, Ax, B, **kwargs) -> BLQMRResult`
|
|
280
|
+
|
|
281
|
+
Multiple right-hand sides with CSC input.
|
|
282
|
+
|
|
283
|
+
### `blqmr_scipy(A, b, **kwargs) -> Tuple[ndarray, int]`
|
|
284
|
+
|
|
285
|
+
SciPy-compatible interface returning `(x, flag)`.
|
|
286
|
+
|
|
287
|
+
### `make_preconditioner(A, type) -> Preconditioner`
|
|
288
|
+
|
|
289
|
+
Create a preconditioner for the Python backend.
|
|
290
|
+
|
|
291
|
+
**Types:** `'diag'`/`'jacobi'`, `'ilu'`/`'ilu0'`, `'ssor'`
|
|
292
|
+
|
|
293
|
+
### Utility Functions
|
|
294
|
+
|
|
295
|
+
```python
|
|
296
|
+
from blocksolver import (
|
|
297
|
+
BLQMR_EXT, # True if Fortran backend available
|
|
298
|
+
HAS_NUMBA, # True if Numba acceleration available
|
|
299
|
+
get_backend_info, # Returns dict with backend details
|
|
300
|
+
test, # Run built-in tests
|
|
301
|
+
)
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## Performance Tips
|
|
305
|
+
|
|
306
|
+
1. **Use the Fortran backend** when available (10-100× faster than Python)
|
|
307
|
+
|
|
308
|
+
2. **Enable preconditioning** for ill-conditioned systems:
|
|
309
|
+
```python
|
|
310
|
+
result = blqmr(A, b, use_precond=True)
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
3. **Batch multiple right-hand sides** instead of solving one at a time:
|
|
314
|
+
```python
|
|
315
|
+
# Fast: single call with all RHS
|
|
316
|
+
result = blqmr(A, B_matrix)
|
|
317
|
+
|
|
318
|
+
# Slow: multiple calls
|
|
319
|
+
for b in B_columns:
|
|
320
|
+
result = blqmr(A, b)
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
4. **Install Numba** for faster Python backend:
|
|
324
|
+
```bash
|
|
325
|
+
pip install numba
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
5. **Reuse workspace** for repeated solves with the same dimensions:
|
|
329
|
+
```python
|
|
330
|
+
from blocksolver import BLQMRWorkspace
|
|
331
|
+
ws = BLQMRWorkspace(n, m)
|
|
332
|
+
for b in many_rhs:
|
|
333
|
+
result = blqmr(A, b, workspace=ws)
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
## Examples
|
|
337
|
+
|
|
338
|
+
### Diffuse Optical Tomography
|
|
339
|
+
|
|
340
|
+
```python
|
|
341
|
+
import numpy as np
|
|
342
|
+
from scipy.sparse import diags, kron, eye
|
|
343
|
+
from blocksolver import blqmr
|
|
344
|
+
|
|
345
|
+
def create_diffusion_matrix(nx, ny, D=1.0, mu_a=0.01, omega=1e9):
|
|
346
|
+
"""Create 2D diffusion matrix for DOT."""
|
|
347
|
+
n = nx * ny
|
|
348
|
+
h = 1.0 / nx
|
|
349
|
+
|
|
350
|
+
# Negative Laplacian: the [-1, 2, -1] stencil discretizes -∇²
|
|
351
|
+
Lx = diags([-1, 2, -1], [-1, 0, 1], shape=(nx, nx)) / h**2
|
|
352
|
+
Ly = diags([-1, 2, -1], [-1, 0, 1], shape=(ny, ny)) / h**2
|
|
353
|
+
L = kron(eye(ny), Lx) + kron(Ly, eye(nx))
|
|
354
|
+
|
|
355
|
+
# Diffusion equation: (-D∇² + μ_a + iω/c) φ = q
|
|
356
|
+
c = 3e10  # speed of light in vacuum (cm/s); divide by the refractive index (~1.4) for tissue
|
|
357
|
+
A = D * L + mu_a * eye(n) + 1j * omega / c * eye(n)  # L discretizes -∇², so D * L is the -D∇² term
|
|
358
|
+
|
|
359
|
+
return A.tocsc()
|
|
360
|
+
|
|
361
|
+
# Setup problem
|
|
362
|
+
A = create_diffusion_matrix(100, 100, omega=2*np.pi*100e6)
|
|
363
|
+
sources = np.random.randn(10000, 16) # 16 source positions
|
|
364
|
+
|
|
365
|
+
# Solve for all sources at once
|
|
366
|
+
result = blqmr(A, sources, tol=1e-8)
|
|
367
|
+
print(f"Solved {sources.shape[1]} systems in {result.iter} iterations")
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
### Frequency-Domain Acoustics
|
|
371
|
+
|
|
372
|
+
```python
|
|
373
|
+
import numpy as np
|
|
374
|
+
from blocksolver import blqmr
|
|
375
|
+
|
|
376
|
+
# Helmholtz equation: (∇² + k²)p = f
|
|
377
|
+
# Results in complex symmetric matrix
|
|
378
|
+
|
|
379
|
+
def solve_helmholtz(K, M, f, frequencies):
|
|
380
|
+
"""Solve Helmholtz at multiple frequencies."""
|
|
381
|
+
solutions = []
|
|
382
|
+
for omega in frequencies:
|
|
383
|
+
# A = K - ω²M (symmetric if K, M are symmetric; complex symmetric when K or M has complex entries, e.g. damping)
|
|
384
|
+
A = K - omega**2 * M
|
|
385
|
+
result = blqmr(A, f, tol=1e-10)
|
|
386
|
+
solutions.append(result.x)
|
|
387
|
+
return np.array(solutions)
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
## Troubleshooting
|
|
391
|
+
|
|
392
|
+
### "No Fortran backend available"
|
|
393
|
+
|
|
394
|
+
Install the package with Fortran support:
|
|
395
|
+
```bash
|
|
396
|
+
# Install dependencies first
|
|
397
|
+
sudo apt install gfortran libsuitesparse-dev # Linux
|
|
398
|
+
brew install gcc suite-sparse # macOS
|
|
399
|
+
|
|
400
|
+
# Reinstall blocksolver
|
|
401
|
+
pip install --no-cache-dir blocksolver
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
### Slow convergence
|
|
405
|
+
|
|
406
|
+
1. Enable preconditioning: `use_precond=True`
|
|
407
|
+
2. Reduce ILU drop tolerance: `droptol=1e-4` (Fortran backend)
|
|
408
|
+
3. Check matrix conditioning with `np.linalg.cond(A.toarray())` (small systems only, since this builds a dense copy of `A`)
|
|
409
|
+
|
|
410
|
+
### Memory issues with large systems
|
|
411
|
+
|
|
412
|
+
1. Use the Fortran backend (more memory efficient)
|
|
413
|
+
2. Reduce the block size for multiple RHS (solve the right-hand-side columns in smaller batches)
|
|
414
|
+
3. Use iterative refinement instead of tighter tolerance
|
|
415
|
+
|
|
416
|
+
## License
|
|
417
|
+
|
|
418
|
+
BSD-3-Clause / LGPL-3.0+ / GPL-3.0+ (tri-licensed)
|
|
419
|
+
|
|
420
|
+
## Citation
|
|
421
|
+
|
|
422
|
+
If you use BlockSolver in your research, please cite:
|
|
423
|
+
|
|
424
|
+
```bibtex
|
|
425
|
+
@software{blocksolver,
|
|
426
|
+
author = {Qianqian Fang},
|
|
427
|
+
title = {BlockSolver: Block Quasi-Minimal Residual Sparse Linear Solver},
|
|
428
|
+
url = {https://github.com/fangq/blit},
|
|
429
|
+
year = {2024}
|
|
430
|
+
}
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
## See Also
|
|
434
|
+
|
|
435
|
+
- [BLIT](https://github.com/fangq/blit) - The underlying Fortran library
|
|
436
|
+
- [SciPy sparse.linalg](https://docs.scipy.org/doc/scipy/reference/sparse.linalg.html) - Other iterative solvers
|
|
437
|
+
- [PyAMG](https://github.com/pyamg/pyamg) - Algebraic multigrid solvers
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
blocksolver-0.8.3.dist-info/METADATA,sha256=1JTScCC4OO3FBMqPtaG0OUlf5OnEHDetwDj7LPk9u9A,13264
|
|
2
|
+
blocksolver-0.8.3.dist-info/WHEEL,sha256=1nIT8bOU3dBEtO1OHNUw1PB7s17JH9tAQ93SLqU9JNM,85
|
|
3
|
+
blocksolver/_blqmr.cp310-win_amd64.pyd,sha256=jSqGnRnpV6ZHKB-aoYX0ZshtJQVpTjSimTNKiIkiVVg,34347765
|
|
4
|
+
blocksolver/_blqmr.cp310-win_amd64.dll.a,sha256=BD1rAe0mdxICApltj3xMKh5Wij-9C25neXLPKDuQQqo,1706
|
|
5
|
+
blocksolver/__init__.py,sha256=7lq88Nc2gqHTWAdpvj2zpVt8UHYgKpcVlgA1WzyEhFI,1982
|
|
6
|
+
blocksolver/blqmr.py,sha256=TRVkXlJf2FXFiZkH6vawNEEq8jowJqG-S_QGwHMeR8U,41060
|
|
7
|
+
blocksolver-0.8.3.dist-info/RECORD,,
|
|
@@ -1,157 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: blocksolver
|
|
3
|
-
Version: 0.8.1
|
|
4
|
-
Summary: Block Quasi-Minimal-Residual sparse linear solver
|
|
5
|
-
Keywords: sparse,linear-algebra,iterative-solver,qmr,fortran,umfpack
|
|
6
|
-
Author-Email: Qianqian Fang <q.fang@neu.edu>
|
|
7
|
-
License: BSD-3-Clause OR LGPL-3.0-or-later OR GPL-3.0-or-later
|
|
8
|
-
Classifier: Development Status :: 4 - Beta
|
|
9
|
-
Classifier: Intended Audience :: Science/Research
|
|
10
|
-
Classifier: License :: OSI Approved :: BSD License
|
|
11
|
-
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)
|
|
12
|
-
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
13
|
-
Classifier: Operating System :: OS Independent
|
|
14
|
-
Classifier: Operating System :: POSIX :: Linux
|
|
15
|
-
Classifier: Operating System :: MacOS
|
|
16
|
-
Classifier: Operating System :: Microsoft :: Windows
|
|
17
|
-
Classifier: Programming Language :: Fortran
|
|
18
|
-
Classifier: Programming Language :: Python :: 3
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
-
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
26
|
-
Project-URL: Homepage, https://blit.sourceforge.net
|
|
27
|
-
Project-URL: Repository, https://github.com/fangq/blocksolver
|
|
28
|
-
Project-URL: Documentation, https://blit.sourceforge.net
|
|
29
|
-
Project-URL: Bug Tracker, https://github.com/fangq/blocksolver/issues
|
|
30
|
-
Requires-Python: >=3.8
|
|
31
|
-
Requires-Dist: numpy>=1.20
|
|
32
|
-
Requires-Dist: scipy>=1.0
|
|
33
|
-
Provides-Extra: fast
|
|
34
|
-
Requires-Dist: numba>=0.50; extra == "fast"
|
|
35
|
-
Provides-Extra: test
|
|
36
|
-
Requires-Dist: pytest>=6.0; extra == "test"
|
|
37
|
-
Provides-Extra: dev
|
|
38
|
-
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
39
|
-
Requires-Dist: build; extra == "dev"
|
|
40
|
-
Requires-Dist: twine; extra == "dev"
|
|
41
|
-
Description-Content-Type: text/markdown
|
|
42
|
-
|
|
43
|
-
# BLIT Python Bindings
|
|
44
|
-
|
|
45
|
-
Python interface for the BLIT (Block Iterative) sparse linear solver library.
|
|
46
|
-
|
|
47
|
-
## Installation
|
|
48
|
-
|
|
49
|
-
### Prerequisites
|
|
50
|
-
|
|
51
|
-
- Python >= 3.8
|
|
52
|
-
- NumPy
|
|
53
|
-
- Fortran compiler (gfortran, ifort)
|
|
54
|
-
- UMFPACK/SuiteSparse library
|
|
55
|
-
- BLAS/LAPACK
|
|
56
|
-
|
|
57
|
-
On Ubuntu/Debian:
|
|
58
|
-
```bash
|
|
59
|
-
sudo apt install gfortran libsuitesparse-dev libblas-dev liblapack-dev
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
On macOS (Homebrew):
|
|
63
|
-
```bash
|
|
64
|
-
brew install gcc suite-sparse openblas
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
### Install
|
|
68
|
-
|
|
69
|
-
```bash
|
|
70
|
-
cd python
|
|
71
|
-
pip install .
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
For development:
|
|
75
|
-
```bash
|
|
76
|
-
pip install -e .
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
## Usage
|
|
80
|
-
|
|
81
|
-
### Basic Usage
|
|
82
|
-
|
|
83
|
-
```python
|
|
84
|
-
import numpy as np
|
|
85
|
-
from blocksolver import blqmr_solve
|
|
86
|
-
|
|
87
|
-
# Define sparse matrix in CSC format (0-based indexing)
|
|
88
|
-
Ap = np.array([0, 2, 5, 9, 10, 12], dtype=np.int32)
|
|
89
|
-
Ai = np.array([0, 1, 0, 2, 4, 1, 2, 3, 4, 2, 1, 4], dtype=np.int32)
|
|
90
|
-
Ax = np.array([2., 3., 3., -1., 4., 4., -3., 1., 2., 2., 6., 1.])
|
|
91
|
-
b = np.array([8.0, 45.0, -3.0, 3.0, 19.0])
|
|
92
|
-
|
|
93
|
-
# Solve
|
|
94
|
-
result = blqmr_solve(Ap, Ai, Ax, b, tol=1e-8)
|
|
95
|
-
|
|
96
|
-
print(f"Solution: {result.x}")
|
|
97
|
-
print(f"Converged: {result.converged}")
|
|
98
|
-
print(f"Iterations: {result.iter}")
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
### With SciPy Sparse Matrices
|
|
102
|
-
|
|
103
|
-
```python
|
|
104
|
-
from scipy.sparse import csc_matrix
|
|
105
|
-
from blocksolver import blqmr_scipy
|
|
106
|
-
|
|
107
|
-
A = csc_matrix([[4, 1, 0], [1, 3, 1], [0, 1, 2]])
|
|
108
|
-
b = np.array([1., 2., 3.])
|
|
109
|
-
|
|
110
|
-
x, flag = blqmr_scipy(A, b, tol=1e-10)
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
### Multiple Right-Hand Sides
|
|
114
|
-
|
|
115
|
-
```python
|
|
116
|
-
from blocksolver import blqmr_solve_multi
|
|
117
|
-
|
|
118
|
-
B = np.column_stack([b1, b2, b3]) # n x nrhs
|
|
119
|
-
result = blqmr_solve_multi(Ap, Ai, Ax, B)
|
|
120
|
-
# result.x is n x nrhs
|
|
121
|
-
```
|
|
122
|
-
|
|
123
|
-
## API Reference
|
|
124
|
-
|
|
125
|
-
### `blqmr_solve(Ap, Ai, Ax, b, **kwargs) -> BLQMRResult`
|
|
126
|
-
|
|
127
|
-
Solve sparse system Ax = b.
|
|
128
|
-
|
|
129
|
-
**Parameters:**
|
|
130
|
-
- `Ap`: Column pointers (int32, length n+1)
|
|
131
|
-
- `Ai`: Row indices (int32, length nnz)
|
|
132
|
-
- `Ax`: Non-zero values (float64, length nnz)
|
|
133
|
-
- `b`: Right-hand side (float64, length n)
|
|
134
|
-
- `tol`: Convergence tolerance (default: 1e-6)
|
|
135
|
-
- `maxiter`: Maximum iterations (default: n)
|
|
136
|
-
- `droptol`: ILU drop tolerance (default: 0.001)
|
|
137
|
-
- `use_precond`: Use ILU preconditioner (default: True)
|
|
138
|
-
- `zero_based`: Input uses 0-based indexing (default: True)
|
|
139
|
-
|
|
140
|
-
**Returns:** `BLQMRResult` with attributes:
|
|
141
|
-
- `x`: Solution vector
|
|
142
|
-
- `flag`: 0=converged, 1=maxiter, 2=precond fail, 3=stagnation
|
|
143
|
-
- `iter`: Iterations performed
|
|
144
|
-
- `relres`: Relative residual
|
|
145
|
-
- `converged`: Boolean property
|
|
146
|
-
|
|
147
|
-
## Testing
|
|
148
|
-
|
|
149
|
-
```bash
|
|
150
|
-
make test
|
|
151
|
-
# or
|
|
152
|
-
pytest tests/ -v
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
## License
|
|
156
|
-
|
|
157
|
-
BSD / LGPL / GPL - see LICENSE files in parent directory.
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
blocksolver-0.8.1.dist-info/METADATA,sha256=mCxOeV7DKQ1fCfkadhJjYXk3Lg0aOYRPyZI26QwJc0w,4286
|
|
2
|
-
blocksolver-0.8.1.dist-info/WHEEL,sha256=1nIT8bOU3dBEtO1OHNUw1PB7s17JH9tAQ93SLqU9JNM,85
|
|
3
|
-
blocksolver/_blqmr.cp310-win_amd64.pyd,sha256=xLeKEmXEZciUZpxvPLYCZm2zgHphkfvcL76qPd3RloA,439092
|
|
4
|
-
blocksolver/_blqmr.cp310-win_amd64.dll.a,sha256=lXYwg93YOTRn2JqRoyCYIjZ_5_log3THxAg-pMyKXuE,1706
|
|
5
|
-
blocksolver/__init__.py,sha256=XkO4nXXew3nk9uOhCg1q-mZnFyhUqHuhlsHPKifWYLo,1982
|
|
6
|
-
blocksolver/blqmr.py,sha256=NT0R7Rydvlj5DadbXn0IhVYHGKI3qiPHgD0f6q7CHHY,32874
|
|
7
|
-
blocksolver-0.8.1.dist-info/RECORD,,
|
|
File without changes
|