blocksolver 0.8.1__py3-none-any.whl → 0.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blocksolver/__init__.py +1 -1
- blocksolver/blqmr.py +502 -135
- blocksolver-0.8.5.dist-info/METADATA +509 -0
- blocksolver-0.8.5.dist-info/RECORD +6 -0
- blocksolver-0.8.1.dist-info/METADATA +0 -162
- blocksolver-0.8.1.dist-info/RECORD +0 -6
- {blocksolver-0.8.1.dist-info → blocksolver-0.8.5.dist-info}/WHEEL +0 -0
- {blocksolver-0.8.1.dist-info → blocksolver-0.8.5.dist-info}/top_level.txt +0 -0
blocksolver/blqmr.py
CHANGED
|
@@ -50,7 +50,7 @@ try:
|
|
|
50
50
|
from numba import njit
|
|
51
51
|
|
|
52
52
|
HAS_NUMBA = True
|
|
53
|
-
except ImportError:
|
|
53
|
+
except (ImportError, Exception) as e:
|
|
54
54
|
HAS_NUMBA = False
|
|
55
55
|
|
|
56
56
|
def njit(*args, **kwargs):
|
|
@@ -134,6 +134,52 @@ def _qqr_kernel_real(Q, R, n, m):
|
|
|
134
134
|
Q[i, k] -= Q[i, j] * dot
|
|
135
135
|
|
|
136
136
|
|
|
137
|
+
@njit(cache=True)
|
|
138
|
+
def _qqr_kernel_complex(Q, R, n, m):
|
|
139
|
+
"""Numba-accelerated quasi-QR kernel for complex arrays."""
|
|
140
|
+
for j in range(m):
|
|
141
|
+
# Quasi inner product: sum(q*q) WITHOUT conjugation
|
|
142
|
+
r_jj_sq = 0.0j
|
|
143
|
+
for i in range(n):
|
|
144
|
+
r_jj_sq += Q[i, j] * Q[i, j] # No conjugation!
|
|
145
|
+
r_jj = np.sqrt(r_jj_sq)
|
|
146
|
+
R[j, j] = r_jj
|
|
147
|
+
if abs(r_jj) > 1e-14:
|
|
148
|
+
inv_r_jj = 1.0 / r_jj
|
|
149
|
+
for i in range(n):
|
|
150
|
+
Q[i, j] *= inv_r_jj
|
|
151
|
+
for k in range(j + 1, m):
|
|
152
|
+
# Quasi inner product: sum(q_j * q_k) WITHOUT conjugation
|
|
153
|
+
dot = 0.0j
|
|
154
|
+
for i in range(n):
|
|
155
|
+
dot += Q[i, j] * Q[i, k] # No conjugation!
|
|
156
|
+
R[j, k] = dot
|
|
157
|
+
for i in range(n):
|
|
158
|
+
Q[i, k] -= Q[i, j] * dot
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@njit(cache=True)
|
|
162
|
+
def _qqr_kernel_real(Q, R, n, m):
|
|
163
|
+
"""Numba-accelerated quasi-QR kernel for real arrays."""
|
|
164
|
+
for j in range(m):
|
|
165
|
+
r_jj_sq = 0.0
|
|
166
|
+
for i in range(n):
|
|
167
|
+
r_jj_sq += Q[i, j] * Q[i, j]
|
|
168
|
+
r_jj = np.sqrt(r_jj_sq)
|
|
169
|
+
R[j, j] = r_jj
|
|
170
|
+
if abs(r_jj) > 1e-14:
|
|
171
|
+
inv_r_jj = 1.0 / r_jj
|
|
172
|
+
for i in range(n):
|
|
173
|
+
Q[i, j] *= inv_r_jj
|
|
174
|
+
for k in range(j + 1, m):
|
|
175
|
+
dot = 0.0
|
|
176
|
+
for i in range(n):
|
|
177
|
+
dot += Q[i, j] * Q[i, k]
|
|
178
|
+
R[j, k] = dot
|
|
179
|
+
for i in range(n):
|
|
180
|
+
Q[i, k] -= Q[i, j] * dot
|
|
181
|
+
|
|
182
|
+
|
|
137
183
|
def qqr(
|
|
138
184
|
A: np.ndarray, tol: float = 0, use_numba: bool = True
|
|
139
185
|
) -> Tuple[np.ndarray, np.ndarray]:
|
|
@@ -173,14 +219,20 @@ def qqr(
|
|
|
173
219
|
else:
|
|
174
220
|
for j in range(m):
|
|
175
221
|
qj = Q[:, j]
|
|
176
|
-
|
|
222
|
+
# CRITICAL FIX: Use sum(qj * qj) NOT np.dot(qj, qj)
|
|
223
|
+
# Note: np.dot does NOT conjugate complex inputs (np.vdot does); the explicit sum just makes the unconjugated product obvious.
|
|
224
|
+
# Fortran: R(k,k)=dsqrt(sum(Q(:,k)*Q(:,k))) - no conjugation
|
|
225
|
+
r_jj_sq = np.sum(qj * qj) # Quasi inner product - NO conjugation
|
|
177
226
|
r_jj = np.sqrt(r_jj_sq)
|
|
178
227
|
R[j, j] = r_jj
|
|
179
228
|
if np.abs(r_jj) > 1e-14:
|
|
180
229
|
Q[:, j] *= 1.0 / r_jj
|
|
181
230
|
if j < m - 1:
|
|
182
|
-
|
|
183
|
-
|
|
231
|
+
# CRITICAL FIX: Quasi inner product for off-diagonal
|
|
232
|
+
# Fortran: R(k,j)=sum(Q(:,k)*Q(:,j)) - no conjugation
|
|
233
|
+
for k in range(j + 1, m):
|
|
234
|
+
R[j, k] = np.sum(Q[:, j] * Q[:, k]) # NO conjugation
|
|
235
|
+
Q[:, k] -= R[j, k] * Q[:, j]
|
|
184
236
|
|
|
185
237
|
return Q, R
|
|
186
238
|
|
|
@@ -217,8 +269,12 @@ class SparsePreconditioner:
|
|
|
217
269
|
|
|
218
270
|
def __init__(self, M1, M2=None):
|
|
219
271
|
self.is_two_part = M2 is not None
|
|
220
|
-
self.is_ilu1 = isinstance(M1, _ILUPreconditioner)
|
|
221
|
-
self.is_ilu2 =
|
|
272
|
+
self.is_ilu1 = isinstance(M1, (_ILUPreconditioner, _LUPreconditioner))
|
|
273
|
+
self.is_ilu2 = (
|
|
274
|
+
isinstance(M2, (_ILUPreconditioner, _LUPreconditioner))
|
|
275
|
+
if M2 is not None
|
|
276
|
+
else False
|
|
277
|
+
)
|
|
222
278
|
|
|
223
279
|
if M1 is not None:
|
|
224
280
|
if self.is_ilu1:
|
|
@@ -381,8 +437,101 @@ class BLQMRWorkspace:
|
|
|
381
437
|
# Preconditioner Factory
|
|
382
438
|
# =============================================================================
|
|
383
439
|
|
|
440
|
+
# Type alias for precond_type
|
|
441
|
+
PrecondType = Optional[Union[str, int]]
|
|
384
442
|
|
|
385
|
-
|
|
443
|
+
|
|
444
|
+
def _parse_precond_type_for_fortran(precond_type: PrecondType) -> int:
|
|
445
|
+
"""
|
|
446
|
+
Convert precond_type to Fortran integer code.
|
|
447
|
+
|
|
448
|
+
Returns
|
|
449
|
+
-------
|
|
450
|
+
int
|
|
451
|
+
0 = no preconditioning
|
|
452
|
+
2 = ILU
|
|
453
|
+
3 = diagonal/Jacobi
|
|
454
|
+
"""
|
|
455
|
+
if precond_type is None or precond_type == "" or precond_type is False:
|
|
456
|
+
return 0
|
|
457
|
+
|
|
458
|
+
if isinstance(precond_type, int):
|
|
459
|
+
return precond_type
|
|
460
|
+
|
|
461
|
+
if isinstance(precond_type, str):
|
|
462
|
+
precond_lower = precond_type.lower()
|
|
463
|
+
if precond_lower in ("ilu", "ilu0", "ilut"):
|
|
464
|
+
return 2
|
|
465
|
+
elif precond_lower in ("diag", "jacobi"):
|
|
466
|
+
return 3
|
|
467
|
+
else:
|
|
468
|
+
# Unknown string, default to no preconditioning
|
|
469
|
+
warnings.warn(
|
|
470
|
+
f"Unknown precond_type '{precond_type}' for Fortran backend, using no preconditioning"
|
|
471
|
+
)
|
|
472
|
+
return 0
|
|
473
|
+
|
|
474
|
+
return 0
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def _get_preconditioner_for_native(A, precond_type: PrecondType, M1_provided):
    """
    Create preconditioner for native Python backend.

    Parameters
    ----------
    A : sparse matrix
        System matrix
    precond_type : None, '', False, str, or int
        Preconditioner type specification. Strings are matched
        case-insensitively (surrounding whitespace ignored), mirroring the
        Fortran-side parser. Integer codes (Python or NumPy): 0 = none,
        3 = diagonal, anything else = ILU.
    M1_provided : preconditioner or None
        User-provided preconditioner (takes precedence)

    Returns
    -------
    M1 : preconditioner or None
        ``M1_provided`` if given; otherwise the result of
        ``make_preconditioner``; ``None`` when no preconditioning was
        requested or every attempt failed.

    Notes
    -----
    Construction failures never propagate: a failing non-diagonal request
    falls back to the diagonal preconditioner, and if that fails as well the
    function warns and returns ``None``.
    """
    # If user provided M1, use it
    if M1_provided is not None:
        return M1_provided

    # No preconditioning requested
    if precond_type is None or precond_type is False:
        return None

    if isinstance(precond_type, (int, np.integer)):
        # Integer codes (for compatibility). NumPy integers are accepted too,
        # so that e.g. np.int32(0) really means "none".
        code = int(precond_type)
        if code == 0:
            return None
        # 3 selects diagonal; 2 and every other integer default to ILU.
        precond_str = "diag" if code == 3 else "ilu"
    elif isinstance(precond_type, str):
        precond_str = precond_type.strip().lower()
        if not precond_str:
            return None
    else:
        # Unsupported type: hand it on unchanged so the failure is reported
        # through the fallback chain below.
        precond_str = precond_type

    # Create preconditioner
    try:
        return make_preconditioner(A, precond_str)
    except Exception as e:
        # Keep a reference: the name bound by 'except ... as' is cleared when
        # the handler exits.
        last_error = e

    # Fallback chain: try diag if the requested type failed
    if precond_str not in ("diag", "jacobi"):
        warnings.warn(
            f"Preconditioner '{precond_str}' failed: {last_error}, falling back to diagonal"
        )
        try:
            return make_preconditioner(A, "diag")
        except Exception as fallback_error:
            last_error = fallback_error

    warnings.warn(
        f"All preconditioners failed ({last_error}), proceeding without preconditioning"
    )
    return None
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def make_preconditioner(
|
|
533
|
+
A: sparse.spmatrix, precond_type: str = "diag", split: bool = False, **kwargs
|
|
534
|
+
):
|
|
386
535
|
"""
|
|
387
536
|
Create a preconditioner for iterative solvers.
|
|
388
537
|
|
|
@@ -392,29 +541,70 @@ def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
|
|
|
392
541
|
System matrix
|
|
393
542
|
precond_type : str
|
|
394
543
|
'diag' or 'jacobi': Diagonal (Jacobi) preconditioner
|
|
395
|
-
'ilu' or 'ilu0': Incomplete LU
|
|
396
|
-
'
|
|
544
|
+
'ilu' or 'ilu0': Incomplete LU with minimal fill
|
|
545
|
+
'ilut': Incomplete LU with threshold
|
|
546
|
+
'lu': Full LU factorization
|
|
547
|
+
split : bool
|
|
548
|
+
If True, return sqrt(D) for split preconditioning (M1=M2=sqrt(D))
|
|
549
|
+
If False, return D for left preconditioning
|
|
550
|
+
**kwargs : dict
|
|
551
|
+
Additional parameters
|
|
397
552
|
|
|
398
553
|
Returns
|
|
399
554
|
-------
|
|
400
555
|
M : preconditioner object
|
|
401
|
-
|
|
556
|
+
For split Jacobi, use as: blqmr(A, b, M1=M, M2=M)
|
|
402
557
|
"""
|
|
403
558
|
if precond_type in ("diag", "jacobi"):
|
|
404
559
|
diag = A.diagonal().copy()
|
|
405
560
|
diag[np.abs(diag) < 1e-14] = 1.0
|
|
406
|
-
return sparse.diags(diag, format="csr")
|
|
407
561
|
|
|
408
|
-
|
|
562
|
+
if split:
|
|
563
|
+
# For split preconditioning: return sqrt(D)
|
|
564
|
+
# Usage: M1 = M2 = sqrt(D), gives D^{-1/2} A D^{-1/2}
|
|
565
|
+
sqrt_diag = np.sqrt(diag)
|
|
566
|
+
return sparse.diags(sqrt_diag, format="csr")
|
|
567
|
+
else:
|
|
568
|
+
# For left preconditioning: return D
|
|
569
|
+
# Usage: M1 = D, M2 = None, gives D^{-1} A
|
|
570
|
+
return sparse.diags(diag, format="csr")
|
|
571
|
+
|
|
572
|
+
elif precond_type == "ilu0":
|
|
573
|
+
# ILU(0) - no fill-in, fast but may be poor quality
|
|
409
574
|
try:
|
|
410
575
|
ilu = spilu(A.tocsc(), drop_tol=0, fill_factor=1)
|
|
411
576
|
return _ILUPreconditioner(ilu)
|
|
412
577
|
except Exception as e:
|
|
413
|
-
warnings.warn(f"ILU factorization failed: {e}, falling back to diagonal")
|
|
578
|
+
warnings.warn(f"ILU(0) factorization failed: {e}, falling back to diagonal")
|
|
414
579
|
return make_preconditioner(A, "diag")
|
|
415
580
|
|
|
581
|
+
elif precond_type in ("ilu", "ilut"):
|
|
582
|
+
# ILUT - ILU with threshold, better quality (similar to UMFPACK)
|
|
583
|
+
drop_tol = kwargs.get("drop_tol", 1e-4)
|
|
584
|
+
fill_factor = kwargs.get("fill_factor", 10)
|
|
585
|
+
try:
|
|
586
|
+
ilu = spilu(A.tocsc(), drop_tol=drop_tol, fill_factor=fill_factor)
|
|
587
|
+
return _ILUPreconditioner(ilu)
|
|
588
|
+
except Exception as e:
|
|
589
|
+
warnings.warn(f"ILUT factorization failed: {e}, trying ILU(0)")
|
|
590
|
+
try:
|
|
591
|
+
ilu = spilu(A.tocsc(), drop_tol=0, fill_factor=1)
|
|
592
|
+
return _ILUPreconditioner(ilu)
|
|
593
|
+
except Exception as e2:
|
|
594
|
+
warnings.warn(f"ILU(0) also failed: {e2}, falling back to diagonal")
|
|
595
|
+
return make_preconditioner(A, "diag")
|
|
596
|
+
|
|
597
|
+
elif precond_type == "lu":
|
|
598
|
+
# Full LU - exact factorization (for reference/debugging)
|
|
599
|
+
try:
|
|
600
|
+
lu = splu(A.tocsc())
|
|
601
|
+
return _LUPreconditioner(lu)
|
|
602
|
+
except Exception as e:
|
|
603
|
+
warnings.warn(f"LU factorization failed: {e}, falling back to ILUT")
|
|
604
|
+
return make_preconditioner(A, "ilut")
|
|
605
|
+
|
|
416
606
|
elif precond_type == "ssor":
|
|
417
|
-
omega = 1.0
|
|
607
|
+
omega = kwargs.get("omega", 1.0)
|
|
418
608
|
D = sparse.diags(A.diagonal(), format="csr")
|
|
419
609
|
L = sparse.tril(A, k=-1, format="csr")
|
|
420
610
|
return (D + omega * L).tocsr()
|
|
@@ -423,6 +613,24 @@ def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
|
|
|
423
613
|
raise ValueError(f"Unknown preconditioner type: {precond_type}")
|
|
424
614
|
|
|
425
615
|
|
|
616
|
+
class _LUPreconditioner:
|
|
617
|
+
"""Wrapper for full LU preconditioner."""
|
|
618
|
+
|
|
619
|
+
def __init__(self, lu_factor):
|
|
620
|
+
self.lu = lu_factor
|
|
621
|
+
self.shape = (lu_factor.shape[0], lu_factor.shape[1])
|
|
622
|
+
self.dtype = np.float64 # Assume real for now
|
|
623
|
+
|
|
624
|
+
def solve(self, b):
|
|
625
|
+
if b.ndim == 1:
|
|
626
|
+
return self.lu.solve(b)
|
|
627
|
+
else:
|
|
628
|
+
x = np.zeros_like(b)
|
|
629
|
+
for i in range(b.shape[1]):
|
|
630
|
+
x[:, i] = self.lu.solve(b[:, i])
|
|
631
|
+
return x
|
|
632
|
+
|
|
633
|
+
|
|
426
634
|
# =============================================================================
|
|
427
635
|
# Pure-Python Block QMR Solver
|
|
428
636
|
# =============================================================================
|
|
@@ -448,7 +656,7 @@ def _blqmr_python_impl(
|
|
|
448
656
|
dtype = np.complex128 if is_complex_input else np.float64
|
|
449
657
|
|
|
450
658
|
if maxiter is None:
|
|
451
|
-
maxiter = min(n,
|
|
659
|
+
maxiter = min(n, 100)
|
|
452
660
|
|
|
453
661
|
if (
|
|
454
662
|
workspace is None
|
|
@@ -461,23 +669,59 @@ def _blqmr_python_impl(
|
|
|
461
669
|
ws = workspace
|
|
462
670
|
ws.reset()
|
|
463
671
|
|
|
464
|
-
# Setup preconditioner
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
672
|
+
# Setup preconditioner - distinguish split vs left-only
|
|
673
|
+
use_split_precond = False
|
|
674
|
+
precond = None
|
|
675
|
+
precond_M1 = None
|
|
676
|
+
precond_M2 = None
|
|
677
|
+
|
|
678
|
+
if M1 is not None and M2 is not None:
|
|
679
|
+
# Split preconditioning: M1⁻¹ A M2⁻¹
|
|
680
|
+
use_split_precond = True
|
|
681
|
+
if isinstance(M1, (_ILUPreconditioner, _LUPreconditioner)):
|
|
682
|
+
precond_M1 = SparsePreconditioner(M1, None)
|
|
468
683
|
elif sparse.issparse(M1):
|
|
469
|
-
|
|
684
|
+
precond_M1 = SparsePreconditioner(M1, None)
|
|
685
|
+
elif hasattr(M1, "solve"):
|
|
686
|
+
precond_M1 = M1
|
|
470
687
|
else:
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
688
|
+
precond_M1 = DensePreconditioner(M1, None)
|
|
689
|
+
|
|
690
|
+
if isinstance(M2, (_ILUPreconditioner, _LUPreconditioner)):
|
|
691
|
+
precond_M2 = SparsePreconditioner(M2, None)
|
|
692
|
+
elif sparse.issparse(M2):
|
|
693
|
+
precond_M2 = SparsePreconditioner(M2, None)
|
|
694
|
+
elif hasattr(M2, "solve"):
|
|
695
|
+
precond_M2 = M2
|
|
696
|
+
else:
|
|
697
|
+
precond_M2 = DensePreconditioner(M2, None)
|
|
698
|
+
|
|
699
|
+
elif M1 is not None:
|
|
700
|
+
# Left-only preconditioning: M1⁻¹ A
|
|
701
|
+
if isinstance(M1, (_ILUPreconditioner, _LUPreconditioner)):
|
|
702
|
+
precond = SparsePreconditioner(M1, None)
|
|
703
|
+
elif sparse.issparse(M1):
|
|
704
|
+
precond = SparsePreconditioner(M1, None)
|
|
705
|
+
elif hasattr(M1, "solve"):
|
|
706
|
+
precond = M1
|
|
707
|
+
else:
|
|
708
|
+
precond = DensePreconditioner(M1, None)
|
|
474
709
|
|
|
475
710
|
if x0 is None:
|
|
476
711
|
x = np.zeros((n, m), dtype=dtype)
|
|
477
712
|
else:
|
|
478
713
|
x = np.asarray(x0, dtype=dtype).reshape(n, m).copy()
|
|
479
714
|
|
|
480
|
-
|
|
715
|
+
# Initialize indices: Fortran t3=mod(0,3)+1=1 -> Python t3=0
|
|
716
|
+
t3 = 0
|
|
717
|
+
t3n = 2
|
|
718
|
+
t3p = 1
|
|
719
|
+
|
|
720
|
+
# Initialize Q matrices (identity)
|
|
721
|
+
ws.Qa[:, :, :] = 0
|
|
722
|
+
ws.Qb[:, :, :] = 0
|
|
723
|
+
ws.Qc[:, :, :] = 0
|
|
724
|
+
ws.Qd[:, :, :] = 0
|
|
481
725
|
ws.Qa[:, :, t3] = np.eye(m, dtype=dtype)
|
|
482
726
|
ws.Qd[:, :, t3n] = np.eye(m, dtype=dtype)
|
|
483
727
|
ws.Qd[:, :, t3] = np.eye(m, dtype=dtype)
|
|
@@ -488,110 +732,190 @@ def _blqmr_python_impl(
|
|
|
488
732
|
else:
|
|
489
733
|
np.subtract(B, A @ x, out=ws.vt)
|
|
490
734
|
|
|
491
|
-
|
|
735
|
+
# Apply preconditioner to initial residual
|
|
736
|
+
if use_split_precond:
|
|
737
|
+
# For split preconditioning, initial residual is just M1⁻¹ * (b - A*x0)
|
|
738
|
+
# because we're solving M1⁻¹ A M2⁻¹ y = M1⁻¹ b with y = M2*x
|
|
739
|
+
ws.vt[:] = precond_M1.solve(ws.vt)
|
|
740
|
+
if np.any(np.isnan(ws.vt)):
|
|
741
|
+
return x, 2, 1.0, 0, np.array([])
|
|
742
|
+
elif precond is not None:
|
|
492
743
|
precond.solve(ws.vt, out=ws.vt)
|
|
493
744
|
if np.any(np.isnan(ws.vt)):
|
|
494
745
|
return x, 2, 1.0, 0, np.array([])
|
|
495
746
|
|
|
747
|
+
# QQR decomposition
|
|
496
748
|
Q, R = qqr(ws.vt)
|
|
497
749
|
ws.v[:, :, t3p] = Q
|
|
498
750
|
ws.beta[:, :, t3p] = R
|
|
499
751
|
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
752
|
+
# Compute omega - standard norm WITH conjugation (Hermitian norm)
|
|
753
|
+
# Fortran: omega(i,i,t3p)=sqrt(sum(conjg(v(:,i,t3p))*v(:,i,t3p)))
|
|
754
|
+
ws.omega[:, :, t3p].fill(0)
|
|
755
|
+
if is_complex_input:
|
|
756
|
+
np.fill_diagonal(
|
|
757
|
+
ws.omega[:, :, t3p],
|
|
758
|
+
np.sqrt(
|
|
759
|
+
np.einsum("ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]).real
|
|
760
|
+
),
|
|
761
|
+
)
|
|
762
|
+
else:
|
|
763
|
+
np.fill_diagonal(
|
|
764
|
+
ws.omega[:, :, t3p],
|
|
765
|
+
np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
|
|
766
|
+
)
|
|
767
|
+
|
|
768
|
+
# taut = omega * beta
|
|
769
|
+
ws.taot[:] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
|
|
503
770
|
|
|
504
771
|
isquasires = not residual
|
|
505
772
|
if isquasires:
|
|
506
|
-
Qres0
|
|
773
|
+
# Fortran: Qres0=maxval(sqrt(sum(abs(conjg(taut)*taut),1))) for complex
|
|
774
|
+
if is_complex_input:
|
|
775
|
+
Qres0 = np.max(
|
|
776
|
+
np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
|
|
777
|
+
)
|
|
778
|
+
else:
|
|
779
|
+
Qres0 = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
|
|
507
780
|
else:
|
|
508
|
-
omegat =
|
|
509
|
-
|
|
781
|
+
omegat = np.zeros((n, m), dtype=dtype)
|
|
782
|
+
for i in range(m):
|
|
783
|
+
if np.abs(ws.omega[i, i, t3p]) > 1e-14:
|
|
784
|
+
omegat[:, i] = ws.v[:, i, t3p] / ws.omega[i, i, t3p]
|
|
785
|
+
if is_complex_input:
|
|
786
|
+
Qres0 = np.max(np.sqrt(np.sum(np.abs(np.conj(ws.vt) * ws.vt), axis=0)))
|
|
787
|
+
else:
|
|
788
|
+
Qres0 = np.max(np.sqrt(np.sum(ws.vt * ws.vt, axis=0)))
|
|
510
789
|
|
|
511
790
|
if Qres0 < 1e-16:
|
|
512
791
|
result = x.real if not is_complex_input else x
|
|
513
792
|
return result, 0, 0.0, 0, np.array([0.0])
|
|
514
793
|
|
|
515
|
-
flag, resv, Qres1, relres, iter_count = 1, np.zeros(maxiter),
|
|
516
|
-
omegat = None if isquasires else Q @ np.diag(1.0 / (col_norms + 1e-16))
|
|
794
|
+
flag, resv, Qres1, relres, iter_count = 1, np.zeros(maxiter), -1.0, 1.0, 0
|
|
517
795
|
|
|
518
796
|
for k in range(1, maxiter + 1):
|
|
519
|
-
|
|
797
|
+
# Index cycling
|
|
798
|
+
t3 = k % 3
|
|
799
|
+
t3p = (k + 1) % 3
|
|
800
|
+
t3n = (k - 1) % 3
|
|
801
|
+
t3nn = (k - 2) % 3
|
|
520
802
|
|
|
803
|
+
# tmp = A * v(:,:,t3)
|
|
521
804
|
if A_is_sparse:
|
|
522
805
|
ws.Av[:] = A @ ws.v[:, :, t3]
|
|
523
806
|
else:
|
|
524
807
|
np.matmul(A, ws.v[:, :, t3], out=ws.Av)
|
|
525
808
|
|
|
526
|
-
|
|
809
|
+
# Apply preconditioner
|
|
810
|
+
if use_split_precond:
|
|
811
|
+
# Split preconditioning: M1⁻¹ * A * M2⁻¹ * v
|
|
812
|
+
tmp = precond_M2.solve(ws.v[:, :, t3]) # M2⁻¹ * v
|
|
813
|
+
if A_is_sparse:
|
|
814
|
+
tmp = A @ tmp # A * M2⁻¹ * v
|
|
815
|
+
else:
|
|
816
|
+
tmp = np.matmul(A, tmp)
|
|
817
|
+
ws.vt[:] = precond_M1.solve(tmp) - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
|
|
818
|
+
elif precond is not None:
|
|
819
|
+
# Left-only preconditioning: M⁻¹ * A * v
|
|
527
820
|
precond.solve(ws.Av, out=ws.vt)
|
|
528
|
-
ws.vt
|
|
821
|
+
ws.vt[:] = ws.vt - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
|
|
529
822
|
else:
|
|
530
|
-
|
|
531
|
-
np.subtract(ws.Av, ws.vt, out=ws.vt)
|
|
823
|
+
ws.vt[:] = ws.Av - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
|
|
532
824
|
|
|
533
|
-
|
|
534
|
-
ws.
|
|
825
|
+
# alpha = v^T * vt (transpose, not conjugate transpose)
|
|
826
|
+
ws.alpha[:] = ws.v[:, :, t3].T @ ws.vt
|
|
827
|
+
ws.vt[:] = ws.vt - ws.v[:, :, t3] @ ws.alpha
|
|
535
828
|
|
|
829
|
+
# QQR decomposition
|
|
536
830
|
Q, R = qqr(ws.vt)
|
|
537
831
|
ws.v[:, :, t3p] = Q
|
|
538
832
|
ws.beta[:, :, t3p] = R
|
|
539
833
|
|
|
540
|
-
|
|
541
|
-
ws.omega[:, :, t3p]
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
834
|
+
# Compute omega (standard Hermitian norm)
|
|
835
|
+
ws.omega[:, :, t3p].fill(0)
|
|
836
|
+
if is_complex_input:
|
|
837
|
+
np.fill_diagonal(
|
|
838
|
+
ws.omega[:, :, t3p],
|
|
839
|
+
np.sqrt(
|
|
840
|
+
np.einsum(
|
|
841
|
+
"ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]
|
|
842
|
+
).real
|
|
843
|
+
),
|
|
844
|
+
)
|
|
845
|
+
else:
|
|
846
|
+
np.fill_diagonal(
|
|
847
|
+
ws.omega[:, :, t3p],
|
|
848
|
+
np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
|
|
849
|
+
)
|
|
550
850
|
|
|
551
|
-
|
|
552
|
-
ws.
|
|
851
|
+
# Compute intermediate matrices
|
|
852
|
+
ws.tmp0[:] = ws.omega[:, :, t3n] @ ws.beta[:, :, t3].T
|
|
853
|
+
ws.theta[:] = ws.Qb[:, :, t3nn] @ ws.tmp0
|
|
854
|
+
ws.tmp1[:] = ws.Qd[:, :, t3nn] @ ws.tmp0
|
|
855
|
+
ws.tmp2[:] = ws.omega[:, :, t3] @ ws.alpha
|
|
856
|
+
ws.eta[:] = ws.Qa[:, :, t3n] @ ws.tmp1 + ws.Qb[:, :, t3n] @ ws.tmp2
|
|
857
|
+
ws.zetat[:] = ws.Qc[:, :, t3n] @ ws.tmp1 + ws.Qd[:, :, t3n] @ ws.tmp2
|
|
553
858
|
|
|
859
|
+
# Build ZZ matrix and do standard QR
|
|
554
860
|
ws.stacked[:m, :] = ws.zetat
|
|
555
|
-
|
|
861
|
+
ws.stacked[m:, :] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
|
|
556
862
|
|
|
557
863
|
QQ, zeta_full = np.linalg.qr(ws.stacked, mode="complete")
|
|
558
864
|
ws.zeta[:] = zeta_full[:m, :]
|
|
559
|
-
|
|
865
|
+
|
|
866
|
+
if is_complex_input:
|
|
867
|
+
ws.QQ_full[:] = np.conj(QQ.T)
|
|
868
|
+
else:
|
|
869
|
+
ws.QQ_full[:] = QQ.T
|
|
560
870
|
|
|
561
871
|
ws.Qa[:, :, t3] = ws.QQ_full[:m, :m]
|
|
562
872
|
ws.Qb[:, :, t3] = ws.QQ_full[:m, m : 2 * m]
|
|
563
873
|
ws.Qc[:, :, t3] = ws.QQ_full[m : 2 * m, :m]
|
|
564
874
|
ws.Qd[:, :, t3] = ws.QQ_full[m : 2 * m, m : 2 * m]
|
|
565
875
|
|
|
876
|
+
# Invert zeta
|
|
566
877
|
try:
|
|
567
878
|
zeta_inv = np.linalg.inv(ws.zeta)
|
|
568
879
|
except np.linalg.LinAlgError:
|
|
569
880
|
zeta_inv = np.linalg.pinv(ws.zeta)
|
|
570
881
|
|
|
882
|
+
# Update p, tau, x, taut
|
|
571
883
|
ws.p[:, :, t3] = (
|
|
572
884
|
ws.v[:, :, t3] - ws.p[:, :, t3n] @ ws.eta - ws.p[:, :, t3nn] @ ws.theta
|
|
573
885
|
) @ zeta_inv
|
|
886
|
+
ws.tau[:] = ws.Qa[:, :, t3] @ ws.taot
|
|
887
|
+
x[:] = x + ws.p[:, :, t3] @ ws.tau
|
|
888
|
+
ws.taot[:] = ws.Qc[:, :, t3] @ ws.taot
|
|
574
889
|
|
|
575
|
-
|
|
576
|
-
x += ws.p[:, :, t3] @ ws.tau
|
|
577
|
-
|
|
578
|
-
taot_copy = ws.taot.copy()
|
|
579
|
-
np.matmul(ws.Qc[:, :, t3], taot_copy, out=ws.taot)
|
|
580
|
-
|
|
890
|
+
# Compute residual
|
|
581
891
|
if isquasires:
|
|
582
|
-
|
|
892
|
+
if is_complex_input:
|
|
893
|
+
Qres = np.max(
|
|
894
|
+
np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
|
|
895
|
+
)
|
|
896
|
+
else:
|
|
897
|
+
Qres = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
|
|
583
898
|
else:
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
899
|
+
tmp0_diag = np.zeros((m, m), dtype=dtype)
|
|
900
|
+
for i in range(m):
|
|
901
|
+
if np.abs(ws.omega[i, i, t3p]) > 1e-14:
|
|
902
|
+
tmp0_diag[i, :] = ws.Qd[:, i, t3] / ws.omega[i, i, t3p]
|
|
903
|
+
if is_complex_input:
|
|
904
|
+
omegat = omegat @ np.conj(ws.Qc[:, :, t3].T) + ws.v[
|
|
905
|
+
:, :, t3p
|
|
906
|
+
] @ np.conj(tmp0_diag)
|
|
907
|
+
tmp_res = np.conj(omegat @ ws.taot)
|
|
908
|
+
Qres = np.max(
|
|
909
|
+
np.sqrt(np.sum(np.abs(np.conj(tmp_res) * tmp_res), axis=0))
|
|
910
|
+
)
|
|
911
|
+
else:
|
|
912
|
+
omegat = omegat @ ws.Qc[:, :, t3].T + ws.v[:, :, t3p] @ tmp0_diag
|
|
913
|
+
tmp_res = omegat @ ws.taot
|
|
914
|
+
Qres = np.max(np.sqrt(np.sum(tmp_res * tmp_res, axis=0)))
|
|
591
915
|
|
|
592
916
|
resv[k - 1] = Qres
|
|
593
917
|
|
|
594
|
-
if
|
|
918
|
+
if k > 1 and abs(Qres - Qres1) < np.finfo(dtype).eps:
|
|
595
919
|
flag, iter_count = 3, k
|
|
596
920
|
break
|
|
597
921
|
|
|
@@ -602,6 +926,11 @@ def _blqmr_python_impl(
|
|
|
602
926
|
break
|
|
603
927
|
|
|
604
928
|
resv = resv[:iter_count]
|
|
929
|
+
|
|
930
|
+
# For split preconditioning, recover x = M2⁻¹ * y
|
|
931
|
+
if use_split_precond:
|
|
932
|
+
x = precond_M2.solve(x)
|
|
933
|
+
|
|
605
934
|
result = x.real if not is_complex_input else x
|
|
606
935
|
return result, flag, relres, iter_count, resv
|
|
607
936
|
|
|
@@ -621,7 +950,7 @@ def blqmr_solve(
|
|
|
621
950
|
tol: float = 1e-6,
|
|
622
951
|
maxiter: Optional[int] = None,
|
|
623
952
|
droptol: float = 0.001,
|
|
624
|
-
|
|
953
|
+
precond_type: PrecondType = "ilu",
|
|
625
954
|
zero_based: bool = True,
|
|
626
955
|
) -> BLQMRResult:
|
|
627
956
|
"""
|
|
@@ -647,8 +976,12 @@ def blqmr_solve(
|
|
|
647
976
|
Maximum iterations. Default is n.
|
|
648
977
|
droptol : float, default 0.001
|
|
649
978
|
Drop tolerance for ILU preconditioner (Fortran only).
|
|
650
|
-
|
|
651
|
-
|
|
979
|
+
precond_type : None, '', or str, default 'ilu'
|
|
980
|
+
Preconditioner type:
|
|
981
|
+
- None or '': No preconditioning
|
|
982
|
+
- 'ilu', 'ilu0', 'ilut': Incomplete LU
|
|
983
|
+
- 'diag', 'jacobi': Diagonal (Jacobi)
|
|
984
|
+
- For Fortran: integers 2 (ILU) or 3 (diagonal) also accepted
|
|
652
985
|
zero_based : bool, default True
|
|
653
986
|
If True, Ap and Ai use 0-based indexing (Python/C convention).
|
|
654
987
|
If False, uses 1-based indexing (Fortran convention).
|
|
@@ -673,7 +1006,7 @@ def blqmr_solve(
|
|
|
673
1006
|
tol=tol,
|
|
674
1007
|
maxiter=maxiter,
|
|
675
1008
|
droptol=droptol,
|
|
676
|
-
|
|
1009
|
+
precond_type=precond_type,
|
|
677
1010
|
zero_based=zero_based,
|
|
678
1011
|
)
|
|
679
1012
|
else:
|
|
@@ -685,13 +1018,13 @@ def blqmr_solve(
|
|
|
685
1018
|
x0=x0,
|
|
686
1019
|
tol=tol,
|
|
687
1020
|
maxiter=maxiter,
|
|
688
|
-
|
|
1021
|
+
precond_type=precond_type,
|
|
689
1022
|
zero_based=zero_based,
|
|
690
1023
|
)
|
|
691
1024
|
|
|
692
1025
|
|
|
693
1026
|
def _blqmr_solve_fortran(
|
|
694
|
-
Ap, Ai, Ax, b, *, x0, tol, maxiter, droptol,
|
|
1027
|
+
Ap, Ai, Ax, b, *, x0, tol, maxiter, droptol, precond_type, zero_based
|
|
695
1028
|
) -> BLQMRResult:
|
|
696
1029
|
"""Fortran backend for blqmr_solve."""
|
|
697
1030
|
n = len(Ap) - 1
|
|
@@ -711,10 +1044,10 @@ def _blqmr_solve_fortran(
|
|
|
711
1044
|
Ap = Ap + 1
|
|
712
1045
|
Ai = Ai + 1
|
|
713
1046
|
|
|
714
|
-
|
|
1047
|
+
pcond_type = _parse_precond_type_for_fortran(precond_type)
|
|
715
1048
|
|
|
716
1049
|
x, flag, niter, relres = _blqmr.blqmr_solve_real(
|
|
717
|
-
n, nnz, Ap, Ai, Ax, b, maxiter, tol, droptol,
|
|
1050
|
+
n, nnz, Ap, Ai, Ax, b, maxiter, tol, droptol, pcond_type
|
|
718
1051
|
)
|
|
719
1052
|
|
|
720
1053
|
return BLQMRResult(
|
|
@@ -723,7 +1056,7 @@ def _blqmr_solve_fortran(
|
|
|
723
1056
|
|
|
724
1057
|
|
|
725
1058
|
def _blqmr_solve_native_csc(
|
|
726
|
-
Ap, Ai, Ax, b, *, x0, tol, maxiter,
|
|
1059
|
+
Ap, Ai, Ax, b, *, x0, tol, maxiter, precond_type, zero_based
|
|
727
1060
|
) -> BLQMRResult:
|
|
728
1061
|
"""Native Python backend for blqmr_solve with CSC input."""
|
|
729
1062
|
n = len(Ap) - 1
|
|
@@ -734,12 +1067,7 @@ def _blqmr_solve_native_csc(
|
|
|
734
1067
|
|
|
735
1068
|
A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))
|
|
736
1069
|
|
|
737
|
-
M1 = None
|
|
738
|
-
if use_precond:
|
|
739
|
-
try:
|
|
740
|
-
M1 = make_preconditioner(A, "ilu")
|
|
741
|
-
except Exception:
|
|
742
|
-
M1 = make_preconditioner(A, "diag")
|
|
1070
|
+
M1 = _get_preconditioner_for_native(A, precond_type, None)
|
|
743
1071
|
|
|
744
1072
|
x, flag, relres, niter, resv = _blqmr_python_impl(
|
|
745
1073
|
A, b, tol=tol, maxiter=maxiter, M1=M1, x0=x0
|
|
@@ -760,13 +1088,18 @@ def blqmr_solve_multi(
|
|
|
760
1088
|
tol: float = 1e-6,
|
|
761
1089
|
maxiter: Optional[int] = None,
|
|
762
1090
|
droptol: float = 0.001,
|
|
763
|
-
|
|
1091
|
+
precond_type: PrecondType = "ilu",
|
|
764
1092
|
zero_based: bool = True,
|
|
765
1093
|
) -> BLQMRResult:
|
|
766
1094
|
"""
|
|
767
1095
|
Solve sparse linear system AX = B with multiple right-hand sides.
|
|
768
1096
|
|
|
769
1097
|
Uses Fortran extension if available, otherwise falls back to pure Python.
|
|
1098
|
+
|
|
1099
|
+
Parameters
|
|
1100
|
+
----------
|
|
1101
|
+
precond_type : None, '', or str, default 'ilu'
|
|
1102
|
+
Preconditioner type (see blqmr_solve for details)
|
|
770
1103
|
"""
|
|
771
1104
|
n = len(Ap) - 1
|
|
772
1105
|
|
|
@@ -782,7 +1115,7 @@ def blqmr_solve_multi(
|
|
|
782
1115
|
tol=tol,
|
|
783
1116
|
maxiter=maxiter,
|
|
784
1117
|
droptol=droptol,
|
|
785
|
-
|
|
1118
|
+
precond_type=precond_type,
|
|
786
1119
|
zero_based=zero_based,
|
|
787
1120
|
)
|
|
788
1121
|
else:
|
|
@@ -793,13 +1126,13 @@ def blqmr_solve_multi(
|
|
|
793
1126
|
B,
|
|
794
1127
|
tol=tol,
|
|
795
1128
|
maxiter=maxiter,
|
|
796
|
-
|
|
1129
|
+
precond_type=precond_type,
|
|
797
1130
|
zero_based=zero_based,
|
|
798
1131
|
)
|
|
799
1132
|
|
|
800
1133
|
|
|
801
1134
|
def _blqmr_solve_multi_fortran(
|
|
802
|
-
Ap, Ai, Ax, B, *, tol, maxiter, droptol,
|
|
1135
|
+
Ap, Ai, Ax, B, *, tol, maxiter, droptol, precond_type, zero_based
|
|
803
1136
|
) -> BLQMRResult:
|
|
804
1137
|
"""Fortran backend for blqmr_solve_multi."""
|
|
805
1138
|
n = len(Ap) - 1
|
|
@@ -818,10 +1151,11 @@ def _blqmr_solve_multi_fortran(
|
|
|
818
1151
|
Ap = Ap + 1
|
|
819
1152
|
Ai = Ai + 1
|
|
820
1153
|
|
|
821
|
-
|
|
1154
|
+
# Convert precond_type string to Fortran integer code
|
|
1155
|
+
pcond_type = _parse_precond_type_for_fortran(precond_type)
|
|
822
1156
|
|
|
823
1157
|
X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
|
|
824
|
-
n, nnz, nrhs, Ap, Ai, Ax, B, maxiter, tol, droptol,
|
|
1158
|
+
n, nnz, nrhs, Ap, Ai, Ax, B, maxiter, tol, droptol, pcond_type
|
|
825
1159
|
)
|
|
826
1160
|
|
|
827
1161
|
return BLQMRResult(
|
|
@@ -830,7 +1164,7 @@ def _blqmr_solve_multi_fortran(
|
|
|
830
1164
|
|
|
831
1165
|
|
|
832
1166
|
def _blqmr_solve_multi_native(
|
|
833
|
-
Ap, Ai, Ax, B, *, tol, maxiter,
|
|
1167
|
+
Ap, Ai, Ax, B, *, tol, maxiter, precond_type, zero_based
|
|
834
1168
|
) -> BLQMRResult:
|
|
835
1169
|
"""Native Python backend for blqmr_solve_multi."""
|
|
836
1170
|
n = len(Ap) - 1
|
|
@@ -841,12 +1175,7 @@ def _blqmr_solve_multi_native(
|
|
|
841
1175
|
|
|
842
1176
|
A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))
|
|
843
1177
|
|
|
844
|
-
M1 = None
|
|
845
|
-
if use_precond:
|
|
846
|
-
try:
|
|
847
|
-
M1 = make_preconditioner(A, "ilu")
|
|
848
|
-
except Exception:
|
|
849
|
-
M1 = make_preconditioner(A, "diag")
|
|
1178
|
+
M1 = _get_preconditioner_for_native(A, precond_type, None)
|
|
850
1179
|
|
|
851
1180
|
if B.ndim == 1:
|
|
852
1181
|
B = B.reshape(-1, 1)
|
|
@@ -909,7 +1238,7 @@ def blqmr(
|
|
|
909
1238
|
residual: bool = False,
|
|
910
1239
|
workspace: Optional[BLQMRWorkspace] = None,
|
|
911
1240
|
droptol: float = 0.001,
|
|
912
|
-
|
|
1241
|
+
precond_type: PrecondType = "ilu",
|
|
913
1242
|
) -> BLQMRResult:
|
|
914
1243
|
"""
|
|
915
1244
|
Block Quasi-Minimal-Residual (BL-QMR) solver - main interface.
|
|
@@ -925,9 +1254,10 @@ def blqmr(
|
|
|
925
1254
|
tol : float
|
|
926
1255
|
Convergence tolerance (default: 1e-6)
|
|
927
1256
|
maxiter : int, optional
|
|
928
|
-
Maximum iterations (default: n
|
|
1257
|
+
Maximum iterations (default: n)
|
|
929
1258
|
M1, M2 : preconditioner, optional
|
|
930
|
-
|
|
1259
|
+
Custom preconditioners. If provided, precond_type is ignored.
|
|
1260
|
+
M = M1 @ M2 for split preconditioning (Python backend only)
|
|
931
1261
|
x0 : ndarray, optional
|
|
932
1262
|
Initial guess
|
|
933
1263
|
residual : bool
|
|
@@ -936,8 +1266,13 @@ def blqmr(
|
|
|
936
1266
|
Pre-allocated workspace (Python backend only)
|
|
937
1267
|
droptol : float, default 0.001
|
|
938
1268
|
Drop tolerance for ILU preconditioner (Fortran backend only)
|
|
939
|
-
|
|
940
|
-
|
|
1269
|
+
precond_type : None, '', or str, default 'ilu'
|
|
1270
|
+
Preconditioner type (ignored if M1 is provided):
|
|
1271
|
+
- None or '': No preconditioning
|
|
1272
|
+
- 'ilu', 'ilu0', 'ilut': Incomplete LU
|
|
1273
|
+
- 'diag', 'jacobi': Diagonal (Jacobi)
|
|
1274
|
+
- 'lu': Full LU (expensive, for debugging)
|
|
1275
|
+
- For Fortran: integers 2 (ILU) or 3 (diagonal) also accepted
|
|
941
1276
|
|
|
942
1277
|
Returns
|
|
943
1278
|
-------
|
|
@@ -957,7 +1292,7 @@ def blqmr(
|
|
|
957
1292
|
maxiter=maxiter,
|
|
958
1293
|
x0=x0,
|
|
959
1294
|
droptol=droptol,
|
|
960
|
-
|
|
1295
|
+
precond_type=precond_type,
|
|
961
1296
|
)
|
|
962
1297
|
else:
|
|
963
1298
|
return _blqmr_native(
|
|
@@ -970,7 +1305,7 @@ def blqmr(
|
|
|
970
1305
|
x0=x0,
|
|
971
1306
|
residual=residual,
|
|
972
1307
|
workspace=workspace,
|
|
973
|
-
|
|
1308
|
+
precond_type=precond_type,
|
|
974
1309
|
)
|
|
975
1310
|
|
|
976
1311
|
|
|
@@ -982,44 +1317,79 @@ def _blqmr_fortran(
|
|
|
982
1317
|
maxiter: Optional[int],
|
|
983
1318
|
x0: Optional[np.ndarray],
|
|
984
1319
|
droptol: float,
|
|
985
|
-
|
|
1320
|
+
precond_type: PrecondType,
|
|
986
1321
|
) -> BLQMRResult:
|
|
987
1322
|
"""Fortran backend for blqmr()."""
|
|
988
1323
|
A_csc = sparse.csc_matrix(A)
|
|
1324
|
+
|
|
1325
|
+
# CRITICAL: Sort indices for UMFPACK compatibility
|
|
1326
|
+
if not A_csc.has_sorted_indices:
|
|
1327
|
+
A_csc.sort_indices()
|
|
1328
|
+
|
|
989
1329
|
Ap = A_csc.indptr.astype(np.int32)
|
|
990
1330
|
Ai = A_csc.indices.astype(np.int32)
|
|
991
|
-
Ax = A_csc.data.astype(np.float64)
|
|
992
1331
|
|
|
993
1332
|
n = A_csc.shape[0]
|
|
994
|
-
nnz =
|
|
1333
|
+
nnz = A_csc.nnz
|
|
995
1334
|
|
|
996
1335
|
if maxiter is None:
|
|
997
1336
|
maxiter = n
|
|
998
1337
|
|
|
999
|
-
# Convert to Fortran format
|
|
1000
|
-
Ap_f = np.asfortranarray(Ap + 1, dtype=np.int32)
|
|
1001
|
-
Ai_f = np.asfortranarray(Ai + 1, dtype=np.int32)
|
|
1002
|
-
Ax_f = np.asfortranarray(Ax, dtype=np.float64)
|
|
1338
|
+
# Convert to Fortran format (1-based indexing)
|
|
1339
|
+
Ap_f = np.asfortranarray(Ap + 1, dtype=np.int32)
|
|
1340
|
+
Ai_f = np.asfortranarray(Ai + 1, dtype=np.int32)
|
|
1003
1341
|
|
|
1004
|
-
|
|
1342
|
+
pcond_type = _parse_precond_type_for_fortran(precond_type)
|
|
1005
1343
|
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
)
|
|
1344
|
+
# Check if complex
|
|
1345
|
+
is_complex = np.iscomplexobj(A) or np.iscomplexobj(B)
|
|
1346
|
+
|
|
1347
|
+
if is_complex:
|
|
1348
|
+
# Complex path
|
|
1349
|
+
Ax_f = np.asfortranarray(A_csc.data, dtype=np.complex128)
|
|
1350
|
+
|
|
1351
|
+
if B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1):
|
|
1352
|
+
# Single RHS
|
|
1353
|
+
b_f = np.asfortranarray(B.ravel(), dtype=np.complex128)
|
|
1354
|
+
x, flag, niter, relres = _blqmr.blqmr_solve_complex(
|
|
1355
|
+
n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, pcond_type
|
|
1356
|
+
)
|
|
1357
|
+
return BLQMRResult(
|
|
1358
|
+
x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
|
|
1359
|
+
)
|
|
1360
|
+
else:
|
|
1361
|
+
# Multiple RHS - use block method
|
|
1362
|
+
B_f = np.asfortranarray(B, dtype=np.complex128)
|
|
1363
|
+
nrhs = B_f.shape[1]
|
|
1364
|
+
X, flag, niter, relres = _blqmr.blqmr_solve_complex_multi(
|
|
1365
|
+
n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, pcond_type
|
|
1366
|
+
)
|
|
1367
|
+
return BLQMRResult(
|
|
1368
|
+
x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
|
|
1369
|
+
)
|
|
1014
1370
|
else:
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
x
|
|
1022
|
-
|
|
1371
|
+
# Real path
|
|
1372
|
+
Ax_f = np.asfortranarray(A_csc.data, dtype=np.float64)
|
|
1373
|
+
|
|
1374
|
+
if B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1):
|
|
1375
|
+
# Single RHS
|
|
1376
|
+
b_f = np.asfortranarray(B.ravel(), dtype=np.float64)
|
|
1377
|
+
x, flag, niter, relres = _blqmr.blqmr_solve_real(
|
|
1378
|
+
n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, pcond_type
|
|
1379
|
+
)
|
|
1380
|
+
return BLQMRResult(
|
|
1381
|
+
x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
|
|
1382
|
+
)
|
|
1383
|
+
else:
|
|
1384
|
+
# Multiple RHS - use block method
|
|
1385
|
+
B_f = np.asfortranarray(B, dtype=np.float64)
|
|
1386
|
+
nrhs = B_f.shape[1]
|
|
1387
|
+
X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
|
|
1388
|
+
n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, pcond_type
|
|
1389
|
+
)
|
|
1390
|
+
return BLQMRResult(
|
|
1391
|
+
x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
|
|
1392
|
+
)
|
|
1023
1393
|
|
|
1024
1394
|
|
|
1025
1395
|
def _blqmr_native(
|
|
@@ -1033,16 +1403,13 @@ def _blqmr_native(
|
|
|
1033
1403
|
x0: Optional[np.ndarray],
|
|
1034
1404
|
residual: bool,
|
|
1035
1405
|
workspace: Optional[BLQMRWorkspace],
|
|
1036
|
-
|
|
1406
|
+
precond_type: PrecondType,
|
|
1037
1407
|
) -> BLQMRResult:
|
|
1038
1408
|
"""Native Python backend for blqmr()."""
|
|
1039
|
-
#
|
|
1040
|
-
if
|
|
1409
|
+
# Get preconditioner (user-provided M1 takes precedence)
|
|
1410
|
+
if M1 is None:
|
|
1041
1411
|
A_sp = sparse.csc_matrix(A) if not sparse.issparse(A) else A
|
|
1042
|
-
|
|
1043
|
-
M1 = make_preconditioner(A_sp, "ilu")
|
|
1044
|
-
except Exception:
|
|
1045
|
-
M1 = make_preconditioner(A_sp, "diag")
|
|
1412
|
+
M1 = _get_preconditioner_for_native(A_sp, precond_type, None)
|
|
1046
1413
|
|
|
1047
1414
|
x, flag, relres, niter, resv = _blqmr_python_impl(
|
|
1048
1415
|
A,
|