blocksolver 0.8.5__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
blocksolver/blqmr.py ADDED
@@ -0,0 +1,1476 @@
1
+ """
2
+ BLQMR - Block Quasi-Minimal-Residual sparse linear solver.
3
+
4
+ This module provides a unified interface that uses the Fortran extension
5
+ when available, falling back to a pure-Python implementation otherwise.
6
+ """
7
+
8
+ import numpy as np
9
+ from scipy import sparse
10
+ from scipy.sparse.linalg import splu, spilu
11
+ from dataclasses import dataclass
12
+ from typing import Optional, Tuple, Union
13
+ import warnings
14
+
15
# Names exported by ``from blocksolver.blqmr import *``.
__all__ = [
    # solver entry points
    "blqmr_solve",
    "blqmr_solve_multi",
    "blqmr_scipy",
    "blqmr",
    # result container and backend flag
    "BLQMRResult",
    "BLQMR_EXT",
    # building blocks
    "qqr",
    "BLQMRWorkspace",
    "SparsePreconditioner",
    "DensePreconditioner",
    "make_preconditioner",
]
28
+
29
+ # =============================================================================
30
+ # Backend Detection
31
+ # =============================================================================
32
+
33
# Look for the compiled extension first inside the installed package and
# then as a stand-alone top-level module; ``_blqmr`` stays ``None`` when
# neither import succeeds.
try:
    from blocksolver import _blqmr
except ImportError:
    try:
        import _blqmr
    except ImportError:
        _blqmr = None

# True exactly when one of the imports above succeeded.
BLQMR_EXT = _blqmr is not None
47
+
48
+ # Optional Numba acceleration
49
try:
    from numba import njit

    HAS_NUMBA = True
except Exception:  # broad on purpose: any failure while importing numba
    HAS_NUMBA = False

    def njit(*args, **kwargs):
        """No-op stand-in for ``numba.njit`` used when Numba is unavailable.

        Supports both decorator spellings: bare ``@njit`` (the function is
        passed directly and returned unchanged) and parameterised
        ``@njit(cache=True)`` (a pass-through decorator is returned).
        """
        # Bare form: the decorated function itself is the only argument.
        if len(args) == 1 and not kwargs and callable(args[0]):
            return args[0]

        def decorator(func):
            return func

        return decorator
61
+
62
+
63
+ # =============================================================================
64
+ # Result Container
65
+ # =============================================================================
66
+
67
+
68
@dataclass
class BLQMRResult:
    """Outcome of a BLQMR solve.

    ``flag`` is the solver status code, where 0 means converged. ``x``,
    ``iter``, ``relres`` and ``resv`` carry the solution, the iteration
    count, the final relative residual and the optional residual history
    as filled in by the solver front-ends.
    """

    x: np.ndarray
    flag: int
    iter: int
    relres: float
    resv: Optional[np.ndarray] = None

    @property
    def converged(self) -> bool:
        """Whether the solver reported success (``flag`` equals 0)."""
        return self.flag == 0

    def __repr__(self) -> str:
        # The backend shown is the module-wide one, not stored per result.
        outcome = f"flag={self.flag}" if not self.converged else "converged"
        engine = "python" if not BLQMR_EXT else "fortran"
        return (
            f"BLQMRResult({outcome}, iter={self.iter}, "
            f"relres={self.relres:.2e}, backend={engine})"
        )
86
+
87
+
88
+ # =============================================================================
89
+ # Quasi-QR Decomposition
90
+ # =============================================================================
91
+
92
+
93
@njit(cache=True)
def _qqr_kernel_complex(Q, R, n, m):
    """Quasi-QR kernel for complex arrays (modified Gram-Schmidt, in place).

    Overwrites the n-by-m array ``Q`` with the quasi-orthonormalised columns
    and fills the upper triangle of the m-by-m array ``R``.  Every inner
    product is the unconjugated sum ``sum(x * y)``.
    """
    for j in range(m):
        # Quasi inner product: sum(q*q) WITHOUT conjugation
        r_jj_sq = 0.0j
        for i in range(n):
            r_jj_sq += Q[i, j] * Q[i, j]
        r_jj = np.sqrt(r_jj_sq)
        R[j, j] = r_jj
        # A column whose quasi-norm is (nearly) zero is left unscaled.
        if abs(r_jj) > 1e-14:
            inv_r_jj = 1.0 / r_jj
            for i in range(n):
                Q[i, j] *= inv_r_jj
        for k in range(j + 1, m):
            # Quasi inner product: sum(q_j * q_k) WITHOUT conjugation
            dot = 0.0j
            for i in range(n):
                dot += Q[i, j] * Q[i, k]
            R[j, k] = dot
            for i in range(n):
                Q[i, k] -= Q[i, j] * dot


@njit(cache=True)
def _qqr_kernel_real(Q, R, n, m):
    """Quasi-QR kernel for real arrays (modified Gram-Schmidt, in place).

    Overwrites the n-by-m array ``Q`` with the orthonormalised columns and
    fills the upper triangle of the m-by-m array ``R``.
    """
    for j in range(m):
        r_jj_sq = 0.0
        for i in range(n):
            r_jj_sq += Q[i, j] * Q[i, j]
        r_jj = np.sqrt(r_jj_sq)
        R[j, j] = r_jj
        # A column whose norm is (nearly) zero is left unscaled.
        if abs(r_jj) > 1e-14:
            inv_r_jj = 1.0 / r_jj
            for i in range(n):
                Q[i, j] *= inv_r_jj
        for k in range(j + 1, m):
            dot = 0.0
            for i in range(n):
                dot += Q[i, j] * Q[i, k]
            R[j, k] = dot
            for i in range(n):
                Q[i, k] -= Q[i, j] * dot
181
+
182
+
183
def qqr(
    A: np.ndarray, tol: float = 0, use_numba: bool = True
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Quasi-QR decomposition using modified Gram-Schmidt with quasi inner product.

    For complex symmetric systems, uses <x,y>_Q = sum(x_k * y_k) without
    conjugation.  The input array is never modified: the factorisation is
    carried out on a private copy.

    Parameters
    ----------
    A : ndarray
        Input matrix (n x m)
    tol : float
        Tolerance (unused, for API compatibility)
    use_numba : bool
        If True and Numba available, use JIT-compiled kernel

    Returns
    -------
    Q : ndarray
        Quasi-orthonormal columns (n x m)
    R : ndarray
        Upper triangular matrix (m x m)

    Raises
    ------
    ValueError
        If ``A`` is not two-dimensional.
    """
    A = np.asarray(A)
    if A.ndim != 2:
        raise ValueError(f"qqr expects a 2-D array, got {A.ndim} dimension(s)")

    n, m = A.shape
    is_complex = np.iscomplexobj(A)
    dtype = np.complex128 if is_complex else np.float64

    # np.array always copies.  np.ascontiguousarray would hand back the
    # caller's own buffer whenever it is already C-contiguous with the target
    # dtype, and the in-place Gram-Schmidt below would then overwrite it.
    Q = np.array(A, dtype=dtype, order="C")
    R = np.zeros((m, m), dtype=dtype)

    if use_numba and HAS_NUMBA:
        kernel = _qqr_kernel_complex if is_complex else _qqr_kernel_real
        kernel(Q, R, n, m)
        return Q, R

    for j in range(m):
        qj = Q[:, j]  # view: in-place updates below modify Q itself
        # Quasi inner product: a plain elementwise product, so no complex
        # conjugation takes place (np.vdot would conjugate).
        r_jj = np.sqrt(np.sum(qj * qj))
        R[j, j] = r_jj
        # A column whose quasi-norm is (nearly) zero is left unscaled.
        if np.abs(r_jj) > 1e-14:
            qj *= 1.0 / r_jj
        for k in range(j + 1, m):
            R[j, k] = np.sum(qj * Q[:, k])  # again without conjugation
            Q[:, k] -= R[j, k] * qj

    return Q, R
238
+
239
+
240
+ # =============================================================================
241
+ # Preconditioner Classes
242
+ # =============================================================================
243
+
244
+
245
+ class _ILUPreconditioner:
246
+ """Wrapper for ILU preconditioner to work with blqmr."""
247
+
248
+ def __init__(self, ilu_factor):
249
+ self.ilu = ilu_factor
250
+ self.shape = (ilu_factor.shape[0], ilu_factor.shape[1])
251
+ self.dtype = ilu_factor.L.dtype
252
+
253
+ def solve(self, b):
254
+ # Convert to real if needed for real ILU
255
+ b_solve = b.real if np.isrealobj(self.ilu.L.data) and np.iscomplexobj(b) else b
256
+ if b_solve.ndim == 1:
257
+ return self.ilu.solve(b_solve)
258
+ else:
259
+ x = np.zeros_like(b_solve)
260
+ for i in range(b_solve.shape[1]):
261
+ x[:, i] = self.ilu.solve(b_solve[:, i])
262
+ return x
263
+
264
+
265
class SparsePreconditioner:
    """Efficient sparse preconditioner using LU factorization.

    Each of ``M1`` / ``M2`` may be ``None``, an already-built
    ``_ILUPreconditioner`` / ``_LUPreconditioner`` wrapper (used as is), or
    a matrix that is converted to CSC and factorised with ``splu``.
    ``solve`` applies the first factor and then, if present, the second.
    """

    __slots__ = ("lu1", "lu2", "is_two_part", "is_ilu1", "is_ilu2")

    def __init__(self, M1, M2=None):
        wrappers = (_ILUPreconditioner, _LUPreconditioner)
        self.is_two_part = M2 is not None
        self.is_ilu1 = isinstance(M1, wrappers)
        self.is_ilu2 = isinstance(M2, wrappers)  # False when M2 is None
        self.lu1 = self._prepare(M1, self.is_ilu1)
        self.lu2 = self._prepare(M2, self.is_ilu2)

    @staticmethod
    def _prepare(M, is_wrapped):
        """Return the solver object for one factor (``None`` passes through)."""
        if M is None:
            return None
        if is_wrapped:
            return M
        return splu(M if sparse.isspmatrix_csc(M) else sparse.csc_matrix(M))

    @staticmethod
    def _apply(factor, is_wrapped, rhs, out):
        """Solve with one factor, storing into ``out`` whenever possible."""
        if is_wrapped:
            result = factor.solve(rhs)
            # Write into the caller's buffer when the value fits (e.g. a
            # real result into a complex buffer) so ``out=`` is honoured.
            if np.can_cast(result.dtype, out.dtype, casting="same_kind"):
                out[...] = result
                return out
            # A complex result cannot be stored in a real buffer: hand back
            # the freshly computed array instead of discarding information.
            return result
        if rhs.ndim == 1:
            out[:] = factor.solve(rhs)
        else:
            for i in range(rhs.shape[1]):
                out[:, i] = factor.solve(rhs[:, i])
        return out

    def solve(self, b: np.ndarray, out: Optional[np.ndarray] = None) -> np.ndarray:
        """Apply the preconditioner to ``b``.

        Returns ``b`` itself when no first factor was given.  Otherwise the
        result is stored in ``out`` (allocated like ``b`` if omitted) and
        returned; a new array is returned only when the result dtype cannot
        be stored in ``out``.
        """
        if self.lu1 is None:
            return b
        if out is None:
            out = np.empty_like(b)

        out = self._apply(self.lu1, self.is_ilu1, b, out)
        if self.is_two_part:
            out = self._apply(self.lu2, self.is_ilu2, out, out)
        return out
326
+
327
+
328
class DensePreconditioner:
    """Dense preconditioner backed by ``scipy.linalg`` LU factorisations.

    ``M1`` and the optional ``M2`` are factorised once at construction;
    ``solve`` applies the first factor and then, if given, the second.
    """

    __slots__ = ("lu1", "piv1", "lu2", "piv2", "is_two_part")

    def __init__(self, M1: Optional[np.ndarray], M2: Optional[np.ndarray] = None):
        from scipy.linalg import lu_factor

        # A missing matrix is represented by a (None, None) factor pair.
        first = (None, None) if M1 is None else lu_factor(M1)
        second = (None, None) if M2 is None else lu_factor(M2)
        self.lu1, self.piv1 = first
        self.lu2, self.piv2 = second
        self.is_two_part = M2 is not None

    def solve(self, b: np.ndarray, out: Optional[np.ndarray] = None) -> np.ndarray:
        """Apply the preconditioner to ``b``.

        Returns ``b`` unchanged when no first factor exists.  If ``out`` is
        supplied the result is copied into it and ``out`` is returned.
        """
        from scipy.linalg import lu_solve

        if self.lu1 is None:
            return b

        solution = lu_solve((self.lu1, self.piv1), b)
        if self.is_two_part:
            solution = lu_solve((self.lu2, self.piv2), solution)

        if out is None:
            return solution
        out[:] = solution
        return out
358
+
359
+
360
+ # =============================================================================
361
+ # BL-QMR Workspace
362
+ # =============================================================================
363
+
364
+
365
class BLQMRWorkspace:
    """Zero-initialised scratch arrays reused across BL-QMR iterations.

    All buffers are allocated once for a problem with ``n`` rows and ``m``
    right-hand sides; arrays with a trailing axis of length 3 hold three
    rotating slots.
    """

    __slots__ = (
        "v",
        "vt",
        "beta",
        "alpha",
        "omega",
        "theta",
        "Qa",
        "Qb",
        "Qc",
        "Qd",
        "zeta",
        "zetat",
        "eta",
        "tau",
        "taot",
        "p",
        "stacked",
        "QQ_full",
        "tmp0",
        "tmp1",
        "tmp2",
        "Av",
        "precond_tmp",
        "n",
        "m",
        "dtype",
    )

    def __init__(self, n: int, m: int, dtype=np.float64):
        self.n, self.m = n, m
        self.dtype = dtype

        # (shape, attribute names) pairs.  A list rather than a dict because
        # several shapes coincide when n == m.
        layout = [
            ((n, m, 3), ("v", "p")),
            ((n, m), ("vt", "Av", "precond_tmp")),
            ((m, m, 3), ("beta", "omega", "Qa", "Qb", "Qc", "Qd")),
            (
                (m, m),
                (
                    "alpha",
                    "theta",
                    "zeta",
                    "zetat",
                    "eta",
                    "tau",
                    "taot",
                    "tmp0",
                    "tmp1",
                    "tmp2",
                ),
            ),
            ((2 * m, m), ("stacked",)),
            ((2 * m, 2 * m), ("QQ_full",)),
        ]
        for shape, names in layout:
            for name in names:
                setattr(self, name, np.zeros(shape, dtype=dtype))

    def reset(self):
        """Zero the buffers that carry state from one solve to the next."""
        for name in ("v", "beta", "omega", "Qa", "Qb", "Qc", "Qd", "p", "taot"):
            getattr(self, name).fill(0)
434
+
435
+
436
+ # =============================================================================
437
+ # Preconditioner Factory
438
+ # =============================================================================
439
+
440
# Accepted forms of a ``precond_type`` argument: a name, an integer code,
# or None for "no preconditioning".
PrecondType = Union[str, int, None]
442
+
443
+
444
def _parse_precond_type_for_fortran(precond_type: PrecondType) -> int:
    """
    Translate ``precond_type`` into the integer code passed to the Fortran solver.

    ``None``, ``""`` and ``False`` mean no preconditioning.  Integers are
    passed through unchanged.  Strings are matched case-insensitively; an
    unrecognised string triggers a warning and disables preconditioning.
    Any other type also disables preconditioning.

    Returns
    -------
    int
        0 = no preconditioning
        2 = ILU
        3 = diagonal/Jacobi
        (or the caller's own integer when one was supplied)
    """
    if precond_type is None or precond_type == "" or precond_type is False:
        return 0
    if isinstance(precond_type, int):
        return precond_type
    if not isinstance(precond_type, str):
        return 0

    codes = {"ilu": 2, "ilu0": 2, "ilut": 2, "diag": 3, "jacobi": 3}
    code = codes.get(precond_type.lower())
    if code is not None:
        return code

    # Unknown string, default to no preconditioning
    warnings.warn(
        f"Unknown precond_type '{precond_type}' for Fortran backend, using no preconditioning"
    )
    return 0
475
+
476
+
477
def _get_preconditioner_for_native(A, precond_type: PrecondType, M1_provided):
    """
    Create preconditioner for native Python backend.

    Parameters
    ----------
    A : sparse matrix
        System matrix
    precond_type : None, '', str, or int
        Preconditioner type specification.  Strings are matched
        case-insensitively, as in the Fortran code path.  Integer 0 means
        none, 3 means diagonal, any other integer means ILU.
    M1_provided : preconditioner or None
        User-provided preconditioner (takes precedence)

    Returns
    -------
    M1 : preconditioner or None
        ``None`` when no preconditioning was requested or when every
        attempt to build one failed (a warning is issued in that case).
    """
    # If user provided M1, use it
    if M1_provided is not None:
        return M1_provided

    # No preconditioning requested
    if precond_type is None or precond_type == "" or precond_type is False:
        return None

    if isinstance(precond_type, int):
        # Integer codes (for compatibility)
        if precond_type == 0:
            return None
        precond_str = "diag" if precond_type == 3 else "ilu"
    elif isinstance(precond_type, str):
        # make_preconditioner only knows lower-case names; normalising here
        # keeps "ILU" from silently degrading to the diagonal fallback.
        precond_str = precond_type.lower()
    else:
        precond_str = precond_type

    # Best-effort construction: any failure falls back rather than aborting.
    try:
        return make_preconditioner(A, precond_str)
    except Exception as e:
        # Fallback chain: try diag if the requested type fails
        if precond_str not in ("diag", "jacobi"):
            try:
                warnings.warn(
                    f"Preconditioner '{precond_str}' failed: {e}, falling back to diagonal"
                )
                return make_preconditioner(A, "diag")
            except Exception:
                pass
        warnings.warn("All preconditioners failed, proceeding without preconditioning")
        return None
530
+
531
+
532
def make_preconditioner(
    A: sparse.spmatrix, precond_type: str = "diag", split: bool = False, **kwargs
):
    """
    Create a preconditioner for iterative solvers.

    Parameters
    ----------
    A : sparse matrix
        System matrix
    precond_type : str
        Matched case-insensitively.
        'diag' or 'jacobi': Diagonal (Jacobi) preconditioner
        'ilu0': Incomplete LU with minimal fill
        'ilu' or 'ilut': Incomplete LU with threshold
        'lu': Full LU factorization
        'ssor': the sparse matrix D + omega * L, with D the diagonal of A
        and L its strictly lower triangle
    split : bool
        Only used by 'diag'/'jacobi'.
        If True, return sqrt(D) for split preconditioning (M1=M2=sqrt(D))
        If False, return D for left preconditioning
    **kwargs : dict
        'drop_tol' (default 1e-4) and 'fill_factor' (default 10) for
        'ilu'/'ilut'; 'omega' (default 1.0) for 'ssor'.

    Returns
    -------
    M : preconditioner object
        For split Jacobi, use as: blqmr(A, b, M1=M, M2=M)

    Raises
    ------
    ValueError
        If ``precond_type`` is not one of the names above.

    Notes
    -----
    A failed factorization does not raise: it warns and falls back
    (LU -> ILUT -> ILU(0) -> diagonal).
    """
    kind = precond_type.lower() if isinstance(precond_type, str) else precond_type

    def _ilu0():
        # ILU(0) - no fill-in, fast but may be poor quality
        return _ILUPreconditioner(spilu(A.tocsc(), drop_tol=0, fill_factor=1))

    if kind in ("diag", "jacobi"):
        diag = A.diagonal().copy()
        # Replace (near-)zero pivots so the diagonal stays invertible.
        diag[np.abs(diag) < 1e-14] = 1.0
        if split:
            # M1 = M2 = sqrt(D), gives D^{-1/2} A D^{-1/2}
            return sparse.diags(np.sqrt(diag), format="csr")
        # M1 = D, M2 = None, gives D^{-1} A
        return sparse.diags(diag, format="csr")

    if kind == "ilu0":
        try:
            return _ilu0()
        except Exception as e:
            warnings.warn(f"ILU(0) factorization failed: {e}, falling back to diagonal")
            return make_preconditioner(A, "diag")

    if kind in ("ilu", "ilut"):
        # ILUT - ILU with threshold
        drop_tol = kwargs.get("drop_tol", 1e-4)
        fill_factor = kwargs.get("fill_factor", 10)
        try:
            ilu = spilu(A.tocsc(), drop_tol=drop_tol, fill_factor=fill_factor)
            return _ILUPreconditioner(ilu)
        except Exception as e:
            warnings.warn(f"ILUT factorization failed: {e}, trying ILU(0)")
            try:
                return _ilu0()
            except Exception as e2:
                warnings.warn(f"ILU(0) also failed: {e2}, falling back to diagonal")
                return make_preconditioner(A, "diag")

    if kind == "lu":
        # Full LU - exact factorization (for reference/debugging)
        try:
            return _LUPreconditioner(splu(A.tocsc()))
        except Exception as e:
            warnings.warn(f"LU factorization failed: {e}, falling back to ILUT")
            return make_preconditioner(A, "ilut")

    if kind == "ssor":
        omega = kwargs.get("omega", 1.0)
        D = sparse.diags(A.diagonal(), format="csr")
        L = sparse.tril(A, k=-1, format="csr")
        return (D + omega * L).tocsr()

    raise ValueError(f"Unknown preconditioner type: {precond_type}")
614
+
615
+
616
+ class _LUPreconditioner:
617
+ """Wrapper for full LU preconditioner."""
618
+
619
+ def __init__(self, lu_factor):
620
+ self.lu = lu_factor
621
+ self.shape = (lu_factor.shape[0], lu_factor.shape[1])
622
+ self.dtype = np.float64 # Assume real for now
623
+
624
+ def solve(self, b):
625
+ if b.ndim == 1:
626
+ return self.lu.solve(b)
627
+ else:
628
+ x = np.zeros_like(b)
629
+ for i in range(b.shape[1]):
630
+ x[:, i] = self.lu.solve(b[:, i])
631
+ return x
632
+
633
+
634
+ # =============================================================================
635
+ # Pure-Python Block QMR Solver
636
+ # =============================================================================
637
+
638
+
639
def _blqmr_python_impl(
    A: Union[np.ndarray, sparse.spmatrix],
    B: np.ndarray,
    tol: float = 1e-6,
    maxiter: Optional[int] = None,
    M1=None,
    M2=None,
    x0: Optional[np.ndarray] = None,
    residual: bool = False,
    workspace: Optional[BLQMRWorkspace] = None,
) -> Tuple[np.ndarray, int, float, int, np.ndarray]:
    """Native Python Block QMR implementation (internal).

    Parameters
    ----------
    A : ndarray or sparse matrix
        System matrix.
    B : ndarray
        Right-hand side(s); a 1-D vector is treated as a single column.
    tol : float
        Convergence threshold on the relative residual.
    maxiter : int, optional
        Iteration limit; defaults to ``min(n, 100)``.
    M1, M2 : optional
        Preconditioners.  ``M1`` alone gives left preconditioning, both
        together give split preconditioning; ``M2`` alone is ignored.
        Each may be an ILU/LU wrapper, a sparse matrix, an object with a
        ``solve(rhs)`` method, or a dense matrix.
    x0 : ndarray, optional
        Initial guess (zeros if omitted).
    residual : bool
        If False (default) the quasi-residual is monitored, otherwise a
        residual reconstructed from the recurrence.
    workspace : BLQMRWorkspace, optional
        Reused when its ``n``, ``m`` and ``dtype`` match; otherwise a new
        workspace is allocated.

    Returns
    -------
    (x, flag, relres, iter_count, resv)
        ``flag`` is 0 on convergence, 1 if ``maxiter`` was reached, 2 if the
        preconditioned initial residual contains NaN, 3 if the residual
        stagnated.  ``resv`` holds the residual measure of each iteration.
    """

    def _as_solver(M):
        """Wrap one user-supplied preconditioner in an object with ``solve``."""
        if isinstance(M, (_ILUPreconditioner, _LUPreconditioner)) or sparse.issparse(M):
            return SparsePreconditioner(M, None)
        if hasattr(M, "solve"):
            return M
        return DensePreconditioner(M, None)

    if B.ndim == 1:
        B = B.reshape(-1, 1)

    n, m = B.shape
    is_complex_input = np.iscomplexobj(A) or np.iscomplexobj(B)
    dtype = np.complex128 if is_complex_input else np.float64

    def _col_norms(block):
        """Hermitian 2-norm of every column of ``block``."""
        if is_complex_input:
            return np.sqrt(np.einsum("ij,ij->j", np.conj(block), block).real)
        return np.sqrt(np.einsum("ij,ij->j", block, block))

    if maxiter is None:
        maxiter = min(n, 100)

    if (
        workspace is None
        or workspace.n != n
        or workspace.m != m
        or workspace.dtype != dtype
    ):
        ws = BLQMRWorkspace(n, m, dtype)
    else:
        ws = workspace
        ws.reset()

    # Setup preconditioner - distinguish split vs left-only
    use_split_precond = M1 is not None and M2 is not None
    precond = precond_M1 = precond_M2 = None
    if use_split_precond:
        # Split preconditioning: M1^-1 A M2^-1
        precond_M1 = _as_solver(M1)
        precond_M2 = _as_solver(M2)
    elif M1 is not None:
        # Left-only preconditioning: M1^-1 A
        precond = _as_solver(M1)

    if x0 is None:
        x = np.zeros((n, m), dtype=dtype)
    else:
        x = np.asarray(x0, dtype=dtype).reshape(n, m).copy()

    # Rotating slot indices into the three-deep workspace arrays.
    t3 = 0
    t3n = 2
    t3p = 1

    # Initialize Q matrices (identity)
    ws.Qa[:, :, :] = 0
    ws.Qb[:, :, :] = 0
    ws.Qc[:, :, :] = 0
    ws.Qd[:, :, :] = 0
    ws.Qa[:, :, t3] = np.eye(m, dtype=dtype)
    ws.Qd[:, :, t3n] = np.eye(m, dtype=dtype)
    ws.Qd[:, :, t3] = np.eye(m, dtype=dtype)

    A_is_sparse = sparse.issparse(A)
    if A_is_sparse:
        ws.vt[:] = B - A @ x
    else:
        np.subtract(B, A @ x, out=ws.vt)

    # Apply preconditioner to initial residual.  The returned array is
    # always copied back: a solver may hand back a new array instead of
    # filling a buffer, and plain ``solve(rhs)`` objects take no ``out``.
    if use_split_precond:
        # Solving M1^-1 A M2^-1 y = M1^-1 b with y = M2*x
        ws.vt[:] = precond_M1.solve(ws.vt)
    elif precond is not None:
        ws.vt[:] = precond.solve(ws.vt)
    if (use_split_precond or precond is not None) and np.any(np.isnan(ws.vt)):
        return x, 2, 1.0, 0, np.array([])

    # In true-residual mode the reference norm is that of the initial
    # residual.  It must be taken before qqr(), which may orthonormalise
    # ws.vt in place.
    vt_norm0 = None
    if residual:
        if is_complex_input:
            vt_norm0 = np.max(np.sqrt(np.sum(np.abs(np.conj(ws.vt) * ws.vt), axis=0)))
        else:
            vt_norm0 = np.max(np.sqrt(np.sum(ws.vt * ws.vt, axis=0)))

    # QQR decomposition
    Q, R = qqr(ws.vt)
    ws.v[:, :, t3p] = Q
    ws.beta[:, :, t3p] = R

    # omega: diagonal of standard (conjugated) column norms of v
    ws.omega[:, :, t3p].fill(0)
    np.fill_diagonal(ws.omega[:, :, t3p], _col_norms(ws.v[:, :, t3p]))

    # taut = omega * beta
    ws.taot[:] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]

    isquasires = not residual
    if isquasires:
        Qres0 = np.max(_col_norms(ws.taot))
    else:
        omegat = np.zeros((n, m), dtype=dtype)
        for i in range(m):
            if np.abs(ws.omega[i, i, t3p]) > 1e-14:
                omegat[:, i] = ws.v[:, i, t3p] / ws.omega[i, i, t3p]
        Qres0 = vt_norm0

    # Initial guess already solves the system.
    if Qres0 < 1e-16:
        result = x.real if not is_complex_input else x
        return result, 0, 0.0, 0, np.array([0.0])

    flag, resv, Qres1, relres, iter_count = 1, np.zeros(maxiter), -1.0, 1.0, 0

    for k in range(1, maxiter + 1):
        # Index cycling
        t3 = k % 3
        t3p = (k + 1) % 3
        t3n = (k - 1) % 3
        t3nn = (k - 2) % 3

        if use_split_precond:
            # Split preconditioning: M1^-1 * A * M2^-1 * v
            tmp = precond_M2.solve(ws.v[:, :, t3])
            if A_is_sparse:
                tmp = A @ tmp
            else:
                tmp = np.matmul(A, tmp)
            ws.vt[:] = precond_M1.solve(tmp) - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
        else:
            # A * v is only needed outside split mode.
            if A_is_sparse:
                ws.Av[:] = A @ ws.v[:, :, t3]
            else:
                np.matmul(A, ws.v[:, :, t3], out=ws.Av)
            if precond is not None:
                # Left-only preconditioning: M^-1 * A * v
                ws.vt[:] = precond.solve(ws.Av)
                ws.vt[:] = ws.vt - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
            else:
                ws.vt[:] = ws.Av - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T

        # alpha = v^T * vt (transpose, not conjugate transpose)
        ws.alpha[:] = ws.v[:, :, t3].T @ ws.vt
        ws.vt[:] = ws.vt - ws.v[:, :, t3] @ ws.alpha

        # QQR decomposition
        Q, R = qqr(ws.vt)
        ws.v[:, :, t3p] = Q
        ws.beta[:, :, t3p] = R

        # Compute omega (standard Hermitian norm)
        ws.omega[:, :, t3p].fill(0)
        np.fill_diagonal(ws.omega[:, :, t3p], _col_norms(ws.v[:, :, t3p]))

        # Compute intermediate matrices
        ws.tmp0[:] = ws.omega[:, :, t3n] @ ws.beta[:, :, t3].T
        ws.theta[:] = ws.Qb[:, :, t3nn] @ ws.tmp0
        ws.tmp1[:] = ws.Qd[:, :, t3nn] @ ws.tmp0
        ws.tmp2[:] = ws.omega[:, :, t3] @ ws.alpha
        ws.eta[:] = ws.Qa[:, :, t3n] @ ws.tmp1 + ws.Qb[:, :, t3n] @ ws.tmp2
        ws.zetat[:] = ws.Qc[:, :, t3n] @ ws.tmp1 + ws.Qd[:, :, t3n] @ ws.tmp2

        # Build ZZ matrix and do standard QR
        ws.stacked[:m, :] = ws.zetat
        ws.stacked[m:, :] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]

        QQ, zeta_full = np.linalg.qr(ws.stacked, mode="complete")
        ws.zeta[:] = zeta_full[:m, :]

        if is_complex_input:
            ws.QQ_full[:] = np.conj(QQ.T)
        else:
            ws.QQ_full[:] = QQ.T

        ws.Qa[:, :, t3] = ws.QQ_full[:m, :m]
        ws.Qb[:, :, t3] = ws.QQ_full[:m, m : 2 * m]
        ws.Qc[:, :, t3] = ws.QQ_full[m : 2 * m, :m]
        ws.Qd[:, :, t3] = ws.QQ_full[m : 2 * m, m : 2 * m]

        # Invert zeta (pseudo-inverse when exactly singular)
        try:
            zeta_inv = np.linalg.inv(ws.zeta)
        except np.linalg.LinAlgError:
            zeta_inv = np.linalg.pinv(ws.zeta)

        # Update p, tau, x, taut
        ws.p[:, :, t3] = (
            ws.v[:, :, t3] - ws.p[:, :, t3n] @ ws.eta - ws.p[:, :, t3nn] @ ws.theta
        ) @ zeta_inv
        ws.tau[:] = ws.Qa[:, :, t3] @ ws.taot
        x[:] = x + ws.p[:, :, t3] @ ws.tau
        ws.taot[:] = ws.Qc[:, :, t3] @ ws.taot

        # Compute residual
        if isquasires:
            Qres = np.max(_col_norms(ws.taot))
        else:
            tmp0_diag = np.zeros((m, m), dtype=dtype)
            for i in range(m):
                if np.abs(ws.omega[i, i, t3p]) > 1e-14:
                    tmp0_diag[i, :] = ws.Qd[:, i, t3] / ws.omega[i, i, t3p]
            if is_complex_input:
                omegat = omegat @ np.conj(ws.Qc[:, :, t3].T) + ws.v[
                    :, :, t3p
                ] @ np.conj(tmp0_diag)
                tmp_res = np.conj(omegat @ ws.taot)
                Qres = np.max(
                    np.sqrt(np.sum(np.abs(np.conj(tmp_res) * tmp_res), axis=0))
                )
            else:
                omegat = omegat @ ws.Qc[:, :, t3].T + ws.v[:, :, t3p] @ tmp0_diag
                tmp_res = omegat @ ws.taot
                Qres = np.max(np.sqrt(np.sum(tmp_res * tmp_res, axis=0)))

        resv[k - 1] = Qres

        # Stagnation: the residual measure no longer changes.
        if k > 1 and abs(Qres - Qres1) < np.finfo(dtype).eps:
            flag, iter_count = 3, k
            break

        Qres1, relres, iter_count = Qres, Qres / Qres0, k

        if relres <= tol:
            flag = 0
            break

    resv = resv[:iter_count]

    # For split preconditioning, recover x = M2^-1 * y
    if use_split_precond:
        x = precond_M2.solve(x)

    result = x.real if not is_complex_input else x
    return result, flag, relres, iter_count, resv
936
+
937
+
938
+ # =============================================================================
939
+ # High-Level Solver Interface
940
+ # =============================================================================
941
+
942
+
943
def blqmr_solve(
    Ap: np.ndarray,
    Ai: np.ndarray,
    Ax: np.ndarray,
    b: np.ndarray,
    *,
    x0: Optional[np.ndarray] = None,
    tol: float = 1e-6,
    maxiter: Optional[int] = None,
    droptol: float = 0.001,
    precond_type: PrecondType = "ilu",
    zero_based: bool = True,
) -> BLQMRResult:
    """
    Solve the sparse system Ax = b with the Block QMR algorithm.

    Dispatches to the Fortran extension when it was imported successfully
    and to the pure-Python implementation otherwise.

    Parameters
    ----------
    Ap : ndarray of int32
        CSC column pointers, length n+1.
    Ai : ndarray of int32
        CSC row indices, length nnz.
    Ax : ndarray of float64
        Non-zero values, length nnz.
    b : ndarray of float64
        Right-hand side vector, length n.
    x0 : ndarray, optional
        Initial guess.
    tol : float, default 1e-6
        Convergence tolerance on the relative residual.
    maxiter : int, optional
        Iteration limit; defaults to n.
    droptol : float, default 0.001
        ILU drop tolerance (only forwarded to the Fortran backend).
    precond_type : None, '', or str, default 'ilu'
        Preconditioner selection:
        - None or '': no preconditioning
        - 'ilu', 'ilu0', 'ilut': incomplete LU
        - 'diag', 'jacobi': diagonal (Jacobi)
        - integers 2 (ILU) or 3 (diagonal) are accepted as well
    zero_based : bool, default True
        True if Ap and Ai use 0-based indices (Python/C convention),
        False for 1-based indices (Fortran convention).

    Returns
    -------
    BLQMRResult
        Solution together with convergence information.
    """
    n = len(Ap) - 1
    if maxiter is None:
        maxiter = n

    # Keyword arguments understood by both backends.
    shared = dict(
        x0=x0,
        tol=tol,
        maxiter=maxiter,
        precond_type=precond_type,
        zero_based=zero_based,
    )
    if not BLQMR_EXT:
        return _blqmr_solve_native_csc(Ap, Ai, Ax, b, **shared)
    return _blqmr_solve_fortran(Ap, Ai, Ax, b, droptol=droptol, **shared)
1024
+
1025
+
1026
def _blqmr_solve_fortran(
    Ap, Ai, Ax, b, *, x0, tol, maxiter, droptol, precond_type, zero_based
) -> BLQMRResult:
    """Fortran backend for blqmr_solve.

    Converts the CSC arrays to Fortran-ordered int32/float64 arrays, shifts
    the indices to 1-based when ``zero_based`` is true, and calls
    ``_blqmr.blqmr_solve_real``.

    ``x0`` is accepted for signature parity with the native backend but is
    not passed to the extension; a warning is issued when one is supplied.

    Raises
    ------
    ValueError
        If ``Ai`` and ``Ax`` differ in length, or ``b`` does not have
        ``len(Ap) - 1`` entries.
    """
    n = len(Ap) - 1
    nnz = len(Ax)

    Ap = np.asfortranarray(Ap, dtype=np.int32)
    Ai = np.asfortranarray(Ai, dtype=np.int32)
    Ax = np.asfortranarray(Ax, dtype=np.float64)
    b = np.asfortranarray(b, dtype=np.float64)

    if len(Ai) != nnz:
        raise ValueError(f"Ai length ({len(Ai)}) must match Ax length ({nnz})")
    if len(b) != n:
        raise ValueError(f"b length ({len(b)}) must match matrix size ({n})")

    if x0 is not None:
        # Make the dropped initial guess visible instead of ignoring it.
        warnings.warn("x0 is ignored by the Fortran backend of blqmr_solve")

    if zero_based:
        # The extension expects 1-based (Fortran) indices.
        Ap = Ap + 1
        Ai = Ai + 1

    pcond_type = _parse_precond_type_for_fortran(precond_type)

    x, flag, niter, relres = _blqmr.blqmr_solve_real(
        n, nnz, Ap, Ai, Ax, b, maxiter, tol, droptol, pcond_type
    )

    return BLQMRResult(
        x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
    )
1056
+
1057
+
1058
def _blqmr_solve_native_csc(
    Ap, Ai, Ax, b, *, x0, tol, maxiter, precond_type, zero_based
) -> BLQMRResult:
    """
    Native Python backend for blqmr_solve with CSC input.

    Parameters
    ----------
    Ap, Ai, Ax : array_like
        CSC column pointers, row indices and values of the n x n matrix.
    b : array_like
        Right-hand side with n entries.
    x0 : ndarray or None
        Initial guess forwarded to the Python solver.
    tol : float
        Convergence tolerance forwarded to the Python solver.
    maxiter : int
        Iteration limit forwarded to the Python solver.
    precond_type : None, '', or str
        Preconditioner selector handed to
        ``_get_preconditioner_for_native``.
    zero_based : bool
        If False, ``Ap`` and ``Ai`` are 1-based and are shifted down by one.

    Returns
    -------
    BLQMRResult
        Solution (flattened to 1-D), flag, iteration count, relative
        residual and residual history.

    Raises
    ------
    ValueError
        If ``Ai`` and ``Ax`` differ in length, or if ``len(b)`` differs from
        ``len(Ap) - 1``.
    """
    # Coerce the index arrays so that plain sequences (lists/tuples) work the
    # same way they do on the Fortran path; ``list - 1`` would raise.
    Ap = np.asarray(Ap)
    Ai = np.asarray(Ai)
    n = len(Ap) - 1

    # Same consistency checks (and messages) as the Fortran backend.
    if len(Ai) != len(Ax):
        raise ValueError(
            f"Ai length ({len(Ai)}) must match Ax length ({len(Ax)})"
        )
    if len(b) != n:
        raise ValueError(f"b length ({len(b)}) must match matrix size ({n})")

    if not zero_based:
        Ap = Ap - 1
        Ai = Ai - 1

    A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))

    M1 = _get_preconditioner_for_native(A, precond_type, None)

    x, flag, relres, niter, resv = _blqmr_python_impl(
        A, b, tol=tol, maxiter=maxiter, M1=M1, x0=x0
    )

    # Single right-hand side: always hand back a flat vector.
    if x.ndim > 1:
        x = x.ravel()

    return BLQMRResult(x=x, flag=flag, iter=niter, relres=relres, resv=resv)
1080
+
1081
+
1082
def blqmr_solve_multi(
    Ap: np.ndarray,
    Ai: np.ndarray,
    Ax: np.ndarray,
    B: np.ndarray,
    *,
    tol: float = 1e-6,
    maxiter: Optional[int] = None,
    droptol: float = 0.001,
    precond_type: PrecondType = "ilu",
    zero_based: bool = True,
) -> BLQMRResult:
    """
    Solve the sparse system AX = B for several right-hand sides at once.

    Dispatches to the Fortran extension when ``BLQMR_EXT`` is true and to the
    pure-Python implementation otherwise.

    Parameters
    ----------
    Ap, Ai, Ax : ndarray
        CSC column pointers, row indices and values of the matrix.
    B : ndarray
        Right-hand sides.
    tol : float, default 1e-6
        Convergence tolerance.
    maxiter : int, optional
        Maximum iterations. Defaults to ``len(Ap) - 1``.
    droptol : float, default 0.001
        Drop tolerance; only passed to the Fortran backend.
    precond_type : None, '', or str, default 'ilu'
        Preconditioner type (see blqmr_solve for details)
    zero_based : bool, default True
        Whether ``Ap`` and ``Ai`` use 0-based indexing.

    Returns
    -------
    BLQMRResult
        Result object produced by the selected backend.
    """
    n = len(Ap) - 1

    # Options understood by both backends.
    shared_options = dict(
        tol=tol,
        maxiter=n if maxiter is None else maxiter,
        precond_type=precond_type,
        zero_based=zero_based,
    )

    if not BLQMR_EXT:
        return _blqmr_solve_multi_native(Ap, Ai, Ax, B, **shared_options)

    return _blqmr_solve_multi_fortran(
        Ap, Ai, Ax, B, droptol=droptol, **shared_options
    )
1132
+
1133
+
1134
def _blqmr_solve_multi_fortran(
    Ap, Ai, Ax, B, *, tol, maxiter, droptol, precond_type, zero_based
) -> BLQMRResult:
    """
    Fortran backend for blqmr_solve_multi.

    Converts the CSC arrays and ``B`` to Fortran-ordered int32/float64
    arrays, validates their sizes, shifts indices to 1-based when needed and
    calls ``_blqmr.blqmr_solve_real_multi``.

    Parameters
    ----------
    Ap, Ai, Ax : array_like
        CSC column pointers, row indices and values of the n x n matrix.
    B : array_like
        Right-hand sides, shape (n,) or (n, nrhs).
    tol, maxiter, droptol
        Passed through to the extension.
    precond_type : None, '', or str
        Translated by ``_parse_precond_type_for_fortran``.
    zero_based : bool
        If True, ``Ap`` and ``Ai`` are shifted up by one before the call.

    Returns
    -------
    BLQMRResult
        Copy of the solution block plus flag, iteration count and relative
        residual.

    Raises
    ------
    ValueError
        If ``Ai`` and ``Ax`` differ in length, or if ``B`` is not a vector or
        matrix with ``len(Ap) - 1`` rows.
    """
    n = len(Ap) - 1
    nnz = len(Ax)

    Ap = np.asfortranarray(Ap, dtype=np.int32)
    Ai = np.asfortranarray(Ai, dtype=np.int32)
    Ax = np.asfortranarray(Ax, dtype=np.float64)
    B = np.asfortranarray(B, dtype=np.float64)

    if B.ndim == 1:
        B = B.reshape(-1, 1, order="F")

    # Validate sizes before calling the extension, mirroring the checks done
    # by the single right-hand-side Fortran backend.
    if len(Ai) != nnz:
        raise ValueError(f"Ai length ({len(Ai)}) must match Ax length ({nnz})")
    if B.ndim != 2 or B.shape[0] != n:
        raise ValueError(
            f"B shape {B.shape} must be ({n},) or ({n}, nrhs) to match "
            f"matrix size ({n})"
        )
    nrhs = B.shape[1]

    if zero_based:
        Ap = Ap + 1
        Ai = Ai + 1

    # Convert precond_type string to Fortran integer code
    pcond_type = _parse_precond_type_for_fortran(precond_type)

    X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
        n, nnz, nrhs, Ap, Ai, Ax, B, maxiter, tol, droptol, pcond_type
    )

    return BLQMRResult(
        x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
    )
1164
+
1165
+
1166
def _blqmr_solve_multi_native(
    Ap, Ai, Ax, B, *, tol, maxiter, precond_type, zero_based
) -> BLQMRResult:
    """
    Native Python backend for blqmr_solve_multi.

    Parameters
    ----------
    Ap, Ai, Ax : array_like
        CSC column pointers, row indices and values of the n x n matrix.
    B : array_like
        Right-hand sides, shape (n,) or (n, nrhs). A 1-D input is treated as
        a single column.
    tol, maxiter
        Forwarded to the Python solver.
    precond_type : None, '', or str
        Preconditioner selector handed to
        ``_get_preconditioner_for_native``.
    zero_based : bool
        If False, ``Ap`` and ``Ai`` are 1-based and are shifted down by one.

    Returns
    -------
    BLQMRResult
        Solution block, flag, iteration count, relative residual and
        residual history.

    Raises
    ------
    ValueError
        If ``Ai`` and ``Ax`` differ in length, or if ``B`` is not a vector or
        matrix with ``len(Ap) - 1`` rows.
    """
    # Coerce inputs so array-likes behave as they do on the Fortran path;
    # raw lists have neither ``.ndim`` nor support for ``- 1``.
    Ap = np.asarray(Ap)
    Ai = np.asarray(Ai)
    B = np.asarray(B)
    n = len(Ap) - 1

    if B.ndim == 1:
        B = B.reshape(-1, 1)

    if len(Ai) != len(Ax):
        raise ValueError(
            f"Ai length ({len(Ai)}) must match Ax length ({len(Ax)})"
        )
    if B.ndim != 2 or B.shape[0] != n:
        raise ValueError(
            f"B shape {B.shape} must be ({n},) or ({n}, nrhs) to match "
            f"matrix size ({n})"
        )

    if not zero_based:
        Ap = Ap - 1
        Ai = Ai - 1

    A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))

    M1 = _get_preconditioner_for_native(A, precond_type, None)

    x, flag, relres, niter, resv = _blqmr_python_impl(
        A, B, tol=tol, maxiter=maxiter, M1=M1
    )

    return BLQMRResult(x=x, flag=flag, iter=niter, relres=relres, resv=resv)
1188
+
1189
+
1190
def blqmr_scipy(
    A,
    b: np.ndarray,
    x0: Optional[np.ndarray] = None,
    tol: float = 1e-6,
    maxiter: Optional[int] = None,
    M=None,
    **kwargs,
) -> Tuple[np.ndarray, int]:
    """
    Call blqmr() with a SciPy-style signature and return ``(x, flag)``.

    Parameters
    ----------
    A : sparse matrix or ndarray
        System matrix.
    b : ndarray
        Right-hand side vector.
    x0 : ndarray, optional
        Initial guess.
    tol : float
        Convergence tolerance.
    maxiter : int, optional
        Maximum iterations.
    M : preconditioner, optional
        Preconditioner, passed to blqmr() as ``M1``.
    **kwargs
        Any further keyword arguments accepted by blqmr().

    Returns
    -------
    x : ndarray
        Solution vector.
    flag : int
        Convergence flag (0 = converged).
    """
    outcome = blqmr(A, b, tol=tol, maxiter=maxiter, M1=M, x0=x0, **kwargs)
    solution, status = outcome.x, outcome.flag
    return solution, status
1228
+
1229
+
1230
def blqmr(
    A: Union[np.ndarray, sparse.spmatrix],
    B: np.ndarray,
    tol: float = 1e-6,
    maxiter: Optional[int] = None,
    M1=None,
    M2=None,
    x0: Optional[np.ndarray] = None,
    residual: bool = False,
    workspace: Optional[BLQMRWorkspace] = None,
    droptol: float = 0.001,
    precond_type: PrecondType = "ilu",
) -> BLQMRResult:
    """
    Block Quasi-Minimal-Residual (BL-QMR) solver - main interface.

    Uses the Fortran extension when it is available and no custom
    preconditioner is supplied; otherwise uses the pure-Python
    implementation.

    Parameters
    ----------
    A : ndarray or sparse matrix
        Symmetric n x n matrix (can be complex)
    B : ndarray
        Right-hand side vector/matrix (n,) or (n x m)
    tol : float
        Convergence tolerance (default: 1e-6)
    maxiter : int, optional
        Maximum iterations (default: n)
    M1, M2 : preconditioner, optional
        Custom preconditioners. If provided, precond_type is ignored.
        M = M1 @ M2 for split preconditioning. Only the Python backend
        accepts custom preconditioners, so supplying M1 or M2 selects the
        Python backend even when the Fortran extension is installed.
    x0 : ndarray, optional
        Initial guess
    residual : bool
        If True, use true residual for convergence (Python backend only)
    workspace : BLQMRWorkspace, optional
        Pre-allocated workspace (Python backend only)
    droptol : float, default 0.001
        Drop tolerance for ILU preconditioner (Fortran backend only)
    precond_type : None, '', or str, default 'ilu'
        Preconditioner type (ignored if M1 is provided):
        - None or '': No preconditioning
        - 'ilu', 'ilu0', 'ilut': Incomplete LU
        - 'diag', 'jacobi': Diagonal (Jacobi)
        - 'lu': Full LU (expensive, for debugging)
        - For Fortran: integers 2 (ILU) or 3 (diagonal) also accepted

    Returns
    -------
    BLQMRResult
        Result object containing:
        - x: Solution array
        - flag: 0 = converged, 1 = max iterations, 2 = preconditioner singular, 3 = stagnated
        - iter: Number of iterations
        - relres: Final relative residual
        - resv: Residual history (Python backend only)
    """
    # The Fortran entry point has no way to receive M1/M2. Dispatching there
    # would silently replace the caller's preconditioner with precond_type,
    # so honour an explicit preconditioner by using the Python backend.
    has_custom_preconditioner = M1 is not None or M2 is not None

    if BLQMR_EXT and not has_custom_preconditioner:
        return _blqmr_fortran(
            A,
            B,
            tol=tol,
            maxiter=maxiter,
            x0=x0,
            droptol=droptol,
            precond_type=precond_type,
        )

    return _blqmr_native(
        A,
        B,
        tol=tol,
        maxiter=maxiter,
        M1=M1,
        M2=M2,
        x0=x0,
        residual=residual,
        workspace=workspace,
        precond_type=precond_type,
    )
1310
+
1311
+
1312
def _blqmr_fortran(
    A: Union[np.ndarray, sparse.spmatrix],
    B: np.ndarray,
    *,
    tol: float,
    maxiter: Optional[int],
    x0: Optional[np.ndarray],
    droptol: float,
    precond_type: PrecondType,
) -> BLQMRResult:
    """
    Fortran backend for blqmr().

    Converts ``A`` to CSC with sorted, 1-based int32 indices, picks the real
    or complex and single or multi right-hand-side extension routine, and
    wraps its output in a BLQMRResult.

    Parameters
    ----------
    A : ndarray or sparse matrix
        Square n x n system matrix.
    B : array_like
        Right-hand side(s), shape (n,), (n, 1) or (n, m).
    tol : float
        Convergence tolerance passed to the extension.
    maxiter : int or None
        Iteration limit; ``None`` means n.
    x0 : ndarray or None
        Accepted for signature parity; not forwarded to the extension by
        this function.
    droptol : float
        Drop tolerance passed to the extension.
    precond_type : None, '', or str
        Translated by ``_parse_precond_type_for_fortran``.

    Returns
    -------
    BLQMRResult
        Copy of the solution (1-D for a single right-hand side, 2-D
        otherwise) plus flag, iteration count and relative residual.

    Raises
    ------
    ValueError
        If ``A`` is not square, or ``B`` is not 1-D/2-D with n rows.
    """
    A_csc = sparse.csc_matrix(A)

    n = A_csc.shape[0]
    if A_csc.shape[1] != n:
        raise ValueError(f"A must be square, got shape {A_csc.shape}")

    # Accept array-likes (e.g. lists) for B; ``.ndim`` needs an ndarray.
    B = np.asarray(B)
    if B.ndim not in (1, 2) or B.shape[0] != n:
        raise ValueError(
            f"B shape {B.shape} must be ({n},) or ({n}, m) to match "
            f"matrix size ({n})"
        )

    # CRITICAL: Sort indices for UMFPACK compatibility.
    # csc_matrix(A) can share index/data buffers with a CSC input, so sort
    # a copy rather than reordering the caller's arrays in place.
    if not A_csc.has_sorted_indices:
        A_csc = A_csc.sorted_indices()

    nnz = A_csc.nnz

    if maxiter is None:
        maxiter = n

    # Convert to Fortran format (1-based indexing)
    Ap_f = np.asfortranarray(A_csc.indptr.astype(np.int32) + 1, dtype=np.int32)
    Ai_f = np.asfortranarray(A_csc.indices.astype(np.int32) + 1, dtype=np.int32)

    pcond_type = _parse_precond_type_for_fortran(precond_type)

    # A complex matrix or right-hand side selects the complex routines.
    is_complex = np.iscomplexobj(A) or np.iscomplexobj(B)
    dtype = np.complex128 if is_complex else np.float64

    Ax_f = np.asfortranarray(A_csc.data, dtype=dtype)

    single_rhs = B.ndim == 1 or B.shape[1] == 1

    if single_rhs:
        solve = (
            _blqmr.blqmr_solve_complex if is_complex else _blqmr.blqmr_solve_real
        )
        b_f = np.asfortranarray(B.ravel(), dtype=dtype)
        x, flag, niter, relres = solve(
            n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, pcond_type
        )
    else:
        # Multiple RHS - use block method
        solve = (
            _blqmr.blqmr_solve_complex_multi
            if is_complex
            else _blqmr.blqmr_solve_real_multi
        )
        B_f = np.asfortranarray(B, dtype=dtype)
        nrhs = B_f.shape[1]
        x, flag, niter, relres = solve(
            n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, pcond_type
        )

    return BLQMRResult(
        x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
    )
1393
+
1394
+
1395
def _blqmr_native(
    A: Union[np.ndarray, sparse.spmatrix],
    B: np.ndarray,
    *,
    tol: float,
    maxiter: Optional[int],
    M1,
    M2,
    x0: Optional[np.ndarray],
    residual: bool,
    workspace: Optional[BLQMRWorkspace],
    precond_type: PrecondType,
) -> BLQMRResult:
    """
    Run blqmr() on the pure-Python implementation.

    When no ``M1`` is given, a preconditioner is built from ``precond_type``
    via ``_get_preconditioner_for_native``; a caller-supplied ``M1`` is used
    unchanged. A solution with exactly one column is returned as a 1-D
    array.
    """
    left_precond = M1
    if left_precond is None:
        # The preconditioner builder is given a sparse matrix; dense input
        # is converted to CSC first.
        sparse_A = A if sparse.issparse(A) else sparse.csc_matrix(A)
        left_precond = _get_preconditioner_for_native(
            sparse_A, precond_type, None
        )

    solution, status, rel_residual, n_iterations, history = _blqmr_python_impl(
        A,
        B,
        tol=tol,
        maxiter=maxiter,
        M1=left_precond,
        M2=M2,
        x0=x0,
        residual=residual,
        workspace=workspace,
    )

    # Collapse an (n, 1) solution block to a flat vector.
    is_single_column = solution.ndim > 1 and solution.shape[1] == 1
    if is_single_column:
        solution = solution.ravel()

    return BLQMRResult(
        x=solution,
        flag=status,
        iter=n_iterations,
        relres=rel_residual,
        resv=history,
    )
1431
+
1432
+
1433
+ # =============================================================================
1434
+ # Test Function
1435
+ # =============================================================================
1436
+
1437
+
1438
def _test():
    """
    Solve a small 5x5 system with blqmr() and print a short report.

    Returns the ``converged`` attribute of the solver result.
    """
    backend_name = "Fortran" if BLQMR_EXT else "Pure Python"
    banner = (
        "BLIT BLQMR Test",
        "=" * 40,
        f"Fortran backend available: {BLQMR_EXT}",
        f"Numba acceleration available: {HAS_NUMBA}",
        f"Using backend: {backend_name}",
        "",
    )
    for line in banner:
        print(line)

    # CSC components of the 5x5 test matrix
    n = 5
    col_ptr = np.array([0, 2, 5, 9, 10, 12], dtype=np.int32)
    row_idx = np.array([0, 1, 0, 2, 4, 1, 2, 3, 4, 2, 1, 4], dtype=np.int32)
    values = np.array(
        [2.0, 3.0, 3.0, -1.0, 4.0, 4.0, -3.0, 1.0, 2.0, 2.0, 6.0, 1.0],
        dtype=np.float64,
    )
    b = np.array([8.0, 45.0, -3.0, 3.0, 19.0], dtype=np.float64)

    # Assemble the sparse matrix from its CSC parts
    A = sparse.csc_matrix((values, row_idx, col_ptr), shape=(n, n))

    print(f"Matrix: {n}x{n}, nnz={len(values)}")
    print(f"b: {b}")
    print("\nCalling blqmr()...")

    result = blqmr(A, b, tol=1e-8)

    print(f"\n{result}")
    print(f"Solution: {result.x}")

    # Report the residual norm of the computed solution
    res = np.linalg.norm(A @ result.x - b)
    print(f"||Ax - b|| = {res:.2e}")

    return result.converged
1473
+
1474
+
1475
# Running this module as a script executes the quick self-test above; its
# return value is not used here.
if __name__ == "__main__":
    _test()