blocksolver 0.8.3__cp310-cp310-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
blocksolver/blqmr.py ADDED
@@ -0,0 +1,1319 @@
1
+ """
2
+ BLQMR - Block Quasi-Minimal-Residual sparse linear solver.
3
+
4
+ This module provides a unified interface that uses the Fortran extension
5
+ when available, falling back to a pure-Python implementation otherwise.
6
+ """
7
+
8
+ import numpy as np
9
+ from scipy import sparse
10
+ from scipy.sparse.linalg import splu, spilu
11
+ from dataclasses import dataclass
12
+ from typing import Optional, Tuple, Union
13
+ import warnings
14
+
15
+ __all__ = [
16
+ "blqmr_solve",
17
+ "blqmr_solve_multi",
18
+ "blqmr_scipy",
19
+ "blqmr",
20
+ "BLQMRResult",
21
+ "BLQMR_EXT",
22
+ "qqr",
23
+ "BLQMRWorkspace",
24
+ "SparsePreconditioner",
25
+ "DensePreconditioner",
26
+ "make_preconditioner",
27
+ ]
28
+
29
+ # =============================================================================
30
+ # Backend Detection
31
+ # =============================================================================
32
+
33
# Handle to the compiled extension module; remains None when neither import works.
_blqmr = None

try:
    # First choice: the extension shipped inside the ``blocksolver`` package.
    from blocksolver import _blqmr
except ImportError:
    try:
        # Second choice: a top-level ``_blqmr`` module on the import path.
        import _blqmr
    except ImportError:
        pass

# True exactly when one of the imports above succeeded.
BLQMR_EXT = _blqmr is not None
47
+
48
# Optional Numba acceleration
try:
    from numba import njit

    HAS_NUMBA = True
except Exception:
    # Any failure while importing Numba (not only ImportError) disables the
    # accelerated path; the original handler was equally broad.
    HAS_NUMBA = False

    def njit(*args, **kwargs):
        """No-op replacement for ``numba.njit`` used when Numba is unavailable.

        Supports both decorator spellings:

        * ``@njit`` - called with the function itself, which is returned as is.
        * ``@njit(cache=True)`` - called with options only; returns a decorator
          that hands the function back unchanged.
        """
        if len(args) == 1 and callable(args[0]):
            # Bare ``@njit``: the single positional argument is the function.
            return args[0]

        def decorator(func):
            return func

        return decorator
61
+
62
+
63
+ # =============================================================================
64
+ # Result Container
65
+ # =============================================================================
66
+
67
+
68
@dataclass
class BLQMRResult:
    """Outcome of a BLQMR solve: the solution plus convergence diagnostics."""

    x: np.ndarray  # solution array
    flag: int  # status code; 0 means the solver converged
    iter: int  # iteration count reported by the solver
    relres: float  # final relative residual
    resv: Optional[np.ndarray] = None  # residual history, when the backend supplies it

    @property
    def converged(self) -> bool:
        """True when the status flag equals 0."""
        return self.flag == 0

    def __repr__(self) -> str:
        """Compact summary: status, iteration count, residual and active backend."""
        outcome = f"flag={self.flag}"
        if self.converged:
            outcome = "converged"
        # The backend label reflects the module-level extension flag.
        engine = "python"
        if BLQMR_EXT:
            engine = "fortran"
        return (
            f"BLQMRResult({outcome}, iter={self.iter}, "
            f"relres={self.relres:.2e}, backend={engine})"
        )
86
+
87
+
88
+ # =============================================================================
89
+ # Quasi-QR Decomposition
90
+ # =============================================================================
91
+
92
+
93
@njit(cache=True)
def _qqr_kernel_complex(Q, R, n, m):
    """Quasi-QR kernel for complex arrays (modified Gram-Schmidt, in place).

    ``Q`` (n x m) is overwritten column by column with the quasi-orthonormal
    basis and ``R`` (m x m) receives the upper-triangular coefficients.
    All inner products are the quasi inner product ``sum(x * y)``; no complex
    conjugation is applied anywhere.
    """
    for j in range(m):
        # Quasi inner product of column j with itself (no conjugation).
        r_jj_sq = 0.0j
        for i in range(n):
            r_jj_sq += Q[i, j] * Q[i, j]
        r_jj = np.sqrt(r_jj_sq)
        R[j, j] = r_jj
        # Skip normalisation for a (numerically) zero quasi-norm.
        if abs(r_jj) > 1e-14:
            inv_r_jj = 1.0 / r_jj
            for i in range(n):
                Q[i, j] *= inv_r_jj
        # Remove the component along column j from every later column.
        for k in range(j + 1, m):
            dot = 0.0j
            for i in range(n):
                dot += Q[i, j] * Q[i, k]
            R[j, k] = dot
            for i in range(n):
                Q[i, k] -= Q[i, j] * dot


@njit(cache=True)
def _qqr_kernel_real(Q, R, n, m):
    """Quasi-QR kernel for real arrays (modified Gram-Schmidt, in place).

    Same algorithm as the complex kernel with real accumulators: ``Q`` (n x m)
    is overwritten with the orthonormalised columns and ``R`` (m x m) receives
    the upper-triangular coefficients.
    """
    for j in range(m):
        r_jj_sq = 0.0
        for i in range(n):
            r_jj_sq += Q[i, j] * Q[i, j]
        r_jj = np.sqrt(r_jj_sq)
        R[j, j] = r_jj
        # Skip normalisation for a (numerically) zero column.
        if abs(r_jj) > 1e-14:
            inv_r_jj = 1.0 / r_jj
            for i in range(n):
                Q[i, j] *= inv_r_jj
        # Remove the component along column j from every later column.
        for k in range(j + 1, m):
            dot = 0.0
            for i in range(n):
                dot += Q[i, j] * Q[i, k]
            R[j, k] = dot
            for i in range(n):
                Q[i, k] -= Q[i, j] * dot
181
+
182
+
183
def qqr(
    A: np.ndarray, tol: float = 0, use_numba: bool = True
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Quasi-QR decomposition using modified Gram-Schmidt with quasi inner product.

    For complex symmetric systems, uses <x,y>_Q = sum(x_k * y_k) without conjugation.
    The input array is never modified; the factorisation works on a private copy.

    Parameters
    ----------
    A : ndarray
        Input matrix (n x m)
    tol : float
        Tolerance (unused, for API compatibility)
    use_numba : bool
        If True and Numba available, use JIT-compiled kernel

    Returns
    -------
    Q : ndarray
        Quasi-orthonormal columns (n x m)
    R : ndarray
        Upper triangular matrix (m x m)
    """
    n, m = A.shape
    is_complex = np.iscomplexobj(A)
    dtype = np.complex128 if is_complex else np.float64

    # np.array copies unconditionally.  np.ascontiguousarray would hand back the
    # caller's own buffer when A is already C-contiguous with this dtype, and
    # the in-place Gram-Schmidt sweep below would then overwrite the input.
    Q = np.array(A, dtype=dtype, order="C")
    R = np.zeros((m, m), dtype=dtype)

    if use_numba and HAS_NUMBA:
        if is_complex:
            _qqr_kernel_complex(Q, R, n, m)
        else:
            _qqr_kernel_real(Q, R, n, m)
    else:
        for j in range(m):
            qj = Q[:, j]
            # Quasi inner product: plain sum(q * q) with NO conjugation
            # (a Hermitian product such as np.vdot would conjugate one factor).
            r_jj = np.sqrt(np.sum(qj * qj))
            R[j, j] = r_jj
            # Skip normalisation for a (numerically) zero quasi-norm.
            if np.abs(r_jj) > 1e-14:
                Q[:, j] *= 1.0 / r_jj
            # Remove the component along column j from every later column,
            # again using the unconjugated product.
            for k in range(j + 1, m):
                R[j, k] = np.sum(Q[:, j] * Q[:, k])
                Q[:, k] -= R[j, k] * Q[:, j]

    return Q, R
238
+
239
+
240
+ # =============================================================================
241
+ # Preconditioner Classes
242
+ # =============================================================================
243
+
244
+
245
+ class _ILUPreconditioner:
246
+ """Wrapper for ILU preconditioner to work with blqmr."""
247
+
248
+ def __init__(self, ilu_factor):
249
+ self.ilu = ilu_factor
250
+ self.shape = (ilu_factor.shape[0], ilu_factor.shape[1])
251
+ self.dtype = ilu_factor.L.dtype
252
+
253
+ def solve(self, b):
254
+ # Convert to real if needed for real ILU
255
+ b_solve = b.real if np.isrealobj(self.ilu.L.data) and np.iscomplexobj(b) else b
256
+ if b_solve.ndim == 1:
257
+ return self.ilu.solve(b_solve)
258
+ else:
259
+ x = np.zeros_like(b_solve)
260
+ for i in range(b_solve.shape[1]):
261
+ x[:, i] = self.ilu.solve(b_solve[:, i])
262
+ return x
263
+
264
+
265
class SparsePreconditioner:
    """Efficient sparse preconditioner using LU factorization.

    Accepts one or two stages (``M1`` and optionally ``M2``).  Each stage is
    either an already-factorised wrapper (``_ILUPreconditioner`` /
    ``_LUPreconditioner``), which is used as is, or a matrix, which is converted
    to CSC and factorised with ``splu``.  ``solve`` applies stage 1 and then,
    if present, stage 2.
    """

    __slots__ = ("lu1", "lu2", "is_two_part", "is_ilu1", "is_ilu2")

    def __init__(self, M1, M2=None):
        wrappers = (_ILUPreconditioner, _LUPreconditioner)
        self.is_two_part = M2 is not None
        # isinstance(None, ...) is False, so a missing stage is never "wrapped".
        self.is_ilu1 = isinstance(M1, wrappers)
        self.is_ilu2 = isinstance(M2, wrappers)
        self.lu1 = self._factorize(M1, self.is_ilu1)
        self.lu2 = self._factorize(M2, self.is_ilu2)

    @staticmethod
    def _factorize(M, is_wrapped):
        """Return a solver for one stage: None, the wrapper itself, or ``splu(M)``."""
        if M is None:
            return None
        if is_wrapped:
            return M
        if not sparse.isspmatrix_csc(M):
            M = sparse.csc_matrix(M)
        return splu(M)

    @staticmethod
    def _apply(factor, is_wrapped, rhs, out):
        """Solve one stage for ``rhs`` and return the array holding the result.

        The result is written into ``out`` whenever that is lossless, so callers
        that pass a buffer and ignore the return value still see the solution.
        ``rhs`` and ``out`` may be the same array.
        """
        if is_wrapped:
            result = factor.solve(rhs)
            if np.can_cast(result.dtype, out.dtype):
                # e.g. real result into a complex buffer: fill the caller's buffer.
                out[:] = result
                return out
            # Writing would lose information (e.g. complex result, real buffer):
            # hand back the freshly computed array instead.
            return result
        if rhs.ndim == 1:
            out[:] = factor.solve(rhs)
        else:
            # Raw splu factors are applied one column at a time.
            for col in range(rhs.shape[1]):
                out[:, col] = factor.solve(rhs[:, col])
        return out

    def solve(self, b: np.ndarray, out: Optional[np.ndarray] = None) -> np.ndarray:
        """Apply the preconditioner to ``b``.

        Parameters
        ----------
        b : ndarray
            Right-hand side, shape (n,) or (n, k).
        out : ndarray, optional
            Buffer for the result; allocated with ``np.empty_like(b)`` if omitted.

        Returns
        -------
        ndarray
            The solution.  This is ``out`` unless storing the result there would
            be a lossy cast.  When no first stage was supplied, ``b`` itself is
            returned unchanged.
        """
        if self.lu1 is None:
            return b
        if out is None:
            out = np.empty_like(b)

        out = self._apply(self.lu1, self.is_ilu1, b, out)
        if self.is_two_part:
            # The second stage consumes the output of the first in place.
            out = self._apply(self.lu2, self.is_ilu2, out, out)
        return out
326
+
327
+
328
class DensePreconditioner:
    """Dense preconditioner backed by LU factorisations of one or two matrices."""

    __slots__ = ("lu1", "piv1", "lu2", "piv2", "is_two_part")

    def __init__(self, M1: Optional[np.ndarray], M2: Optional[np.ndarray] = None):
        """Factorise each supplied matrix once; a missing matrix leaves its slots as None."""
        from scipy.linalg import lu_factor

        self.is_two_part = M2 is not None
        self.lu1, self.piv1 = (None, None) if M1 is None else lu_factor(M1)
        self.lu2, self.piv2 = (None, None) if M2 is None else lu_factor(M2)

    def solve(self, b: np.ndarray, out: Optional[np.ndarray] = None) -> np.ndarray:
        """Apply the stored factorisation(s) to ``b``.

        Returns ``b`` itself when no first matrix was given.  Otherwise solves
        with the first factorisation, then with the second if there is one.
        The result is copied into ``out`` (and ``out`` returned) when a buffer
        is supplied; otherwise the freshly computed array is returned.
        """
        from scipy.linalg import lu_solve

        if self.lu1 is None:
            return b

        stages = [(self.lu1, self.piv1)]
        if self.is_two_part:
            stages.append((self.lu2, self.piv2))

        solution = b
        for factors in stages:
            solution = lu_solve(factors, solution)

        if out is None:
            return solution
        out[:] = solution
        return out
358
+
359
+
360
+ # =============================================================================
361
+ # BL-QMR Workspace
362
+ # =============================================================================
363
+
364
+
365
class BLQMRWorkspace:
    """Reusable set of zero-initialised arrays for the BL-QMR iteration.

    All buffers are sized from ``n`` (rows) and ``m`` (block width) and share
    one dtype.
    """

    __slots__ = (
        "v",
        "vt",
        "beta",
        "alpha",
        "omega",
        "theta",
        "Qa",
        "Qb",
        "Qc",
        "Qd",
        "zeta",
        "zetat",
        "eta",
        "tau",
        "taot",
        "p",
        "stacked",
        "QQ_full",
        "tmp0",
        "tmp1",
        "tmp2",
        "Av",
        "precond_tmp",
        "n",
        "m",
        "dtype",
    )

    def __init__(self, n: int, m: int, dtype=np.float64):
        """Allocate every buffer, grouped by shape."""
        self.n = n
        self.m = m
        self.dtype = dtype

        # (shape, attribute names).  Kept as a list of pairs rather than a dict
        # because (n, m) and (m, m) are the same tuple when n == m.
        layout = [
            ((n, m, 3), ("v", "p")),
            ((n, m), ("vt", "Av", "precond_tmp")),
            ((m, m, 3), ("beta", "omega", "Qa", "Qb", "Qc", "Qd")),
            (
                (m, m),
                (
                    "alpha",
                    "theta",
                    "zeta",
                    "zetat",
                    "eta",
                    "tau",
                    "taot",
                    "tmp0",
                    "tmp1",
                    "tmp2",
                ),
            ),
            ((2 * m, m), ("stacked",)),
            ((2 * m, 2 * m), ("QQ_full",)),
        ]
        for shape, names in layout:
            for name in names:
                setattr(self, name, np.zeros(shape, dtype=dtype))

    def reset(self):
        """Zero the buffers that are cleared between solves; the others are left as is."""
        for name in ("v", "beta", "omega", "Qa", "Qb", "Qc", "Qd", "p", "taot"):
            getattr(self, name).fill(0)
434
+
435
+
436
+ # =============================================================================
437
+ # Preconditioner Factory
438
+ # =============================================================================
439
+
440
+
441
def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag", **kwargs):
    """
    Build a preconditioner object or matrix for the iterative solvers.

    Parameters
    ----------
    A : sparse matrix
        System matrix
    precond_type : str
        'diag' or 'jacobi': sparse diagonal matrix holding 1/diag(A); diagonal
            entries with magnitude below 1e-14 are replaced by 1 first
        'ilu0': incomplete LU with drop_tol=0 and fill_factor=1
        'ilu' or 'ilut': incomplete LU with threshold; falls back to the
            'ilu0' settings and then to 'diag' if factorisation fails
        'lu': full LU factorisation; falls back to 'ilut' on failure
        'ssor': the sparse matrix D + omega * L, where D is the diagonal of A
            and L its strictly lower triangle
    **kwargs : dict
        - drop_tol: drop tolerance for 'ilu'/'ilut' (default 1e-4)
        - fill_factor: fill factor for 'ilu'/'ilut' (default 10)
        - omega: relaxation factor for 'ssor' (default 1.0)

    Returns
    -------
    M : preconditioner object or sparse matrix
        Wrapper object for the ILU/LU variants, CSR matrix for 'diag' and 'ssor'.

    Raises
    ------
    ValueError
        If ``precond_type`` is not one of the names above.
    """
    # NOTE(review): 'diag' returns the *inverse* diagonal while 'ssor' returns
    # an un-inverted matrix - confirm how callers apply a sparse-matrix result.
    if precond_type in ("diag", "jacobi"):
        pivots = A.diagonal().copy()
        pivots[np.abs(pivots) < 1e-14] = 1.0  # avoid dividing by (near) zero
        return sparse.diags(1.0 / pivots, format="csr")

    if precond_type == "ilu0":
        try:
            return _ILUPreconditioner(spilu(A.tocsc(), drop_tol=0, fill_factor=1))
        except Exception as e:
            warnings.warn(f"ILU(0) factorization failed: {e}, falling back to diagonal")
            return make_preconditioner(A, "diag")

    if precond_type in ("ilu", "ilut"):
        threshold = kwargs.get("drop_tol", 1e-4)
        fill = kwargs.get("fill_factor", 10)
        try:
            return _ILUPreconditioner(
                spilu(A.tocsc(), drop_tol=threshold, fill_factor=fill)
            )
        except Exception as e:
            warnings.warn(f"ILUT factorization failed: {e}, trying ILU(0)")
            try:
                return _ILUPreconditioner(spilu(A.tocsc(), drop_tol=0, fill_factor=1))
            except Exception as e2:
                warnings.warn(f"ILU(0) also failed: {e2}, falling back to diagonal")
                return make_preconditioner(A, "diag")

    if precond_type == "lu":
        try:
            return _LUPreconditioner(splu(A.tocsc()))
        except Exception as e:
            warnings.warn(f"LU factorization failed: {e}, falling back to ILUT")
            return make_preconditioner(A, "ilut")

    if precond_type == "ssor":
        relaxation = kwargs.get("omega", 1.0)
        diagonal = sparse.diags(A.diagonal(), format="csr")
        strict_lower = sparse.tril(A, k=-1, format="csr")
        return (diagonal + relaxation * strict_lower).tocsr()

    raise ValueError(f"Unknown preconditioner type: {precond_type}")
514
+
515
+
516
+ class _LUPreconditioner:
517
+ """Wrapper for full LU preconditioner."""
518
+
519
+ def __init__(self, lu_factor):
520
+ self.lu = lu_factor
521
+ self.shape = (lu_factor.shape[0], lu_factor.shape[1])
522
+ self.dtype = np.float64 # Assume real for now
523
+
524
+ def solve(self, b):
525
+ if b.ndim == 1:
526
+ return self.lu.solve(b)
527
+ else:
528
+ x = np.zeros_like(b)
529
+ for i in range(b.shape[1]):
530
+ x[:, i] = self.lu.solve(b[:, i])
531
+ return x
532
+
533
+
534
+ # =============================================================================
535
+ # Pure-Python Block QMR Solver
536
+ # =============================================================================
537
+
538
+
539
def _blqmr_python_impl(
    A: Union[np.ndarray, sparse.spmatrix],
    B: np.ndarray,
    tol: float = 1e-6,
    maxiter: Optional[int] = None,
    M1=None,
    M2=None,
    x0: Optional[np.ndarray] = None,
    residual: bool = False,
    workspace: Optional[BLQMRWorkspace] = None,
) -> Tuple[np.ndarray, int, float, int, np.ndarray]:
    """Native Python Block QMR implementation (internal).

    Parameters
    ----------
    A : ndarray or sparse matrix
        System matrix (n x n).
    B : ndarray
        Right-hand side(s); a 1-D vector is treated as a single column.
    tol : float
        Stop once the relative residual measure is <= ``tol``.
    maxiter : int, optional
        Iteration cap; defaults to ``min(n, 100)``.
    M1, M2 : optional
        Preconditioner.  ``M1`` may be an ILU/LU wrapper or sparse matrix
        (wrapped in ``SparsePreconditioner`` together with ``M2``), any other
        object exposing ``solve`` (used directly; ``M2`` is then not used), or a
        dense matrix (wrapped in ``DensePreconditioner`` together with ``M2``).
    x0 : ndarray, optional
        Initial guess; zeros if omitted.
    residual : bool
        False: monitor the quasi-residual.  True: monitor the residual estimate
        built from ``omegat``.
    workspace : BLQMRWorkspace, optional
        Reused when its ``n``, ``m`` and ``dtype`` match; otherwise a new one is
        allocated.

    Returns
    -------
    (x, flag, relres, iter, resv)
        ``flag`` is 0 on convergence, 1 when the iteration cap is reached,
        2 when preconditioning the initial residual produces NaN, and 3 when
        two successive residual values differ by less than machine epsilon.
        ``resv`` holds the residual value recorded at each iteration.
    """
    if B.ndim == 1:
        B = B.reshape(-1, 1)

    n, m = B.shape
    is_complex_input = np.iscomplexobj(A) or np.iscomplexobj(B)
    dtype = np.complex128 if is_complex_input else np.float64

    if maxiter is None:
        maxiter = min(n, 100)

    if (
        workspace is None
        or workspace.n != n
        or workspace.m != m
        or workspace.dtype != dtype
    ):
        ws = BLQMRWorkspace(n, m, dtype)
    else:
        ws = workspace
        ws.reset()

    # Setup preconditioner
    if M1 is not None:
        if isinstance(M1, (_ILUPreconditioner, _LUPreconditioner)):
            precond = SparsePreconditioner(M1, M2)
        elif sparse.issparse(M1):
            precond = SparsePreconditioner(M1, M2)
        elif hasattr(M1, "solve"):
            # Custom preconditioner with .solve() method: use directly
            precond = M1
        else:
            precond = DensePreconditioner(M1, M2)
    else:
        precond = None

    def apply_precond(rhs, out):
        """Store ``precond.solve(rhs)`` in ``out`` (``rhs`` may be ``out``)."""
        try:
            solved = precond.solve(rhs, out=out)
        except TypeError:
            # Custom preconditioners may implement only ``solve(b)``.
            solved = precond.solve(rhs)
        # A preconditioner is free to return a new array instead of filling
        # ``out``; copy the result over so it is never silently dropped.
        if solved is not None and solved is not out:
            out[...] = solved

    if x0 is None:
        x = np.zeros((n, m), dtype=dtype)
    else:
        x = np.asarray(x0, dtype=dtype).reshape(n, m).copy()

    # Initialize indices: Fortran t3=mod(0,3)+1=1 -> Python t3=0
    t3 = 0
    t3n = 2
    t3p = 1

    # Initialize Q matrices (identity)
    ws.Qa[:, :, :] = 0
    ws.Qb[:, :, :] = 0
    ws.Qc[:, :, :] = 0
    ws.Qd[:, :, :] = 0
    ws.Qa[:, :, t3] = np.eye(m, dtype=dtype)
    ws.Qd[:, :, t3n] = np.eye(m, dtype=dtype)
    ws.Qd[:, :, t3] = np.eye(m, dtype=dtype)

    # Initial residual B - A x
    A_is_sparse = sparse.issparse(A)
    if A_is_sparse:
        ws.vt[:] = B - A @ x
    else:
        np.subtract(B, A @ x, out=ws.vt)

    if precond is not None:
        apply_precond(ws.vt, ws.vt)
        if np.any(np.isnan(ws.vt)):
            return x, 2, 1.0, 0, np.array([])

    # QQR decomposition
    Q, R = qqr(ws.vt)
    ws.v[:, :, t3p] = Q
    ws.beta[:, :, t3p] = R

    # Compute omega - standard norm WITH conjugation (Hermitian norm)
    # Fortran: omega(i,i,t3p)=sqrt(sum(conjg(v(:,i,t3p))*v(:,i,t3p)))
    for i in range(m):
        col = ws.v[:, i, t3p]
        if is_complex_input:
            ws.omega[i, i, t3p] = np.sqrt(np.sum(np.conj(col) * col).real)
        else:
            ws.omega[i, i, t3p] = np.sqrt(np.sum(col * col))

    # taut = omega * beta
    ws.taot[:] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]

    isquasires = not residual
    if isquasires:
        # Fortran: Qres0=maxval(sqrt(sum(abs(conjg(taut)*taut),1))) for complex
        if is_complex_input:
            Qres0 = np.max(np.sqrt(np.sum(np.abs(np.conj(ws.taot) * ws.taot), axis=0)))
        else:
            Qres0 = np.max(np.sqrt(np.sum(ws.taot * ws.taot, axis=0)))
    else:
        omegat = np.zeros((n, m), dtype=dtype)
        for i in range(m):
            if np.abs(ws.omega[i, i, t3p]) > 1e-14:
                omegat[:, i] = ws.v[:, i, t3p] / ws.omega[i, i, t3p]
        if is_complex_input:
            Qres0 = np.max(np.sqrt(np.sum(np.abs(np.conj(ws.vt) * ws.vt), axis=0)))
        else:
            Qres0 = np.max(np.sqrt(np.sum(ws.vt * ws.vt, axis=0)))

    # The initial guess already solves the system.
    if Qres0 < 1e-16:
        result = x.real if not is_complex_input else x
        return result, 0, 0.0, 0, np.array([0.0])

    flag, resv, Qres1, relres, iter_count = 1, np.zeros(maxiter), -1.0, 1.0, 0

    for k in range(1, maxiter + 1):
        # Index cycling through the three-slot history buffers
        t3 = k % 3
        t3p = (k + 1) % 3
        t3n = (k - 1) % 3
        t3nn = (k - 2) % 3

        # tmp = A * v(:,:,t3)
        if A_is_sparse:
            ws.Av[:] = A @ ws.v[:, :, t3]
        else:
            np.matmul(A, ws.v[:, :, t3], out=ws.Av)

        # Apply preconditioner
        if precond is not None:
            apply_precond(ws.Av, ws.vt)
            ws.vt[:] = ws.vt - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
        else:
            ws.vt[:] = ws.Av - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T

        # alpha = v^T * vt (transpose, not conjugate transpose)
        ws.alpha[:] = ws.v[:, :, t3].T @ ws.vt
        ws.vt[:] = ws.vt - ws.v[:, :, t3] @ ws.alpha

        # QQR decomposition
        Q, R = qqr(ws.vt)
        ws.v[:, :, t3p] = Q
        ws.beta[:, :, t3p] = R

        # Compute omega (standard Hermitian norm)
        for i in range(m):
            col = ws.v[:, i, t3p]
            if is_complex_input:
                ws.omega[i, i, t3p] = np.sqrt(np.sum(np.conj(col) * col).real)
            else:
                ws.omega[i, i, t3p] = np.sqrt(np.sum(col * col))

        # Compute intermediate matrices
        ws.tmp0[:] = ws.omega[:, :, t3n] @ ws.beta[:, :, t3].T
        ws.theta[:] = ws.Qb[:, :, t3nn] @ ws.tmp0
        ws.tmp1[:] = ws.Qd[:, :, t3nn] @ ws.tmp0
        ws.tmp2[:] = ws.omega[:, :, t3] @ ws.alpha
        ws.eta[:] = ws.Qa[:, :, t3n] @ ws.tmp1 + ws.Qb[:, :, t3n] @ ws.tmp2
        ws.zetat[:] = ws.Qc[:, :, t3n] @ ws.tmp1 + ws.Qd[:, :, t3n] @ ws.tmp2

        # Build ZZ matrix and do standard QR
        ws.stacked[:m, :] = ws.zetat
        ws.stacked[m:, :] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]

        QQ, zeta_full = np.linalg.qr(ws.stacked, mode="complete")
        ws.zeta[:] = zeta_full[:m, :]

        if is_complex_input:
            ws.QQ_full[:] = np.conj(QQ.T)
        else:
            ws.QQ_full[:] = QQ.T

        ws.Qa[:, :, t3] = ws.QQ_full[:m, :m]
        ws.Qb[:, :, t3] = ws.QQ_full[:m, m : 2 * m]
        ws.Qc[:, :, t3] = ws.QQ_full[m : 2 * m, :m]
        ws.Qd[:, :, t3] = ws.QQ_full[m : 2 * m, m : 2 * m]

        # Invert zeta, falling back to the pseudo-inverse if inversion fails
        try:
            zeta_inv = np.linalg.inv(ws.zeta)
        except np.linalg.LinAlgError:
            zeta_inv = np.linalg.pinv(ws.zeta)

        # Update p, tau, x, taut
        ws.p[:, :, t3] = (
            ws.v[:, :, t3] - ws.p[:, :, t3n] @ ws.eta - ws.p[:, :, t3nn] @ ws.theta
        ) @ zeta_inv
        ws.tau[:] = ws.Qa[:, :, t3] @ ws.taot
        x[:] = x + ws.p[:, :, t3] @ ws.tau
        ws.taot[:] = ws.Qc[:, :, t3] @ ws.taot

        # Compute residual
        if isquasires:
            if is_complex_input:
                Qres = np.max(
                    np.sqrt(np.sum(np.abs(np.conj(ws.taot) * ws.taot), axis=0))
                )
            else:
                Qres = np.max(np.sqrt(np.sum(ws.taot * ws.taot, axis=0)))
        else:
            tmp0_diag = np.zeros((m, m), dtype=dtype)
            for i in range(m):
                if np.abs(ws.omega[i, i, t3p]) > 1e-14:
                    tmp0_diag[i, :] = ws.Qd[:, i, t3] / ws.omega[i, i, t3p]
            if is_complex_input:
                omegat = omegat @ np.conj(ws.Qc[:, :, t3].T) + ws.v[
                    :, :, t3p
                ] @ np.conj(tmp0_diag)
                tmp_res = np.conj(omegat @ ws.taot)
                Qres = np.max(
                    np.sqrt(np.sum(np.abs(np.conj(tmp_res) * tmp_res), axis=0))
                )
            else:
                omegat = omegat @ ws.Qc[:, :, t3].T + ws.v[:, :, t3p] @ tmp0_diag
                tmp_res = omegat @ ws.taot
                Qres = np.max(np.sqrt(np.sum(tmp_res * tmp_res, axis=0)))

        resv[k - 1] = Qres

        # Stagnation: the residual measure stopped changing.
        if k > 1 and abs(Qres - Qres1) < np.finfo(dtype).eps:
            flag, iter_count = 3, k
            break

        Qres1, relres, iter_count = Qres, Qres / Qres0, k

        if relres <= tol:
            flag = 0
            break

    # Trim the history to the iterations actually performed.
    resv = resv[:iter_count]
    result = x.real if not is_complex_input else x
    return result, flag, relres, iter_count, resv
773
+
774
+
775
+ # =============================================================================
776
+ # High-Level Solver Interface
777
+ # =============================================================================
778
+
779
+
780
def blqmr_solve(
    Ap: np.ndarray,
    Ai: np.ndarray,
    Ax: np.ndarray,
    b: np.ndarray,
    *,
    x0: Optional[np.ndarray] = None,
    tol: float = 1e-6,
    maxiter: Optional[int] = None,
    droptol: float = 0.001,
    use_precond: bool = True,
    zero_based: bool = True,
) -> BLQMRResult:
    """
    Solve the sparse system Ax = b (CSC input) with the Block QMR algorithm.

    Dispatches to the Fortran extension when it was imported, otherwise to the
    pure-Python implementation.

    Parameters
    ----------
    Ap : ndarray of int32
        CSC column pointers, length n+1.
    Ai : ndarray of int32
        CSC row indices, length nnz.
    Ax : ndarray of float64
        Non-zero values, length nnz.
    b : ndarray of float64
        Right-hand side vector, length n.
    x0 : ndarray, optional
        Initial guess.
    tol : float, default 1e-6
        Convergence tolerance for the relative residual.
    maxiter : int, optional
        Maximum iterations; defaults to n.
    droptol : float, default 0.001
        ILU drop tolerance; forwarded to the Fortran backend only.
    use_precond : bool, default True
        Whether to use preconditioning.
    zero_based : bool, default True
        True for 0-based ``Ap``/``Ai`` (Python/C convention), False for 1-based
        (Fortran convention).

    Returns
    -------
    BLQMRResult
        Solution and convergence information.
    """
    n = len(Ap) - 1
    iteration_cap = n if maxiter is None else maxiter

    # Keyword arguments understood by both backends.
    shared = dict(
        x0=x0,
        tol=tol,
        maxiter=iteration_cap,
        use_precond=use_precond,
        zero_based=zero_based,
    )

    if not BLQMR_EXT:
        return _blqmr_solve_native_csc(Ap, Ai, Ax, b, **shared)
    return _blqmr_solve_fortran(Ap, Ai, Ax, b, droptol=droptol, **shared)
857
+
858
+
859
def _blqmr_solve_fortran(
    Ap, Ai, Ax, b, *, x0, tol, maxiter, droptol, use_precond, zero_based
) -> BLQMRResult:
    """Run a single-right-hand-side solve through the Fortran extension.

    Converts the CSC arrays to Fortran-ordered int32/float64 arrays, checks the
    lengths of ``Ai`` and ``b``, shifts 0-based indices to 1-based, and calls
    ``_blqmr.blqmr_solve_real``.  ``x0`` is accepted for signature parity but is
    not passed to the extension.

    Raises
    ------
    ValueError
        If ``Ai`` and ``Ax`` differ in length, or ``b`` does not have n entries.
    """
    n = len(Ap) - 1
    nnz = len(Ax)

    col_ptr = np.asfortranarray(Ap, dtype=np.int32)
    row_idx = np.asfortranarray(Ai, dtype=np.int32)
    values = np.asfortranarray(Ax, dtype=np.float64)
    rhs = np.asfortranarray(b, dtype=np.float64)

    if len(row_idx) != nnz:
        raise ValueError(f"Ai length ({len(row_idx)}) must match Ax length ({nnz})")
    if len(rhs) != n:
        raise ValueError(f"b length ({len(rhs)}) must match matrix size ({n})")

    if zero_based:
        # The extension is called with 1-based indices.
        col_ptr = col_ptr + 1
        row_idx = row_idx + 1

    precond_switch = 1 if use_precond else 0

    solution, status, iterations, rel_residual = _blqmr.blqmr_solve_real(
        n, nnz, col_ptr, row_idx, values, rhs, maxiter, tol, droptol, precond_switch
    )

    return BLQMRResult(
        x=solution.copy(),
        flag=int(status),
        iter=int(iterations),
        relres=float(rel_residual),
    )
889
+
890
+
891
def _blqmr_solve_native_csc(
    Ap, Ai, Ax, b, *, x0, tol, maxiter, use_precond, zero_based
) -> BLQMRResult:
    """Run a single-right-hand-side solve with the pure-Python implementation.

    Assembles a SciPy CSC matrix from the raw arrays (shifting 1-based indices
    to 0-based first), optionally builds a preconditioner, and calls
    ``_blqmr_python_impl``.  The solution is flattened to 1-D before returning.
    """
    n = len(Ap) - 1

    if not zero_based:
        Ap = Ap - 1
        Ai = Ai - 1

    A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))

    # Try ILU first, then the diagonal preconditioner; if both raise, solve
    # without preconditioning.
    M1 = None
    if use_precond:
        for kind in ("ilu", "diag"):
            try:
                M1 = make_preconditioner(A, kind)
            except Exception:
                continue
            break

    solution, status, rel_residual, iterations, history = _blqmr_python_impl(
        A, b, tol=tol, maxiter=maxiter, M1=M1, x0=x0
    )

    if solution.ndim > 1:
        solution = solution.ravel()

    return BLQMRResult(
        x=solution, flag=status, iter=iterations, relres=rel_residual, resv=history
    )
921
+
922
+
923
def blqmr_solve_multi(
    Ap: np.ndarray,
    Ai: np.ndarray,
    Ax: np.ndarray,
    B: np.ndarray,
    *,
    tol: float = 1e-6,
    maxiter: Optional[int] = None,
    droptol: float = 0.001,
    use_precond: bool = True,
    zero_based: bool = True,
) -> BLQMRResult:
    """
    Solve the sparse system AX = B (CSC input) for several right-hand sides.

    Dispatches to the Fortran extension when it was imported, otherwise to the
    pure-Python implementation.  ``maxiter`` defaults to n (``len(Ap) - 1``);
    ``droptol`` is forwarded to the Fortran backend only.
    """
    n = len(Ap) - 1
    iteration_cap = n if maxiter is None else maxiter

    # Keyword arguments understood by both backends.
    shared = dict(
        tol=tol,
        maxiter=iteration_cap,
        use_precond=use_precond,
        zero_based=zero_based,
    )

    if not BLQMR_EXT:
        return _blqmr_solve_multi_native(Ap, Ai, Ax, B, **shared)
    return _blqmr_solve_multi_fortran(Ap, Ai, Ax, B, droptol=droptol, **shared)
968
+
969
+
970
def _blqmr_solve_multi_fortran(
    Ap, Ai, Ax, B, *, tol, maxiter, droptol, use_precond, zero_based
) -> BLQMRResult:
    """Fortran backend for blqmr_solve_multi.

    Converts the CSC arrays and the right-hand sides to Fortran-ordered
    int32/float64 arrays (a 1-D ``B`` becomes a single column), validates their
    sizes, shifts 0-based indices to 1-based, and calls
    ``_blqmr.blqmr_solve_real_multi``.

    Raises
    ------
    ValueError
        If ``Ai`` and ``Ax`` differ in length, or ``B`` does not have n rows.
    """
    n = len(Ap) - 1
    nnz = len(Ax)

    Ap = np.asfortranarray(Ap, dtype=np.int32)
    Ai = np.asfortranarray(Ai, dtype=np.int32)
    Ax = np.asfortranarray(Ax, dtype=np.float64)
    B = np.asfortranarray(B, dtype=np.float64)

    if B.ndim == 1:
        B = B.reshape(-1, 1, order="F")
    nrhs = B.shape[1]

    # Same sanity checks as the single right-hand-side backend, so mismatched
    # inputs are rejected before they reach the compiled routine.
    if len(Ai) != nnz:
        raise ValueError(f"Ai length ({len(Ai)}) must match Ax length ({nnz})")
    if B.shape[0] != n:
        raise ValueError(f"B row count ({B.shape[0]}) must match matrix size ({n})")

    if zero_based:
        # The extension is called with 1-based indices.
        Ap = Ap + 1
        Ai = Ai + 1

    dopcond = 1 if use_precond else 0

    X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
        n, nnz, nrhs, Ap, Ai, Ax, B, maxiter, tol, droptol, dopcond
    )

    return BLQMRResult(
        x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
    )
999
+
1000
+
1001
def _blqmr_solve_multi_native(
    Ap, Ai, Ax, B, *, tol, maxiter, use_precond, zero_based
) -> BLQMRResult:
    """Run a multi-right-hand-side solve with the pure-Python implementation.

    Assembles a SciPy CSC matrix from the raw arrays (shifting 1-based indices
    to 0-based first), optionally builds a preconditioner, reshapes a 1-D ``B``
    into a single column, and calls ``_blqmr_python_impl``.
    """
    n = len(Ap) - 1

    if not zero_based:
        Ap = Ap - 1
        Ai = Ai - 1

    A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))

    # Try ILU first, then the diagonal preconditioner; if both raise, solve
    # without preconditioning.
    M1 = None
    if use_precond:
        for kind in ("ilu", "diag"):
            try:
                M1 = make_preconditioner(A, kind)
            except Exception:
                continue
            break

    if B.ndim == 1:
        B = B.reshape(-1, 1)

    solution, status, rel_residual, iterations, history = _blqmr_python_impl(
        A, B, tol=tol, maxiter=maxiter, M1=M1
    )

    return BLQMRResult(
        x=solution, flag=status, iter=iterations, relres=rel_residual, resv=history
    )
1031
+
1032
+
1033
def blqmr_scipy(
    A,
    b: np.ndarray,
    x0: Optional[np.ndarray] = None,
    tol: float = 1e-6,
    maxiter: Optional[int] = None,
    M=None,
    **kwargs,
) -> Tuple[np.ndarray, int]:
    """
    SciPy-style wrapper around ``blqmr`` that returns ``(x, flag)``.

    Parameters
    ----------
    A : sparse matrix or ndarray
        System matrix
    b : ndarray
        Right-hand side vector
    x0 : ndarray, optional
        Initial guess
    tol : float
        Convergence tolerance
    maxiter : int, optional
        Maximum iterations
    M : preconditioner, optional
        Passed to ``blqmr`` as ``M1``
    **kwargs
        Any further keyword arguments are forwarded to ``blqmr`` unchanged

    Returns
    -------
    x : ndarray
        Solution taken from the result object
    flag : int
        Convergence flag taken from the result object (0 = converged)
    """
    outcome = blqmr(A, b, x0=x0, tol=tol, maxiter=maxiter, M1=M, **kwargs)
    solution, status = outcome.x, outcome.flag
    return solution, status
1071
+
1072
+
1073
def blqmr(
    A: Union[np.ndarray, sparse.spmatrix],
    B: np.ndarray,
    tol: float = 1e-6,
    maxiter: Optional[int] = None,
    M1=None,
    M2=None,
    x0: Optional[np.ndarray] = None,
    residual: bool = False,
    workspace: Optional[BLQMRWorkspace] = None,
    droptol: float = 0.001,
    use_precond: bool = True,
) -> BLQMRResult:
    """
    Block Quasi-Minimal-Residual (BL-QMR) solver - main entry point.

    Dispatches to the Fortran wrapper when ``BLQMR_EXT`` is set and to the
    pure-Python wrapper otherwise.

    Parameters
    ----------
    A : ndarray or sparse matrix
        Symmetric n x n matrix (can be complex)
    B : ndarray
        Right-hand side vector/matrix, shape (n,) or (n, m)
    tol : float
        Convergence tolerance (default: 1e-6)
    maxiter : int, optional
        Maximum iterations. When omitted, the Fortran wrapper uses n; the
        Python backend applies its own default.
    M1, M2 : preconditioner, optional
        Preconditioner M = M1 @ M2. Only forwarded to the Python backend.
    x0 : ndarray, optional
        Initial guess. Forwarded to both wrappers.
    residual : bool
        If True, use true residual for convergence. Only forwarded to the
        Python backend.
    workspace : BLQMRWorkspace, optional
        Pre-allocated workspace. Only forwarded to the Python backend.
    droptol : float, default 0.001
        Drop tolerance for the ILU preconditioner. Only forwarded to the
        Fortran backend.
    use_precond : bool, default True
        Whether to precondition. Forwarded to both backends.

    Returns
    -------
    BLQMRResult
        Result object containing:
        - x: Solution array
        - flag: 0 = converged, 1 = max iterations, 2 = preconditioner singular, 3 = stagnated
        - iter: Number of iterations
        - relres: Final relative residual
        - resv: Residual history (Python backend only)
    """
    # Options understood by both backends.
    shared = dict(tol=tol, maxiter=maxiter, x0=x0, use_precond=use_precond)

    if not BLQMR_EXT:
        return _blqmr_native(
            A,
            B,
            M1=M1,
            M2=M2,
            residual=residual,
            workspace=workspace,
            **shared,
        )

    return _blqmr_fortran(A, B, droptol=droptol, **shared)
1147
+
1148
+
1149
def _blqmr_fortran(
    A: Union[np.ndarray, sparse.spmatrix],
    B: np.ndarray,
    *,
    tol: float,
    maxiter: Optional[int],
    x0: Optional[np.ndarray],
    droptol: float,
    use_precond: bool,
) -> BLQMRResult:
    """Fortran backend for blqmr().

    Converts ``A`` to sorted, 1-based CSC arrays, selects the real or
    complex and single- or multi-RHS routine of the ``_blqmr`` extension,
    and wraps the returned values in a BLQMRResult.

    Parameters
    ----------
    A : ndarray or sparse matrix
        System matrix; converted with ``sparse.csc_matrix``. The caller's
        matrix is not modified.
    B : array-like
        Right-hand side. A 1-D array or a single-column 2-D array takes
        the single-RHS routine; anything else takes the block routine.
    tol : float
        Convergence tolerance passed to the extension.
    maxiter : int, optional
        Iteration cap; defaults to the matrix dimension when None.
    x0 : ndarray, optional
        Not supported by this backend. A RuntimeWarning is emitted when a
        value is supplied, because it is not forwarded to the extension.
    droptol : float
        ILU drop tolerance passed to the extension.
    use_precond : bool
        Passed to the extension as an integer flag (1 or 0).

    Returns
    -------
    BLQMRResult
        With x (a copy of the extension output), flag, iter and relres.
    """
    if x0 is not None:
        # Previously the guess was dropped silently; make that visible.
        warnings.warn(
            "x0 is ignored by the Fortran BLQMR backend: the initial guess "
            "is not forwarded to the extension.",
            RuntimeWarning,
            stacklevel=3,
        )

    B = np.asarray(B)
    A_csc = sparse.csc_matrix(A)

    # CRITICAL: indices must be sorted for UMFPACK compatibility.
    # sorted_indices() returns a sorted copy; the in-place sort_indices()
    # would reorder arrays that csc_matrix() may share with the caller's A.
    if not A_csc.has_sorted_indices:
        A_csc = A_csc.sorted_indices()

    n = A_csc.shape[0]
    nnz = A_csc.nnz

    if maxiter is None:
        maxiter = n

    # Convert to Fortran format (1-based indexing).
    Ap_f = np.asfortranarray(A_csc.indptr.astype(np.int32) + 1, dtype=np.int32)
    Ai_f = np.asfortranarray(A_csc.indices.astype(np.int32) + 1, dtype=np.int32)

    dopcond = 1 if use_precond else 0

    # One dtype for matrix values and right-hand side selects the routine.
    is_complex = np.iscomplexobj(A_csc.data) or np.iscomplexobj(B)
    dtype = np.complex128 if is_complex else np.float64
    Ax_f = np.asfortranarray(A_csc.data, dtype=dtype)

    single_rhs = B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1)

    if single_rhs:
        solve = (
            _blqmr.blqmr_solve_complex if is_complex else _blqmr.blqmr_solve_real
        )
        b_f = np.asfortranarray(B.ravel(), dtype=dtype)
        x, flag, niter, relres = solve(
            n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, dopcond
        )
    else:
        # Multiple RHS - use block method.
        solve = (
            _blqmr.blqmr_solve_complex_multi
            if is_complex
            else _blqmr.blqmr_solve_real_multi
        )
        B_f = np.asfortranarray(B, dtype=dtype)
        nrhs = B_f.shape[1]
        x, flag, niter, relres = solve(
            n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, dopcond
        )

    return BLQMRResult(
        x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
    )
1230
+
1231
+
1232
def _blqmr_native(
    A: Union[np.ndarray, sparse.spmatrix],
    B: np.ndarray,
    *,
    tol: float,
    maxiter: Optional[int],
    M1,
    M2,
    x0: Optional[np.ndarray],
    residual: bool,
    workspace: Optional[BLQMRWorkspace],
    use_precond: bool,
) -> BLQMRResult:
    """Pure-Python backend for blqmr().

    When ``use_precond`` is true and no ``M1`` was supplied, an "ilu"
    preconditioner is attempted, then "diag"; if both raise, ``M1`` stays
    None. All remaining arguments go straight to ``_blqmr_python_impl``.
    A solution with exactly one column is returned as a 1-D array.
    """
    if use_precond and M1 is None:
        A_sp = A if sparse.issparse(A) else sparse.csc_matrix(A)
        # Best-effort: keep the first preconditioner kind that builds.
        for kind in ("ilu", "diag"):
            try:
                M1 = make_preconditioner(A_sp, kind)
            except Exception:
                continue
            break

    solver_options = dict(
        tol=tol,
        maxiter=maxiter,
        M1=M1,
        M2=M2,
        x0=x0,
        residual=residual,
        workspace=workspace,
    )
    x, flag, relres, niter, resv = _blqmr_python_impl(A, B, **solver_options)

    # Collapse an (n, 1) solution to shape (n,).
    is_single_column = x.ndim > 1 and x.shape[1] == 1
    if is_single_column:
        x = x.ravel()

    return BLQMRResult(x=x, flag=flag, iter=niter, relres=relres, resv=resv)
1274
+
1275
+
1276
+ # =============================================================================
1277
+ # Test Function
1278
+ # =============================================================================
1279
+
1280
+
1281
def _test():
    """Smoke test: solve a fixed 5x5 system with blqmr() and print the outcome.

    Returns
    -------
    The ``converged`` attribute of the result returned by blqmr().
    """
    backend = "Fortran" if BLQMR_EXT else "Pure Python"
    banner = (
        "BLIT BLQMR Test",
        "=" * 40,
        f"Fortran backend available: {BLQMR_EXT}",
        f"Numba acceleration available: {HAS_NUMBA}",
        f"Using backend: {backend}",
        "",
    )
    for line in banner:
        print(line)

    # Test system given as CSC components (column pointers, row indices, values).
    # NOTE(review): this matrix is not symmetric (entry (1, 2) is 4.0 while
    # entry (2, 1) is -1.0), whereas blqmr() documents A as symmetric -
    # confirm this is the intended smoke-test input.
    n = 5
    col_ptr = np.array([0, 2, 5, 9, 10, 12], dtype=np.int32)
    row_idx = np.array([0, 1, 0, 2, 4, 1, 2, 3, 4, 2, 1, 4], dtype=np.int32)
    values = np.array(
        [2.0, 3.0, 3.0, -1.0, 4.0, 4.0, -3.0, 1.0, 2.0, 2.0, 6.0, 1.0],
        dtype=np.float64,
    )
    b = np.array([8.0, 45.0, -3.0, 3.0, 19.0], dtype=np.float64)

    A = sparse.csc_matrix((values, row_idx, col_ptr), shape=(n, n))

    print(f"Matrix: {n}x{n}, nnz={len(values)}")
    print(f"b: {b}")
    print("\nCalling blqmr()...")

    result = blqmr(A, b, tol=1e-8)

    print(f"\n{result}")
    print(f"Solution: {result.x}")

    # Check the computed solution against the original system.
    res = np.linalg.norm(A @ result.x - b)
    print(f"||Ax - b|| = {res:.2e}")

    return result.converged
1316
+
1317
+
1318
if __name__ == "__main__":
    # Executed as a script: run the installation smoke test.
    _test()