blocksolver 0.8.1__py3-none-any.whl → 0.8.5__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
blocksolver/blqmr.py CHANGED
@@ -50,7 +50,7 @@ try:
50
50
  from numba import njit
51
51
 
52
52
  HAS_NUMBA = True
53
- except ImportError:
53
+ except (ImportError, Exception) as e:
54
54
  HAS_NUMBA = False
55
55
 
56
56
  def njit(*args, **kwargs):
@@ -134,6 +134,52 @@ def _qqr_kernel_real(Q, R, n, m):
134
134
  Q[i, k] -= Q[i, j] * dot
135
135
 
136
136
 
137
+ @njit(cache=True)
138
+ def _qqr_kernel_complex(Q, R, n, m):
139
+ """Numba-accelerated quasi-QR kernel for complex arrays."""
140
+ for j in range(m):
141
+ # Quasi inner product: sum(q*q) WITHOUT conjugation
142
+ r_jj_sq = 0.0j
143
+ for i in range(n):
144
+ r_jj_sq += Q[i, j] * Q[i, j] # No conjugation!
145
+ r_jj = np.sqrt(r_jj_sq)
146
+ R[j, j] = r_jj
147
+ if abs(r_jj) > 1e-14:
148
+ inv_r_jj = 1.0 / r_jj
149
+ for i in range(n):
150
+ Q[i, j] *= inv_r_jj
151
+ for k in range(j + 1, m):
152
+ # Quasi inner product: sum(q_j * q_k) WITHOUT conjugation
153
+ dot = 0.0j
154
+ for i in range(n):
155
+ dot += Q[i, j] * Q[i, k] # No conjugation!
156
+ R[j, k] = dot
157
+ for i in range(n):
158
+ Q[i, k] -= Q[i, j] * dot
159
+
160
+
161
+ @njit(cache=True)
162
+ def _qqr_kernel_real(Q, R, n, m):
163
+ """Numba-accelerated quasi-QR kernel for real arrays."""
164
+ for j in range(m):
165
+ r_jj_sq = 0.0
166
+ for i in range(n):
167
+ r_jj_sq += Q[i, j] * Q[i, j]
168
+ r_jj = np.sqrt(r_jj_sq)
169
+ R[j, j] = r_jj
170
+ if abs(r_jj) > 1e-14:
171
+ inv_r_jj = 1.0 / r_jj
172
+ for i in range(n):
173
+ Q[i, j] *= inv_r_jj
174
+ for k in range(j + 1, m):
175
+ dot = 0.0
176
+ for i in range(n):
177
+ dot += Q[i, j] * Q[i, k]
178
+ R[j, k] = dot
179
+ for i in range(n):
180
+ Q[i, k] -= Q[i, j] * dot
181
+
182
+
137
183
  def qqr(
138
184
  A: np.ndarray, tol: float = 0, use_numba: bool = True
139
185
  ) -> Tuple[np.ndarray, np.ndarray]:
@@ -173,14 +219,20 @@ def qqr(
173
219
  else:
174
220
  for j in range(m):
175
221
  qj = Q[:, j]
176
- r_jj_sq = np.dot(qj, qj)
222
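+ # Quasi inner product: written as an explicit sum(qj * qj) instead of np.dot(qj, qj)
223
+ # NOTE: np.dot does not conjugate its arguments (np.vdot does), so both agree up to rounding; the explicit sum just makes the no-conjugation intent obvious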
224
+ # Fortran: R(k,k)=dsqrt(sum(Q(:,k)*Q(:,k))) - no conjugation
225
+ r_jj_sq = np.sum(qj * qj) # Quasi inner product - NO conjugation
177
226
  r_jj = np.sqrt(r_jj_sq)
178
227
  R[j, j] = r_jj
179
228
  if np.abs(r_jj) > 1e-14:
180
229
  Q[:, j] *= 1.0 / r_jj
181
230
  if j < m - 1:
182
- R[j, j + 1 :] = np.dot(Q[:, j], Q[:, j + 1 :])
183
- Q[:, j + 1 :] -= np.outer(Q[:, j], R[j, j + 1 :])
231
+ # CRITICAL FIX: Quasi inner product for off-diagonal
232
+ # Fortran: R(k,j)=sum(Q(:,k)*Q(:,j)) - no conjugation
233
+ for k in range(j + 1, m):
234
+ R[j, k] = np.sum(Q[:, j] * Q[:, k]) # NO conjugation
235
+ Q[:, k] -= R[j, k] * Q[:, j]
184
236
 
185
237
  return Q, R
186
238
 
@@ -217,8 +269,12 @@ class SparsePreconditioner:
217
269
 
218
270
  def __init__(self, M1, M2=None):
219
271
  self.is_two_part = M2 is not None
220
- self.is_ilu1 = isinstance(M1, _ILUPreconditioner)
221
- self.is_ilu2 = isinstance(M2, _ILUPreconditioner) if M2 is not None else False
272
+ self.is_ilu1 = isinstance(M1, (_ILUPreconditioner, _LUPreconditioner))
273
+ self.is_ilu2 = (
274
+ isinstance(M2, (_ILUPreconditioner, _LUPreconditioner))
275
+ if M2 is not None
276
+ else False
277
+ )
222
278
 
223
279
  if M1 is not None:
224
280
  if self.is_ilu1:
@@ -381,8 +437,101 @@ class BLQMRWorkspace:
381
437
  # Preconditioner Factory
382
438
  # =============================================================================
383
439
 
440
+ # Type alias for precond_type
441
+ PrecondType = Optional[Union[str, int]]
384
442
 
385
- def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
443
+
444
+ def _parse_precond_type_for_fortran(precond_type: PrecondType) -> int:
445
+ """
446
+ Convert precond_type to Fortran integer code.
447
+
448
+ Returns
449
+ -------
450
+ int
451
+ 0 = no preconditioning
452
+ 2 = ILU
453
+ 3 = diagonal/Jacobi
454
+ """
455
+ if precond_type is None or precond_type == "" or precond_type is False:
456
+ return 0
457
+
458
+ if isinstance(precond_type, int):
459
+ return precond_type
460
+
461
+ if isinstance(precond_type, str):
462
+ precond_lower = precond_type.lower()
463
+ if precond_lower in ("ilu", "ilu0", "ilut"):
464
+ return 2
465
+ elif precond_lower in ("diag", "jacobi"):
466
+ return 3
467
+ else:
468
+ # Unknown string, default to no preconditioning
469
+ warnings.warn(
470
+ f"Unknown precond_type '{precond_type}' for Fortran backend, using no preconditioning"
471
+ )
472
+ return 0
473
+
474
+ return 0
475
+
476
+
477
+ def _get_preconditioner_for_native(A, precond_type: PrecondType, M1_provided):
478
+ """
479
+ Create preconditioner for native Python backend.
480
+
481
+ Parameters
482
+ ----------
483
+ A : sparse matrix
484
+ System matrix
485
+ precond_type : None, '', str, or int
486
+ Preconditioner type specification
487
+ M1_provided : preconditioner or None
488
+ User-provided preconditioner (takes precedence)
489
+
490
+ Returns
491
+ -------
492
+ M1 : preconditioner or None
493
+ """
494
+ # If user provided M1, use it
495
+ if M1_provided is not None:
496
+ return M1_provided
497
+
498
+ # No preconditioning requested
499
+ if precond_type is None or precond_type == "" or precond_type is False:
500
+ return None
501
+
502
+ # Integer codes (for compatibility)
503
+ if isinstance(precond_type, int):
504
+ if precond_type == 0:
505
+ return None
506
+ elif precond_type == 2:
507
+ precond_str = "ilu"
508
+ elif precond_type == 3:
509
+ precond_str = "diag"
510
+ else:
511
+ precond_str = "ilu" # Default to ILU for other integers
512
+ else:
513
+ precond_str = precond_type
514
+
515
+ # Create preconditioner
516
+ try:
517
+ return make_preconditioner(A, precond_str)
518
+ except Exception as e:
519
+ # Fallback chain: try diag if ilu fails
520
+ if precond_str not in ("diag", "jacobi"):
521
+ try:
522
+ warnings.warn(
523
+ f"Preconditioner '{precond_str}' failed: {e}, falling back to diagonal"
524
+ )
525
+ return make_preconditioner(A, "diag")
526
+ except Exception:
527
+ pass
528
+ warnings.warn(f"All preconditioners failed, proceeding without preconditioning")
529
+ return None
530
+
531
+
532
+ def make_preconditioner(
533
+ A: sparse.spmatrix, precond_type: str = "diag", split: bool = False, **kwargs
534
+ ):
386
535
  """
387
536
  Create a preconditioner for iterative solvers.
388
537
 
@@ -392,29 +541,70 @@ def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
392
541
  System matrix
393
542
  precond_type : str
394
543
  'diag' or 'jacobi': Diagonal (Jacobi) preconditioner
395
- 'ilu' or 'ilu0': Incomplete LU
396
- 'ssor': Symmetric SOR
544
+ 'ilu' or 'ilu0': Incomplete LU with minimal fill
545
+ 'ilut': Incomplete LU with threshold
546
+ 'lu': Full LU factorization
547
+ split : bool
548
+ If True, return sqrt(D) for split preconditioning (M1=M2=sqrt(D))
549
+ If False, return D for left preconditioning
550
+ **kwargs : dict
551
+ Additional parameters
397
552
 
398
553
  Returns
399
554
  -------
400
555
  M : preconditioner object
401
- Preconditioner (use as M1 in blqmr)
556
+ For split Jacobi, use as: blqmr(A, b, M1=M, M2=M)
402
557
  """
403
558
  if precond_type in ("diag", "jacobi"):
404
559
  diag = A.diagonal().copy()
405
560
  diag[np.abs(diag) < 1e-14] = 1.0
406
- return sparse.diags(diag, format="csr")
407
561
 
408
- elif precond_type in ("ilu", "ilu0"):
562
+ if split:
563
+ # For split preconditioning: return sqrt(D)
564
+ # Usage: M1 = M2 = sqrt(D), gives D^{-1/2} A D^{-1/2}
565
+ sqrt_diag = np.sqrt(diag)
566
+ return sparse.diags(sqrt_diag, format="csr")
567
+ else:
568
+ # For left preconditioning: return D
569
+ # Usage: M1 = D, M2 = None, gives D^{-1} A
570
+ return sparse.diags(diag, format="csr")
571
+
572
+ elif precond_type == "ilu0":
573
+ # ILU(0) - no fill-in, fast but may be poor quality
409
574
  try:
410
575
  ilu = spilu(A.tocsc(), drop_tol=0, fill_factor=1)
411
576
  return _ILUPreconditioner(ilu)
412
577
  except Exception as e:
413
- warnings.warn(f"ILU factorization failed: {e}, falling back to diagonal")
578
+ warnings.warn(f"ILU(0) factorization failed: {e}, falling back to diagonal")
414
579
  return make_preconditioner(A, "diag")
415
580
 
581
+ elif precond_type in ("ilu", "ilut"):
582
+ # ILUT - ILU with threshold, better quality (similar to UMFPACK)
583
+ drop_tol = kwargs.get("drop_tol", 1e-4)
584
+ fill_factor = kwargs.get("fill_factor", 10)
585
+ try:
586
+ ilu = spilu(A.tocsc(), drop_tol=drop_tol, fill_factor=fill_factor)
587
+ return _ILUPreconditioner(ilu)
588
+ except Exception as e:
589
+ warnings.warn(f"ILUT factorization failed: {e}, trying ILU(0)")
590
+ try:
591
+ ilu = spilu(A.tocsc(), drop_tol=0, fill_factor=1)
592
+ return _ILUPreconditioner(ilu)
593
+ except Exception as e2:
594
+ warnings.warn(f"ILU(0) also failed: {e2}, falling back to diagonal")
595
+ return make_preconditioner(A, "diag")
596
+
597
+ elif precond_type == "lu":
598
+ # Full LU - exact factorization (for reference/debugging)
599
+ try:
600
+ lu = splu(A.tocsc())
601
+ return _LUPreconditioner(lu)
602
+ except Exception as e:
603
+ warnings.warn(f"LU factorization failed: {e}, falling back to ILUT")
604
+ return make_preconditioner(A, "ilut")
605
+
416
606
  elif precond_type == "ssor":
417
- omega = 1.0
607
+ omega = kwargs.get("omega", 1.0)
418
608
  D = sparse.diags(A.diagonal(), format="csr")
419
609
  L = sparse.tril(A, k=-1, format="csr")
420
610
  return (D + omega * L).tocsr()
@@ -423,6 +613,24 @@ def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
423
613
  raise ValueError(f"Unknown preconditioner type: {precond_type}")
424
614
 
425
615
 
616
+ class _LUPreconditioner:
617
+ """Wrapper for full LU preconditioner."""
618
+
619
+ def __init__(self, lu_factor):
620
+ self.lu = lu_factor
621
+ self.shape = (lu_factor.shape[0], lu_factor.shape[1])
622
+ self.dtype = np.float64 # Assume real for now
623
+
624
+ def solve(self, b):
625
+ if b.ndim == 1:
626
+ return self.lu.solve(b)
627
+ else:
628
+ x = np.zeros_like(b)
629
+ for i in range(b.shape[1]):
630
+ x[:, i] = self.lu.solve(b[:, i])
631
+ return x
632
+
633
+
426
634
  # =============================================================================
427
635
  # Pure-Python Block QMR Solver
428
636
  # =============================================================================
@@ -448,7 +656,7 @@ def _blqmr_python_impl(
448
656
  dtype = np.complex128 if is_complex_input else np.float64
449
657
 
450
658
  if maxiter is None:
451
- maxiter = min(n, 20)
659
+ maxiter = min(n, 100)
452
660
 
453
661
  if (
454
662
  workspace is None
@@ -461,23 +669,59 @@ def _blqmr_python_impl(
461
669
  ws = workspace
462
670
  ws.reset()
463
671
 
464
- # Setup preconditioner
465
- if M1 is not None:
466
- if isinstance(M1, _ILUPreconditioner):
467
- precond = SparsePreconditioner(M1, M2)
672
+ # Setup preconditioner - distinguish split vs left-only
673
+ use_split_precond = False
674
+ precond = None
675
+ precond_M1 = None
676
+ precond_M2 = None
677
+
678
+ if M1 is not None and M2 is not None:
679
+ # Split preconditioning: M1⁻¹ A M2⁻¹
680
+ use_split_precond = True
681
+ if isinstance(M1, (_ILUPreconditioner, _LUPreconditioner)):
682
+ precond_M1 = SparsePreconditioner(M1, None)
468
683
  elif sparse.issparse(M1):
469
- precond = SparsePreconditioner(M1, M2)
684
+ precond_M1 = SparsePreconditioner(M1, None)
685
+ elif hasattr(M1, "solve"):
686
+ precond_M1 = M1
470
687
  else:
471
- precond = DensePreconditioner(M1, M2)
472
- else:
473
- precond = None
688
+ precond_M1 = DensePreconditioner(M1, None)
689
+
690
+ if isinstance(M2, (_ILUPreconditioner, _LUPreconditioner)):
691
+ precond_M2 = SparsePreconditioner(M2, None)
692
+ elif sparse.issparse(M2):
693
+ precond_M2 = SparsePreconditioner(M2, None)
694
+ elif hasattr(M2, "solve"):
695
+ precond_M2 = M2
696
+ else:
697
+ precond_M2 = DensePreconditioner(M2, None)
698
+
699
+ elif M1 is not None:
700
+ # Left-only preconditioning: M1⁻¹ A
701
+ if isinstance(M1, (_ILUPreconditioner, _LUPreconditioner)):
702
+ precond = SparsePreconditioner(M1, None)
703
+ elif sparse.issparse(M1):
704
+ precond = SparsePreconditioner(M1, None)
705
+ elif hasattr(M1, "solve"):
706
+ precond = M1
707
+ else:
708
+ precond = DensePreconditioner(M1, None)
474
709
 
475
710
  if x0 is None:
476
711
  x = np.zeros((n, m), dtype=dtype)
477
712
  else:
478
713
  x = np.asarray(x0, dtype=dtype).reshape(n, m).copy()
479
714
 
480
- t3, t3n, t3p, t3nn = 0, 2, 1, 1
715
+ # Initialize indices: Fortran t3=mod(0,3)+1=1 -> Python t3=0
716
+ t3 = 0
717
+ t3n = 2
718
+ t3p = 1
719
+
720
+ # Initialize Q matrices (identity)
721
+ ws.Qa[:, :, :] = 0
722
+ ws.Qb[:, :, :] = 0
723
+ ws.Qc[:, :, :] = 0
724
+ ws.Qd[:, :, :] = 0
481
725
  ws.Qa[:, :, t3] = np.eye(m, dtype=dtype)
482
726
  ws.Qd[:, :, t3n] = np.eye(m, dtype=dtype)
483
727
  ws.Qd[:, :, t3] = np.eye(m, dtype=dtype)
@@ -488,110 +732,190 @@ def _blqmr_python_impl(
488
732
  else:
489
733
  np.subtract(B, A @ x, out=ws.vt)
490
734
 
491
- if precond is not None:
735
+ # Apply preconditioner to initial residual
736
+ if use_split_precond:
737
+ # For split preconditioning, initial residual is just M1⁻¹ * (b - A*x0)
738
+ # because we're solving M1⁻¹ A M2⁻¹ y = M1⁻¹ b with y = M2*x
739
+ ws.vt[:] = precond_M1.solve(ws.vt)
740
+ if np.any(np.isnan(ws.vt)):
741
+ return x, 2, 1.0, 0, np.array([])
742
+ elif precond is not None:
492
743
  precond.solve(ws.vt, out=ws.vt)
493
744
  if np.any(np.isnan(ws.vt)):
494
745
  return x, 2, 1.0, 0, np.array([])
495
746
 
747
+ # QQR decomposition
496
748
  Q, R = qqr(ws.vt)
497
749
  ws.v[:, :, t3p] = Q
498
750
  ws.beta[:, :, t3p] = R
499
751
 
500
- col_norms = np.sqrt(np.einsum("ij,ij->j", Q.conj(), Q).real)
501
- ws.omega[:, :, t3p] = np.diag(col_norms)
502
- np.matmul(ws.omega[:, :, t3p], ws.beta[:, :, t3p], out=ws.taot)
752
+ # Compute omega - standard norm WITH conjugation (Hermitian norm)
753
+ # Fortran: omega(i,i,t3p)=sqrt(sum(conjg(v(:,i,t3p))*v(:,i,t3p)))
754
+ ws.omega[:, :, t3p].fill(0)
755
+ if is_complex_input:
756
+ np.fill_diagonal(
757
+ ws.omega[:, :, t3p],
758
+ np.sqrt(
759
+ np.einsum("ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]).real
760
+ ),
761
+ )
762
+ else:
763
+ np.fill_diagonal(
764
+ ws.omega[:, :, t3p],
765
+ np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
766
+ )
767
+
768
+ # taut = omega * beta
769
+ ws.taot[:] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
503
770
 
504
771
  isquasires = not residual
505
772
  if isquasires:
506
- Qres0 = np.sqrt(np.einsum("ij,ij->j", ws.taot.conj(), ws.taot).real).max()
773
+ # Fortran: Qres0=maxval(sqrt(sum(abs(conjg(taut)*taut),1))) for complex
774
+ if is_complex_input:
775
+ Qres0 = np.max(
776
+ np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
777
+ )
778
+ else:
779
+ Qres0 = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
507
780
  else:
508
- omegat = Q @ np.diag(1.0 / (col_norms + 1e-16))
509
- Qres0 = np.sqrt(np.einsum("ij,ij->j", ws.vt.conj(), ws.vt).real).max()
781
+ omegat = np.zeros((n, m), dtype=dtype)
782
+ for i in range(m):
783
+ if np.abs(ws.omega[i, i, t3p]) > 1e-14:
784
+ omegat[:, i] = ws.v[:, i, t3p] / ws.omega[i, i, t3p]
785
+ if is_complex_input:
786
+ Qres0 = np.max(np.sqrt(np.sum(np.abs(np.conj(ws.vt) * ws.vt), axis=0)))
787
+ else:
788
+ Qres0 = np.max(np.sqrt(np.sum(ws.vt * ws.vt, axis=0)))
510
789
 
511
790
  if Qres0 < 1e-16:
512
791
  result = x.real if not is_complex_input else x
513
792
  return result, 0, 0.0, 0, np.array([0.0])
514
793
 
515
- flag, resv, Qres1, relres, iter_count = 1, np.zeros(maxiter), None, 1.0, 0
516
- omegat = None if isquasires else Q @ np.diag(1.0 / (col_norms + 1e-16))
794
+ flag, resv, Qres1, relres, iter_count = 1, np.zeros(maxiter), -1.0, 1.0, 0
517
795
 
518
796
  for k in range(1, maxiter + 1):
519
- t3, t3n, t3p, t3nn = k % 3, (k - 1) % 3, (k + 1) % 3, (k - 2) % 3
797
+ # Index cycling
798
+ t3 = k % 3
799
+ t3p = (k + 1) % 3
800
+ t3n = (k - 1) % 3
801
+ t3nn = (k - 2) % 3
520
802
 
803
+ # tmp = A * v(:,:,t3)
521
804
  if A_is_sparse:
522
805
  ws.Av[:] = A @ ws.v[:, :, t3]
523
806
  else:
524
807
  np.matmul(A, ws.v[:, :, t3], out=ws.Av)
525
808
 
526
- if precond is not None:
809
+ # Apply preconditioner
810
+ if use_split_precond:
811
+ # Split preconditioning: M1⁻¹ * A * M2⁻¹ * v
812
+ tmp = precond_M2.solve(ws.v[:, :, t3]) # M2⁻¹ * v
813
+ if A_is_sparse:
814
+ tmp = A @ tmp # A * M2⁻¹ * v
815
+ else:
816
+ tmp = np.matmul(A, tmp)
817
+ ws.vt[:] = precond_M1.solve(tmp) - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
818
+ elif precond is not None:
819
+ # Left-only preconditioning: M⁻¹ * A * v
527
820
  precond.solve(ws.Av, out=ws.vt)
528
- ws.vt -= ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
821
+ ws.vt[:] = ws.vt - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
529
822
  else:
530
- np.matmul(ws.v[:, :, t3n], ws.beta[:, :, t3].T, out=ws.vt)
531
- np.subtract(ws.Av, ws.vt, out=ws.vt)
823
+ ws.vt[:] = ws.Av - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
532
824
 
533
- np.matmul(ws.v[:, :, t3].T, ws.vt, out=ws.alpha)
534
- ws.vt -= ws.v[:, :, t3] @ ws.alpha
825
+ # alpha = v^T * vt (transpose, not conjugate transpose)
826
+ ws.alpha[:] = ws.v[:, :, t3].T @ ws.vt
827
+ ws.vt[:] = ws.vt - ws.v[:, :, t3] @ ws.alpha
535
828
 
829
+ # QQR decomposition
536
830
  Q, R = qqr(ws.vt)
537
831
  ws.v[:, :, t3p] = Q
538
832
  ws.beta[:, :, t3p] = R
539
833
 
540
- col_norms = np.sqrt(np.einsum("ij,ij->j", Q.conj(), Q).real)
541
- ws.omega[:, :, t3p] = np.diag(col_norms)
542
-
543
- np.matmul(ws.omega[:, :, t3n], ws.beta[:, :, t3].T, out=ws.tmp0)
544
- np.matmul(ws.Qb[:, :, t3nn], ws.tmp0, out=ws.theta)
545
-
546
- np.matmul(ws.Qd[:, :, t3nn], ws.tmp0, out=ws.tmp1)
547
- np.matmul(ws.omega[:, :, t3], ws.alpha, out=ws.tmp2)
548
- np.matmul(ws.Qa[:, :, t3n], ws.tmp1, out=ws.eta)
549
- ws.eta += ws.Qb[:, :, t3n] @ ws.tmp2
834
+ # Compute omega (standard Hermitian norm)
835
+ ws.omega[:, :, t3p].fill(0)
836
+ if is_complex_input:
837
+ np.fill_diagonal(
838
+ ws.omega[:, :, t3p],
839
+ np.sqrt(
840
+ np.einsum(
841
+ "ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]
842
+ ).real
843
+ ),
844
+ )
845
+ else:
846
+ np.fill_diagonal(
847
+ ws.omega[:, :, t3p],
848
+ np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
849
+ )
550
850
 
551
- np.matmul(ws.Qc[:, :, t3n], ws.tmp1, out=ws.zetat)
552
- ws.zetat += ws.Qd[:, :, t3n] @ ws.tmp2
851
+ # Compute intermediate matrices
852
+ ws.tmp0[:] = ws.omega[:, :, t3n] @ ws.beta[:, :, t3].T
853
+ ws.theta[:] = ws.Qb[:, :, t3nn] @ ws.tmp0
854
+ ws.tmp1[:] = ws.Qd[:, :, t3nn] @ ws.tmp0
855
+ ws.tmp2[:] = ws.omega[:, :, t3] @ ws.alpha
856
+ ws.eta[:] = ws.Qa[:, :, t3n] @ ws.tmp1 + ws.Qb[:, :, t3n] @ ws.tmp2
857
+ ws.zetat[:] = ws.Qc[:, :, t3n] @ ws.tmp1 + ws.Qd[:, :, t3n] @ ws.tmp2
553
858
 
859
+ # Build ZZ matrix and do standard QR
554
860
  ws.stacked[:m, :] = ws.zetat
555
- np.matmul(ws.omega[:, :, t3p], ws.beta[:, :, t3p], out=ws.stacked[m:, :])
861
+ ws.stacked[m:, :] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
556
862
 
557
863
  QQ, zeta_full = np.linalg.qr(ws.stacked, mode="complete")
558
864
  ws.zeta[:] = zeta_full[:m, :]
559
- ws.QQ_full[:] = QQ.conj().T
865
+
866
+ if is_complex_input:
867
+ ws.QQ_full[:] = np.conj(QQ.T)
868
+ else:
869
+ ws.QQ_full[:] = QQ.T
560
870
 
561
871
  ws.Qa[:, :, t3] = ws.QQ_full[:m, :m]
562
872
  ws.Qb[:, :, t3] = ws.QQ_full[:m, m : 2 * m]
563
873
  ws.Qc[:, :, t3] = ws.QQ_full[m : 2 * m, :m]
564
874
  ws.Qd[:, :, t3] = ws.QQ_full[m : 2 * m, m : 2 * m]
565
875
 
876
+ # Invert zeta
566
877
  try:
567
878
  zeta_inv = np.linalg.inv(ws.zeta)
568
879
  except np.linalg.LinAlgError:
569
880
  zeta_inv = np.linalg.pinv(ws.zeta)
570
881
 
882
+ # Update p, tau, x, taut
571
883
  ws.p[:, :, t3] = (
572
884
  ws.v[:, :, t3] - ws.p[:, :, t3n] @ ws.eta - ws.p[:, :, t3nn] @ ws.theta
573
885
  ) @ zeta_inv
886
+ ws.tau[:] = ws.Qa[:, :, t3] @ ws.taot
887
+ x[:] = x + ws.p[:, :, t3] @ ws.tau
888
+ ws.taot[:] = ws.Qc[:, :, t3] @ ws.taot
574
889
 
575
- np.matmul(ws.Qa[:, :, t3], ws.taot, out=ws.tau)
576
- x += ws.p[:, :, t3] @ ws.tau
577
-
578
- taot_copy = ws.taot.copy()
579
- np.matmul(ws.Qc[:, :, t3], taot_copy, out=ws.taot)
580
-
890
+ # Compute residual
581
891
  if isquasires:
582
- Qres = np.sqrt(np.einsum("ij,ij->j", ws.taot.conj(), ws.taot).real).max()
892
+ if is_complex_input:
893
+ Qres = np.max(
894
+ np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
895
+ )
896
+ else:
897
+ Qres = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
583
898
  else:
584
- omega_diag_inv = np.diag(1.0 / (col_norms + 1e-16))
585
- omegat = (
586
- omegat @ ws.Qc[:, :, t3].conj().T
587
- + ws.v[:, :, t3p] @ (ws.Qd[:, :, t3] @ omega_diag_inv).conj().T
588
- )
589
- R_resid = omegat @ ws.taot
590
- Qres = np.sqrt(np.einsum("ij,ij->j", R_resid.conj(), R_resid).real).max()
899
+ tmp0_diag = np.zeros((m, m), dtype=dtype)
900
+ for i in range(m):
901
+ if np.abs(ws.omega[i, i, t3p]) > 1e-14:
902
+ tmp0_diag[i, :] = ws.Qd[:, i, t3] / ws.omega[i, i, t3p]
903
+ if is_complex_input:
904
+ omegat = omegat @ np.conj(ws.Qc[:, :, t3].T) + ws.v[
905
+ :, :, t3p
906
+ ] @ np.conj(tmp0_diag)
907
+ tmp_res = np.conj(omegat @ ws.taot)
908
+ Qres = np.max(
909
+ np.sqrt(np.sum(np.abs(np.conj(tmp_res) * tmp_res), axis=0))
910
+ )
911
+ else:
912
+ omegat = omegat @ ws.Qc[:, :, t3].T + ws.v[:, :, t3p] @ tmp0_diag
913
+ tmp_res = omegat @ ws.taot
914
+ Qres = np.max(np.sqrt(np.sum(tmp_res * tmp_res, axis=0)))
591
915
 
592
916
  resv[k - 1] = Qres
593
917
 
594
- if Qres1 is not None and Qres == Qres1:
918
+ if k > 1 and abs(Qres - Qres1) < np.finfo(dtype).eps:
595
919
  flag, iter_count = 3, k
596
920
  break
597
921
 
@@ -602,6 +926,11 @@ def _blqmr_python_impl(
602
926
  break
603
927
 
604
928
  resv = resv[:iter_count]
929
+
930
+ # For split preconditioning, recover x = M2⁻¹ * y
931
+ if use_split_precond:
932
+ x = precond_M2.solve(x)
933
+
605
934
  result = x.real if not is_complex_input else x
606
935
  return result, flag, relres, iter_count, resv
607
936
 
@@ -621,7 +950,7 @@ def blqmr_solve(
621
950
  tol: float = 1e-6,
622
951
  maxiter: Optional[int] = None,
623
952
  droptol: float = 0.001,
624
- use_precond: bool = True,
953
+ precond_type: PrecondType = "ilu",
625
954
  zero_based: bool = True,
626
955
  ) -> BLQMRResult:
627
956
  """
@@ -647,8 +976,12 @@ def blqmr_solve(
647
976
  Maximum iterations. Default is n.
648
977
  droptol : float, default 0.001
649
978
  Drop tolerance for ILU preconditioner (Fortran only).
650
- use_precond : bool, default True
651
- Whether to use ILU preconditioning.
979
+ precond_type : None, '', or str, default 'ilu'
980
+ Preconditioner type:
981
+ - None or '': No preconditioning
982
+ - 'ilu', 'ilu0', 'ilut': Incomplete LU
983
+ - 'diag', 'jacobi': Diagonal (Jacobi)
984
+ - For Fortran: integers 2 (ILU) or 3 (diagonal) also accepted
652
985
  zero_based : bool, default True
653
986
  If True, Ap and Ai use 0-based indexing (Python/C convention).
654
987
  If False, uses 1-based indexing (Fortran convention).
@@ -673,7 +1006,7 @@ def blqmr_solve(
673
1006
  tol=tol,
674
1007
  maxiter=maxiter,
675
1008
  droptol=droptol,
676
- use_precond=use_precond,
1009
+ precond_type=precond_type,
677
1010
  zero_based=zero_based,
678
1011
  )
679
1012
  else:
@@ -685,13 +1018,13 @@ def blqmr_solve(
685
1018
  x0=x0,
686
1019
  tol=tol,
687
1020
  maxiter=maxiter,
688
- use_precond=use_precond,
1021
+ precond_type=precond_type,
689
1022
  zero_based=zero_based,
690
1023
  )
691
1024
 
692
1025
 
693
1026
  def _blqmr_solve_fortran(
694
- Ap, Ai, Ax, b, *, x0, tol, maxiter, droptol, use_precond, zero_based
1027
+ Ap, Ai, Ax, b, *, x0, tol, maxiter, droptol, precond_type, zero_based
695
1028
  ) -> BLQMRResult:
696
1029
  """Fortran backend for blqmr_solve."""
697
1030
  n = len(Ap) - 1
@@ -711,10 +1044,10 @@ def _blqmr_solve_fortran(
711
1044
  Ap = Ap + 1
712
1045
  Ai = Ai + 1
713
1046
 
714
- dopcond = 1 if use_precond else 0
1047
+ pcond_type = _parse_precond_type_for_fortran(precond_type)
715
1048
 
716
1049
  x, flag, niter, relres = _blqmr.blqmr_solve_real(
717
- n, nnz, Ap, Ai, Ax, b, maxiter, tol, droptol, dopcond
1050
+ n, nnz, Ap, Ai, Ax, b, maxiter, tol, droptol, pcond_type
718
1051
  )
719
1052
 
720
1053
  return BLQMRResult(
@@ -723,7 +1056,7 @@ def _blqmr_solve_fortran(
723
1056
 
724
1057
 
725
1058
  def _blqmr_solve_native_csc(
726
- Ap, Ai, Ax, b, *, x0, tol, maxiter, use_precond, zero_based
1059
+ Ap, Ai, Ax, b, *, x0, tol, maxiter, precond_type, zero_based
727
1060
  ) -> BLQMRResult:
728
1061
  """Native Python backend for blqmr_solve with CSC input."""
729
1062
  n = len(Ap) - 1
@@ -734,12 +1067,7 @@ def _blqmr_solve_native_csc(
734
1067
 
735
1068
  A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))
736
1069
 
737
- M1 = None
738
- if use_precond:
739
- try:
740
- M1 = make_preconditioner(A, "ilu")
741
- except Exception:
742
- M1 = make_preconditioner(A, "diag")
1070
+ M1 = _get_preconditioner_for_native(A, precond_type, None)
743
1071
 
744
1072
  x, flag, relres, niter, resv = _blqmr_python_impl(
745
1073
  A, b, tol=tol, maxiter=maxiter, M1=M1, x0=x0
@@ -760,13 +1088,18 @@ def blqmr_solve_multi(
760
1088
  tol: float = 1e-6,
761
1089
  maxiter: Optional[int] = None,
762
1090
  droptol: float = 0.001,
763
- use_precond: bool = True,
1091
+ precond_type: PrecondType = "ilu",
764
1092
  zero_based: bool = True,
765
1093
  ) -> BLQMRResult:
766
1094
  """
767
1095
  Solve sparse linear system AX = B with multiple right-hand sides.
768
1096
 
769
1097
  Uses Fortran extension if available, otherwise falls back to pure Python.
1098
+
1099
+ Parameters
1100
+ ----------
1101
+ precond_type : None, '', or str, default 'ilu'
1102
+ Preconditioner type (see blqmr_solve for details)
770
1103
  """
771
1104
  n = len(Ap) - 1
772
1105
 
@@ -782,7 +1115,7 @@ def blqmr_solve_multi(
782
1115
  tol=tol,
783
1116
  maxiter=maxiter,
784
1117
  droptol=droptol,
785
- use_precond=use_precond,
1118
+ precond_type=precond_type,
786
1119
  zero_based=zero_based,
787
1120
  )
788
1121
  else:
@@ -793,13 +1126,13 @@ def blqmr_solve_multi(
793
1126
  B,
794
1127
  tol=tol,
795
1128
  maxiter=maxiter,
796
- use_precond=use_precond,
1129
+ precond_type=precond_type,
797
1130
  zero_based=zero_based,
798
1131
  )
799
1132
 
800
1133
 
801
1134
  def _blqmr_solve_multi_fortran(
802
- Ap, Ai, Ax, B, *, tol, maxiter, droptol, use_precond, zero_based
1135
+ Ap, Ai, Ax, B, *, tol, maxiter, droptol, precond_type, zero_based
803
1136
  ) -> BLQMRResult:
804
1137
  """Fortran backend for blqmr_solve_multi."""
805
1138
  n = len(Ap) - 1
@@ -818,10 +1151,11 @@ def _blqmr_solve_multi_fortran(
818
1151
  Ap = Ap + 1
819
1152
  Ai = Ai + 1
820
1153
 
821
- dopcond = 1 if use_precond else 0
1154
+ # Convert precond_type string to Fortran integer code
1155
+ pcond_type = _parse_precond_type_for_fortran(precond_type)
822
1156
 
823
1157
  X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
824
- n, nnz, nrhs, Ap, Ai, Ax, B, maxiter, tol, droptol, dopcond
1158
+ n, nnz, nrhs, Ap, Ai, Ax, B, maxiter, tol, droptol, pcond_type
825
1159
  )
826
1160
 
827
1161
  return BLQMRResult(
@@ -830,7 +1164,7 @@ def _blqmr_solve_multi_fortran(
830
1164
 
831
1165
 
832
1166
  def _blqmr_solve_multi_native(
833
- Ap, Ai, Ax, B, *, tol, maxiter, use_precond, zero_based
1167
+ Ap, Ai, Ax, B, *, tol, maxiter, precond_type, zero_based
834
1168
  ) -> BLQMRResult:
835
1169
  """Native Python backend for blqmr_solve_multi."""
836
1170
  n = len(Ap) - 1
@@ -841,12 +1175,7 @@ def _blqmr_solve_multi_native(
841
1175
 
842
1176
  A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))
843
1177
 
844
- M1 = None
845
- if use_precond:
846
- try:
847
- M1 = make_preconditioner(A, "ilu")
848
- except Exception:
849
- M1 = make_preconditioner(A, "diag")
1178
+ M1 = _get_preconditioner_for_native(A, precond_type, None)
850
1179
 
851
1180
  if B.ndim == 1:
852
1181
  B = B.reshape(-1, 1)
@@ -909,7 +1238,7 @@ def blqmr(
909
1238
  residual: bool = False,
910
1239
  workspace: Optional[BLQMRWorkspace] = None,
911
1240
  droptol: float = 0.001,
912
- use_precond: bool = True,
1241
+ precond_type: PrecondType = "ilu",
913
1242
  ) -> BLQMRResult:
914
1243
  """
915
1244
  Block Quasi-Minimal-Residual (BL-QMR) solver - main interface.
@@ -925,9 +1254,10 @@ def blqmr(
925
1254
  tol : float
926
1255
  Convergence tolerance (default: 1e-6)
927
1256
  maxiter : int, optional
928
- Maximum iterations (default: n for Fortran, min(n, 20) for Python)
1257
+ Maximum iterations (default: n for Fortran, min(n, 100) for Python)
929
1258
  M1, M2 : preconditioner, optional
930
- Preconditioner M = M1 @ M2 (Python backend only)
1259
+ Custom preconditioners. If provided, precond_type is ignored.
1260
+ M = M1 @ M2 for split preconditioning (Python backend only)
931
1261
  x0 : ndarray, optional
932
1262
  Initial guess
933
1263
  residual : bool
@@ -936,8 +1266,13 @@ def blqmr(
936
1266
  Pre-allocated workspace (Python backend only)
937
1267
  droptol : float, default 0.001
938
1268
  Drop tolerance for ILU preconditioner (Fortran backend only)
939
- use_precond : bool, default True
940
- Whether to use ILU preconditioning (Fortran backend only)
1269
+ precond_type : None, '', or str, default 'ilu'
1270
+ Preconditioner type (ignored if M1 is provided):
1271
+ - None or '': No preconditioning
1272
+ - 'ilu', 'ilu0', 'ilut': Incomplete LU
1273
+ - 'diag', 'jacobi': Diagonal (Jacobi)
1274
+ - 'lu': Full LU (expensive, for debugging)
1275
+ - For Fortran: integers 2 (ILU) or 3 (diagonal) also accepted
941
1276
 
942
1277
  Returns
943
1278
  -------
@@ -957,7 +1292,7 @@ def blqmr(
957
1292
  maxiter=maxiter,
958
1293
  x0=x0,
959
1294
  droptol=droptol,
960
- use_precond=use_precond,
1295
+ precond_type=precond_type,
961
1296
  )
962
1297
  else:
963
1298
  return _blqmr_native(
@@ -970,7 +1305,7 @@ def blqmr(
970
1305
  x0=x0,
971
1306
  residual=residual,
972
1307
  workspace=workspace,
973
- use_precond=use_precond,
1308
+ precond_type=precond_type,
974
1309
  )
975
1310
 
976
1311
 
@@ -982,44 +1317,79 @@ def _blqmr_fortran(
982
1317
  maxiter: Optional[int],
983
1318
  x0: Optional[np.ndarray],
984
1319
  droptol: float,
985
- use_precond: bool,
1320
+ precond_type: PrecondType,
986
1321
  ) -> BLQMRResult:
987
1322
  """Fortran backend for blqmr()."""
988
1323
  A_csc = sparse.csc_matrix(A)
1324
+
1325
+ # CRITICAL: Sort indices for UMFPACK compatibility
1326
+ if not A_csc.has_sorted_indices:
1327
+ A_csc.sort_indices()
1328
+
989
1329
  Ap = A_csc.indptr.astype(np.int32)
990
1330
  Ai = A_csc.indices.astype(np.int32)
991
- Ax = A_csc.data.astype(np.float64)
992
1331
 
993
1332
  n = A_csc.shape[0]
994
- nnz = len(Ax)
1333
+ nnz = A_csc.nnz
995
1334
 
996
1335
  if maxiter is None:
997
1336
  maxiter = n
998
1337
 
999
- # Convert to Fortran format
1000
- Ap_f = np.asfortranarray(Ap + 1, dtype=np.int32) # 1-based
1001
- Ai_f = np.asfortranarray(Ai + 1, dtype=np.int32) # 1-based
1002
- Ax_f = np.asfortranarray(Ax, dtype=np.float64)
1338
+ # Convert to Fortran format (1-based indexing)
1339
+ Ap_f = np.asfortranarray(Ap + 1, dtype=np.int32)
1340
+ Ai_f = np.asfortranarray(Ai + 1, dtype=np.int32)
1003
1341
 
1004
- dopcond = 1 if use_precond else 0
1342
+ pcond_type = _parse_precond_type_for_fortran(precond_type)
1005
1343
 
1006
- if B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1):
1007
- b = np.asfortranarray(B.ravel(), dtype=np.float64)
1008
- x, flag, niter, relres = _blqmr.blqmr_solve_real(
1009
- n, nnz, Ap_f, Ai_f, Ax_f, b, maxiter, tol, droptol, dopcond
1010
- )
1011
- return BLQMRResult(
1012
- x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
1013
- )
1344
+ # Check if complex
1345
+ is_complex = np.iscomplexobj(A) or np.iscomplexobj(B)
1346
+
1347
+ if is_complex:
1348
+ # Complex path
1349
+ Ax_f = np.asfortranarray(A_csc.data, dtype=np.complex128)
1350
+
1351
+ if B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1):
1352
+ # Single RHS
1353
+ b_f = np.asfortranarray(B.ravel(), dtype=np.complex128)
1354
+ x, flag, niter, relres = _blqmr.blqmr_solve_complex(
1355
+ n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, pcond_type
1356
+ )
1357
+ return BLQMRResult(
1358
+ x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
1359
+ )
1360
+ else:
1361
+ # Multiple RHS - use block method
1362
+ B_f = np.asfortranarray(B, dtype=np.complex128)
1363
+ nrhs = B_f.shape[1]
1364
+ X, flag, niter, relres = _blqmr.blqmr_solve_complex_multi(
1365
+ n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, pcond_type
1366
+ )
1367
+ return BLQMRResult(
1368
+ x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
1369
+ )
1014
1370
  else:
1015
- B_f = np.asfortranarray(B, dtype=np.float64)
1016
- nrhs = B_f.shape[1]
1017
- X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
1018
- n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, dopcond
1019
- )
1020
- return BLQMRResult(
1021
- x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
1022
- )
1371
+ # Real path
1372
+ Ax_f = np.asfortranarray(A_csc.data, dtype=np.float64)
1373
+
1374
+ if B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1):
1375
+ # Single RHS
1376
+ b_f = np.asfortranarray(B.ravel(), dtype=np.float64)
1377
+ x, flag, niter, relres = _blqmr.blqmr_solve_real(
1378
+ n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, pcond_type
1379
+ )
1380
+ return BLQMRResult(
1381
+ x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
1382
+ )
1383
+ else:
1384
+ # Multiple RHS - use block method
1385
+ B_f = np.asfortranarray(B, dtype=np.float64)
1386
+ nrhs = B_f.shape[1]
1387
+ X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
1388
+ n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, pcond_type
1389
+ )
1390
+ return BLQMRResult(
1391
+ x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
1392
+ )
1023
1393
 
1024
1394
 
1025
1395
  def _blqmr_native(
@@ -1033,16 +1403,13 @@ def _blqmr_native(
1033
1403
  x0: Optional[np.ndarray],
1034
1404
  residual: bool,
1035
1405
  workspace: Optional[BLQMRWorkspace],
1036
- use_precond: bool,
1406
+ precond_type: PrecondType,
1037
1407
  ) -> BLQMRResult:
1038
1408
  """Native Python backend for blqmr()."""
1039
- # Auto-create preconditioner if requested and not provided
1040
- if use_precond and M1 is None:
1409
+ # Get preconditioner (user-provided M1 takes precedence)
1410
+ if M1 is None:
1041
1411
  A_sp = sparse.csc_matrix(A) if not sparse.issparse(A) else A
1042
- try:
1043
- M1 = make_preconditioner(A_sp, "ilu")
1044
- except Exception:
1045
- M1 = make_preconditioner(A_sp, "diag")
1412
+ M1 = _get_preconditioner_for_native(A_sp, precond_type, None)
1046
1413
 
1047
1414
  x, flag, relres, niter, resv = _blqmr_python_impl(
1048
1415
  A,