blocksolver 0.8.3__cp38-cp38-win_amd64.whl → 0.8.5__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
blocksolver/__init__.py CHANGED
@@ -39,7 +39,7 @@ from .blqmr import (
39
39
  HAS_NUMBA,
40
40
  )
41
41
 
42
- __version__ = "0.8.3"
42
+ __version__ = "0.8.5"
43
43
  __author__ = "Qianqian Fang"
44
44
 
45
45
  __all__ = [
Binary file
blocksolver/blqmr.py CHANGED
@@ -437,8 +437,101 @@ class BLQMRWorkspace:
437
437
  # Preconditioner Factory
438
438
  # =============================================================================
439
439
 
440
+ # Type alias for precond_type
441
+ PrecondType = Optional[Union[str, int]]
440
442
 
441
- def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag", **kwargs):
443
+
444
+ def _parse_precond_type_for_fortran(precond_type: PrecondType) -> int:
445
+ """
446
+ Convert precond_type to Fortran integer code.
447
+
448
+ Returns
449
+ -------
450
+ int
451
+ 0 = no preconditioning
452
+ 2 = ILU
453
+ 3 = diagonal/Jacobi
454
+ """
455
+ if precond_type is None or precond_type == "" or precond_type is False:
456
+ return 0
457
+
458
+ if isinstance(precond_type, int):
459
+ return precond_type
460
+
461
+ if isinstance(precond_type, str):
462
+ precond_lower = precond_type.lower()
463
+ if precond_lower in ("ilu", "ilu0", "ilut"):
464
+ return 2
465
+ elif precond_lower in ("diag", "jacobi"):
466
+ return 3
467
+ else:
468
+ # Unknown string, default to no preconditioning
469
+ warnings.warn(
470
+ f"Unknown precond_type '{precond_type}' for Fortran backend, using no preconditioning"
471
+ )
472
+ return 0
473
+
474
+ return 0
475
+
476
+
477
+ def _get_preconditioner_for_native(A, precond_type: PrecondType, M1_provided):
478
+ """
479
+ Create preconditioner for native Python backend.
480
+
481
+ Parameters
482
+ ----------
483
+ A : sparse matrix
484
+ System matrix
485
+ precond_type : None, '', str, or int
486
+ Preconditioner type specification
487
+ M1_provided : preconditioner or None
488
+ User-provided preconditioner (takes precedence)
489
+
490
+ Returns
491
+ -------
492
+ M1 : preconditioner or None
493
+ """
494
+ # If user provided M1, use it
495
+ if M1_provided is not None:
496
+ return M1_provided
497
+
498
+ # No preconditioning requested
499
+ if precond_type is None or precond_type == "" or precond_type is False:
500
+ return None
501
+
502
+ # Integer codes (for compatibility)
503
+ if isinstance(precond_type, int):
504
+ if precond_type == 0:
505
+ return None
506
+ elif precond_type == 2:
507
+ precond_str = "ilu"
508
+ elif precond_type == 3:
509
+ precond_str = "diag"
510
+ else:
511
+ precond_str = "ilu" # Default to ILU for other integers
512
+ else:
513
+ precond_str = precond_type
514
+
515
+ # Create preconditioner
516
+ try:
517
+ return make_preconditioner(A, precond_str)
518
+ except Exception as e:
519
+ # Fallback chain: try diag if ilu fails
520
+ if precond_str not in ("diag", "jacobi"):
521
+ try:
522
+ warnings.warn(
523
+ f"Preconditioner '{precond_str}' failed: {e}, falling back to diagonal"
524
+ )
525
+ return make_preconditioner(A, "diag")
526
+ except Exception:
527
+ pass
528
+ warnings.warn(f"All preconditioners failed, proceeding without preconditioning")
529
+ return None
530
+
531
+
532
+ def make_preconditioner(
533
+ A: sparse.spmatrix, precond_type: str = "diag", split: bool = False, **kwargs
534
+ ):
442
535
  """
443
536
  Create a preconditioner for iterative solvers.
444
537
 
@@ -449,25 +542,32 @@ def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag", **kwargs
449
542
  precond_type : str
450
543
  'diag' or 'jacobi': Diagonal (Jacobi) preconditioner
451
544
  'ilu' or 'ilu0': Incomplete LU with minimal fill
452
- 'ilut': Incomplete LU with threshold (better quality)
453
- 'lu': Full LU factorization (exact, use as reference)
454
- 'ssor': Symmetric SOR
545
+ 'ilut': Incomplete LU with threshold
546
+ 'lu': Full LU factorization
547
+ split : bool
548
+ If True, return sqrt(D) for split preconditioning (M1=M2=sqrt(D))
549
+ If False, return D for left preconditioning
455
550
  **kwargs : dict
456
- Additional parameters for ILU:
457
- - drop_tol: Drop tolerance (default: 1e-4 for ilut, 0 for ilu0)
458
- - fill_factor: Fill factor (default: 10 for ilut, 1 for ilu0)
551
+ Additional parameters
459
552
 
460
553
  Returns
461
554
  -------
462
555
  M : preconditioner object
463
- Preconditioner (use as M1 in blqmr)
556
+ For split Jacobi, use as: blqmr(A, b, M1=M, M2=M)
464
557
  """
465
558
  if precond_type in ("diag", "jacobi"):
466
559
  diag = A.diagonal().copy()
467
560
  diag[np.abs(diag) < 1e-14] = 1.0
468
- return sparse.diags(
469
- 1.0 / diag, format="csr"
470
- ) # Return inverse for preconditioning!
561
+
562
+ if split:
563
+ # For split preconditioning: return sqrt(D)
564
+ # Usage: M1 = M2 = sqrt(D), gives D^{-1/2} A D^{-1/2}
565
+ sqrt_diag = np.sqrt(diag)
566
+ return sparse.diags(sqrt_diag, format="csr")
567
+ else:
568
+ # For left preconditioning: return D
569
+ # Usage: M1 = D, M2 = None, gives D^{-1} A
570
+ return sparse.diags(diag, format="csr")
471
571
 
472
572
  elif precond_type == "ilu0":
473
573
  # ILU(0) - no fill-in, fast but may be poor quality
@@ -569,19 +669,43 @@ def _blqmr_python_impl(
569
669
  ws = workspace
570
670
  ws.reset()
571
671
 
572
- # Setup preconditioner
573
- if M1 is not None:
672
+ # Setup preconditioner - distinguish split vs left-only
673
+ use_split_precond = False
674
+ precond = None
675
+ precond_M1 = None
676
+ precond_M2 = None
677
+
678
+ if M1 is not None and M2 is not None:
679
+ # Split preconditioning: M1⁻¹ A M2⁻¹
680
+ use_split_precond = True
574
681
  if isinstance(M1, (_ILUPreconditioner, _LUPreconditioner)):
575
- precond = SparsePreconditioner(M1, M2)
682
+ precond_M1 = SparsePreconditioner(M1, None)
576
683
  elif sparse.issparse(M1):
577
- precond = SparsePreconditioner(M1, M2)
684
+ precond_M1 = SparsePreconditioner(M1, None)
578
685
  elif hasattr(M1, "solve"):
579
- # Custom preconditioner with .solve() method
580
- precond = M1 # Use directly
686
+ precond_M1 = M1
581
687
  else:
582
- precond = DensePreconditioner(M1, M2)
583
- else:
584
- precond = None
688
+ precond_M1 = DensePreconditioner(M1, None)
689
+
690
+ if isinstance(M2, (_ILUPreconditioner, _LUPreconditioner)):
691
+ precond_M2 = SparsePreconditioner(M2, None)
692
+ elif sparse.issparse(M2):
693
+ precond_M2 = SparsePreconditioner(M2, None)
694
+ elif hasattr(M2, "solve"):
695
+ precond_M2 = M2
696
+ else:
697
+ precond_M2 = DensePreconditioner(M2, None)
698
+
699
+ elif M1 is not None:
700
+ # Left-only preconditioning: M1⁻¹ A
701
+ if isinstance(M1, (_ILUPreconditioner, _LUPreconditioner)):
702
+ precond = SparsePreconditioner(M1, None)
703
+ elif sparse.issparse(M1):
704
+ precond = SparsePreconditioner(M1, None)
705
+ elif hasattr(M1, "solve"):
706
+ precond = M1
707
+ else:
708
+ precond = DensePreconditioner(M1, None)
585
709
 
586
710
  if x0 is None:
587
711
  x = np.zeros((n, m), dtype=dtype)
@@ -608,7 +732,14 @@ def _blqmr_python_impl(
608
732
  else:
609
733
  np.subtract(B, A @ x, out=ws.vt)
610
734
 
611
- if precond is not None:
735
+ # Apply preconditioner to initial residual
736
+ if use_split_precond:
737
+ # For split preconditioning, initial residual is just M1⁻¹ * (b - A*x0)
738
+ # because we're solving M1⁻¹ A M2⁻¹ y = M1⁻¹ b with y = M2*x
739
+ ws.vt[:] = precond_M1.solve(ws.vt)
740
+ if np.any(np.isnan(ws.vt)):
741
+ return x, 2, 1.0, 0, np.array([])
742
+ elif precond is not None:
612
743
  precond.solve(ws.vt, out=ws.vt)
613
744
  if np.any(np.isnan(ws.vt)):
614
745
  return x, 2, 1.0, 0, np.array([])
@@ -620,12 +751,19 @@ def _blqmr_python_impl(
620
751
 
621
752
  # Compute omega - standard norm WITH conjugation (Hermitian norm)
622
753
  # Fortran: omega(i,i,t3p)=sqrt(sum(conjg(v(:,i,t3p))*v(:,i,t3p)))
623
- for i in range(m):
624
- col = ws.v[:, i, t3p]
625
- if is_complex_input:
626
- ws.omega[i, i, t3p] = np.sqrt(np.sum(np.conj(col) * col).real)
627
- else:
628
- ws.omega[i, i, t3p] = np.sqrt(np.sum(col * col))
754
+ ws.omega[:, :, t3p].fill(0)
755
+ if is_complex_input:
756
+ np.fill_diagonal(
757
+ ws.omega[:, :, t3p],
758
+ np.sqrt(
759
+ np.einsum("ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]).real
760
+ ),
761
+ )
762
+ else:
763
+ np.fill_diagonal(
764
+ ws.omega[:, :, t3p],
765
+ np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
766
+ )
629
767
 
630
768
  # taut = omega * beta
631
769
  ws.taot[:] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
@@ -634,9 +772,11 @@ def _blqmr_python_impl(
634
772
  if isquasires:
635
773
  # Fortran: Qres0=maxval(sqrt(sum(abs(conjg(taut)*taut),1))) for complex
636
774
  if is_complex_input:
637
- Qres0 = np.max(np.sqrt(np.sum(np.abs(np.conj(ws.taot) * ws.taot), axis=0)))
775
+ Qres0 = np.max(
776
+ np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
777
+ )
638
778
  else:
639
- Qres0 = np.max(np.sqrt(np.sum(ws.taot * ws.taot, axis=0)))
779
+ Qres0 = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
640
780
  else:
641
781
  omegat = np.zeros((n, m), dtype=dtype)
642
782
  for i in range(m):
@@ -667,7 +807,16 @@ def _blqmr_python_impl(
667
807
  np.matmul(A, ws.v[:, :, t3], out=ws.Av)
668
808
 
669
809
  # Apply preconditioner
670
- if precond is not None:
810
+ if use_split_precond:
811
+ # Split preconditioning: M1⁻¹ * A * M2⁻¹ * v
812
+ tmp = precond_M2.solve(ws.v[:, :, t3]) # M2⁻¹ * v
813
+ if A_is_sparse:
814
+ tmp = A @ tmp # A * M2⁻¹ * v
815
+ else:
816
+ tmp = np.matmul(A, tmp)
817
+ ws.vt[:] = precond_M1.solve(tmp) - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
818
+ elif precond is not None:
819
+ # Left-only preconditioning: M⁻¹ * A * v
671
820
  precond.solve(ws.Av, out=ws.vt)
672
821
  ws.vt[:] = ws.vt - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
673
822
  else:
@@ -683,12 +832,21 @@ def _blqmr_python_impl(
683
832
  ws.beta[:, :, t3p] = R
684
833
 
685
834
  # Compute omega (standard Hermitian norm)
686
- for i in range(m):
687
- col = ws.v[:, i, t3p]
688
- if is_complex_input:
689
- ws.omega[i, i, t3p] = np.sqrt(np.sum(np.conj(col) * col).real)
690
- else:
691
- ws.omega[i, i, t3p] = np.sqrt(np.sum(col * col))
835
+ ws.omega[:, :, t3p].fill(0)
836
+ if is_complex_input:
837
+ np.fill_diagonal(
838
+ ws.omega[:, :, t3p],
839
+ np.sqrt(
840
+ np.einsum(
841
+ "ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]
842
+ ).real
843
+ ),
844
+ )
845
+ else:
846
+ np.fill_diagonal(
847
+ ws.omega[:, :, t3p],
848
+ np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
849
+ )
692
850
 
693
851
  # Compute intermediate matrices
694
852
  ws.tmp0[:] = ws.omega[:, :, t3n] @ ws.beta[:, :, t3].T
@@ -733,10 +891,10 @@ def _blqmr_python_impl(
733
891
  if isquasires:
734
892
  if is_complex_input:
735
893
  Qres = np.max(
736
- np.sqrt(np.sum(np.abs(np.conj(ws.taot) * ws.taot), axis=0))
894
+ np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
737
895
  )
738
896
  else:
739
- Qres = np.max(np.sqrt(np.sum(ws.taot * ws.taot, axis=0)))
897
+ Qres = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
740
898
  else:
741
899
  tmp0_diag = np.zeros((m, m), dtype=dtype)
742
900
  for i in range(m):
@@ -768,6 +926,11 @@ def _blqmr_python_impl(
768
926
  break
769
927
 
770
928
  resv = resv[:iter_count]
929
+
930
+ # For split preconditioning, recover x = M2⁻¹ * y
931
+ if use_split_precond:
932
+ x = precond_M2.solve(x)
933
+
771
934
  result = x.real if not is_complex_input else x
772
935
  return result, flag, relres, iter_count, resv
773
936
 
@@ -787,7 +950,7 @@ def blqmr_solve(
787
950
  tol: float = 1e-6,
788
951
  maxiter: Optional[int] = None,
789
952
  droptol: float = 0.001,
790
- use_precond: bool = True,
953
+ precond_type: PrecondType = "ilu",
791
954
  zero_based: bool = True,
792
955
  ) -> BLQMRResult:
793
956
  """
@@ -813,8 +976,12 @@ def blqmr_solve(
813
976
  Maximum iterations. Default is n.
814
977
  droptol : float, default 0.001
815
978
  Drop tolerance for ILU preconditioner (Fortran only).
816
- use_precond : bool, default True
817
- Whether to use ILU preconditioning.
979
+ precond_type : None, '', or str, default 'ilu'
980
+ Preconditioner type:
981
+ - None or '': No preconditioning
982
+ - 'ilu', 'ilu0', 'ilut': Incomplete LU
983
+ - 'diag', 'jacobi': Diagonal (Jacobi)
984
+ - For Fortran: integers 2 (ILU) or 3 (diagonal) also accepted
818
985
  zero_based : bool, default True
819
986
  If True, Ap and Ai use 0-based indexing (Python/C convention).
820
987
  If False, uses 1-based indexing (Fortran convention).
@@ -839,7 +1006,7 @@ def blqmr_solve(
839
1006
  tol=tol,
840
1007
  maxiter=maxiter,
841
1008
  droptol=droptol,
842
- use_precond=use_precond,
1009
+ precond_type=precond_type,
843
1010
  zero_based=zero_based,
844
1011
  )
845
1012
  else:
@@ -851,13 +1018,13 @@ def blqmr_solve(
851
1018
  x0=x0,
852
1019
  tol=tol,
853
1020
  maxiter=maxiter,
854
- use_precond=use_precond,
1021
+ precond_type=precond_type,
855
1022
  zero_based=zero_based,
856
1023
  )
857
1024
 
858
1025
 
859
1026
  def _blqmr_solve_fortran(
860
- Ap, Ai, Ax, b, *, x0, tol, maxiter, droptol, use_precond, zero_based
1027
+ Ap, Ai, Ax, b, *, x0, tol, maxiter, droptol, precond_type, zero_based
861
1028
  ) -> BLQMRResult:
862
1029
  """Fortran backend for blqmr_solve."""
863
1030
  n = len(Ap) - 1
@@ -877,10 +1044,10 @@ def _blqmr_solve_fortran(
877
1044
  Ap = Ap + 1
878
1045
  Ai = Ai + 1
879
1046
 
880
- dopcond = 1 if use_precond else 0
1047
+ pcond_type = _parse_precond_type_for_fortran(precond_type)
881
1048
 
882
1049
  x, flag, niter, relres = _blqmr.blqmr_solve_real(
883
- n, nnz, Ap, Ai, Ax, b, maxiter, tol, droptol, dopcond
1050
+ n, nnz, Ap, Ai, Ax, b, maxiter, tol, droptol, pcond_type
884
1051
  )
885
1052
 
886
1053
  return BLQMRResult(
@@ -889,7 +1056,7 @@ def _blqmr_solve_fortran(
889
1056
 
890
1057
 
891
1058
  def _blqmr_solve_native_csc(
892
- Ap, Ai, Ax, b, *, x0, tol, maxiter, use_precond, zero_based
1059
+ Ap, Ai, Ax, b, *, x0, tol, maxiter, precond_type, zero_based
893
1060
  ) -> BLQMRResult:
894
1061
  """Native Python backend for blqmr_solve with CSC input."""
895
1062
  n = len(Ap) - 1
@@ -900,15 +1067,7 @@ def _blqmr_solve_native_csc(
900
1067
 
901
1068
  A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))
902
1069
 
903
- M1 = None
904
- if use_precond:
905
- try:
906
- M1 = make_preconditioner(A, "ilu")
907
- except Exception:
908
- try:
909
- M1 = make_preconditioner(A, "diag") # FIX: Changed A_sp to A
910
- except Exception:
911
- M1 = None # Fall back to no preconditioning
1070
+ M1 = _get_preconditioner_for_native(A, precond_type, None)
912
1071
 
913
1072
  x, flag, relres, niter, resv = _blqmr_python_impl(
914
1073
  A, b, tol=tol, maxiter=maxiter, M1=M1, x0=x0
@@ -929,13 +1088,18 @@ def blqmr_solve_multi(
929
1088
  tol: float = 1e-6,
930
1089
  maxiter: Optional[int] = None,
931
1090
  droptol: float = 0.001,
932
- use_precond: bool = True,
1091
+ precond_type: PrecondType = "ilu",
933
1092
  zero_based: bool = True,
934
1093
  ) -> BLQMRResult:
935
1094
  """
936
1095
  Solve sparse linear system AX = B with multiple right-hand sides.
937
1096
 
938
1097
  Uses Fortran extension if available, otherwise falls back to pure Python.
1098
+
1099
+ Parameters
1100
+ ----------
1101
+ precond_type : None, '', or str, default 'ilu'
1102
+ Preconditioner type (see blqmr_solve for details)
939
1103
  """
940
1104
  n = len(Ap) - 1
941
1105
 
@@ -951,7 +1115,7 @@ def blqmr_solve_multi(
951
1115
  tol=tol,
952
1116
  maxiter=maxiter,
953
1117
  droptol=droptol,
954
- use_precond=use_precond,
1118
+ precond_type=precond_type,
955
1119
  zero_based=zero_based,
956
1120
  )
957
1121
  else:
@@ -962,13 +1126,13 @@ def blqmr_solve_multi(
962
1126
  B,
963
1127
  tol=tol,
964
1128
  maxiter=maxiter,
965
- use_precond=use_precond,
1129
+ precond_type=precond_type,
966
1130
  zero_based=zero_based,
967
1131
  )
968
1132
 
969
1133
 
970
1134
  def _blqmr_solve_multi_fortran(
971
- Ap, Ai, Ax, B, *, tol, maxiter, droptol, use_precond, zero_based
1135
+ Ap, Ai, Ax, B, *, tol, maxiter, droptol, precond_type, zero_based
972
1136
  ) -> BLQMRResult:
973
1137
  """Fortran backend for blqmr_solve_multi."""
974
1138
  n = len(Ap) - 1
@@ -987,10 +1151,11 @@ def _blqmr_solve_multi_fortran(
987
1151
  Ap = Ap + 1
988
1152
  Ai = Ai + 1
989
1153
 
990
- dopcond = 1 if use_precond else 0
1154
+ # Convert precond_type string to Fortran integer code
1155
+ pcond_type = _parse_precond_type_for_fortran(precond_type)
991
1156
 
992
1157
  X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
993
- n, nnz, nrhs, Ap, Ai, Ax, B, maxiter, tol, droptol, dopcond
1158
+ n, nnz, nrhs, Ap, Ai, Ax, B, maxiter, tol, droptol, pcond_type
994
1159
  )
995
1160
 
996
1161
  return BLQMRResult(
@@ -999,7 +1164,7 @@ def _blqmr_solve_multi_fortran(
999
1164
 
1000
1165
 
1001
1166
  def _blqmr_solve_multi_native(
1002
- Ap, Ai, Ax, B, *, tol, maxiter, use_precond, zero_based
1167
+ Ap, Ai, Ax, B, *, tol, maxiter, precond_type, zero_based
1003
1168
  ) -> BLQMRResult:
1004
1169
  """Native Python backend for blqmr_solve_multi."""
1005
1170
  n = len(Ap) - 1
@@ -1010,15 +1175,7 @@ def _blqmr_solve_multi_native(
1010
1175
 
1011
1176
  A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))
1012
1177
 
1013
- M1 = None
1014
- if use_precond:
1015
- try:
1016
- M1 = make_preconditioner(A, "ilu")
1017
- except Exception:
1018
- try:
1019
- M1 = make_preconditioner(A, "diag") # FIX: Changed A_sp to A
1020
- except Exception:
1021
- M1 = None # Fall back to no preconditioning
1178
+ M1 = _get_preconditioner_for_native(A, precond_type, None)
1022
1179
 
1023
1180
  if B.ndim == 1:
1024
1181
  B = B.reshape(-1, 1)
@@ -1081,7 +1238,7 @@ def blqmr(
1081
1238
  residual: bool = False,
1082
1239
  workspace: Optional[BLQMRWorkspace] = None,
1083
1240
  droptol: float = 0.001,
1084
- use_precond: bool = True,
1241
+ precond_type: PrecondType = "ilu",
1085
1242
  ) -> BLQMRResult:
1086
1243
  """
1087
1244
  Block Quasi-Minimal-Residual (BL-QMR) solver - main interface.
@@ -1097,9 +1254,10 @@ def blqmr(
1097
1254
  tol : float
1098
1255
  Convergence tolerance (default: 1e-6)
1099
1256
  maxiter : int, optional
1100
- Maximum iterations (default: n for Fortran, min(n, 20) for Python)
1257
+ Maximum iterations (default: n)
1101
1258
  M1, M2 : preconditioner, optional
1102
- Preconditioner M = M1 @ M2 (Python backend only)
1259
+ Custom preconditioners. If provided, precond_type is ignored.
1260
+ M = M1 @ M2 for split preconditioning (Python backend only)
1103
1261
  x0 : ndarray, optional
1104
1262
  Initial guess
1105
1263
  residual : bool
@@ -1108,8 +1266,13 @@ def blqmr(
1108
1266
  Pre-allocated workspace (Python backend only)
1109
1267
  droptol : float, default 0.001
1110
1268
  Drop tolerance for ILU preconditioner (Fortran backend only)
1111
- use_precond : bool, default True
1112
- Whether to use ILU preconditioning (Fortran backend only)
1269
+ precond_type : None, '', or str, default 'ilu'
1270
+ Preconditioner type (ignored if M1 is provided):
1271
+ - None or '': No preconditioning
1272
+ - 'ilu', 'ilu0', 'ilut': Incomplete LU
1273
+ - 'diag', 'jacobi': Diagonal (Jacobi)
1274
+ - 'lu': Full LU (expensive, for debugging)
1275
+ - For Fortran: integers 2 (ILU) or 3 (diagonal) also accepted
1113
1276
 
1114
1277
  Returns
1115
1278
  -------
@@ -1129,7 +1292,7 @@ def blqmr(
1129
1292
  maxiter=maxiter,
1130
1293
  x0=x0,
1131
1294
  droptol=droptol,
1132
- use_precond=use_precond,
1295
+ precond_type=precond_type,
1133
1296
  )
1134
1297
  else:
1135
1298
  return _blqmr_native(
@@ -1142,7 +1305,7 @@ def blqmr(
1142
1305
  x0=x0,
1143
1306
  residual=residual,
1144
1307
  workspace=workspace,
1145
- use_precond=use_precond,
1308
+ precond_type=precond_type,
1146
1309
  )
1147
1310
 
1148
1311
 
@@ -1154,7 +1317,7 @@ def _blqmr_fortran(
1154
1317
  maxiter: Optional[int],
1155
1318
  x0: Optional[np.ndarray],
1156
1319
  droptol: float,
1157
- use_precond: bool,
1320
+ precond_type: PrecondType,
1158
1321
  ) -> BLQMRResult:
1159
1322
  """Fortran backend for blqmr()."""
1160
1323
  A_csc = sparse.csc_matrix(A)
@@ -1176,7 +1339,7 @@ def _blqmr_fortran(
1176
1339
  Ap_f = np.asfortranarray(Ap + 1, dtype=np.int32)
1177
1340
  Ai_f = np.asfortranarray(Ai + 1, dtype=np.int32)
1178
1341
 
1179
- dopcond = 1 if use_precond else 0
1342
+ pcond_type = _parse_precond_type_for_fortran(precond_type)
1180
1343
 
1181
1344
  # Check if complex
1182
1345
  is_complex = np.iscomplexobj(A) or np.iscomplexobj(B)
@@ -1189,7 +1352,7 @@ def _blqmr_fortran(
1189
1352
  # Single RHS
1190
1353
  b_f = np.asfortranarray(B.ravel(), dtype=np.complex128)
1191
1354
  x, flag, niter, relres = _blqmr.blqmr_solve_complex(
1192
- n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, dopcond
1355
+ n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, pcond_type
1193
1356
  )
1194
1357
  return BLQMRResult(
1195
1358
  x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
@@ -1199,7 +1362,7 @@ def _blqmr_fortran(
1199
1362
  B_f = np.asfortranarray(B, dtype=np.complex128)
1200
1363
  nrhs = B_f.shape[1]
1201
1364
  X, flag, niter, relres = _blqmr.blqmr_solve_complex_multi(
1202
- n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, dopcond
1365
+ n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, pcond_type
1203
1366
  )
1204
1367
  return BLQMRResult(
1205
1368
  x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
@@ -1212,7 +1375,7 @@ def _blqmr_fortran(
1212
1375
  # Single RHS
1213
1376
  b_f = np.asfortranarray(B.ravel(), dtype=np.float64)
1214
1377
  x, flag, niter, relres = _blqmr.blqmr_solve_real(
1215
- n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, dopcond
1378
+ n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, pcond_type
1216
1379
  )
1217
1380
  return BLQMRResult(
1218
1381
  x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
@@ -1222,7 +1385,7 @@ def _blqmr_fortran(
1222
1385
  B_f = np.asfortranarray(B, dtype=np.float64)
1223
1386
  nrhs = B_f.shape[1]
1224
1387
  X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
1225
- n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, dopcond
1388
+ n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, pcond_type
1226
1389
  )
1227
1390
  return BLQMRResult(
1228
1391
  x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
@@ -1240,19 +1403,13 @@ def _blqmr_native(
1240
1403
  x0: Optional[np.ndarray],
1241
1404
  residual: bool,
1242
1405
  workspace: Optional[BLQMRWorkspace],
1243
- use_precond: bool,
1406
+ precond_type: PrecondType,
1244
1407
  ) -> BLQMRResult:
1245
1408
  """Native Python backend for blqmr()."""
1246
- # Auto-create preconditioner if requested and not provided
1247
- if use_precond and M1 is None:
1409
+ # Get preconditioner (user-provided M1 takes precedence)
1410
+ if M1 is None:
1248
1411
  A_sp = sparse.csc_matrix(A) if not sparse.issparse(A) else A
1249
- try:
1250
- M1 = make_preconditioner(A_sp, "ilu")
1251
- except Exception:
1252
- try:
1253
- M1 = make_preconditioner(A_sp, "diag")
1254
- except Exception:
1255
- M1 = None # Fall back to no preconditioning
1412
+ M1 = _get_preconditioner_for_native(A_sp, precond_type, None)
1256
1413
 
1257
1414
  x, flag, relres, niter, resv = _blqmr_python_impl(
1258
1415
  A,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: blocksolver
3
- Version: 0.8.3
3
+ Version: 0.8.5
4
4
  Summary: Block Quasi-Minimal-Residual sparse linear solver
5
5
  Keywords: sparse,linear-algebra,iterative-solver,qmr,fortran,umfpack
6
6
  Author-Email: Qianqian Fang <q.fang@neu.edu>
@@ -49,7 +49,7 @@ Description-Content-Type: text/markdown
49
49
  - **Block QMR Algorithm**: Efficiently solves multiple right-hand sides simultaneously
50
50
  - **Complex Symmetric Support**: Designed for complex symmetric matrices (A = Aᵀ, not A = A†)
51
51
  - **Dual Backend**: Fortran extension for speed, Python fallback for portability
52
- - **ILU Preconditioning**: Built-in incomplete LU preconditioner for faster convergence
52
+ - **Flexible Preconditioning**: ILU, diagonal (Jacobi), and split preconditioners
53
53
  - **SciPy Integration**: Works seamlessly with SciPy sparse matrices
54
54
  - **Optional Numba Acceleration**: JIT-compiled kernels for the Python backend
55
55
 
@@ -71,7 +71,7 @@ The BLQMR algorithm is an iterative Krylov subspace method specifically designed
71
71
 
72
72
  - **Three-term Lanczos Recurrence**: Builds an orthonormal basis for the Krylov subspace with short recurrences, minimizing memory usage.
73
73
 
74
- - **Block Updates**: Processes m right-hand sides simultaneously, with typical block sizes of 1-16.
74
+ - **Block Updates**: Processes m right-hand sides simultaneously, with typical block sizes of 1-64.
75
75
 
76
76
  ### When to Use BLQMR
77
77
 
@@ -155,9 +155,9 @@ result = blqmr(A, b)
155
155
 
156
156
  # With options
157
157
  result = blqmr(A, b,
158
- tol=1e-8, # Convergence tolerance
159
- maxiter=1000, # Maximum iterations
160
- use_precond=True, # Use ILU preconditioning
158
+ tol=1e-8, # Convergence tolerance
159
+ maxiter=1000, # Maximum iterations
160
+ precond_type='ilu', # Preconditioner: 'ilu', 'diag', or None
161
161
  )
162
162
  ```
163
163
 
@@ -190,24 +190,29 @@ from blocksolver import blqmr
190
190
  A = create_helmholtz_matrix(frequency=1000) # Your application
191
191
  b = np.complex128(source_term)
192
192
 
193
- result = blqmr(A, b, tol=1e-8)
193
+ result = blqmr(A, b, tol=1e-8, precond_type='diag')
194
194
  ```
195
195
 
196
- ### Custom Preconditioning
196
+ ### Preconditioning
197
197
 
198
- For the Python backend, you can provide custom preconditioners:
198
+ BlockSolver supports multiple preconditioner types for both backends:
199
199
 
200
200
  ```python
201
201
  from blocksolver import blqmr, make_preconditioner
202
202
 
203
- # Create ILU preconditioner
204
- M1 = make_preconditioner(A, 'ilu')
203
+ # Using precond_type parameter (works with both backends)
204
+ result = blqmr(A, b, precond_type='ilu') # Incomplete LU
205
+ result = blqmr(A, b, precond_type='diag') # Diagonal (Jacobi)
206
+ result = blqmr(A, b, precond_type=None) # No preconditioning
205
207
 
206
- # Or diagonal (Jacobi) preconditioner
207
- M1 = make_preconditioner(A, 'diag')
208
+ # Custom preconditioner (Python backend only)
209
+ M1 = make_preconditioner(A, 'ilu', drop_tol=1e-4, fill_factor=10)
210
+ result = blqmr(A, b, M1=M1, precond_type=None)
208
211
 
209
- # Solve with custom preconditioner
210
- result = blqmr(A, b, M1=M1, use_precond=False)
212
+ # Split preconditioning for symmetric systems (Python backend)
213
+ # Preserves symmetry: M1^{-1} A M2^{-1}
214
+ M = make_preconditioner(A, 'diag', split=True) # Returns sqrt(D)
215
+ result = blqmr(A, b, M1=M, M2=M, precond_type=None)
211
216
  ```
212
217
 
213
218
  ### SciPy-Compatible Interface
@@ -236,9 +241,9 @@ b = np.array([8., 45., -3., 3., 19.])
236
241
 
237
242
  result = blqmr_solve(Ap, Ai, Ax, b,
238
243
  tol=1e-8,
239
- droptol=0.001, # ILU drop tolerance (Fortran only)
240
- use_precond=True,
241
- zero_based=True, # 0-based indexing (default)
244
+ droptol=0.001, # ILU drop tolerance (Fortran backend only)
245
+ precond_type='ilu', # Preconditioner type
246
+ zero_based=True, # 0-based indexing (default)
242
247
  )
243
248
  ```
244
249
 
@@ -257,7 +262,7 @@ Main solver interface.
257
262
  | `maxiter` | int | n | Maximum iterations |
258
263
  | `M1`, `M2` | preconditioner | None | Custom preconditioners (Python backend) |
259
264
  | `x0` | ndarray | None | Initial guess |
260
- | `use_precond` | bool | True | Use ILU preconditioning |
265
+ | `precond_type` | str or None | 'ilu' | Preconditioner: 'ilu', 'diag', or None |
261
266
  | `droptol` | float | 0.001 | ILU drop tolerance (Fortran backend) |
262
267
  | `residual` | bool | False | Use true residual for convergence (Python) |
263
268
  | `workspace` | BLQMRWorkspace | None | Pre-allocated workspace (Python) |
@@ -274,21 +279,29 @@ Main solver interface.
274
279
 
275
280
  ### `blqmr_solve(Ap, Ai, Ax, b, **kwargs) -> BLQMRResult`
276
281
 
277
- Low-level CSC interface.
282
+ Low-level CSC interface for single RHS.
278
283
 
279
284
  ### `blqmr_solve_multi(Ap, Ai, Ax, B, **kwargs) -> BLQMRResult`
280
285
 
281
- Multiple right-hand sides with CSC input.
286
+ Low-level CSC interface for multiple right-hand sides.
282
287
 
283
288
  ### `blqmr_scipy(A, b, **kwargs) -> Tuple[ndarray, int]`
284
289
 
285
290
  SciPy-compatible interface returning `(x, flag)`.
286
291
 
287
- ### `make_preconditioner(A, type) -> Preconditioner`
292
+ ### `make_preconditioner(A, precond_type, **kwargs) -> Preconditioner`
288
293
 
289
294
  Create a preconditioner for the Python backend.
290
295
 
291
- **Types:** `'diag'`/`'jacobi'`, `'ilu'`/`'ilu0'`, `'ssor'`
296
+ **Parameters:**
297
+ | Parameter | Type | Default | Description |
298
+ |-----------|------|---------|-------------|
299
+ | `A` | sparse matrix | required | System matrix |
300
+ | `precond_type` | str | required | 'diag', 'jacobi', 'ilu', 'ilu0', 'ilut', 'lu', 'ssor' |
301
+ | `split` | bool | False | Return sqrt(D) for split preconditioning |
302
+ | `drop_tol` | float | 1e-4 | Drop tolerance for ILUT |
303
+ | `fill_factor` | float | 10 | Fill factor for ILUT |
304
+ | `omega` | float | 1.0 | Relaxation parameter for SSOR |
292
305
 
293
306
  ### Utility Functions
294
307
 
@@ -301,13 +314,44 @@ from blocksolver import (
301
314
  )
302
315
  ```
303
316
 
317
+ ## Benchmarks
318
+
319
+ ### BLQMR vs Direct Solver (mldivide)
320
+
321
+ Complex symmetric FEM matrices, 4 right-hand sides, tolerance 10⁻⁸, split Jacobi preconditioner:
322
+
323
+ | Grid | Nodes | NNZ | mldivide | BLQMR | Speedup |
324
+ |------|-------|-----|----------|-------|---------|
325
+ | 20³ | 8,000 | 110K | 135ms | 115ms | **1.2×** |
326
+ | 30³ | 27,000 | 384K | 1.36s | 373ms | **3.6×** |
327
+ | 40³ | 64,000 | 922K | 6.40s | 947ms | **6.8×** |
328
+ | 50³ | 125,000 | 1.8M | 25.9s | 1.76s | **14.7×** |
329
+
330
+ ### Block Size Efficiency
331
+
332
+ With 64 RHS on a 8,000-node complex symmetric system:
333
+
334
+ | Block Size | Iterations | Speedup vs Single |
335
+ |------------|------------|-------------------|
336
+ | 1 (point) | 10,154 | 1.0× |
337
+ | 4 | 2,220 | 1.8× |
338
+ | 8 | 956 | 2.0× |
339
+ | 16 | 361 | 2.1× |
340
+ | 32 | 178 | 2.2× |
341
+
342
+ **Optimal block size**: 8-16 for most problems. Larger blocks have diminishing returns due to increased per-iteration cost.
343
+
344
+ ### Iteration Efficiency
345
+
346
+ With 4 RHS, BLQMR uses only ~24% of total iterations compared to 4 separate single-RHS solves — achieving **super-linear block acceleration**.
347
+
304
348
  ## Performance Tips
305
349
 
306
- 1. **Use the Fortran backend** when available (10-100× faster than Python)
350
+ 1. **Use the Fortran backend** when available (faster for large systems)
307
351
 
308
352
  2. **Enable preconditioning** for ill-conditioned systems:
309
353
  ```python
310
- result = blqmr(A, b, use_precond=True)
354
+ result = blqmr(A, b, precond_type='ilu')
311
355
  ```
312
356
 
313
357
  3. **Batch multiple right-hand sides** instead of solving one at a time:
@@ -328,11 +372,18 @@ from blocksolver import (
328
372
  5. **Reuse workspace** for repeated solves with the same dimensions:
329
373
  ```python
330
374
  from blocksolver import BLQMRWorkspace
331
- ws = BLQMRWorkspace(n, m)
375
+ ws = BLQMRWorkspace(n, m, dtype=np.complex128)
332
376
  for b in many_rhs:
333
377
  result = blqmr(A, b, workspace=ws)
334
378
  ```
335
379
 
380
+ 6. **Use split Jacobi for complex symmetric systems**:
381
+ ```python
382
+ # Preserves symmetry of preconditioned system
383
+ M = make_preconditioner(A, 'diag', split=True)
384
+ result = blqmr(A, b, M1=M, M2=M, precond_type=None)
385
+ ```
386
+
336
387
  ## Examples
337
388
 
338
389
  ### Diffuse Optical Tomography
@@ -360,10 +411,10 @@ def create_diffusion_matrix(nx, ny, D=1.0, mu_a=0.01, omega=1e9):
360
411
 
361
412
  # Setup problem
362
413
  A = create_diffusion_matrix(100, 100, omega=2*np.pi*100e6)
363
- sources = np.random.randn(10000, 16) # 16 source positions
414
+ sources = np.random.randn(10000, 16) + 0j # 16 source positions
364
415
 
365
416
  # Solve for all sources at once
366
- result = blqmr(A, sources, tol=1e-8)
417
+ result = blqmr(A, sources, tol=1e-8, precond_type='diag')
367
418
  print(f"Solved {sources.shape[1]} systems in {result.iter} iterations")
368
419
  ```
369
420
 
@@ -382,7 +433,7 @@ def solve_helmholtz(K, M, f, frequencies):
382
433
  for omega in frequencies:
383
434
  # A = K - ω²M (complex symmetric if K, M are symmetric)
384
435
  A = K - omega**2 * M
385
- result = blqmr(A, f, tol=1e-10)
436
+ result = blqmr(A, f, tol=1e-10, precond_type='diag')
386
437
  solutions.append(result.x)
387
438
  return np.array(solutions)
388
439
  ```
@@ -401,12 +452,28 @@ brew install gcc suite-sparse # macOS
401
452
  pip install --no-cache-dir blocksolver
402
453
  ```
403
454
 
455
+ ### Check backend status
456
+
457
+ ```python
458
+ from blocksolver import get_backend_info
459
+ print(get_backend_info())
460
+ # {'backend': 'binary', 'has_fortran': True, 'has_numba': True}
461
+ ```
462
+
404
463
  ### Slow convergence
405
464
 
406
- 1. Enable preconditioning: `use_precond=True`
465
+ 1. Enable preconditioning: `precond_type='ilu'` or `precond_type='diag'`
407
466
  2. Reduce ILU drop tolerance: `droptol=1e-4` (Fortran backend)
408
467
  3. Check matrix conditioning with `np.linalg.cond(A.toarray())`
409
468
 
469
+ ### ILU factorization fails
470
+
471
+ For indefinite or complex symmetric matrices, ILU may fail:
472
+ ```python
473
+ # Fall back to diagonal preconditioner
474
+ result = blqmr(A, b, precond_type='diag')
475
+ ```
476
+
410
477
  ### Memory issues with large systems
411
478
 
412
479
  1. Use the Fortran backend (more memory efficient)
@@ -415,7 +482,7 @@ pip install --no-cache-dir blocksolver
415
482
 
416
483
  ## License
417
484
 
418
- BSD-3-Clause / LGPL-3.0+ / GPL-3.0+ (tri-licensed)
485
+ BSD-3-Clause or GPL-3.0+ (dual-licensed)
419
486
 
420
487
  ## Citation
421
488
 
@@ -0,0 +1,7 @@
1
+ blocksolver-0.8.5.dist-info/METADATA,sha256=K7OUbJ-pEkHWX3Cvy9iz1_L1onkox-5T9YwOvCrCiSU,15985
2
+ blocksolver-0.8.5.dist-info/WHEEL,sha256=vIXzP6jLUy4sdmrQppnovVBqmdfNCkEM0I7EHxeJ-zs,83
3
+ blocksolver/_blqmr.cp38-win_amd64.pyd,sha256=7cNpd1gPUty299GF5MwRbxk4xjvsUAIEudDEfOVaB6Y,34366661
4
+ blocksolver/_blqmr.cp38-win_amd64.dll.a,sha256=Nketjx2gg0CTuKGh2_z6lckLN0D1HR1uPudzY66SFOs,1696
5
+ blocksolver/__init__.py,sha256=PZV19qS5YQDWdAcqgD6g0wW7KRKrdv1JuLDKmrnC6Es,1982
6
+ blocksolver/blqmr.py,sha256=diRm-xD2-4r0W59WrRe-O26DHJgc32voCVQa0H5FCRk,46543
7
+ blocksolver-0.8.5.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- blocksolver-0.8.3.dist-info/METADATA,sha256=1JTScCC4OO3FBMqPtaG0OUlf5OnEHDetwDj7LPk9u9A,13264
2
- blocksolver-0.8.3.dist-info/WHEEL,sha256=vIXzP6jLUy4sdmrQppnovVBqmdfNCkEM0I7EHxeJ-zs,83
3
- blocksolver/_blqmr.cp38-win_amd64.pyd,sha256=854GKxQ4YgdNiVEIco_MjK38acsP8YpIUkzdAslpCTo,34345539
4
- blocksolver/_blqmr.cp38-win_amd64.dll.a,sha256=Nketjx2gg0CTuKGh2_z6lckLN0D1HR1uPudzY66SFOs,1696
5
- blocksolver/__init__.py,sha256=7lq88Nc2gqHTWAdpvj2zpVt8UHYgKpcVlgA1WzyEhFI,1982
6
- blocksolver/blqmr.py,sha256=TRVkXlJf2FXFiZkH6vawNEEq8jowJqG-S_QGwHMeR8U,41060
7
- blocksolver-0.8.3.dist-info/RECORD,,