PyPI - blocksolver - Versions diffs - 0.8.1__py3-none-any.whl → 0.8.5__py3-none-any.whl - Mend

blocksolver 0.8.1__py3-none-any.whl → 0.8.5__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

blocksolver/__init__.py +1 -1
blocksolver/blqmr.py +502 -135
blocksolver-0.8.5.dist-info/METADATA +509 -0
blocksolver-0.8.5.dist-info/RECORD +6 -0
blocksolver-0.8.1.dist-info/METADATA +0 -162
blocksolver-0.8.1.dist-info/RECORD +0 -6
{blocksolver-0.8.1.dist-info → blocksolver-0.8.5.dist-info}/WHEEL +0 -0
{blocksolver-0.8.1.dist-info → blocksolver-0.8.5.dist-info}/top_level.txt +0 -0

blocksolver/blqmr.py CHANGED

@@ -50,7 +50,7 @@ try:
     from numba import njit
     HAS_NUMBA = True
-except ImportError:
+except (ImportError, Exception) as e:
     HAS_NUMBA = False
     def njit(*args, **kwargs):
@@ -134,6 +134,52 @@ def _qqr_kernel_real(Q, R, n, m):
                     Q[i, k] -= Q[i, j] * dot
+@njit(cache=True)
+def _qqr_kernel_complex(Q, R, n, m):
+    """Numba-accelerated quasi-QR kernel for complex arrays."""
+    for j in range(m):
+        # Quasi inner product: sum(q*q) WITHOUT conjugation
+        r_jj_sq = 0.0j
+        for i in range(n):
+            r_jj_sq += Q[i, j] * Q[i, j]  # No conjugation!
+        r_jj = np.sqrt(r_jj_sq)
+        R[j, j] = r_jj
+        if abs(r_jj) > 1e-14:
+            inv_r_jj = 1.0 / r_jj
+            for i in range(n):
+                Q[i, j] *= inv_r_jj
+            for k in range(j + 1, m):
+                # Quasi inner product: sum(q_j * q_k) WITHOUT conjugation
+                dot = 0.0j
+                for i in range(n):
+                    dot += Q[i, j] * Q[i, k]  # No conjugation!
+                R[j, k] = dot
+                for i in range(n):
+                    Q[i, k] -= Q[i, j] * dot
+@njit(cache=True)
+def _qqr_kernel_real(Q, R, n, m):
+    """Numba-accelerated quasi-QR kernel for real arrays."""
+    for j in range(m):
+        r_jj_sq = 0.0
+        for i in range(n):
+            r_jj_sq += Q[i, j] * Q[i, j]
+        r_jj = np.sqrt(r_jj_sq)
+        R[j, j] = r_jj
+        if abs(r_jj) > 1e-14:
+            inv_r_jj = 1.0 / r_jj
+            for i in range(n):
+                Q[i, j] *= inv_r_jj
+            for k in range(j + 1, m):
+                dot = 0.0
+                for i in range(n):
+                    dot += Q[i, j] * Q[i, k]
+                R[j, k] = dot
+                for i in range(n):
+                    Q[i, k] -= Q[i, j] * dot
 def qqr(
     A: np.ndarray, tol: float = 0, use_numba: bool = True
 ) -> Tuple[np.ndarray, np.ndarray]:
@@ -173,14 +219,20 @@ def qqr(
     else:
         for j in range(m):
             qj = Q[:, j]
-            r_jj_sq = np.dot(qj, qj)
+            # CRITICAL FIX: Use sum(qj * qj) NOT np.dot(qj, qj)
+            # np.dot conjugates the first argument for complex arrays!
+            # Fortran: R(k,k)=dsqrt(sum(Q(:,k)*Q(:,k))) - no conjugation
+            r_jj_sq = np.sum(qj * qj)  # Quasi inner product - NO conjugation
             r_jj = np.sqrt(r_jj_sq)
             R[j, j] = r_jj
             if np.abs(r_jj) > 1e-14:
                 Q[:, j] *= 1.0 / r_jj
                 if j < m - 1:
-                    R[j, j + 1 :] = np.dot(Q[:, j], Q[:, j + 1 :])
-                    Q[:, j + 1 :] -= np.outer(Q[:, j], R[j, j + 1 :])
+                    # CRITICAL FIX: Quasi inner product for off-diagonal
+                    # Fortran: R(k,j)=sum(Q(:,k)*Q(:,j)) - no conjugation
+                    for k in range(j + 1, m):
+                        R[j, k] = np.sum(Q[:, j] * Q[:, k])  # NO conjugation
+                        Q[:, k] -= R[j, k] * Q[:, j]
     return Q, R
@@ -217,8 +269,12 @@ class SparsePreconditioner:
     def __init__(self, M1, M2=None):
         self.is_two_part = M2 is not None
-        self.is_ilu1 = isinstance(M1, _ILUPreconditioner)
-        self.is_ilu2 = isinstance(M2, _ILUPreconditioner) if M2 is not None else False
+        self.is_ilu1 = isinstance(M1, (_ILUPreconditioner, _LUPreconditioner))
+        self.is_ilu2 = (
+            isinstance(M2, (_ILUPreconditioner, _LUPreconditioner))
+            if M2 is not None
+            else False
+        )
         if M1 is not None:
             if self.is_ilu1:
@@ -381,8 +437,101 @@ class BLQMRWorkspace:
 # Preconditioner Factory
 # =============================================================================
+# Type alias for precond_type
+PrecondType = Optional[Union[str, int]]
-def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
+def _parse_precond_type_for_fortran(precond_type: PrecondType) -> int:
+    """
+    Convert precond_type to Fortran integer code.
+    Returns
+    -------
+    int
+        0 = no preconditioning
+        2 = ILU
+        3 = diagonal/Jacobi
+    """
+    if precond_type is None or precond_type == "" or precond_type is False:
+        return 0
+    if isinstance(precond_type, int):
+        return precond_type
+    if isinstance(precond_type, str):
+        precond_lower = precond_type.lower()
+        if precond_lower in ("ilu", "ilu0", "ilut"):
+            return 2
+        elif precond_lower in ("diag", "jacobi"):
+            return 3
+        else:
+            # Unknown string, default to no preconditioning
+            warnings.warn(
+                f"Unknown precond_type '{precond_type}' for Fortran backend, using no preconditioning"
+            )
+            return 0
+    return 0
+def _get_preconditioner_for_native(A, precond_type: PrecondType, M1_provided):
+    """
+    Create preconditioner for native Python backend.
+    Parameters
+    ----------
+    A : sparse matrix
+        System matrix
+    precond_type : None, '', str, or int
+        Preconditioner type specification
+    M1_provided : preconditioner or None
+        User-provided preconditioner (takes precedence)
+    Returns
+    -------
+    M1 : preconditioner or None
+    """
+    # If user provided M1, use it
+    if M1_provided is not None:
+        return M1_provided
+    # No preconditioning requested
+    if precond_type is None or precond_type == "" or precond_type is False:
+        return None
+    # Integer codes (for compatibility)
+    if isinstance(precond_type, int):
+        if precond_type == 0:
+            return None
+        elif precond_type == 2:
+            precond_str = "ilu"
+        elif precond_type == 3:
+            precond_str = "diag"
+        else:
+            precond_str = "ilu"  # Default to ILU for other integers
+    else:
+        precond_str = precond_type
+    # Create preconditioner
+    try:
+        return make_preconditioner(A, precond_str)
+    except Exception as e:
+        # Fallback chain: try diag if ilu fails
+        if precond_str not in ("diag", "jacobi"):
+            try:
+                warnings.warn(
+                    f"Preconditioner '{precond_str}' failed: {e}, falling back to diagonal"
+                )
+                return make_preconditioner(A, "diag")
+            except Exception:
+                pass
+        warnings.warn(f"All preconditioners failed, proceeding without preconditioning")
+        return None
+def make_preconditioner(
+    A: sparse.spmatrix, precond_type: str = "diag", split: bool = False, **kwargs
+):
     """
     Create a preconditioner for iterative solvers.
@@ -392,29 +541,70 @@ def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
         System matrix
     precond_type : str
         'diag' or 'jacobi': Diagonal (Jacobi) preconditioner
-        'ilu' or 'ilu0': Incomplete LU
-        'ssor': Symmetric SOR
+        'ilu' or 'ilu0': Incomplete LU with minimal fill
+        'ilut': Incomplete LU with threshold
+        'lu': Full LU factorization
+    split : bool
+        If True, return sqrt(D) for split preconditioning (M1=M2=sqrt(D))
+        If False, return D for left preconditioning
+    **kwargs : dict
+        Additional parameters
     Returns
     -------
     M : preconditioner object
-        Preconditioner (use as M1 in blqmr)
+        For split Jacobi, use as: blqmr(A, b, M1=M, M2=M)
     """
     if precond_type in ("diag", "jacobi"):
         diag = A.diagonal().copy()
         diag[np.abs(diag) < 1e-14] = 1.0
-        return sparse.diags(diag, format="csr")
-    elif precond_type in ("ilu", "ilu0"):
+        if split:
+            # For split preconditioning: return sqrt(D)
+            # Usage: M1 = M2 = sqrt(D), gives D^{-1/2} A D^{-1/2}
+            sqrt_diag = np.sqrt(diag)
+            return sparse.diags(sqrt_diag, format="csr")
+        else:
+            # For left preconditioning: return D
+            # Usage: M1 = D, M2 = None, gives D^{-1} A
+            return sparse.diags(diag, format="csr")
+    elif precond_type == "ilu0":
+        # ILU(0) - no fill-in, fast but may be poor quality
         try:
             ilu = spilu(A.tocsc(), drop_tol=0, fill_factor=1)
             return _ILUPreconditioner(ilu)
         except Exception as e:
-            warnings.warn(f"ILU factorization failed: {e}, falling back to diagonal")
+            warnings.warn(f"ILU(0) factorization failed: {e}, falling back to diagonal")
             return make_preconditioner(A, "diag")
+    elif precond_type in ("ilu", "ilut"):
+        # ILUT - ILU with threshold, better quality (similar to UMFPACK)
+        drop_tol = kwargs.get("drop_tol", 1e-4)
+        fill_factor = kwargs.get("fill_factor", 10)
+        try:
+            ilu = spilu(A.tocsc(), drop_tol=drop_tol, fill_factor=fill_factor)
+            return _ILUPreconditioner(ilu)
+        except Exception as e:
+            warnings.warn(f"ILUT factorization failed: {e}, trying ILU(0)")
+            try:
+                ilu = spilu(A.tocsc(), drop_tol=0, fill_factor=1)
+                return _ILUPreconditioner(ilu)
+            except Exception as e2:
+                warnings.warn(f"ILU(0) also failed: {e2}, falling back to diagonal")
+                return make_preconditioner(A, "diag")
+    elif precond_type == "lu":
+        # Full LU - exact factorization (for reference/debugging)
+        try:
+            lu = splu(A.tocsc())
+            return _LUPreconditioner(lu)
+        except Exception as e:
+            warnings.warn(f"LU factorization failed: {e}, falling back to ILUT")
+            return make_preconditioner(A, "ilut")
     elif precond_type == "ssor":
-        omega = 1.0
+        omega = kwargs.get("omega", 1.0)
         D = sparse.diags(A.diagonal(), format="csr")
         L = sparse.tril(A, k=-1, format="csr")
         return (D + omega * L).tocsr()
@@ -423,6 +613,24 @@ def make_preconditioner(A: sparse.spmatrix, precond_type: str = "diag"):
         raise ValueError(f"Unknown preconditioner type: {precond_type}")
+class _LUPreconditioner:
+    """Wrapper for full LU preconditioner."""
+    def __init__(self, lu_factor):
+        self.lu = lu_factor
+        self.shape = (lu_factor.shape[0], lu_factor.shape[1])
+        self.dtype = np.float64  # Assume real for now
+    def solve(self, b):
+        if b.ndim == 1:
+            return self.lu.solve(b)
+        else:
+            x = np.zeros_like(b)
+            for i in range(b.shape[1]):
+                x[:, i] = self.lu.solve(b[:, i])
+            return x
 # =============================================================================
 # Pure-Python Block QMR Solver
 # =============================================================================
@@ -448,7 +656,7 @@ def _blqmr_python_impl(
     dtype = np.complex128 if is_complex_input else np.float64
     if maxiter is None:
-        maxiter = min(n, 20)
+        maxiter = min(n, 100)
     if (
         workspace is None
@@ -461,23 +669,59 @@ def _blqmr_python_impl(
         ws = workspace
         ws.reset()
-    # Setup preconditioner
-    if M1 is not None:
-        if isinstance(M1, _ILUPreconditioner):
-            precond = SparsePreconditioner(M1, M2)
+    # Setup preconditioner - distinguish split vs left-only
+    use_split_precond = False
+    precond = None
+    precond_M1 = None
+    precond_M2 = None
+    if M1 is not None and M2 is not None:
+        # Split preconditioning: M1⁻¹ A M2⁻¹
+        use_split_precond = True
+        if isinstance(M1, (_ILUPreconditioner, _LUPreconditioner)):
+            precond_M1 = SparsePreconditioner(M1, None)
         elif sparse.issparse(M1):
-            precond = SparsePreconditioner(M1, M2)
+            precond_M1 = SparsePreconditioner(M1, None)
+        elif hasattr(M1, "solve"):
+            precond_M1 = M1
         else:
-            precond = DensePreconditioner(M1, M2)
-    else:
-        precond = None
+            precond_M1 = DensePreconditioner(M1, None)
+        if isinstance(M2, (_ILUPreconditioner, _LUPreconditioner)):
+            precond_M2 = SparsePreconditioner(M2, None)
+        elif sparse.issparse(M2):
+            precond_M2 = SparsePreconditioner(M2, None)
+        elif hasattr(M2, "solve"):
+            precond_M2 = M2
+        else:
+            precond_M2 = DensePreconditioner(M2, None)
+    elif M1 is not None:
+        # Left-only preconditioning: M1⁻¹ A
+        if isinstance(M1, (_ILUPreconditioner, _LUPreconditioner)):
+            precond = SparsePreconditioner(M1, None)
+        elif sparse.issparse(M1):
+            precond = SparsePreconditioner(M1, None)
+        elif hasattr(M1, "solve"):
+            precond = M1
+        else:
+            precond = DensePreconditioner(M1, None)
     if x0 is None:
         x = np.zeros((n, m), dtype=dtype)
     else:
         x = np.asarray(x0, dtype=dtype).reshape(n, m).copy()
-    t3, t3n, t3p, t3nn = 0, 2, 1, 1
+    # Initialize indices: Fortran t3=mod(0,3)+1=1 -> Python t3=0
+    t3 = 0
+    t3n = 2
+    t3p = 1
+    # Initialize Q matrices (identity)
+    ws.Qa[:, :, :] = 0
+    ws.Qb[:, :, :] = 0
+    ws.Qc[:, :, :] = 0
+    ws.Qd[:, :, :] = 0
     ws.Qa[:, :, t3] = np.eye(m, dtype=dtype)
     ws.Qd[:, :, t3n] = np.eye(m, dtype=dtype)
     ws.Qd[:, :, t3] = np.eye(m, dtype=dtype)
@@ -488,110 +732,190 @@ def _blqmr_python_impl(
     else:
         np.subtract(B, A @ x, out=ws.vt)
-    if precond is not None:
+    # Apply preconditioner to initial residual
+    if use_split_precond:
+        # For split preconditioning, initial residual is just M1⁻¹ * (b - A*x0)
+        # because we're solving M1⁻¹ A M2⁻¹ y = M1⁻¹ b with y = M2*x
+        ws.vt[:] = precond_M1.solve(ws.vt)
+        if np.any(np.isnan(ws.vt)):
+            return x, 2, 1.0, 0, np.array([])
+    elif precond is not None:
         precond.solve(ws.vt, out=ws.vt)
         if np.any(np.isnan(ws.vt)):
             return x, 2, 1.0, 0, np.array([])
+    # QQR decomposition
     Q, R = qqr(ws.vt)
     ws.v[:, :, t3p] = Q
     ws.beta[:, :, t3p] = R
-    col_norms = np.sqrt(np.einsum("ij,ij->j", Q.conj(), Q).real)
-    ws.omega[:, :, t3p] = np.diag(col_norms)
-    np.matmul(ws.omega[:, :, t3p], ws.beta[:, :, t3p], out=ws.taot)
+    # Compute omega - standard norm WITH conjugation (Hermitian norm)
+    # Fortran: omega(i,i,t3p)=sqrt(sum(conjg(v(:,i,t3p))*v(:,i,t3p)))
+    ws.omega[:, :, t3p].fill(0)
+    if is_complex_input:
+        np.fill_diagonal(
+            ws.omega[:, :, t3p],
+            np.sqrt(
+                np.einsum("ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]).real
+            ),
+        )
+    else:
+        np.fill_diagonal(
+            ws.omega[:, :, t3p],
+            np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
+        )
+    # taut = omega * beta
+    ws.taot[:] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
     isquasires = not residual
     if isquasires:
-        Qres0 = np.sqrt(np.einsum("ij,ij->j", ws.taot.conj(), ws.taot).real).max()
+        # Fortran: Qres0=maxval(sqrt(sum(abs(conjg(taut)*taut),1))) for complex
+        if is_complex_input:
+            Qres0 = np.max(
+                np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
+            )
+        else:
+            Qres0 = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
     else:
-        omegat = Q @ np.diag(1.0 / (col_norms + 1e-16))
-        Qres0 = np.sqrt(np.einsum("ij,ij->j", ws.vt.conj(), ws.vt).real).max()
+        omegat = np.zeros((n, m), dtype=dtype)
+        for i in range(m):
+            if np.abs(ws.omega[i, i, t3p]) > 1e-14:
+                omegat[:, i] = ws.v[:, i, t3p] / ws.omega[i, i, t3p]
+        if is_complex_input:
+            Qres0 = np.max(np.sqrt(np.sum(np.abs(np.conj(ws.vt) * ws.vt), axis=0)))
+        else:
+            Qres0 = np.max(np.sqrt(np.sum(ws.vt * ws.vt, axis=0)))
     if Qres0 < 1e-16:
         result = x.real if not is_complex_input else x
         return result, 0, 0.0, 0, np.array([0.0])
-    flag, resv, Qres1, relres, iter_count = 1, np.zeros(maxiter), None, 1.0, 0
-    omegat = None if isquasires else Q @ np.diag(1.0 / (col_norms + 1e-16))
+    flag, resv, Qres1, relres, iter_count = 1, np.zeros(maxiter), -1.0, 1.0, 0
     for k in range(1, maxiter + 1):
-        t3, t3n, t3p, t3nn = k % 3, (k - 1) % 3, (k + 1) % 3, (k - 2) % 3
+        # Index cycling
+        t3 = k % 3
+        t3p = (k + 1) % 3
+        t3n = (k - 1) % 3
+        t3nn = (k - 2) % 3
+        # tmp = A * v(:,:,t3)
         if A_is_sparse:
             ws.Av[:] = A @ ws.v[:, :, t3]
         else:
             np.matmul(A, ws.v[:, :, t3], out=ws.Av)
-        if precond is not None:
+        # Apply preconditioner
+        if use_split_precond:
+            # Split preconditioning: M1⁻¹ * A * M2⁻¹ * v
+            tmp = precond_M2.solve(ws.v[:, :, t3])  # M2⁻¹ * v
+            if A_is_sparse:
+                tmp = A @ tmp  # A * M2⁻¹ * v
+            else:
+                tmp = np.matmul(A, tmp)
+            ws.vt[:] = precond_M1.solve(tmp) - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
+        elif precond is not None:
+            # Left-only preconditioning: M⁻¹ * A * v
             precond.solve(ws.Av, out=ws.vt)
-            ws.vt -= ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
+            ws.vt[:] = ws.vt - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
         else:
-            np.matmul(ws.v[:, :, t3n], ws.beta[:, :, t3].T, out=ws.vt)
-            np.subtract(ws.Av, ws.vt, out=ws.vt)
+            ws.vt[:] = ws.Av - ws.v[:, :, t3n] @ ws.beta[:, :, t3].T
-        np.matmul(ws.v[:, :, t3].T, ws.vt, out=ws.alpha)
-        ws.vt -= ws.v[:, :, t3] @ ws.alpha
+        # alpha = v^T * vt (transpose, not conjugate transpose)
+        ws.alpha[:] = ws.v[:, :, t3].T @ ws.vt
+        ws.vt[:] = ws.vt - ws.v[:, :, t3] @ ws.alpha
+        # QQR decomposition
         Q, R = qqr(ws.vt)
         ws.v[:, :, t3p] = Q
         ws.beta[:, :, t3p] = R
-        col_norms = np.sqrt(np.einsum("ij,ij->j", Q.conj(), Q).real)
-        ws.omega[:, :, t3p] = np.diag(col_norms)
-        np.matmul(ws.omega[:, :, t3n], ws.beta[:, :, t3].T, out=ws.tmp0)
-        np.matmul(ws.Qb[:, :, t3nn], ws.tmp0, out=ws.theta)
-        np.matmul(ws.Qd[:, :, t3nn], ws.tmp0, out=ws.tmp1)
-        np.matmul(ws.omega[:, :, t3], ws.alpha, out=ws.tmp2)
-        np.matmul(ws.Qa[:, :, t3n], ws.tmp1, out=ws.eta)
-        ws.eta += ws.Qb[:, :, t3n] @ ws.tmp2
+        # Compute omega (standard Hermitian norm)
+        ws.omega[:, :, t3p].fill(0)
+        if is_complex_input:
+            np.fill_diagonal(
+                ws.omega[:, :, t3p],
+                np.sqrt(
+                    np.einsum(
+                        "ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]
+                    ).real
+                ),
+            )
+        else:
+            np.fill_diagonal(
+                ws.omega[:, :, t3p],
+                np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
+            )
-        np.matmul(ws.Qc[:, :, t3n], ws.tmp1, out=ws.zetat)
-        ws.zetat += ws.Qd[:, :, t3n] @ ws.tmp2
+        # Compute intermediate matrices
+        ws.tmp0[:] = ws.omega[:, :, t3n] @ ws.beta[:, :, t3].T
+        ws.theta[:] = ws.Qb[:, :, t3nn] @ ws.tmp0
+        ws.tmp1[:] = ws.Qd[:, :, t3nn] @ ws.tmp0
+        ws.tmp2[:] = ws.omega[:, :, t3] @ ws.alpha
+        ws.eta[:] = ws.Qa[:, :, t3n] @ ws.tmp1 + ws.Qb[:, :, t3n] @ ws.tmp2
+        ws.zetat[:] = ws.Qc[:, :, t3n] @ ws.tmp1 + ws.Qd[:, :, t3n] @ ws.tmp2
+        # Build ZZ matrix and do standard QR
         ws.stacked[:m, :] = ws.zetat
-        np.matmul(ws.omega[:, :, t3p], ws.beta[:, :, t3p], out=ws.stacked[m:, :])
+        ws.stacked[m:, :] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
         QQ, zeta_full = np.linalg.qr(ws.stacked, mode="complete")
         ws.zeta[:] = zeta_full[:m, :]
-        ws.QQ_full[:] = QQ.conj().T
+        if is_complex_input:
+            ws.QQ_full[:] = np.conj(QQ.T)
+        else:
+            ws.QQ_full[:] = QQ.T
         ws.Qa[:, :, t3] = ws.QQ_full[:m, :m]
         ws.Qb[:, :, t3] = ws.QQ_full[:m, m : 2 * m]
         ws.Qc[:, :, t3] = ws.QQ_full[m : 2 * m, :m]
         ws.Qd[:, :, t3] = ws.QQ_full[m : 2 * m, m : 2 * m]
+        # Invert zeta
         try:
             zeta_inv = np.linalg.inv(ws.zeta)
         except np.linalg.LinAlgError:
             zeta_inv = np.linalg.pinv(ws.zeta)
+        # Update p, tau, x, taut
         ws.p[:, :, t3] = (
             ws.v[:, :, t3] - ws.p[:, :, t3n] @ ws.eta - ws.p[:, :, t3nn] @ ws.theta
         ) @ zeta_inv
+        ws.tau[:] = ws.Qa[:, :, t3] @ ws.taot
+        x[:] = x + ws.p[:, :, t3] @ ws.tau
+        ws.taot[:] = ws.Qc[:, :, t3] @ ws.taot
-        np.matmul(ws.Qa[:, :, t3], ws.taot, out=ws.tau)
-        x += ws.p[:, :, t3] @ ws.tau
-        taot_copy = ws.taot.copy()
-        np.matmul(ws.Qc[:, :, t3], taot_copy, out=ws.taot)
+        # Compute residual
         if isquasires:
-            Qres = np.sqrt(np.einsum("ij,ij->j", ws.taot.conj(), ws.taot).real).max()
+            if is_complex_input:
+                Qres = np.max(
+                    np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
+                )
+            else:
+                Qres = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
         else:
-            omega_diag_inv = np.diag(1.0 / (col_norms + 1e-16))
-            omegat = (
-                omegat @ ws.Qc[:, :, t3].conj().T
-                + ws.v[:, :, t3p] @ (ws.Qd[:, :, t3] @ omega_diag_inv).conj().T
-            )
-            R_resid = omegat @ ws.taot
-            Qres = np.sqrt(np.einsum("ij,ij->j", R_resid.conj(), R_resid).real).max()
+            tmp0_diag = np.zeros((m, m), dtype=dtype)
+            for i in range(m):
+                if np.abs(ws.omega[i, i, t3p]) > 1e-14:
+                    tmp0_diag[i, :] = ws.Qd[:, i, t3] / ws.omega[i, i, t3p]
+            if is_complex_input:
+                omegat = omegat @ np.conj(ws.Qc[:, :, t3].T) + ws.v[
+                    :, :, t3p
+                ] @ np.conj(tmp0_diag)
+                tmp_res = np.conj(omegat @ ws.taot)
+                Qres = np.max(
+                    np.sqrt(np.sum(np.abs(np.conj(tmp_res) * tmp_res), axis=0))
+                )
+            else:
+                omegat = omegat @ ws.Qc[:, :, t3].T + ws.v[:, :, t3p] @ tmp0_diag
+                tmp_res = omegat @ ws.taot
+                Qres = np.max(np.sqrt(np.sum(tmp_res * tmp_res, axis=0)))
         resv[k - 1] = Qres
-        if Qres1 is not None and Qres == Qres1:
+        if k > 1 and abs(Qres - Qres1) < np.finfo(dtype).eps:
             flag, iter_count = 3, k
             break
@@ -602,6 +926,11 @@ def _blqmr_python_impl(
             break
     resv = resv[:iter_count]
+    # For split preconditioning, recover x = M2⁻¹ * y
+    if use_split_precond:
+        x = precond_M2.solve(x)
     result = x.real if not is_complex_input else x
     return result, flag, relres, iter_count, resv
@@ -621,7 +950,7 @@ def blqmr_solve(
     tol: float = 1e-6,
     maxiter: Optional[int] = None,
     droptol: float = 0.001,
-    use_precond: bool = True,
+    precond_type: PrecondType = "ilu",
     zero_based: bool = True,
 ) -> BLQMRResult:
     """
@@ -647,8 +976,12 @@ def blqmr_solve(
         Maximum iterations. Default is n.
     droptol : float, default 0.001
         Drop tolerance for ILU preconditioner (Fortran only).
-    use_precond : bool, default True
-        Whether to use ILU preconditioning.
+    precond_type : None, '', or str, default 'ilu'
+        Preconditioner type:
+        - None or '': No preconditioning
+        - 'ilu', 'ilu0', 'ilut': Incomplete LU
+        - 'diag', 'jacobi': Diagonal (Jacobi)
+        - For Fortran: integers 2 (ILU) or 3 (diagonal) also accepted
     zero_based : bool, default True
         If True, Ap and Ai use 0-based indexing (Python/C convention).
         If False, uses 1-based indexing (Fortran convention).
@@ -673,7 +1006,7 @@ def blqmr_solve(
             tol=tol,
             maxiter=maxiter,
             droptol=droptol,
-            use_precond=use_precond,
+            precond_type=precond_type,
             zero_based=zero_based,
         )
     else:
@@ -685,13 +1018,13 @@ def blqmr_solve(
             x0=x0,
             tol=tol,
             maxiter=maxiter,
-            use_precond=use_precond,
+            precond_type=precond_type,
             zero_based=zero_based,
         )
 def _blqmr_solve_fortran(
-    Ap, Ai, Ax, b, *, x0, tol, maxiter, droptol, use_precond, zero_based
+    Ap, Ai, Ax, b, *, x0, tol, maxiter, droptol, precond_type, zero_based
 ) -> BLQMRResult:
     """Fortran backend for blqmr_solve."""
     n = len(Ap) - 1
@@ -711,10 +1044,10 @@ def _blqmr_solve_fortran(
         Ap = Ap + 1
         Ai = Ai + 1
-    dopcond = 1 if use_precond else 0
+    pcond_type = _parse_precond_type_for_fortran(precond_type)
     x, flag, niter, relres = _blqmr.blqmr_solve_real(
-        n, nnz, Ap, Ai, Ax, b, maxiter, tol, droptol, dopcond
+        n, nnz, Ap, Ai, Ax, b, maxiter, tol, droptol, pcond_type
     )
     return BLQMRResult(
@@ -723,7 +1056,7 @@ def _blqmr_solve_fortran(
 def _blqmr_solve_native_csc(
-    Ap, Ai, Ax, b, *, x0, tol, maxiter, use_precond, zero_based
+    Ap, Ai, Ax, b, *, x0, tol, maxiter, precond_type, zero_based
 ) -> BLQMRResult:
     """Native Python backend for blqmr_solve with CSC input."""
     n = len(Ap) - 1
@@ -734,12 +1067,7 @@ def _blqmr_solve_native_csc(
     A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))
-    M1 = None
-    if use_precond:
-        try:
-            M1 = make_preconditioner(A, "ilu")
-        except Exception:
-            M1 = make_preconditioner(A, "diag")
+    M1 = _get_preconditioner_for_native(A, precond_type, None)
     x, flag, relres, niter, resv = _blqmr_python_impl(
         A, b, tol=tol, maxiter=maxiter, M1=M1, x0=x0
@@ -760,13 +1088,18 @@ def blqmr_solve_multi(
     tol: float = 1e-6,
     maxiter: Optional[int] = None,
     droptol: float = 0.001,
-    use_precond: bool = True,
+    precond_type: PrecondType = "ilu",
     zero_based: bool = True,
 ) -> BLQMRResult:
     """
     Solve sparse linear system AX = B with multiple right-hand sides.
     Uses Fortran extension if available, otherwise falls back to pure Python.
+    Parameters
+    ----------
+    precond_type : None, '', or str, default 'ilu'
+        Preconditioner type (see blqmr_solve for details)
     """
     n = len(Ap) - 1
@@ -782,7 +1115,7 @@ def blqmr_solve_multi(
             tol=tol,
             maxiter=maxiter,
             droptol=droptol,
-            use_precond=use_precond,
+            precond_type=precond_type,
             zero_based=zero_based,
         )
     else:
@@ -793,13 +1126,13 @@ def blqmr_solve_multi(
             B,
             tol=tol,
             maxiter=maxiter,
-            use_precond=use_precond,
+            precond_type=precond_type,
             zero_based=zero_based,
         )
 def _blqmr_solve_multi_fortran(
-    Ap, Ai, Ax, B, *, tol, maxiter, droptol, use_precond, zero_based
+    Ap, Ai, Ax, B, *, tol, maxiter, droptol, precond_type, zero_based
 ) -> BLQMRResult:
     """Fortran backend for blqmr_solve_multi."""
     n = len(Ap) - 1
@@ -818,10 +1151,11 @@ def _blqmr_solve_multi_fortran(
         Ap = Ap + 1
         Ai = Ai + 1
-    dopcond = 1 if use_precond else 0
+    # Convert precond_type string to Fortran integer code
+    pcond_type = _parse_precond_type_for_fortran(precond_type)
     X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
-        n, nnz, nrhs, Ap, Ai, Ax, B, maxiter, tol, droptol, dopcond
+        n, nnz, nrhs, Ap, Ai, Ax, B, maxiter, tol, droptol, pcond_type
     )
     return BLQMRResult(
@@ -830,7 +1164,7 @@ def _blqmr_solve_multi_fortran(
 def _blqmr_solve_multi_native(
-    Ap, Ai, Ax, B, *, tol, maxiter, use_precond, zero_based
+    Ap, Ai, Ax, B, *, tol, maxiter, precond_type, zero_based
 ) -> BLQMRResult:
     """Native Python backend for blqmr_solve_multi."""
     n = len(Ap) - 1
@@ -841,12 +1175,7 @@ def _blqmr_solve_multi_native(
     A = sparse.csc_matrix((Ax, Ai, Ap), shape=(n, n))
-    M1 = None
-    if use_precond:
-        try:
-            M1 = make_preconditioner(A, "ilu")
-        except Exception:
-            M1 = make_preconditioner(A, "diag")
+    M1 = _get_preconditioner_for_native(A, precond_type, None)
     if B.ndim == 1:
         B = B.reshape(-1, 1)
@@ -909,7 +1238,7 @@ def blqmr(
     residual: bool = False,
     workspace: Optional[BLQMRWorkspace] = None,
     droptol: float = 0.001,
-    use_precond: bool = True,
+    precond_type: PrecondType = "ilu",
 ) -> BLQMRResult:
     """
     Block Quasi-Minimal-Residual (BL-QMR) solver - main interface.
@@ -925,9 +1254,10 @@ def blqmr(
     tol : float
         Convergence tolerance (default: 1e-6)
     maxiter : int, optional
-        Maximum iterations (default: n for Fortran, min(n, 20) for Python)
+        Maximum iterations (default: n)
     M1, M2 : preconditioner, optional
-        Preconditioner M = M1 @ M2 (Python backend only)
+        Custom preconditioners. If provided, precond_type is ignored.
+        M = M1 @ M2 for split preconditioning (Python backend only)
     x0 : ndarray, optional
         Initial guess
     residual : bool
@@ -936,8 +1266,13 @@ def blqmr(
         Pre-allocated workspace (Python backend only)
     droptol : float, default 0.001
         Drop tolerance for ILU preconditioner (Fortran backend only)
-    use_precond : bool, default True
-        Whether to use ILU preconditioning (Fortran backend only)
+    precond_type : None, '', or str, default 'ilu'
+        Preconditioner type (ignored if M1 is provided):
+        - None or '': No preconditioning
+        - 'ilu', 'ilu0', 'ilut': Incomplete LU
+        - 'diag', 'jacobi': Diagonal (Jacobi)
+        - 'lu': Full LU (expensive, for debugging)
+        - For Fortran: integers 2 (ILU) or 3 (diagonal) also accepted
     Returns
     -------
@@ -957,7 +1292,7 @@ def blqmr(
             maxiter=maxiter,
             x0=x0,
             droptol=droptol,
-            use_precond=use_precond,
+            precond_type=precond_type,
         )
     else:
         return _blqmr_native(
@@ -970,7 +1305,7 @@ def blqmr(
             x0=x0,
             residual=residual,
             workspace=workspace,
-            use_precond=use_precond,
+            precond_type=precond_type,
         )
@@ -982,44 +1317,79 @@ def _blqmr_fortran(
     maxiter: Optional[int],
     x0: Optional[np.ndarray],
     droptol: float,
-    use_precond: bool,
+    precond_type: PrecondType,
 ) -> BLQMRResult:
     """Fortran backend for blqmr()."""
     A_csc = sparse.csc_matrix(A)
+    # CRITICAL: Sort indices for UMFPACK compatibility
+    if not A_csc.has_sorted_indices:
+        A_csc.sort_indices()
     Ap = A_csc.indptr.astype(np.int32)
     Ai = A_csc.indices.astype(np.int32)
-    Ax = A_csc.data.astype(np.float64)
     n = A_csc.shape[0]
-    nnz = len(Ax)
+    nnz = A_csc.nnz
     if maxiter is None:
         maxiter = n
-    # Convert to Fortran format
-    Ap_f = np.asfortranarray(Ap + 1, dtype=np.int32)  # 1-based
-    Ai_f = np.asfortranarray(Ai + 1, dtype=np.int32)  # 1-based
-    Ax_f = np.asfortranarray(Ax, dtype=np.float64)
+    # Convert to Fortran format (1-based indexing)
+    Ap_f = np.asfortranarray(Ap + 1, dtype=np.int32)
+    Ai_f = np.asfortranarray(Ai + 1, dtype=np.int32)
-    dopcond = 1 if use_precond else 0
+    pcond_type = _parse_precond_type_for_fortran(precond_type)
-    if B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1):
-        b = np.asfortranarray(B.ravel(), dtype=np.float64)
-        x, flag, niter, relres = _blqmr.blqmr_solve_real(
-            n, nnz, Ap_f, Ai_f, Ax_f, b, maxiter, tol, droptol, dopcond
-        )
-        return BLQMRResult(
-            x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
-        )
+    # Check if complex
+    is_complex = np.iscomplexobj(A) or np.iscomplexobj(B)
+    if is_complex:
+        # Complex path
+        Ax_f = np.asfortranarray(A_csc.data, dtype=np.complex128)
+        if B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1):
+            # Single RHS
+            b_f = np.asfortranarray(B.ravel(), dtype=np.complex128)
+            x, flag, niter, relres = _blqmr.blqmr_solve_complex(
+                n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, pcond_type
+            )
+            return BLQMRResult(
+                x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
+            )
+        else:
+            # Multiple RHS - use block method
+            B_f = np.asfortranarray(B, dtype=np.complex128)
+            nrhs = B_f.shape[1]
+            X, flag, niter, relres = _blqmr.blqmr_solve_complex_multi(
+                n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, pcond_type
+            )
+            return BLQMRResult(
+                x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
+            )
     else:
-        B_f = np.asfortranarray(B, dtype=np.float64)
-        nrhs = B_f.shape[1]
-        X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
-            n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, dopcond
-        )
-        return BLQMRResult(
-            x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
-        )
+        # Real path
+        Ax_f = np.asfortranarray(A_csc.data, dtype=np.float64)
+        if B.ndim == 1 or (B.ndim == 2 and B.shape[1] == 1):
+            # Single RHS
+            b_f = np.asfortranarray(B.ravel(), dtype=np.float64)
+            x, flag, niter, relres = _blqmr.blqmr_solve_real(
+                n, nnz, Ap_f, Ai_f, Ax_f, b_f, maxiter, tol, droptol, pcond_type
+            )
+            return BLQMRResult(
+                x=x.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
+            )
+        else:
+            # Multiple RHS - use block method
+            B_f = np.asfortranarray(B, dtype=np.float64)
+            nrhs = B_f.shape[1]
+            X, flag, niter, relres = _blqmr.blqmr_solve_real_multi(
+                n, nnz, nrhs, Ap_f, Ai_f, Ax_f, B_f, maxiter, tol, droptol, pcond_type
+            )
+            return BLQMRResult(
+                x=X.copy(), flag=int(flag), iter=int(niter), relres=float(relres)
+            )
 def _blqmr_native(
@@ -1033,16 +1403,13 @@ def _blqmr_native(
     x0: Optional[np.ndarray],
     residual: bool,
     workspace: Optional[BLQMRWorkspace],
-    use_precond: bool,
+    precond_type: PrecondType,
 ) -> BLQMRResult:
     """Native Python backend for blqmr()."""
-    # Auto-create preconditioner if requested and not provided
-    if use_precond and M1 is None:
+    # Get preconditioner (user-provided M1 takes precedence)
+    if M1 is None:
         A_sp = sparse.csc_matrix(A) if not sparse.issparse(A) else A
-        try:
-            M1 = make_preconditioner(A_sp, "ilu")
-        except Exception:
-            M1 = make_preconditioner(A_sp, "diag")
+        M1 = _get_preconditioner_for_native(A_sp, precond_type, None)
     x, flag, relres, niter, resv = _blqmr_python_impl(
         A,

blocksolver 0.8.1__py3-none-any.whl → 0.8.5__py3-none-any.whl

blocksolver 0.8.1__py3-none-any.whl → 0.8.5__py3-none-any.whl