blocksolver 0.8.4__cp313-cp313-win_amd64.whl → 0.8.5__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
blocksolver/__init__.py CHANGED
@@ -39,7 +39,7 @@ from .blqmr import (
39
39
  HAS_NUMBA,
40
40
  )
41
41
 
42
- __version__ = "0.8.4"
42
+ __version__ = "0.8.5"
43
43
  __author__ = "Qianqian Fang"
44
44
 
45
45
  __all__ = [
Binary file
blocksolver/blqmr.py CHANGED
@@ -751,12 +751,19 @@ def _blqmr_python_impl(
751
751
 
752
752
  # Compute omega - standard norm WITH conjugation (Hermitian norm)
753
753
  # Fortran: omega(i,i,t3p)=sqrt(sum(conjg(v(:,i,t3p))*v(:,i,t3p)))
754
- for i in range(m):
755
- col = ws.v[:, i, t3p]
756
- if is_complex_input:
757
- ws.omega[i, i, t3p] = np.sqrt(np.sum(np.conj(col) * col).real)
758
- else:
759
- ws.omega[i, i, t3p] = np.sqrt(np.sum(col * col))
754
+ ws.omega[:, :, t3p].fill(0)
755
+ if is_complex_input:
756
+ np.fill_diagonal(
757
+ ws.omega[:, :, t3p],
758
+ np.sqrt(
759
+ np.einsum("ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]).real
760
+ ),
761
+ )
762
+ else:
763
+ np.fill_diagonal(
764
+ ws.omega[:, :, t3p],
765
+ np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
766
+ )
760
767
 
761
768
  # taut = omega * beta
762
769
  ws.taot[:] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
@@ -765,9 +772,11 @@ def _blqmr_python_impl(
765
772
  if isquasires:
766
773
  # Fortran: Qres0=maxval(sqrt(sum(abs(conjg(taut)*taut),1))) for complex
767
774
  if is_complex_input:
768
- Qres0 = np.max(np.sqrt(np.sum(np.abs(np.conj(ws.taot) * ws.taot), axis=0)))
775
+ Qres0 = np.max(
776
+ np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
777
+ )
769
778
  else:
770
- Qres0 = np.max(np.sqrt(np.sum(ws.taot * ws.taot, axis=0)))
779
+ Qres0 = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
771
780
  else:
772
781
  omegat = np.zeros((n, m), dtype=dtype)
773
782
  for i in range(m):
@@ -823,12 +832,21 @@ def _blqmr_python_impl(
823
832
  ws.beta[:, :, t3p] = R
824
833
 
825
834
  # Compute omega (standard Hermitian norm)
826
- for i in range(m):
827
- col = ws.v[:, i, t3p]
828
- if is_complex_input:
829
- ws.omega[i, i, t3p] = np.sqrt(np.sum(np.conj(col) * col).real)
830
- else:
831
- ws.omega[i, i, t3p] = np.sqrt(np.sum(col * col))
835
+ ws.omega[:, :, t3p].fill(0)
836
+ if is_complex_input:
837
+ np.fill_diagonal(
838
+ ws.omega[:, :, t3p],
839
+ np.sqrt(
840
+ np.einsum(
841
+ "ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]
842
+ ).real
843
+ ),
844
+ )
845
+ else:
846
+ np.fill_diagonal(
847
+ ws.omega[:, :, t3p],
848
+ np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
849
+ )
832
850
 
833
851
  # Compute intermediate matrices
834
852
  ws.tmp0[:] = ws.omega[:, :, t3n] @ ws.beta[:, :, t3].T
@@ -873,10 +891,10 @@ def _blqmr_python_impl(
873
891
  if isquasires:
874
892
  if is_complex_input:
875
893
  Qres = np.max(
876
- np.sqrt(np.sum(np.abs(np.conj(ws.taot) * ws.taot), axis=0))
894
+ np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
877
895
  )
878
896
  else:
879
- Qres = np.max(np.sqrt(np.sum(ws.taot * ws.taot, axis=0)))
897
+ Qres = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
880
898
  else:
881
899
  tmp0_diag = np.zeros((m, m), dtype=dtype)
882
900
  for i in range(m):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: blocksolver
3
- Version: 0.8.4
3
+ Version: 0.8.5
4
4
  Summary: Block Quasi-Minimal-Residual sparse linear solver
5
5
  Keywords: sparse,linear-algebra,iterative-solver,qmr,fortran,umfpack
6
6
  Author-Email: Qianqian Fang <q.fang@neu.edu>
@@ -49,7 +49,7 @@ Description-Content-Type: text/markdown
49
49
  - **Block QMR Algorithm**: Efficiently solves multiple right-hand sides simultaneously
50
50
  - **Complex Symmetric Support**: Designed for complex symmetric matrices (A = Aᵀ, not A = A†)
51
51
  - **Dual Backend**: Fortran extension for speed, Python fallback for portability
52
- - **ILU Preconditioning**: Built-in incomplete LU preconditioner for faster convergence
52
+ - **Flexible Preconditioning**: ILU, diagonal (Jacobi), and split preconditioners
53
53
  - **SciPy Integration**: Works seamlessly with SciPy sparse matrices
54
54
  - **Optional Numba Acceleration**: JIT-compiled kernels for the Python backend
55
55
 
@@ -71,7 +71,7 @@ The BLQMR algorithm is an iterative Krylov subspace method specifically designed
71
71
 
72
72
  - **Three-term Lanczos Recurrence**: Builds an orthonormal basis for the Krylov subspace with short recurrences, minimizing memory usage.
73
73
 
74
- - **Block Updates**: Processes m right-hand sides simultaneously, with typical block sizes of 1-16.
74
+ - **Block Updates**: Processes m right-hand sides simultaneously, with typical block sizes of 1-64.
75
75
 
76
76
  ### When to Use BLQMR
77
77
 
@@ -155,9 +155,9 @@ result = blqmr(A, b)
155
155
 
156
156
  # With options
157
157
  result = blqmr(A, b,
158
- tol=1e-8, # Convergence tolerance
159
- maxiter=1000, # Maximum iterations
160
- use_precond=True, # Use ILU preconditioning
158
+ tol=1e-8, # Convergence tolerance
159
+ maxiter=1000, # Maximum iterations
160
+ precond_type='ilu', # Preconditioner: 'ilu', 'diag', or None
161
161
  )
162
162
  ```
163
163
 
@@ -190,24 +190,29 @@ from blocksolver import blqmr
190
190
  A = create_helmholtz_matrix(frequency=1000) # Your application
191
191
  b = np.complex128(source_term)
192
192
 
193
- result = blqmr(A, b, tol=1e-8)
193
+ result = blqmr(A, b, tol=1e-8, precond_type='diag')
194
194
  ```
195
195
 
196
- ### Custom Preconditioning
196
+ ### Preconditioning
197
197
 
198
- For the Python backend, you can provide custom preconditioners:
198
+ BlockSolver supports multiple preconditioner types for both backends:
199
199
 
200
200
  ```python
201
201
  from blocksolver import blqmr, make_preconditioner
202
202
 
203
- # Create ILU preconditioner
204
- M1 = make_preconditioner(A, 'ilu')
203
+ # Using precond_type parameter (works with both backends)
204
+ result = blqmr(A, b, precond_type='ilu') # Incomplete LU
205
+ result = blqmr(A, b, precond_type='diag') # Diagonal (Jacobi)
206
+ result = blqmr(A, b, precond_type=None) # No preconditioning
205
207
 
206
- # Or diagonal (Jacobi) preconditioner
207
- M1 = make_preconditioner(A, 'diag')
208
+ # Custom preconditioner (Python backend only)
209
+ M1 = make_preconditioner(A, 'ilu', drop_tol=1e-4, fill_factor=10)
210
+ result = blqmr(A, b, M1=M1, precond_type=None)
208
211
 
209
- # Solve with custom preconditioner
210
- result = blqmr(A, b, M1=M1, use_precond=False)
212
+ # Split preconditioning for symmetric systems (Python backend)
213
+ # Preserves symmetry: M1^{-1} A M2^{-1}
214
+ M = make_preconditioner(A, 'diag', split=True) # Returns sqrt(D)
215
+ result = blqmr(A, b, M1=M, M2=M, precond_type=None)
211
216
  ```
212
217
 
213
218
  ### SciPy-Compatible Interface
@@ -236,9 +241,9 @@ b = np.array([8., 45., -3., 3., 19.])
236
241
 
237
242
  result = blqmr_solve(Ap, Ai, Ax, b,
238
243
  tol=1e-8,
239
- droptol=0.001, # ILU drop tolerance (Fortran only)
240
- use_precond=True,
241
- zero_based=True, # 0-based indexing (default)
244
+ droptol=0.001, # ILU drop tolerance (Fortran backend only)
245
+ precond_type='ilu', # Preconditioner type
246
+ zero_based=True, # 0-based indexing (default)
242
247
  )
243
248
  ```
244
249
 
@@ -257,7 +262,7 @@ Main solver interface.
257
262
  | `maxiter` | int | n | Maximum iterations |
258
263
  | `M1`, `M2` | preconditioner | None | Custom preconditioners (Python backend) |
259
264
  | `x0` | ndarray | None | Initial guess |
260
- | `use_precond` | bool | True | Use ILU preconditioning |
265
+ | `precond_type` | str or None | 'ilu' | Preconditioner: 'ilu', 'diag', or None |
261
266
  | `droptol` | float | 0.001 | ILU drop tolerance (Fortran backend) |
262
267
  | `residual` | bool | False | Use true residual for convergence (Python) |
263
268
  | `workspace` | BLQMRWorkspace | None | Pre-allocated workspace (Python) |
@@ -274,21 +279,29 @@ Main solver interface.
274
279
 
275
280
  ### `blqmr_solve(Ap, Ai, Ax, b, **kwargs) -> BLQMRResult`
276
281
 
277
- Low-level CSC interface.
282
+ Low-level CSC interface for single RHS.
278
283
 
279
284
  ### `blqmr_solve_multi(Ap, Ai, Ax, B, **kwargs) -> BLQMRResult`
280
285
 
281
- Multiple right-hand sides with CSC input.
286
+ Low-level CSC interface for multiple right-hand sides.
282
287
 
283
288
  ### `blqmr_scipy(A, b, **kwargs) -> Tuple[ndarray, int]`
284
289
 
285
290
  SciPy-compatible interface returning `(x, flag)`.
286
291
 
287
- ### `make_preconditioner(A, type) -> Preconditioner`
292
+ ### `make_preconditioner(A, precond_type, **kwargs) -> Preconditioner`
288
293
 
289
294
  Create a preconditioner for the Python backend.
290
295
 
291
- **Types:** `'diag'`/`'jacobi'`, `'ilu'`/`'ilu0'`, `'ssor'`
296
+ **Parameters:**
297
+ | Parameter | Type | Default | Description |
298
+ |-----------|------|---------|-------------|
299
+ | `A` | sparse matrix | required | System matrix |
300
+ | `precond_type` | str | required | 'diag', 'jacobi', 'ilu', 'ilu0', 'ilut', 'lu', 'ssor' |
301
+ | `split` | bool | False | Return sqrt(D) for split preconditioning |
302
+ | `drop_tol` | float | 1e-4 | Drop tolerance for ILUT |
303
+ | `fill_factor` | float | 10 | Fill factor for ILUT |
304
+ | `omega` | float | 1.0 | Relaxation parameter for SSOR |
292
305
 
293
306
  ### Utility Functions
294
307
 
@@ -301,13 +314,44 @@ from blocksolver import (
301
314
  )
302
315
  ```
303
316
 
317
+ ## Benchmarks
318
+
319
+ ### BLQMR vs Direct Solver (mldivide)
320
+
321
+ Complex symmetric FEM matrices, 4 right-hand sides, tolerance 10⁻⁸, split Jacobi preconditioner:
322
+
323
+ | Grid | Nodes | NNZ | mldivide | BLQMR | Speedup |
324
+ |------|-------|-----|----------|-------|---------|
325
+ | 20³ | 8,000 | 110K | 135ms | 115ms | **1.2×** |
326
+ | 30³ | 27,000 | 384K | 1.36s | 373ms | **3.6×** |
327
+ | 40³ | 64,000 | 922K | 6.40s | 947ms | **6.8×** |
328
+ | 50³ | 125,000 | 1.8M | 25.9s | 1.76s | **14.7×** |
329
+
330
+ ### Block Size Efficiency
331
+
332
+ With 64 RHS on an 8,000-node complex symmetric system:
333
+
334
+ | Block Size | Iterations | Speedup vs Single |
335
+ |------------|------------|-------------------|
336
+ | 1 (point) | 10,154 | 1.0× |
337
+ | 4 | 2,220 | 1.8× |
338
+ | 8 | 956 | 2.0× |
339
+ | 16 | 361 | 2.1× |
340
+ | 32 | 178 | 2.2× |
341
+
342
+ **Optimal block size**: 8-16 for most problems. Larger blocks have diminishing returns due to increased per-iteration cost.
343
+
344
+ ### Iteration Efficiency
345
+
346
+ With 4 RHS, BLQMR needs only ~24% of the total iterations required by 4 separate single-RHS solves — achieving **super-linear block acceleration**.
347
+
304
348
  ## Performance Tips
305
349
 
306
- 1. **Use the Fortran backend** when available (10-100× faster than Python)
350
+ 1. **Use the Fortran backend** when available (faster for large systems)
307
351
 
308
352
  2. **Enable preconditioning** for ill-conditioned systems:
309
353
  ```python
310
- result = blqmr(A, b, use_precond=True)
354
+ result = blqmr(A, b, precond_type='ilu')
311
355
  ```
312
356
 
313
357
  3. **Batch multiple right-hand sides** instead of solving one at a time:
@@ -328,11 +372,18 @@ from blocksolver import (
328
372
  5. **Reuse workspace** for repeated solves with the same dimensions:
329
373
  ```python
330
374
  from blocksolver import BLQMRWorkspace
331
- ws = BLQMRWorkspace(n, m)
375
+ ws = BLQMRWorkspace(n, m, dtype=np.complex128)
332
376
  for b in many_rhs:
333
377
  result = blqmr(A, b, workspace=ws)
334
378
  ```
335
379
 
380
+ 6. **Use split Jacobi for complex symmetric systems**:
381
+ ```python
382
+ # Preserves symmetry of preconditioned system
383
+ M = make_preconditioner(A, 'diag', split=True)
384
+ result = blqmr(A, b, M1=M, M2=M, precond_type=None)
385
+ ```
386
+
336
387
  ## Examples
337
388
 
338
389
  ### Diffuse Optical Tomography
@@ -360,10 +411,10 @@ def create_diffusion_matrix(nx, ny, D=1.0, mu_a=0.01, omega=1e9):
360
411
 
361
412
  # Setup problem
362
413
  A = create_diffusion_matrix(100, 100, omega=2*np.pi*100e6)
363
- sources = np.random.randn(10000, 16) # 16 source positions
414
+ sources = np.random.randn(10000, 16) + 0j # 16 source positions
364
415
 
365
416
  # Solve for all sources at once
366
- result = blqmr(A, sources, tol=1e-8)
417
+ result = blqmr(A, sources, tol=1e-8, precond_type='diag')
367
418
  print(f"Solved {sources.shape[1]} systems in {result.iter} iterations")
368
419
  ```
369
420
 
@@ -382,7 +433,7 @@ def solve_helmholtz(K, M, f, frequencies):
382
433
  for omega in frequencies:
383
434
  # A = K - ω²M (complex symmetric if K, M are symmetric)
384
435
  A = K - omega**2 * M
385
- result = blqmr(A, f, tol=1e-10)
436
+ result = blqmr(A, f, tol=1e-10, precond_type='diag')
386
437
  solutions.append(result.x)
387
438
  return np.array(solutions)
388
439
  ```
@@ -401,12 +452,28 @@ brew install gcc suite-sparse # macOS
401
452
  pip install --no-cache-dir blocksolver
402
453
  ```
403
454
 
455
+ ### Check backend status
456
+
457
+ ```python
458
+ from blocksolver import get_backend_info
459
+ print(get_backend_info())
460
+ # {'backend': 'binary', 'has_fortran': True, 'has_numba': True}
461
+ ```
462
+
404
463
  ### Slow convergence
405
464
 
406
- 1. Enable preconditioning: `use_precond=True`
465
+ 1. Enable preconditioning: `precond_type='ilu'` or `precond_type='diag'`
407
466
  2. Reduce ILU drop tolerance: `droptol=1e-4` (Fortran backend)
408
467
  3. Check matrix conditioning with `np.linalg.cond(A.toarray())`
409
468
 
469
+ ### ILU factorization fails
470
+
471
+ For indefinite or complex symmetric matrices, ILU may fail:
472
+ ```python
473
+ # Fall back to diagonal preconditioner
474
+ result = blqmr(A, b, precond_type='diag')
475
+ ```
476
+
410
477
  ### Memory issues with large systems
411
478
 
412
479
  1. Use the Fortran backend (more memory efficient)
@@ -415,7 +482,7 @@ pip install --no-cache-dir blocksolver
415
482
 
416
483
  ## License
417
484
 
418
- BSD-3-Clause / LGPL-3.0+ / GPL-3.0+ (tri-licensed)
485
+ BSD-3-Clause or GPL-3.0+ (dual-licensed)
419
486
 
420
487
  ## Citation
421
488
 
@@ -0,0 +1,7 @@
1
+ blocksolver-0.8.5.dist-info/METADATA,sha256=K7OUbJ-pEkHWX3Cvy9iz1_L1onkox-5T9YwOvCrCiSU,15985
2
+ blocksolver-0.8.5.dist-info/WHEEL,sha256=suq8ARrxbiI7iLH3BgK-82uzxQ-4Hm-m8w01oCokrtA,85
3
+ blocksolver/_blqmr.cp313-win_amd64.pyd,sha256=snMORylXiw5dR1yhm_BV86_S2XJa7EXhQ2MkO11e8NM,34368892
4
+ blocksolver/_blqmr.cp313-win_amd64.dll.a,sha256=zdAww8dqSmL_SMRpDRzryczF4CYvLLdaoxCoVAeOiaU,1706
5
+ blocksolver/__init__.py,sha256=PZV19qS5YQDWdAcqgD6g0wW7KRKrdv1JuLDKmrnC6Es,1982
6
+ blocksolver/blqmr.py,sha256=diRm-xD2-4r0W59WrRe-O26DHJgc32voCVQa0H5FCRk,46543
7
+ blocksolver-0.8.5.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- blocksolver-0.8.4.dist-info/METADATA,sha256=4s5MWTlfx-pnVrt5KaalrAVPmCflegDVOGiWnx5L23Y,13264
2
- blocksolver-0.8.4.dist-info/WHEEL,sha256=suq8ARrxbiI7iLH3BgK-82uzxQ-4Hm-m8w01oCokrtA,85
3
- blocksolver/_blqmr.cp313-win_amd64.pyd,sha256=a2pm7VIi67ok4vRc6YYurTKIO2kkJV4_mkNOtZrVVt0,34375165
4
- blocksolver/_blqmr.cp313-win_amd64.dll.a,sha256=zdAww8dqSmL_SMRpDRzryczF4CYvLLdaoxCoVAeOiaU,1706
5
- blocksolver/__init__.py,sha256=aC1Iq40CVJ_GhNLSnjnXaOeioHZV8hHw1yy349pF3VU,1982
6
- blocksolver/blqmr.py,sha256=mrJ_ze4lYxS6uV4XGF66f1NhcwhYg6p3OERWXOGaTTU,46057
7
- blocksolver-0.8.4.dist-info/RECORD,,