blocksolver 0.8.4__cp313-cp313-win_amd64.whl → 0.8.5__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blocksolver/__init__.py +1 -1
- blocksolver/_blqmr.cp313-win_amd64.pyd +0 -0
- blocksolver/blqmr.py +34 -16
- {blocksolver-0.8.4.dist-info → blocksolver-0.8.5.dist-info}/METADATA +98 -31
- blocksolver-0.8.5.dist-info/RECORD +7 -0
- blocksolver-0.8.4.dist-info/RECORD +0 -7
- {blocksolver-0.8.4.dist-info → blocksolver-0.8.5.dist-info}/WHEEL +0 -0
blocksolver/__init__.py
CHANGED
|
Binary file
|
blocksolver/blqmr.py
CHANGED
|
@@ -751,12 +751,19 @@ def _blqmr_python_impl(
|
|
|
751
751
|
|
|
752
752
|
# Compute omega - standard norm WITH conjugation (Hermitian norm)
|
|
753
753
|
# Fortran: omega(i,i,t3p)=sqrt(sum(conjg(v(:,i,t3p))*v(:,i,t3p)))
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
ws.omega[
|
|
758
|
-
|
|
759
|
-
|
|
754
|
+
ws.omega[:, :, t3p].fill(0)
|
|
755
|
+
if is_complex_input:
|
|
756
|
+
np.fill_diagonal(
|
|
757
|
+
ws.omega[:, :, t3p],
|
|
758
|
+
np.sqrt(
|
|
759
|
+
np.einsum("ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]).real
|
|
760
|
+
),
|
|
761
|
+
)
|
|
762
|
+
else:
|
|
763
|
+
np.fill_diagonal(
|
|
764
|
+
ws.omega[:, :, t3p],
|
|
765
|
+
np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
|
|
766
|
+
)
|
|
760
767
|
|
|
761
768
|
# taut = omega * beta
|
|
762
769
|
ws.taot[:] = ws.omega[:, :, t3p] @ ws.beta[:, :, t3p]
|
|
@@ -765,9 +772,11 @@ def _blqmr_python_impl(
|
|
|
765
772
|
if isquasires:
|
|
766
773
|
# Fortran: Qres0=maxval(sqrt(sum(abs(conjg(taut)*taut),1))) for complex
|
|
767
774
|
if is_complex_input:
|
|
768
|
-
Qres0 = np.max(
|
|
775
|
+
Qres0 = np.max(
|
|
776
|
+
np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
|
|
777
|
+
)
|
|
769
778
|
else:
|
|
770
|
-
Qres0 = np.max(np.sqrt(np.
|
|
779
|
+
Qres0 = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
|
|
771
780
|
else:
|
|
772
781
|
omegat = np.zeros((n, m), dtype=dtype)
|
|
773
782
|
for i in range(m):
|
|
@@ -823,12 +832,21 @@ def _blqmr_python_impl(
|
|
|
823
832
|
ws.beta[:, :, t3p] = R
|
|
824
833
|
|
|
825
834
|
# Compute omega (standard Hermitian norm)
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
ws.omega[
|
|
830
|
-
|
|
831
|
-
|
|
835
|
+
ws.omega[:, :, t3p].fill(0)
|
|
836
|
+
if is_complex_input:
|
|
837
|
+
np.fill_diagonal(
|
|
838
|
+
ws.omega[:, :, t3p],
|
|
839
|
+
np.sqrt(
|
|
840
|
+
np.einsum(
|
|
841
|
+
"ij,ij->j", np.conj(ws.v[:, :, t3p]), ws.v[:, :, t3p]
|
|
842
|
+
).real
|
|
843
|
+
),
|
|
844
|
+
)
|
|
845
|
+
else:
|
|
846
|
+
np.fill_diagonal(
|
|
847
|
+
ws.omega[:, :, t3p],
|
|
848
|
+
np.sqrt(np.einsum("ij,ij->j", ws.v[:, :, t3p], ws.v[:, :, t3p])),
|
|
849
|
+
)
|
|
832
850
|
|
|
833
851
|
# Compute intermediate matrices
|
|
834
852
|
ws.tmp0[:] = ws.omega[:, :, t3n] @ ws.beta[:, :, t3].T
|
|
@@ -873,10 +891,10 @@ def _blqmr_python_impl(
|
|
|
873
891
|
if isquasires:
|
|
874
892
|
if is_complex_input:
|
|
875
893
|
Qres = np.max(
|
|
876
|
-
np.sqrt(np.
|
|
894
|
+
np.sqrt(np.einsum("ij,ij->j", np.conj(ws.taot), ws.taot).real)
|
|
877
895
|
)
|
|
878
896
|
else:
|
|
879
|
-
Qres = np.max(np.sqrt(np.
|
|
897
|
+
Qres = np.max(np.sqrt(np.einsum("ij,ij->j", ws.taot, ws.taot)))
|
|
880
898
|
else:
|
|
881
899
|
tmp0_diag = np.zeros((m, m), dtype=dtype)
|
|
882
900
|
for i in range(m):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: blocksolver
|
|
3
|
-
Version: 0.8.4
|
|
3
|
+
Version: 0.8.5
|
|
4
4
|
Summary: Block Quasi-Minimal-Residual sparse linear solver
|
|
5
5
|
Keywords: sparse,linear-algebra,iterative-solver,qmr,fortran,umfpack
|
|
6
6
|
Author-Email: Qianqian Fang <q.fang@neu.edu>
|
|
@@ -49,7 +49,7 @@ Description-Content-Type: text/markdown
|
|
|
49
49
|
- **Block QMR Algorithm**: Efficiently solves multiple right-hand sides simultaneously
|
|
50
50
|
- **Complex Symmetric Support**: Designed for complex symmetric matrices (A = Aᵀ, not A = A†)
|
|
51
51
|
- **Dual Backend**: Fortran extension for speed, Python fallback for portability
|
|
52
|
-
- **
|
|
52
|
+
- **Flexible Preconditioning**: ILU, diagonal (Jacobi), and split preconditioners
|
|
53
53
|
- **SciPy Integration**: Works seamlessly with SciPy sparse matrices
|
|
54
54
|
- **Optional Numba Acceleration**: JIT-compiled kernels for the Python backend
|
|
55
55
|
|
|
@@ -71,7 +71,7 @@ The BLQMR algorithm is an iterative Krylov subspace method specifically designed
|
|
|
71
71
|
|
|
72
72
|
- **Three-term Lanczos Recurrence**: Builds an orthonormal basis for the Krylov subspace with short recurrences, minimizing memory usage.
|
|
73
73
|
|
|
74
|
-
- **Block Updates**: Processes m right-hand sides simultaneously, with typical block sizes of 1-
|
|
74
|
+
- **Block Updates**: Processes m right-hand sides simultaneously, with typical block sizes of 1-64.
|
|
75
75
|
|
|
76
76
|
### When to Use BLQMR
|
|
77
77
|
|
|
@@ -155,9 +155,9 @@ result = blqmr(A, b)
|
|
|
155
155
|
|
|
156
156
|
# With options
|
|
157
157
|
result = blqmr(A, b,
|
|
158
|
-
tol=1e-8,
|
|
159
|
-
maxiter=1000,
|
|
160
|
-
|
|
158
|
+
tol=1e-8, # Convergence tolerance
|
|
159
|
+
maxiter=1000, # Maximum iterations
|
|
160
|
+
precond_type='ilu', # Preconditioner: 'ilu', 'diag', or None
|
|
161
161
|
)
|
|
162
162
|
```
|
|
163
163
|
|
|
@@ -190,24 +190,29 @@ from blocksolver import blqmr
|
|
|
190
190
|
A = create_helmholtz_matrix(frequency=1000) # Your application
|
|
191
191
|
b = np.complex128(source_term)
|
|
192
192
|
|
|
193
|
-
result = blqmr(A, b, tol=1e-8)
|
|
193
|
+
result = blqmr(A, b, tol=1e-8, precond_type='diag')
|
|
194
194
|
```
|
|
195
195
|
|
|
196
|
-
###
|
|
196
|
+
### Preconditioning
|
|
197
197
|
|
|
198
|
-
|
|
198
|
+
BlockSolver supports multiple preconditioner types for both backends:
|
|
199
199
|
|
|
200
200
|
```python
|
|
201
201
|
from blocksolver import blqmr, make_preconditioner
|
|
202
202
|
|
|
203
|
-
#
|
|
204
|
-
|
|
203
|
+
# Using precond_type parameter (works with both backends)
|
|
204
|
+
result = blqmr(A, b, precond_type='ilu') # Incomplete LU
|
|
205
|
+
result = blqmr(A, b, precond_type='diag') # Diagonal (Jacobi)
|
|
206
|
+
result = blqmr(A, b, precond_type=None) # No preconditioning
|
|
205
207
|
|
|
206
|
-
#
|
|
207
|
-
M1 = make_preconditioner(A, '
|
|
208
|
+
# Custom preconditioner (Python backend only)
|
|
209
|
+
M1 = make_preconditioner(A, 'ilu', drop_tol=1e-4, fill_factor=10)
|
|
210
|
+
result = blqmr(A, b, M1=M1, precond_type=None)
|
|
208
211
|
|
|
209
|
-
#
|
|
210
|
-
|
|
212
|
+
# Split preconditioning for symmetric systems (Python backend)
|
|
213
|
+
# Preserves symmetry: M1^{-1} A M2^{-1}
|
|
214
|
+
M = make_preconditioner(A, 'diag', split=True) # Returns sqrt(D)
|
|
215
|
+
result = blqmr(A, b, M1=M, M2=M, precond_type=None)
|
|
211
216
|
```
|
|
212
217
|
|
|
213
218
|
### SciPy-Compatible Interface
|
|
@@ -236,9 +241,9 @@ b = np.array([8., 45., -3., 3., 19.])
|
|
|
236
241
|
|
|
237
242
|
result = blqmr_solve(Ap, Ai, Ax, b,
|
|
238
243
|
tol=1e-8,
|
|
239
|
-
droptol=0.001,
|
|
240
|
-
|
|
241
|
-
zero_based=True,
|
|
244
|
+
droptol=0.001, # ILU drop tolerance (Fortran backend only)
|
|
245
|
+
precond_type='ilu', # Preconditioner type
|
|
246
|
+
zero_based=True, # 0-based indexing (default)
|
|
242
247
|
)
|
|
243
248
|
```
|
|
244
249
|
|
|
@@ -257,7 +262,7 @@ Main solver interface.
|
|
|
257
262
|
| `maxiter` | int | n | Maximum iterations |
|
|
258
263
|
| `M1`, `M2` | preconditioner | None | Custom preconditioners (Python backend) |
|
|
259
264
|
| `x0` | ndarray | None | Initial guess |
|
|
260
|
-
| `
|
|
265
|
+
| `precond_type` | str or None | 'ilu' | Preconditioner: 'ilu', 'diag', or None |
|
|
261
266
|
| `droptol` | float | 0.001 | ILU drop tolerance (Fortran backend) |
|
|
262
267
|
| `residual` | bool | False | Use true residual for convergence (Python) |
|
|
263
268
|
| `workspace` | BLQMRWorkspace | None | Pre-allocated workspace (Python) |
|
|
@@ -274,21 +279,29 @@ Main solver interface.
|
|
|
274
279
|
|
|
275
280
|
### `blqmr_solve(Ap, Ai, Ax, b, **kwargs) -> BLQMRResult`
|
|
276
281
|
|
|
277
|
-
Low-level CSC interface.
|
|
282
|
+
Low-level CSC interface for single RHS.
|
|
278
283
|
|
|
279
284
|
### `blqmr_solve_multi(Ap, Ai, Ax, B, **kwargs) -> BLQMRResult`
|
|
280
285
|
|
|
281
|
-
|
|
286
|
+
Low-level CSC interface for multiple right-hand sides.
|
|
282
287
|
|
|
283
288
|
### `blqmr_scipy(A, b, **kwargs) -> Tuple[ndarray, int]`
|
|
284
289
|
|
|
285
290
|
SciPy-compatible interface returning `(x, flag)`.
|
|
286
291
|
|
|
287
|
-
### `make_preconditioner(A,
|
|
292
|
+
### `make_preconditioner(A, precond_type, **kwargs) -> Preconditioner`
|
|
288
293
|
|
|
289
294
|
Create a preconditioner for the Python backend.
|
|
290
295
|
|
|
291
|
-
**
|
|
296
|
+
**Parameters:**
|
|
297
|
+
| Parameter | Type | Default | Description |
|
|
298
|
+
|-----------|------|---------|-------------|
|
|
299
|
+
| `A` | sparse matrix | required | System matrix |
|
|
300
|
+
| `precond_type` | str | required | 'diag', 'jacobi', 'ilu', 'ilu0', 'ilut', 'lu', 'ssor' |
|
|
301
|
+
| `split` | bool | False | Return sqrt(D) for split preconditioning |
|
|
302
|
+
| `drop_tol` | float | 1e-4 | Drop tolerance for ILUT |
|
|
303
|
+
| `fill_factor` | float | 10 | Fill factor for ILUT |
|
|
304
|
+
| `omega` | float | 1.0 | Relaxation parameter for SSOR |
|
|
292
305
|
|
|
293
306
|
### Utility Functions
|
|
294
307
|
|
|
@@ -301,13 +314,44 @@ from blocksolver import (
|
|
|
301
314
|
)
|
|
302
315
|
```
|
|
303
316
|
|
|
317
|
+
## Benchmarks
|
|
318
|
+
|
|
319
|
+
### BLQMR vs Direct Solver (mldivide)
|
|
320
|
+
|
|
321
|
+
Complex symmetric FEM matrices, 4 right-hand sides, tolerance 10⁻⁸, split Jacobi preconditioner:
|
|
322
|
+
|
|
323
|
+
| Grid | Nodes | NNZ | mldivide | BLQMR | Speedup |
|
|
324
|
+
|------|-------|-----|----------|-------|---------|
|
|
325
|
+
| 20³ | 8,000 | 110K | 135ms | 115ms | **1.2×** |
|
|
326
|
+
| 30³ | 27,000 | 384K | 1.36s | 373ms | **3.6×** |
|
|
327
|
+
| 40³ | 64,000 | 922K | 6.40s | 947ms | **6.8×** |
|
|
328
|
+
| 50³ | 125,000 | 1.8M | 25.9s | 1.76s | **14.7×** |
|
|
329
|
+
|
|
330
|
+
### Block Size Efficiency
|
|
331
|
+
|
|
332
|
+
With 64 RHS on an 8,000-node complex symmetric system:
|
|
333
|
+
|
|
334
|
+
| Block Size | Iterations | Speedup vs Single |
|
|
335
|
+
|------------|------------|-------------------|
|
|
336
|
+
| 1 (point) | 10,154 | 1.0× |
|
|
337
|
+
| 4 | 2,220 | 1.8× |
|
|
338
|
+
| 8 | 956 | 2.0× |
|
|
339
|
+
| 16 | 361 | 2.1× |
|
|
340
|
+
| 32 | 178 | 2.2× |
|
|
341
|
+
|
|
342
|
+
**Optimal block size**: 8-16 for most problems. Larger blocks have diminishing returns due to increased per-iteration cost.
|
|
343
|
+
|
|
344
|
+
### Iteration Efficiency
|
|
345
|
+
|
|
346
|
+
With 4 RHS, BLQMR needs only ~24% of the total iterations required by 4 separate single-RHS solves — achieving **super-linear block acceleration**.
|
|
347
|
+
|
|
304
348
|
## Performance Tips
|
|
305
349
|
|
|
306
|
-
1. **Use the Fortran backend** when available (
|
|
350
|
+
1. **Use the Fortran backend** when available (faster for large systems)
|
|
307
351
|
|
|
308
352
|
2. **Enable preconditioning** for ill-conditioned systems:
|
|
309
353
|
```python
|
|
310
|
-
result = blqmr(A, b,
|
|
354
|
+
result = blqmr(A, b, precond_type='ilu')
|
|
311
355
|
```
|
|
312
356
|
|
|
313
357
|
3. **Batch multiple right-hand sides** instead of solving one at a time:
|
|
@@ -328,11 +372,18 @@ from blocksolver import (
|
|
|
328
372
|
5. **Reuse workspace** for repeated solves with the same dimensions:
|
|
329
373
|
```python
|
|
330
374
|
from blocksolver import BLQMRWorkspace
|
|
331
|
-
ws = BLQMRWorkspace(n, m)
|
|
375
|
+
ws = BLQMRWorkspace(n, m, dtype=np.complex128)
|
|
332
376
|
for b in many_rhs:
|
|
333
377
|
result = blqmr(A, b, workspace=ws)
|
|
334
378
|
```
|
|
335
379
|
|
|
380
|
+
6. **Use split Jacobi for complex symmetric systems**:
|
|
381
|
+
```python
|
|
382
|
+
# Preserves symmetry of preconditioned system
|
|
383
|
+
M = make_preconditioner(A, 'diag', split=True)
|
|
384
|
+
result = blqmr(A, b, M1=M, M2=M, precond_type=None)
|
|
385
|
+
```
|
|
386
|
+
|
|
336
387
|
## Examples
|
|
337
388
|
|
|
338
389
|
### Diffuse Optical Tomography
|
|
@@ -360,10 +411,10 @@ def create_diffusion_matrix(nx, ny, D=1.0, mu_a=0.01, omega=1e9):
|
|
|
360
411
|
|
|
361
412
|
# Setup problem
|
|
362
413
|
A = create_diffusion_matrix(100, 100, omega=2*np.pi*100e6)
|
|
363
|
-
sources = np.random.randn(10000, 16) # 16 source positions
|
|
414
|
+
sources = np.random.randn(10000, 16) + 0j # 16 source positions
|
|
364
415
|
|
|
365
416
|
# Solve for all sources at once
|
|
366
|
-
result = blqmr(A, sources, tol=1e-8)
|
|
417
|
+
result = blqmr(A, sources, tol=1e-8, precond_type='diag')
|
|
367
418
|
print(f"Solved {sources.shape[1]} systems in {result.iter} iterations")
|
|
368
419
|
```
|
|
369
420
|
|
|
@@ -382,7 +433,7 @@ def solve_helmholtz(K, M, f, frequencies):
|
|
|
382
433
|
for omega in frequencies:
|
|
383
434
|
# A = K - ω²M (complex symmetric if K, M are symmetric)
|
|
384
435
|
A = K - omega**2 * M
|
|
385
|
-
result = blqmr(A, f, tol=1e-10)
|
|
436
|
+
result = blqmr(A, f, tol=1e-10, precond_type='diag')
|
|
386
437
|
solutions.append(result.x)
|
|
387
438
|
return np.array(solutions)
|
|
388
439
|
```
|
|
@@ -401,12 +452,28 @@ brew install gcc suite-sparse # macOS
|
|
|
401
452
|
pip install --no-cache-dir blocksolver
|
|
402
453
|
```
|
|
403
454
|
|
|
455
|
+
### Check backend status
|
|
456
|
+
|
|
457
|
+
```python
|
|
458
|
+
from blocksolver import get_backend_info
|
|
459
|
+
print(get_backend_info())
|
|
460
|
+
# {'backend': 'binary', 'has_fortran': True, 'has_numba': True}
|
|
461
|
+
```
|
|
462
|
+
|
|
404
463
|
### Slow convergence
|
|
405
464
|
|
|
406
|
-
1. Enable preconditioning: `
|
|
465
|
+
1. Enable preconditioning: `precond_type='ilu'` or `precond_type='diag'`
|
|
407
466
|
2. Reduce ILU drop tolerance: `droptol=1e-4` (Fortran backend)
|
|
408
467
|
3. Check matrix conditioning with `np.linalg.cond(A.toarray())`
|
|
409
468
|
|
|
469
|
+
### ILU factorization fails
|
|
470
|
+
|
|
471
|
+
For indefinite or complex symmetric matrices, ILU may fail:
|
|
472
|
+
```python
|
|
473
|
+
# Fall back to diagonal preconditioner
|
|
474
|
+
result = blqmr(A, b, precond_type='diag')
|
|
475
|
+
```
|
|
476
|
+
|
|
410
477
|
### Memory issues with large systems
|
|
411
478
|
|
|
412
479
|
1. Use the Fortran backend (more memory efficient)
|
|
@@ -415,7 +482,7 @@ pip install --no-cache-dir blocksolver
|
|
|
415
482
|
|
|
416
483
|
## License
|
|
417
484
|
|
|
418
|
-
BSD-3-Clause
|
|
485
|
+
BSD-3-Clause or GPL-3.0+ (dual-licensed)
|
|
419
486
|
|
|
420
487
|
## Citation
|
|
421
488
|
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
blocksolver-0.8.5.dist-info/METADATA,sha256=K7OUbJ-pEkHWX3Cvy9iz1_L1onkox-5T9YwOvCrCiSU,15985
|
|
2
|
+
blocksolver-0.8.5.dist-info/WHEEL,sha256=suq8ARrxbiI7iLH3BgK-82uzxQ-4Hm-m8w01oCokrtA,85
|
|
3
|
+
blocksolver/_blqmr.cp313-win_amd64.pyd,sha256=snMORylXiw5dR1yhm_BV86_S2XJa7EXhQ2MkO11e8NM,34368892
|
|
4
|
+
blocksolver/_blqmr.cp313-win_amd64.dll.a,sha256=zdAww8dqSmL_SMRpDRzryczF4CYvLLdaoxCoVAeOiaU,1706
|
|
5
|
+
blocksolver/__init__.py,sha256=PZV19qS5YQDWdAcqgD6g0wW7KRKrdv1JuLDKmrnC6Es,1982
|
|
6
|
+
blocksolver/blqmr.py,sha256=diRm-xD2-4r0W59WrRe-O26DHJgc32voCVQa0H5FCRk,46543
|
|
7
|
+
blocksolver-0.8.5.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
blocksolver-0.8.4.dist-info/METADATA,sha256=4s5MWTlfx-pnVrt5KaalrAVPmCflegDVOGiWnx5L23Y,13264
|
|
2
|
-
blocksolver-0.8.4.dist-info/WHEEL,sha256=suq8ARrxbiI7iLH3BgK-82uzxQ-4Hm-m8w01oCokrtA,85
|
|
3
|
-
blocksolver/_blqmr.cp313-win_amd64.pyd,sha256=a2pm7VIi67ok4vRc6YYurTKIO2kkJV4_mkNOtZrVVt0,34375165
|
|
4
|
-
blocksolver/_blqmr.cp313-win_amd64.dll.a,sha256=zdAww8dqSmL_SMRpDRzryczF4CYvLLdaoxCoVAeOiaU,1706
|
|
5
|
-
blocksolver/__init__.py,sha256=aC1Iq40CVJ_GhNLSnjnXaOeioHZV8hHw1yy349pF3VU,1982
|
|
6
|
-
blocksolver/blqmr.py,sha256=mrJ_ze4lYxS6uV4XGF66f1NhcwhYg6p3OERWXOGaTTU,46057
|
|
7
|
-
blocksolver-0.8.4.dist-info/RECORD,,
|
|
File without changes
|