FastLSQ 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. {fastlsq-0.2.2 → fastlsq-0.2.3}/CHANGELOG.md +33 -0
  2. {fastlsq-0.2.2 → fastlsq-0.2.3}/FastLSQ.egg-info/PKG-INFO +1 -1
  3. {fastlsq-0.2.2 → fastlsq-0.2.3}/PKG-INFO +1 -1
  4. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/__init__.py +1 -1
  5. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/api.py +29 -9
  6. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/linalg.py +46 -7
  7. {fastlsq-0.2.2 → fastlsq-0.2.3}/pyproject.toml +1 -1
  8. {fastlsq-0.2.2 → fastlsq-0.2.3}/tests/test_vector_basis.py +1 -1
  9. {fastlsq-0.2.2 → fastlsq-0.2.3}/FastLSQ.egg-info/SOURCES.txt +0 -0
  10. {fastlsq-0.2.2 → fastlsq-0.2.3}/FastLSQ.egg-info/dependency_links.txt +0 -0
  11. {fastlsq-0.2.2 → fastlsq-0.2.3}/FastLSQ.egg-info/requires.txt +0 -0
  12. {fastlsq-0.2.2 → fastlsq-0.2.3}/FastLSQ.egg-info/top_level.txt +0 -0
  13. {fastlsq-0.2.2 → fastlsq-0.2.3}/LICENSE +0 -0
  14. {fastlsq-0.2.2 → fastlsq-0.2.3}/MANIFEST.in +0 -0
  15. {fastlsq-0.2.2 → fastlsq-0.2.3}/README.md +0 -0
  16. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/add_your_own_pde.py +0 -0
  17. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/benchmark_comparison.py +0 -0
  18. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/custom_features.py +0 -0
  19. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/fred_sde.py +0 -0
  20. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/fred_sde_fastlsq.py +0 -0
  21. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/gaia_potential.py +0 -0
  22. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/gaia_potential_fastlsq.py +0 -0
  23. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/horizons_ephemeris.py +0 -0
  24. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/numerai_alpha.py +0 -0
  25. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/numerai_alpha_fastlsq.py +0 -0
  26. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/run_all_fastlsq.py +0 -0
  27. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/__init__.py +0 -0
  28. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/_alsu_lattice.py +0 -0
  29. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/_common.py +0 -0
  30. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/run_all.py +0 -0
  31. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_beamloss_ode.py +0 -0
  32. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_betatron_tune.py +0 -0
  33. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_green_fff.py +0 -0
  34. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_hill_ivp.py +0 -0
  35. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_observe_fit_act_simulator.py +0 -0
  36. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_orbit_inverse.py +0 -0
  37. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_passive_loco.py +0 -0
  38. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_perturbed_hill.py +0 -0
  39. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_sofb_observe_fit_act.py +0 -0
  40. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_streaming_archive_growth.py +0 -0
  41. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_synchrotron_ode.py +0 -0
  42. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_tides_3months.py +0 -0
  43. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_topoff_impulse.py +0 -0
  44. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s01_visualize.py +0 -0
  45. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s02_plasma_wakefield.py +0 -0
  46. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s03_synchrobetatron.py +0 -0
  47. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s04_sunspots.py +0 -0
  48. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s05_helioseismology.py +0 -0
  49. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s06_tides.py +0 -0
  50. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s07_iers_earth_rotation.py +0 -0
  51. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s08_mauna_loa_co2.py +0 -0
  52. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s09_enso_qbo.py +0 -0
  53. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s10_pulsar_timing.py +0 -0
  54. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s11_modal_analysis.py +0 -0
  55. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s12_mems_resonator.py +0 -0
  56. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s13_variable_stars_kepler.py +0 -0
  57. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s14_eeg.py +0 -0
  58. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/scenarios/s15_circadian.py +0 -0
  59. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/extras/spectral_expansion.py +0 -0
  60. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/grad_shafranov.py +0 -0
  61. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/grid_inverse.py +0 -0
  62. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/grid_rl_control.py +0 -0
  63. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/grid_swing.py +0 -0
  64. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/gs_inverse.py +0 -0
  65. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/gs_rl_control.py +0 -0
  66. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/inverse_heat_source.py +0 -0
  67. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/inverse_magnetostatics.py +0 -0
  68. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/inverse_source_position.py +0 -0
  69. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/learnable_helmholtz.py +0 -0
  70. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/orbit_hill.py +0 -0
  71. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/orbit_inverse.py +0 -0
  72. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/orbit_rl.py +0 -0
  73. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/pde_discovery.py +0 -0
  74. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/run_all_extensions.py +0 -0
  75. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/run_linear.py +0 -0
  76. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/run_nonlinear.py +0 -0
  77. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/tutorial_basic.py +0 -0
  78. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/tutorial_nonlinear.py +0 -0
  79. {fastlsq-0.2.2 → fastlsq-0.2.3}/examples/vector_basis_stream_vorticity.py +0 -0
  80. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/basis.py +0 -0
  81. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/block.py +0 -0
  82. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/device.py +0 -0
  83. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/diagnostics.py +0 -0
  84. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/export.py +0 -0
  85. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/geometry.py +0 -0
  86. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/learnable.py +0 -0
  87. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/lightning.py +0 -0
  88. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/newton.py +0 -0
  89. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/plotting.py +0 -0
  90. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/problems/__init__.py +0 -0
  91. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/problems/linear.py +0 -0
  92. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/problems/nonlinear.py +0 -0
  93. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/problems/regression.py +0 -0
  94. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/solvers.py +0 -0
  95. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/tuning.py +0 -0
  96. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/utils.py +0 -0
  97. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/vector.py +0 -0
  98. {fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/viz.py +0 -0
  99. {fastlsq-0.2.2 → fastlsq-0.2.3}/requirements.txt +0 -0
  100. {fastlsq-0.2.2 → fastlsq-0.2.3}/setup.cfg +0 -0
  101. {fastlsq-0.2.2 → fastlsq-0.2.3}/tests/test_basic.py +0 -0
  102. {fastlsq-0.2.2 → fastlsq-0.2.3}/tests/test_block.py +0 -0
  103. {fastlsq-0.2.2 → fastlsq-0.2.3}/tests/test_derivatives.py +0 -0
  104. {fastlsq-0.2.2 → fastlsq-0.2.3}/tests/test_device.py +0 -0
  105. {fastlsq-0.2.2 → fastlsq-0.2.3}/tests/test_grad_shafranov.py +0 -0
  106. {fastlsq-0.2.2 → fastlsq-0.2.3}/tests/test_grid_swing.py +0 -0
  107. {fastlsq-0.2.2 → fastlsq-0.2.3}/tests/test_learnable.py +0 -0
  108. {fastlsq-0.2.2 → fastlsq-0.2.3}/tests/test_orbit_hill.py +0 -0
@@ -2,6 +2,39 @@
2
2
 
3
3
  All notable changes to FastLSQ will be documented in this file.
4
4
 
5
+ ## [0.2.3] - 2026-06-04
6
+
7
+ ### Added
8
+
9
+ - **Householder-QR least-squares back-end** `solve_lstsq(..., method="qr")`:
10
+ backward-stable at `cond(A)` (ridge applied via the `[A; sqrt(mu) I]`
11
+ augmentation, not the normal equations), giving SVD-grade accuracy (~1e-14 on
12
+ the Helmholtz random-feature benchmark) at QR cost -- and, on the
13
+ rank-deficient CPU/no-ridge path, faster than the `gelsd` `"svd"` driver too,
14
+ while far more accurate than the normal-equations `"cholesky"` (no `cond(A)`
15
+ squaring, no required ridge). Assumes the system is numerically full column
16
+ rank; `"svd"` remains the rank-deficient-safe reference.
17
+ - **`solve_linear(..., method=...)`**: the linear solve back-end is now
18
+ selectable from the high-level API (`"auto"`, `"qr"`, `"svd"`, `"cholesky"`,
19
+ `"rsvd"`; defaults to `"auto"`).
20
+
21
+ ### Changed
22
+
23
+ - **`method="auto"` now tries QR before SVD.** After the Cholesky conditioning
24
+ probe rejects the fast path, `auto` uses the faster, more accurate QR solve and
25
+ falls back to the rank-revealing SVD only when QR's solution blows up
26
+ (`||x|| / (1 + ||b||)` above a generous guard). Real PDE systems measure
27
+ `<= 0.3` and keep QR; genuinely rank-deficient *inconsistent* systems (e.g. a
28
+ random RHS) measure ~3e14 and route to SVD. Net: the default solve is faster
29
+ and at least as accurate on real problems, with minimum-norm SVD preserved
30
+ exactly where it is needed.
31
+ - **N-scaled collocation defaults.** `solve_linear` and `solve_nonlinear` now
32
+ default `n_pde`/`n_bc` to `None` and derive them from the feature count
33
+ (`n_pde = max(3000, 3 * n_blocks * hidden_size)`, `n_bc = max(800, n_pde // 5)`),
34
+ replacing the fixed `10000`/`2000` (and `5000`/`1000`) over-sampling that was
35
+ ~6x (`solve_linear`) or ~3x (`solve_nonlinear`) the default feature count. Faster for the default configuration; passing
36
+ explicit `n_pde`/`n_bc` still overrides.
37
+
5
38
  ## [0.2.2] - 2026-06-03
6
39
 
7
40
  ### Fixed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FastLSQ
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support
5
5
  Author: Antonin Sulc
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: FastLSQ
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support
5
5
  Author: Antonin Sulc
6
6
  License-Expression: MIT
@@ -44,7 +44,7 @@ from fastlsq.export import (
44
44
  )
45
45
  from fastlsq import viz
46
46
 
47
- __version__ = "0.2.2"
47
+ __version__ = "0.2.3"
48
48
  __all__ = [
49
49
  # Device selection (CPU / CUDA / Apple-MPS, dtype-aware)
50
50
  "resolve_device",
@@ -35,10 +35,11 @@ def solve_linear(
35
35
  scale: Optional[float] = None,
36
36
  n_blocks: int = 3,
37
37
  hidden_size: int = 500,
38
- n_pde: int = 10000,
39
- n_bc: int = 2000,
38
+ n_pde: Optional[int] = None,
39
+ n_bc: Optional[int] = None,
40
40
  n_test: int = 5000,
41
41
  mu: float = 0.0,
42
+ method: str = "auto",
42
43
  auto_scale: bool = True,
43
44
  auto_scale_trials: int = 5,
44
45
  return_solver: bool = False,
@@ -65,12 +66,17 @@ def solve_linear(
65
66
  Number of feature blocks.
66
67
  hidden_size : int
67
68
  Features per block.
68
- n_pde, n_bc : int
69
- Number of collocation and boundary points.
69
+ n_pde, n_bc : int, optional
70
+ Number of collocation and boundary points. If None, scaled with the
71
+ feature count: n_pde = max(3000, 3 * n_blocks * hidden_size),
72
+ n_bc = max(800, n_pde // 5).
70
73
  n_test : int
71
74
  Number of test points for error evaluation.
72
75
  mu : float
73
76
  Tikhonov regularisation parameter (0 = no regularisation).
77
+ method : str
78
+ Linear solve back-end passed to ``solve_lstsq`` ("auto", "qr", "svd",
79
+ "cholesky", "rsvd"). Default "auto".
74
80
  auto_scale : bool
75
81
  If True and scale=None, automatically select scale via grid search.
76
82
  auto_scale_trials : int
@@ -93,6 +99,12 @@ def solve_linear(
93
99
  """
94
100
  t0 = time.time()
95
101
 
102
+ n_feat = n_blocks * hidden_size
103
+ if n_pde is None:
104
+ n_pde = max(3000, 3 * n_feat) # ~3x oversampling; fixed 10000 was 6x for default N
105
+ if n_bc is None:
106
+ n_bc = max(800, n_pde // 5)
107
+
96
108
  # Auto-select scale if needed
97
109
  if scale is None and auto_scale:
98
110
  if verbose:
@@ -127,7 +139,7 @@ def solve_linear(
127
139
 
128
140
  # Assemble and solve
129
141
  A, b = problem.build(solver, x_pde, *build_args)
130
- beta_raw = solve_lstsq(A, b, mu=mu)
142
+ beta_raw = solve_lstsq(A, b, mu=mu, method=method)
131
143
  n_outputs = getattr(problem, "n_outputs", 1)
132
144
  solver.beta = unpack_beta(beta_raw, solver.n_features, n_outputs)
133
145
 
@@ -170,8 +182,8 @@ def solve_nonlinear(
170
182
  scale: Optional[float] = None,
171
183
  n_blocks: int = 3,
172
184
  hidden_size: int = 500,
173
- n_pde: int = 5000,
174
- n_bc: int = 1000,
185
+ n_pde: Optional[int] = None,
186
+ n_bc: Optional[int] = None,
175
187
  n_test: int = 5000,
176
188
  max_iter: int = 30,
177
189
  tol_res: float = 1e-8,
@@ -202,8 +214,10 @@ def solve_nonlinear(
202
214
  Number of feature blocks.
203
215
  hidden_size : int
204
216
  Features per block.
205
- n_pde, n_bc : int
206
- Number of collocation and boundary points.
217
+ n_pde, n_bc : int, optional
218
+ Number of collocation and boundary points. If None, scaled with the
219
+ feature count: n_pde = max(3000, 3 * n_blocks * hidden_size),
220
+ n_bc = max(800, n_pde // 5).
207
221
  n_test : int
208
222
  Number of test points for error evaluation.
209
223
  max_iter : int
@@ -239,6 +253,12 @@ def solve_nonlinear(
239
253
  """
240
254
  t0 = time.time()
241
255
 
256
+ n_feat = n_blocks * hidden_size
257
+ if n_pde is None:
258
+ n_pde = max(3000, 3 * n_feat) # ~3x oversampling; the previous fixed 5000 was ~3.3x for default N
259
+ if n_bc is None:
260
+ n_bc = max(800, n_pde // 5)
261
+
242
262
  # Auto-select scale if needed
243
263
  if scale is None and auto_scale:
244
264
  if verbose:
@@ -11,17 +11,26 @@ condition number -- leaving several orders of magnitude of accuracy on the floor
11
11
 
12
12
  ``solve_lstsq`` therefore exposes several back-ends via ``method=``:
13
13
 
14
+ * ``"qr"`` -- Householder-QR least squares (ridge via ``[A; sqrt(mu) I]``
15
+ augmentation). Backward-stable at ``cond(A)`` -- SVD-grade
16
+ accuracy with no normal-equations squaring and no required
17
+ ridge, at ~QR cost (cheaper than SVD). Assumes (numerically)
18
+ full column rank; ``"svd"`` is the rank-deficient-safe choice
19
+ (and ``"auto"``'s ultimate fallback if QR blows up).
14
20
  * ``"svd"`` -- rank-revealing truncated SVD of ``A`` (LAPACK ``gelsd`` fast
15
- path on CPU; explicit SVD elsewhere). The accuracy reference.
21
+ path on CPU; explicit SVD elsewhere). The accuracy reference;
22
+ use for a genuinely rank-deficient ``A``.
16
23
  * ``"cholesky"`` -- normal-equations ``(A^T A + mu I)`` Cholesky. Fast, but only
17
24
  safe when ``A`` is well-conditioned.
18
25
  * ``"rsvd"`` -- randomized SVD (range-finder + power iterations). ``O(MNk)``
19
26
  for a target ``rank`` k << N -- the cheap option for strongly
20
27
  low-rank systems.
21
28
  * ``"auto"`` (default) -- try Cholesky; if the system is ill-conditioned (a
22
- cheap pivot-ratio test) fall back to ``"svd"``. Recovers the
23
- fast path on well-conditioned problems **without** sacrificing
24
- accuracy on the rest.
29
+ cheap pivot-ratio test) use the faster ``"qr"``, and fall back
30
+ to rank-revealing ``"svd"`` only if QR's solution blows up (the
31
+ feature matrices can be rank-deficient). Fast path when
32
+ well-conditioned, QR speed/accuracy on the rest, SVD as the
33
+ safety net.
25
34
 
26
35
  All back-ends are device/dtype-aware. Apple-MPS lacks a robust ``svd``/``lstsq``,
27
36
  so the factorization is run on CPU and the result moved back (one-time warning).
@@ -33,6 +42,13 @@ import torch
33
42
 
34
43
  _MPS_WARNED = False
35
44
 
45
+ # In ``method="auto"``: above this ``||x|| / (1 + ||b||)`` ratio the unpivoted-QR
46
+ # solve is treated as a rank-deficiency blow-up and handed to the rank-revealing
47
+ # SVD instead. Real PDE systems measure <= 0.3 here; the degenerate inconsistent
48
+ # (random-RHS) rank-deficient case measures ~3e14 -- so the guard is generous and
49
+ # a false positive only costs speed, never correctness.
50
+ _QR_AUTO_NORM_GUARD = 1e6
51
+
36
52
 
37
53
  def _maybe_cpu(A, b):
38
54
  """MPS has no robust svd/lstsq -- factorize on CPU, remember to move back."""
@@ -86,9 +102,22 @@ def _rsvd_solve(A, b, mu, rcond, rank, oversample, n_iter):
86
102
  return Vh.transpose(-2, -1) @ (filt.unsqueeze(-1) * (U.transpose(-2, -1) @ b))
87
103
 
88
104
 
105
+ def _qr_solve(A, b, mu):
106
+ """Householder-QR least squares (ridge via [A; sqrt(mu) I] augmentation).
107
+ Backward-stable at cond(A): SVD-grade accuracy with NO normal-equations
108
+ squaring and no required ridge, at ~QR cost (cheaper than SVD). Assumes
109
+ (numerically) full column rank; use method='svd' for a rank-deficient A."""
110
+ if mu:
111
+ n = A.shape[-1]
112
+ A = torch.cat([A, (mu ** 0.5) * torch.eye(n, dtype=A.dtype, device=A.device)], dim=-2)
113
+ b = torch.cat([b, torch.zeros(n, b.shape[-1], dtype=b.dtype, device=b.device)], dim=-2)
114
+ Q, R = torch.linalg.qr(A, mode="reduced")
115
+ return torch.linalg.solve_triangular(R, Q.transpose(-2, -1) @ b, upper=True)
116
+
117
+
89
118
  def _auto_solve(A, b, mu, rcond):
90
119
  # Cheap conditioning probe: cond(A) ~ max/min Cholesky pivot. If well within
91
- # float64's reach use the fast Cholesky; otherwise fall back to the SVD.
120
+ # float64's reach use the fast Cholesky.
92
121
  try:
93
122
  x, L = _cholesky_solve(A, b, mu)
94
123
  d = torch.diagonal(L).abs()
@@ -96,6 +125,14 @@ def _auto_solve(A, b, mu, rcond):
96
125
  return x
97
126
  except torch.linalg.LinAlgError:
98
127
  pass
128
+ # Ill-conditioned: try the faster, backward-stable QR. On a genuinely
129
+ # rank-deficient *inconsistent* A unpivoted QR can return a wildly
130
+ # non-minimum-norm solution, so fall back to the rank-revealing SVD when the
131
+ # QR solution blows up (or is non-finite). See _QR_AUTO_NORM_GUARD.
132
+ x = _qr_solve(A, b, mu)
133
+ nx = torch.linalg.vector_norm(x)
134
+ if torch.isfinite(nx) and nx <= _QR_AUTO_NORM_GUARD * (1.0 + torch.linalg.vector_norm(b)):
135
+ return x
99
136
  return _svd_solve(A, b, mu, rcond)
100
137
 
101
138
 
@@ -112,7 +149,7 @@ def solve_lstsq(A, b, mu=0.0, rcond=1e-12, method="auto",
112
149
  an unstable add-on).
113
150
  rcond : float
114
151
  Relative singular-value / pivot threshold for rank determination.
115
- method : {"auto", "svd", "cholesky", "rsvd"}
152
+ method : {"auto", "qr", "svd", "cholesky", "rsvd"}
116
153
  Solve back-end (see module docstring). Default "auto".
117
154
  rank, oversample, n_iter : int
118
155
  Randomized-SVD parameters (``method="rsvd"`` only). Set ``rank`` << N for
@@ -127,11 +164,13 @@ def solve_lstsq(A, b, mu=0.0, rcond=1e-12, method="auto",
127
164
  x = _auto_solve(A2, b2, mu, rcond)
128
165
  elif method == "svd":
129
166
  x = _svd_solve(A2, b2, mu, rcond)
167
+ elif method == "qr":
168
+ x = _qr_solve(A2, b2, mu)
130
169
  elif method == "cholesky":
131
170
  x = _cholesky_solve(A2, b2, mu)[0]
132
171
  elif method == "rsvd":
133
172
  x = _rsvd_solve(A2, b2, mu, rcond, rank, oversample, n_iter)
134
173
  else:
135
174
  raise ValueError(f"Unknown method {method!r}; "
136
- "choose 'auto', 'svd', 'cholesky', or 'rsvd'.")
175
+ "choose 'auto', 'qr', 'svd', 'cholesky', or 'rsvd'.")
137
176
  return x.to(mps_dev) if mps_dev is not None else x
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "FastLSQ"
7
- version = "0.2.2"
7
+ version = "0.2.3"
8
8
  description = "One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -20,7 +20,7 @@ from fastlsq.utils import device
20
20
  # ----------------------------------------------------------------------
21
21
 
22
22
  def test_version():
23
- assert fastlsq.__version__ == "0.2.1"
23
+ assert fastlsq.__version__ == "0.2.3"
24
24
 
25
25
 
26
26
  def test_imports():
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes