PyPI - FastLSQ - Versions diffs - 0.2.2__tar.gz → 0.2.4__tar.gz - Mend

FastLSQ 0.2.2.tar.gz → 0.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

{fastlsq-0.2.2 → fastlsq-0.2.4}/CHANGELOG.md RENAMED Viewed

@@ -2,6 +2,60 @@
 All notable changes to FastLSQ will be documented in this file.
+## [0.2.4] - 2026-06-04
+### Added
+- **Benchmark + inverse-problem test suite** (`tests/test_benchmarks_inverse.py`):
+  12 deterministic smoke tests (~11 s) that solve the linear (`PoissonND`,
+  `HeatND`, `Wave1D`, `Helmholtz2D`, `Maxwell2D_TM`) and nonlinear
+  (`NLPoisson2D`, `Bratu2D`, `SteadyBurgers1D`, `NLHelmholtz2D`, `AllenCahn1D`)
+  benchmark equations through the public `solve_linear` / `solve_nonlinear` API,
+  plus two inverse pipelines -- Gaussian source-position recovery (forward solve
+  + L-BFGS) and SINDy-style PDE discovery via analytical derivatives --
+  exercising the 0.2.3 QR / N-scaled-collocation solver path end to end.
+### Known issues
+- `Wave2D_MS` does not solve via `solve_linear` (relative error 1.0 in every
+  configuration tested), and `ElasticWave2D` -- a 2-output vector problem whose
+  `exact()` returns `(N, 2)` -- never sets `n_outputs`, so the scalar API cannot
+  unpack it. Both are pre-existing problem-definition gaps, independent of the
+  solver work, and are excluded from the new smoke test pending a fix.
+## [0.2.3] - 2026-06-04
+### Added
+- **Householder-QR least-squares back-end** `solve_lstsq(..., method="qr")`:
+  backward-stable at `cond(A)` (ridge applied via the `[A; sqrt(mu) I]`
+  augmentation, not the normal equations), giving SVD-grade accuracy (~1e-14 on
+  the Helmholtz random-feature benchmark) at QR cost -- and, on the
+  rank-deficient CPU/no-ridge path, faster than the `gelsd` `"svd"` driver too,
+  while far more accurate than the normal-equations `"cholesky"` (no `cond(A)`
+  squaring, no required ridge). Assumes the system is numerically full column
+  rank; `"svd"` remains the rank-deficient-safe reference.
+- **`solve_linear(..., method=...)`**: the linear solve back-end is now
+  selectable from the high-level API (`"auto"`, `"qr"`, `"svd"`, `"cholesky"`,
+  `"rsvd"`; defaults to `"auto"`).
+### Changed
+- **`method="auto"` now tries QR before SVD.** After the Cholesky conditioning
+  probe rejects the fast path, `auto` uses the faster, more accurate QR solve and
+  falls back to the rank-revealing SVD only when QR's solution blows up
+  (`||x|| / (1 + ||b||)` above a generous guard). Real PDE systems measure
+  `<= 0.3` and keep QR; genuinely rank-deficient *inconsistent* systems (e.g. a
+  random RHS) measure ~3e14 and route to SVD. Net: the default solve is faster
+  and at least as accurate on real problems, with minimum-norm SVD preserved
+  exactly where it is needed.
+- **N-scaled collocation defaults.** `solve_linear` and `solve_nonlinear` now
+  default `n_pde`/`n_bc` to `None` and derive them from the feature count
+  (`n_pde = max(3000, 3 * n_blocks * hidden_size)`, `n_bc = max(800, n_pde // 5)`),
+  replacing the fixed `10000`/`2000` in `solve_linear` (~6x the default feature
+  count) and `5000`/`1000` in `solve_nonlinear` (~3x the default feature count).
+  Faster for the default configuration; passing
+  explicit `n_pde`/`n_bc` still overrides.
 ## [0.2.2] - 2026-06-03
 ### Fixed

{fastlsq-0.2.2 → fastlsq-0.2.4}/FastLSQ.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FastLSQ
-Version: 0.2.2
+Version: 0.2.4
 Summary: One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support
 Author: Antonin Sulc
 License-Expression: MIT

{fastlsq-0.2.2 → fastlsq-0.2.4}/FastLSQ.egg-info/SOURCES.txt RENAMED Viewed

@@ -96,6 +96,7 @@ fastlsq/problems/linear.py
 fastlsq/problems/nonlinear.py
 fastlsq/problems/regression.py
 tests/test_basic.py
+tests/test_benchmarks_inverse.py
 tests/test_block.py
 tests/test_derivatives.py
 tests/test_device.py

{fastlsq-0.2.2 → fastlsq-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FastLSQ
-Version: 0.2.2
+Version: 0.2.4
 Summary: One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support
 Author: Antonin Sulc
 License-Expression: MIT

{fastlsq-0.2.2 → fastlsq-0.2.4}/fastlsq/__init__.py RENAMED Viewed

@@ -44,7 +44,7 @@ from fastlsq.export import (
 )
 from fastlsq import viz
-__version__ = "0.2.2"
+__version__ = "0.2.4"
 __all__ = [
     # Device selection (CPU / CUDA / Apple-MPS, dtype-aware)
     "resolve_device",

{fastlsq-0.2.2 → fastlsq-0.2.4}/fastlsq/api.py RENAMED Viewed

@@ -35,10 +35,11 @@ def solve_linear(
     scale: Optional[float] = None,
     n_blocks: int = 3,
     hidden_size: int = 500,
-    n_pde: int = 10000,
-    n_bc: int = 2000,
+    n_pde: Optional[int] = None,
+    n_bc: Optional[int] = None,
     n_test: int = 5000,
     mu: float = 0.0,
+    method: str = "auto",
     auto_scale: bool = True,
     auto_scale_trials: int = 5,
     return_solver: bool = False,
@@ -65,12 +66,17 @@ def solve_linear(
         Number of feature blocks.
     hidden_size : int
         Features per block.
-    n_pde, n_bc : int
-        Number of collocation and boundary points.
+    n_pde, n_bc : int, optional
+        Number of collocation and boundary points. If None, scaled with the
+        feature count: n_pde = max(3000, 3 * n_blocks * hidden_size),
+        n_bc = max(800, n_pde // 5).
     n_test : int
         Number of test points for error evaluation.
     mu : float
         Tikhonov regularisation parameter (0 = no regularisation).
+    method : str
+        Linear solve back-end passed to ``solve_lstsq`` ("auto", "qr", "svd",
+        "cholesky", "rsvd"). Default "auto".
     auto_scale : bool
         If True and scale=None, automatically select scale via grid search.
     auto_scale_trials : int
@@ -93,6 +99,12 @@ def solve_linear(
     """
     t0 = time.time()
+    n_feat = n_blocks * hidden_size
+    if n_pde is None:
+        n_pde = max(3000, 3 * n_feat)   # ~3x oversampling; fixed 10000 was 6x for default N
+    if n_bc is None:
+        n_bc = max(800, n_pde // 5)
     # Auto-select scale if needed
     if scale is None and auto_scale:
         if verbose:
@@ -127,7 +139,7 @@ def solve_linear(
     # Assemble and solve
     A, b = problem.build(solver, x_pde, *build_args)
-    beta_raw = solve_lstsq(A, b, mu=mu)
+    beta_raw = solve_lstsq(A, b, mu=mu, method=method)
     n_outputs = getattr(problem, "n_outputs", 1)
     solver.beta = unpack_beta(beta_raw, solver.n_features, n_outputs)
@@ -170,8 +182,8 @@ def solve_nonlinear(
     scale: Optional[float] = None,
     n_blocks: int = 3,
     hidden_size: int = 500,
-    n_pde: int = 5000,
-    n_bc: int = 1000,
+    n_pde: Optional[int] = None,
+    n_bc: Optional[int] = None,
     n_test: int = 5000,
     max_iter: int = 30,
     tol_res: float = 1e-8,
@@ -202,8 +214,10 @@ def solve_nonlinear(
         Number of feature blocks.
     hidden_size : int
         Features per block.
-    n_pde, n_bc : int
-        Number of collocation and boundary points.
+    n_pde, n_bc : int, optional
+        Number of collocation and boundary points. If None, scaled with the
+        feature count: n_pde = max(3000, 3 * n_blocks * hidden_size),
+        n_bc = max(800, n_pde // 5).
     n_test : int
         Number of test points for error evaluation.
     max_iter : int
@@ -239,6 +253,12 @@ def solve_nonlinear(
     """
     t0 = time.time()
+    n_feat = n_blocks * hidden_size
+    if n_pde is None:
+        n_pde = max(3000, 3 * n_feat)   # ~3x oversampling; replaces the fixed 5000 default
+    if n_bc is None:
+        n_bc = max(800, n_pde // 5)
     # Auto-select scale if needed
     if scale is None and auto_scale:
         if verbose:

{fastlsq-0.2.2 → fastlsq-0.2.4}/fastlsq/linalg.py RENAMED Viewed

@@ -11,17 +11,26 @@ condition number -- leaving several orders of magnitude of accuracy on the floor
 ``solve_lstsq`` therefore exposes several back-ends via ``method=``:
+* ``"qr"``       -- Householder-QR least squares (ridge via ``[A; sqrt(mu) I]``
+                    augmentation).  Backward-stable at ``cond(A)`` -- SVD-grade
+                    accuracy with no normal-equations squaring and no required
+                    ridge, at ~QR cost (cheaper than SVD).  Assumes (numerically)
+                    full column rank; ``"svd"`` is the rank-deficient-safe choice
+                    (and ``"auto"``'s ultimate fallback if QR blows up).
 * ``"svd"``      -- rank-revealing truncated SVD of ``A`` (LAPACK ``gelsd`` fast
-                    path on CPU; explicit SVD elsewhere).  The accuracy reference.
+                    path on CPU; explicit SVD elsewhere).  The accuracy reference;
+                    use for a genuinely rank-deficient ``A``.
 * ``"cholesky"`` -- normal-equations ``(A^T A + mu I)`` Cholesky.  Fast, but only
                     safe when ``A`` is well-conditioned.
 * ``"rsvd"``     -- randomized SVD (range-finder + power iterations).  ``O(MNk)``
                     for a target ``rank`` k << N -- the cheap option for strongly
                     low-rank systems.
 * ``"auto"`` (default) -- try Cholesky; if the system is ill-conditioned (a
-                    cheap pivot-ratio test) fall back to ``"svd"``.  Recovers the
-                    fast path on well-conditioned problems **without** sacrificing
-                    accuracy on the rest.
+                    cheap pivot-ratio test) use the faster ``"qr"``, and fall back
+                    to rank-revealing ``"svd"`` only if QR's solution blows up (the
+                    feature matrices can be rank-deficient).  Fast path when
+                    well-conditioned, QR speed/accuracy on the rest, SVD as the
+                    safety net.
 All back-ends are device/dtype-aware.  Apple-MPS lacks a robust ``svd``/``lstsq``,
 so the factorization is run on CPU and the result moved back (one-time warning).
@@ -33,6 +42,13 @@ import torch
 _MPS_WARNED = False
+# In ``method="auto"``: above this ``||x|| / (1 + ||b||)`` ratio the unpivoted-QR
+# solve is treated as a rank-deficiency blow-up and handed to the rank-revealing
+# SVD instead.  Real PDE systems measure <= 0.3 here; the degenerate inconsistent
+# (random-RHS) rank-deficient case measures ~3e14 -- so the guard is generous and
+# a false positive only costs speed, never correctness.
+_QR_AUTO_NORM_GUARD = 1e6
 def _maybe_cpu(A, b):
     """MPS has no robust svd/lstsq -- factorize on CPU, remember to move back."""
@@ -86,9 +102,22 @@ def _rsvd_solve(A, b, mu, rcond, rank, oversample, n_iter):
     return Vh.transpose(-2, -1) @ (filt.unsqueeze(-1) * (U.transpose(-2, -1) @ b))
+def _qr_solve(A, b, mu):
+    """Householder-QR least squares (ridge via [A; sqrt(mu) I] augmentation).
+    Backward-stable at cond(A): SVD-grade accuracy with NO normal-equations
+    squaring and no required ridge, at ~QR cost (cheaper than SVD).  Assumes
+    (numerically) full column rank; use method='svd' for a rank-deficient A."""
+    if mu:
+        n = A.shape[-1]
+        A = torch.cat([A, (mu ** 0.5) * torch.eye(n, dtype=A.dtype, device=A.device)], dim=-2)
+        b = torch.cat([b, torch.zeros(n, b.shape[-1], dtype=b.dtype, device=b.device)], dim=-2)
+    Q, R = torch.linalg.qr(A, mode="reduced")
+    return torch.linalg.solve_triangular(R, Q.transpose(-2, -1) @ b, upper=True)
 def _auto_solve(A, b, mu, rcond):
     # Cheap conditioning probe: cond(A) ~ max/min Cholesky pivot.  If well within
-    # float64's reach use the fast Cholesky; otherwise fall back to the SVD.
+    # float64's reach use the fast Cholesky.
     try:
         x, L = _cholesky_solve(A, b, mu)
         d = torch.diagonal(L).abs()
@@ -96,6 +125,14 @@ def _auto_solve(A, b, mu, rcond):
             return x
     except torch.linalg.LinAlgError:
         pass
+    # Ill-conditioned: try the faster, backward-stable QR.  On a genuinely
+    # rank-deficient *inconsistent* A unpivoted QR can return a wildly
+    # non-minimum-norm solution, so fall back to the rank-revealing SVD when the
+    # QR solution blows up (or is non-finite).  See _QR_AUTO_NORM_GUARD.
+    x = _qr_solve(A, b, mu)
+    nx = torch.linalg.vector_norm(x)
+    if torch.isfinite(nx) and nx <= _QR_AUTO_NORM_GUARD * (1.0 + torch.linalg.vector_norm(b)):
+        return x
     return _svd_solve(A, b, mu, rcond)
@@ -112,7 +149,7 @@ def solve_lstsq(A, b, mu=0.0, rcond=1e-12, method="auto",
         an unstable add-on).
     rcond : float
         Relative singular-value / pivot threshold for rank determination.
-    method : {"auto", "svd", "cholesky", "rsvd"}
+    method : {"auto", "qr", "svd", "cholesky", "rsvd"}
         Solve back-end (see module docstring).  Default "auto".
     rank, oversample, n_iter : int
         Randomized-SVD parameters (``method="rsvd"`` only).  Set ``rank`` << N for
@@ -127,11 +164,13 @@ def solve_lstsq(A, b, mu=0.0, rcond=1e-12, method="auto",
         x = _auto_solve(A2, b2, mu, rcond)
     elif method == "svd":
         x = _svd_solve(A2, b2, mu, rcond)
+    elif method == "qr":
+        x = _qr_solve(A2, b2, mu)
     elif method == "cholesky":
         x = _cholesky_solve(A2, b2, mu)[0]
     elif method == "rsvd":
         x = _rsvd_solve(A2, b2, mu, rcond, rank, oversample, n_iter)
     else:
         raise ValueError(f"Unknown method {method!r}; "
-                         "choose 'auto', 'svd', 'cholesky', or 'rsvd'.")
+                         "choose 'auto', 'qr', 'svd', 'cholesky', or 'rsvd'.")
     return x.to(mps_dev) if mps_dev is not None else x

{fastlsq-0.2.2 → fastlsq-0.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "FastLSQ"
-version = "0.2.2"
+version = "0.2.4"
 description = "One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support"
 readme = "README.md"
 license = "MIT"

fastlsq-0.2.4/tests/test_benchmarks_inverse.py ADDED Viewed

@@ -0,0 +1,144 @@
+# Copyright (c) 2026 Antonin Sulc -- MIT.
+"""Smoke tests for the benchmark PDE equations and the inverse-problem workflows.
+Exercises the forward benchmark problems through the public ``solve_linear`` /
+``solve_nonlinear`` API and two inverse pipelines -- parameter recovery via an
+outer optimiser, and SINDy-style PDE discovery via analytical derivatives -- so
+the v0.2.3 QR / N-scaled-collocation solver path is covered end-to-end, not just
+on the single Poisson problem in ``test_basic``.
+Scales are fixed (not auto-selected) and the RNG is seeded so the smoke test is
+fast and deterministic; tolerances carry ~10x headroom over measured errors.
+Excluded (pre-existing, unrelated to the solver work):
+  * ``Wave2D_MS``    -- rel-err == 1.0 via ``solve_linear`` in every config
+                        (old 10000/2000 defaults included), i.e. does not solve.
+  * ``ElasticWave2D``-- a 2-output vector problem (``exact()`` returns (N, 2))
+                        that never sets ``n_outputs``, so the scalar API can't
+                        unpack it.  Needs the vector solver path.
+"""
+import numpy as np
+import pytest
+import torch
+from fastlsq import (
+    solve_linear, solve_nonlinear, solve_lstsq, Op, SinusoidalBasis,
+    sample_box, sample_boundary_box,
+)
+from fastlsq.problems import linear as L
+from fastlsq.problems import nonlinear as NL
+# (class, fixed scale, val_err tolerance)
+LINEAR_CASES = [
+    (L.PoissonND,    0.5, 5e-3),
+    (L.HeatND,       0.5, 1e-1),
+    (L.Wave1D,      15.0, 5e-3),
+    (L.Helmholtz2D, 10.0, 1e-5),
+    (L.Maxwell2D_TM, 2.0, 5e-3),
+]
+NONLINEAR_CASES = [
+    (NL.NLPoisson2D,     8.0, 1e-4),
+    (NL.Bratu2D,        15.0, 1e-4),
+    (NL.SteadyBurgers1D,10.0, 1e-4),
+    (NL.NLHelmholtz2D,   5.0, 1e-4),
+    (NL.AllenCahn1D,    15.0, 2e-1),
+]
+@pytest.mark.parametrize(
+    "cls,scale,tol", LINEAR_CASES, ids=[c[0].__name__ for c in LINEAR_CASES]
+)
+def test_linear_benchmark_solves(cls, scale, tol):
+    """Each linear benchmark equation solves end-to-end via the public API."""
+    torch.set_default_dtype(torch.float64)
+    torch.manual_seed(0)
+    r = solve_linear(cls(), scale=scale, n_blocks=2, hidden_size=300,
+                     n_test=1500, auto_scale=False, verbose=False)
+    ve = r["metrics"]["val_err"]
+    assert np.isfinite(ve), f"{cls.__name__}: non-finite val_err"
+    assert ve < tol, f"{cls.__name__}: val_err={ve:.2e} exceeds tol {tol:.0e}"
+@pytest.mark.parametrize(
+    "cls,scale,tol", NONLINEAR_CASES, ids=[c[0].__name__ for c in NONLINEAR_CASES]
+)
+def test_nonlinear_benchmark_solves(cls, scale, tol):
+    """Each nonlinear benchmark equation converges via Newton + the public API."""
+    torch.set_default_dtype(torch.float64)
+    torch.manual_seed(0)
+    r = solve_nonlinear(cls(), scale=scale, n_blocks=2, hidden_size=300,
+                        n_test=1500, max_iter=15, auto_scale=False, verbose=False)
+    ve = r["metrics"]["val_err"]
+    assert r["n_iters"] > 0, f"{cls.__name__}: no Newton iterations ran"
+    assert np.isfinite(ve), f"{cls.__name__}: non-finite val_err"
+    assert ve < tol, f"{cls.__name__}: val_err={ve:.2e} exceeds tol {tol:.0e}"
+def test_inverse_source_position():
+    """Recover a Gaussian source position from sensor data (forward solve + L-BFGS)."""
+    opt = pytest.importorskip("scipy.optimize")
+    torch.set_default_dtype(torch.float64)
+    torch.manual_seed(0)
+    pde_op = -Op.laplacian(d=2)
+    basis = SinusoidalBasis.random(input_dim=2, n_features=700, sigma=5.0,
+                                   normalize=True)
+    x_pde = sample_box(3000, 2)
+    x_bc = sample_boundary_box(400, 2)
+    n_bc = x_bc.shape[0]
+    cache = basis.cache(x_pde)
+    A = torch.cat([pde_op.apply(basis, x_pde, cache=cache),
+                   100.0 * basis.evaluate(x_bc)])
+    x_sens = torch.tensor([[0.3, 0.3], [0.7, 0.7], [0.3, 0.7], [0.7, 0.3]])
+    def forward(xs, ys):
+        b = torch.exp(-((x_pde[:, 0] - xs) ** 2
+                        + (x_pde[:, 1] - ys) ** 2) / 0.1).unsqueeze(1)
+        b = torch.cat([b, torch.zeros(n_bc, 1, dtype=b.dtype)])
+        beta = solve_lstsq(A, b)
+        return (basis.evaluate(x_sens) @ beta).detach().cpu().numpy().ravel()
+    true = np.array([0.4, 0.6])
+    rng = np.random.default_rng(0)
+    u_obs = forward(*true) + 0.005 * rng.standard_normal(4)
+    res = opt.minimize(
+        lambda p: float(np.sum((forward(float(p[0]), float(p[1])) - u_obs) ** 2)),
+        x0=[0.5, 0.5], method="L-BFGS-B", bounds=[(0.1, 0.9)] * 2,
+    )
+    assert np.linalg.norm(res.x - true) < 0.06, f"recovered {res.x} vs {true}"
+def test_pde_discovery_recovers_governing_equation():
+    """SINDy-style discovery via analytical derivatives recovers u_xx = a*u + b*u_x.
+    Synthetic damped oscillator u = exp(-x/2) sin(2x) -> u_xx = -4.25 u - 1.0 u_x.
+    The dominant restoring term is recovered tightly; the damping term is harder
+    from 2% noise, so it is only bounded in sign/magnitude.
+    """
+    torch.set_default_dtype(torch.float64)
+    torch.manual_seed(42)
+    M = 500
+    x = torch.linspace(0, 2 * np.pi, M).reshape(-1, 1)
+    u_true = torch.exp(-0.5 * x) * torch.sin(2 * x)
+    u_noisy = u_true + 0.02 * torch.randn_like(u_true)
+    basis = SinusoidalBasis.random(input_dim=1, n_features=400, sigma=4.0,
+                                   normalize=True)
+    beta = solve_lstsq(basis.evaluate(x), u_noisy, mu=1e-3)
+    cache = basis.cache(x)
+    u = basis.evaluate(x, cache=cache) @ beta
+    u_x = basis.derivative(x, alpha=(1,), cache=cache) @ beta
+    u_xx = basis.derivative(x, alpha=(2,), cache=cache) @ beta
+    coef = solve_lstsq(torch.cat([u, u_x], dim=1), u_xx)  # u_xx = a*u + b*u_x
+    a, b = float(coef[0]), float(coef[1])
+    assert abs(a - (-4.25)) < 0.3, f"restoring coeff a={a:.3f} (want -4.25)"
+    assert b < 0 and abs(b - (-1.0)) < 0.5, f"damping coeff b={b:.3f} (want -1.0)"
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])

{fastlsq-0.2.2 → fastlsq-0.2.4}/tests/test_vector_basis.py RENAMED Viewed

@@ -20,7 +20,7 @@ from fastlsq.utils import device
 # ----------------------------------------------------------------------
 def test_version():
-    assert fastlsq.__version__ == "0.2.1"
+    assert fastlsq.__version__ == "0.2.4"
 def test_imports():