PyPI - FastLSQ - Versions diffs - 0.2.2__tar.gz → 0.2.3__tar.gz - Mend

FastLSQ 0.2.2tar.gz → 0.2.3tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

{fastlsq-0.2.2 → fastlsq-0.2.3}/CHANGELOG.md RENAMED Viewed

@@ -2,6 +2,39 @@
 All notable changes to FastLSQ will be documented in this file.
+## [0.2.3] - 2026-06-04
+### Added
+- **Householder-QR least-squares back-end** `solve_lstsq(..., method="qr")`:
+  backward-stable at `cond(A)` (ridge applied via the `[A; sqrt(mu) I]`
+  augmentation, not the normal equations), giving SVD-grade accuracy (~1e-14 on
+  the Helmholtz random-feature benchmark) at QR cost -- and, on the
+  rank-deficient CPU/no-ridge path, faster than the `gelsd` `"svd"` driver too,
+  while far more accurate than the normal-equations `"cholesky"` (no `cond(A)`
+  squaring, no required ridge). Assumes the system is numerically full column
+  rank; `"svd"` remains the rank-deficient-safe reference.
+- **`solve_linear(..., method=...)`**: the linear solve back-end is now
+  selectable from the high-level API (`"auto"`, `"qr"`, `"svd"`, `"cholesky"`,
+  `"rsvd"`; defaults to `"auto"`).
+### Changed
+- **`method="auto"` now tries QR before SVD.** After the Cholesky conditioning
+  probe rejects the fast path, `auto` uses the faster, more accurate QR solve and
+  falls back to the rank-revealing SVD only when QR's solution blows up
+  (`||x|| / (1 + ||b||)` above a generous guard). Real PDE systems measure
+  `<= 0.3` and keep QR; genuinely rank-deficient *inconsistent* systems (e.g. a
+  random RHS) measure ~3e14 and route to SVD. Net: the default solve is faster
+  and at least as accurate on real problems, with minimum-norm SVD preserved
+  exactly where it is needed.
+- **N-scaled collocation defaults.** `solve_linear` and `solve_nonlinear` now
+  default `n_pde`/`n_bc` to `None` and derive them from the feature count
+  (`n_pde = max(3000, 3 * n_blocks * hidden_size)`, `n_bc = max(800, n_pde // 5)`),
+  replacing the fixed defaults: `10000`/`2000` in `solve_linear` (over 6x the
+  default feature count of 1500) and `5000`/`1000` in `solve_nonlinear` (about
+  3x). Faster for the default configuration; passing explicit `n_pde`/`n_bc`
+  still overrides.
 ## [0.2.2] - 2026-06-03
 ### Fixed

{fastlsq-0.2.2 → fastlsq-0.2.3}/FastLSQ.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FastLSQ
-Version: 0.2.2
+Version: 0.2.3
 Summary: One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support
 Author: Antonin Sulc
 License-Expression: MIT

{fastlsq-0.2.2 → fastlsq-0.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: FastLSQ
-Version: 0.2.2
+Version: 0.2.3
 Summary: One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support
 Author: Antonin Sulc
 License-Expression: MIT

{fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/__init__.py RENAMED Viewed

@@ -44,7 +44,7 @@ from fastlsq.export import (
 )
 from fastlsq import viz
-__version__ = "0.2.2"
+__version__ = "0.2.3"
 __all__ = [
     # Device selection (CPU / CUDA / Apple-MPS, dtype-aware)
     "resolve_device",

{fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/api.py RENAMED Viewed

@@ -35,10 +35,11 @@ def solve_linear(
     scale: Optional[float] = None,
     n_blocks: int = 3,
     hidden_size: int = 500,
-    n_pde: int = 10000,
-    n_bc: int = 2000,
+    n_pde: Optional[int] = None,
+    n_bc: Optional[int] = None,
     n_test: int = 5000,
     mu: float = 0.0,
+    method: str = "auto",
     auto_scale: bool = True,
     auto_scale_trials: int = 5,
     return_solver: bool = False,
@@ -65,12 +66,17 @@ def solve_linear(
         Number of feature blocks.
     hidden_size : int
         Features per block.
-    n_pde, n_bc : int
-        Number of collocation and boundary points.
+    n_pde, n_bc : int, optional
+        Number of collocation and boundary points. If None, scaled with the
+        feature count: n_pde = max(3000, 3 * n_blocks * hidden_size),
+        n_bc = max(800, n_pde // 5).
     n_test : int
         Number of test points for error evaluation.
     mu : float
         Tikhonov regularisation parameter (0 = no regularisation).
+    method : str
+        Linear solve back-end passed to ``solve_lstsq`` ("auto", "qr", "svd",
+        "cholesky", "rsvd"). Default "auto".
     auto_scale : bool
         If True and scale=None, automatically select scale via grid search.
     auto_scale_trials : int
@@ -93,6 +99,12 @@ def solve_linear(
     """
     t0 = time.time()
+    n_feat = n_blocks * hidden_size
+    if n_pde is None:
+        n_pde = max(3000, 3 * n_feat)   # ~3x oversampling; fixed 10000 was 6x for default N
+    if n_bc is None:
+        n_bc = max(800, n_pde // 5)
     # Auto-select scale if needed
     if scale is None and auto_scale:
         if verbose:
@@ -127,7 +139,7 @@ def solve_linear(
     # Assemble and solve
     A, b = problem.build(solver, x_pde, *build_args)
-    beta_raw = solve_lstsq(A, b, mu=mu)
+    beta_raw = solve_lstsq(A, b, mu=mu, method=method)
     n_outputs = getattr(problem, "n_outputs", 1)
     solver.beta = unpack_beta(beta_raw, solver.n_features, n_outputs)
@@ -170,8 +182,8 @@ def solve_nonlinear(
     scale: Optional[float] = None,
     n_blocks: int = 3,
     hidden_size: int = 500,
-    n_pde: int = 5000,
-    n_bc: int = 1000,
+    n_pde: Optional[int] = None,
+    n_bc: Optional[int] = None,
     n_test: int = 5000,
     max_iter: int = 30,
     tol_res: float = 1e-8,
@@ -202,8 +214,10 @@ def solve_nonlinear(
         Number of feature blocks.
     hidden_size : int
         Features per block.
-    n_pde, n_bc : int
-        Number of collocation and boundary points.
+    n_pde, n_bc : int, optional
+        Number of collocation and boundary points. If None, scaled with the
+        feature count: n_pde = max(3000, 3 * n_blocks * hidden_size),
+        n_bc = max(800, n_pde // 5).
     n_test : int
         Number of test points for error evaluation.
     max_iter : int
@@ -239,6 +253,12 @@ def solve_nonlinear(
     """
     t0 = time.time()
+    n_feat = n_blocks * hidden_size
+    if n_pde is None:
+        n_pde = max(3000, 3 * n_feat)   # ~3x oversampling, close to the previous fixed default of 5000
+    if n_bc is None:
+        n_bc = max(800, n_pde // 5)
     # Auto-select scale if needed
     if scale is None and auto_scale:
         if verbose:

{fastlsq-0.2.2 → fastlsq-0.2.3}/fastlsq/linalg.py RENAMED Viewed

@@ -11,17 +11,26 @@ condition number -- leaving several orders of magnitude of accuracy on the floor
 ``solve_lstsq`` therefore exposes several back-ends via ``method=``:
+* ``"qr"``       -- Householder-QR least squares (ridge via ``[A; sqrt(mu) I]``
+                    augmentation).  Backward-stable at ``cond(A)`` -- SVD-grade
+                    accuracy with no normal-equations squaring and no required
+                    ridge, at ~QR cost (cheaper than SVD).  Assumes (numerically)
+                    full column rank; ``"svd"`` is the rank-deficient-safe choice
+                    (and ``"auto"``'s ultimate fallback if QR blows up).
 * ``"svd"``      -- rank-revealing truncated SVD of ``A`` (LAPACK ``gelsd`` fast
-                    path on CPU; explicit SVD elsewhere).  The accuracy reference.
+                    path on CPU; explicit SVD elsewhere).  The accuracy reference;
+                    use for a genuinely rank-deficient ``A``.
 * ``"cholesky"`` -- normal-equations ``(A^T A + mu I)`` Cholesky.  Fast, but only
                     safe when ``A`` is well-conditioned.
 * ``"rsvd"``     -- randomized SVD (range-finder + power iterations).  ``O(MNk)``
                     for a target ``rank`` k << N -- the cheap option for strongly
                     low-rank systems.
 * ``"auto"`` (default) -- try Cholesky; if the system is ill-conditioned (a
-                    cheap pivot-ratio test) fall back to ``"svd"``.  Recovers the
-                    fast path on well-conditioned problems **without** sacrificing
-                    accuracy on the rest.
+                    cheap pivot-ratio test) use the faster ``"qr"``, and fall back
+                    to rank-revealing ``"svd"`` only if QR's solution blows up (the
+                    feature matrices can be rank-deficient).  Fast path when
+                    well-conditioned, QR speed/accuracy on the rest, SVD as the
+                    safety net.
 All back-ends are device/dtype-aware.  Apple-MPS lacks a robust ``svd``/``lstsq``,
 so the factorization is run on CPU and the result moved back (one-time warning).
@@ -33,6 +42,13 @@ import torch
 _MPS_WARNED = False
+# In ``method="auto"``: above this ``||x|| / (1 + ||b||)`` ratio the unpivoted-QR
+# solve is treated as a rank-deficiency blow-up and handed to the rank-revealing
+# SVD instead.  Real PDE systems measure <= 0.3 here; the degenerate inconsistent
+# (random-RHS) rank-deficient case measures ~3e14 -- so the guard is generous, and
+# tripping it needlessly only costs speed (an extra SVD solve), never correctness.
+_QR_AUTO_NORM_GUARD = 1e6
 def _maybe_cpu(A, b):
     """MPS has no robust svd/lstsq -- factorize on CPU, remember to move back."""
@@ -86,9 +102,22 @@ def _rsvd_solve(A, b, mu, rcond, rank, oversample, n_iter):
     return Vh.transpose(-2, -1) @ (filt.unsqueeze(-1) * (U.transpose(-2, -1) @ b))
+def _qr_solve(A, b, mu):
+    """Householder-QR least squares (ridge via [A; sqrt(mu) I] augmentation).
+    Backward-stable at cond(A): SVD-grade accuracy with NO normal-equations
+    squaring and no required ridge, at ~QR cost (cheaper than SVD).  Assumes
+    (numerically) full column rank; use method='svd' for a rank-deficient A."""
+    if mu:
+        n = A.shape[-1]
+        A = torch.cat([A, (mu ** 0.5) * torch.eye(n, dtype=A.dtype, device=A.device)], dim=-2)
+        b = torch.cat([b, torch.zeros(n, b.shape[-1], dtype=b.dtype, device=b.device)], dim=-2)
+    Q, R = torch.linalg.qr(A, mode="reduced")
+    return torch.linalg.solve_triangular(R, Q.transpose(-2, -1) @ b, upper=True)
 def _auto_solve(A, b, mu, rcond):
     # Cheap conditioning probe: cond(A) ~ max/min Cholesky pivot.  If well within
-    # float64's reach use the fast Cholesky; otherwise fall back to the SVD.
+    # float64's reach use the fast Cholesky.
     try:
         x, L = _cholesky_solve(A, b, mu)
         d = torch.diagonal(L).abs()
@@ -96,6 +125,14 @@ def _auto_solve(A, b, mu, rcond):
             return x
     except torch.linalg.LinAlgError:
         pass
+    # Ill-conditioned: try the faster, backward-stable QR.  On a genuinely
+    # rank-deficient, *inconsistent* system unpivoted QR can return a wildly
+    # non-minimum-norm solution, so fall back to the rank-revealing SVD when the
+    # QR solution blows up (or is non-finite).  See _QR_AUTO_NORM_GUARD.
+    x = _qr_solve(A, b, mu)
+    nx = torch.linalg.vector_norm(x)
+    if torch.isfinite(nx) and nx <= _QR_AUTO_NORM_GUARD * (1.0 + torch.linalg.vector_norm(b)):
+        return x
     return _svd_solve(A, b, mu, rcond)
@@ -112,7 +149,7 @@ def solve_lstsq(A, b, mu=0.0, rcond=1e-12, method="auto",
         an unstable add-on).
     rcond : float
         Relative singular-value / pivot threshold for rank determination.
-    method : {"auto", "svd", "cholesky", "rsvd"}
+    method : {"auto", "qr", "svd", "cholesky", "rsvd"}
         Solve back-end (see module docstring).  Default "auto".
     rank, oversample, n_iter : int
         Randomized-SVD parameters (``method="rsvd"`` only).  Set ``rank`` << N for
@@ -127,11 +164,13 @@ def solve_lstsq(A, b, mu=0.0, rcond=1e-12, method="auto",
         x = _auto_solve(A2, b2, mu, rcond)
     elif method == "svd":
         x = _svd_solve(A2, b2, mu, rcond)
+    elif method == "qr":
+        x = _qr_solve(A2, b2, mu)
     elif method == "cholesky":
         x = _cholesky_solve(A2, b2, mu)[0]
     elif method == "rsvd":
         x = _rsvd_solve(A2, b2, mu, rcond, rank, oversample, n_iter)
     else:
         raise ValueError(f"Unknown method {method!r}; "
-                         "choose 'auto', 'svd', 'cholesky', or 'rsvd'.")
+                         "choose 'auto', 'qr', 'svd', 'cholesky', or 'rsvd'.")
     return x.to(mps_dev) if mps_dev is not None else x

{fastlsq-0.2.2 → fastlsq-0.2.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "FastLSQ"
-version = "0.2.2"
+version = "0.2.3"
 description = "One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support"
 readme = "README.md"
 license = "MIT"

{fastlsq-0.2.2 → fastlsq-0.2.3}/tests/test_vector_basis.py RENAMED Viewed

@@ -20,7 +20,7 @@ from fastlsq.utils import device
 # ----------------------------------------------------------------------
 def test_version():
-    assert fastlsq.__version__ == "0.2.1"
+    assert fastlsq.__version__ == "0.2.3"
 def test_imports():