FastLSQ 0.2.3__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fastlsq-0.2.3 → fastlsq-0.2.5}/CHANGELOG.md +54 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/FastLSQ.egg-info/PKG-INFO +47 -11
- {fastlsq-0.2.3 → fastlsq-0.2.5}/FastLSQ.egg-info/SOURCES.txt +1 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/PKG-INFO +47 -11
- {fastlsq-0.2.3 → fastlsq-0.2.5}/README.md +46 -10
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/__init__.py +1 -1
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/problems/linear.py +74 -34
- {fastlsq-0.2.3 → fastlsq-0.2.5}/pyproject.toml +1 -1
- fastlsq-0.2.5/tests/test_benchmarks_inverse.py +156 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/tests/test_vector_basis.py +1 -1
- {fastlsq-0.2.3 → fastlsq-0.2.5}/FastLSQ.egg-info/dependency_links.txt +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/FastLSQ.egg-info/requires.txt +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/FastLSQ.egg-info/top_level.txt +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/LICENSE +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/MANIFEST.in +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/add_your_own_pde.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/benchmark_comparison.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/custom_features.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/fred_sde.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/fred_sde_fastlsq.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/gaia_potential.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/gaia_potential_fastlsq.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/horizons_ephemeris.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/numerai_alpha.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/numerai_alpha_fastlsq.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/run_all_fastlsq.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/__init__.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/_alsu_lattice.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/_common.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/run_all.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_beamloss_ode.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_betatron_tune.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_green_fff.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_hill_ivp.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_observe_fit_act_simulator.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_orbit_inverse.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_passive_loco.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_perturbed_hill.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_sofb_observe_fit_act.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_streaming_archive_growth.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_synchrotron_ode.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_tides_3months.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_topoff_impulse.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s01_visualize.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s02_plasma_wakefield.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s03_synchrobetatron.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s04_sunspots.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s05_helioseismology.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s06_tides.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s07_iers_earth_rotation.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s08_mauna_loa_co2.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s09_enso_qbo.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s10_pulsar_timing.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s11_modal_analysis.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s12_mems_resonator.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s13_variable_stars_kepler.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s14_eeg.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/scenarios/s15_circadian.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/extras/spectral_expansion.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/grad_shafranov.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/grid_inverse.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/grid_rl_control.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/grid_swing.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/gs_inverse.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/gs_rl_control.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/inverse_heat_source.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/inverse_magnetostatics.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/inverse_source_position.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/learnable_helmholtz.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/orbit_hill.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/orbit_inverse.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/orbit_rl.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/pde_discovery.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/run_all_extensions.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/run_linear.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/run_nonlinear.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/tutorial_basic.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/tutorial_nonlinear.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/examples/vector_basis_stream_vorticity.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/api.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/basis.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/block.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/device.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/diagnostics.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/export.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/geometry.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/learnable.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/lightning.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/linalg.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/newton.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/plotting.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/problems/__init__.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/problems/nonlinear.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/problems/regression.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/solvers.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/tuning.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/utils.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/vector.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/fastlsq/viz.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/requirements.txt +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/setup.cfg +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/tests/test_basic.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/tests/test_block.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/tests/test_derivatives.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/tests/test_device.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/tests/test_grad_shafranov.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/tests/test_grid_swing.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/tests/test_learnable.py +0 -0
- {fastlsq-0.2.3 → fastlsq-0.2.5}/tests/test_orbit_hill.py +0 -0
|
@@ -2,6 +2,60 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to FastLSQ will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.2.5] - 2026-06-04
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- **`Wave2D_MS` solves via `solve_linear`.** The long-time anisotropic wave problem
|
|
10
|
+
returned relative value error 1.0 in every configuration because its
|
|
11
|
+
`t_max = 100` time normalisation packed ~87 temporal cycles into `tau ∈ [0,1]`:
|
|
12
|
+
the PDE's second time-derivative amplifies the random-feature *representation*
|
|
13
|
+
error by `Omega²` (`Omega = pi·sqrt(1+a2)·t_max`), so the one-shot
|
|
14
|
+
least-squares collocation cannot resolve the oscillation -- even 8000 features
|
|
15
|
+
with near-hard boundary constraints stay at rel-err 1.0, because the best
|
|
16
|
+
representable solution itself carries a huge PDE residual. Reducing `t_max` to
|
|
17
|
+
`4` (~3.5 cycles) and matching the anisotropic temporal feature bandwidth to
|
|
18
|
+
`Omega` (`scale_multipliers = [1, 1, 7]`) recovers the solution to ~3e-4 at
|
|
19
|
+
900 features (`scale = 3`); the exactly-consistent `t_max²`-scaled operator is
|
|
20
|
+
unchanged. Added to the `tests/test_benchmarks_inverse.py` linear smoke test.
|
|
21
|
+
Resolves the `Wave2D_MS` [0.2.4] known issue.
|
|
22
|
+
- **`ElasticWave2D` solves via the block-stacked vector path.** The coupled
|
|
23
|
+
2-output elastic-wave problem now declares `n_outputs = 2`, assembles its
|
|
24
|
+
operator in block-stacked form (`A ∈ ℝ^{Mk×Nk}`, `b ∈ ℝ^{Mk×1}`) via
|
|
25
|
+
`block_concat`, and gains the `exact_grad` Jacobian (shape `(M, d, k)`, time
|
|
26
|
+
axis chain-ruled by `t_max`) that the error metric requires. `unpack_beta` now
|
|
27
|
+
recovers a `(N, 2)` `beta`, so `solve_linear(ElasticWave2D(), scale=5.0)`
|
|
28
|
+
recovers both components (relative value error ~7e-3 at the default
|
|
29
|
+
resolution) instead of failing to unpack the vector solution. Added to the
|
|
30
|
+
`tests/test_benchmarks_inverse.py` linear smoke test. Resolves the
|
|
31
|
+
`ElasticWave2D` [0.2.4] known issue; the `t_max²` operator scaling from
|
|
32
|
+
[0.2.2] (consistent with `Wave2D_MS`) is preserved.
|
|
33
|
+
|
|
34
|
+
## [0.2.4] - 2026-06-04
|
|
35
|
+
|
|
36
|
+
### Added
|
|
37
|
+
|
|
38
|
+
- **Benchmark + inverse-problem test suite** (`tests/test_benchmarks_inverse.py`):
|
|
39
|
+
12 deterministic smoke tests (~11 s) that solve the linear (`PoissonND`,
|
|
40
|
+
`HeatND`, `Wave1D`, `Helmholtz2D`, `Maxwell2D_TM`) and nonlinear
|
|
41
|
+
(`NLPoisson2D`, `Bratu2D`, `SteadyBurgers1D`, `NLHelmholtz2D`, `AllenCahn1D`)
|
|
42
|
+
benchmark equations through the public `solve_linear` / `solve_nonlinear` API,
|
|
43
|
+
plus two inverse pipelines -- Gaussian source-position recovery (forward solve
|
|
44
|
+
+ L-BFGS) and SINDy-style PDE discovery via analytical derivatives --
|
|
45
|
+
exercising the 0.2.3 QR / N-scaled-collocation solver path end to end.
|
|
46
|
+
|
|
47
|
+
### Known issues
|
|
48
|
+
|
|
49
|
+
- `Wave2D_MS` does not solve via `solve_linear` (relative error 1.0 in every
|
|
50
|
+
configuration tested) -- a pre-existing problem-definition gap, independent of
|
|
51
|
+
the solver work, excluded from the new smoke test pending a fix. *(Fixed in
|
|
52
|
+
[0.2.5]: `t_max` reduced 100 -> 4 so the normalised-time oscillation
|
|
53
|
+
(~3.5 vs ~87 cycles) is resolvable; now covered by the smoke test.)*
|
|
54
|
+
- `ElasticWave2D` -- a 2-output vector problem whose `exact()` returns `(N, 2)`
|
|
55
|
+
-- never sets `n_outputs`, so the scalar API cannot unpack it; also excluded
|
|
56
|
+
here. *(Fixed in [0.2.5]: it now uses the block-stacked vector path and
|
|
57
|
+
is covered by the smoke test.)*
|
|
58
|
+
|
|
5
59
|
## [0.2.3] - 2026-06-04
|
|
6
60
|
|
|
7
61
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: FastLSQ
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support
|
|
5
5
|
Author: Antonin Sulc
|
|
6
6
|
License-Expression: MIT
|
|
@@ -55,9 +55,12 @@ analytical derivative engine for random Fourier features. For sinusoidal
|
|
|
55
55
|
features `phi_j(x) = sin(W_j . x + b_j)`, every derivative of every order
|
|
56
56
|
admits an exact closed-form expression -- no automatic differentiation needed.
|
|
57
57
|
|
|
58
|
-
Linear PDEs are solved in a single least-squares step
|
|
59
|
-
|
|
60
|
-
|
|
58
|
+
Linear PDEs are solved in a single least-squares step. The random-feature
|
|
59
|
+
system is typically rank-deficient, so the solve is routed through a
|
|
60
|
+
backward-stable, auto-selected least-squares back-end (Cholesky fast-path ->
|
|
61
|
+
Householder QR -> rank-revealing SVD) that runs on CPU, CUDA, or Apple-MPS.
|
|
62
|
+
Nonlinear PDEs are solved via Newton-Raphson iteration with Tikhonov
|
|
63
|
+
regularisation, 1/sqrt(N) feature normalisation, and continuation/homotopy.
|
|
61
64
|
|
|
62
65
|
## Installation
|
|
63
66
|
|
|
@@ -68,7 +71,7 @@ pip install fastlsq
|
|
|
68
71
|
For development (includes testing and build tools):
|
|
69
72
|
|
|
70
73
|
```bash
|
|
71
|
-
git clone https://github.com/
|
|
74
|
+
git clone https://github.com/sulcantonin/FastLSQ.git
|
|
72
75
|
cd FastLSQ
|
|
73
76
|
pip install -e ".[dev]"
|
|
74
77
|
```
|
|
@@ -101,6 +104,26 @@ print(f"Converged in {result['n_iters']} iterations")
|
|
|
101
104
|
print(f"Value error: {result['metrics']['val_err']:.2e}")
|
|
102
105
|
```
|
|
103
106
|
|
|
107
|
+
### Choose a solver back-end and device
|
|
108
|
+
|
|
109
|
+
The linear solve is routed automatically, but `solve_linear` exposes the
|
|
110
|
+
back-end via `method=` (see [How it works](#how-it-works) for the routing):
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from fastlsq import solve_linear, set_device
|
|
114
|
+
from fastlsq.problems.linear import PoissonND
|
|
115
|
+
|
|
116
|
+
# "auto" (default) -- Cholesky fast-path -> QR -> rank-revealing SVD
|
|
117
|
+
# "qr" -- Householder QR; SVD-grade accuracy at QR cost (full-rank A)
|
|
118
|
+
# "svd" -- rank-revealing truncated SVD; the rank-deficient-safe reference
|
|
119
|
+
# "cholesky" -- normal-equations Cholesky; fast, well-conditioned A only
|
|
120
|
+
# "rsvd" -- randomized SVD, O(MNk), for strongly low-rank A
|
|
121
|
+
result = solve_linear(PoissonND(), scale=5.0, method="qr")
|
|
122
|
+
|
|
123
|
+
# Device selection (CPU / CUDA / Apple-MPS), or set FASTLSQ_DEVICE=cuda
|
|
124
|
+
set_device("cuda") # the float64 default stays on CPU/CUDA; MPS is float32-only
|
|
125
|
+
```
|
|
126
|
+
|
|
104
127
|
### Use the basis directly
|
|
105
128
|
|
|
106
129
|
```python
|
|
@@ -204,9 +227,10 @@ u_yy = A @ solver.beta # (M, k): ∂²u/∂y² per com
|
|
|
204
227
|
|
|
205
228
|
Scalar problems are untouched: `n_outputs` defaults to `1`, `solver.beta` keeps
|
|
206
229
|
shape `(N, 1)`, and `predict_with_grad` returns gradient shape `(M, d)` for
|
|
207
|
-
backward compatibility (the trailing component axis is squeezed when k=1).
|
|
208
|
-
`
|
|
209
|
-
|
|
230
|
+
backward compatibility (the trailing component axis is squeezed when k=1). The
|
|
231
|
+
`Stokes2D` sketch above and [tests/test_block.py](tests/test_block.py) -- a
|
|
232
|
+
runnable `block_concat` + `unpack_beta` solve that recovers both components of a
|
|
233
|
+
k=2 system -- are the reference for the block-stacked vector path.
|
|
210
234
|
|
|
211
235
|
### Plot solutions
|
|
212
236
|
|
|
@@ -258,11 +282,15 @@ derivative engine:
|
|
|
258
282
|
| `FastLSQSolver` | Manages feature blocks; exposes `.basis` for all derivative computations |
|
|
259
283
|
| `LearnableFastLSQ` | Differentiable solver with learnable bandwidth via reparameterisation trick |
|
|
260
284
|
| `block_concat`, `pack_beta`, `unpack_beta` | Block-structured assembly helpers for vector-valued **u** (coupled systems). `solver.beta` has shape `(N, k)`; scalar problems are the k=1 case |
|
|
285
|
+
| `solve_lstsq` | Multi-back-end least-squares solve (`auto`/`qr`/`svd`/`cholesky`/`rsvd`); rank-revealing by default for the rank-deficient feature matrix |
|
|
286
|
+
| `resolve_device` / `set_device` / `get_device` | CPU / CUDA / Apple-MPS selection, dtype-aware (MPS is float32-only; factorizations fall back to CPU) |
|
|
261
287
|
|
|
262
288
|
### How it works
|
|
263
289
|
|
|
264
290
|
1. **Basis construction.** Given collocation points **x**, construct a
|
|
265
|
-
`SinusoidalBasis` with random weights W and biases b.
|
|
291
|
+
`SinusoidalBasis` with random weights W and biases b. The collocation counts
|
|
292
|
+
scale with the feature count by default
|
|
293
|
+
(`n_pde = max(3000, 3 * n_blocks * hidden_size)`, `n_bc = max(800, n_pde // 5)`).
|
|
266
294
|
|
|
267
295
|
2. **Analytical derivatives.** Exploit the cyclic derivative identity:
|
|
268
296
|
the n-th derivative of sin(z) cycles through {sin, cos, -sin, -cos}
|
|
@@ -273,8 +301,13 @@ derivative engine:
|
|
|
273
301
|
(e.g. `Op.laplacian(d=2)`) and apply it to the basis to get the system
|
|
274
302
|
matrix `A`.
|
|
275
303
|
|
|
276
|
-
4. **Linear solve.** Solve `A beta = b`
|
|
277
|
-
|
|
304
|
+
4. **Linear solve.** Solve `A beta = b` in the least-squares sense. The
|
|
305
|
+
random-feature matrix `A` is typically rank-deficient (near-duplicate
|
|
306
|
+
columns), so the default `method="auto"` starts from a Cholesky fast-path
|
|
307
|
+
(guarded by a cheap conditioning probe), falls back to backward-stable
|
|
308
|
+
Householder **QR**, and resorts to a rank-revealing **SVD** only if the QR
|
|
309
|
+
solution blows up. A Tikhonov ridge `mu` enters via the `[A; sqrt(mu) I]`
|
|
310
|
+
augmentation, not the condition-squaring normal equations.
|
|
278
311
|
|
|
279
312
|
5. **Newton iteration (nonlinear).** Linearise the PDE residual, solve
|
|
280
313
|
`J delta_beta = -R` with backtracking line search, and repeat.
|
|
@@ -336,9 +369,12 @@ See `examples/add_your_own_pde.py` for the complete tutorial.
|
|
|
336
369
|
- **Symbolic PDE operators**: Compose differential operators with `Op` (Laplacian, wave, Helmholtz, biharmonic, custom) via intuitive arithmetic; coefficients can be `nn.Parameter` for AdamW optimisation
|
|
337
370
|
- **Vector-valued solutions**: First-class support for **u**: ℝᵈ → ℝᵏ (elasticity, Stokes, Maxwell). Problems declare `n_outputs = k`; `block_concat` assembles coupled block systems; `solver.predict(x)` returns shape `(M, k)`. Scalar problems are the `k=1` case
|
|
338
371
|
- **High-level API**: Solve PDEs in one line with `solve_linear()` and `solve_nonlinear()`
|
|
372
|
+
- **Robust linear solver**: Pluggable least-squares back-ends; the default `auto` routes Cholesky -> QR -> SVD, and backward-stable QR delivers SVD-grade accuracy at QR cost on the rank-deficient random-feature system
|
|
339
373
|
- **Learnable bandwidth**: `LearnableFastLSQ` optimises the bandwidth (scalar or anisotropic) via reparameterisation
|
|
340
374
|
- **Learnable PDE coefficients**: Plug `nn.Parameter` into `Op` (e.g. Helmholtz wavenumber `k`) and optimise via AdamW; gradients flow through the prebuilt linear solve
|
|
341
375
|
- **Auto-tuning**: Automatic scale selection via grid search
|
|
376
|
+
- **Device support**: CPU / CUDA / Apple-MPS via `set_device()` or the `FASTLSQ_DEVICE` env var, dtype-aware (the float64 high-accuracy path stays on CPU/CUDA)
|
|
377
|
+
- **Adaptive collocation**: `n_pde` / `n_bc` default to feature-count-scaled values, overridable per solve
|
|
342
378
|
- **Built-in plotting**: Solution visualization, convergence plots, spectral sensitivity
|
|
343
379
|
- **Geometry samplers**: Box, ball, sphere, interval, custom samplers
|
|
344
380
|
- **Diagnostics**: Problem validation, conditioning checks, error detection
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: FastLSQ
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support
|
|
5
5
|
Author: Antonin Sulc
|
|
6
6
|
License-Expression: MIT
|
|
@@ -55,9 +55,12 @@ analytical derivative engine for random Fourier features. For sinusoidal
|
|
|
55
55
|
features `phi_j(x) = sin(W_j . x + b_j)`, every derivative of every order
|
|
56
56
|
admits an exact closed-form expression -- no automatic differentiation needed.
|
|
57
57
|
|
|
58
|
-
Linear PDEs are solved in a single least-squares step
|
|
59
|
-
|
|
60
|
-
|
|
58
|
+
Linear PDEs are solved in a single least-squares step. The random-feature
|
|
59
|
+
system is typically rank-deficient, so the solve is routed through a
|
|
60
|
+
backward-stable, auto-selected least-squares back-end (Cholesky fast-path ->
|
|
61
|
+
Householder QR -> rank-revealing SVD) that runs on CPU, CUDA, or Apple-MPS.
|
|
62
|
+
Nonlinear PDEs are solved via Newton-Raphson iteration with Tikhonov
|
|
63
|
+
regularisation, 1/sqrt(N) feature normalisation, and continuation/homotopy.
|
|
61
64
|
|
|
62
65
|
## Installation
|
|
63
66
|
|
|
@@ -68,7 +71,7 @@ pip install fastlsq
|
|
|
68
71
|
For development (includes testing and build tools):
|
|
69
72
|
|
|
70
73
|
```bash
|
|
71
|
-
git clone https://github.com/
|
|
74
|
+
git clone https://github.com/sulcantonin/FastLSQ.git
|
|
72
75
|
cd FastLSQ
|
|
73
76
|
pip install -e ".[dev]"
|
|
74
77
|
```
|
|
@@ -101,6 +104,26 @@ print(f"Converged in {result['n_iters']} iterations")
|
|
|
101
104
|
print(f"Value error: {result['metrics']['val_err']:.2e}")
|
|
102
105
|
```
|
|
103
106
|
|
|
107
|
+
### Choose a solver back-end and device
|
|
108
|
+
|
|
109
|
+
The linear solve is routed automatically, but `solve_linear` exposes the
|
|
110
|
+
back-end via `method=` (see [How it works](#how-it-works) for the routing):
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from fastlsq import solve_linear, set_device
|
|
114
|
+
from fastlsq.problems.linear import PoissonND
|
|
115
|
+
|
|
116
|
+
# "auto" (default) -- Cholesky fast-path -> QR -> rank-revealing SVD
|
|
117
|
+
# "qr" -- Householder QR; SVD-grade accuracy at QR cost (full-rank A)
|
|
118
|
+
# "svd" -- rank-revealing truncated SVD; the rank-deficient-safe reference
|
|
119
|
+
# "cholesky" -- normal-equations Cholesky; fast, well-conditioned A only
|
|
120
|
+
# "rsvd" -- randomized SVD, O(MNk), for strongly low-rank A
|
|
121
|
+
result = solve_linear(PoissonND(), scale=5.0, method="qr")
|
|
122
|
+
|
|
123
|
+
# Device selection (CPU / CUDA / Apple-MPS), or set FASTLSQ_DEVICE=cuda
|
|
124
|
+
set_device("cuda") # the float64 default stays on CPU/CUDA; MPS is float32-only
|
|
125
|
+
```
|
|
126
|
+
|
|
104
127
|
### Use the basis directly
|
|
105
128
|
|
|
106
129
|
```python
|
|
@@ -204,9 +227,10 @@ u_yy = A @ solver.beta # (M, k): ∂²u/∂y² per com
|
|
|
204
227
|
|
|
205
228
|
Scalar problems are untouched: `n_outputs` defaults to `1`, `solver.beta` keeps
|
|
206
229
|
shape `(N, 1)`, and `predict_with_grad` returns gradient shape `(M, d)` for
|
|
207
|
-
backward compatibility (the trailing component axis is squeezed when k=1).
|
|
208
|
-
`
|
|
209
|
-
|
|
230
|
+
backward compatibility (the trailing component axis is squeezed when k=1). The
|
|
231
|
+
`Stokes2D` sketch above and [tests/test_block.py](tests/test_block.py) -- a
|
|
232
|
+
runnable `block_concat` + `unpack_beta` solve that recovers both components of a
|
|
233
|
+
k=2 system -- are the reference for the block-stacked vector path.
|
|
210
234
|
|
|
211
235
|
### Plot solutions
|
|
212
236
|
|
|
@@ -258,11 +282,15 @@ derivative engine:
|
|
|
258
282
|
| `FastLSQSolver` | Manages feature blocks; exposes `.basis` for all derivative computations |
|
|
259
283
|
| `LearnableFastLSQ` | Differentiable solver with learnable bandwidth via reparameterisation trick |
|
|
260
284
|
| `block_concat`, `pack_beta`, `unpack_beta` | Block-structured assembly helpers for vector-valued **u** (coupled systems). `solver.beta` has shape `(N, k)`; scalar problems are the k=1 case |
|
|
285
|
+
| `solve_lstsq` | Multi-back-end least-squares solve (`auto`/`qr`/`svd`/`cholesky`/`rsvd`); rank-revealing by default for the rank-deficient feature matrix |
|
|
286
|
+
| `resolve_device` / `set_device` / `get_device` | CPU / CUDA / Apple-MPS selection, dtype-aware (MPS is float32-only; factorizations fall back to CPU) |
|
|
261
287
|
|
|
262
288
|
### How it works
|
|
263
289
|
|
|
264
290
|
1. **Basis construction.** Given collocation points **x**, construct a
|
|
265
|
-
`SinusoidalBasis` with random weights W and biases b.
|
|
291
|
+
`SinusoidalBasis` with random weights W and biases b. The collocation counts
|
|
292
|
+
scale with the feature count by default
|
|
293
|
+
(`n_pde = max(3000, 3 * n_blocks * hidden_size)`, `n_bc = max(800, n_pde // 5)`).
|
|
266
294
|
|
|
267
295
|
2. **Analytical derivatives.** Exploit the cyclic derivative identity:
|
|
268
296
|
the n-th derivative of sin(z) cycles through {sin, cos, -sin, -cos}
|
|
@@ -273,8 +301,13 @@ derivative engine:
|
|
|
273
301
|
(e.g. `Op.laplacian(d=2)`) and apply it to the basis to get the system
|
|
274
302
|
matrix `A`.
|
|
275
303
|
|
|
276
|
-
4. **Linear solve.** Solve `A beta = b`
|
|
277
|
-
|
|
304
|
+
4. **Linear solve.** Solve `A beta = b` in the least-squares sense. The
|
|
305
|
+
random-feature matrix `A` is typically rank-deficient (near-duplicate
|
|
306
|
+
columns), so the default `method="auto"` starts from a Cholesky fast-path
|
|
307
|
+
(guarded by a cheap conditioning probe), falls back to backward-stable
|
|
308
|
+
Householder **QR**, and resorts to a rank-revealing **SVD** only if the QR
|
|
309
|
+
solution blows up. A Tikhonov ridge `mu` enters via the `[A; sqrt(mu) I]`
|
|
310
|
+
augmentation, not the condition-squaring normal equations.
|
|
278
311
|
|
|
279
312
|
5. **Newton iteration (nonlinear).** Linearise the PDE residual, solve
|
|
280
313
|
`J delta_beta = -R` with backtracking line search, and repeat.
|
|
@@ -336,9 +369,12 @@ See `examples/add_your_own_pde.py` for the complete tutorial.
|
|
|
336
369
|
- **Symbolic PDE operators**: Compose differential operators with `Op` (Laplacian, wave, Helmholtz, biharmonic, custom) via intuitive arithmetic; coefficients can be `nn.Parameter` for AdamW optimisation
|
|
337
370
|
- **Vector-valued solutions**: First-class support for **u**: ℝᵈ → ℝᵏ (elasticity, Stokes, Maxwell). Problems declare `n_outputs = k`; `block_concat` assembles coupled block systems; `solver.predict(x)` returns shape `(M, k)`. Scalar problems are the `k=1` case
|
|
338
371
|
- **High-level API**: Solve PDEs in one line with `solve_linear()` and `solve_nonlinear()`
|
|
372
|
+
- **Robust linear solver**: Pluggable least-squares back-ends; the default `auto` routes Cholesky -> QR -> SVD, and backward-stable QR delivers SVD-grade accuracy at QR cost on the rank-deficient random-feature system
|
|
339
373
|
- **Learnable bandwidth**: `LearnableFastLSQ` optimises the bandwidth (scalar or anisotropic) via reparameterisation
|
|
340
374
|
- **Learnable PDE coefficients**: Plug `nn.Parameter` into `Op` (e.g. Helmholtz wavenumber `k`) and optimise via AdamW; gradients flow through the prebuilt linear solve
|
|
341
375
|
- **Auto-tuning**: Automatic scale selection via grid search
|
|
376
|
+
- **Device support**: CPU / CUDA / Apple-MPS via `set_device()` or the `FASTLSQ_DEVICE` env var, dtype-aware (the float64 high-accuracy path stays on CPU/CUDA)
|
|
377
|
+
- **Adaptive collocation**: `n_pde` / `n_bc` default to feature-count-scaled values, overridable per solve
|
|
342
378
|
- **Built-in plotting**: Solution visualization, convergence plots, spectral sensitivity
|
|
343
379
|
- **Geometry samplers**: Box, ball, sphere, interval, custom samplers
|
|
344
380
|
- **Diagnostics**: Problem validation, conditioning checks, error detection
|
|
@@ -14,9 +14,12 @@ analytical derivative engine for random Fourier features. For sinusoidal
|
|
|
14
14
|
features `phi_j(x) = sin(W_j . x + b_j)`, every derivative of every order
|
|
15
15
|
admits an exact closed-form expression -- no automatic differentiation needed.
|
|
16
16
|
|
|
17
|
-
Linear PDEs are solved in a single least-squares step
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
Linear PDEs are solved in a single least-squares step. The random-feature
|
|
18
|
+
system is typically rank-deficient, so the solve is routed through a
|
|
19
|
+
backward-stable, auto-selected least-squares back-end (Cholesky fast-path ->
|
|
20
|
+
Householder QR -> rank-revealing SVD) that runs on CPU, CUDA, or Apple-MPS.
|
|
21
|
+
Nonlinear PDEs are solved via Newton-Raphson iteration with Tikhonov
|
|
22
|
+
regularisation, 1/sqrt(N) feature normalisation, and continuation/homotopy.
|
|
20
23
|
|
|
21
24
|
## Installation
|
|
22
25
|
|
|
@@ -27,7 +30,7 @@ pip install fastlsq
|
|
|
27
30
|
For development (includes testing and build tools):
|
|
28
31
|
|
|
29
32
|
```bash
|
|
30
|
-
git clone https://github.com/
|
|
33
|
+
git clone https://github.com/sulcantonin/FastLSQ.git
|
|
31
34
|
cd FastLSQ
|
|
32
35
|
pip install -e ".[dev]"
|
|
33
36
|
```
|
|
@@ -60,6 +63,26 @@ print(f"Converged in {result['n_iters']} iterations")
|
|
|
60
63
|
print(f"Value error: {result['metrics']['val_err']:.2e}")
|
|
61
64
|
```
|
|
62
65
|
|
|
66
|
+
### Choose a solver back-end and device
|
|
67
|
+
|
|
68
|
+
The linear solve is routed automatically, but `solve_linear` exposes the
|
|
69
|
+
back-end via `method=` (see [How it works](#how-it-works) for the routing):
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from fastlsq import solve_linear, set_device
|
|
73
|
+
from fastlsq.problems.linear import PoissonND
|
|
74
|
+
|
|
75
|
+
# "auto" (default) -- Cholesky fast-path -> QR -> rank-revealing SVD
|
|
76
|
+
# "qr" -- Householder QR; SVD-grade accuracy at QR cost (full-rank A)
|
|
77
|
+
# "svd" -- rank-revealing truncated SVD; the rank-deficient-safe reference
|
|
78
|
+
# "cholesky" -- normal-equations Cholesky; fast, well-conditioned A only
|
|
79
|
+
# "rsvd" -- randomized SVD, O(MNk), for strongly low-rank A
|
|
80
|
+
result = solve_linear(PoissonND(), scale=5.0, method="qr")
|
|
81
|
+
|
|
82
|
+
# Device selection (CPU / CUDA / Apple-MPS), or set FASTLSQ_DEVICE=cuda
|
|
83
|
+
set_device("cuda") # the float64 default stays on CPU/CUDA; MPS is float32-only
|
|
84
|
+
```
|
|
85
|
+
|
|
63
86
|
### Use the basis directly
|
|
64
87
|
|
|
65
88
|
```python
|
|
@@ -163,9 +186,10 @@ u_yy = A @ solver.beta # (M, k): ∂²u/∂y² per com
|
|
|
163
186
|
|
|
164
187
|
Scalar problems are untouched: `n_outputs` defaults to `1`, `solver.beta` keeps
|
|
165
188
|
shape `(N, 1)`, and `predict_with_grad` returns gradient shape `(M, d)` for
|
|
166
|
-
backward compatibility (the trailing component axis is squeezed when k=1).
|
|
167
|
-
`
|
|
168
|
-
|
|
189
|
+
backward compatibility (the trailing component axis is squeezed when k=1). The
|
|
190
|
+
`Stokes2D` sketch above and [tests/test_block.py](tests/test_block.py) -- a
|
|
191
|
+
runnable `block_concat` + `unpack_beta` solve that recovers both components of a
|
|
192
|
+
k=2 system -- are the reference for the block-stacked vector path.
|
|
169
193
|
|
|
170
194
|
### Plot solutions
|
|
171
195
|
|
|
@@ -217,11 +241,15 @@ derivative engine:
|
|
|
217
241
|
| `FastLSQSolver` | Manages feature blocks; exposes `.basis` for all derivative computations |
|
|
218
242
|
| `LearnableFastLSQ` | Differentiable solver with learnable bandwidth via reparameterisation trick |
|
|
219
243
|
| `block_concat`, `pack_beta`, `unpack_beta` | Block-structured assembly helpers for vector-valued **u** (coupled systems). `solver.beta` has shape `(N, k)`; scalar problems are the k=1 case |
|
|
244
|
+
| `solve_lstsq` | Multi-back-end least-squares solve (`auto`/`qr`/`svd`/`cholesky`/`rsvd`); rank-revealing by default for the rank-deficient feature matrix |
|
|
245
|
+
| `resolve_device` / `set_device` / `get_device` | CPU / CUDA / Apple-MPS selection, dtype-aware (MPS is float32-only; factorizations fall back to CPU) |
|
|
220
246
|
|
|
221
247
|
### How it works
|
|
222
248
|
|
|
223
249
|
1. **Basis construction.** Given collocation points **x**, construct a
|
|
224
|
-
`SinusoidalBasis` with random weights W and biases b.
|
|
250
|
+
`SinusoidalBasis` with random weights W and biases b. The collocation counts
|
|
251
|
+
default to scale with the feature count
|
|
252
|
+
(`n_pde = max(3000, 3 * n_blocks * hidden_size)`, `n_bc = max(800, n_pde // 5)`).
|
|
225
253
|
|
|
226
254
|
2. **Analytical derivatives.** Exploit the cyclic derivative identity:
|
|
227
255
|
the n-th derivative of sin(z) cycles through {sin, cos, -sin, -cos}
|
|
@@ -232,8 +260,13 @@ derivative engine:
|
|
|
232
260
|
(e.g. `Op.laplacian(d=2)`) and apply it to the basis to get the system
|
|
233
261
|
matrix `A`.
|
|
234
262
|
|
|
235
|
-
4. **Linear solve.** Solve `A beta = b`
|
|
236
|
-
|
|
263
|
+
4. **Linear solve.** Solve `A beta = b` in the least-squares sense. The
|
|
264
|
+
random-feature matrix `A` is typically rank-deficient (near-duplicate
|
|
265
|
+
columns), so the default `method="auto"` starts from a Cholesky fast-path
|
|
266
|
+
(guarded by a cheap conditioning probe), falls back to backward-stable
|
|
267
|
+
Householder **QR**, and resorts to a rank-revealing **SVD** only if the QR
|
|
268
|
+
solution blows up. A Tikhonov ridge `mu` enters via the `[A; sqrt(mu) I]`
|
|
269
|
+
augmentation, not the condition-squaring normal equations.
|
|
237
270
|
|
|
238
271
|
5. **Newton iteration (nonlinear).** Linearise the PDE residual, solve
|
|
239
272
|
`J delta_beta = -R` with backtracking line search, and repeat.
|
|
@@ -295,9 +328,12 @@ See `examples/add_your_own_pde.py` for the complete tutorial.
|
|
|
295
328
|
- **Symbolic PDE operators**: Compose differential operators with `Op` (Laplacian, wave, Helmholtz, biharmonic, custom) via intuitive arithmetic; coefficients can be `nn.Parameter` for AdamW optimisation
|
|
296
329
|
- **Vector-valued solutions**: First-class support for **u**: ℝᵈ → ℝᵏ (elasticity, Stokes, Maxwell). Problems declare `n_outputs = k`; `block_concat` assembles coupled block systems; `solver.predict(x)` returns shape `(M, k)`. Scalar problems are the `k=1` case
|
|
297
330
|
- **High-level API**: Solve PDEs in one line with `solve_linear()` and `solve_nonlinear()`
|
|
331
|
+
- **Robust linear solver**: Pluggable least-squares back-ends; the default `auto` routes Cholesky -> QR -> SVD, and backward-stable QR delivers SVD-grade accuracy at QR cost on the rank-deficient random-feature system
|
|
298
332
|
- **Learnable bandwidth**: `LearnableFastLSQ` optimises the bandwidth (scalar or anisotropic) via reparameterisation
|
|
299
333
|
- **Learnable PDE coefficients**: Plug `nn.Parameter` into `Op` (e.g. Helmholtz wavenumber `k`) and optimise via AdamW; gradients flow through the prebuilt linear solve
|
|
300
334
|
- **Auto-tuning**: Automatic scale selection via grid search
|
|
335
|
+
- **Device support**: CPU / CUDA / Apple-MPS via `set_device()` or the `FASTLSQ_DEVICE` env var, dtype-aware (the float64 high-accuracy path stays on CPU/CUDA)
|
|
336
|
+
- **Adaptive collocation**: `n_pde` / `n_bc` default to feature-count-scaled values, overridable per solve
|
|
301
337
|
- **Built-in plotting**: Solution visualization, convergence plots, spectral sensitivity
|
|
302
338
|
- **Geometry samplers**: Box, ball, sphere, interval, custom samplers
|
|
303
339
|
- **Diagnostics**: Problem validation, conditioning checks, error detection
|
|
@@ -17,6 +17,7 @@ import torch
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
|
|
19
19
|
from fastlsq.utils import device
|
|
20
|
+
from fastlsq.block import block_concat
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
# ======================================================================
|
|
@@ -218,17 +219,36 @@ class Wave1D:
|
|
|
218
219
|
# ======================================================================
|
|
219
220
|
|
|
220
221
|
class Wave2D_MS:
|
|
221
|
-
"""Wave 2-D multi-scale
|
|
222
|
-
|
|
223
|
-
|
|
222
|
+
"""Wave 2-D multi-scale (anisotropic, normalised time).
|
|
223
|
+
|
|
224
|
+
Anisotropic wave u_tt = u_xx + a2 u_yy on [0,1]^2 x [0, t_max], with time
|
|
225
|
+
normalised to tau = t / t_max in [0,1]. ``build`` therefore carries the
|
|
226
|
+
spatial term's t_max^2 factor (d^2/dt^2 = t_max^-2 d^2/dtau^2), so the
|
|
227
|
+
discretised operator u_tautau - t_max^2 (u_xx + a2 u_yy) is satisfied
|
|
228
|
+
exactly by ``exact`` (the (1,1) standing mode, omega = pi sqrt(1+a2)).
|
|
229
|
+
|
|
230
|
+
Resolvability constraint on ``t_max``. In normalised time the solution
|
|
231
|
+
oscillates at Omega = omega * t_max, i.e. ~ sqrt(1+a2) * t_max / 2 temporal
|
|
232
|
+
cycles over tau in [0,1]. The PDE's second time-derivative amplifies the
|
|
233
|
+
random-feature *representation* error by Omega^2, so the one-shot
|
|
234
|
+
least-squares collocation only resolves a handful of cycles before that
|
|
235
|
+
amplified error swamps the solution -- the original ``t_max = 100`` (~87
|
|
236
|
+
cycles) did not solve in *any* configuration (rel-err 1.0, the [0.2.4] known
|
|
237
|
+
issue), even at 8000 features with near-hard boundary constraints, because
|
|
238
|
+
the best representable solution itself carries a huge PDE residual.
|
|
239
|
+
``t_max = 4`` keeps it at ~3.5 cycles (solves to ~1e-3 at 900 features); the
|
|
240
|
+
anisotropic ``scale_multipliers`` place the temporal feature bandwidth at
|
|
241
|
+
~Omega while the spatial bandwidth stays ~pi.
|
|
224
242
|
"""
|
|
225
243
|
|
|
226
244
|
def __init__(self):
|
|
227
245
|
self.name = "Wave 2D-MS"
|
|
228
246
|
self.dim = 3
|
|
229
247
|
self.a2 = 2.0
|
|
230
|
-
self.t_max = 100.0
|
|
231
|
-
|
|
248
|
+
self.t_max = 4.0 # ~3.5 temporal cycles -- see class docstring
|
|
249
|
+
# Anisotropic feature bandwidth: temporal ~ Omega = pi*sqrt(1+a2)*t_max
|
|
250
|
+
# ~= 21.8, matched at scale ~3 (multiplier 7); spatial bandwidth ~ pi.
|
|
251
|
+
self.scale_multipliers = [1.0, 1.0, 7.0]
|
|
232
252
|
|
|
233
253
|
def exact(self, x_in):
|
|
234
254
|
xv = x_in[:, 0:1]
|
|
@@ -316,6 +336,7 @@ class ElasticWave2D:
|
|
|
316
336
|
def __init__(self, c_p: float = 2.0, c_s: float = 1.0, t_max: float = 2.0):
|
|
317
337
|
self.name = "Elastic Wave 2D"
|
|
318
338
|
self.dim = 3 # x, y, t
|
|
339
|
+
self.n_outputs = 2 # (u_x, u_y) -- block-stacked vector solve
|
|
319
340
|
self.c_p = c_p
|
|
320
341
|
self.c_s = c_s
|
|
321
342
|
self.c_p2 = c_p ** 2
|
|
@@ -351,6 +372,33 @@ class ElasticWave2D:
|
|
|
351
372
|
uy_t = (self.ky * torch.sin(self.kx * xv) * torch.cos(self.ky * yv) * fac)
|
|
352
373
|
return torch.cat([ux_t, uy_t], dim=1)
|
|
353
374
|
|
|
375
|
+
def exact_grad(self, x_in):
|
|
376
|
+
"""Jacobian of (u_x, u_y). Returns (M, d, k) with J[:, j, c] = du_c/dx_j.
|
|
377
|
+
|
|
378
|
+
Time is normalised (t_phys = t * t_max), so the t-derivatives pick up a
|
|
379
|
+
t_max chain-rule factor -- matching ``exact_ut`` and ``Wave2D_MS`` and the
|
|
380
|
+
normalised inputs ``predict_with_grad`` differentiates against.
|
|
381
|
+
"""
|
|
382
|
+
xv, yv, tv = x_in[:, 0:1], x_in[:, 1:2], x_in[:, 2:3] * self.t_max
|
|
383
|
+
kx, ky = self.kx, self.ky
|
|
384
|
+
cx, sx = torch.cos(kx * xv), torch.sin(kx * xv)
|
|
385
|
+
cy, sy = torch.cos(ky * yv), torch.sin(ky * yv)
|
|
386
|
+
ct, st = torch.cos(self.omega_p * tv), torch.sin(self.omega_p * tv)
|
|
387
|
+
dt = -self.omega_p * self.t_max * st # d/dt_norm of cos(omega_p * t_phys)
|
|
388
|
+
|
|
389
|
+
# u_x = kx cos(kx x) sin(ky y) cos(omega_p t)
|
|
390
|
+
ux_x = kx * (-kx * sx) * sy * ct
|
|
391
|
+
ux_y = kx * cx * (ky * cy) * ct
|
|
392
|
+
ux_t = kx * cx * sy * dt
|
|
393
|
+
# u_y = ky sin(kx x) cos(ky y) cos(omega_p t)
|
|
394
|
+
uy_x = ky * (kx * cx) * cy * ct
|
|
395
|
+
uy_y = ky * sx * (-ky * sy) * ct
|
|
396
|
+
uy_t = ky * sx * cy * dt
|
|
397
|
+
|
|
398
|
+
grad_ux = torch.cat([ux_x, ux_y, ux_t], dim=1) # (M, 3)
|
|
399
|
+
grad_uy = torch.cat([uy_x, uy_y, uy_t], dim=1) # (M, 3)
|
|
400
|
+
return torch.stack([grad_ux, grad_uy], dim=-1) # (M, 3, 2)
|
|
401
|
+
|
|
354
402
|
def get_train_data(self, n_pde=5000, n_bc=1000):
|
|
355
403
|
x_pde = torch.rand(n_pde, 3, device=device)
|
|
356
404
|
x_ic = torch.cat([
|
|
@@ -378,10 +426,14 @@ class ElasticWave2D:
|
|
|
378
426
|
], None
|
|
379
427
|
|
|
380
428
|
def build(self, slv, x_pde, bcs, f_pde_ignored):
|
|
381
|
-
"""
|
|
429
|
+
"""Block-stacked system for the coupled (u_x, u_y) solve.
|
|
430
|
+
|
|
431
|
+
Two column blocks (u_x, u_y coefficients); each equation / BC adds a
|
|
432
|
+
block row. ``block_concat`` assembles A in R^{Mk x Nk}, b in R^{Mk x 1}
|
|
433
|
+
(k = n_outputs = 2) so ``unpack_beta`` recovers a (N, 2) beta.
|
|
434
|
+
"""
|
|
382
435
|
basis = slv.basis
|
|
383
436
|
cache = basis.cache(x_pde)
|
|
384
|
-
N = basis.n_features
|
|
385
437
|
|
|
386
438
|
# Derivatives for (x, y, t) with t as dim 2
|
|
387
439
|
u_xx = basis.derivative(x_pde, (2, 0, 0), cache=cache)
|
|
@@ -389,48 +441,36 @@ class ElasticWave2D:
|
|
|
389
441
|
u_tt = basis.derivative(x_pde, (0, 0, 2), cache=cache)
|
|
390
442
|
u_xy = basis.derivative(x_pde, (1, 1, 0), cache=cache)
|
|
391
443
|
|
|
392
|
-
# t is normalised to [0,1]; physical d²/dt² = (1/t_max)² d²/dτ²
|
|
444
|
+
# t is normalised to [0,1]; physical d²/dt² = (1/t_max)² d²/dτ², so the
|
|
445
|
+
# spatial + cross terms carry a t_max² factor (consistent with Wave2D_MS).
|
|
393
446
|
t_scale = self.t_max ** 2
|
|
447
|
+
cross = t_scale * self.c_cross
|
|
394
448
|
|
|
395
449
|
# PDE1: u_x_ττ = t_max²·(c_p² u_x_xx + c_s² u_x_yy + (c_p²-c_s²) u_y_xy)
|
|
396
450
|
A1_x = u_tt - t_scale * (self.c_p2 * u_xx + self.c_s2 * u_yy)
|
|
397
|
-
A1_y = -
|
|
398
|
-
|
|
451
|
+
A1_y = -cross * u_xy
|
|
399
452
|
# PDE2: u_y_ττ = t_max²·(c_p² u_y_yy + c_s² u_y_xx + (c_p²-c_s²) u_x_xy)
|
|
400
|
-
A2_x = -
|
|
453
|
+
A2_x = -cross * u_xy
|
|
401
454
|
A2_y = u_tt - t_scale * (self.c_p2 * u_yy + self.c_s2 * u_xx)
|
|
402
455
|
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
], dim=0)
|
|
407
|
-
b_pde = torch.zeros(2 * len(x_pde), 1, device=device)
|
|
456
|
+
z_pde = torch.zeros(len(x_pde), 1, device=device)
|
|
457
|
+
rows = [[A1_x, A1_y], [A2_x, A2_y]] # block rows: [u_x col, u_y col]
|
|
458
|
+
rhs = [[z_pde], [z_pde]] # matching RHS column blocks
|
|
408
459
|
|
|
409
|
-
As, bs = [A_pde], [b_pde]
|
|
410
460
|
w_bc = 1000.0
|
|
411
|
-
|
|
412
461
|
for (pts, vals, type_) in bcs:
|
|
413
|
-
h = basis.evaluate(pts)
|
|
414
|
-
dh = basis.gradient(pts)
|
|
415
|
-
n_pts = len(pts)
|
|
416
462
|
if type_ == "dirichlet":
|
|
417
|
-
|
|
418
|
-
H_block_x = torch.cat([h, torch.zeros_like(h)], dim=1)
|
|
419
|
-
H_block_y = torch.cat([torch.zeros_like(h), h], dim=1)
|
|
420
|
-
A_bc = torch.cat([H_block_x, H_block_y], dim=0) * w_bc
|
|
421
|
-
b_bc = torch.cat([vals[:, 0:1], vals[:, 1:2]], dim=0) * w_bc
|
|
463
|
+
op = basis.evaluate(pts) * w_bc
|
|
422
464
|
elif type_ == "neumann_t":
|
|
423
|
-
|
|
424
|
-
D_block_x = torch.cat([dh_t, torch.zeros_like(dh_t)], dim=1)
|
|
425
|
-
D_block_y = torch.cat([torch.zeros_like(dh_t), dh_t], dim=1)
|
|
426
|
-
A_bc = torch.cat([D_block_x, D_block_y], dim=0) * w_bc
|
|
427
|
-
b_bc = torch.cat([vals[:, 0:1], vals[:, 1:2]], dim=0) * w_bc
|
|
465
|
+
op = basis.gradient(pts)[:, 2, :] * w_bc
|
|
428
466
|
else:
|
|
429
467
|
continue
|
|
430
|
-
|
|
431
|
-
|
|
468
|
+
# vals: (n_pts, 2). One block row per component:
|
|
469
|
+
# u_x -> [op, None], u_y -> [None, op]
|
|
470
|
+
rows += [[op, None], [None, op]]
|
|
471
|
+
rhs += [[vals[:, 0:1] * w_bc], [vals[:, 1:2] * w_bc]]
|
|
432
472
|
|
|
433
|
-
return
|
|
473
|
+
return block_concat(rows), block_concat(rhs)
|
|
434
474
|
|
|
435
475
|
def get_test_points(self, n=2000):
|
|
436
476
|
return torch.rand(n, 3, device=device)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "FastLSQ"
|
|
7
|
-
version = "0.2.3"
|
|
7
|
+
version = "0.2.5"
|
|
8
8
|
description = "One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# Copyright (c) 2026 Antonin Sulc -- MIT.
|
|
2
|
+
"""Smoke tests for the benchmark PDE equations and the inverse-problem workflows.
|
|
3
|
+
|
|
4
|
+
Exercises the forward benchmark problems through the public ``solve_linear`` /
|
|
5
|
+
``solve_nonlinear`` API and two inverse pipelines -- parameter recovery via an
|
|
6
|
+
outer optimiser, and SINDy-style PDE discovery via analytical derivatives -- so
|
|
7
|
+
the v0.2.3 QR / N-scaled-collocation solver path is covered end-to-end, not just
|
|
8
|
+
on the single Poisson problem in ``test_basic``.
|
|
9
|
+
|
|
10
|
+
Scales are fixed (not auto-selected) and the RNG is seeded so the smoke test is
|
|
11
|
+
fast and deterministic; tolerances carry ~10x headroom over measured errors.
|
|
12
|
+
|
|
13
|
+
``ElasticWave2D`` -- a coupled 2-output vector problem -- exercises the
|
|
14
|
+
block-stacked vector path (``n_outputs = 2``, ``block_concat`` assembly,
|
|
15
|
+
``unpack_beta`` -> ``(N, 2)`` beta); it carries a per-case ``n_blocks`` bump
|
|
16
|
+
since the coupled solve needs more features than the scalar benchmarks.
|
|
17
|
+
|
|
18
|
+
``Wave2D_MS`` -- a long-time anisotropic wave -- likewise bumps ``n_blocks``;
|
|
19
|
+
its ``t_max`` was reduced from 100 to 4 so the normalised-time solution spans
|
|
20
|
+
~3.5 temporal cycles rather than ~87. The PDE's second time-derivative
|
|
21
|
+
amplifies the random-feature representation error by ``Omega**2``, so the
|
|
22
|
+
one-shot collocation only resolves a few cycles (see the class docstring) --
|
|
23
|
+
the old t_max=100 gave rel-err 1.0 in every configuration.
|
|
24
|
+
"""
|
|
25
|
+
import numpy as np
|
|
26
|
+
import pytest
|
|
27
|
+
import torch
|
|
28
|
+
|
|
29
|
+
from fastlsq import (
|
|
30
|
+
solve_linear, solve_nonlinear, solve_lstsq, Op, SinusoidalBasis,
|
|
31
|
+
sample_box, sample_boundary_box,
|
|
32
|
+
)
|
|
33
|
+
from fastlsq.problems import linear as L
|
|
34
|
+
from fastlsq.problems import nonlinear as NL
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# (class, fixed scale, val_err tolerance, solver-config overrides)
|
|
38
|
+
LINEAR_CASES = [
|
|
39
|
+
(L.PoissonND, 0.5, 5e-3, {}),
|
|
40
|
+
(L.HeatND, 0.5, 1e-1, {}),
|
|
41
|
+
(L.Wave1D, 15.0, 5e-3, {}),
|
|
42
|
+
(L.Helmholtz2D, 10.0, 1e-5, {}),
|
|
43
|
+
(L.Maxwell2D_TM, 2.0, 5e-3, {}),
|
|
44
|
+
# Long-time anisotropic wave: temporal-matched bandwidth + more features
|
|
45
|
+
# (t_max reduced 100 -> 4 so the collocation can resolve the ~3.5 cycles).
|
|
46
|
+
(L.Wave2D_MS, 3.0, 1e-2, {"n_blocks": 3}),
|
|
47
|
+
# Coupled 2-output vector problem: needs more features than the scalars.
|
|
48
|
+
(L.ElasticWave2D, 6.0, 1e-1, {"n_blocks": 3}),
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
NONLINEAR_CASES = [
|
|
52
|
+
(NL.NLPoisson2D, 8.0, 1e-4),
|
|
53
|
+
(NL.Bratu2D, 15.0, 1e-4),
|
|
54
|
+
(NL.SteadyBurgers1D,10.0, 1e-4),
|
|
55
|
+
(NL.NLHelmholtz2D, 5.0, 1e-4),
|
|
56
|
+
(NL.AllenCahn1D, 15.0, 2e-1),
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@pytest.mark.parametrize(
|
|
61
|
+
"cls,scale,tol,solver_kw", LINEAR_CASES, ids=[c[0].__name__ for c in LINEAR_CASES]
|
|
62
|
+
)
|
|
63
|
+
def test_linear_benchmark_solves(cls, scale, tol, solver_kw):
|
|
64
|
+
"""Each linear benchmark equation solves end-to-end via the public API."""
|
|
65
|
+
torch.set_default_dtype(torch.float64)
|
|
66
|
+
torch.manual_seed(0)
|
|
67
|
+
cfg = dict(n_blocks=2, hidden_size=300, n_test=1500,
|
|
68
|
+
auto_scale=False, verbose=False)
|
|
69
|
+
cfg.update(solver_kw)
|
|
70
|
+
r = solve_linear(cls(), scale=scale, **cfg)
|
|
71
|
+
ve = r["metrics"]["val_err"]
|
|
72
|
+
assert np.isfinite(ve), f"{cls.__name__}: non-finite val_err"
|
|
73
|
+
assert ve < tol, f"{cls.__name__}: val_err={ve:.2e} exceeds tol {tol:.0e}"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@pytest.mark.parametrize(
|
|
77
|
+
"cls,scale,tol", NONLINEAR_CASES, ids=[c[0].__name__ for c in NONLINEAR_CASES]
|
|
78
|
+
)
|
|
79
|
+
def test_nonlinear_benchmark_solves(cls, scale, tol):
|
|
80
|
+
"""Each nonlinear benchmark equation converges via Newton + the public API."""
|
|
81
|
+
torch.set_default_dtype(torch.float64)
|
|
82
|
+
torch.manual_seed(0)
|
|
83
|
+
r = solve_nonlinear(cls(), scale=scale, n_blocks=2, hidden_size=300,
|
|
84
|
+
n_test=1500, max_iter=15, auto_scale=False, verbose=False)
|
|
85
|
+
ve = r["metrics"]["val_err"]
|
|
86
|
+
assert r["n_iters"] > 0, f"{cls.__name__}: no Newton iterations ran"
|
|
87
|
+
assert np.isfinite(ve), f"{cls.__name__}: non-finite val_err"
|
|
88
|
+
assert ve < tol, f"{cls.__name__}: val_err={ve:.2e} exceeds tol {tol:.0e}"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_inverse_source_position():
|
|
92
|
+
"""Recover a Gaussian source position from sensor data (forward solve + L-BFGS)."""
|
|
93
|
+
opt = pytest.importorskip("scipy.optimize")
|
|
94
|
+
torch.set_default_dtype(torch.float64)
|
|
95
|
+
torch.manual_seed(0)
|
|
96
|
+
|
|
97
|
+
pde_op = -Op.laplacian(d=2)
|
|
98
|
+
basis = SinusoidalBasis.random(input_dim=2, n_features=700, sigma=5.0,
|
|
99
|
+
normalize=True)
|
|
100
|
+
x_pde = sample_box(3000, 2)
|
|
101
|
+
x_bc = sample_boundary_box(400, 2)
|
|
102
|
+
n_bc = x_bc.shape[0]
|
|
103
|
+
cache = basis.cache(x_pde)
|
|
104
|
+
A = torch.cat([pde_op.apply(basis, x_pde, cache=cache),
|
|
105
|
+
100.0 * basis.evaluate(x_bc)])
|
|
106
|
+
x_sens = torch.tensor([[0.3, 0.3], [0.7, 0.7], [0.3, 0.7], [0.7, 0.3]])
|
|
107
|
+
|
|
108
|
+
def forward(xs, ys):
|
|
109
|
+
b = torch.exp(-((x_pde[:, 0] - xs) ** 2
|
|
110
|
+
+ (x_pde[:, 1] - ys) ** 2) / 0.1).unsqueeze(1)
|
|
111
|
+
b = torch.cat([b, torch.zeros(n_bc, 1, dtype=b.dtype)])
|
|
112
|
+
beta = solve_lstsq(A, b)
|
|
113
|
+
return (basis.evaluate(x_sens) @ beta).detach().cpu().numpy().ravel()
|
|
114
|
+
|
|
115
|
+
true = np.array([0.4, 0.6])
|
|
116
|
+
rng = np.random.default_rng(0)
|
|
117
|
+
u_obs = forward(*true) + 0.005 * rng.standard_normal(4)
|
|
118
|
+
|
|
119
|
+
res = opt.minimize(
|
|
120
|
+
lambda p: float(np.sum((forward(float(p[0]), float(p[1])) - u_obs) ** 2)),
|
|
121
|
+
x0=[0.5, 0.5], method="L-BFGS-B", bounds=[(0.1, 0.9)] * 2,
|
|
122
|
+
)
|
|
123
|
+
assert np.linalg.norm(res.x - true) < 0.06, f"recovered {res.x} vs {true}"
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def test_pde_discovery_recovers_governing_equation():
|
|
127
|
+
"""SINDy-style discovery via analytical derivatives recovers u_xx = a*u + b*u_x.
|
|
128
|
+
|
|
129
|
+
Synthetic damped oscillator u = exp(-x/2) sin(2x) -> u_xx = -4.25 u - 1.0 u_x.
|
|
130
|
+
The dominant restoring term is recovered tightly; the damping term is harder
|
|
131
|
+
from 2% noise, so it is only bounded in sign/magnitude.
|
|
132
|
+
"""
|
|
133
|
+
torch.set_default_dtype(torch.float64)
|
|
134
|
+
torch.manual_seed(42)
|
|
135
|
+
|
|
136
|
+
M = 500
|
|
137
|
+
x = torch.linspace(0, 2 * np.pi, M).reshape(-1, 1)
|
|
138
|
+
u_true = torch.exp(-0.5 * x) * torch.sin(2 * x)
|
|
139
|
+
u_noisy = u_true + 0.02 * torch.randn_like(u_true)
|
|
140
|
+
|
|
141
|
+
basis = SinusoidalBasis.random(input_dim=1, n_features=400, sigma=4.0,
|
|
142
|
+
normalize=True)
|
|
143
|
+
beta = solve_lstsq(basis.evaluate(x), u_noisy, mu=1e-3)
|
|
144
|
+
cache = basis.cache(x)
|
|
145
|
+
u = basis.evaluate(x, cache=cache) @ beta
|
|
146
|
+
u_x = basis.derivative(x, alpha=(1,), cache=cache) @ beta
|
|
147
|
+
u_xx = basis.derivative(x, alpha=(2,), cache=cache) @ beta
|
|
148
|
+
|
|
149
|
+
coef = solve_lstsq(torch.cat([u, u_x], dim=1), u_xx) # u_xx = a*u + b*u_x
|
|
150
|
+
a, b = float(coef[0]), float(coef[1])
|
|
151
|
+
assert abs(a - (-4.25)) < 0.3, f"restoring coeff a={a:.3f} (want -4.25)"
|
|
152
|
+
assert b < 0 and abs(b - (-1.0)) < 0.5, f"damping coeff b={b:.3f} (want -1.0)"
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
if __name__ == "__main__":
|
|
156
|
+
pytest.main([__file__, "-v"])
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|