FastLSQ 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fastlsq-0.2.1 → fastlsq-0.2.3}/CHANGELOG.md +80 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/FastLSQ.egg-info/PKG-INFO +8 -8
- {fastlsq-0.2.1 → fastlsq-0.2.3}/FastLSQ.egg-info/SOURCES.txt +0 -10
- {fastlsq-0.2.1 → fastlsq-0.2.3}/MANIFEST.in +0 -1
- {fastlsq-0.2.1 → fastlsq-0.2.3}/PKG-INFO +8 -8
- {fastlsq-0.2.1 → fastlsq-0.2.3}/README.md +3 -3
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/orbit_hill.py +7 -5
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/__init__.py +1 -1
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/api.py +36 -14
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/basis.py +5 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/export.py +4 -1
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/learnable.py +15 -8
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/linalg.py +47 -8
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/newton.py +8 -2
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/problems/linear.py +6 -6
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/problems/nonlinear.py +20 -20
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/problems/regression.py +38 -38
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/tuning.py +9 -1
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/vector.py +2 -2
- {fastlsq-0.2.1 → fastlsq-0.2.3}/pyproject.toml +5 -5
- {fastlsq-0.2.1 → fastlsq-0.2.3}/tests/test_vector_basis.py +1 -1
- fastlsq-0.2.1/misc/fastlsq_teaser.png +0 -0
- fastlsq-0.2.1/misc/ideal_quadrupole.png +0 -0
- fastlsq-0.2.1/misc/inverse_heat_source.gif +0 -0
- fastlsq-0.2.1/misc/inverse_heat_source.png +0 -0
- fastlsq-0.2.1/misc/inverse_magnetostatics.png +0 -0
- fastlsq-0.2.1/misc/inverse_magnetostatics_convergence.png +0 -0
- fastlsq-0.2.1/misc/quadrupole_convergence.png +0 -0
- fastlsq-0.2.1/misc/quadrupole_optimization.png +0 -0
- fastlsq-0.2.1/misc/tutorial_nlpoisson_convergence.png +0 -0
- fastlsq-0.2.1/misc/tutorial_nlpoisson_solution.png +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/FastLSQ.egg-info/dependency_links.txt +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/FastLSQ.egg-info/requires.txt +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/FastLSQ.egg-info/top_level.txt +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/LICENSE +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/add_your_own_pde.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/benchmark_comparison.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/custom_features.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/fred_sde.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/fred_sde_fastlsq.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/gaia_potential.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/gaia_potential_fastlsq.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/horizons_ephemeris.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/numerai_alpha.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/numerai_alpha_fastlsq.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/run_all_fastlsq.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/__init__.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/_alsu_lattice.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/_common.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/run_all.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_beamloss_ode.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_betatron_tune.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_green_fff.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_hill_ivp.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_observe_fit_act_simulator.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_orbit_inverse.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_passive_loco.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_perturbed_hill.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_sofb_observe_fit_act.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_streaming_archive_growth.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_synchrotron_ode.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_tides_3months.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_topoff_impulse.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s01_visualize.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s02_plasma_wakefield.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s03_synchrobetatron.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s04_sunspots.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s05_helioseismology.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s06_tides.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s07_iers_earth_rotation.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s08_mauna_loa_co2.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s09_enso_qbo.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s10_pulsar_timing.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s11_modal_analysis.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s12_mems_resonator.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s13_variable_stars_kepler.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s14_eeg.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/scenarios/s15_circadian.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/extras/spectral_expansion.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/grad_shafranov.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/grid_inverse.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/grid_rl_control.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/grid_swing.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/gs_inverse.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/gs_rl_control.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/inverse_heat_source.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/inverse_magnetostatics.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/inverse_source_position.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/learnable_helmholtz.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/orbit_inverse.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/orbit_rl.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/pde_discovery.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/run_all_extensions.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/run_linear.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/run_nonlinear.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/tutorial_basic.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/tutorial_nonlinear.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/examples/vector_basis_stream_vorticity.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/block.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/device.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/diagnostics.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/geometry.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/lightning.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/plotting.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/problems/__init__.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/solvers.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/utils.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/fastlsq/viz.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/requirements.txt +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/setup.cfg +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/tests/test_basic.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/tests/test_block.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/tests/test_derivatives.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/tests/test_device.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/tests/test_grad_shafranov.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/tests/test_grid_swing.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/tests/test_learnable.py +0 -0
- {fastlsq-0.2.1 → fastlsq-0.2.3}/tests/test_orbit_hill.py +0 -0
|
@@ -2,6 +2,86 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to FastLSQ will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.2.3] - 2026-06-04
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- **Householder-QR least-squares back-end** `solve_lstsq(..., method="qr")`:
|
|
10
|
+
backward-stable at `cond(A)` (ridge applied via the `[A; sqrt(mu) I]`
|
|
11
|
+
augmentation, not the normal equations), giving SVD-grade accuracy (~1e-14 on
|
|
12
|
+
the Helmholtz random-feature benchmark) at QR cost -- and, on the
|
|
13
|
+
rank-deficient CPU/no-ridge path, faster than the `gelsd` `"svd"` driver too,
|
|
14
|
+
while far more accurate than the normal-equations `"cholesky"` (no `cond(A)`
|
|
15
|
+
squaring, no required ridge). Assumes the system is numerically full column
|
|
16
|
+
rank; `"svd"` remains the rank-deficient-safe reference.
|
|
17
|
+
- **`solve_linear(..., method=...)`**: the linear solve back-end is now
|
|
18
|
+
selectable from the high-level API (`"auto"`, `"qr"`, `"svd"`, `"cholesky"`,
|
|
19
|
+
`"rsvd"`; defaults to `"auto"`).
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
|
|
23
|
+
- **`method="auto"` now tries QR before SVD.** After the Cholesky conditioning
|
|
24
|
+
probe rejects the fast path, `auto` uses the faster, more accurate QR solve and
|
|
25
|
+
falls back to the rank-revealing SVD only when QR's solution blows up
|
|
26
|
+
(`||x|| / (1 + ||b||)` above a generous guard). Real PDE systems measure
|
|
27
|
+
`<= 0.3` and keep QR; genuinely rank-deficient *inconsistent* systems (e.g. a
|
|
28
|
+
random RHS) measure ~3e14 and route to SVD. Net: the default solve is faster
|
|
29
|
+
and at least as accurate on real problems, with minimum-norm SVD preserved
|
|
30
|
+
exactly where it is needed.
|
|
31
|
+
- **N-scaled collocation defaults.** `solve_linear` and `solve_nonlinear` now
|
|
32
|
+
default `n_pde`/`n_bc` to `None` and derive them from the feature count
|
|
33
|
+
(`n_pde = max(3000, 3 * n_blocks * hidden_size)`, `n_bc = max(800, n_pde // 5)`),
|
|
34
|
+
replacing the fixed `10000`/`2000` (and `5000`/`1000`) over-sampling that was
|
|
35
|
+
~6x the default feature count. Faster for the default configuration; passing
|
|
36
|
+
explicit `n_pde`/`n_bc` still overrides.
|
|
37
|
+
|
|
38
|
+
## [0.2.2] - 2026-06-03
|
|
39
|
+
|
|
40
|
+
### Fixed
|
|
41
|
+
|
|
42
|
+
- **Learnable bandwidth now trains.** `LearnableFastLSQ.solve_inner` replaced the
|
|
43
|
+
backprop-through-`torch.linalg.svd` inner solve (which returned NaN gradients
|
|
44
|
+
w.r.t. the bandwidth on the clustered singular values of random-feature
|
|
45
|
+
matrices) with the SVD-based `gelsd` rank-revealing least-squares driver, so
|
|
46
|
+
`train_bandwidth` / `fit` no longer stall at step 0.
|
|
47
|
+
- **Default-solve accuracy.** Tightened the `_auto_solve` Cholesky-acceptance
|
|
48
|
+
probe from `rcond**0.5` to `rcond**0.25`, so `method="auto"` falls back to SVD
|
|
49
|
+
before the normal-equations Cholesky loses half its float64 digits
|
|
50
|
+
(a system with `cond(A)` ~ 1e7 previously returned a ~1e-3-accurate answer).
|
|
51
|
+
- **Newton convergence and robustness.** The stop test now combines a *relative*
|
|
52
|
+
residual criterion (`res_norm < tol_res * R0`) with the relative solution
|
|
53
|
+
change (`||Δu||/||u|| < tol_du`); the previous unreachable absolute residual
|
|
54
|
+
tolerance forced every nonlinear solve to run the full `max_iter`. The
|
|
55
|
+
backtracking line search keeps the previous iterate when no step satisfies
|
|
56
|
+
Armijo instead of committing a worse point. `solve_nonlinear` default
|
|
57
|
+
tolerances loosened to `tol_res=1e-8`, `tol_du=1e-10`.
|
|
58
|
+
- **Continuation guard.** `solve_nonlinear` no longer raises `TypeError` when a
|
|
59
|
+
problem sets `use_continuation=True` without a `nu_target`.
|
|
60
|
+
- **Regression problems solvable via the public API.** Their `get_train_data`
|
|
61
|
+
now accepts the `n_pde`/`n_bc` signature used by `solve_linear`,
|
|
62
|
+
`auto_select_scale`, and `check_problem` (was `n_samples`, raising
|
|
63
|
+
`TypeError`); `auto_select_scale` now raises when every trial fails instead of
|
|
64
|
+
silently returning the first scale.
|
|
65
|
+
- **Float32 inputs.** `SinusoidalBasis.cache` promotes inputs to the basis
|
|
66
|
+
dtype/device, so float32 collocation points no longer raise `float != double`.
|
|
67
|
+
- **Checkpoint reload.** `load_checkpoint` passes `weights_only=False`, fixing
|
|
68
|
+
`UnpicklingError` on torch >= 2.6 (checkpoints store NumPy arrays).
|
|
69
|
+
- **Vector per-component scale.** `VectorFastLSQSolver.add_block` accepts a NumPy
|
|
70
|
+
array of per-component bandwidths (previously list/tuple only, silently
|
|
71
|
+
misread as per-dimension).
|
|
72
|
+
- **ElasticWave2D operator.** Scaled the spatial and cross terms by `t_max²`
|
|
73
|
+
(time normalisation), consistent with `Wave2D_MS`.
|
|
74
|
+
|
|
75
|
+
### Changed
|
|
76
|
+
|
|
77
|
+
- Problem modules (`nonlinear.py`, `regression.py`) resolve the device via the
|
|
78
|
+
live `get_device()` rather than an import-time snapshot.
|
|
79
|
+
- Packaging: the source distribution no longer ships the `misc/` images (the
|
|
80
|
+
sdist was ~14 MB); project URLs point to `github.com/sulcantonin/FastLSQ`;
|
|
81
|
+
README images use absolute URLs so they render on PyPI.
|
|
82
|
+
- `examples/orbit_hill.py` now solves via rank-revealing `lstsq` rather than a
|
|
83
|
+
normal-equations Cholesky.
|
|
84
|
+
|
|
5
85
|
## [0.2.1] - 2026-06-02
|
|
6
86
|
|
|
7
87
|
### Added
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: FastLSQ
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support
|
|
5
5
|
Author: Antonin Sulc
|
|
6
6
|
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/
|
|
8
|
-
Project-URL: Repository, https://github.com/
|
|
7
|
+
Project-URL: Homepage, https://github.com/sulcantonin/FastLSQ
|
|
8
|
+
Project-URL: Repository, https://github.com/sulcantonin/FastLSQ
|
|
9
9
|
Project-URL: Paper, https://arxiv.org/abs/2602.10541
|
|
10
|
-
Project-URL: Bug Tracker, https://github.com/
|
|
11
|
-
Project-URL: Changelog, https://github.com/
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/sulcantonin/FastLSQ/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/sulcantonin/FastLSQ/blob/main/CHANGELOG.md
|
|
12
12
|
Keywords: pde,partial-differential-equations,fourier-features,least-squares,scientific-computing,neural-network,physics-informed,newton-raphson
|
|
13
13
|
Classifier: Development Status :: 4 - Beta
|
|
14
14
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -45,7 +45,7 @@ Dynamic: license-file
|
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
<p align="center">
|
|
48
|
-
<img src="misc/fastlsq_teaser.png" alt="FastLSQ method overview" width="400"/>
|
|
48
|
+
<img src="https://raw.githubusercontent.com/sulcantonin/FastLSQ/main/misc/fastlsq_teaser.png" alt="FastLSQ method overview" width="400"/>
|
|
49
49
|
</p>
|
|
50
50
|
|
|
51
51
|
**Solving PDEs in one shot via Fourier features with exact analytical derivatives.**
|
|
@@ -235,8 +235,8 @@ python examples/learnable_helmholtz.py
|
|
|
235
235
|
The analytical derivatives enable gradients through the pre-factored solve, making inverse problems tractable. Example: recovering 4 anisotropic Gaussian heat sources (24 parameters) from 4 sparse sensors. The heat equation is solved in space-time; L-BFGS-B optimises source positions and shapes to match sensor time-series. *(Click image for animation.)*
|
|
236
236
|
|
|
237
237
|
<p align="center">
|
|
238
|
-
<a href="misc/inverse_heat_source.gif">
|
|
239
|
-
<img src="misc/inverse_heat_source.png" alt="Inverse heat source localisation" width="700"/>
|
|
238
|
+
<a href="https://raw.githubusercontent.com/sulcantonin/FastLSQ/main/misc/inverse_heat_source.gif">
|
|
239
|
+
<img src="https://raw.githubusercontent.com/sulcantonin/FastLSQ/main/misc/inverse_heat_source.png" alt="Inverse heat source localisation" width="700"/>
|
|
240
240
|
</a>
|
|
241
241
|
</p>
|
|
242
242
|
|
|
@@ -95,16 +95,6 @@ fastlsq/problems/__init__.py
|
|
|
95
95
|
fastlsq/problems/linear.py
|
|
96
96
|
fastlsq/problems/nonlinear.py
|
|
97
97
|
fastlsq/problems/regression.py
|
|
98
|
-
misc/fastlsq_teaser.png
|
|
99
|
-
misc/ideal_quadrupole.png
|
|
100
|
-
misc/inverse_heat_source.gif
|
|
101
|
-
misc/inverse_heat_source.png
|
|
102
|
-
misc/inverse_magnetostatics.png
|
|
103
|
-
misc/inverse_magnetostatics_convergence.png
|
|
104
|
-
misc/quadrupole_convergence.png
|
|
105
|
-
misc/quadrupole_optimization.png
|
|
106
|
-
misc/tutorial_nlpoisson_convergence.png
|
|
107
|
-
misc/tutorial_nlpoisson_solution.png
|
|
108
98
|
tests/test_basic.py
|
|
109
99
|
tests/test_block.py
|
|
110
100
|
tests/test_derivatives.py
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: FastLSQ
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: One-shot PDE solving via Fourier features with exact analytical derivatives; rank-revealing solvers, learnable anisotropic bandwidth, and CPU/CUDA/MPS support
|
|
5
5
|
Author: Antonin Sulc
|
|
6
6
|
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/
|
|
8
|
-
Project-URL: Repository, https://github.com/
|
|
7
|
+
Project-URL: Homepage, https://github.com/sulcantonin/FastLSQ
|
|
8
|
+
Project-URL: Repository, https://github.com/sulcantonin/FastLSQ
|
|
9
9
|
Project-URL: Paper, https://arxiv.org/abs/2602.10541
|
|
10
|
-
Project-URL: Bug Tracker, https://github.com/
|
|
11
|
-
Project-URL: Changelog, https://github.com/
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/sulcantonin/FastLSQ/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/sulcantonin/FastLSQ/blob/main/CHANGELOG.md
|
|
12
12
|
Keywords: pde,partial-differential-equations,fourier-features,least-squares,scientific-computing,neural-network,physics-informed,newton-raphson
|
|
13
13
|
Classifier: Development Status :: 4 - Beta
|
|
14
14
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -45,7 +45,7 @@ Dynamic: license-file
|
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
<p align="center">
|
|
48
|
-
<img src="misc/fastlsq_teaser.png" alt="FastLSQ method overview" width="400"/>
|
|
48
|
+
<img src="https://raw.githubusercontent.com/sulcantonin/FastLSQ/main/misc/fastlsq_teaser.png" alt="FastLSQ method overview" width="400"/>
|
|
49
49
|
</p>
|
|
50
50
|
|
|
51
51
|
**Solving PDEs in one shot via Fourier features with exact analytical derivatives.**
|
|
@@ -235,8 +235,8 @@ python examples/learnable_helmholtz.py
|
|
|
235
235
|
The analytical derivatives enable gradients through the pre-factored solve, making inverse problems tractable. Example: recovering 4 anisotropic Gaussian heat sources (24 parameters) from 4 sparse sensors. The heat equation is solved in space-time; L-BFGS-B optimises source positions and shapes to match sensor time-series. *(Click image for animation.)*
|
|
236
236
|
|
|
237
237
|
<p align="center">
|
|
238
|
-
<a href="misc/inverse_heat_source.gif">
|
|
239
|
-
<img src="misc/inverse_heat_source.png" alt="Inverse heat source localisation" width="700"/>
|
|
238
|
+
<a href="https://raw.githubusercontent.com/sulcantonin/FastLSQ/main/misc/inverse_heat_source.gif">
|
|
239
|
+
<img src="https://raw.githubusercontent.com/sulcantonin/FastLSQ/main/misc/inverse_heat_source.png" alt="Inverse heat source localisation" width="700"/>
|
|
240
240
|
</a>
|
|
241
241
|
</p>
|
|
242
242
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
<p align="center">
|
|
7
|
-
<img src="misc/fastlsq_teaser.png" alt="FastLSQ method overview" width="400"/>
|
|
7
|
+
<img src="https://raw.githubusercontent.com/sulcantonin/FastLSQ/main/misc/fastlsq_teaser.png" alt="FastLSQ method overview" width="400"/>
|
|
8
8
|
</p>
|
|
9
9
|
|
|
10
10
|
**Solving PDEs in one shot via Fourier features with exact analytical derivatives.**
|
|
@@ -194,8 +194,8 @@ python examples/learnable_helmholtz.py
|
|
|
194
194
|
The analytical derivatives enable gradients through the pre-factored solve, making inverse problems tractable. Example: recovering 4 anisotropic Gaussian heat sources (24 parameters) from 4 sparse sensors. The heat equation is solved in space-time; L-BFGS-B optimises source positions and shapes to match sensor time-series. *(Click image for animation.)*
|
|
195
195
|
|
|
196
196
|
<p align="center">
|
|
197
|
-
<a href="misc/inverse_heat_source.gif">
|
|
198
|
-
<img src="misc/inverse_heat_source.png" alt="Inverse heat source localisation" width="700"/>
|
|
197
|
+
<a href="https://raw.githubusercontent.com/sulcantonin/FastLSQ/main/misc/inverse_heat_source.gif">
|
|
198
|
+
<img src="https://raw.githubusercontent.com/sulcantonin/FastLSQ/main/misc/inverse_heat_source.png" alt="Inverse heat source localisation" width="700"/>
|
|
199
199
|
</a>
|
|
200
200
|
</p>
|
|
201
201
|
|
|
@@ -31,7 +31,6 @@ import sys
|
|
|
31
31
|
import time
|
|
32
32
|
import numpy as np
|
|
33
33
|
import torch
|
|
34
|
-
from scipy.linalg import cho_factor, cho_solve
|
|
35
34
|
|
|
36
35
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
37
36
|
from fastlsq.basis import SinusoidalBasis # noqa: E402
|
|
@@ -166,10 +165,13 @@ def assemble(basis: SinusoidalBasis, pts_int: torch.Tensor):
|
|
|
166
165
|
def solve(A, b):
|
|
167
166
|
A64 = A.astype(np.float64, copy=False)
|
|
168
167
|
b64 = b.astype(np.float64, copy=False)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
168
|
+
# Rank-revealing least squares. Forming the normal equations A^T A (+ridge)
|
|
169
|
+
# and Cholesky-factoring them squares the condition number of this
|
|
170
|
+
# random-feature system, which made cho_factor fail ("not positive
|
|
171
|
+
# definite"); lstsq solves min ||A x - b|| directly via SVD and needs no
|
|
172
|
+
# positive-definiteness.
|
|
173
|
+
beta, *_ = np.linalg.lstsq(A64, b64, rcond=None)
|
|
174
|
+
return beta
|
|
173
175
|
|
|
174
176
|
|
|
175
177
|
# ---------------------------------------------------------------------------
|
|
@@ -35,10 +35,11 @@ def solve_linear(
|
|
|
35
35
|
scale: Optional[float] = None,
|
|
36
36
|
n_blocks: int = 3,
|
|
37
37
|
hidden_size: int = 500,
|
|
38
|
-
n_pde: int =
|
|
39
|
-
n_bc: int =
|
|
38
|
+
n_pde: Optional[int] = None,
|
|
39
|
+
n_bc: Optional[int] = None,
|
|
40
40
|
n_test: int = 5000,
|
|
41
41
|
mu: float = 0.0,
|
|
42
|
+
method: str = "auto",
|
|
42
43
|
auto_scale: bool = True,
|
|
43
44
|
auto_scale_trials: int = 5,
|
|
44
45
|
return_solver: bool = False,
|
|
@@ -65,12 +66,17 @@ def solve_linear(
|
|
|
65
66
|
Number of feature blocks.
|
|
66
67
|
hidden_size : int
|
|
67
68
|
Features per block.
|
|
68
|
-
n_pde, n_bc : int
|
|
69
|
-
Number of collocation and boundary points.
|
|
69
|
+
n_pde, n_bc : int, optional
|
|
70
|
+
Number of collocation and boundary points. If None, scaled with the
|
|
71
|
+
feature count: n_pde = max(3000, 3 * n_blocks * hidden_size),
|
|
72
|
+
n_bc = max(800, n_pde // 5).
|
|
70
73
|
n_test : int
|
|
71
74
|
Number of test points for error evaluation.
|
|
72
75
|
mu : float
|
|
73
76
|
Tikhonov regularisation parameter (0 = no regularisation).
|
|
77
|
+
method : str
|
|
78
|
+
Linear solve back-end passed to ``solve_lstsq`` ("auto", "qr", "svd",
|
|
79
|
+
"cholesky", "rsvd"). Default "auto".
|
|
74
80
|
auto_scale : bool
|
|
75
81
|
If True and scale=None, automatically select scale via grid search.
|
|
76
82
|
auto_scale_trials : int
|
|
@@ -93,6 +99,12 @@ def solve_linear(
|
|
|
93
99
|
"""
|
|
94
100
|
t0 = time.time()
|
|
95
101
|
|
|
102
|
+
n_feat = n_blocks * hidden_size
|
|
103
|
+
if n_pde is None:
|
|
104
|
+
n_pde = max(3000, 3 * n_feat) # ~3x oversampling; fixed 10000 was 6x for default N
|
|
105
|
+
if n_bc is None:
|
|
106
|
+
n_bc = max(800, n_pde // 5)
|
|
107
|
+
|
|
96
108
|
# Auto-select scale if needed
|
|
97
109
|
if scale is None and auto_scale:
|
|
98
110
|
if verbose:
|
|
@@ -127,7 +139,7 @@ def solve_linear(
|
|
|
127
139
|
|
|
128
140
|
# Assemble and solve
|
|
129
141
|
A, b = problem.build(solver, x_pde, *build_args)
|
|
130
|
-
beta_raw = solve_lstsq(A, b, mu=mu)
|
|
142
|
+
beta_raw = solve_lstsq(A, b, mu=mu, method=method)
|
|
131
143
|
n_outputs = getattr(problem, "n_outputs", 1)
|
|
132
144
|
solver.beta = unpack_beta(beta_raw, solver.n_features, n_outputs)
|
|
133
145
|
|
|
@@ -170,12 +182,12 @@ def solve_nonlinear(
|
|
|
170
182
|
scale: Optional[float] = None,
|
|
171
183
|
n_blocks: int = 3,
|
|
172
184
|
hidden_size: int = 500,
|
|
173
|
-
n_pde: int =
|
|
174
|
-
n_bc: int =
|
|
185
|
+
n_pde: Optional[int] = None,
|
|
186
|
+
n_bc: Optional[int] = None,
|
|
175
187
|
n_test: int = 5000,
|
|
176
188
|
max_iter: int = 30,
|
|
177
|
-
tol_res: float = 1e-
|
|
178
|
-
tol_du: float = 1e-
|
|
189
|
+
tol_res: float = 1e-8,
|
|
190
|
+
tol_du: float = 1e-10,
|
|
179
191
|
damping: float = 1.0,
|
|
180
192
|
mu: float = 1e-10,
|
|
181
193
|
auto_scale: bool = True,
|
|
@@ -202,8 +214,10 @@ def solve_nonlinear(
|
|
|
202
214
|
Number of feature blocks.
|
|
203
215
|
hidden_size : int
|
|
204
216
|
Features per block.
|
|
205
|
-
n_pde, n_bc : int
|
|
206
|
-
Number of collocation and boundary points.
|
|
217
|
+
n_pde, n_bc : int, optional
|
|
218
|
+
Number of collocation and boundary points. If None, scaled with the
|
|
219
|
+
feature count: n_pde = max(3000, 3 * n_blocks * hidden_size),
|
|
220
|
+
n_bc = max(800, n_pde // 5).
|
|
207
221
|
n_test : int
|
|
208
222
|
Number of test points for error evaluation.
|
|
209
223
|
max_iter : int
|
|
@@ -239,6 +253,12 @@ def solve_nonlinear(
|
|
|
239
253
|
"""
|
|
240
254
|
t0 = time.time()
|
|
241
255
|
|
|
256
|
+
n_feat = n_blocks * hidden_size
|
|
257
|
+
if n_pde is None:
|
|
258
|
+
n_pde = max(3000, 3 * n_feat) # ~3x oversampling; fixed 10000 was 6x for default N
|
|
259
|
+
if n_bc is None:
|
|
260
|
+
n_bc = max(800, n_pde // 5)
|
|
261
|
+
|
|
242
262
|
# Auto-select scale if needed
|
|
243
263
|
if scale is None and auto_scale:
|
|
244
264
|
if verbose:
|
|
@@ -264,9 +284,11 @@ def solve_nonlinear(
|
|
|
264
284
|
# Check for continuation
|
|
265
285
|
if getattr(problem, "use_continuation", False):
|
|
266
286
|
schedule = list(problem.continuation_schedule)
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
287
|
+
nu_target = getattr(problem, "nu_target", None)
|
|
288
|
+
if nu_target is not None:
|
|
289
|
+
if schedule[-1] != nu_target:
|
|
290
|
+
schedule.append(nu_target)
|
|
291
|
+
schedule = [v for v in schedule if v >= nu_target]
|
|
270
292
|
|
|
271
293
|
history = continuation_solve(
|
|
272
294
|
solver, problem, x_pde, bcs, f_pde,
|
|
@@ -172,6 +172,11 @@ class SinusoidalBasis:
|
|
|
172
172
|
|
|
173
173
|
def cache(self, x: torch.Tensor) -> BasisCache:
|
|
174
174
|
"""Create a cache for the given collocation points."""
|
|
175
|
+
# Accept inputs in any dtype/device (e.g. float32 from user code) and
|
|
176
|
+
# promote to the basis's own dtype/device so ``x @ self.W`` never trips
|
|
177
|
+
# a float32-vs-float64 mismatch.
|
|
178
|
+
if x.dtype != self.W.dtype or x.device != self.W.device:
|
|
179
|
+
x = x.to(dtype=self.W.dtype, device=self.W.device)
|
|
175
180
|
return BasisCache(x @ self.W + self.b)
|
|
176
181
|
|
|
177
182
|
# ------------------------------------------------------------------
|
|
@@ -164,7 +164,10 @@ def load_checkpoint(
|
|
|
164
164
|
solver : FastLSQSolver
|
|
165
165
|
metadata : dict, optional
|
|
166
166
|
"""
|
|
167
|
-
|
|
167
|
+
# weights_only=False: save_checkpoint writes NumPy arrays (see to_dict),
|
|
168
|
+
# which torch>=2.6's default weights_only=True refuses to unpickle. The
|
|
169
|
+
# file is assumed to be produced by this library; only load checkpoints you trust.
|
|
170
|
+
state = torch.load(path, map_location=device, weights_only=False)
|
|
168
171
|
metadata = state.pop("metadata", None)
|
|
169
172
|
solver = from_dict(state, device=device)
|
|
170
173
|
return solver, metadata
|
|
@@ -180,19 +180,26 @@ class LearnableFastLSQ(nn.Module):
|
|
|
180
180
|
rcond: float = 1e-12):
|
|
181
181
|
"""Differentiable rank-revealing inner solve.
|
|
182
182
|
|
|
183
|
-
Solves ``beta* = argmin ||A beta - b||^2 + mu ||beta||^2`` through
|
|
184
|
-
|
|
185
|
-
``L`` *and* the solve is stable when ``A``
|
|
186
|
-
|
|
187
|
-
|
|
183
|
+
Solves ``beta* = argmin ||A beta - b||^2 + mu ||beta||^2`` through the
|
|
184
|
+
SVD-based ``gelsd`` least-squares driver with ``rcond`` truncation, so
|
|
185
|
+
gradients still flow back to ``L`` *and* the solve is stable when ``A``
|
|
186
|
+
is rank-deficient. (The ``rcond`` cut suppresses the near-null space,
|
|
187
|
+
and ``gelsd``'s backward uses the stable pseudoinverse formula rather
|
|
188
|
+
than per-singular-vector derivatives -- which is what keeps the outer
|
|
189
|
+
AdamW loop's gradients finite. A plain ``torch.linalg.lstsq`` *without*
|
|
190
|
+
``rcond`` is what amplifies the null space.)
|
|
188
191
|
|
|
189
192
|
For ``n_outputs > 1`` the system is block-stacked: the flat solution is
|
|
190
193
|
kept as ``self._beta_flat`` (shape-compatible with ``A``) for residual
|
|
191
194
|
losses, while ``self.beta`` is reshaped to ``(N, k)`` for prediction.
|
|
192
195
|
"""
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
+
if mu and mu > 0.0:
|
|
197
|
+
n = A.shape[-1]
|
|
198
|
+
A_aug = torch.cat([A, (mu ** 0.5) * torch.eye(n, dtype=A.dtype, device=A.device)], dim=0)
|
|
199
|
+
b_aug = torch.cat([b, torch.zeros(n, b.shape[-1], dtype=b.dtype, device=b.device)], dim=0)
|
|
200
|
+
beta_flat = torch.linalg.lstsq(A_aug, b_aug, rcond=rcond, driver="gelsd").solution
|
|
201
|
+
else:
|
|
202
|
+
beta_flat = torch.linalg.lstsq(A, b, rcond=rcond, driver="gelsd").solution
|
|
196
203
|
self._beta_flat = beta_flat
|
|
197
204
|
if self.n_outputs > 1:
|
|
198
205
|
self.beta = unpack_beta(beta_flat, self.n_features, self.n_outputs)
|
|
@@ -11,17 +11,26 @@ condition number -- leaving several orders of magnitude of accuracy on the floor
|
|
|
11
11
|
|
|
12
12
|
``solve_lstsq`` therefore exposes several back-ends via ``method=``:
|
|
13
13
|
|
|
14
|
+
* ``"qr"`` -- Householder-QR least squares (ridge via ``[A; sqrt(mu) I]``
|
|
15
|
+
augmentation). Backward-stable at ``cond(A)`` -- SVD-grade
|
|
16
|
+
accuracy with no normal-equations squaring and no required
|
|
17
|
+
ridge, at ~QR cost (cheaper than SVD). Assumes (numerically)
|
|
18
|
+
full column rank; ``"svd"`` is the rank-deficient-safe choice
|
|
19
|
+
(and ``"auto"``'s ultimate fallback if QR blows up).
|
|
14
20
|
* ``"svd"`` -- rank-revealing truncated SVD of ``A`` (LAPACK ``gelsd`` fast
|
|
15
|
-
path on CPU; explicit SVD elsewhere). The accuracy reference
|
|
21
|
+
path on CPU; explicit SVD elsewhere). The accuracy reference;
|
|
22
|
+
use for a genuinely rank-deficient ``A``.
|
|
16
23
|
* ``"cholesky"`` -- normal-equations ``(A^T A + mu I)`` Cholesky. Fast, but only
|
|
17
24
|
safe when ``A`` is well-conditioned.
|
|
18
25
|
* ``"rsvd"`` -- randomized SVD (range-finder + power iterations). ``O(MNk)``
|
|
19
26
|
for a target ``rank`` k << N -- the cheap option for strongly
|
|
20
27
|
low-rank systems.
|
|
21
28
|
* ``"auto"`` (default) -- try Cholesky; if the system is ill-conditioned (a
|
|
22
|
-
cheap pivot-ratio test)
|
|
23
|
-
|
|
24
|
-
|
|
29
|
+
cheap pivot-ratio test) use the faster ``"qr"``, and fall back
|
|
30
|
+
to rank-revealing ``"svd"`` only if QR's solution blows up (the
|
|
31
|
+
feature matrices can be rank-deficient). Fast path when
|
|
32
|
+
well-conditioned, QR speed/accuracy on the rest, SVD as the
|
|
33
|
+
safety net.
|
|
25
34
|
|
|
26
35
|
All back-ends are device/dtype-aware. Apple-MPS lacks a robust ``svd``/``lstsq``,
|
|
27
36
|
so the factorization is run on CPU and the result moved back (one-time warning).
|
|
@@ -33,6 +42,13 @@ import torch
|
|
|
33
42
|
|
|
34
43
|
_MPS_WARNED = False
|
|
35
44
|
|
|
45
|
+
# In ``method="auto"``: above this ``||x|| / (1 + ||b||)`` ratio the unpivoted-QR
|
|
46
|
+
# solve is treated as a rank-deficiency blow-up and handed to the rank-revealing
|
|
47
|
+
# SVD instead. Real PDE systems measure <= 0.3 here; the degenerate inconsistent
|
|
48
|
+
# (random-RHS) rank-deficient case measures ~3e14 -- so the guard is generous and
|
|
49
|
+
# a false positive only costs speed, never correctness.
|
|
50
|
+
_QR_AUTO_NORM_GUARD = 1e6
|
|
51
|
+
|
|
36
52
|
|
|
37
53
|
def _maybe_cpu(A, b):
|
|
38
54
|
"""MPS has no robust svd/lstsq -- factorize on CPU, remember to move back."""
|
|
@@ -86,16 +102,37 @@ def _rsvd_solve(A, b, mu, rcond, rank, oversample, n_iter):
|
|
|
86
102
|
return Vh.transpose(-2, -1) @ (filt.unsqueeze(-1) * (U.transpose(-2, -1) @ b))
|
|
87
103
|
|
|
88
104
|
|
|
105
|
+
def _qr_solve(A, b, mu):
    """Householder-QR least squares (ridge via [A; sqrt(mu) I] augmentation).

    Solves ``min_x ||A x - b||^2 + mu ||x||^2`` without forming the normal
    equations: a positive ``mu`` is folded in by appending ``sqrt(mu) * I``
    rows to ``A`` and matching zero rows to ``b`` before factorizing.

    No rank detection is performed, so this assumes (numerically) full column
    rank; use method='svd' for a rank-deficient A.

    Parameters
    ----------
    A : Tensor
        System matrix; the last two dims are (rows, columns). Leading batch
        dims, if any, are carried through the ridge augmentation.
    b : Tensor
        Right-hand side(s) with the same row count as ``A`` in dim ``-2``.
    mu : float
        Ridge strength. Zero, ``None`` and negative values all mean "no
        ridge" -- a negative value would make ``mu ** 0.5`` complex.

    Returns
    -------
    Tensor
        Solution with ``A.shape[-1]`` rows and ``b.shape[-1]`` columns.
    """
    if mu and mu > 0.0:
        n = A.shape[-1]
        eye = torch.eye(n, dtype=A.dtype, device=A.device)
        # Expand to the leading batch dims so the concatenation also works for
        # batched input; for plain 2-D tensors this is a no-op.
        ridge_rows = (mu ** 0.5) * eye.expand(*A.shape[:-2], n, n)
        zero_rows = b.new_zeros(*b.shape[:-2], n, b.shape[-1])
        A = torch.cat([A, ridge_rows], dim=-2)
        b = torch.cat([b, zero_rows], dim=-2)
    Q, R = torch.linalg.qr(A, mode="reduced")
    # R x = Q^T b, solved by back-substitution on the upper-triangular factor.
    return torch.linalg.solve_triangular(R, Q.transpose(-2, -1) @ b, upper=True)
|
|
116
|
+
|
|
117
|
+
|
|
89
118
|
def _auto_solve(A, b, mu, rcond):
    """Back-end cascade for ``method="auto"``: Cholesky, then QR, then SVD.

    Each stage is tried only if the previous one is rejected:

    1. Cholesky -- accepted when its factor's diagonal is finite and the
       smallest pivot exceeds ``rcond ** 0.25`` times the largest.
    2. QR -- accepted when the solution norm is finite and at most
       ``_QR_AUTO_NORM_GUARD * (1 + ||b||)``.
    3. SVD -- the rank-revealing fallback, always returned if reached.
    """
    # Stage 1: the Cholesky factor doubles as a cheap conditioning probe
    # (cond(A) ~ max/min pivot). A failed factorization just moves on.
    try:
        x_chol, factor = _cholesky_solve(A, b, mu)
        pivots = torch.diagonal(factor).abs()
        if torch.isfinite(pivots).all():
            if pivots.min() > (rcond ** 0.25) * pivots.max():
                return x_chol
    except torch.linalg.LinAlgError:
        pass

    # Stage 2: ill-conditioned, so try the faster backward-stable QR. On a
    # rank-deficient inconsistent system unpivoted QR can return a wildly
    # non-minimum-norm solution; detect that through the solution norm.
    x_qr = _qr_solve(A, b, mu)
    x_norm = torch.linalg.vector_norm(x_qr)
    if not torch.isfinite(x_norm):
        return _svd_solve(A, b, mu, rcond)
    rhs_scale = 1.0 + torch.linalg.vector_norm(b)
    if x_norm <= _QR_AUTO_NORM_GUARD * rhs_scale:
        return x_qr

    # Stage 3: QR blew up -- hand over to the rank-revealing SVD.
    return _svd_solve(A, b, mu, rcond)
|
|
100
137
|
|
|
101
138
|
|
|
@@ -112,7 +149,7 @@ def solve_lstsq(A, b, mu=0.0, rcond=1e-12, method="auto",
|
|
|
112
149
|
an unstable add-on).
|
|
113
150
|
rcond : float
|
|
114
151
|
Relative singular-value / pivot threshold for rank determination.
|
|
115
|
-
method : {"auto", "svd", "cholesky", "rsvd"}
|
|
152
|
+
method : {"auto", "qr", "svd", "cholesky", "rsvd"}
|
|
116
153
|
Solve back-end (see module docstring). Default "auto".
|
|
117
154
|
rank, oversample, n_iter : int
|
|
118
155
|
Randomized-SVD parameters (``method="rsvd"`` only). Set ``rank`` << N for
|
|
@@ -127,11 +164,13 @@ def solve_lstsq(A, b, mu=0.0, rcond=1e-12, method="auto",
|
|
|
127
164
|
x = _auto_solve(A2, b2, mu, rcond)
|
|
128
165
|
elif method == "svd":
|
|
129
166
|
x = _svd_solve(A2, b2, mu, rcond)
|
|
167
|
+
elif method == "qr":
|
|
168
|
+
x = _qr_solve(A2, b2, mu)
|
|
130
169
|
elif method == "cholesky":
|
|
131
170
|
x = _cholesky_solve(A2, b2, mu)[0]
|
|
132
171
|
elif method == "rsvd":
|
|
133
172
|
x = _rsvd_solve(A2, b2, mu, rcond, rank, oversample, n_iter)
|
|
134
173
|
else:
|
|
135
174
|
raise ValueError(f"Unknown method {method!r}; "
|
|
136
|
-
"choose 'auto', 'svd', 'cholesky', or 'rsvd'.")
|
|
175
|
+
"choose 'auto', 'qr', 'svd', 'cholesky', or 'rsvd'.")
|
|
137
176
|
return x.to(mps_dev) if mps_dev is not None else x
|
|
@@ -87,10 +87,13 @@ def newton_solve(solver, problem, x_pde, bcs, f_pde,
|
|
|
87
87
|
history = []
|
|
88
88
|
n_outputs = getattr(problem, "n_outputs", 1)
|
|
89
89
|
N = solver.n_features
|
|
90
|
+
R0 = None
|
|
90
91
|
|
|
91
92
|
for it in range(max_iter):
|
|
92
93
|
J, neg_R = problem.build_newton_step(solver, x_pde, bcs, f_pde)
|
|
93
94
|
res_norm = torch.norm(neg_R).item()
|
|
95
|
+
if R0 is None:
|
|
96
|
+
R0 = max(res_norm, 1e-30)
|
|
94
97
|
|
|
95
98
|
delta_beta_raw = solve_lstsq(J, neg_R, mu=mu)
|
|
96
99
|
delta_beta = unpack_beta(delta_beta_raw, N, n_outputs)
|
|
@@ -116,7 +119,10 @@ def newton_solve(solver, problem, x_pde, bcs, f_pde,
|
|
|
116
119
|
break
|
|
117
120
|
alpha *= 0.5
|
|
118
121
|
else:
|
|
119
|
-
|
|
122
|
+
# No backtracked step satisfied the Armijo condition; reject the
|
|
123
|
+
# step and keep the previous iterate rather than committing a
|
|
124
|
+
# point that may be worse than where we started.
|
|
125
|
+
solver.beta = beta_old
|
|
120
126
|
|
|
121
127
|
history.append({
|
|
122
128
|
"iter": it, "residual": res_norm,
|
|
@@ -128,7 +134,7 @@ def newton_solve(solver, problem, x_pde, bcs, f_pde,
|
|
|
128
134
|
print(f" Newton {it:2d}: |R|={res_norm:.2e} "
|
|
129
135
|
f"|du|/|u|={rel_du:.2e} alpha={alpha:.3f}")
|
|
130
136
|
|
|
131
|
-
if res_norm < tol_res
|
|
137
|
+
if res_norm < tol_res * R0 or rel_du < tol_du:
|
|
132
138
|
if verbose:
|
|
133
139
|
print(f" Converged in {it + 1} iterations "
|
|
134
140
|
f"(|R|={res_norm:.1e}, |du|/|u|={rel_du:.1e})")
|
|
@@ -392,13 +392,13 @@ class ElasticWave2D:
|
|
|
392
392
|
# t is normalised to [0,1]; physical d²/dt² = (1/t_max)² d²/dτ²
|
|
393
393
|
t_scale = self.t_max ** 2
|
|
394
394
|
|
|
395
|
-
# PDE1:
|
|
396
|
-
A1_x = t_scale *
|
|
397
|
-
A1_y = -self.c_cross * u_xy
|
|
395
|
+
# PDE1: u_x_ττ = t_max²·(c_p² u_x_xx + c_s² u_x_yy + (c_p²-c_s²) u_y_xy)
|
|
396
|
+
A1_x = u_tt - t_scale * (self.c_p2 * u_xx + self.c_s2 * u_yy)
|
|
397
|
+
A1_y = -t_scale * self.c_cross * u_xy
|
|
398
398
|
|
|
399
|
-
# PDE2:
|
|
400
|
-
A2_x = -self.c_cross * u_xy
|
|
401
|
-
A2_y = t_scale *
|
|
399
|
+
# PDE2: u_y_ττ = t_max²·(c_p² u_y_yy + c_s² u_y_xx + (c_p²-c_s²) u_x_xy)
|
|
400
|
+
A2_x = -t_scale * self.c_cross * u_xy
|
|
401
|
+
A2_y = u_tt - t_scale * (self.c_p2 * u_yy + self.c_s2 * u_xx)
|
|
402
402
|
|
|
403
403
|
A_pde = torch.cat([
|
|
404
404
|
torch.cat([A1_x, A1_y], dim=1),
|