adcd 2.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adcd-2.1.2/LICENSE +21 -0
- adcd-2.1.2/PKG-INFO +295 -0
- adcd-2.1.2/README.md +256 -0
- adcd-2.1.2/pyproject.toml +85 -0
- adcd-2.1.2/setup.cfg +4 -0
- adcd-2.1.2/src/adcd/__init__.py +87 -0
- adcd-2.1.2/src/adcd/anomaly_scenarios.py +393 -0
- adcd-2.1.2/src/adcd/api.py +251 -0
- adcd-2.1.2/src/adcd/arc_scorer.py +198 -0
- adcd-2.1.2/src/adcd/coarse_evaluator.py +97 -0
- adcd-2.1.2/src/adcd/correction_orchestrator.py +482 -0
- adcd-2.1.2/src/adcd/dimensional_checker.py +210 -0
- adcd-2.1.2/src/adcd/feynman_dataset.py +502 -0
- adcd-2.1.2/src/adcd/jax_optimizer.py +428 -0
- adcd-2.1.2/src/adcd/llm_proposer.py +993 -0
- adcd-2.1.2/src/adcd/metrics.py +300 -0
- adcd-2.1.2/src/adcd/mode_detection.py +72 -0
- adcd-2.1.2/src/adcd/orchestrator.py +277 -0
- adcd-2.1.2/src/adcd/pipeline.py +205 -0
- adcd-2.1.2/src/adcd/real_data_loader.py +310 -0
- adcd-2.1.2/src/adcd/real_scenarios.py +162 -0
- adcd-2.1.2/src/adcd/residual_analyzer.py +124 -0
- adcd-2.1.2/src/adcd/result.py +291 -0
- adcd-2.1.2/src/adcd.egg-info/PKG-INFO +295 -0
- adcd-2.1.2/src/adcd.egg-info/SOURCES.txt +40 -0
- adcd-2.1.2/src/adcd.egg-info/dependency_links.txt +1 -0
- adcd-2.1.2/src/adcd.egg-info/entry_points.txt +2 -0
- adcd-2.1.2/src/adcd.egg-info/requires.txt +18 -0
- adcd-2.1.2/src/adcd.egg-info/top_level.txt +1 -0
- adcd-2.1.2/tests/test_api.py +216 -0
- adcd-2.1.2/tests/test_arc_scorer.py +84 -0
- adcd-2.1.2/tests/test_coarse_evaluator.py +45 -0
- adcd-2.1.2/tests/test_correction_discovery.py +159 -0
- adcd-2.1.2/tests/test_dimensional_checker.py +103 -0
- adcd-2.1.2/tests/test_gate_telemetry.py +64 -0
- adcd-2.1.2/tests/test_integration.py +144 -0
- adcd-2.1.2/tests/test_jax_optimizer.py +250 -0
- adcd-2.1.2/tests/test_metrics_scale.py +56 -0
- adcd-2.1.2/tests/test_multivar_arc.py +45 -0
- adcd-2.1.2/tests/test_paper_claims.py +50 -0
- adcd-2.1.2/tests/test_pipeline.py +139 -0
- adcd-2.1.2/tests/test_real_data.py +261 -0
adcd-2.1.2/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Muhammad Afif Erdita
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
adcd-2.1.2/PKG-INFO
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: adcd
|
|
3
|
+
Version: 2.1.2
|
|
4
|
+
Summary: Anomaly-Driven Correction Discovery: Physics-Constrained Symbolic Regression for Evolutionary Scientific Discovery
|
|
5
|
+
Author-email: Muhammad Afif Erdita <maeapip10@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://zenodo.org/records/20534940
|
|
8
|
+
Project-URL: Repository, https://github.com/apiprdt/PhysicsPaper
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/apiprdt/PhysicsPaper/issues
|
|
10
|
+
Project-URL: DOI, https://doi.org/10.5281/zenodo.20534940
|
|
11
|
+
Keywords: symbolic regression,physics,machine learning,scientific discovery,JAX
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Requires-Dist: scipy>=1.11
|
|
25
|
+
Requires-Dist: sympy>=1.12
|
|
26
|
+
Requires-Dist: jax>=0.4.20
|
|
27
|
+
Requires-Dist: jaxlib>=0.4.20
|
|
28
|
+
Requires-Dist: matplotlib>=3.7
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
32
|
+
Requires-Dist: flake8>=6.1; extra == "dev"
|
|
33
|
+
Requires-Dist: black>=23.0; extra == "dev"
|
|
34
|
+
Provides-Extra: llm
|
|
35
|
+
Requires-Dist: google-generativeai>=0.3; extra == "llm"
|
|
36
|
+
Provides-Extra: all
|
|
37
|
+
Requires-Dist: adcd[dev,llm]; extra == "all"
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
|
|
40
|
+
# ADCD — Anomaly-Driven Correction Discovery
|
|
41
|
+
|
|
42
|
+
[DOI: 10.5281/zenodo.20534940](https://doi.org/10.5281/zenodo.20534940)
|
|
43
|
+
[CI status](https://github.com/apiprdt/PhysicsPaper/actions/workflows/ci.yml)
|
|
44
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
45
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
46
|
+
|
|
47
|
+
**Physics-Constrained Symbolic Regression for Evolutionary Scientific Discovery**
|
|
48
|
+
|
|
49
|
+
ADCD is a symbolic regression framework that discovers *physical correction terms* rather than learning equations from scratch. Given a known classical law and anomalous observations, ADCD recovers the dimensionless correction Δ that reconciles theory with experiment — mirroring how physics actually evolves.
|
|
50
|
+
|
|
51
|
+
> **82.8% (±7.7%) mean structural recovery** across 5 random seeds, with peak **94.4%** at the reference seed.
|
|
52
|
+
> **4/4 real-world structural class matches** (Mercury, Lamb Shift, Muon g-2, Blackbody).
|
|
53
|
+
> **77 automated unit tests** passing on Python 3.10 and 3.11.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Key Features
|
|
58
|
+
|
|
59
|
+
- **Correction-first paradigm** — starts from a known classical law, not a blank slate; designed for anomaly-driven theory refinement where the baseline is structurally correct
|
|
60
|
+
- **Physics-gated search cascade** — AST complexity, dimensional homogeneity + transcendental guardrails, and asymptotic consistency (ARC) gates screen unphysical candidates *before* optimization
|
|
61
|
+
- **JAX-traced L-BFGS-B optimizer** — parameter-scaled differentiable fitting with multi-restart log-uniform initialization
|
|
62
|
+
- **BIC reranking** — selects the most parsimonious correction over purely numerical fits
|
|
63
|
+
- **Residual feature intelligence** — statistical priors (monotonicity, curvature, oscillation, decay rate, symmetry) bias the template sampler toward the correct mathematical family
|
|
64
|
+
- **Coarse empirical evaluation** — data-driven pre-filter ranks gate survivors before full JAX optimization
|
|
65
|
+
- **Noise-robust** — 93.3% mean at 0% noise, 91.1% at 1%, 71.1% at 5%, 68.9% at 10%
|
|
66
|
+
|
|
67
|
+
## Quick Start
|
|
68
|
+
|
|
69
|
+
### Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install adcd
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Or install from source:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
git clone https://github.com/apiprdt/PhysicsPaper.git
|
|
79
|
+
cd PhysicsPaper
|
|
80
|
+
pip install -e ".[dev]"
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Usage
|
|
84
|
+
|
|
85
|
+
Running ADCD is extremely simple using the high-level scientific API:
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
import adcd
|
|
89
|
+
|
|
90
|
+
# 1. Load a pre-defined benchmark scenario
|
|
91
|
+
scenarios = adcd.get_all_scenarios()
|
|
92
|
+
scenario = scenarios[0] # Relativistic Kinetic Energy
|
|
93
|
+
|
|
94
|
+
# 2. Run discovery in a single line!
|
|
95
|
+
result = adcd.discover_correction(scenario, max_iterations=5, proposer="mock")
|
|
96
|
+
|
|
97
|
+
print(f"Discovered correction: {result.best_expr}")
|
|
98
|
+
print(f"Residual NMSE: {result.best_nmse_residual:.2e}")
|
|
99
|
+
print(f"Parameters: {result.best_theta}")
|
|
100
|
+
|
|
101
|
+
# 3. Export LaTeX or plot residuals
|
|
102
|
+
print(result.export_latex())
|
|
103
|
+
result.plot_residuals()
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
For custom experimental data, use `adcd.fit(...)`:
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
import numpy as np
|
|
110
|
+
import adcd
|
|
111
|
+
|
|
112
|
+
x = np.linspace(1.0, 5.0, 100)
|
|
113
|
+
X = {"x": x}
|
|
114
|
+
y_classical = 2.0 * x
|
|
115
|
+
y_observed = 2.0 * x + 0.5 * x**2 # hidden x² correction
|
|
116
|
+
|
|
117
|
+
result = adcd.fit(
|
|
118
|
+
X=X,
|
|
119
|
+
y_obs=y_observed,
|
|
120
|
+
y_classical=y_classical,
|
|
121
|
+
limit_variable="x",
|
|
122
|
+
limit_direction="0",
|
|
123
|
+
correction_mode="additive"
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
result.summary()
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Benchmark Results
|
|
130
|
+
|
|
131
|
+
### Standard Benchmark (seed=42, Mock Proposer)
|
|
132
|
+
|
|
133
|
+
Results from `run_correction_discovery.py --proposer mock` (reference seed=42, 4 iterations per scenario).
|
|
134
|
+
|
|
135
|
+
| Scenario | Tier | 0% Noise | 1% Noise | 5% Noise | 10% Noise |
|
|
136
|
+
|----------|------|:--------:|:--------:|:--------:|:---------:|
|
|
137
|
+
| Relativistic KE | Textbook | ✓ | ✓ | ✓ | ✓ |
|
|
138
|
+
| Yukawa Gravity | Textbook | ✓ | ✓ | ✓ | ✓ |
|
|
139
|
+
| Anharmonic Spring | Textbook | ✓ | ✓ | ✓ | ✓ |
|
|
140
|
+
| Screened Coulomb | Cross-Domain | ✓ | ✓ | ✗ | ✗ |
|
|
141
|
+
| Net Radiation | Cross-Domain | ✓ | ✓ | ✓ | ✓ |
|
|
142
|
+
| Nonlinear Drag | Cross-Domain | ✓ | ✓ | ✓ | ✓ |
|
|
143
|
+
| Mystery-A (tanh²) | Synthetic | ✓ | ✓ | ✓ | ✓ |
|
|
144
|
+
| Mystery-B (sinc) | Synthetic | ✓ | ✓ | ✓ | ✓ |
|
|
145
|
+
| Mystery-C (log-quotient) | Synthetic | ✓ | ✓ | ✓ | ✓ |
|
|
146
|
+
| **Overall** | | **100%** | **100%** | **88.9%** | **88.9%** |
|
|
147
|
+
|
|
148
|
+
> **Note**: Screened Coulomb fails at ≥5% noise because exponential decay ($e^{-r/\lambda}$) and rational saturation ($r/(r+\lambda)$) are numerically indistinguishable at the tested SNR with limited dynamic range — an information-theoretic limit, not a framework deficiency.
|
|
149
|
+
|
|
150
|
+
### Multi-Seed Reproducibility
|
|
151
|
+
|
|
152
|
+
All results are reported across 5 independent random seeds (0, 7, 21, 42, 99):
|
|
153
|
+
|
|
154
|
+
| Seed | Class Match Rate |
|
|
155
|
+
|:----:|:----------------:|
|
|
156
|
+
| 0 | 86.1% (31/36) |
|
|
157
|
+
| 7 | 75.0% (27/36) |
|
|
158
|
+
| 21 | 77.8% (28/36) |
|
|
159
|
+
| 42 | 94.4% (34/36) |
|
|
160
|
+
| 99 | 80.6% (29/36) |
|
|
161
|
+
| **Mean** | **82.8% ± 7.7%** |
|
|
162
|
+
|
|
163
|
+
Performance variation reflects stochastic template sampling in the MockProposer. Physics gates ensure that **when** the correct functional family is sampled, it consistently survives filtering and is selected by BIC reranking.
|
|
164
|
+
|
|
165
|
+
### Real-World Physical Constants Benchmark
|
|
166
|
+
|
|
167
|
+
Synthetic-real hybrid data using experimentally validated constants from JPL DE440, NIST, and CODATA:
|
|
168
|
+
|
|
169
|
+
| Physical Scenario | Discovered Correction | Converged | Class Match | NMSE |
|
|
170
|
+
|---|---|:---:|:---:|:---:|
|
|
171
|
+
| Mercury Perihelion (GR) | `θ₀·vc²` | — | ✓ polynomial | 1.11e-05 |
|
|
172
|
+
| Hydrogen Lamb Shift (QED) | `θ₀(n/θ₁)^(-θ₂)` | ✓ | ✓ power_law | 1.82e-18 |
|
|
173
|
+
| Muon g-2 (Schwinger) | `θ₀(α/π)^θ₁` | ✓ | ✓ polynomial | 7.94e-07 |
|
|
174
|
+
| Blackbody (Planck) | `-1 + e^(-f/θ₁)` | — | ✓ exponential | 2.59e-02 |
|
|
175
|
+
|
|
176
|
+
All 4 scenarios achieve correct structural class identification. 2 scenarios (Lamb Shift, Muon g-2) achieve full convergence with NMSE < 10⁻⁶. Mercury and Blackbody achieve correct structural identification but quantitative convergence is limited by parametrization sensitivity and dynamic range, respectively.
|
|
177
|
+
|
|
178
|
+
### PySR Comparison (fair profile: 100 iterations, maxsize 30, 60s timeout)
|
|
179
|
+
|
|
180
|
+
| Method | 0% Noise | 1% Noise | 5% Noise | 10% Noise |
|
|
181
|
+
|--------|:--------:|:--------:|:--------:|:---------:|
|
|
182
|
+
| ADCD (ours, seed=42) | 9/9 (100%) | 9/9 (100%) | 8/9 (88.9%) | 8/9 (88.9%) |
|
|
183
|
+
| PySR fair | 4/9 (44.4%) | 5/9 (55.6%) | 1/9 (11.1%) | 5/9 (55.6%) |
|
|
184
|
+
|
|
185
|
+
ADCD outperforms PySR fair by **77.8 percentage points** at 5% noise (88.9% vs 11.1%). A legacy fast profile (wall-clock matched) is retained in `pysr_baseline_results.json` for historical comparison only.
|
|
186
|
+
|
|
187
|
+
## Project Structure
|
|
188
|
+
|
|
189
|
+
```
|
|
190
|
+
PhysicsPaper/
|
|
191
|
+
├── src/adcd/ # Installable package
|
|
192
|
+
│ ├── __init__.py # Public API (adcd.fit, adcd.discover_correction)
|
|
193
|
+
│ ├── anomaly_scenarios.py # 9 standard + 3 blind benchmark scenarios
|
|
194
|
+
│ ├── arc_scorer.py # Asymptotic consistency gate (ARC)
|
|
195
|
+
│ ├── coarse_evaluator.py # Coarse numerical pre-filter
|
|
196
|
+
│ ├── correction_orchestrator.py # Main multi-iteration discovery loop
|
|
197
|
+
│ ├── dimensional_checker.py # Dimensional homogeneity + transcendental guardrail
|
|
198
|
+
│ ├── jax_optimizer.py # JAX L-BFGS-B optimizer (parameter-scaled)
|
|
199
|
+
│ ├── llm_proposer.py # Mock + Gemini + OpenAI-compatible proposers
|
|
200
|
+
│ ├── metrics.py # NMSE, BIC, structural classification
|
|
201
|
+
│ ├── pipeline.py # Stage 1 filter cascade
|
|
202
|
+
│ ├── real_data_loader.py # Real-world data loading (JPL, NIST, CODATA)
|
|
203
|
+
│ ├── real_scenarios.py # Real-world validation scenarios
|
|
204
|
+
│ ├── residual_analyzer.py # Statistical residual feature extraction
|
|
205
|
+
│ └── result.py # CorrectionResult: summary, LaTeX, plot
|
|
206
|
+
├── tests/ # 77 unit + integration tests
|
|
207
|
+
├── paper/ # LaTeX source (main.tex) + figures
|
|
208
|
+
├── run_correction_discovery.py # Standard 9-scenario benchmark runner
|
|
209
|
+
├── run_real_data_benchmark.py # Real-world physical constants benchmark
|
|
210
|
+
├── run_reproducibility.py # Multi-seed reproducibility study (5 seeds)
|
|
211
|
+
├── run_ablation.py # Gate ablation study
|
|
212
|
+
├── run_pysr_baseline.py # PySR comparison baseline
|
|
213
|
+
├── run_mlp_baseline.py # MLP comparison baseline
|
|
214
|
+
├── run_misspecification_benchmark.py # Baseline misspecification fail-safe test
|
|
215
|
+
├── generate_figures.py # Paper figure generator
|
|
216
|
+
├── .github/workflows/ # CI (test + lint + LaTeX) and PyPI publish
|
|
217
|
+
├── pyproject.toml # PEP 517/518 build configuration
|
|
218
|
+
└── README.md # This file
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## Running Tests
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
pip install -e ".[dev]"
|
|
225
|
+
pytest --cov=adcd
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
All 77 tests pass on Python 3.10 and 3.11 (Ubuntu and Windows).
|
|
229
|
+
|
|
230
|
+
## Submission & Release
|
|
231
|
+
|
|
232
|
+
Paper submission guide (GitHub Release → Zenodo → arXiv): [docs/SUBMISSION_CHECKLIST_v2.1.2.md](docs/SUBMISSION_CHECKLIST_v2.1.2.md)
|
|
233
|
+
|
|
234
|
+
Current release tag: **v2.1.2** | Package version: **2.1.2**
|
|
235
|
+
|
|
236
|
+
## Reproducing Paper Results
|
|
237
|
+
|
|
238
|
+
Verify claims before citing numbers:
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
python scripts/verify_paper_claims.py # expect [ALL OK]
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
One-command reproduction (Windows):
|
|
245
|
+
|
|
246
|
+
```powershell
|
|
247
|
+
.\reproduce_all.ps1
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
Or step-by-step:
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
python run_correction_discovery.py --proposer mock # Main benchmark + gate telemetry
|
|
254
|
+
python run_real_data_benchmark.py # Real-world (4 scenarios)
|
|
255
|
+
python run_pysr_baseline.py --profile fair # Fair PySR comparison
|
|
256
|
+
python run_ablation.py # Gate ablation study
|
|
257
|
+
python run_oracle_ablation.py # Oracle ground-truth injection test
|
|
258
|
+
python run_correction_scaling.py # Correction magnitude sweep
|
|
259
|
+
python scripts/generate_experiment_report.py # Sync experiment_results.md
|
|
260
|
+
python scripts/generate_efficiency_table.py # ADCD vs PySR efficiency table
|
|
261
|
+
python scripts/validate_results.py # Consistency checks
|
|
262
|
+
python generate_figures.py # All paper figures
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
> **Proposer regimes:** Mock Proposer = template-assisted recovery; Hybrid/Gemini = zero-shot discovery. Report both separately (see paper Section 4).
|
|
266
|
+
|
|
267
|
+
```bash
|
|
268
|
+
# LLM benchmark (requires GEMINI_API_KEY) — writes results/llm_benchmark.json
|
|
269
|
+
python run_llm_benchmark.py --proposer hybrid
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## Citing This Work
|
|
273
|
+
|
|
274
|
+
If you use ADCD in your research, please cite:
|
|
275
|
+
|
|
276
|
+
```bibtex
|
|
277
|
+
@software{erdita2026adcd,
|
|
278
|
+
author = {Erdita, Muhammad Afif},
|
|
279
|
+
title = {{Anomaly-Driven Correction Discovery (ADCD): Physics-Constrained
|
|
280
|
+
Symbolic Regression for Evolutionary Scientific Discovery}},
|
|
281
|
+
year = {2026},
|
|
282
|
+
publisher = {Zenodo},
|
|
283
|
+
version = {2.1.2},
|
|
284
|
+
doi = {10.5281/zenodo.20534940},
|
|
285
|
+
url = {https://doi.org/10.5281/zenodo.20534940}
|
|
286
|
+
}
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
## AI Disclosure
|
|
290
|
+
|
|
291
|
+
This project was developed with assistance from Google DeepMind's Antigravity AI assistant. AI was used as a pair-programming and writing tool. All scientific content, experimental design decisions, and intellectual contributions are the author's own.
|
|
292
|
+
|
|
293
|
+
## License
|
|
294
|
+
|
|
295
|
+
[MIT](LICENSE)
|
adcd-2.1.2/README.md
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# ADCD — Anomaly-Driven Correction Discovery
|
|
2
|
+
|
|
3
|
+
[DOI: 10.5281/zenodo.20534940](https://doi.org/10.5281/zenodo.20534940)
|
|
4
|
+
[CI status](https://github.com/apiprdt/PhysicsPaper/actions/workflows/ci.yml)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
7
|
+
|
|
8
|
+
**Physics-Constrained Symbolic Regression for Evolutionary Scientific Discovery**
|
|
9
|
+
|
|
10
|
+
ADCD is a symbolic regression framework that discovers *physical correction terms* rather than learning equations from scratch. Given a known classical law and anomalous observations, ADCD recovers the dimensionless correction Δ that reconciles theory with experiment — mirroring how physics actually evolves.
|
|
11
|
+
|
|
12
|
+
> **82.8% (±7.7%) mean structural recovery** across 5 random seeds, with peak **94.4%** at the reference seed.
|
|
13
|
+
> **4/4 real-world structural class matches** (Mercury, Lamb Shift, Muon g-2, Blackbody).
|
|
14
|
+
> **77 automated unit tests** passing on Python 3.10 and 3.11.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Key Features
|
|
19
|
+
|
|
20
|
+
- **Correction-first paradigm** — starts from a known classical law, not a blank slate; designed for anomaly-driven theory refinement where the baseline is structurally correct
|
|
21
|
+
- **Physics-gated search cascade** — AST complexity, dimensional homogeneity + transcendental guardrails, and asymptotic consistency (ARC) gates screen unphysical candidates *before* optimization
|
|
22
|
+
- **JAX-traced L-BFGS-B optimizer** — parameter-scaled differentiable fitting with multi-restart log-uniform initialization
|
|
23
|
+
- **BIC reranking** — selects the most parsimonious correction over purely numerical fits
|
|
24
|
+
- **Residual feature intelligence** — statistical priors (monotonicity, curvature, oscillation, decay rate, symmetry) bias the template sampler toward the correct mathematical family
|
|
25
|
+
- **Coarse empirical evaluation** — data-driven pre-filter ranks gate survivors before full JAX optimization
|
|
26
|
+
- **Noise-robust** — 93.3% mean at 0% noise, 91.1% at 1%, 71.1% at 5%, 68.9% at 10%
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
### Installation
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install adcd
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Or install from source:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
git clone https://github.com/apiprdt/PhysicsPaper.git
|
|
40
|
+
cd PhysicsPaper
|
|
41
|
+
pip install -e ".[dev]"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Usage
|
|
45
|
+
|
|
46
|
+
Running ADCD is extremely simple using the high-level scientific API:
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
import adcd
|
|
50
|
+
|
|
51
|
+
# 1. Load a pre-defined benchmark scenario
|
|
52
|
+
scenarios = adcd.get_all_scenarios()
|
|
53
|
+
scenario = scenarios[0] # Relativistic Kinetic Energy
|
|
54
|
+
|
|
55
|
+
# 2. Run discovery in a single line!
|
|
56
|
+
result = adcd.discover_correction(scenario, max_iterations=5, proposer="mock")
|
|
57
|
+
|
|
58
|
+
print(f"Discovered correction: {result.best_expr}")
|
|
59
|
+
print(f"Residual NMSE: {result.best_nmse_residual:.2e}")
|
|
60
|
+
print(f"Parameters: {result.best_theta}")
|
|
61
|
+
|
|
62
|
+
# 3. Export LaTeX or plot residuals
|
|
63
|
+
print(result.export_latex())
|
|
64
|
+
result.plot_residuals()
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
For custom experimental data, use `adcd.fit(...)`:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
import numpy as np
|
|
71
|
+
import adcd
|
|
72
|
+
|
|
73
|
+
x = np.linspace(1.0, 5.0, 100)
|
|
74
|
+
X = {"x": x}
|
|
75
|
+
y_classical = 2.0 * x
|
|
76
|
+
y_observed = 2.0 * x + 0.5 * x**2 # hidden x² correction
|
|
77
|
+
|
|
78
|
+
result = adcd.fit(
|
|
79
|
+
X=X,
|
|
80
|
+
y_obs=y_observed,
|
|
81
|
+
y_classical=y_classical,
|
|
82
|
+
limit_variable="x",
|
|
83
|
+
limit_direction="0",
|
|
84
|
+
correction_mode="additive"
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
result.summary()
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Benchmark Results
|
|
91
|
+
|
|
92
|
+
### Standard Benchmark (seed=42, Mock Proposer)
|
|
93
|
+
|
|
94
|
+
Results from `run_correction_discovery.py --proposer mock` (reference seed=42, 4 iterations per scenario).
|
|
95
|
+
|
|
96
|
+
| Scenario | Tier | 0% Noise | 1% Noise | 5% Noise | 10% Noise |
|
|
97
|
+
|----------|------|:--------:|:--------:|:--------:|:---------:|
|
|
98
|
+
| Relativistic KE | Textbook | ✓ | ✓ | ✓ | ✓ |
|
|
99
|
+
| Yukawa Gravity | Textbook | ✓ | ✓ | ✓ | ✓ |
|
|
100
|
+
| Anharmonic Spring | Textbook | ✓ | ✓ | ✓ | ✓ |
|
|
101
|
+
| Screened Coulomb | Cross-Domain | ✓ | ✓ | ✗ | ✗ |
|
|
102
|
+
| Net Radiation | Cross-Domain | ✓ | ✓ | ✓ | ✓ |
|
|
103
|
+
| Nonlinear Drag | Cross-Domain | ✓ | ✓ | ✓ | ✓ |
|
|
104
|
+
| Mystery-A (tanh²) | Synthetic | ✓ | ✓ | ✓ | ✓ |
|
|
105
|
+
| Mystery-B (sinc) | Synthetic | ✓ | ✓ | ✓ | ✓ |
|
|
106
|
+
| Mystery-C (log-quotient) | Synthetic | ✓ | ✓ | ✓ | ✓ |
|
|
107
|
+
| **Overall** | | **100%** | **100%** | **88.9%** | **88.9%** |
|
|
108
|
+
|
|
109
|
+
> **Note**: Screened Coulomb fails at ≥5% noise because exponential decay ($e^{-r/\lambda}$) and rational saturation ($r/(r+\lambda)$) are numerically indistinguishable at the tested SNR with limited dynamic range — an information-theoretic limit, not a framework deficiency.
|
|
110
|
+
|
|
111
|
+
### Multi-Seed Reproducibility
|
|
112
|
+
|
|
113
|
+
All results are reported across 5 independent random seeds (0, 7, 21, 42, 99):
|
|
114
|
+
|
|
115
|
+
| Seed | Class Match Rate |
|
|
116
|
+
|:----:|:----------------:|
|
|
117
|
+
| 0 | 86.1% (31/36) |
|
|
118
|
+
| 7 | 75.0% (27/36) |
|
|
119
|
+
| 21 | 77.8% (28/36) |
|
|
120
|
+
| 42 | 94.4% (34/36) |
|
|
121
|
+
| 99 | 80.6% (29/36) |
|
|
122
|
+
| **Mean** | **82.8% ± 7.7%** |
|
|
123
|
+
|
|
124
|
+
Performance variation reflects stochastic template sampling in the MockProposer. Physics gates ensure that **when** the correct functional family is sampled, it consistently survives filtering and is selected by BIC reranking.
|
|
125
|
+
|
|
126
|
+
### Real-World Physical Constants Benchmark
|
|
127
|
+
|
|
128
|
+
Synthetic-real hybrid data using experimentally validated constants from JPL DE440, NIST, and CODATA:
|
|
129
|
+
|
|
130
|
+
| Physical Scenario | Discovered Correction | Converged | Class Match | NMSE |
|
|
131
|
+
|---|---|:---:|:---:|:---:|
|
|
132
|
+
| Mercury Perihelion (GR) | `θ₀·vc²` | — | ✓ polynomial | 1.11e-05 |
|
|
133
|
+
| Hydrogen Lamb Shift (QED) | `θ₀(n/θ₁)^(-θ₂)` | ✓ | ✓ power_law | 1.82e-18 |
|
|
134
|
+
| Muon g-2 (Schwinger) | `θ₀(α/π)^θ₁` | ✓ | ✓ polynomial | 7.94e-07 |
|
|
135
|
+
| Blackbody (Planck) | `-1 + e^(-f/θ₁)` | — | ✓ exponential | 2.59e-02 |
|
|
136
|
+
|
|
137
|
+
All 4 scenarios achieve correct structural class identification. 2 scenarios (Lamb Shift, Muon g-2) achieve full convergence with NMSE < 10⁻⁶. Mercury and Blackbody achieve correct structural identification but quantitative convergence is limited by parametrization sensitivity and dynamic range, respectively.
|
|
138
|
+
|
|
139
|
+
### PySR Comparison (fair profile: 100 iterations, maxsize 30, 60s timeout)
|
|
140
|
+
|
|
141
|
+
| Method | 0% Noise | 1% Noise | 5% Noise | 10% Noise |
|
|
142
|
+
|--------|:--------:|:--------:|:--------:|:---------:|
|
|
143
|
+
| ADCD (ours, seed=42) | 9/9 (100%) | 9/9 (100%) | 8/9 (88.9%) | 8/9 (88.9%) |
|
|
144
|
+
| PySR fair | 4/9 (44.4%) | 5/9 (55.6%) | 1/9 (11.1%) | 5/9 (55.6%) |
|
|
145
|
+
|
|
146
|
+
ADCD outperforms PySR fair by **77.8 percentage points** at 5% noise (88.9% vs 11.1%). A legacy fast profile (wall-clock matched) is retained in `pysr_baseline_results.json` for historical comparison only.
|
|
147
|
+
|
|
148
|
+
## Project Structure
|
|
149
|
+
|
|
150
|
+
```
|
|
151
|
+
PhysicsPaper/
|
|
152
|
+
├── src/adcd/ # Installable package
|
|
153
|
+
│ ├── __init__.py # Public API (adcd.fit, adcd.discover_correction)
|
|
154
|
+
│ ├── anomaly_scenarios.py # 9 standard + 3 blind benchmark scenarios
|
|
155
|
+
│ ├── arc_scorer.py # Asymptotic consistency gate (ARC)
|
|
156
|
+
│ ├── coarse_evaluator.py # Coarse numerical pre-filter
|
|
157
|
+
│ ├── correction_orchestrator.py # Main multi-iteration discovery loop
|
|
158
|
+
│ ├── dimensional_checker.py # Dimensional homogeneity + transcendental guardrail
|
|
159
|
+
│ ├── jax_optimizer.py # JAX L-BFGS-B optimizer (parameter-scaled)
|
|
160
|
+
│ ├── llm_proposer.py # Mock + Gemini + OpenAI-compatible proposers
|
|
161
|
+
│ ├── metrics.py # NMSE, BIC, structural classification
|
|
162
|
+
│ ├── pipeline.py # Stage 1 filter cascade
|
|
163
|
+
│ ├── real_data_loader.py # Real-world data loading (JPL, NIST, CODATA)
|
|
164
|
+
│ ├── real_scenarios.py # Real-world validation scenarios
|
|
165
|
+
│ ├── residual_analyzer.py # Statistical residual feature extraction
|
|
166
|
+
│ └── result.py # CorrectionResult: summary, LaTeX, plot
|
|
167
|
+
├── tests/ # 77 unit + integration tests
|
|
168
|
+
├── paper/ # LaTeX source (main.tex) + figures
|
|
169
|
+
├── run_correction_discovery.py # Standard 9-scenario benchmark runner
|
|
170
|
+
├── run_real_data_benchmark.py # Real-world physical constants benchmark
|
|
171
|
+
├── run_reproducibility.py # Multi-seed reproducibility study (5 seeds)
|
|
172
|
+
├── run_ablation.py # Gate ablation study
|
|
173
|
+
├── run_pysr_baseline.py # PySR comparison baseline
|
|
174
|
+
├── run_mlp_baseline.py # MLP comparison baseline
|
|
175
|
+
├── run_misspecification_benchmark.py # Baseline misspecification fail-safe test
|
|
176
|
+
├── generate_figures.py # Paper figure generator
|
|
177
|
+
├── .github/workflows/ # CI (test + lint + LaTeX) and PyPI publish
|
|
178
|
+
├── pyproject.toml # PEP 517/518 build configuration
|
|
179
|
+
└── README.md # This file
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Running Tests
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
pip install -e ".[dev]"
|
|
186
|
+
pytest --cov=adcd
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
All 77 tests pass on Python 3.10 and 3.11 (Ubuntu and Windows).
|
|
190
|
+
|
|
191
|
+
## Submission & Release
|
|
192
|
+
|
|
193
|
+
Paper submission guide (GitHub Release → Zenodo → arXiv): [docs/SUBMISSION_CHECKLIST_v2.1.2.md](docs/SUBMISSION_CHECKLIST_v2.1.2.md)
|
|
194
|
+
|
|
195
|
+
Current release tag: **v2.1.2** | Package version: **2.1.2**
|
|
196
|
+
|
|
197
|
+
## Reproducing Paper Results
|
|
198
|
+
|
|
199
|
+
Verify claims before citing numbers:
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
python scripts/verify_paper_claims.py # expect [ALL OK]
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
One-command reproduction (Windows):
|
|
206
|
+
|
|
207
|
+
```powershell
|
|
208
|
+
.\reproduce_all.ps1
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Or step-by-step:
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
python run_correction_discovery.py --proposer mock # Main benchmark + gate telemetry
|
|
215
|
+
python run_real_data_benchmark.py # Real-world (4 scenarios)
|
|
216
|
+
python run_pysr_baseline.py --profile fair # Fair PySR comparison
|
|
217
|
+
python run_ablation.py # Gate ablation study
|
|
218
|
+
python run_oracle_ablation.py # Oracle ground-truth injection test
|
|
219
|
+
python run_correction_scaling.py # Correction magnitude sweep
|
|
220
|
+
python scripts/generate_experiment_report.py # Sync experiment_results.md
|
|
221
|
+
python scripts/generate_efficiency_table.py # ADCD vs PySR efficiency table
|
|
222
|
+
python scripts/validate_results.py # Consistency checks
|
|
223
|
+
python generate_figures.py # All paper figures
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
> **Proposer regimes:** Mock Proposer = template-assisted recovery; Hybrid/Gemini = zero-shot discovery. Report both separately (see paper Section 4).
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
# LLM benchmark (requires GEMINI_API_KEY) — writes results/llm_benchmark.json
|
|
230
|
+
python run_llm_benchmark.py --proposer hybrid
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## Citing This Work
|
|
234
|
+
|
|
235
|
+
If you use ADCD in your research, please cite:
|
|
236
|
+
|
|
237
|
+
```bibtex
|
|
238
|
+
@software{erdita2026adcd,
|
|
239
|
+
author = {Erdita, Muhammad Afif},
|
|
240
|
+
title = {{Anomaly-Driven Correction Discovery (ADCD): Physics-Constrained
|
|
241
|
+
Symbolic Regression for Evolutionary Scientific Discovery}},
|
|
242
|
+
year = {2026},
|
|
243
|
+
publisher = {Zenodo},
|
|
244
|
+
version = {2.1.2},
|
|
245
|
+
doi = {10.5281/zenodo.20534940},
|
|
246
|
+
url = {https://doi.org/10.5281/zenodo.20534940}
|
|
247
|
+
}
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## AI Disclosure
|
|
251
|
+
|
|
252
|
+
This project was developed with assistance from Google DeepMind's Antigravity AI assistant. AI was used as a pair-programming and writing tool. All scientific content, experimental design decisions, and intellectual contributions are the author's own.
|
|
253
|
+
|
|
254
|
+
## License
|
|
255
|
+
|
|
256
|
+
[MIT](LICENSE)
|
|
adcd-2.1.2/pyproject.toml
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "adcd"
|
|
7
|
+
version = "2.1.2"
|
|
8
|
+
description = "Anomaly-Driven Correction Discovery: Physics-Constrained Symbolic Regression for Evolutionary Scientific Discovery"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Muhammad Afif Erdita", email = "maeapip10@gmail.com" }
|
|
13
|
+
]
|
|
14
|
+
keywords = ["symbolic regression", "physics", "machine learning", "scientific discovery", "JAX"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Intended Audience :: Science/Research",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Physics",
|
|
23
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
24
|
+
]
|
|
25
|
+
requires-python = ">=3.10"
|
|
26
|
+
dependencies = [
|
|
27
|
+
"numpy>=1.24",
|
|
28
|
+
"scipy>=1.11",
|
|
29
|
+
"sympy>=1.12",
|
|
30
|
+
"jax>=0.4.20",
|
|
31
|
+
"jaxlib>=0.4.20",
|
|
32
|
+
"matplotlib>=3.7",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
dev = [
|
|
37
|
+
"pytest>=7.4",
|
|
38
|
+
"pytest-cov>=4.1",
|
|
39
|
+
"flake8>=6.1",
|
|
40
|
+
"black>=23.0",
|
|
41
|
+
]
|
|
42
|
+
llm = [
|
|
43
|
+
"google-generativeai>=0.3",
|
|
44
|
+
]
|
|
45
|
+
all = ["adcd[dev,llm]"]
|
|
46
|
+
|
|
47
|
+
[project.urls]
|
|
48
|
+
Homepage = "https://zenodo.org/records/20534940"
|
|
49
|
+
Repository = "https://github.com/apiprdt/PhysicsPaper"
|
|
50
|
+
"Bug Tracker" = "https://github.com/apiprdt/PhysicsPaper/issues"
|
|
51
|
+
DOI = "https://doi.org/10.5281/zenodo.20534940"
|
|
52
|
+
|
|
53
|
+
[project.scripts]
|
|
54
|
+
adcd-discover = "adcd.correction_orchestrator:main_cli"
|
|
55
|
+
|
|
56
|
+
[tool.setuptools.packages.find]
|
|
57
|
+
where = ["src"]
|
|
58
|
+
|
|
59
|
+
[tool.setuptools.package-dir]
|
|
60
|
+
"" = "src"
|
|
61
|
+
|
|
62
|
+
[tool.pytest.ini_options]
|
|
63
|
+
testpaths = ["tests"]
|
|
64
|
+
python_files = ["test_*.py"]
|
|
65
|
+
python_classes = ["Test*"]
|
|
66
|
+
python_functions = ["test_*"]
|
|
67
|
+
addopts = "--tb=short -q"
|
|
68
|
+
filterwarnings = [
|
|
69
|
+
# matplotlib emits DeprecationWarning when plt.show() is called on a
|
|
70
|
+
# non-interactive (Agg) backend. This is a third-party warning not under
|
|
71
|
+
# our control; we already guard plt.show() with plt.isinteractive().
|
|
72
|
+
"ignore:FigureCanvasAgg is non-interactive:DeprecationWarning",
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
[tool.coverage.run]
|
|
76
|
+
source = ["adcd"]
|
|
77
|
+
omit = ["tests/*", "scratch/*"]
|
|
78
|
+
|
|
79
|
+
[tool.black]
|
|
80
|
+
line-length = 100
|
|
81
|
+
target-version = ["py310", "py311"]
|
|
82
|
+
|
|
83
|
+
[tool.flake8]
|
|
84
|
+
max-line-length = 100
|
|
85
|
+
exclude = [".git", "__pycache__", "scratch", "*.egg-info"]
|
adcd-2.1.2/setup.cfg
ADDED