adcd 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. adcd-2.1.2/LICENSE +21 -0
  2. adcd-2.1.2/PKG-INFO +295 -0
  3. adcd-2.1.2/README.md +256 -0
  4. adcd-2.1.2/pyproject.toml +85 -0
  5. adcd-2.1.2/setup.cfg +4 -0
  6. adcd-2.1.2/src/adcd/__init__.py +87 -0
  7. adcd-2.1.2/src/adcd/anomaly_scenarios.py +393 -0
  8. adcd-2.1.2/src/adcd/api.py +251 -0
  9. adcd-2.1.2/src/adcd/arc_scorer.py +198 -0
  10. adcd-2.1.2/src/adcd/coarse_evaluator.py +97 -0
  11. adcd-2.1.2/src/adcd/correction_orchestrator.py +482 -0
  12. adcd-2.1.2/src/adcd/dimensional_checker.py +210 -0
  13. adcd-2.1.2/src/adcd/feynman_dataset.py +502 -0
  14. adcd-2.1.2/src/adcd/jax_optimizer.py +428 -0
  15. adcd-2.1.2/src/adcd/llm_proposer.py +993 -0
  16. adcd-2.1.2/src/adcd/metrics.py +300 -0
  17. adcd-2.1.2/src/adcd/mode_detection.py +72 -0
  18. adcd-2.1.2/src/adcd/orchestrator.py +277 -0
  19. adcd-2.1.2/src/adcd/pipeline.py +205 -0
  20. adcd-2.1.2/src/adcd/real_data_loader.py +310 -0
  21. adcd-2.1.2/src/adcd/real_scenarios.py +162 -0
  22. adcd-2.1.2/src/adcd/residual_analyzer.py +124 -0
  23. adcd-2.1.2/src/adcd/result.py +291 -0
  24. adcd-2.1.2/src/adcd.egg-info/PKG-INFO +295 -0
  25. adcd-2.1.2/src/adcd.egg-info/SOURCES.txt +40 -0
  26. adcd-2.1.2/src/adcd.egg-info/dependency_links.txt +1 -0
  27. adcd-2.1.2/src/adcd.egg-info/entry_points.txt +2 -0
  28. adcd-2.1.2/src/adcd.egg-info/requires.txt +18 -0
  29. adcd-2.1.2/src/adcd.egg-info/top_level.txt +1 -0
  30. adcd-2.1.2/tests/test_api.py +216 -0
  31. adcd-2.1.2/tests/test_arc_scorer.py +84 -0
  32. adcd-2.1.2/tests/test_coarse_evaluator.py +45 -0
  33. adcd-2.1.2/tests/test_correction_discovery.py +159 -0
  34. adcd-2.1.2/tests/test_dimensional_checker.py +103 -0
  35. adcd-2.1.2/tests/test_gate_telemetry.py +64 -0
  36. adcd-2.1.2/tests/test_integration.py +144 -0
  37. adcd-2.1.2/tests/test_jax_optimizer.py +250 -0
  38. adcd-2.1.2/tests/test_metrics_scale.py +56 -0
  39. adcd-2.1.2/tests/test_multivar_arc.py +45 -0
  40. adcd-2.1.2/tests/test_paper_claims.py +50 -0
  41. adcd-2.1.2/tests/test_pipeline.py +139 -0
  42. adcd-2.1.2/tests/test_real_data.py +261 -0
adcd-2.1.2/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Muhammad Afif Erdita
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
adcd-2.1.2/PKG-INFO ADDED
@@ -0,0 +1,295 @@
1
+ Metadata-Version: 2.4
2
+ Name: adcd
3
+ Version: 2.1.2
4
+ Summary: Anomaly-Driven Correction Discovery: Physics-Constrained Symbolic Regression for Evolutionary Scientific Discovery
5
+ Author-email: Muhammad Afif Erdita <maeapip10@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://zenodo.org/records/20534940
8
+ Project-URL: Repository, https://github.com/apiprdt/PhysicsPaper
9
+ Project-URL: Bug Tracker, https://github.com/apiprdt/PhysicsPaper/issues
10
+ Project-URL: DOI, https://doi.org/10.5281/zenodo.20534940
11
+ Keywords: symbolic regression,physics,machine learning,scientific discovery,JAX
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Topic :: Scientific/Engineering :: Physics
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: numpy>=1.24
24
+ Requires-Dist: scipy>=1.11
25
+ Requires-Dist: sympy>=1.12
26
+ Requires-Dist: jax>=0.4.20
27
+ Requires-Dist: jaxlib>=0.4.20
28
+ Requires-Dist: matplotlib>=3.7
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest>=7.4; extra == "dev"
31
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
32
+ Requires-Dist: flake8>=6.1; extra == "dev"
33
+ Requires-Dist: black>=23.0; extra == "dev"
34
+ Provides-Extra: llm
35
+ Requires-Dist: google-generativeai>=0.3; extra == "llm"
36
+ Provides-Extra: all
37
+ Requires-Dist: adcd[dev,llm]; extra == "all"
38
+ Dynamic: license-file
39
+
40
+ # ADCD — Anomaly-Driven Correction Discovery
41
+
42
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.20534940.svg)](https://doi.org/10.5281/zenodo.20534940)
43
+ [![CI](https://github.com/apiprdt/PhysicsPaper/actions/workflows/ci.yml/badge.svg)](https://github.com/apiprdt/PhysicsPaper/actions/workflows/ci.yml)
44
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
45
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
46
+
47
+ **Physics-Constrained Symbolic Regression for Evolutionary Scientific Discovery**
48
+
49
+ ADCD is a symbolic regression framework that discovers *physical correction terms* rather than learning equations from scratch. Given a known classical law and anomalous observations, ADCD recovers the dimensionless correction Δ that reconciles theory with experiment — mirroring how physics actually evolves.
50
+
51
+ > **82.8% (±7.7%) mean structural recovery** across 5 random seeds, with peak **94.4%** at the reference seed.
52
+ > **4/4 real-world structural class matches** (Mercury, Lamb Shift, Muon g-2, Blackbody).
53
+ > **77 automated unit tests** passing on Python 3.10 and 3.11.
54
+
55
+ ---
56
+
57
+ ## Key Features
58
+
59
+ - **Correction-first paradigm** — starts from a known classical law, not a blank slate; designed for anomaly-driven theory refinement where the baseline is structurally correct
60
+ - **Physics-gated search cascade** — AST complexity, dimensional homogeneity + transcendental guardrails, and asymptotic consistency (ARC) gates screen unphysical candidates *before* optimization
61
+ - **JAX-traced L-BFGS-B optimizer** — parameter-scaled differentiable fitting with multi-restart log-uniform initialization
62
+ - **BIC reranking** — selects the most parsimonious correction over purely numerical fits
63
+ - **Residual feature intelligence** — statistical priors (monotonicity, curvature, oscillation, decay rate, symmetry) bias the template sampler toward the correct mathematical family
64
+ - **Coarse empirical evaluation** — data-driven pre-filter ranks gate survivors before full JAX optimization
65
+ - **Noise-robust** — 93.3% mean at 0% noise, 91.1% at 1%, 71.1% at 5%, 68.9% at 10%
66
+
67
+ ## Quick Start
68
+
69
+ ### Installation
70
+
71
+ ```bash
72
+ pip install adcd
73
+ ```
74
+
75
+ Or install from source:
76
+
77
+ ```bash
78
+ git clone https://github.com/apiprdt/PhysicsPaper.git
79
+ cd PhysicsPaper
80
+ pip install -e ".[dev]"
81
+ ```
82
+
83
+ ### Usage
84
+
85
+ Running ADCD is extremely simple using the high-level scientific API:
86
+
87
+ ```python
88
+ import adcd
89
+
90
+ # 1. Load a pre-defined benchmark scenario
91
+ scenarios = adcd.get_all_scenarios()
92
+ scenario = scenarios[0] # Relativistic Kinetic Energy
93
+
94
+ # 2. Run discovery in a single line!
95
+ result = adcd.discover_correction(scenario, max_iterations=5, proposer="mock")
96
+
97
+ print(f"Discovered correction: {result.best_expr}")
98
+ print(f"Residual NMSE: {result.best_nmse_residual:.2e}")
99
+ print(f"Parameters: {result.best_theta}")
100
+
101
+ # 3. Export LaTeX or plot residuals
102
+ print(result.export_latex())
103
+ result.plot_residuals()
104
+ ```
105
+
106
+ For custom experimental data, use `adcd.fit(...)`:
107
+
108
+ ```python
109
+ import numpy as np
110
+ import adcd
111
+
112
+ x = np.linspace(1.0, 5.0, 100)
113
+ X = {"x": x}
114
+ y_classical = 2.0 * x
115
+ y_observed = 2.0 * x + 0.5 * x**2 # hidden x² correction
116
+
117
+ result = adcd.fit(
118
+ X=X,
119
+ y_obs=y_observed,
120
+ y_classical=y_classical,
121
+ limit_variable="x",
122
+ limit_direction="0",
123
+ correction_mode="additive"
124
+ )
125
+
126
+ result.summary()
127
+ ```
128
+
129
+ ## Benchmark Results
130
+
131
+ ### Standard Benchmark (seed=42, Mock Proposer)
132
+
133
+ Results from `run_correction_discovery.py --proposer mock` (reference seed=42, 4 iterations per scenario).
134
+
135
+ | Scenario | Tier | 0% Noise | 1% Noise | 5% Noise | 10% Noise |
136
+ |----------|------|:--------:|:--------:|:--------:|:---------:|
137
+ | Relativistic KE | Textbook | ✓ | ✓ | ✓ | ✓ |
138
+ | Yukawa Gravity | Textbook | ✓ | ✓ | ✓ | ✓ |
139
+ | Anharmonic Spring | Textbook | ✓ | ✓ | ✓ | ✓ |
140
+ | Screened Coulomb | Cross-Domain | ✓ | ✓ | ✗ | ✗ |
141
+ | Net Radiation | Cross-Domain | ✓ | ✓ | ✓ | ✓ |
142
+ | Nonlinear Drag | Cross-Domain | ✓ | ✓ | ✓ | ✓ |
143
+ | Mystery-A (tanh²) | Synthetic | ✓ | ✓ | ✓ | ✓ |
144
+ | Mystery-B (sinc) | Synthetic | ✓ | ✓ | ✓ | ✓ |
145
+ | Mystery-C (log-quotient) | Synthetic | ✓ | ✓ | ✓ | ✓ |
146
+ | **Overall** | | **100%** | **100%** | **88.9%** | **88.9%** |
147
+
148
+ > **Note**: Screened Coulomb fails at ≥5% noise because exponential decay ($e^{-r/\lambda}$) and rational saturation ($r/(r+\lambda)$) are numerically indistinguishable at the tested SNR with limited dynamic range — an information-theoretic limit, not a framework deficiency.
149
+
150
+ ### Multi-Seed Reproducibility
151
+
152
+ All results are reported across 5 independent random seeds (0, 7, 21, 42, 99):
153
+
154
+ | Seed | Class Match Rate |
155
+ |:----:|:----------------:|
156
+ | 0 | 86.1% (31/36) |
157
+ | 7 | 75.0% (27/36) |
158
+ | 21 | 77.8% (28/36) |
159
+ | 42 | 94.4% (34/36) |
160
+ | 99 | 80.6% (29/36) |
161
+ | **Mean** | **82.8% ± 7.7%** |
162
+
163
+ Performance variation reflects stochastic template sampling in the MockProposer. Physics gates ensure that **when** the correct functional family is sampled, it consistently survives filtering and is selected by BIC reranking.
164
+
165
+ ### Real-World Physical Constants Benchmark
166
+
167
+ Synthetic-real hybrid data using experimentally validated constants from JPL DE440, NIST, and CODATA:
168
+
169
+ | Physical Scenario | Discovered Correction | Converged | Class Match | NMSE |
170
+ |---|---|:---:|:---:|:---:|
171
+ | Mercury Perihelion (GR) | `θ₀·vc²` | — | ✓ polynomial | 1.11e-05 |
172
+ | Hydrogen Lamb Shift (QED) | `θ₀(n/θ₁)^(-θ₂)` | ✓ | ✓ power_law | 1.82e-18 |
173
+ | Muon g-2 (Schwinger) | `θ₀(α/π)^θ₁` | ✓ | ✓ polynomial | 7.94e-07 |
174
+ | Blackbody (Planck) | `-1 + e^(-f/θ₁)` | — | ✓ exponential | 2.59e-02 |
175
+
176
+ All 4 scenarios achieve correct structural class identification. 2 scenarios (Lamb Shift, Muon g-2) achieve full convergence with NMSE < 10⁻⁶. Mercury and Blackbody achieve correct structural identification but quantitative convergence is limited by parametrization sensitivity and dynamic range, respectively.
177
+
178
+ ### PySR Comparison (fair profile: 100 iterations, maxsize 30, 60s timeout)
179
+
180
+ | Method | 0% Noise | 1% Noise | 5% Noise | 10% Noise |
181
+ |--------|:--------:|:--------:|:--------:|:---------:|
182
+ | ADCD (ours, seed=42) | 9/9 (100%) | 9/9 (100%) | 8/9 (88.9%) | 8/9 (88.9%) |
183
+ | PySR fair | 4/9 (44.4%) | 5/9 (55.6%) | 1/9 (11.1%) | 5/9 (55.6%) |
184
+
185
+ ADCD outperforms PySR fair by **77.8 percentage points** at 5% noise (88.9% vs 11.1%). A legacy fast profile (wall-clock matched) is retained in `pysr_baseline_results.json` for historical comparison only.
186
+
187
+ ## Project Structure
188
+
189
+ ```
190
+ PhysicsPaper/
191
+ ├── src/adcd/ # Installable package
192
+ │ ├── __init__.py # Public API (adcd.fit, adcd.discover_correction)
193
+ │ ├── anomaly_scenarios.py # 9 standard + 3 blind benchmark scenarios
194
+ │ ├── arc_scorer.py # Asymptotic consistency gate (ARC)
195
+ │ ├── coarse_evaluator.py # Coarse numerical pre-filter
196
+ │ ├── correction_orchestrator.py # Main multi-iteration discovery loop
197
+ │ ├── dimensional_checker.py # Dimensional homogeneity + transcendental guardrail
198
+ │ ├── jax_optimizer.py # JAX L-BFGS-B optimizer (parameter-scaled)
199
+ │ ├── llm_proposer.py # Mock + Gemini + OpenAI-compatible proposers
200
+ │ ├── metrics.py # NMSE, BIC, structural classification
201
+ │ ├── pipeline.py # Stage 1 filter cascade
202
+ │ ├── real_data_loader.py # Real-world data loading (JPL, NIST, CODATA)
203
+ │ ├── real_scenarios.py # Real-world validation scenarios
204
+ │ ├── residual_analyzer.py # Statistical residual feature extraction
205
+ │ └── result.py # CorrectionResult: summary, LaTeX, plot
206
+ ├── tests/ # 77 unit + integration tests
207
+ ├── paper/ # LaTeX source (main.tex) + figures
208
+ ├── run_correction_discovery.py # Standard 9-scenario benchmark runner
209
+ ├── run_real_data_benchmark.py # Real-world physical constants benchmark
210
+ ├── run_reproducibility.py # Multi-seed reproducibility study (5 seeds)
211
+ ├── run_ablation.py # Gate ablation study
212
+ ├── run_pysr_baseline.py # PySR comparison baseline
213
+ ├── run_mlp_baseline.py # MLP comparison baseline
214
+ ├── run_misspecification_benchmark.py # Baseline misspecification fail-safe test
215
+ ├── generate_figures.py # Paper figure generator
216
+ ├── .github/workflows/ # CI (test + lint + LaTeX) and PyPI publish
217
+ ├── pyproject.toml # PEP 517/518 build configuration
218
+ └── README.md # This file
219
+ ```
220
+
221
+ ## Running Tests
222
+
223
+ ```bash
224
+ pip install -e ".[dev]"
225
+ pytest --cov=adcd
226
+ ```
227
+
228
+ All 77 tests pass on Python 3.10 and 3.11 (Ubuntu and Windows).
229
+
230
+ ## Submission & Release
231
+
232
+ Paper submission guide (GitHub Release → Zenodo → arXiv): [docs/SUBMISSION_CHECKLIST_v2.1.2.md](docs/SUBMISSION_CHECKLIST_v2.1.2.md)
233
+
234
+ Current release tag: **v2.1.2** | Package version: **2.1.2**
235
+
236
+ ## Reproducing Paper Results
237
+
238
+ Verify claims before citing numbers:
239
+
240
+ ```bash
241
+ python scripts/verify_paper_claims.py # expect [ALL OK]
242
+ ```
243
+
244
+ One-command reproduction (Windows):
245
+
246
+ ```powershell
247
+ .\reproduce_all.ps1
248
+ ```
249
+
250
+ Or step-by-step:
251
+
252
+ ```bash
253
+ python run_correction_discovery.py --proposer mock # Main benchmark + gate telemetry
254
+ python run_real_data_benchmark.py # Real-world (4 scenarios)
255
+ python run_pysr_baseline.py --profile fair # Fair PySR comparison
256
+ python run_ablation.py # Gate ablation study
257
+ python run_oracle_ablation.py # Oracle ground-truth injection test
258
+ python run_correction_scaling.py # Correction magnitude sweep
259
+ python scripts/generate_experiment_report.py # Sync experiment_results.md
260
+ python scripts/generate_efficiency_table.py # ADCD vs PySR efficiency table
261
+ python scripts/validate_results.py # Consistency checks
262
+ python generate_figures.py # All paper figures
263
+ ```
264
+
265
+ > **Proposer regimes:** Mock Proposer = template-assisted recovery; Hybrid/Gemini = zero-shot discovery. Report both separately (see paper Section 4).
266
+
267
+ ```bash
268
+ # LLM benchmark (requires GEMINI_API_KEY) — writes results/llm_benchmark.json
269
+ python run_llm_benchmark.py --proposer hybrid
270
+ ```
271
+
272
+ ## Citing This Work
273
+
274
+ If you use ADCD in your research, please cite:
275
+
276
+ ```bibtex
277
+ @software{erdita2026adcd,
278
+ author = {Erdita, Muhammad Afif},
279
+ title = {{Anomaly-Driven Correction Discovery (ADCD): Physics-Constrained
280
+ Symbolic Regression for Evolutionary Scientific Discovery}},
281
+ year = {2026},
282
+ publisher = {Zenodo},
283
+ version = {2.1.2},
284
+ doi = {10.5281/zenodo.20534940},
285
+ url = {https://doi.org/10.5281/zenodo.20534940}
286
+ }
287
+ ```
288
+
289
+ ## AI Disclosure
290
+
291
+ This project was developed with assistance from Google DeepMind's Antigravity AI assistant. AI was used as a pair-programming and writing tool. All scientific content, experimental design decisions, and intellectual contributions are the author's own.
292
+
293
+ ## License
294
+
295
+ [MIT](LICENSE)
adcd-2.1.2/README.md ADDED
@@ -0,0 +1,256 @@
1
+ # ADCD — Anomaly-Driven Correction Discovery
2
+
3
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.20534940.svg)](https://doi.org/10.5281/zenodo.20534940)
4
+ [![CI](https://github.com/apiprdt/PhysicsPaper/actions/workflows/ci.yml/badge.svg)](https://github.com/apiprdt/PhysicsPaper/actions/workflows/ci.yml)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
7
+
8
+ **Physics-Constrained Symbolic Regression for Evolutionary Scientific Discovery**
9
+
10
+ ADCD is a symbolic regression framework that discovers *physical correction terms* rather than learning equations from scratch. Given a known classical law and anomalous observations, ADCD recovers the dimensionless correction Δ that reconciles theory with experiment — mirroring how physics actually evolves.
11
+
12
+ > **82.8% (±7.7%) mean structural recovery** across 5 random seeds, with peak **94.4%** at the reference seed.
13
+ > **4/4 real-world structural class matches** (Mercury, Lamb Shift, Muon g-2, Blackbody).
14
+ > **77 automated unit tests** passing on Python 3.10 and 3.11.
15
+
16
+ ---
17
+
18
+ ## Key Features
19
+
20
+ - **Correction-first paradigm** — starts from a known classical law, not a blank slate; designed for anomaly-driven theory refinement where the baseline is structurally correct
21
+ - **Physics-gated search cascade** — AST complexity, dimensional homogeneity + transcendental guardrails, and asymptotic consistency (ARC) gates screen unphysical candidates *before* optimization
22
+ - **JAX-traced L-BFGS-B optimizer** — parameter-scaled differentiable fitting with multi-restart log-uniform initialization
23
+ - **BIC reranking** — selects the most parsimonious correction over purely numerical fits
24
+ - **Residual feature intelligence** — statistical priors (monotonicity, curvature, oscillation, decay rate, symmetry) bias the template sampler toward the correct mathematical family
25
+ - **Coarse empirical evaluation** — data-driven pre-filter ranks gate survivors before full JAX optimization
26
+ - **Noise-robust** — 93.3% mean at 0% noise, 91.1% at 1%, 71.1% at 5%, 68.9% at 10%
27
+
28
+ ## Quick Start
29
+
30
+ ### Installation
31
+
32
+ ```bash
33
+ pip install adcd
34
+ ```
35
+
36
+ Or install from source:
37
+
38
+ ```bash
39
+ git clone https://github.com/apiprdt/PhysicsPaper.git
40
+ cd PhysicsPaper
41
+ pip install -e ".[dev]"
42
+ ```
43
+
44
+ ### Usage
45
+
46
+ Running ADCD is extremely simple using the high-level scientific API:
47
+
48
+ ```python
49
+ import adcd
50
+
51
+ # 1. Load a pre-defined benchmark scenario
52
+ scenarios = adcd.get_all_scenarios()
53
+ scenario = scenarios[0] # Relativistic Kinetic Energy
54
+
55
+ # 2. Run discovery in a single line!
56
+ result = adcd.discover_correction(scenario, max_iterations=5, proposer="mock")
57
+
58
+ print(f"Discovered correction: {result.best_expr}")
59
+ print(f"Residual NMSE: {result.best_nmse_residual:.2e}")
60
+ print(f"Parameters: {result.best_theta}")
61
+
62
+ # 3. Export LaTeX or plot residuals
63
+ print(result.export_latex())
64
+ result.plot_residuals()
65
+ ```
66
+
67
+ For custom experimental data, use `adcd.fit(...)`:
68
+
69
+ ```python
70
+ import numpy as np
71
+ import adcd
72
+
73
+ x = np.linspace(1.0, 5.0, 100)
74
+ X = {"x": x}
75
+ y_classical = 2.0 * x
76
+ y_observed = 2.0 * x + 0.5 * x**2 # hidden x² correction
77
+
78
+ result = adcd.fit(
79
+ X=X,
80
+ y_obs=y_observed,
81
+ y_classical=y_classical,
82
+ limit_variable="x",
83
+ limit_direction="0",
84
+ correction_mode="additive"
85
+ )
86
+
87
+ result.summary()
88
+ ```
89
+
90
+ ## Benchmark Results
91
+
92
+ ### Standard Benchmark (seed=42, Mock Proposer)
93
+
94
+ Results from `run_correction_discovery.py --proposer mock` (reference seed=42, 4 iterations per scenario).
95
+
96
+ | Scenario | Tier | 0% Noise | 1% Noise | 5% Noise | 10% Noise |
97
+ |----------|------|:--------:|:--------:|:--------:|:---------:|
98
+ | Relativistic KE | Textbook | ✓ | ✓ | ✓ | ✓ |
99
+ | Yukawa Gravity | Textbook | ✓ | ✓ | ✓ | ✓ |
100
+ | Anharmonic Spring | Textbook | ✓ | ✓ | ✓ | ✓ |
101
+ | Screened Coulomb | Cross-Domain | ✓ | ✓ | ✗ | ✗ |
102
+ | Net Radiation | Cross-Domain | ✓ | ✓ | ✓ | ✓ |
103
+ | Nonlinear Drag | Cross-Domain | ✓ | ✓ | ✓ | ✓ |
104
+ | Mystery-A (tanh²) | Synthetic | ✓ | ✓ | ✓ | ✓ |
105
+ | Mystery-B (sinc) | Synthetic | ✓ | ✓ | ✓ | ✓ |
106
+ | Mystery-C (log-quotient) | Synthetic | ✓ | ✓ | ✓ | ✓ |
107
+ | **Overall** | | **100%** | **100%** | **88.9%** | **88.9%** |
108
+
109
+ > **Note**: Screened Coulomb fails at ≥5% noise because exponential decay ($e^{-r/\lambda}$) and rational saturation ($r/(r+\lambda)$) are numerically indistinguishable at the tested SNR with limited dynamic range — an information-theoretic limit, not a framework deficiency.
110
+
111
+ ### Multi-Seed Reproducibility
112
+
113
+ All results are reported across 5 independent random seeds (0, 7, 21, 42, 99):
114
+
115
+ | Seed | Class Match Rate |
116
+ |:----:|:----------------:|
117
+ | 0 | 86.1% (31/36) |
118
+ | 7 | 75.0% (27/36) |
119
+ | 21 | 77.8% (28/36) |
120
+ | 42 | 94.4% (34/36) |
121
+ | 99 | 80.6% (29/36) |
122
+ | **Mean** | **82.8% ± 7.7%** |
123
+
124
+ Performance variation reflects stochastic template sampling in the MockProposer. Physics gates ensure that **when** the correct functional family is sampled, it consistently survives filtering and is selected by BIC reranking.
125
+
126
+ ### Real-World Physical Constants Benchmark
127
+
128
+ Synthetic-real hybrid data using experimentally validated constants from JPL DE440, NIST, and CODATA:
129
+
130
+ | Physical Scenario | Discovered Correction | Converged | Class Match | NMSE |
131
+ |---|---|:---:|:---:|:---:|
132
+ | Mercury Perihelion (GR) | `θ₀·vc²` | — | ✓ polynomial | 1.11e-05 |
133
+ | Hydrogen Lamb Shift (QED) | `θ₀(n/θ₁)^(-θ₂)` | ✓ | ✓ power_law | 1.82e-18 |
134
+ | Muon g-2 (Schwinger) | `θ₀(α/π)^θ₁` | ✓ | ✓ polynomial | 7.94e-07 |
135
+ | Blackbody (Planck) | `-1 + e^(-f/θ₁)` | — | ✓ exponential | 2.59e-02 |
136
+
137
+ All 4 scenarios achieve correct structural class identification. 2 scenarios (Lamb Shift, Muon g-2) achieve full convergence with NMSE < 10⁻⁶. Mercury and Blackbody achieve correct structural identification but quantitative convergence is limited by parametrization sensitivity and dynamic range, respectively.
138
+
139
+ ### PySR Comparison (fair profile: 100 iterations, maxsize 30, 60s timeout)
140
+
141
+ | Method | 0% Noise | 1% Noise | 5% Noise | 10% Noise |
142
+ |--------|:--------:|:--------:|:--------:|:---------:|
143
+ | ADCD (ours, seed=42) | 9/9 (100%) | 9/9 (100%) | 8/9 (88.9%) | 8/9 (88.9%) |
144
+ | PySR fair | 4/9 (44.4%) | 5/9 (55.6%) | 1/9 (11.1%) | 5/9 (55.6%) |
145
+
146
+ ADCD outperforms PySR fair by **77.8 percentage points** at 5% noise (88.9% vs 11.1%). A legacy fast profile (wall-clock matched) is retained in `pysr_baseline_results.json` for historical comparison only.
147
+
148
+ ## Project Structure
149
+
150
+ ```
151
+ PhysicsPaper/
152
+ ├── src/adcd/ # Installable package
153
+ │ ├── __init__.py # Public API (adcd.fit, adcd.discover_correction)
154
+ │ ├── anomaly_scenarios.py # 9 standard + 3 blind benchmark scenarios
155
+ │ ├── arc_scorer.py # Asymptotic consistency gate (ARC)
156
+ │ ├── coarse_evaluator.py # Coarse numerical pre-filter
157
+ │ ├── correction_orchestrator.py # Main multi-iteration discovery loop
158
+ │ ├── dimensional_checker.py # Dimensional homogeneity + transcendental guardrail
159
+ │ ├── jax_optimizer.py # JAX L-BFGS-B optimizer (parameter-scaled)
160
+ │ ├── llm_proposer.py # Mock + Gemini + OpenAI-compatible proposers
161
+ │ ├── metrics.py # NMSE, BIC, structural classification
162
+ │ ├── pipeline.py # Stage 1 filter cascade
163
+ │ ├── real_data_loader.py # Real-world data loading (JPL, NIST, CODATA)
164
+ │ ├── real_scenarios.py # Real-world validation scenarios
165
+ │ ├── residual_analyzer.py # Statistical residual feature extraction
166
+ │ └── result.py # CorrectionResult: summary, LaTeX, plot
167
+ ├── tests/ # 77 unit + integration tests
168
+ ├── paper/ # LaTeX source (main.tex) + figures
169
+ ├── run_correction_discovery.py # Standard 9-scenario benchmark runner
170
+ ├── run_real_data_benchmark.py # Real-world physical constants benchmark
171
+ ├── run_reproducibility.py # Multi-seed reproducibility study (5 seeds)
172
+ ├── run_ablation.py # Gate ablation study
173
+ ├── run_pysr_baseline.py # PySR comparison baseline
174
+ ├── run_mlp_baseline.py # MLP comparison baseline
175
+ ├── run_misspecification_benchmark.py # Baseline misspecification fail-safe test
176
+ ├── generate_figures.py # Paper figure generator
177
+ ├── .github/workflows/ # CI (test + lint + LaTeX) and PyPI publish
178
+ ├── pyproject.toml # PEP 517/518 build configuration
179
+ └── README.md # This file
180
+ ```
181
+
182
+ ## Running Tests
183
+
184
+ ```bash
185
+ pip install -e ".[dev]"
186
+ pytest --cov=adcd
187
+ ```
188
+
189
+ All 77 tests pass on Python 3.10 and 3.11 (Ubuntu and Windows).
190
+
191
+ ## Submission & Release
192
+
193
+ Paper submission guide (GitHub Release → Zenodo → arXiv): [docs/SUBMISSION_CHECKLIST_v2.1.2.md](docs/SUBMISSION_CHECKLIST_v2.1.2.md)
194
+
195
+ Current release tag: **v2.1.2** | Package version: **2.1.2**
196
+
197
+ ## Reproducing Paper Results
198
+
199
+ Verify claims before citing numbers:
200
+
201
+ ```bash
202
+ python scripts/verify_paper_claims.py # expect [ALL OK]
203
+ ```
204
+
205
+ One-command reproduction (Windows):
206
+
207
+ ```powershell
208
+ .\reproduce_all.ps1
209
+ ```
210
+
211
+ Or step-by-step:
212
+
213
+ ```bash
214
+ python run_correction_discovery.py --proposer mock # Main benchmark + gate telemetry
215
+ python run_real_data_benchmark.py # Real-world (4 scenarios)
216
+ python run_pysr_baseline.py --profile fair # Fair PySR comparison
217
+ python run_ablation.py # Gate ablation study
218
+ python run_oracle_ablation.py # Oracle ground-truth injection test
219
+ python run_correction_scaling.py # Correction magnitude sweep
220
+ python scripts/generate_experiment_report.py # Sync experiment_results.md
221
+ python scripts/generate_efficiency_table.py # ADCD vs PySR efficiency table
222
+ python scripts/validate_results.py # Consistency checks
223
+ python generate_figures.py # All paper figures
224
+ ```
225
+
226
+ > **Proposer regimes:** Mock Proposer = template-assisted recovery; Hybrid/Gemini = zero-shot discovery. Report both separately (see paper Section 4).
227
+
228
+ ```bash
229
+ # LLM benchmark (requires GEMINI_API_KEY) — writes results/llm_benchmark.json
230
+ python run_llm_benchmark.py --proposer hybrid
231
+ ```
232
+
233
+ ## Citing This Work
234
+
235
+ If you use ADCD in your research, please cite:
236
+
237
+ ```bibtex
238
+ @software{erdita2026adcd,
239
+ author = {Erdita, Muhammad Afif},
240
+ title = {{Anomaly-Driven Correction Discovery (ADCD): Physics-Constrained
241
+ Symbolic Regression for Evolutionary Scientific Discovery}},
242
+ year = {2026},
243
+ publisher = {Zenodo},
244
+ version = {2.1.2},
245
+ doi = {10.5281/zenodo.20534940},
246
+ url = {https://doi.org/10.5281/zenodo.20534940}
247
+ }
248
+ ```
249
+
250
+ ## AI Disclosure
251
+
252
+ This project was developed with assistance from Google DeepMind's Antigravity AI assistant. AI was used as a pair-programming and writing tool. All scientific content, experimental design decisions, and intellectual contributions are the author's own.
253
+
254
+ ## License
255
+
256
+ [MIT](LICENSE)
adcd-2.1.2/pyproject.toml ADDED
@@ -0,0 +1,85 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "adcd"
7
+ version = "2.1.2"
8
+ description = "Anomaly-Driven Correction Discovery: Physics-Constrained Symbolic Regression for Evolutionary Scientific Discovery"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ authors = [
12
+ { name = "Muhammad Afif Erdita", email = "maeapip10@gmail.com" }
13
+ ]
14
+ keywords = ["symbolic regression", "physics", "machine learning", "scientific discovery", "JAX"]
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Intended Audience :: Science/Research",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Topic :: Scientific/Engineering :: Physics",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
+ ]
25
+ requires-python = ">=3.10"
26
+ dependencies = [
27
+ "numpy>=1.24",
28
+ "scipy>=1.11",
29
+ "sympy>=1.12",
30
+ "jax>=0.4.20",
31
+ "jaxlib>=0.4.20",
32
+ "matplotlib>=3.7",
33
+ ]
34
+
35
+ [project.optional-dependencies]
36
+ dev = [
37
+ "pytest>=7.4",
38
+ "pytest-cov>=4.1",
39
+ "flake8>=6.1",
40
+ "black>=23.0",
41
+ ]
42
+ llm = [
43
+ "google-generativeai>=0.3",
44
+ ]
45
+ all = ["adcd[dev,llm]"]
46
+
47
+ [project.urls]
48
+ Homepage = "https://zenodo.org/records/20534940"
49
+ Repository = "https://github.com/apiprdt/PhysicsPaper"
50
+ "Bug Tracker" = "https://github.com/apiprdt/PhysicsPaper/issues"
51
+ DOI = "https://doi.org/10.5281/zenodo.20534940"
52
+
53
+ [project.scripts]
54
+ adcd-discover = "adcd.correction_orchestrator:main_cli"
55
+
56
+ [tool.setuptools.packages.find]
57
+ where = ["src"]
58
+
59
+ [tool.setuptools.package-dir]
60
+ "" = "src"
61
+
62
+ [tool.pytest.ini_options]
63
+ testpaths = ["tests"]
64
+ python_files = ["test_*.py"]
65
+ python_classes = ["Test*"]
66
+ python_functions = ["test_*"]
67
+ addopts = "--tb=short -q"
68
+ filterwarnings = [
69
+ # matplotlib emits DeprecationWarning when plt.show() is called on a
70
+ # non-interactive (Agg) backend. This is a third-party warning not under
71
+ # our control; we already guard plt.show() with plt.isinteractive().
72
+ "ignore:FigureCanvasAgg is non-interactive:DeprecationWarning",
73
+ ]
74
+
75
+ [tool.coverage.run]
76
+ source = ["adcd"]
77
+ omit = ["tests/*", "scratch/*"]
78
+
79
+ [tool.black]
80
+ line-length = 100
81
+ target-version = ["py310", "py311"]
82
+
83
+ [tool.flake8]
84
+ max-line-length = 100
85
+ exclude = [".git", "__pycache__", "scratch", "*.egg-info"]
adcd-2.1.2/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+