adcd 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
adcd/__init__.py ADDED
@@ -0,0 +1,87 @@
1
+ """
2
+ ADCD — Anomaly-Driven Correction Discovery
3
+ ===========================================
4
+ A physics-constrained symbolic regression framework that discovers
5
+ physical correction terms rather than learning equations from scratch.
6
+
7
+ DOI: 10.5281/zenodo.20534940
8
+
9
+ Quick Start
10
+ -----------
11
+ >>> import adcd
12
+ >>> scenarios = adcd.get_all_scenarios()
13
+ >>> result = adcd.discover_correction(scenarios[0])
14
+ >>> print(result.best_expr)
15
+ """
16
+
17
# Package metadata, exposed as module-level dunder attributes.
__version__ = "2.1.2"
__author__ = "Muhammad Afif Erdita"
__email__ = "maeapip10@gmail.com"
__license__ = "MIT"
21
+
22
+ # High-level API entries
23
+ from adcd.api import fit, discover_correction
24
+ from adcd.result import ADCDResult
25
+
26
+ # Core discovery API
27
+ from adcd.correction_orchestrator import (
28
+ CorrectionOrchestrator,
29
+ CorrectionIterationResult,
30
+ CorrectionSearchResult,
31
+ )
32
+
33
+ # Scenario definitions
34
+ from adcd.anomaly_scenarios import (
35
+ AnomalyScenario,
36
+ get_all_scenarios,
37
+ )
38
+
39
+ # Metrics
40
+ from adcd.metrics import (
41
+ evaluate_correction,
42
+ classify_structure,
43
+ bic_score,
44
+ CorrectionEvaluation,
45
+ )
46
+
47
+ # Pipeline
48
+ from adcd.pipeline import Stage1Pipeline, GateStats
49
+
50
+ # Optimiser
51
+ from adcd.jax_optimizer import JAXOptimizer, OptimizationResult
52
+
53
+ # Gates
54
+ from adcd.dimensional_checker import ASTValidator, DimensionalChecker
55
+ from adcd.arc_scorer import ARCScorer, AsymptoticRegime, build_arc_regimes
56
+
57
# Public names re-exported by the package; this is what
# ``from adcd import *`` binds. Kept as a plain list literal.
__all__ = [
    # High-level API (adcd.api, adcd.result)
    "fit",
    "discover_correction",
    "ADCDResult",
    # Core discovery (adcd.correction_orchestrator)
    "CorrectionOrchestrator",
    "CorrectionIterationResult",
    "CorrectionSearchResult",
    # Scenario definitions (adcd.anomaly_scenarios)
    "AnomalyScenario",
    "get_all_scenarios",
    # Metrics (adcd.metrics)
    "evaluate_correction",
    "classify_structure",
    "bic_score",
    "CorrectionEvaluation",
    # Pipeline (adcd.pipeline)
    "Stage1Pipeline",
    "GateStats",
    # Optimiser (adcd.jax_optimizer)
    "JAXOptimizer",
    "OptimizationResult",
    # Gates (adcd.dimensional_checker, adcd.arc_scorer)
    "ASTValidator",
    "DimensionalChecker",
    "ARCScorer",
    "AsymptoticRegime",
    "build_arc_regimes",
    # Metadata
    "__version__",
]
87
+
@@ -0,0 +1,393 @@
1
+ import numpy as np
2
+ import sympy as sp
3
+ from dataclasses import dataclass
4
+ from typing import Dict, List, Tuple
5
+
6
@dataclass
class AnomalyScenario:
    """A benchmark case: a known classical law plus a hidden ground-truth correction.

    ``generate_data`` samples the input variables, evaluates ``classical_expr``
    and ``correction_expr`` on them and returns the synthetic observations
    together with the residual left over after the classical prediction.

    Both expressions are Python source strings executed with ``eval``, so
    scenarios must only ever be built from trusted definitions.
    """

    name: str
    tier: str    # "textbook" | "cross_domain" | "synthetic" | "blind"
    domain: str  # e.g., "gravity", "thermodynamics"

    # The known classical law
    classical_expr: str                    # e.g., "0.5 * m * v**2"
    classical_variables: List[str]         # e.g., ["m", "v"]
    classical_constants: Dict[str, float]  # e.g., {"c": 3e8}

    # The hidden correction (ground truth, hidden from the pipeline)
    correction_type: str                    # "multiplicative" or "additive"
    correction_expr: str                    # e.g., "theta_0 * (v / c)**2"
    correction_constants: Dict[str, float]  # e.g., {"theta_0": 0.75}

    # Physical metadata for the LLM
    anomaly_regime: str             # e.g., "high speeds v approaching c"
    variables_with_units: Dict[str, str]
    classical_limit_variable: str   # e.g., "v"
    classical_limit_direction: str  # e.g., "0" (delta -> 0 as v -> 0)

    # Structural classification (for evaluation)
    correction_class: str  # "exponential" | "power_law" | "rational" | "trigonometric" | "polynomial" | "logarithmic"

    def _sample_variables(self, rng: np.random.RandomState, n_points: int) -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray]]:
        """Draw the scenario's input variables from ``rng``.

        Returns ``(X, hidden)``: ``X`` holds the variables exposed to the
        caller, ``hidden`` holds variables that only the ground-truth
        correction may see. Draws happen in a fixed order so that a given
        seed always produces the same data.
        """
        X: Dict[str, np.ndarray] = {}
        hidden: Dict[str, np.ndarray] = {}

        if self.name == "Relativistic KE" or self.name.startswith("Subtle Misspecification"):
            # Velocities are sampled as a fraction of the scenario's own "c".
            c = self.classical_constants["c"]
            ranges = (("m", 0.5, 10.0), ("v", 0.1 * c, 0.85 * c))
        elif self.name in _SCENARIO_SAMPLING_RANGES:
            ranges = _SCENARIO_SAMPLING_RANGES[self.name]
        elif self.name == "Misspecification 2: Missing Variable":
            X["m"] = rng.uniform(1.0, 10.0, size=n_points)
            X["g"] = np.full(n_points, 9.81)
            # 'v' drives the ground truth but is not listed in
            # classical_variables, so it is kept out of X.
            hidden["v"] = rng.uniform(0.1, 5.0, size=n_points)
            return X, hidden
        elif "Nonlinear Drag" in self.name:
            ranges = _NONLINEAR_DRAG_RANGES
        else:
            # Fallback random generator for arbitrary names
            ranges = tuple((var, 1.0, 10.0) for var in self.classical_variables)

        for var, low, high in ranges:
            X[var] = rng.uniform(low, high, size=n_points)
        return X, hidden

    def generate_data(self, n_points: int = 200, noise_level: float = 0.0, seed: int = 42) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, np.ndarray]:
        """Generate a synthetic dataset for this scenario.

        Args:
            n_points: Number of samples drawn per variable.
            noise_level: Standard deviation of the relative Gaussian noise
                applied to the true signal; values <= 0 disable noise.
            seed: Seed for the ``numpy.random.RandomState`` used for sampling
                and noise.

        Returns:
            ``(X, y_obs, y_classical, residual)`` where ``X`` maps variable
            names to sampled arrays, ``y_obs`` is the (possibly noisy)
            observation, ``y_classical`` is the classical prediction, and
            ``residual`` is ``y_obs / y_classical - 1`` for multiplicative
            corrections or ``y_obs - y_classical`` otherwise.
        """
        rng = np.random.RandomState(seed)

        # 1. Generate domain-specific variables in the anomaly-sensitive regime
        X, hidden = self._sample_variables(rng, n_points)
        if "v" in hidden:
            # Kept as an instance attribute for backward compatibility with
            # code that inspects the hidden velocity after generation.
            self._hidden_v = hidden["v"]

        # Names available to both expression strings. Scenario variables and
        # constants are passed as locals and therefore take precedence.
        # NOTE(security): eval() leaves builtins reachable; never build a
        # scenario from untrusted input.
        eval_env = {
            "np": np,
            "sp": sp,
            "exp": np.exp,
            "sin": np.sin,
            "cos": np.cos,
            "tanh": np.tanh,
            "log": np.log,
            "sqrt": np.sqrt,
        }

        # 2. Evaluate classical law
        y_classical = eval(self.classical_expr, eval_env, {**X, **self.classical_constants})

        # 3. Evaluate ground-truth correction. The constants are resolved by
        # name from the namespace instead of being spliced into the source
        # text: textual substitution corrupts names that share a prefix
        # (theta_1 inside theta_10) and changes operator precedence for
        # negative values (theta_0**2 would become -0.5**2).
        correction_namespace = {
            **X,
            **self.classical_constants,
            **self.correction_constants,
            **hidden,
        }
        delta_true = eval(self.correction_expr, eval_env, correction_namespace)

        # 4. Compute y_true
        multiplicative = self.correction_type == "multiplicative"
        if multiplicative:
            y_true = y_classical * (1.0 + delta_true)
        else:  # additive
            y_true = y_classical + delta_true

        # 5. Add observational Gaussian noise (relative to y_true)
        if noise_level > 0.0:
            noise = rng.normal(0, noise_level, size=n_points)
            y_obs = y_true * (1.0 + noise)
        else:
            y_obs = y_true.copy()

        # 6. Compute residual in the same form the correction is applied
        if multiplicative:
            residual = y_obs / y_classical - 1.0
        else:
            residual = y_obs - y_classical

        return X, y_obs, y_classical, residual


# Per-scenario sampling ranges as (variable, low, high), listed in draw order.
# Looked up by exact scenario name when AnomalyScenario.generate_data runs.
_SCENARIO_SAMPLING_RANGES = {
    "Yukawa Gravity": (("m", 1.0, 10.0), ("M", 10.0, 100.0), ("r", 0.5, 5.0)),
    "Anharmonic Spring": (("k", 5.0, 50.0), ("x", 0.1, 3.0)),
    "Screened Coulomb": (("q1", 1e-6, 1e-5), ("q2", 1e-6, 1e-5), ("r", 0.2, 4.0)),
    "Net Radiation": (("A", 0.1, 2.0), ("T", 250.0, 800.0)),
    "Mystery-A": (("m", 1.0, 10.0), ("M", 10.0, 100.0), ("r", 0.5, 5.0)),
    "Mystery-B": (("m", 0.5, 5.0), ("v", 0.1, 10.0)),
    "Mystery-C": (("k", 10.0, 100.0), ("x", 0.1, 5.0)),
    "Blind-1: Van der Waals": (("n", 1.0, 5.0), ("T", 250.0, 450.0), ("V", 1.0, 5.0)),
    "Blind-2: Stokes-Einstein": (("T", 270.0, 350.0), ("r", 1.0, 5.0)),
    "Blind-3: Wien Displacement": (("T", 1000.0, 6000.0), ("f", 1e12, 1e14)),
    "Misspecification 1: Wrong Baseline Form": (("m", 1.0, 10.0), ("v", 0.1, 5.0)),
    # "T" is an irrelevant variable in this scenario
    "Misspecification 3: Spurious Variable": (("k", 5.0, 50.0), ("x", 0.1, 3.0), ("T", 250.0, 400.0)),
}

# Used for any scenario whose name contains "Nonlinear Drag".
_NONLINEAR_DRAG_RANGES = (("b", 0.5, 5.0), ("v", 0.1, 5.0))
166
+
167
def get_all_scenarios() -> List[AnomalyScenario]:
    """Return the 12 built-in scenarios, three in each of four tiers.

    The tiers, in list order, are "textbook", "cross_domain", "synthetic"
    and "blind". A fresh list of new AnomalyScenario instances is built on
    every call.
    """
    return [
        # =========================================================================
        # TIER 1: Textbook Corrections (LLM seen these)
        # =========================================================================
        AnomalyScenario(
            name="Relativistic KE",
            tier="textbook",
            domain="relativistic mechanics",
            classical_expr="0.5 * m * v**2",
            classical_variables=["m", "v"],
            classical_constants={"c": 3.0e8},
            correction_type="multiplicative",
            correction_expr="theta_0 * (v / c)**2",
            correction_constants={"theta_0": 0.75},
            anomaly_regime="high speeds v approaching c",
            variables_with_units={"m": "kg", "v": "m/s", "c": "m/s"},
            classical_limit_variable="v",
            classical_limit_direction="0",
            correction_class="polynomial"
        ),
        AnomalyScenario(
            name="Yukawa Gravity",
            tier="textbook",
            domain="gravitation",
            classical_expr="G * m * M / r**2",
            classical_variables=["m", "M", "r"],
            classical_constants={"G": 6.6743e-11},
            correction_type="multiplicative",
            correction_expr="theta_0 * exp(-r / theta_1)",
            correction_constants={"theta_0": 0.15, "theta_1": 2.5},
            anomaly_regime="short distances r < 5.0",
            variables_with_units={"m": "kg", "M": "kg", "r": "m", "G": "N*m^2/kg^2"},
            classical_limit_variable="r",
            classical_limit_direction="oo",
            correction_class="exponential"
        ),
        AnomalyScenario(
            name="Anharmonic Spring",
            tier="textbook",
            domain="mechanics",
            classical_expr="0.5 * k * x**2",
            classical_variables=["k", "x"],
            classical_constants={},
            correction_type="additive",
            correction_expr="theta_0 * x**4",
            correction_constants={"theta_0": 0.15},
            anomaly_regime="large amplitude displacements x > 1.5",
            variables_with_units={"k": "N/m", "x": "m"},
            classical_limit_variable="x",
            classical_limit_direction="0",
            correction_class="polynomial"
        ),

        # =========================================================================
        # TIER 2: Cross-Domain (known physics, unusual pairing)
        # =========================================================================
        AnomalyScenario(
            name="Screened Coulomb",
            tier="cross_domain",
            domain="electrostatics",
            classical_expr="k_e * q1 * q2 / r**2",
            classical_variables=["q1", "q2", "r"],
            classical_constants={"k_e": 8.9876e9},
            correction_type="multiplicative",
            correction_expr="exp(-r / theta_0) - 1.0",
            correction_constants={"theta_0": 1.5},
            anomaly_regime="shielded plasma environments, large distances r > 1.0",
            variables_with_units={"q1": "C", "q2": "C", "r": "m", "k_e": "N*m^2/C^2"},
            classical_limit_variable="r",
            classical_limit_direction="0",
            correction_class="exponential"
        ),
        AnomalyScenario(
            name="Net Radiation",
            tier="cross_domain",
            domain="thermodynamics",
            classical_expr="sigma * A * T**4",
            classical_variables=["A", "T"],
            classical_constants={"sigma": 5.6704e-8},
            correction_type="multiplicative",
            correction_expr="- (theta_0 / T)**4",
            # We treat T_env = 293.15 K as theta_0 parameter
            correction_constants={"theta_0": 293.15},
            anomaly_regime="cool temperatures close to ambient temperature T < 500 K",
            variables_with_units={"A": "m^2", "T": "K", "sigma": "W/(m^2*K^4)"},
            classical_limit_variable="T",
            classical_limit_direction="oo",
            correction_class="power_law"
        ),
        AnomalyScenario(
            name="Nonlinear Drag",
            tier="cross_domain",
            domain="fluid dynamics",
            classical_expr="b * v",
            classical_variables=["b", "v"],
            classical_constants={},
            correction_type="additive",
            # F_drag = b*v + theta_0 * v**2
            # residual = F_drag - b*v = theta_0 * v**2
            # Enforces addition of quadratic drag at higher Reynolds numbers
            correction_expr="theta_0 * v**2",
            correction_constants={"theta_0": 0.25},
            anomaly_regime="high speed turbulent flows v > 2.0",
            variables_with_units={"b": "kg/s", "v": "m/s"},
            classical_limit_variable="v",
            classical_limit_direction="0",
            correction_class="polynomial"
        ),

        # =========================================================================
        # TIER 3: Synthetic / Novel (LLM has never seen these)
        # =========================================================================
        AnomalyScenario(
            name="Mystery-A",
            tier="synthetic",
            domain="gravitation",
            classical_expr="G * m * M / r**2",
            classical_variables=["m", "M", "r"],
            classical_constants={"G": 6.6743e-11},
            correction_type="multiplicative",
            correction_expr="-tanh(theta_0 / r)**2",
            correction_constants={"theta_0": 1.2},
            anomaly_regime="sub-wavelength strong gravitational fields, small r < 3.0",
            variables_with_units={"m": "kg", "M": "kg", "r": "m", "G": "N*m^2/kg^2"},
            classical_limit_variable="r",
            classical_limit_direction="oo",
            correction_class="trigonometric"
        ),
        AnomalyScenario(
            name="Mystery-B",
            tier="synthetic",
            domain="mechanics",
            classical_expr="0.5 * m * v**2",
            classical_variables=["m", "v"],
            classical_constants={},
            correction_type="multiplicative",
            # sinc correction function: sinc(v/v_0) - 1
            correction_expr="sin(v / theta_0) / (v / theta_0) - 1.0",
            correction_constants={"theta_0": 4.5},
            anomaly_regime="velocity fluctuations under quantum boundary, v > 1.0",
            variables_with_units={"m": "kg", "v": "m/s"},
            classical_limit_variable="v",
            classical_limit_direction="0",
            correction_class="trigonometric"
        ),
        AnomalyScenario(
            name="Mystery-C",
            tier="synthetic",
            domain="mechanics",
            classical_expr="k * x",
            classical_variables=["k", "x"],
            classical_constants={},
            correction_type="multiplicative",
            correction_expr="log(1.0 + x / theta_0) / (x / theta_0) - 1.0",
            correction_constants={"theta_0": 2.0},
            anomaly_regime="nonlinear polymer stretching, x > 0.5",
            variables_with_units={"k": "N/m", "x": "m"},
            classical_limit_variable="x",
            classical_limit_direction="0",
            correction_class="logarithmic"
        ),

        # ── BLIND TEST SCENARIOS ──────────────────────────────────────────────
        # Ground truth is HIDDEN from the pipeline. We only know correction_class.
        # This is meant to demonstrate generalization beyond the self-made benchmark.

        AnomalyScenario(
            name="Blind-1: Van der Waals",
            tier="blind",
            domain="thermodynamics",
            # Classical: Ideal gas law: P = nRT/V
            # Anomaly: Van der Waals correction factor (a/V^2 pressure term)
            classical_expr="n * R * T / V",
            classical_variables=["n", "T", "V"],
            classical_constants={"R": 8.314},
            correction_type="multiplicative",
            # Correction: (1 - a*n^2/V^2) factor, simplified as additive delta
            # NOTE(review): the expression below has a positive sign and is
            # declared "multiplicative", which does not match the comment
            # above -- confirm the intended form.
            correction_expr="theta_0 * n**2 / V**2",
            correction_constants={"theta_0": 0.364},  # 'a' for CO2 in Pa·m^6/mol^2
            anomaly_regime="high pressure / low volume gas, V < 5L",
            variables_with_units={"n": "mol", "T": "K", "V": "m^3"},
            classical_limit_variable="V",
            classical_limit_direction="oo",  # correction -> 0 as V -> infinity (ideal gas limit)
            correction_class="rational"
        ),
        AnomalyScenario(
            name="Blind-2: Stokes-Einstein",
            tier="blind",
            domain="biophysics",
            # Classical: Einstein diffusion D = kT/(6*pi*eta*r)
            # Anomaly: Shape correction factor for non-spherical particles
            classical_expr="k_B * T / (6 * pi * eta * r)",
            classical_variables=["T", "r"],
            classical_constants={"k_B": 1.380649e-23, "pi": 3.14159, "eta": 1e-3},
            correction_type="multiplicative",
            # Oblate spheroid correction: (3/8)*sqrt(pi)*r^0.5 - 1 (simplified)
            correction_expr="theta_0 * (r / theta_1)**0.5",
            correction_constants={"theta_0": 0.15, "theta_1": 1.0},
            anomaly_regime="non-spherical macromolecules, r > 5nm",
            variables_with_units={"T": "K", "r": "m"},
            classical_limit_variable="r",
            classical_limit_direction="0",
            correction_class="power_law"
        ),
        AnomalyScenario(
            name="Blind-3: Wien Displacement",
            tier="blind",
            domain="quantum_optics",
            # Classical: Rayleigh-Jeans law: I = 2*k*T*f^2/c^2 (low freq)
            # Anomaly: Planck quantum correction
            classical_expr="2 * k_B * T * f**2 / c**2",
            classical_variables=["T", "f"],
            classical_constants={"k_B": 1.380649e-23, "c": 3e8},
            correction_type="multiplicative",
            # Quantum correction: (hf/kT)/(exp(hf/kT) - 1) relative to kT/hf limit
            # Simplified as: exp(-theta_0 * f / T) correction
            # NOTE(review): with u = theta_0 * f / T this expression equals
            # u / (exp(u) - 1), which tends to 1 (not 0) as f -> 0, while
            # classical_limit_direction below is "0". The other multiplicative
            # scenarios of this shape subtract 1.0 -- confirm whether a
            # trailing "- 1.0" is intended here.
            correction_expr="exp(-theta_0 * f / T) / (1 - exp(-theta_0 * f / T)) * (theta_0 * f / T)",
            correction_constants={"theta_0": 4.799e-11},  # h/k_B
            anomaly_regime="high frequency UV/visible regime, f > 1e13 Hz",
            variables_with_units={"T": "K", "f": "Hz"},
            classical_limit_variable="f",
            classical_limit_direction="0",
            correction_class="exponential"
        )
    ]