adcd 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
adcd/api.py ADDED
@@ -0,0 +1,251 @@
1
+ import os
2
+ import numpy as np
3
+ import sympy as sp
4
+ from typing import Dict, Tuple, Optional
5
+
6
+ from adcd.anomaly_scenarios import AnomalyScenario
7
+ from adcd.llm_proposer import (
8
+ CorrectionMockProposer,
9
+ CorrectionGeminiProposer,
10
+ HybridCorrectionProposer,
11
+ )
12
+ from adcd.dimensional_checker import ASTValidator, DimensionalChecker
13
+ from adcd.arc_scorer import ARCScorer, AsymptoticRegime, build_arc_regimes
14
+ from adcd.pipeline import Stage1Pipeline
15
+ from adcd.jax_optimizer import JAXOptimizer
16
+ from adcd.correction_orchestrator import CorrectionOrchestrator
17
+ from adcd.mode_detection import detect_correction_mode
18
+ from adcd.result import ADCDResult
19
+
20
+
21
class CustomAnomalyScenario:
    """
    Adapter that presents user-supplied arrays through the scenario interface.

    Holds the caller's ``X``, ``y_obs`` and ``y_classical`` and exposes the
    attributes and the ``generate_data`` method that the ADCD orchestrator reads
    from an AnomalyScenario, without subclassing it (duck typing).
    """

    def __init__(
        self,
        X: Dict[str, np.ndarray],
        y_obs: np.ndarray,
        y_classical: np.ndarray,
        classical_expr: str,
        correction_type: str,
        limit_variable: str,
        limit_direction: str,
        variables_with_units: Optional[Dict[str, str]] = None,
        name: str = "Custom Dataset Run",
    ):
        # Raw data: handed back untouched by generate_data().
        self._X = X
        self._y_obs = y_obs
        self._y_classical = y_classical

        # Identification / bookkeeping fields, fixed for custom datasets.
        self.name = name
        self.tier = "custom"
        self.domain = "custom"
        self.anomaly_regime = "custom"

        # Classical model description.
        self.classical_expr = classical_expr
        self.classical_variables = list(X)
        self.classical_constants = {}
        self.classical_limit_variable = limit_variable
        self.classical_limit_direction = limit_direction

        # The true correction is not known for user data, so placeholders are stored.
        self.correction_type = correction_type
        self.correction_expr = "Unknown"
        self.correction_constants = {}
        self.correction_class = "unknown"

        # A missing or empty unit mapping means every variable is dimensionless.
        if not variables_with_units:
            variables_with_units = {key: "dimensionless" for key in X}
        self.variables_with_units = variables_with_units

    def generate_data(
        self,
        n_points: int = 200,
        noise_level: float = 0.0,
        seed: int = 42,
    ) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, np.ndarray]:
        """
        Return the stored dataset together with its residual.

        ``n_points``, ``noise_level`` and ``seed`` are accepted only for
        signature compatibility; they are not used.

        Returns:
            ``(X, y_obs, y_classical, residual)`` where the residual is
            ``y_obs / y_classical - 1`` for a "multiplicative" correction type
            and ``y_obs - y_classical`` for any other type.
        """
        observed, classical = self._y_obs, self._y_classical
        if self.correction_type != "multiplicative":
            residual = observed - classical
        else:
            # Swap exact zeros for a tiny value so the ratio never divides by zero.
            denominator = np.where(classical == 0, 1e-15, classical)
            residual = observed / denominator - 1.0
        return self._X, observed, classical, residual
68
+
69
+
70
def _split_limit_spec(spec, default):
    """Turn a comma-separated string or a list/tuple into a list of stripped tokens."""
    if isinstance(spec, str):
        return [token.strip() for token in spec.split(",")]
    if isinstance(spec, (list, tuple)):
        return [str(token).strip() for token in spec]
    return list(default)


def _require_gemini_key(api_key):
    """Return the explicit key or the GEMINI_API_KEY environment value; raise if neither is set."""
    key = api_key or os.environ.get("GEMINI_API_KEY")
    if not key:
        raise ValueError("API key must be provided via `api_key` or GEMINI_API_KEY env var.")
    return key


def fit(
    X: Dict[str, np.ndarray],
    y_obs: np.ndarray,
    y_classical: np.ndarray,
    limit_variable: Optional[str] = None,
    limit_direction: str = "0",
    classical_expr: str = "0",
    variables_with_units: Optional[Dict[str, str]] = None,
    correction_mode: str = "auto",
    max_iterations: int = 5,
    proposer: str = "mock",
    api_key: Optional[str] = None,
    verbose: bool = True,
    seed: int = 42,
    scenario_name: str = "Custom Dataset Run",
) -> ADCDResult:
    """
    Fit a physical correction term to an observed anomaly dataset.

    Args:
        X: Dictionary of independent variable arrays, e.g. {"v": array, "m": array}.
            The mapping itself is not modified; float copies/views are made internally.
        y_obs: Observed outputs (containing anomaly)
        y_classical: Classical theory predictions
        limit_variable: The variable governing the asymptotic classical limit.
            A comma-separated string or a list/tuple of names is also accepted;
            when omitted, the first key of X is used.
        limit_direction: The direction of the limit ("0" or "oo"); comma-separated
            string or list/tuple, padded with its last entry or truncated to match
            the number of limit variables.
        classical_expr: Formula of the classical law (for LLM context)
        variables_with_units: Dictionary of variables and units (e.g. {"v": "m/s"})
        correction_mode: "additive", "multiplicative", or "auto" (automatically detected)
        max_iterations: Max number of discovery iterations
        proposer: The proposer backend ("mock", "gemini", "hybrid")
        api_key: LLM API key (falls back to GEMINI_API_KEY env variable)
        verbose: Print progress logs during optimization
        seed: Random seed for repeatability
        scenario_name: Name given to the internal scenario wrapper

    Returns:
        ADCDResult wrapping the discovery outcomes and visualization helpers.

    Raises:
        ValueError: If X is empty while the limit variable must default to its
            first key, if the proposer is unknown, or if a Gemini-backed proposer
            is requested without an API key.
    """
    # 1. Clean input shapes. Build a new dict so the caller's mapping is left untouched.
    X = {name: np.asarray(values, dtype=float) for name, values in X.items()}
    y_obs = np.asarray(y_obs, dtype=float)
    y_classical = np.asarray(y_classical, dtype=float)

    # 2. Handle auto-mode detection
    if correction_mode == "auto":
        mode, confidence = detect_correction_mode(y_obs, y_classical)
        if verbose:
            print(f"[ADCD Auto-Mode] Detected {mode} correction with confidence {confidence:.2f}")
    else:
        mode = correction_mode

    # 3. Handle limit variable fallback and parsing
    if isinstance(limit_variable, (str, list, tuple)):
        limit_vars = _split_limit_spec(limit_variable, ())
    else:
        # None (or an unsupported type) falls back to the first variable of X.
        if not X:
            raise ValueError(
                "X must contain at least one variable when limit_variable is not specified."
            )
        limit_vars = [next(iter(X))]
        if limit_variable is None and verbose:
            print(f"[ADCD Warning] limit_variable not specified. Defaulting to first key: '{limit_vars[0]}'")

    limit_dirs = _split_limit_spec(limit_direction, ["0"])
    if not limit_dirs:
        # An empty list/tuple has no "last entry" to pad with; use the default direction.
        limit_dirs = ["0"]

    # Match lengths: pad with the last direction, or drop the surplus.
    shortfall = len(limit_vars) - len(limit_dirs)
    if shortfall > 0:
        limit_dirs.extend([limit_dirs[-1]] * shortfall)
    elif shortfall < 0:
        limit_dirs = limit_dirs[:len(limit_vars)]

    limit_var_str = ",".join(limit_vars)
    limit_dir_str = ",".join(limit_dirs)

    # 4. Construct virtual scenario
    scenario = CustomAnomalyScenario(
        X=X,
        y_obs=y_obs,
        y_classical=y_classical,
        classical_expr=classical_expr,
        correction_type=mode,
        limit_variable=limit_var_str,
        limit_direction=limit_dir_str,
        variables_with_units=variables_with_units,
        name=scenario_name,
    )

    # 5. Build proposer
    if proposer == "mock":
        proposer_obj = CorrectionMockProposer(seed=seed)
    elif proposer == "gemini":
        proposer_obj = CorrectionGeminiProposer(api_key=_require_gemini_key(api_key))
    elif proposer == "hybrid":
        proposer_obj = HybridCorrectionProposer(api_key=_require_gemini_key(api_key))
    else:
        raise ValueError(f"Unknown proposer type: '{proposer}'")

    # 6. Configure pipeline
    validator = ASTValidator()
    checker = DimensionalChecker()

    regimes = build_arc_regimes(limit_var_str, limit_dir_str)
    scorer = ARCScorer(regimes=regimes)
    pipeline = Stage1Pipeline(validator, checker, scorer)
    optimizer = JAXOptimizer()

    orchestrator = CorrectionOrchestrator(
        proposer=proposer_obj,
        pipeline=pipeline,
        optimizer=optimizer,
        max_iterations=max_iterations,
        verbose=verbose,
    )

    search_result = orchestrator.search_correction(scenario, seed=seed)

    return ADCDResult(
        search_result=search_result,
        scenario=scenario,
        X=X,
        y_obs=y_obs,
        y_classical=y_classical,
    )
204
+
205
+
206
def discover_correction(
    scenario: AnomalyScenario,
    noise_level: float = 0.0,
    max_iterations: int = 5,
    proposer: str = "mock",
    correction_mode: str = "auto",
    api_key: Optional[str] = None,
    verbose: bool = True,
    seed: int = 42,
) -> ADCDResult:
    """
    Run ADCD correction discovery on a pre-defined AnomalyScenario.

    The scenario generates its own dataset; that data, together with the
    scenario's limit, expression, unit and name attributes, is forwarded to
    ``fit``.

    Args:
        scenario: The AnomalyScenario to run
        noise_level: Noise level passed to ``scenario.generate_data``
        max_iterations: Max iterations for discovery search
        proposer: Proposer type ("mock", "gemini", "hybrid")
        correction_mode: "additive", "multiplicative", or "auto" (default: auto)
        api_key: LLM API key
        verbose: Print progress logs
        seed: Random seed, used for both data generation and the search

    Returns:
        ADCDResult wrapping discovery outcome.
    """
    # The fourth element (the scenario's own residual) is not needed here.
    X, y_obs, y_classical, _residual = scenario.generate_data(
        noise_level=noise_level,
        seed=seed,
    )

    # Arguments taken from the scenario itself ...
    scenario_settings = dict(
        limit_variable=scenario.classical_limit_variable,
        limit_direction=scenario.classical_limit_direction,
        classical_expr=scenario.classical_expr,
        variables_with_units=scenario.variables_with_units,
        scenario_name=scenario.name,
    )
    # ... and arguments passed straight through from the caller.
    search_settings = dict(
        correction_mode=correction_mode,
        max_iterations=max_iterations,
        proposer=proposer,
        api_key=api_key,
        verbose=verbose,
        seed=seed,
    )

    return fit(
        X=X,
        y_obs=y_obs,
        y_classical=y_classical,
        **scenario_settings,
        **search_settings,
    )
adcd/arc_scorer.py ADDED
@@ -0,0 +1,198 @@
1
+ import logging
2
+ from dataclasses import dataclass
3
+ from typing import List, Union, Any, Dict, Sequence
4
+ import numpy as np
5
+ import sympy as sp
6
+
7
# Structured logging configuration: set up basic logging at INFO level and
# create the logger used throughout this module.
# NOTE(review): basicConfig runs at import time and affects the root logger of
# the importing process — confirm this is intended for a library module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ARCScorer")
10
+
11
+
12
@dataclass
class AsymptoticRegime:
    """
    Formal description of one asymptotic physical boundary condition (regime bound).

    R_k = (variable, limit_target, ground_truth_expression, importance_weight)

    String values given for ``variable`` and ``ground_truth_expr`` are converted
    to SymPy objects right after construction.
    """
    variable: Union[str, sp.Symbol]
    limit_target: Any  # May be a plain number (0, 1) or sp.oo / -sp.oo
    ground_truth_expr: Union[str, sp.Expr]
    weight: float = 1.0

    def __post_init__(self):
        # Promote plain strings to symbolic objects; non-string values are kept as given.
        variable, truth = self.variable, self.ground_truth_expr
        self.variable = sp.Symbol(variable) if isinstance(variable, str) else variable
        self.ground_truth_expr = sp.sympify(truth) if isinstance(truth, str) else truth
29
+
30
+
31
def calculate_similarity(expr1: sp.Expr, expr2: sp.Expr) -> float:
    """
    Score how closely two algebraic expressions agree, on a scale from 0.0 to 1.0.

    Three checks are applied in order:

    1. Symbolic: if ``simplify(expr1 - expr2)`` is zero the score is 1.0.
    2. Divergence gate: if exactly one expression is infinite the score is 0.0;
       if both are infinite the score is 1.0 only when they are identical.
    3. Numerical fallback: ``exp(-mean relative error)`` over 100 seeded random
       samples of the remaining free symbols, each drawn from [0.5, 2.0]
       (a single evaluation when no free symbols remain).

    Args:
        expr1: First expression (the candidate's evaluated limit in ARCScorer).
        expr2: Second expression; it is the reference for the relative error.

    Returns:
        Similarity in [0.0, 1.0]. Any evaluation failure, NaN, or infinite
        sample value yields 0.0.
    """
    # --- TIER 1: EXACT SYMBOLIC VERIFICATION ---
    try:
        if sp.simplify(expr1 - expr2) == 0:
            return 1.0
    except Exception as exc:
        logger.debug("Tier 1 simplification split failed: %s", exc)

    # --- TIER 2: DIVERGENCE DETECTION (HARD FAILURE GATE) ---
    # One side going to infinity / undefined while the other stays finite is a mismatch.
    # SymPy's ``is_infinite`` is three-valued (True / False / None). Only an
    # explicit True counts as infinite: comparing a raw None against False would
    # wrongly reject finite expressions whose finiteness SymPy cannot decide.
    inf_tokens = (sp.oo, -sp.oo, sp.zoo)
    is_inf1 = expr1 in inf_tokens or getattr(expr1, "is_infinite", None) is True
    is_inf2 = expr2 in inf_tokens or getattr(expr2, "is_infinite", None) is True

    if is_inf1 != is_inf2:
        return 0.0
    if is_inf1:
        return 1.0 if expr1 == expr2 else 0.0

    # --- TIER 3: NUMERICAL CLOSENESS (FALLBACK STRATEGY) ---
    # Used when algebraic simplification cannot decide, e.g. with transcendental terms.
    free_symbols = expr1.free_symbols.union(expr2.free_symbols)

    if not free_symbols:
        try:
            val1 = float(expr1.evalf())
            val2 = float(expr2.evalf())
        except Exception:
            return 0.0
        if np.isnan(val1) or np.isnan(val2):
            return 0.0
        rel_error = abs(val1 - val2) / (abs(val2) + 1e-9)
        return float(np.exp(-rel_error))

    # Seeded generator plus a name-sorted symbol list: set iteration order can
    # differ between processes, which would pair the same samples with different
    # symbols and make the score non-reproducible.
    rng = np.random.default_rng(42)
    symbols_list = sorted(free_symbols, key=str)
    errors = []

    for _ in range(100):
        # Positive sample values in [0.5, 2.0] for every remaining symbol.
        sample_vals = rng.uniform(0.5, 2.0, size=len(symbols_list))
        subs_dict = dict(zip(symbols_list, sample_vals))

        try:
            val1 = float(expr1.subs(subs_dict).evalf())
            val2 = float(expr2.subs(subs_dict).evalf())
        except Exception:
            return 0.0

        if np.isinf(val1) or np.isinf(val2) or np.isnan(val1) or np.isnan(val2):
            return 0.0

        errors.append(abs(val1 - val2) / (abs(val2) + 1e-9))

    # The loop either returned early or collected exactly 100 errors.
    return float(np.exp(-np.mean(errors)))
98
+
99
+
100
+ def _parse_limit_tokens(
101
+ limit_variables: Union[str, Sequence[str]],
102
+ limit_directions: Union[str, Sequence[str]],
103
+ ) -> tuple[List[str], List[str]]:
104
+ """Parse comma-separated or sequence limit specs into aligned variable/direction lists."""
105
+ if isinstance(limit_variables, str):
106
+ vars_list = [v.strip() for v in limit_variables.split(",") if v.strip()]
107
+ else:
108
+ vars_list = [str(v).strip() for v in limit_variables]
109
+
110
+ if isinstance(limit_directions, str):
111
+ dirs_list = [d.strip() for d in limit_directions.split(",") if d.strip()]
112
+ else:
113
+ dirs_list = [str(d).strip() for d in limit_directions]
114
+
115
+ if not vars_list:
116
+ raise ValueError("At least one limit variable is required.")
117
+
118
+ if not dirs_list:
119
+ dirs_list = ["0"]
120
+ if len(dirs_list) < len(vars_list):
121
+ dirs_list.extend([dirs_list[-1]] * (len(vars_list) - len(dirs_list)))
122
+ elif len(dirs_list) > len(vars_list):
123
+ dirs_list = dirs_list[: len(vars_list)]
124
+
125
+ return vars_list, dirs_list
126
+
127
+
128
def build_arc_regimes(
    limit_variables: Union[str, Sequence[str]],
    limit_directions: Union[str, Sequence[str]] = "0",
    ground_truth_expr: Union[str, sp.Expr] = "0",
    weight: float = 1.0,
) -> List[AsymptoticRegime]:
    """
    Create one AsymptoticRegime per limit variable.

    Multi-variable corrections Δ(x₁, x₂, …) are described with comma-separated
    specs, e.g. limit_variables="x,y" together with limit_directions="0,oo".
    A direction of "oo" gives a limit target of infinity; every other direction
    token gives a limit target of 0. All regimes share the same
    ``ground_truth_expr`` and ``weight``.
    """
    names, directions = _parse_limit_tokens(limit_variables, limit_directions)
    return [
        AsymptoticRegime(
            variable=sp.Symbol(name),
            limit_target=0 if direction != "oo" else sp.oo,
            ground_truth_expr=ground_truth_expr,
            weight=weight,
        )
        for name, direction in zip(names, directions)
    ]
153
+
154
+
155
class ARCScorer:
    """
    Stage 1 gatekeeper that scores how well a candidate formula's asymptotic
    behavior matches a set of expected regimes.

    For every regime the candidate's symbolic limit is compared with the regime's
    ground-truth expression; the result is the weight-averaged similarity.
    """

    def __init__(self, regimes: List[AsymptoticRegime]):
        """
        Args:
            regimes: Non-empty list of boundary conditions to check.

        Raises:
            ValueError: If ``regimes`` is empty.
        """
        if not regimes:
            raise ValueError("Daftar kondisi batas (regimes) tidak boleh kosong.")
        self.regimes = regimes
        self.total_weight = sum(r.weight for r in regimes)

    def score(
        self,
        candidate_expr: Union[str, sp.Expr],
        constants: Union[Dict[str, float], None] = None,
    ) -> float:
        """
        Compute the ARC score of one candidate expression.

        Limits are evaluated symbolically with SymPy rather than by string matching.

        Args:
            candidate_expr: Candidate formula as a string or SymPy expression.
            constants: Optional name -> value mapping; entries whose symbol occurs
                in the candidate are substituted before the limits are taken.

        Returns:
            Weighted mean similarity over all regimes. Returns 0.0 when the
            candidate cannot be parsed or when the regime weights sum to zero.
            A regime whose limit computation fails contributes 0.
        """
        try:
            candidate = sp.sympify(candidate_expr)
            if constants:
                subs_dict = {
                    sp.Symbol(k): v
                    for k, v in constants.items()
                    if sp.Symbol(k) in candidate.free_symbols
                }
                if subs_dict:
                    candidate = candidate.subs(subs_dict)
        except Exception as e:
            logger.error("Gagal memproses sintaks ekspresi kandidat: %s", e)
            return 0.0

        # With a zero weight sum the weighted average is undefined; report the
        # lowest score instead of raising ZeroDivisionError.
        if self.total_weight == 0:
            return 0.0

        weighted_similarity_sum = 0.0

        for r in self.regimes:
            try:
                evaluated_limit = sp.limit(candidate, r.variable, r.limit_target)

                # Closeness of the candidate's limit to the ground-truth boundary value.
                similarity = calculate_similarity(evaluated_limit, r.ground_truth_expr)
                weighted_similarity_sum += r.weight * similarity

                logger.debug(
                    "Regime %s->%s | Limit: %s | Sim: %s",
                    r.variable, r.limit_target, evaluated_limit, similarity,
                )
            except Exception as e:
                # A failed limit computation (e.g. PoleError) adds nothing to the
                # sum, which penalizes the candidate for this regime.
                logger.warning("Kegagalan komputasi limit pada variabel %s: %s", r.variable, e)

        return weighted_similarity_sum / self.total_weight
@@ -0,0 +1,97 @@
1
+ import sympy as sp
2
+ import numpy as np
3
+ from typing import Dict, Tuple
4
+
5
# Fallback values for named constants that may appear in candidate expressions;
# CoarseEvaluator uses this table when no `constants` mapping is supplied.
# NOTE(review): the values look like the speed of light (m/s), the gravitational
# constant and the solar mass (kg) in SI units — units are not stated here; confirm.
DEFAULT_CONSTANTS = {
    'c': 3.0e8,
    'G': 6.6743e-11,
    'M': 1.989e30,
}
10
+
11
class CoarseEvaluator:
    """
    Evaluates the empirical accuracy (MSE and Normalized MSE) of candidate
    equations on an observed dataset using lambdified NumPy functions.

    Example:
        >>> evaluator = CoarseEvaluator(X={"x": np.array([1, 2, 3])}, y_obs=np.array([2, 4, 6]))
        >>> mse, nmse = evaluator.evaluate(sp.sympify("2 * x"))
    """

    def __init__(self, X: Dict[str, np.ndarray], y_obs: np.ndarray, constants: Dict[str, float] = None):
        """
        Args:
            X: Non-empty mapping of variable name -> data array.
            y_obs: Observed target values.
            constants: Optional mapping of constant name -> value. When omitted,
                a private copy of DEFAULT_CONSTANTS is used.

        Raises:
            ValueError: If ``X`` is empty.
        """
        if not X:
            raise ValueError("Dataset X tidak boleh kosong.")
        self.X = X
        self.y_obs = y_obs
        # Copy the defaults: sharing the module-level dict would let a change to
        # one evaluator's constants silently alter every other evaluator.
        self.constants = constants if constants is not None else dict(DEFAULT_CONSTANTS)

        # Variance of y_obs, floored so NMSE stays finite for (near-)constant targets.
        self.y_var = float(np.var(y_obs))
        if self.y_var < 1e-10:
            self.y_var = 1e-10

        # Shape of the dataset, taken from the first array in X.
        self.data_shape = next(iter(X.values())).shape

    def evaluate(self, expr: sp.Expr, has_params: bool = False) -> Tuple[float, float]:
        """
        Evaluates the candidate SymPy expression on the dataset.

        Args:
            expr: The SymPy expression to evaluate.
            has_params: If True, rescales the prediction by the least-squares
                optimal scalar factor before computing the error (1D OLS).

        Returns:
            Tuple of (MSE, NMSE). Returns (inf, inf) if the expression contains a
            symbol found in neither X nor the constants, if the prediction
            contains inf/NaN/complex values, or if evaluation raises.

        Example:
            >>> mse, nmse = evaluator.evaluate(sp.sympify("theta_0 * x"), has_params=True)

        NOTE(review): a free parameter such as ``theta_0`` must be present in X or
        the constants, otherwise the unknown-symbol guard returns (inf, inf) —
        confirm callers substitute parameters beforehand.
        """
        failure = (float('inf'), float('inf'))
        free_syms = list(expr.free_symbols)

        # Map each free symbol to its data array or to a broadcast constant.
        args = []
        for sym in free_syms:
            name = str(sym)
            if name in self.X:
                args.append(self.X[name])
            elif name in self.constants:
                args.append(np.full(self.data_shape, self.constants[name]))
            else:
                # Unknown variable/constant in expression -> hard failure
                return failure

        try:
            # Vectorized lambda compilation and model prediction
            f = sp.lambdify(free_syms, expr, modules=["numpy"])
            y_pred = f(*args)

            # A constant expression (e.g. "5.0") yields a scalar; expand it to the data shape.
            if not isinstance(y_pred, np.ndarray):
                y_pred = np.full(self.data_shape, float(y_pred))

            # Reject invalid numerical outputs (inf, NaN, complex numbers)
            if np.any(np.isinf(y_pred)) or np.any(np.isnan(y_pred)) or np.iscomplexobj(y_pred):
                return failure

            # Scale prediction to match observed target scale (1D OLS).
            if has_params:
                try:
                    # vdot flattens its inputs, so the scalar inner product is
                    # also correct for multi-dimensional data arrays.
                    denom = float(np.vdot(y_pred, y_pred))
                    if denom > 1e-30:
                        optimal_scale = float(np.vdot(y_pred, self.y_obs)) / denom
                        y_pred = optimal_scale * y_pred
                except (TypeError, ValueError):
                    # Best effort: on a size/type mismatch keep the unscaled
                    # prediction and let the MSE step below decide the outcome.
                    pass

            # Calculate MSE and scale-invariant NMSE
            mse = float(np.mean((y_pred - self.y_obs) ** 2))
            nmse = mse / self.y_var

            return mse, nmse

        except Exception:
            # Any evaluation error (domain errors, shape mismatches, etc.) counts as a failed fit.
            return failure