PyPI - adcd - Versions diffs - 2.1.2__py3-none-any.whl - Mend

adcd 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

adcd/__init__.py +87 -0
adcd/anomaly_scenarios.py +393 -0
adcd/api.py +251 -0
adcd/arc_scorer.py +198 -0
adcd/coarse_evaluator.py +97 -0
adcd/correction_orchestrator.py +482 -0
adcd/dimensional_checker.py +210 -0
adcd/feynman_dataset.py +502 -0
adcd/jax_optimizer.py +428 -0
adcd/llm_proposer.py +993 -0
adcd/metrics.py +300 -0
adcd/mode_detection.py +72 -0
adcd/orchestrator.py +277 -0
adcd/pipeline.py +205 -0
adcd/real_data_loader.py +310 -0
adcd/real_scenarios.py +162 -0
adcd/residual_analyzer.py +124 -0
adcd/result.py +291 -0
adcd-2.1.2.dist-info/METADATA +295 -0
adcd-2.1.2.dist-info/RECORD +24 -0
adcd-2.1.2.dist-info/WHEEL +5 -0
adcd-2.1.2.dist-info/entry_points.txt +2 -0
adcd-2.1.2.dist-info/licenses/LICENSE +21 -0
adcd-2.1.2.dist-info/top_level.txt +1 -0

adcd/api.py ADDED Viewed

@@ -0,0 +1,251 @@
+import os
+import numpy as np
+import sympy as sp
+from typing import Dict, Tuple, Optional
+from adcd.anomaly_scenarios import AnomalyScenario
+from adcd.llm_proposer import (
+    CorrectionMockProposer,
+    CorrectionGeminiProposer,
+    HybridCorrectionProposer,
+)
+from adcd.dimensional_checker import ASTValidator, DimensionalChecker
+from adcd.arc_scorer import ARCScorer, AsymptoticRegime, build_arc_regimes
+from adcd.pipeline import Stage1Pipeline
+from adcd.jax_optimizer import JAXOptimizer
+from adcd.correction_orchestrator import CorrectionOrchestrator
+from adcd.mode_detection import detect_correction_mode
+from adcd.result import ADCDResult
+class CustomAnomalyScenario:
+    """
+    Duck-typed wrapper that exposes custom numpy arrays (X, y_obs, y_classical)
+    as an AnomalyScenario object to the ADCD orchestrator.
+    """
+    def __init__(
+        self,
+        X: Dict[str, np.ndarray],
+        y_obs: np.ndarray,
+        y_classical: np.ndarray,
+        classical_expr: str,
+        correction_type: str,
+        limit_variable: str,
+        limit_direction: str,
+        variables_with_units: Optional[Dict[str, str]] = None,
+        name: str = "Custom Dataset Run",
+    ):
+        self.name = name
+        self.tier = "custom"
+        self.domain = "custom"
+        self.classical_expr = classical_expr
+        self.classical_variables = list(X.keys())
+        self.classical_constants = {}
+        self.correction_type = correction_type
+        self.correction_expr = "Unknown"
+        self.correction_constants = {}
+        self.anomaly_regime = "custom"
+        self.variables_with_units = variables_with_units or {k: "dimensionless" for k in X.keys()}
+        self.classical_limit_variable = limit_variable
+        self.classical_limit_direction = limit_direction
+        self.correction_class = "unknown"
+        self._X = X
+        self._y_obs = y_obs
+        self._y_classical = y_classical
+    def generate_data(
+        self,
+        n_points: int = 200,
+        noise_level: float = 0.0,
+        seed: int = 42,
+    ) -> Tuple[Dict[str, np.ndarray], np.ndarray, np.ndarray, np.ndarray]:
+        if self.correction_type == "multiplicative":
+            safe_classical = np.where(self._y_classical == 0, 1e-15, self._y_classical)
+            residual = self._y_obs / safe_classical - 1.0
+        else:
+            residual = self._y_obs - self._y_classical
+        return self._X, self._y_obs, self._y_classical, residual
+def fit(
+    X: Dict[str, np.ndarray],
+    y_obs: np.ndarray,
+    y_classical: np.ndarray,
+    limit_variable: Optional[str] = None,
+    limit_direction: str = "0",
+    classical_expr: str = "0",
+    variables_with_units: Optional[Dict[str, str]] = None,
+    correction_mode: str = "auto",
+    max_iterations: int = 5,
+    proposer: str = "mock",
+    api_key: Optional[str] = None,
+    verbose: bool = True,
+    seed: int = 42,
+    scenario_name: str = "Custom Dataset Run",
+) -> ADCDResult:
+    """
+    Fit a physical correction term to an observed anomaly dataset.
+    Args:
+        X: Dictionary of independent variable arrays, e.g. {"v": array, "m": array}
+        y_obs: Observed outputs (containing anomaly)
+        y_classical: Classical theory predictions
+        limit_variable: The variable governing the asymptotic classical limit
+        limit_direction: The direction of the limit ("0" or "oo")
+        classical_expr: Formula of the classical law (for LLM context)
+        variables_with_units: Dictionary of variables and units (e.g. {"v": "m/s"})
+        correction_mode: "additive", "multiplicative", or "auto" (automatically detected)
+        max_iterations: Max number of discovery iterations
+        proposer: The proposer backend ("mock", "gemini", "hybrid")
+        api_key: LLM API key (falls back to GEMINI_API_KEY env variable)
+        verbose: Print progress logs during optimization
+        seed: Random seed for repeatability
+    Returns:
+        ADCDResult wrapping the discovery outcomes and visualization helpers.
+    """
+    # 1. Clean input shapes
+    for k, v in X.items():
+        X[k] = np.asarray(v, dtype=float)
+    y_obs = np.asarray(y_obs, dtype=float)
+    y_classical = np.asarray(y_classical, dtype=float)
+    # 2. Handle auto-mode detection
+    if correction_mode == "auto":
+        mode, confidence = detect_correction_mode(y_obs, y_classical)
+        if verbose:
+            print(f"[ADCD Auto-Mode] Detected {mode} correction with confidence {confidence:.2f}")
+    else:
+        mode = correction_mode
+    # 3. Handle limit variable fallback and parsing
+    if limit_variable is None:
+        limit_vars = [list(X.keys())[0]]
+        if verbose:
+            print(f"[ADCD Warning] limit_variable not specified. Defaulting to first key: '{limit_vars[0]}'")
+    elif isinstance(limit_variable, str):
+        limit_vars = [v.strip() for v in limit_variable.split(",")]
+    elif isinstance(limit_variable, (list, tuple)):
+        limit_vars = [str(v).strip() for v in limit_variable]
+    else:
+        limit_vars = [list(X.keys())[0]]
+    if isinstance(limit_direction, str):
+        limit_dirs = [d.strip() for d in limit_direction.split(",")]
+    elif isinstance(limit_direction, (list, tuple)):
+        limit_dirs = [str(d).strip() for d in limit_direction]
+    else:
+        limit_dirs = ["0"]
+    # Match lengths
+    if len(limit_dirs) < len(limit_vars):
+        limit_dirs.extend([limit_dirs[-1]] * (len(limit_vars) - len(limit_dirs)))
+    elif len(limit_dirs) > len(limit_vars):
+        limit_dirs = limit_dirs[:len(limit_vars)]
+    limit_var_str = ",".join(limit_vars)
+    limit_dir_str = ",".join(limit_dirs)
+    # 4. Construct virtual scenario
+    scenario = CustomAnomalyScenario(
+        X=X,
+        y_obs=y_obs,
+        y_classical=y_classical,
+        classical_expr=classical_expr,
+        correction_type=mode,
+        limit_variable=limit_var_str,
+        limit_direction=limit_dir_str,
+        variables_with_units=variables_with_units,
+        name=scenario_name,
+    )
+    # 5. Build proposer
+    if proposer == "mock":
+        proposer_obj = CorrectionMockProposer(seed=seed)
+    elif proposer == "gemini":
+        key = api_key or os.environ.get("GEMINI_API_KEY")
+        if not key:
+            raise ValueError("API key must be provided via `api_key` or GEMINI_API_KEY env var.")
+        proposer_obj = CorrectionGeminiProposer(api_key=key)
+    elif proposer == "hybrid":
+        key = api_key or os.environ.get("GEMINI_API_KEY")
+        if not key:
+            raise ValueError("API key must be provided via `api_key` or GEMINI_API_KEY env var.")
+        proposer_obj = HybridCorrectionProposer(api_key=key)
+    else:
+        raise ValueError(f"Unknown proposer type: '{proposer}'")
+    # 6. Configure pipeline
+    validator = ASTValidator()
+    checker = DimensionalChecker()
+    regimes = build_arc_regimes(limit_var_str, limit_dir_str)
+    scorer = ARCScorer(regimes=regimes)
+    pipeline = Stage1Pipeline(validator, checker, scorer)
+    optimizer = JAXOptimizer()
+    orchestrator = CorrectionOrchestrator(
+        proposer=proposer_obj,
+        pipeline=pipeline,
+        optimizer=optimizer,
+        max_iterations=max_iterations,
+        verbose=verbose
+    )
+    search_result = orchestrator.search_correction(scenario, seed=seed)
+    return ADCDResult(
+        search_result=search_result,
+        scenario=scenario,
+        X=X,
+        y_obs=y_obs,
+        y_classical=y_classical,
+    )
+def discover_correction(
+    scenario: AnomalyScenario,
+    noise_level: float = 0.0,
+    max_iterations: int = 5,
+    proposer: str = "mock",
+    correction_mode: str = "auto",
+    api_key: Optional[str] = None,
+    verbose: bool = True,
+    seed: int = 42,
+) -> ADCDResult:
+    """
+    Run ADCD correction discovery on a pre-defined AnomalyScenario.
+    Args:
+        scenario: The AnomalyScenario to run
+        noise_level: Noise level to apply (0.0 to 1.0)
+        max_iterations: Max iterations for discovery search
+        proposer: Proposer type ("mock", "gemini", "hybrid")
+        correction_mode: "additive", "multiplicative", or "auto" (default: auto)
+        api_key: LLM API key
+        verbose: Print progress logs
+        seed: Random seed
+    Returns:
+        ADCDResult wrapping discovery outcome.
+    """
+    # Generate scenario data
+    X, y_obs, y_classical, _ = scenario.generate_data(noise_level=noise_level, seed=seed)
+    # Route directly to fit()
+    return fit(
+        X=X,
+        y_obs=y_obs,
+        y_classical=y_classical,
+        limit_variable=scenario.classical_limit_variable,
+        limit_direction=scenario.classical_limit_direction,
+        classical_expr=scenario.classical_expr,
+        variables_with_units=scenario.variables_with_units,
+        correction_mode=correction_mode,
+        max_iterations=max_iterations,
+        proposer=proposer,
+        api_key=api_key,
+        verbose=verbose,
+        seed=seed,
+        scenario_name=scenario.name,
+    )

adcd/arc_scorer.py ADDED Viewed

@@ -0,0 +1,198 @@
+import logging
+from dataclasses import dataclass
+from typing import List, Union, Any, Dict, Sequence
+import numpy as np
+import sympy as sp
+# Structured logging configuration
+logging.basicConfig(level=logging.INFO)  # NOTE(review): runs at import time and configures the root logger
+logger = logging.getLogger("ARCScorer")
+@dataclass
+class AsymptoticRegime:
+    """
+    Representasi formal dari kondisi batas fisik asimtotik (Regime Bounds).
+    R_k = (variable, limit_target, ground_truth_expression, importance_weight)
+    """
+    variable: Union[str, sp.Symbol]
+    limit_target: Any  # Bisa angka numerik (0, 1) or sp.oo / -sp.oo
+    ground_truth_expr: Union[str, sp.Expr]
+    weight: float = 1.0
+    def __post_init__(self):
+        # Konversi string ke objek simbolik SymPy secara otomatis jika diperlukan
+        if isinstance(self.variable, str):
+            self.variable = sp.Symbol(self.variable)
+        if isinstance(self.ground_truth_expr, str):
+            self.ground_truth_expr = sp.sympify(self.ground_truth_expr)
+def calculate_similarity(expr1: sp.Expr, expr2: sp.Expr) -> float:
+    """
+    Mengevaluasi kesamaan matematis struktural antara dua ekspresi aljabar
+    menggunakan arsitektur verifikasi Three-Tier (Symbolic -> Divergence -> Numerical).
+    """
+    # --- TIER 1: VERIFIKASI SIMBOLIK EKSAK ---
+    try:
+        diff = sp.simplify(expr1 - expr2)
+        if diff == 0:
+            return 1.0
+    except Exception as e:
+        logger.debug(f"Tier 1 simplification split failed: {e}")
+    # --- TIER 3: DETEKSI DIVERGENSI (HARD FAILURE GATE) ---
+    # Jika salah satu menuju tak hingga/tak terdefinisi sedangkan yang lain bernilai konstan
+    inf_tokens = [sp.oo, -sp.oo, sp.zoo]
+    is_inf1 = expr1 in inf_tokens or getattr(expr1, "is_infinite", False)
+    is_inf2 = expr2 in inf_tokens or getattr(expr2, "is_infinite", False)
+    if is_inf1 != is_inf2:
+        return 0.0
+    if is_inf1 and is_inf2:
+        return 1.0 if expr1 == expr2 else 0.0
+    # --- TIER 2: EVALUASI KEDEKATAN NUMERIK (FALLBACK STRATEGY) ---
+    # Jika penyederhanaan aljabar buntu akibat fungsi transendental non-elementer,
+    # lakukan sampling 100 titik acak pada variabel konstanta fisis tersisa (e.g., m, c, G, M).
+    free_symbols = expr1.free_symbols.union(expr2.free_symbols)
+    if not free_symbols:
+        try:
+            val1 = float(expr1.evalf())
+            val2 = float(expr2.evalf())
+            if np.isnan(val1) or np.isnan(val2):
+                return 0.0
+            rel_error = abs(val1 - val2) / (abs(val2) + 1e-9)
+            return float(np.exp(-rel_error))
+        except Exception:
+            return 0.0
+    # Generator angka acak yang konsisten (seeded untuk stabilitas testing)
+    rng = np.random.default_rng(42)
+    symbols_list = list(free_symbols)
+    errors = []
+    for _ in range(100):
+        # Berikan nilai fisis positif acak yang masuk akal [0.5, 2.0] untuk parameter tersisa
+        sample_vals = rng.uniform(0.5, 2.0, size=len(symbols_list))
+        subs_dict = dict(zip(symbols_list, sample_vals))
+        try:
+            val1 = float(expr1.subs(subs_dict).evalf())
+            val2 = float(expr2.subs(subs_dict).evalf())
+            if np.isinf(val1) or np.isinf(val2) or np.isnan(val1) or np.isnan(val2):
+                return 0.0
+            rel_error = abs(val1 - val2) / (abs(val2) + 1e-9)
+            errors.append(rel_error)
+        except Exception:
+            return 0.0
+    if not errors:
+        return 0.0
+    mean_relative_error = np.mean(errors)
+    return float(np.exp(-mean_relative_error))
+def _parse_limit_tokens(
+    limit_variables: Union[str, Sequence[str]],
+    limit_directions: Union[str, Sequence[str]],
+) -> tuple[List[str], List[str]]:
+    """Parse comma-separated or sequence limit specs into aligned variable/direction lists."""
+    if isinstance(limit_variables, str):
+        vars_list = [v.strip() for v in limit_variables.split(",") if v.strip()]
+    else:
+        vars_list = [str(v).strip() for v in limit_variables]
+    if isinstance(limit_directions, str):
+        dirs_list = [d.strip() for d in limit_directions.split(",") if d.strip()]
+    else:
+        dirs_list = [str(d).strip() for d in limit_directions]
+    if not vars_list:
+        raise ValueError("At least one limit variable is required.")
+    if not dirs_list:
+        dirs_list = ["0"]
+    if len(dirs_list) < len(vars_list):
+        dirs_list.extend([dirs_list[-1]] * (len(vars_list) - len(dirs_list)))
+    elif len(dirs_list) > len(vars_list):
+        dirs_list = dirs_list[: len(vars_list)]
+    return vars_list, dirs_list
+def build_arc_regimes(
+    limit_variables: Union[str, Sequence[str]],
+    limit_directions: Union[str, Sequence[str]] = "0",
+    ground_truth_expr: Union[str, sp.Expr] = "0",
+    weight: float = 1.0,
+) -> List[AsymptoticRegime]:
+    """
+    Build ARC asymptotic regimes for one or more limit variables.
+    Supports multi-variable corrections Δ(x₁, x₂, …) by specifying comma-separated
+    limits, e.g. limit_variables="x,y" and limit_directions="0,oo".
+    """
+    vars_list, dirs_list = _parse_limit_tokens(limit_variables, limit_directions)
+    regimes: List[AsymptoticRegime] = []
+    for var, direction in zip(vars_list, dirs_list):
+        limit_target = sp.oo if direction == "oo" else 0
+        regimes.append(
+            AsymptoticRegime(
+                variable=sp.Symbol(var),
+                limit_target=limit_target,
+                ground_truth_expr=ground_truth_expr,
+                weight=weight,
+            )
+        )
+    return regimes
+class ARCScorer:
+    """
+    Mesin utama Stage 1 Gatekeeper untuk menghitung bobot kelayakan
+    struktur asimtotik formula kandidat dari LLM sebelum diteruskan ke graf JAX.
+    """
+    def __init__(self, regimes: List[AsymptoticRegime]):
+        if not regimes:
+            raise ValueError("Daftar kondisi batas (regimes) tidak boleh kosong.")
+        self.regimes = regimes
+        self.total_weight = sum(r.weight for r in regimes)
+    def score(self, candidate_expr: Union[str, sp.Expr], constants: Dict[str, float] = None) -> float:
+        """
+        Menghitung nilai akhir ARC Score untuk satu kandidat fungsi.
+        Menggunakan evaluasi limit matematis murni tanpa pencocokan string biasa.
+        """
+        try:
+            candidate = sp.sympify(candidate_expr)
+            if constants:
+                subs_dict = {sp.Symbol(k): v for k, v in constants.items() if sp.Symbol(k) in candidate.free_symbols}
+                if subs_dict:
+                    candidate = candidate.subs(subs_dict)
+        except Exception as e:
+            logger.error(f"Gagal memproses sintaks ekspresi kandidat: {e}")
+            return 0.0
+        weighted_similarity_sum = 0.0
+        for r in self.regimes:
+            try:
+                # Menggunakan mesin limit internal SymPy yang kokoh
+                evaluated_limit = sp.limit(candidate, r.variable, r.limit_target)
+                # Hitung skor kedekatan fisis limit kandidat vs ground truth boundary
+                similarity = calculate_similarity(evaluated_limit, r.ground_truth_expr)
+                weighted_similarity_sum += r.weight * similarity
+                logger.debug(f"Regime {r.variable}->{r.limit_target} | Limit: {evaluated_limit} | Sim: {similarity}")
+            except Exception as e:
+                # Kasus kegagalan matematis kritis (seperti PoleError) langsung diberi penalti 0
+                logger.warning(f"Kegagalan komputasi limit pada variabel {r.variable}: {e}")
+                continue
+        return weighted_similarity_sum / self.total_weight

adcd/coarse_evaluator.py ADDED Viewed

@@ -0,0 +1,97 @@
+import sympy as sp
+import numpy as np
+from typing import Dict, Tuple
+DEFAULT_CONSTANTS = {  # used by CoarseEvaluator when no `constants` mapping is passed
+    'c': 3.0e8,  # presumably the speed of light in m/s (rounded) — confirm
+    'G': 6.6743e-11,  # presumably the gravitational constant in SI units — confirm
+    'M': 1.989e30,  # presumably one solar mass in kg — confirm
+}
+class CoarseEvaluator:
+    """
+    Evaluates the empirical accuracy (MSE and Normalized MSE) of candidate
+    equations on observed physical datasets using high-speed lambdified numpy arrays.
+    Example:
+        >>> evaluator = CoarseEvaluator(X={"x": np.array([1, 2, 3])}, y_obs=np.array([2, 4, 6]))
+        >>> mse, nmse = evaluator.evaluate(sp.sympify("2 * x"))
+    """
+    def __init__(self, X: Dict[str, np.ndarray], y_obs: np.ndarray, constants: Dict[str, float] = None):
+        if not X:
+            raise ValueError("Dataset X tidak boleh kosong.")
+        self.X = X
+        self.y_obs = y_obs
+        self.constants = constants if constants is not None else DEFAULT_CONSTANTS
+        # Calculate variance of y_obs with safeguard for trivial datasets
+        self.y_var = float(np.var(y_obs))
+        if self.y_var < 1e-10:
+            self.y_var = 1e-10
+        # Determine shape of dataset from first array
+        self.data_shape = next(iter(X.values())).shape
+    def evaluate(self, expr: sp.Expr, has_params: bool = False) -> Tuple[float, float]:
+        """
+        Evaluates the candidate SymPy expression on the dataset.
+        Args:
+            expr: The SymPy expression to evaluate.
+            has_params: If True, scales the prediction to fit the observation (1D OLS).
+        Returns:
+            Tuple of (MSE, NMSE). Returns (inf, inf) if any numerical overflow/error occurs.
+        Example:
+            >>> mse, nmse = evaluator.evaluate(sp.sympify("theta_0 * x"), has_params=True)
+        """
+        free_syms = list(expr.free_symbols)
+        sym_names = [str(sym) for sym in free_syms]
+        # Map each free symbol in the expression to its array or constant value
+        args = []
+        for name in sym_names:
+            if name in self.X:
+                args.append(self.X[name])
+            elif name in self.constants:
+                # Broadcast constant value to match the data shape
+                args.append(np.full(self.data_shape, self.constants[name]))
+            else:
+                # Unknown variable/constant in expression -> hard failure
+                return float('inf'), float('inf')
+        try:
+            # Vectorized lambda compilation
+            f = sp.lambdify(free_syms, expr, modules=["numpy"])
+            # Execute model prediction
+            y_pred = f(*args)
+            # Protect against non-numpy array returns (e.g. constant expression like "5.0")
+            if not isinstance(y_pred, np.ndarray):
+                y_pred = np.full(self.data_shape, float(y_pred))
+            # Clean check for invalid numerical outputs (inf, NaN, complex numbers)
+            if np.any(np.isinf(y_pred)) or np.any(np.isnan(y_pred)) or np.iscomplexobj(y_pred):
+                return float('inf'), float('inf')
+            # Scale prediction to match observed target scale (1D OLS)
+            if has_params:
+                try:
+                    denom = float(np.dot(y_pred, y_pred))
+                    if denom > 1e-30:
+                        optimal_scale = float(np.dot(y_pred, self.y_obs)) / denom
+                        y_pred = optimal_scale * y_pred
+                except Exception:
+                    pass
+            # Calculate MSE and scale-invariant NMSE
+            mse = float(np.mean((y_pred - self.y_obs) ** 2))
+            nmse = mse / self.y_var
+            return mse, nmse
+        except Exception:
+            # Catch division by zero, domain errors, overflow, etc.
+            return float('inf'), float('inf')