ins-pricing 0.2.9-py3-none-any.whl → 0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/CHANGELOG.md +93 -0
- ins_pricing/README.md +11 -0
- ins_pricing/cli/bayesopt_entry_runner.py +626 -499
- ins_pricing/cli/utils/evaluation_context.py +320 -0
- ins_pricing/cli/utils/import_resolver.py +350 -0
- ins_pricing/modelling/core/bayesopt/PHASE2_REFACTORING_SUMMARY.md +449 -0
- ins_pricing/modelling/core/bayesopt/PHASE3_REFACTORING_SUMMARY.md +406 -0
- ins_pricing/modelling/core/bayesopt/REFACTORING_SUMMARY.md +247 -0
- ins_pricing/modelling/core/bayesopt/config_components.py +351 -0
- ins_pricing/modelling/core/bayesopt/config_preprocess.py +3 -4
- ins_pricing/modelling/core/bayesopt/core.py +153 -94
- ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +118 -31
- ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +294 -139
- ins_pricing/modelling/core/bayesopt/utils/__init__.py +86 -0
- ins_pricing/modelling/core/bayesopt/utils/constants.py +183 -0
- ins_pricing/modelling/core/bayesopt/utils/distributed_utils.py +186 -0
- ins_pricing/modelling/core/bayesopt/utils/io_utils.py +126 -0
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +540 -0
- ins_pricing/modelling/core/bayesopt/utils/torch_trainer_mixin.py +587 -0
- ins_pricing/modelling/core/bayesopt/utils.py +98 -1496
- ins_pricing/modelling/core/bayesopt/utils_backup.py +1503 -0
- ins_pricing/setup.py +1 -1
- {ins_pricing-0.2.9.dist-info → ins_pricing-0.3.0.dist-info}/METADATA +162 -149
- {ins_pricing-0.2.9.dist-info → ins_pricing-0.3.0.dist-info}/RECORD +26 -13
- {ins_pricing-0.2.9.dist-info → ins_pricing-0.3.0.dist-info}/WHEEL +0 -0
- {ins_pricing-0.2.9.dist-info → ins_pricing-0.3.0.dist-info}/top_level.txt +0 -0
@@ -29,108 +29,60 @@ from typing import Any, Dict, List, Optional
 import numpy as np
 import pandas as pd
 
-try:
-    [... content of removed lines 33-85 is not shown in this diff view ...]
-    from ins_pricing.cli.utils.cli_common import (  # type: ignore
-        PLOT_MODEL_LABELS,
-        PYTORCH_TRAINERS,
-        build_model_names,
-        dedupe_preserve_order,
-        load_dataset,
-        parse_model_pairs,
-        resolve_data_path,
-        resolve_path,
-        fingerprint_file,
-        coerce_dataset_types,
-        split_train_test,
-    )
-    from ins_pricing.cli.utils.cli_config import (  # type: ignore
-        add_config_json_arg,
-        add_output_dir_arg,
-        resolve_and_load_config,
-        resolve_data_config,
-        resolve_report_config,
-        resolve_split_config,
-        resolve_runtime_config,
-        resolve_output_dirs,
-    )
-except Exception:
-    import BayesOpt as ropt  # type: ignore
-    from utils.cli_common import (  # type: ignore
-        PLOT_MODEL_LABELS,
-        PYTORCH_TRAINERS,
-        build_model_names,
-        dedupe_preserve_order,
-        load_dataset,
-        parse_model_pairs,
-        resolve_data_path,
-        resolve_path,
-        fingerprint_file,
-        coerce_dataset_types,
-        split_train_test,
-    )
-    from utils.cli_config import (  # type: ignore
-        add_config_json_arg,
-        add_output_dir_arg,
-        resolve_and_load_config,
-        resolve_data_config,
-        resolve_report_config,
-        resolve_split_config,
-        resolve_runtime_config,
-        resolve_output_dirs,
-    )
+# Use unified import resolver to eliminate nested try/except chains
+from .utils.import_resolver import resolve_imports, setup_sys_path
+from .utils.evaluation_context import (
+    EvaluationContext,
+    TrainingContext,
+    ModelIdentity,
+    DataFingerprint,
+    CalibrationConfig,
+    ThresholdConfig,
+    BootstrapConfig,
+    ReportConfig,
+    RegistryConfig,
+)
+
+# Resolve all imports from a single location
+setup_sys_path()
+_imports = resolve_imports()
+
+ropt = _imports.bayesopt
+PLOT_MODEL_LABELS = _imports.PLOT_MODEL_LABELS
+PYTORCH_TRAINERS = _imports.PYTORCH_TRAINERS
+build_model_names = _imports.build_model_names
+dedupe_preserve_order = _imports.dedupe_preserve_order
+load_dataset = _imports.load_dataset
+parse_model_pairs = _imports.parse_model_pairs
+resolve_data_path = _imports.resolve_data_path
+resolve_path = _imports.resolve_path
+fingerprint_file = _imports.fingerprint_file
+coerce_dataset_types = _imports.coerce_dataset_types
+split_train_test = _imports.split_train_test
+
+add_config_json_arg = _imports.add_config_json_arg
+add_output_dir_arg = _imports.add_output_dir_arg
+resolve_and_load_config = _imports.resolve_and_load_config
+resolve_data_config = _imports.resolve_data_config
+resolve_report_config = _imports.resolve_report_config
+resolve_split_config = _imports.resolve_split_config
+resolve_runtime_config = _imports.resolve_runtime_config
+resolve_output_dirs = _imports.resolve_output_dirs
+
+bootstrap_ci = _imports.bootstrap_ci
+calibrate_predictions = _imports.calibrate_predictions
+eval_metrics_report = _imports.metrics_report
+select_threshold = _imports.select_threshold
+
+ModelArtifact = _imports.ModelArtifact
+ModelRegistry = _imports.ModelRegistry
+drift_psi_report = _imports.drift_psi_report
+group_metrics = _imports.group_metrics
+ReportPayload = _imports.ReportPayload
+write_report = _imports.write_report
+
+configure_run_logging = _imports.configure_run_logging
+plot_loss_curve_common = _imports.plot_loss_curve
 
 import matplotlib
 
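The hunk above replaces roughly a hundred lines of nested try/except imports with a single `setup_sys_path()` / `resolve_imports()` call. The resolver itself lives in the new `ins_pricing/cli/utils/import_resolver.py` (+350 lines in this release) and is not shown in this diff, so the following is only a minimal sketch of the pattern under stated assumptions: the module paths, helper names, and fallback order below are illustrative, not the package's actual implementation.

```python
# Sketch only: the general shape of a unified import resolver such as the one
# the runner now calls. Module paths and helper names here are assumptions.
import importlib
import sys
from pathlib import Path
from types import SimpleNamespace


def setup_sys_path() -> None:
    """Make the package root importable when the CLI runs as a plain script."""
    pkg_root = Path(__file__).resolve().parent  # hypothetical layout
    if str(pkg_root) not in sys.path:
        sys.path.insert(0, str(pkg_root))


def _first_import(*module_names: str):
    """Return the first module that imports cleanly, else None."""
    for name in module_names:
        try:
            return importlib.import_module(name)
        except Exception:
            continue
    return None


def resolve_imports() -> SimpleNamespace:
    """Resolve every optional dependency once; callers feature-check for None."""
    evaluation = _first_import("ins_pricing.modelling.core.evaluation", "evaluation")
    return SimpleNamespace(
        bayesopt=_first_import("ins_pricing.modelling.core.bayesopt.core", "BayesOpt"),
        metrics_report=getattr(evaluation, "metrics_report", None),
        calibrate_predictions=getattr(evaluation, "calibrate_predictions", None),
        # ...one attribute per symbol the runner re-exports...
    )
```

Because every symbol is resolved once into a single namespace, the runner can keep its existing `x is None` feature checks without any import-time branching.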
@@ -138,81 +90,6 @@ if os.name != "nt" and not os.environ.get("DISPLAY") and not os.environ.get("MPL
     matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 
-try:
-    from .utils.run_logging import configure_run_logging  # type: ignore
-except Exception:  # pragma: no cover
-    try:
-        from utils.run_logging import configure_run_logging  # type: ignore
-    except Exception:  # pragma: no cover
-        configure_run_logging = None  # type: ignore
-
-try:
-    from ..modelling.plotting.diagnostics import plot_loss_curve as plot_loss_curve_common
-except Exception:  # pragma: no cover
-    try:
-        from ins_pricing.plotting.diagnostics import plot_loss_curve as plot_loss_curve_common
-    except Exception:  # pragma: no cover
-        plot_loss_curve_common = None
-
-try:
-    from ..modelling.core.evaluation import (  # type: ignore
-        bootstrap_ci,
-        calibrate_predictions,
-        metrics_report as eval_metrics_report,
-        select_threshold,
-    )
-    from ..governance.registry import ModelArtifact, ModelRegistry  # type: ignore
-    from ..production import psi_report as drift_psi_report  # type: ignore
-    from ..production.monitoring import group_metrics  # type: ignore
-    from ..reporting.report_builder import ReportPayload, write_report  # type: ignore
-except Exception:  # pragma: no cover
-    try:
-        from ins_pricing.modelling.core.evaluation import (  # type: ignore
-            bootstrap_ci,
-            calibrate_predictions,
-            metrics_report as eval_metrics_report,
-            select_threshold,
-        )
-        from ins_pricing.governance.registry import (  # type: ignore
-            ModelArtifact,
-            ModelRegistry,
-        )
-        from ins_pricing.production import psi_report as drift_psi_report  # type: ignore
-        from ins_pricing.production.monitoring import group_metrics  # type: ignore
-        from ins_pricing.reporting.report_builder import (  # type: ignore
-            ReportPayload,
-            write_report,
-        )
-    except Exception:  # pragma: no cover
-        try:
-            from evaluation import (  # type: ignore
-                bootstrap_ci,
-                calibrate_predictions,
-                metrics_report as eval_metrics_report,
-                select_threshold,
-            )
-            from ins_pricing.governance.registry import (  # type: ignore
-                ModelArtifact,
-                ModelRegistry,
-            )
-            from ins_pricing.production import psi_report as drift_psi_report  # type: ignore
-            from ins_pricing.production.monitoring import group_metrics  # type: ignore
-            from ins_pricing.reporting.report_builder import (  # type: ignore
-                ReportPayload,
-                write_report,
-            )
-        except Exception:  # pragma: no cover
-            bootstrap_ci = None  # type: ignore
-            calibrate_predictions = None  # type: ignore
-            eval_metrics_report = None  # type: ignore
-            select_threshold = None  # type: ignore
-            drift_psi_report = None  # type: ignore
-            group_metrics = None  # type: ignore
-            ReportPayload = None  # type: ignore
-            write_report = None  # type: ignore
-            ModelRegistry = None  # type: ignore
-            ModelArtifact = None  # type: ignore
-
 
 def _parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
@@ -520,6 +397,444 @@ def _compute_psi_report(
     return None
 
 
+# --- Refactored helper functions for _evaluate_and_report ---
+
+
+def _apply_calibration(
+    y_true_train: np.ndarray,
+    y_pred_train: np.ndarray,
+    y_pred_test: np.ndarray,
+    calibration_cfg: Dict[str, Any],
+    model_name: str,
+    model_key: str,
+) -> tuple[np.ndarray, np.ndarray, Optional[Dict[str, Any]]]:
+    """Apply calibration to predictions for classification tasks.
+
+    Returns:
+        Tuple of (calibrated_train_preds, calibrated_test_preds, calibration_info)
+    """
+    cal_cfg = dict(calibration_cfg or {})
+    cal_enabled = bool(cal_cfg.get("enable", False) or cal_cfg.get("method"))
+
+    if not cal_enabled or calibrate_predictions is None:
+        return y_pred_train, y_pred_test, None
+
+    method = cal_cfg.get("method", "sigmoid")
+    max_rows = cal_cfg.get("max_rows")
+    seed = cal_cfg.get("seed")
+    y_cal, p_cal = _sample_arrays(
+        y_true_train, y_pred_train, max_rows=max_rows, seed=seed)
+
+    try:
+        calibrator = calibrate_predictions(y_cal, p_cal, method=method)
+        calibrated_train = calibrator.predict(y_pred_train)
+        calibrated_test = calibrator.predict(y_pred_test)
+        calibration_info = {"method": calibrator.method, "max_rows": max_rows}
+        return calibrated_train, calibrated_test, calibration_info
+    except Exception as exc:
+        print(f"[Report] Calibration failed for {model_name}/{model_key}: {exc}")
+        return y_pred_train, y_pred_test, None
+
+
+def _select_classification_threshold(
+    y_true_train: np.ndarray,
+    y_pred_train_eval: np.ndarray,
+    threshold_cfg: Dict[str, Any],
+) -> tuple[float, Optional[Dict[str, Any]]]:
+    """Select threshold for classification predictions.
+
+    Returns:
+        Tuple of (threshold_value, threshold_info)
+    """
+    thr_cfg = dict(threshold_cfg or {})
+    thr_enabled = bool(
+        thr_cfg.get("enable", False)
+        or thr_cfg.get("metric")
+        or thr_cfg.get("value") is not None
+    )
+
+    if thr_cfg.get("value") is not None:
+        threshold_value = float(thr_cfg["value"])
+        return threshold_value, {"threshold": threshold_value, "source": "fixed"}
+
+    if thr_enabled and select_threshold is not None:
+        max_rows = thr_cfg.get("max_rows")
+        seed = thr_cfg.get("seed")
+        y_thr, p_thr = _sample_arrays(
+            y_true_train, y_pred_train_eval, max_rows=max_rows, seed=seed)
+        threshold_info = select_threshold(
+            y_thr,
+            p_thr,
+            metric=thr_cfg.get("metric", "f1"),
+            min_positive_rate=thr_cfg.get("min_positive_rate"),
+            grid=thr_cfg.get("grid", 99),
+        )
+        return float(threshold_info.get("threshold", 0.5)), threshold_info
+
+    return 0.5, None
+
+
+def _compute_classification_metrics(
+    y_true_test: np.ndarray,
+    y_pred_test_eval: np.ndarray,
+    threshold_value: float,
+) -> Dict[str, Any]:
+    """Compute metrics for classification task."""
+    metrics = eval_metrics_report(
+        y_true_test,
+        y_pred_test_eval,
+        task_type="classification",
+        threshold=threshold_value,
+    )
+    precision = float(metrics.get("precision", 0.0))
+    recall = float(metrics.get("recall", 0.0))
+    f1 = 0.0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall)
+    metrics["f1"] = float(f1)
+    metrics["threshold"] = float(threshold_value)
+    return metrics
+
+
+def _compute_bootstrap_ci(
+    y_true_test: np.ndarray,
+    y_pred_test_eval: np.ndarray,
+    weight_test: Optional[np.ndarray],
+    metrics: Dict[str, Any],
+    bootstrap_cfg: Dict[str, Any],
+    task_type: str,
+) -> Dict[str, Dict[str, float]]:
+    """Compute bootstrap confidence intervals for metrics."""
+    if not bootstrap_cfg or not bool(bootstrap_cfg.get("enable", False)) or bootstrap_ci is None:
+        return {}
+
+    metric_names = bootstrap_cfg.get("metrics")
+    if not metric_names:
+        metric_names = [name for name in metrics.keys() if name != "threshold"]
+    n_samples = int(bootstrap_cfg.get("n_samples", 200))
+    ci = float(bootstrap_cfg.get("ci", 0.95))
+    seed = bootstrap_cfg.get("seed")
+
+    def _metric_fn(y_true, y_pred, weight=None):
+        vals = eval_metrics_report(
+            y_true,
+            y_pred,
+            task_type=task_type,
+            weight=weight,
+            threshold=metrics.get("threshold", 0.5),
+        )
+        if task_type == "classification":
+            prec = float(vals.get("precision", 0.0))
+            rec = float(vals.get("recall", 0.0))
+            vals["f1"] = 0.0 if (prec + rec) == 0 else 2 * prec * rec / (prec + rec)
+        return vals
+
+    bootstrap_results: Dict[str, Dict[str, float]] = {}
+    for name in metric_names:
+        if name not in metrics:
+            continue
+        ci_result = bootstrap_ci(
+            lambda y_t, y_p, w=None: float(_metric_fn(y_t, y_p, w).get(name, 0.0)),
+            y_true_test,
+            y_pred_test_eval,
+            weight=weight_test,
+            n_samples=n_samples,
+            ci=ci,
+            seed=seed,
+        )
+        bootstrap_results[str(name)] = ci_result
+
+    return bootstrap_results
+
+
+def _compute_validation_table(
+    model: ropt.BayesOptModel,
+    pred_col: str,
+    report_group_cols: Optional[List[str]],
+    weight_col: Optional[str],
+    model_name: str,
+    model_key: str,
+) -> Optional[pd.DataFrame]:
+    """Compute grouped validation metrics table."""
+    if not report_group_cols or group_metrics is None:
+        return None
+
+    available_groups = [
+        col for col in report_group_cols if col in model.test_data.columns
+    ]
+    if not available_groups:
+        return None
+
+    try:
+        validation_table = group_metrics(
+            model.test_data,
+            actual_col=model.resp_nme,
+            pred_col=pred_col,
+            group_cols=available_groups,
+            weight_col=weight_col if weight_col and weight_col in model.test_data.columns else None,
+        )
+        counts = (
+            model.test_data.groupby(available_groups, dropna=False)
+            .size()
+            .reset_index(name="count")
+        )
+        return validation_table.merge(counts, on=available_groups, how="left")
+    except Exception as exc:
+        print(f"[Report] group_metrics failed for {model_name}/{model_key}: {exc}")
+        return None
+
+
+def _compute_risk_trend(
+    model: ropt.BayesOptModel,
+    pred_col: str,
+    report_time_col: Optional[str],
+    report_time_freq: str,
+    report_time_ascending: bool,
+    weight_col: Optional[str],
+    model_name: str,
+    model_key: str,
+) -> Optional[pd.DataFrame]:
+    """Compute time-series risk trend metrics."""
+    if not report_time_col or group_metrics is None:
+        return None
+
+    if report_time_col not in model.test_data.columns:
+        return None
+
+    try:
+        time_df = model.test_data.copy()
+        time_series = pd.to_datetime(time_df[report_time_col], errors="coerce")
+        time_df = time_df.loc[time_series.notna()].copy()
+
+        if time_df.empty:
+            return None
+
+        time_df["_time_bucket"] = (
+            pd.to_datetime(time_df[report_time_col], errors="coerce")
+            .dt.to_period(report_time_freq)
+            .dt.to_timestamp()
+        )
+        risk_trend = group_metrics(
+            time_df,
+            actual_col=model.resp_nme,
+            pred_col=pred_col,
+            group_cols=["_time_bucket"],
+            weight_col=weight_col if weight_col and weight_col in time_df.columns else None,
+        )
+        counts = (
+            time_df.groupby("_time_bucket", dropna=False)
+            .size()
+            .reset_index(name="count")
+        )
+        risk_trend = risk_trend.merge(counts, on="_time_bucket", how="left")
+        risk_trend = risk_trend.sort_values(
+            "_time_bucket", ascending=bool(report_time_ascending)
+        ).reset_index(drop=True)
+        return risk_trend.rename(columns={"_time_bucket": report_time_col})
+    except Exception as exc:
+        print(f"[Report] time metrics failed for {model_name}/{model_key}: {exc}")
+        return None
+
+
+def _write_metrics_json(
+    report_root: Path,
+    model_name: str,
+    model_key: str,
+    version: str,
+    metrics: Dict[str, Any],
+    threshold_info: Optional[Dict[str, Any]],
+    calibration_info: Optional[Dict[str, Any]],
+    bootstrap_results: Dict[str, Dict[str, float]],
+    data_path: Path,
+    data_fingerprint: Dict[str, Any],
+    config_sha: str,
+    pred_col: str,
+    task_type: str,
+) -> Path:
+    """Write metrics to JSON file and return the path."""
+    metrics_payload = {
+        "model_name": model_name,
+        "model_key": model_key,
+        "model_version": version,
+        "metrics": metrics,
+        "threshold": threshold_info,
+        "calibration": calibration_info,
+        "bootstrap": bootstrap_results,
+        "data_path": str(data_path),
+        "data_fingerprint": data_fingerprint,
+        "config_sha256": config_sha,
+        "pred_col": pred_col,
+        "task_type": task_type,
+    }
+    metrics_path = report_root / f"{model_name}_{model_key}_metrics.json"
+    metrics_path.write_text(
+        json.dumps(metrics_payload, indent=2, ensure_ascii=True),
+        encoding="utf-8",
+    )
+    return metrics_path
+
+
+def _write_model_report(
+    report_root: Path,
+    model_name: str,
+    model_key: str,
+    version: str,
+    metrics: Dict[str, Any],
+    risk_trend: Optional[pd.DataFrame],
+    psi_report_df: Optional[pd.DataFrame],
+    validation_table: Optional[pd.DataFrame],
+    calibration_info: Optional[Dict[str, Any]],
+    threshold_info: Optional[Dict[str, Any]],
+    bootstrap_results: Dict[str, Dict[str, float]],
+    config_sha: str,
+    data_fingerprint: Dict[str, Any],
+) -> Optional[Path]:
+    """Write model report and return the path."""
+    if ReportPayload is None or write_report is None:
+        return None
+
+    notes_lines = [
+        f"- Config SHA256: {config_sha}",
+        f"- Data fingerprint: {data_fingerprint.get('sha256_prefix')}",
+    ]
+    if calibration_info:
+        notes_lines.append(f"- Calibration: {calibration_info.get('method')}")
+    if threshold_info:
+        notes_lines.append(f"- Threshold selection: {threshold_info}")
+    if bootstrap_results:
+        notes_lines.append("- Bootstrap: see metrics JSON for CI")
+
+    payload = ReportPayload(
+        model_name=f"{model_name}/{model_key}",
+        model_version=version,
+        metrics={k: float(v) for k, v in metrics.items()},
+        risk_trend=risk_trend,
+        drift_report=psi_report_df,
+        validation_table=validation_table,
+        extra_notes="\n".join(notes_lines),
+    )
+    return write_report(
+        payload,
+        report_root / f"{model_name}_{model_key}_report.md",
+    )
+
+
+def _register_model_to_registry(
+    model: ropt.BayesOptModel,
+    model_name: str,
+    model_key: str,
+    version: str,
+    metrics: Dict[str, Any],
+    task_type: str,
+    data_path: Path,
+    data_fingerprint: Dict[str, Any],
+    config_sha: str,
+    registry_path: Optional[str],
+    registry_tags: Dict[str, Any],
+    registry_status: str,
+    report_path: Optional[Path],
+    metrics_path: Path,
+    cfg: Dict[str, Any],
+) -> None:
+    """Register model artifacts to the model registry."""
+    if ModelRegistry is None or ModelArtifact is None:
+        return
+
+    registry = ModelRegistry(
+        registry_path
+        if registry_path
+        else Path(model.output_manager.result_dir) / "model_registry.json"
+    )
+
+    tags = {str(k): str(v) for k, v in (registry_tags or {}).items()}
+    tags.update({
+        "model_key": str(model_key),
+        "task_type": str(task_type),
+        "data_path": str(data_path),
+        "data_sha256_prefix": str(data_fingerprint.get("sha256_prefix", "")),
+        "data_size": str(data_fingerprint.get("size", "")),
+        "data_mtime": str(data_fingerprint.get("mtime", "")),
+        "config_sha256": str(config_sha),
+    })
+
+    artifacts = _collect_model_artifacts(
+        model, model_name, model_key, report_path, metrics_path, cfg
+    )
+
+    registry.register(
+        name=str(model_name),
+        version=version,
+        metrics={k: float(v) for k, v in metrics.items()},
+        tags=tags,
+        artifacts=artifacts,
+        status=str(registry_status or "candidate"),
+        notes=f"model_key={model_key}",
+    )
+
+
+def _collect_model_artifacts(
+    model: ropt.BayesOptModel,
+    model_name: str,
+    model_key: str,
+    report_path: Optional[Path],
+    metrics_path: Path,
+    cfg: Dict[str, Any],
+) -> List:
+    """Collect all model artifacts for registry."""
+    artifacts = []
+
+    # Trained model artifact
+    trainer = model.trainers.get(model_key)
+    if trainer is not None:
+        try:
+            model_path = trainer.output.model_path(trainer._get_model_filename())
+            if os.path.exists(model_path):
+                artifacts.append(ModelArtifact(path=model_path, description="trained model"))
+        except Exception:
+            pass
+
+    # Report artifact
+    if report_path is not None:
+        artifacts.append(ModelArtifact(path=str(report_path), description="model report"))
+
+    # Metrics JSON artifact
+    if metrics_path.exists():
+        artifacts.append(ModelArtifact(path=str(metrics_path), description="metrics json"))
+
+    # Preprocess artifacts
+    if bool(cfg.get("save_preprocess", False)):
+        artifact_path = cfg.get("preprocess_artifact_path")
+        if artifact_path:
+            preprocess_path = Path(str(artifact_path))
+            if not preprocess_path.is_absolute():
+                preprocess_path = Path(model.output_manager.result_dir) / preprocess_path
+        else:
+            preprocess_path = Path(model.output_manager.result_path(
+                f"{model.model_nme}_preprocess.json"
+            ))
+        if preprocess_path.exists():
+            artifacts.append(
+                ModelArtifact(path=str(preprocess_path), description="preprocess artifacts")
+            )
+
+    # Prediction cache artifacts
+    if bool(cfg.get("cache_predictions", False)):
+        cache_dir = cfg.get("prediction_cache_dir")
+        if cache_dir:
+            pred_root = Path(str(cache_dir))
+            if not pred_root.is_absolute():
+                pred_root = Path(model.output_manager.result_dir) / pred_root
+        else:
+            pred_root = Path(model.output_manager.result_dir) / "predictions"
+        ext = "csv" if str(cfg.get("prediction_cache_format", "parquet")).lower() == "csv" else "parquet"
+        for split_label in ("train", "test"):
+            pred_path = pred_root / f"{model_name}_{model_key}_{split_label}.{ext}"
+            if pred_path.exists():
+                artifacts.append(
+                    ModelArtifact(path=str(pred_path), description=f"predictions {split_label}")
+                )
+
+    return artifacts
+
+
 def _evaluate_and_report(
     model: ropt.BayesOptModel,
     *,
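The helpers above preserve the metric arithmetic of the old inline code: `_compute_classification_metrics` derives F1 as the harmonic mean of the precision and recall returned by `metrics_report` (guarding the zero denominator), and `_select_classification_threshold` falls back to 0.5 when neither a fixed value nor a selection metric is configured. A small self-contained check of that arithmetic, independent of the package's own `metrics_report`:

```python
import numpy as np


def f1_from_precision_recall(precision: float, recall: float) -> float:
    # Same guard as _compute_classification_metrics: F1 is 0.0 when both inputs are 0.
    return 0.0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall)


# Toy probabilities thresholded at the 0.5 default used when selection is disabled.
y_true = np.array([1, 0, 1, 1, 0, 0])
y_prob = np.array([0.9, 0.4, 0.3, 0.8, 0.6, 0.1])
y_hat = (y_prob >= 0.5).astype(int)

tp = int(((y_hat == 1) & (y_true == 1)).sum())
fp = int(((y_hat == 1) & (y_true == 0)).sum())
fn = int(((y_hat == 0) & (y_true == 1)).sum())
precision = tp / (tp + fp) if (tp + fp) else 0.0   # 2/3
recall = tp / (tp + fn) if (tp + fn) else 0.0      # 2/3
print(round(f1_from_precision_recall(precision, recall), 4))  # 0.6667
```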
@@ -544,374 +859,164 @@ def _evaluate_and_report(
     run_id: str,
     config_sha: str,
 ) -> None:
+    """Evaluate model predictions and generate reports.
+
+    This function orchestrates the evaluation pipeline:
+    1. Extract predictions and ground truth
+    2. Apply calibration (for classification)
+    3. Select threshold (for classification)
+    4. Compute metrics
+    5. Compute bootstrap confidence intervals
+    6. Generate validation tables and risk trends
+    7. Write reports and register model
+    """
     if eval_metrics_report is None:
         print("[Report] Skip evaluation: metrics module unavailable.")
         return
 
     pred_col = PLOT_MODEL_LABELS.get(model_key, (None, f"pred_{model_key}"))[1]
     if pred_col not in model.test_data.columns:
-        print(
-            f"[Report] Missing prediction column '{pred_col}' for {model_name}/{model_key}; skip.")
+        print(f"[Report] Missing prediction column '{pred_col}' for {model_name}/{model_key}; skip.")
         return
 
+    # Extract predictions and weights
     weight_col = getattr(model, "weight_nme", None)
-    y_true_train = model.train_data[model.resp_nme].to_numpy(
-    [... old line 559 not shown in this diff view ...]
-    y_true_test = model.test_data[model.resp_nme].to_numpy(
-        dtype=float, copy=False)
+    y_true_train = model.train_data[model.resp_nme].to_numpy(dtype=float, copy=False)
+    y_true_test = model.test_data[model.resp_nme].to_numpy(dtype=float, copy=False)
     y_pred_train = model.train_data[pred_col].to_numpy(dtype=float, copy=False)
     y_pred_test = model.test_data[pred_col].to_numpy(dtype=float, copy=False)
-    weight_train = (
-        model.train_data[weight_col].to_numpy(dtype=float, copy=False)
-        if weight_col and weight_col in model.train_data.columns
-        else None
-    )
     weight_test = (
         model.test_data[weight_col].to_numpy(dtype=float, copy=False)
         if weight_col and weight_col in model.test_data.columns
         else None
     )
 
-    task_type = str(cfg.get("task_type", getattr(
-    [... old line 576 not shown in this diff view ...]
+    task_type = str(cfg.get("task_type", getattr(model, "task_type", "regression")))
+
+    # Process based on task type
     if task_type == "classification":
         y_pred_train = np.clip(y_pred_train, 0.0, 1.0)
         y_pred_test = np.clip(y_pred_test, 0.0, 1.0)
 
-    [... old lines 581-582 not shown in this diff view ...]
-    y_pred_train_eval = y_pred_train
-    y_pred_test_eval = y_pred_test
-
-    if task_type == "classification":
-        cal_cfg = dict(calibration_cfg or {})
-        cal_enabled = bool(cal_cfg.get("enable", False)
-                           or cal_cfg.get("method"))
-        if cal_enabled and calibrate_predictions is not None:
-            method = cal_cfg.get("method", "sigmoid")
-            max_rows = cal_cfg.get("max_rows")
-            seed = cal_cfg.get("seed")
-            y_cal, p_cal = _sample_arrays(
-                y_true_train, y_pred_train, max_rows=max_rows, seed=seed)
-            try:
-                calibrator = calibrate_predictions(y_cal, p_cal, method=method)
-                y_pred_train_eval = calibrator.predict(y_pred_train)
-                y_pred_test_eval = calibrator.predict(y_pred_test)
-                calibration_info = {
-                    "method": calibrator.method, "max_rows": max_rows}
-            except Exception as exc:
-                print(
-                    f"[Report] Calibration failed for {model_name}/{model_key}: {exc}")
-
-        thr_cfg = dict(threshold_cfg or {})
-        thr_enabled = bool(
-            thr_cfg.get("enable", False)
-            or thr_cfg.get("metric")
-            or thr_cfg.get("value") is not None
+        y_pred_train_eval, y_pred_test_eval, calibration_info = _apply_calibration(
+            y_true_train, y_pred_train, y_pred_test, calibration_cfg, model_name, model_key
         )
-        threshold_value =
-        [... old line 613 not shown in this diff view ...]
-            threshold_value = float(thr_cfg["value"])
-            threshold_info = {"threshold": threshold_value, "source": "fixed"}
-        elif thr_enabled and select_threshold is not None:
-            max_rows = thr_cfg.get("max_rows")
-            seed = thr_cfg.get("seed")
-            y_thr, p_thr = _sample_arrays(
-                y_true_train, y_pred_train_eval, max_rows=max_rows, seed=seed)
-            threshold_info = select_threshold(
-                y_thr,
-                p_thr,
-                metric=thr_cfg.get("metric", "f1"),
-                min_positive_rate=thr_cfg.get("min_positive_rate"),
-                grid=thr_cfg.get("grid", 99),
-            )
-            threshold_value = float(threshold_info.get("threshold", 0.5))
-        else:
-            threshold_value = 0.5
-        metrics = eval_metrics_report(
-            y_true_test,
-            y_pred_test_eval,
-            task_type=task_type,
-            threshold=threshold_value,
+        threshold_value, threshold_info = _select_classification_threshold(
+            y_true_train, y_pred_train_eval, threshold_cfg
         )
-        [... old line 637 not shown in this diff view ...]
-        recall = float(metrics.get("recall", 0.0))
-        f1 = 0.0 if (precision + recall) == 0 else 2 * \
-            precision * recall / (precision + recall)
-        metrics["f1"] = float(f1)
-        metrics["threshold"] = float(threshold_value)
+        metrics = _compute_classification_metrics(y_true_test, y_pred_test_eval, threshold_value)
     else:
+        y_pred_test_eval = y_pred_test
+        calibration_info = None
+        threshold_info = None
         metrics = eval_metrics_report(
-            y_true_test,
-            y_pred_test_eval,
-            task_type=task_type,
-            weight=weight_test,
+            y_true_test, y_pred_test_eval, task_type=task_type, weight=weight_test
         )
 
-    [... old lines 651-654 not shown in this diff view ...]
-        ci = float(bootstrap_cfg.get("ci", 0.95))
-        seed = bootstrap_cfg.get("seed")
-
-        def _metric_fn(y_true, y_pred, weight=None):
-            vals = eval_metrics_report(
-                y_true,
-                y_pred,
-                task_type=task_type,
-                weight=weight,
-                threshold=metrics.get("threshold", 0.5),
-            )
-            if task_type == "classification":
-                prec = float(vals.get("precision", 0.0))
-                rec = float(vals.get("recall", 0.0))
-                vals["f1"] = 0.0 if (prec + rec) == 0 else 2 * \
-                    prec * rec / (prec + rec)
-            return vals
-
-        for name in metric_names:
-            if name not in metrics:
-                continue
-            ci_result = bootstrap_ci(
-                lambda y_t, y_p, w=None: float(
-                    _metric_fn(y_t, y_p, w).get(name, 0.0)),
-                y_true_test,
-                y_pred_test_eval,
-                weight=weight_test,
-                n_samples=n_samples,
-                ci=ci,
-                seed=seed,
-            )
-            bootstrap_results[str(name)] = ci_result
+    # Compute bootstrap confidence intervals
+    bootstrap_results = _compute_bootstrap_ci(
+        y_true_test, y_pred_test_eval, weight_test, metrics, bootstrap_cfg, task_type
+    )
 
-    [... old lines 688-695 not shown in this diff view ...]
-                model.test_data,
-                actual_col=model.resp_nme,
-                pred_col=pred_col,
-                group_cols=available_groups,
-                weight_col=weight_col if weight_col and weight_col in model.test_data.columns else None,
-            )
-            counts = (
-                model.test_data.groupby(available_groups, dropna=False)
-                .size()
-                .reset_index(name="count")
-            )
-            validation_table = validation_table.merge(
-                counts, on=available_groups, how="left")
-        except Exception as exc:
-            print(
-                f"[Report] group_metrics failed for {model_name}/{model_key}: {exc}")
-
-    risk_trend = None
-    if report_time_col and group_metrics is not None:
-        if report_time_col in model.test_data.columns:
-            try:
-                time_df = model.test_data.copy()
-                time_series = pd.to_datetime(
-                    time_df[report_time_col], errors="coerce")
-                time_df = time_df.loc[time_series.notna()].copy()
-                if not time_df.empty:
-                    time_df["_time_bucket"] = (
-                        pd.to_datetime(
-                            time_df[report_time_col], errors="coerce")
-                        .dt.to_period(report_time_freq)
-                        .dt.to_timestamp()
-                    )
-                    risk_trend = group_metrics(
-                        time_df,
-                        actual_col=model.resp_nme,
-                        pred_col=pred_col,
-                        group_cols=["_time_bucket"],
-                        weight_col=weight_col if weight_col and weight_col in time_df.columns else None,
-                    )
-                    counts = (
-                        time_df.groupby("_time_bucket", dropna=False)
-                        .size()
-                        .reset_index(name="count")
-                    )
-                    risk_trend = risk_trend.merge(
-                        counts, on="_time_bucket", how="left")
-                    risk_trend = risk_trend.sort_values(
-                        "_time_bucket", ascending=bool(report_time_ascending)
-                    ).reset_index(drop=True)
-                    risk_trend = risk_trend.rename(
-                        columns={"_time_bucket": report_time_col})
-            except Exception as exc:
-                print(
-                    f"[Report] time metrics failed for {model_name}/{model_key}: {exc}")
+    # Compute validation table and risk trend
+    validation_table = _compute_validation_table(
+        model, pred_col, report_group_cols, weight_col, model_name, model_key
+    )
+    risk_trend = _compute_risk_trend(
+        model, pred_col, report_time_col, report_time_freq,
+        report_time_ascending, weight_col, model_name, model_key
+    )
 
+    # Setup output directory
     report_root = (
         Path(report_output_dir)
         if report_output_dir
         else Path(model.output_manager.result_dir) / "reports"
     )
     report_root.mkdir(parents=True, exist_ok=True)
-
     version = f"{model_key}_{run_id}"
-    [... old lines 759-764 not shown in this diff view ...]
-        "calibration": calibration_info,
-        "bootstrap": bootstrap_results,
-        "data_path": str(data_path),
-        "data_fingerprint": data_fingerprint,
-        "config_sha256": config_sha,
-        "pred_col": pred_col,
-        "task_type": task_type,
-    }
-    metrics_path = report_root / f"{model_name}_{model_key}_metrics.json"
-    metrics_path.write_text(
-        json.dumps(metrics_payload, indent=2, ensure_ascii=True),
-        encoding="utf-8",
+
+    # Write metrics JSON
+    metrics_path = _write_metrics_json(
+        report_root, model_name, model_key, version, metrics,
+        threshold_info, calibration_info, bootstrap_results,
+        data_path, data_fingerprint, config_sha, pred_col, task_type
     )
 
-    [... old lines 779-785 not shown in this diff view ...]
-        notes_lines.append(
-            f"- Calibration: {calibration_info.get('method')}"
-        )
-    if threshold_info:
-        notes_lines.append(
-            f"- Threshold selection: {threshold_info}"
-        )
-    if bootstrap_results:
-        notes_lines.append("- Bootstrap: see metrics JSON for CI")
-    extra_notes = "\n".join(notes_lines)
-    payload = ReportPayload(
-        model_name=f"{model_name}/{model_key}",
-        model_version=version,
-        metrics={k: float(v) for k, v in metrics.items()},
-        risk_trend=risk_trend,
-        drift_report=psi_report_df,
-        validation_table=validation_table,
-        extra_notes=extra_notes,
-    )
-    report_path = write_report(
-        payload,
-        report_root / f"{model_name}_{model_key}_report.md",
-    )
+    # Write model report
+    report_path = _write_model_report(
+        report_root, model_name, model_key, version, metrics,
+        risk_trend, psi_report_df, validation_table,
+        calibration_info, threshold_info, bootstrap_results,
+        config_sha, data_fingerprint
+    )
 
-    [... old lines 810-815 not shown in this diff view ...]
-        tags = {str(k): str(v) for k, v in (registry_tags or {}).items()}
-        tags.update({
-            "model_key": str(model_key),
-            "task_type": str(task_type),
-            "data_path": str(data_path),
-            "data_sha256_prefix": str(data_fingerprint.get("sha256_prefix", "")),
-            "data_size": str(data_fingerprint.get("size", "")),
-            "data_mtime": str(data_fingerprint.get("mtime", "")),
-            "config_sha256": str(config_sha),
-        })
-        artifacts = []
-        trainer = model.trainers.get(model_key)
-        if trainer is not None:
-            try:
-                model_path = trainer.output.model_path(
-                    trainer._get_model_filename())
-                if os.path.exists(model_path):
-                    artifacts.append(ModelArtifact(
-                        path=model_path, description="trained model"))
-            except Exception:
-                pass
-        if report_path is not None:
-            artifacts.append(ModelArtifact(
-                path=str(report_path), description="model report"))
-        if metrics_path.exists():
-            artifacts.append(ModelArtifact(
-                path=str(metrics_path), description="metrics json"))
-        if bool(cfg.get("save_preprocess", False)):
-            artifact_path = cfg.get("preprocess_artifact_path")
-            if artifact_path:
-                preprocess_path = Path(str(artifact_path))
-                if not preprocess_path.is_absolute():
-                    preprocess_path = Path(
-                        model.output_manager.result_dir) / preprocess_path
-            else:
-                preprocess_path = Path(model.output_manager.result_path(
-                    f"{model.model_nme}_preprocess.json"
-                ))
-            if preprocess_path.exists():
-                artifacts.append(
-                    ModelArtifact(path=str(preprocess_path),
-                                  description="preprocess artifacts")
-                )
-        if bool(cfg.get("cache_predictions", False)):
-            cache_dir = cfg.get("prediction_cache_dir")
-            if cache_dir:
-                pred_root = Path(str(cache_dir))
-                if not pred_root.is_absolute():
-                    pred_root = Path(
-                        model.output_manager.result_dir) / pred_root
-            else:
-                pred_root = Path(
-                    model.output_manager.result_dir) / "predictions"
-            ext = "csv" if str(
-                cfg.get("prediction_cache_format", "parquet")).lower() == "csv" else "parquet"
-            for split_label in ("train", "test"):
-                pred_path = pred_root / \
-                    f"{model_name}_{model_key}_{split_label}.{ext}"
-                if pred_path.exists():
-                    artifacts.append(
-                        ModelArtifact(path=str(pred_path),
-                                      description=f"predictions {split_label}")
-                    )
-        registry.register(
-            name=str(model_name),
-            version=version,
-            metrics={k: float(v) for k, v in metrics.items()},
-            tags=tags,
-            artifacts=artifacts,
-            status=str(registry_status or "candidate"),
-            notes=f"model_key={model_key}",
+    # Register model
+    if register_model:
+        _register_model_to_registry(
+            model, model_name, model_key, version, metrics, task_type,
+            data_path, data_fingerprint, config_sha, registry_path,
+            registry_tags, registry_status, report_path, metrics_path, cfg
         )
 
 
-def
-[... old lines 891-896 not shown in this diff view ...]
+def _evaluate_with_context(
+    model: ropt.BayesOptModel,
+    ctx: EvaluationContext,
+) -> None:
+    """Evaluate model predictions using context object.
+
+    This is a cleaner interface that uses the EvaluationContext dataclass
+    instead of 19+ individual parameters.
+    """
+    _evaluate_and_report(
+        model,
+        model_name=ctx.identity.model_name,
+        model_key=ctx.identity.model_key,
+        cfg=ctx.cfg,
+        data_path=ctx.data_path,
+        data_fingerprint=ctx.data_fingerprint.to_dict(),
+        report_output_dir=ctx.report.output_dir,
+        report_group_cols=ctx.report.group_cols,
+        report_time_col=ctx.report.time_col,
+        report_time_freq=ctx.report.time_freq,
+        report_time_ascending=ctx.report.time_ascending,
+        psi_report_df=ctx.psi_report_df,
+        calibration_cfg={
+            "enable": ctx.calibration.enable,
+            "method": ctx.calibration.method,
+            "max_rows": ctx.calibration.max_rows,
+            "seed": ctx.calibration.seed,
+        },
+        threshold_cfg={
+            "enable": ctx.threshold.enable,
+            "metric": ctx.threshold.metric,
+            "value": ctx.threshold.value,
+            "min_positive_rate": ctx.threshold.min_positive_rate,
+            "grid": ctx.threshold.grid,
+            "max_rows": ctx.threshold.max_rows,
+            "seed": ctx.threshold.seed,
+        },
+        bootstrap_cfg={
+            "enable": ctx.bootstrap.enable,
+            "metrics": ctx.bootstrap.metrics,
+            "n_samples": ctx.bootstrap.n_samples,
+            "ci": ctx.bootstrap.ci,
+            "seed": ctx.bootstrap.seed,
+        },
+        register_model=ctx.registry.register,
+        registry_path=ctx.registry.path,
+        registry_tags=ctx.registry.tags,
+        registry_status=ctx.registry.status,
+        run_id=ctx.run_id,
+        config_sha=ctx.config_sha,
     )
-    plot_requested = bool(args.plot_curves or cfg.get("plot_curves", False))
-    config_sha = hashlib.sha256(config_path.read_bytes()).hexdigest()
-    run_id = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
 
-    def _safe_int_env(key: str, default: int) -> int:
-        try:
-            return int(os.environ.get(key, default))
-        except (TypeError, ValueError):
-            return default
-
-    dist_world_size = _safe_int_env("WORLD_SIZE", 1)
-    dist_rank = _safe_int_env("RANK", 0)
-    dist_active = dist_world_size > 1
-    is_main_process = (not dist_active) or dist_rank == 0
 
+def _create_ddp_barrier(dist_ctx: TrainingContext):
+    """Create a DDP barrier function for distributed training synchronization."""
     def _ddp_barrier(reason: str) -> None:
-        if not
+        if not dist_ctx.is_distributed:
             return
         torch_mod = getattr(ropt, "torch", None)
         dist_mod = getattr(torch_mod, "distributed", None)
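`_evaluate_with_context` above unpacks an `EvaluationContext` defined in the new `ins_pricing/cli/utils/evaluation_context.py` (+320 lines in this release, not shown in this diff). Judging only from the attributes the wrapper accesses, the dataclasses plausibly look like the sketch below; the field types and defaults are assumptions.

```python
# Sketch only: inferred from the attribute accesses in _evaluate_with_context.
# The actual dataclasses live in ins_pricing/cli/utils/evaluation_context.py;
# defaults and exact field types here are assumptions.
from dataclasses import dataclass, field, asdict
from pathlib import Path
from typing import Any, Dict, List, Optional


@dataclass
class ModelIdentity:
    model_name: str
    model_key: str


@dataclass
class DataFingerprint:
    sha256_prefix: str = ""
    size: int = 0
    mtime: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        return asdict(self)


@dataclass
class CalibrationConfig:
    enable: bool = False
    method: Optional[str] = None
    max_rows: Optional[int] = None
    seed: Optional[int] = None


@dataclass
class ThresholdConfig:
    enable: bool = False
    metric: Optional[str] = None
    value: Optional[float] = None
    min_positive_rate: Optional[float] = None
    grid: int = 99
    max_rows: Optional[int] = None
    seed: Optional[int] = None


@dataclass
class BootstrapConfig:
    enable: bool = False
    metrics: Optional[List[str]] = None
    n_samples: int = 200
    ci: float = 0.95
    seed: Optional[int] = None


@dataclass
class ReportConfig:
    output_dir: Optional[str] = None
    group_cols: Optional[List[str]] = None
    time_col: Optional[str] = None
    time_freq: str = "M"
    time_ascending: bool = True


@dataclass
class RegistryConfig:
    register: bool = False
    path: Optional[str] = None
    tags: Dict[str, Any] = field(default_factory=dict)
    status: str = "candidate"


@dataclass
class EvaluationContext:
    identity: ModelIdentity
    cfg: Dict[str, Any]
    data_path: Path
    data_fingerprint: DataFingerprint
    report: ReportConfig
    calibration: CalibrationConfig
    threshold: ThresholdConfig
    bootstrap: BootstrapConfig
    registry: RegistryConfig
    run_id: str
    config_sha: str
    psi_report_df: Any = None
```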
@@ -928,6 +1033,28 @@ def train_from_config(args: argparse.Namespace) -> None:
         except Exception as exc:
             print(f"[DDP] barrier failed during {reason}: {exc}", flush=True)
             raise
+    return _ddp_barrier
+
+
+def train_from_config(args: argparse.Namespace) -> None:
+    script_dir = Path(__file__).resolve().parents[1]
+    config_path, cfg = resolve_and_load_config(
+        args.config_json,
+        script_dir,
+        required_keys=["data_dir", "model_list",
+                       "model_categories", "target", "weight"],
+    )
+    plot_requested = bool(args.plot_curves or cfg.get("plot_curves", False))
+    config_sha = hashlib.sha256(config_path.read_bytes()).hexdigest()
+    run_id = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
+
+    # Use TrainingContext for distributed training state
+    dist_ctx = TrainingContext.from_env()
+    dist_world_size = dist_ctx.world_size
+    dist_rank = dist_ctx.rank
+    dist_active = dist_ctx.is_distributed
+    is_main_process = dist_ctx.is_main_process
+    _ddp_barrier = _create_ddp_barrier(dist_ctx)
 
     data_dir, data_format, data_path_template, dtype_map = resolve_data_config(
         cfg,
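`TrainingContext.from_env()` takes over the WORLD_SIZE/RANK parsing that the earlier hunk removed from `train_from_config`. A minimal sketch consistent with that removed logic (the real class also lives in `evaluation_context.py` and may carry more state):

```python
# Sketch only: reproduces the environment parsing removed from
# train_from_config; the real TrainingContext may expose more fields.
import os
from dataclasses import dataclass


def _safe_int_env(key: str, default: int) -> int:
    try:
        return int(os.environ.get(key, default))
    except (TypeError, ValueError):
        return default


@dataclass
class TrainingContext:
    world_size: int = 1
    rank: int = 0

    @classmethod
    def from_env(cls) -> "TrainingContext":
        return cls(world_size=_safe_int_env("WORLD_SIZE", 1),
                   rank=_safe_int_env("RANK", 0))

    @property
    def is_distributed(self) -> bool:
        return self.world_size > 1

    @property
    def is_main_process(self) -> bool:
        return (not self.is_distributed) or self.rank == 0
```

With the state wrapped in one object, `_create_ddp_barrier(dist_ctx)` can close over it instead of over loose module-level variables.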