PyPI - ins-pricing - Versions diffs - 0.1.6__py3-none-any.whl - Mend

ins-pricing 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169) hide show

ins_pricing/README.md +60 -0
ins_pricing/__init__.py +102 -0
ins_pricing/governance/README.md +18 -0
ins_pricing/governance/__init__.py +20 -0
ins_pricing/governance/approval.py +93 -0
ins_pricing/governance/audit.py +37 -0
ins_pricing/governance/registry.py +99 -0
ins_pricing/governance/release.py +159 -0
ins_pricing/modelling/BayesOpt.py +146 -0
ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
ins_pricing/modelling/BayesOpt_entry.py +575 -0
ins_pricing/modelling/BayesOpt_incremental.py +731 -0
ins_pricing/modelling/Explain_Run.py +36 -0
ins_pricing/modelling/Explain_entry.py +539 -0
ins_pricing/modelling/Pricing_Run.py +36 -0
ins_pricing/modelling/README.md +33 -0
ins_pricing/modelling/__init__.py +44 -0
ins_pricing/modelling/bayesopt/__init__.py +98 -0
ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
ins_pricing/modelling/bayesopt/core.py +1476 -0
ins_pricing/modelling/bayesopt/models.py +2196 -0
ins_pricing/modelling/bayesopt/trainers.py +2446 -0
ins_pricing/modelling/bayesopt/utils.py +1021 -0
ins_pricing/modelling/cli_common.py +136 -0
ins_pricing/modelling/explain/__init__.py +55 -0
ins_pricing/modelling/explain/gradients.py +334 -0
ins_pricing/modelling/explain/metrics.py +176 -0
ins_pricing/modelling/explain/permutation.py +155 -0
ins_pricing/modelling/explain/shap_utils.py +146 -0
ins_pricing/modelling/notebook_utils.py +284 -0
ins_pricing/modelling/plotting/__init__.py +45 -0
ins_pricing/modelling/plotting/common.py +63 -0
ins_pricing/modelling/plotting/curves.py +572 -0
ins_pricing/modelling/plotting/diagnostics.py +139 -0
ins_pricing/modelling/plotting/geo.py +362 -0
ins_pricing/modelling/plotting/importance.py +121 -0
ins_pricing/modelling/run_logging.py +133 -0
ins_pricing/modelling/tests/conftest.py +8 -0
ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
ins_pricing/modelling/tests/test_explain.py +56 -0
ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
ins_pricing/modelling/tests/test_graph_cache.py +33 -0
ins_pricing/modelling/tests/test_plotting.py +63 -0
ins_pricing/modelling/tests/test_plotting_library.py +150 -0
ins_pricing/modelling/tests/test_preprocessor.py +48 -0
ins_pricing/modelling/watchdog_run.py +211 -0
ins_pricing/pricing/README.md +44 -0
ins_pricing/pricing/__init__.py +27 -0
ins_pricing/pricing/calibration.py +39 -0
ins_pricing/pricing/data_quality.py +117 -0
ins_pricing/pricing/exposure.py +85 -0
ins_pricing/pricing/factors.py +91 -0
ins_pricing/pricing/monitoring.py +99 -0
ins_pricing/pricing/rate_table.py +78 -0
ins_pricing/production/__init__.py +21 -0
ins_pricing/production/drift.py +30 -0
ins_pricing/production/monitoring.py +143 -0
ins_pricing/production/scoring.py +40 -0
ins_pricing/reporting/README.md +20 -0
ins_pricing/reporting/__init__.py +11 -0
ins_pricing/reporting/report_builder.py +72 -0
ins_pricing/reporting/scheduler.py +45 -0
ins_pricing/setup.py +41 -0
ins_pricing v2/__init__.py +23 -0
ins_pricing v2/governance/__init__.py +20 -0
ins_pricing v2/governance/approval.py +93 -0
ins_pricing v2/governance/audit.py +37 -0
ins_pricing v2/governance/registry.py +99 -0
ins_pricing v2/governance/release.py +159 -0
ins_pricing v2/modelling/Explain_Run.py +36 -0
ins_pricing v2/modelling/Pricing_Run.py +36 -0
ins_pricing v2/modelling/__init__.py +151 -0
ins_pricing v2/modelling/cli_common.py +141 -0
ins_pricing v2/modelling/config.py +249 -0
ins_pricing v2/modelling/config_preprocess.py +254 -0
ins_pricing v2/modelling/core.py +741 -0
ins_pricing v2/modelling/data_container.py +42 -0
ins_pricing v2/modelling/explain/__init__.py +55 -0
ins_pricing v2/modelling/explain/gradients.py +334 -0
ins_pricing v2/modelling/explain/metrics.py +176 -0
ins_pricing v2/modelling/explain/permutation.py +155 -0
ins_pricing v2/modelling/explain/shap_utils.py +146 -0
ins_pricing v2/modelling/features.py +215 -0
ins_pricing v2/modelling/model_manager.py +148 -0
ins_pricing v2/modelling/model_plotting.py +463 -0
ins_pricing v2/modelling/models.py +2203 -0
ins_pricing v2/modelling/notebook_utils.py +294 -0
ins_pricing v2/modelling/plotting/__init__.py +45 -0
ins_pricing v2/modelling/plotting/common.py +63 -0
ins_pricing v2/modelling/plotting/curves.py +572 -0
ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
ins_pricing v2/modelling/plotting/geo.py +362 -0
ins_pricing v2/modelling/plotting/importance.py +121 -0
ins_pricing v2/modelling/run_logging.py +133 -0
ins_pricing v2/modelling/tests/conftest.py +8 -0
ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
ins_pricing v2/modelling/tests/test_explain.py +56 -0
ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
ins_pricing v2/modelling/tests/test_plotting.py +63 -0
ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
ins_pricing v2/modelling/trainers.py +2447 -0
ins_pricing v2/modelling/utils.py +1020 -0
ins_pricing v2/modelling/watchdog_run.py +211 -0
ins_pricing v2/pricing/__init__.py +27 -0
ins_pricing v2/pricing/calibration.py +39 -0
ins_pricing v2/pricing/data_quality.py +117 -0
ins_pricing v2/pricing/exposure.py +85 -0
ins_pricing v2/pricing/factors.py +91 -0
ins_pricing v2/pricing/monitoring.py +99 -0
ins_pricing v2/pricing/rate_table.py +78 -0
ins_pricing v2/production/__init__.py +21 -0
ins_pricing v2/production/drift.py +30 -0
ins_pricing v2/production/monitoring.py +143 -0
ins_pricing v2/production/scoring.py +40 -0
ins_pricing v2/reporting/__init__.py +11 -0
ins_pricing v2/reporting/report_builder.py +72 -0
ins_pricing v2/reporting/scheduler.py +45 -0
ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
ins_pricing v2/scripts/Explain_entry.py +545 -0
ins_pricing v2/scripts/__init__.py +1 -0
ins_pricing v2/scripts/train.py +568 -0
ins_pricing v2/setup.py +55 -0
ins_pricing v2/smoke_test.py +28 -0
ins_pricing-0.1.6.dist-info/METADATA +78 -0
ins_pricing-0.1.6.dist-info/RECORD +169 -0
ins_pricing-0.1.6.dist-info/WHEEL +5 -0
ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
user_packages/__init__.py +105 -0
user_packages legacy/BayesOpt.py +5659 -0
user_packages legacy/BayesOpt_entry.py +513 -0
user_packages legacy/BayesOpt_incremental.py +685 -0
user_packages legacy/Pricing_Run.py +36 -0
user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
user_packages legacy/Try/BayesOpt legacy.py +3280 -0
user_packages legacy/Try/BayesOpt.py +838 -0
user_packages legacy/Try/BayesOptAll.py +1569 -0
user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
user_packages legacy/Try/BayesOptSearch.py +830 -0
user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
user_packages legacy/Try/BayesOptV1.py +1911 -0
user_packages legacy/Try/BayesOptV10.py +2973 -0
user_packages legacy/Try/BayesOptV11.py +3001 -0
user_packages legacy/Try/BayesOptV12.py +3001 -0
user_packages legacy/Try/BayesOptV2.py +2065 -0
user_packages legacy/Try/BayesOptV3.py +2209 -0
user_packages legacy/Try/BayesOptV4.py +2342 -0
user_packages legacy/Try/BayesOptV5.py +2372 -0
user_packages legacy/Try/BayesOptV6.py +2759 -0
user_packages legacy/Try/BayesOptV7.py +2832 -0
user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
user_packages legacy/Try/BayesOptV9.py +2927 -0
user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
user_packages legacy/Try/xgbbayesopt.py +523 -0
user_packages legacy/__init__.py +19 -0
user_packages legacy/cli_common.py +124 -0
user_packages legacy/notebook_utils.py +228 -0
user_packages legacy/watchdog_run.py +202 -0

ins_pricing v2/modelling/plotting/curves.py ADDED Viewed

@@ -0,0 +1,572 @@
+from __future__ import annotations
+from typing import Mapping, Optional, Sequence, Tuple
+import numpy as np
+import pandas as pd
+from .common import EPS, PlotStyle, finalize_figure, plt
+try:  # optional dependency guard
+    from sklearn.metrics import (
+        auc,
+        average_precision_score,
+        precision_recall_curve,
+        roc_curve,
+    )
+    from sklearn.calibration import calibration_curve
+except Exception:  # pragma: no cover - handled at call time
+    auc = None
+    average_precision_score = None
+    precision_recall_curve = None
+    roc_curve = None
+    calibration_curve = None
+def _require_sklearn(func_name: str) -> None:
+    if roc_curve is None or auc is None:
+        raise RuntimeError(f"{func_name} requires scikit-learn to be installed.")
+def _to_1d(values: Sequence[float], name: str) -> np.ndarray:
+    arr = np.asarray(values, dtype=float).reshape(-1)
+    if arr.size == 0:
+        raise ValueError(f"{name} is empty.")
+    return arr
+def _align_arrays(
+    pred: Sequence[float],
+    actual: Sequence[float],
+    weight: Optional[Sequence[float]] = None,
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    pred_arr = _to_1d(pred, "pred")
+    actual_arr = _to_1d(actual, "actual")
+    if len(pred_arr) != len(actual_arr):
+        raise ValueError("pred and actual must have the same length.")
+    if weight is None:
+        weight_arr = np.ones_like(pred_arr, dtype=float)
+    else:
+        weight_arr = _to_1d(weight, "weight")
+        if len(weight_arr) != len(pred_arr):
+            raise ValueError("weight must have the same length as pred.")
+    mask = np.isfinite(pred_arr) & np.isfinite(actual_arr) & np.isfinite(weight_arr)
+    pred_arr = pred_arr[mask]
+    actual_arr = actual_arr[mask]
+    weight_arr = weight_arr[mask]
+    return pred_arr, actual_arr, weight_arr
+def _bin_by_weight(
+    data: pd.DataFrame,
+    *,
+    sort_col: str,
+    weight_col: str,
+    n_bins: int,
+) -> pd.DataFrame:
+    n_bins = max(1, int(n_bins))
+    data_sorted = data.sort_values(by=sort_col, ascending=True).copy()
+    weight_sum = float(data_sorted[weight_col].sum())
+    if weight_sum <= EPS:
+        data_sorted.loc[:, "bins"] = 0
+    else:
+        data_sorted.loc[:, "cum_weight"] = data_sorted[weight_col].cumsum()
+        data_sorted.loc[:, "bins"] = np.floor(
+            data_sorted["cum_weight"] * float(n_bins) / weight_sum
+        )
+        data_sorted.loc[data_sorted["bins"] == n_bins, "bins"] = n_bins - 1
+    return data_sorted.groupby(["bins"], observed=True).sum(numeric_only=True)
+def lift_table(
+    pred: Sequence[float],
+    actual: Sequence[float],
+    weight: Optional[Sequence[float]] = None,
+    *,
+    n_bins: int = 10,
+    pred_weighted: bool = False,
+    actual_weighted: bool = True,
+) -> pd.DataFrame:
+    """Compute lift table for a single model.
+    pred/actual should be 1d arrays. If pred_weighted/actual_weighted is True,
+    the value is already multiplied by weight and will not be re-weighted.
+    """
+    pred_arr, actual_arr, weight_arr = _align_arrays(pred, actual, weight)
+    weight_safe = np.maximum(weight_arr, EPS)
+    if pred_weighted:
+        pred_raw = pred_arr / weight_safe
+        w_pred = pred_arr
+    else:
+        pred_raw = pred_arr
+        w_pred = pred_arr * weight_arr
+    if actual_weighted:
+        w_act = actual_arr
+    else:
+        w_act = actual_arr * weight_arr
+    lift_df = pd.DataFrame(
+        {
+            "pred_sort": pred_raw,
+            "w_pred": w_pred,
+            "act": w_act,
+            "weight": weight_arr,
+        }
+    )
+    plot_data = _bin_by_weight(
+        lift_df, sort_col="pred_sort", weight_col="weight", n_bins=n_bins
+    )
+    denom = np.maximum(plot_data["weight"], EPS)
+    plot_data["exp_v"] = plot_data["w_pred"] / denom
+    plot_data["act_v"] = plot_data["act"] / denom
+    plot_data.reset_index(inplace=True)
+    return plot_data
+def plot_lift_curve(
+    pred: Sequence[float],
+    actual: Sequence[float],
+    weight: Optional[Sequence[float]] = None,
+    *,
+    n_bins: int = 10,
+    title: str = "Lift Chart",
+    pred_label: str = "Predicted",
+    act_label: str = "Actual",
+    weight_label: str = "Earned Exposure",
+    pred_weighted: bool = False,
+    actual_weighted: bool = True,
+    ax: Optional[plt.Axes] = None,
+    show: bool = False,
+    save_path: Optional[str] = None,
+    style: Optional[PlotStyle] = None,
+) -> plt.Figure:
+    """Plot the lift chart for one model.
+
+    The binned table comes from ``lift_table``. ``act_v`` (red) and ``exp_v``
+    (blue) are drawn as lines against the bin row index, and the per-bin
+    ``weight`` is drawn as bars on a twin y-axis.
+
+    When ``ax`` is supplied the chart is drawn on it and ``finalize_figure``
+    is not called, so ``show`` and ``save_path`` only take effect when this
+    function creates the figure itself.
+
+    Returns the figure that owns the axes.
+    """
+    style = style or PlotStyle()
+    plot_data = lift_table(
+        pred,
+        actual,
+        weight,
+        n_bins=n_bins,
+        pred_weighted=pred_weighted,
+        actual_weighted=actual_weighted,
+    )
+    # Remember whether the figure is ours; only then is it finalized below.
+    created_fig = ax is None
+    if created_fig:
+        fig, ax = plt.subplots(figsize=style.figsize)
+    else:
+        fig = ax.figure
+    ax.plot(plot_data.index, plot_data["act_v"], label=act_label, color="red")
+    ax.plot(plot_data.index, plot_data["exp_v"], label=pred_label, color="blue")
+    ax.set_title(title, fontsize=style.title_size)
+    ax.set_xticks(plot_data.index)
+    ax.set_xticklabels(plot_data.index, rotation=90, fontsize=style.tick_size)
+    ax.tick_params(axis="y", labelsize=style.tick_size)
+    if style.grid:
+        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
+    ax.legend(loc="upper left", fontsize=style.legend_size, frameon=False)
+    ax.margins(0.05)
+    # Secondary axis: bin weight as semi-transparent bars behind the lines.
+    ax2 = ax.twinx()
+    ax2.bar(
+        plot_data.index,
+        plot_data["weight"],
+        alpha=0.5,
+        color=style.weight_color,
+        label=weight_label,
+    )
+    ax2.tick_params(axis="y", labelsize=style.tick_size)
+    ax2.legend(loc="upper right", fontsize=style.legend_size, frameon=False)
+    if created_fig:
+        finalize_figure(fig, save_path=save_path, show=show, style=style)
+    return fig
+def double_lift_table(
+    pred1: Sequence[float],
+    pred2: Sequence[float],
+    actual: Sequence[float],
+    weight: Optional[Sequence[float]] = None,
+    *,
+    n_bins: int = 10,
+    pred1_weighted: bool = False,
+    pred2_weighted: bool = False,
+    actual_weighted: bool = True,
+) -> pd.DataFrame:
+    pred1_arr, actual_arr, weight_arr = _align_arrays(pred1, actual, weight)
+    pred2_arr, _, _ = _align_arrays(pred2, actual, weight_arr)
+    weight_safe = np.maximum(weight_arr, EPS)
+    pred1_raw = pred1_arr / weight_safe if pred1_weighted else pred1_arr
+    pred2_raw = pred2_arr / weight_safe if pred2_weighted else pred2_arr
+    w_pred1 = pred1_raw * weight_arr
+    w_pred2 = pred2_raw * weight_arr
+    w_act = actual_arr if actual_weighted else actual_arr * weight_arr
+    lift_df = pd.DataFrame(
+        {
+            "diff_ly": pred1_raw / np.maximum(pred2_raw, EPS),
+            "pred1": w_pred1,
+            "pred2": w_pred2,
+            "act": w_act,
+            "weight": weight_arr,
+        }
+    )
+    plot_data = _bin_by_weight(
+        lift_df, sort_col="diff_ly", weight_col="weight", n_bins=n_bins
+    )
+    denom = np.maximum(plot_data["act"], EPS)
+    plot_data["exp_v1"] = plot_data["pred1"] / denom
+    plot_data["exp_v2"] = plot_data["pred2"] / denom
+    plot_data["act_v"] = plot_data["act"] / denom
+    plot_data.reset_index(inplace=True)
+    return plot_data
+def plot_double_lift_curve(
+    pred1: Sequence[float],
+    pred2: Sequence[float],
+    actual: Sequence[float],
+    weight: Optional[Sequence[float]] = None,
+    *,
+    n_bins: int = 10,
+    title: str = "Double Lift Chart",
+    label1: str = "Model 1",
+    label2: str = "Model 2",
+    act_label: str = "Actual",
+    weight_label: str = "Earned Exposure",
+    pred1_weighted: bool = False,
+    pred2_weighted: bool = False,
+    actual_weighted: bool = True,
+    ax: Optional[plt.Axes] = None,
+    show: bool = False,
+    save_path: Optional[str] = None,
+    style: Optional[PlotStyle] = None,
+) -> plt.Figure:
+    """Plot the double-lift chart comparing two models.
+
+    The binned table comes from ``double_lift_table``. ``act_v`` (red),
+    ``exp_v1`` (blue) and ``exp_v2`` (black) are drawn as lines against the
+    bin row index, and the per-bin ``weight`` is drawn as bars on a twin
+    y-axis. The x-axis is labelled ``"<label1> / <label2>"``.
+
+    When ``ax`` is supplied the chart is drawn on it and ``finalize_figure``
+    is not called, so ``show`` and ``save_path`` only take effect when this
+    function creates the figure itself.
+
+    Returns the figure that owns the axes.
+    """
+    style = style or PlotStyle()
+    plot_data = double_lift_table(
+        pred1,
+        pred2,
+        actual,
+        weight,
+        n_bins=n_bins,
+        pred1_weighted=pred1_weighted,
+        pred2_weighted=pred2_weighted,
+        actual_weighted=actual_weighted,
+    )
+    # Remember whether the figure is ours; only then is it finalized below.
+    created_fig = ax is None
+    if created_fig:
+        fig, ax = plt.subplots(figsize=style.figsize)
+    else:
+        fig = ax.figure
+    ax.plot(plot_data.index, plot_data["act_v"], label=act_label, color="red")
+    ax.plot(plot_data.index, plot_data["exp_v1"], label=label1, color="blue")
+    ax.plot(plot_data.index, plot_data["exp_v2"], label=label2, color="black")
+    ax.set_title(title, fontsize=style.title_size)
+    ax.set_xticks(plot_data.index)
+    ax.set_xticklabels(plot_data.index, rotation=90, fontsize=style.tick_size)
+    ax.set_xlabel(f"{label1} / {label2}", fontsize=style.label_size)
+    ax.tick_params(axis="y", labelsize=style.tick_size)
+    if style.grid:
+        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
+    ax.legend(loc="upper left", fontsize=style.legend_size, frameon=False)
+    ax.margins(0.1)
+    # Secondary axis: bin weight as semi-transparent bars behind the lines.
+    ax2 = ax.twinx()
+    ax2.bar(
+        plot_data.index,
+        plot_data["weight"],
+        alpha=0.5,
+        color=style.weight_color,
+        label=weight_label,
+    )
+    ax2.tick_params(axis="y", labelsize=style.tick_size)
+    ax2.legend(loc="upper right", fontsize=style.legend_size, frameon=False)
+    if created_fig:
+        finalize_figure(fig, save_path=save_path, show=show, style=style)
+    return fig
+def plot_roc_curves(
+    y_true: Sequence[float],
+    scores: Mapping[str, Sequence[float]],
+    *,
+    weight: Optional[Sequence[float]] = None,
+    title: str = "ROC Curve",
+    ax: Optional[plt.Axes] = None,
+    show: bool = False,
+    save_path: Optional[str] = None,
+    style: Optional[PlotStyle] = None,
+) -> plt.Figure:
+    _require_sklearn("plot_roc_curves")
+    style = style or PlotStyle()
+    created_fig = ax is None
+    if created_fig:
+        fig, ax = plt.subplots(figsize=style.figsize)
+    else:
+        fig = ax.figure
+    for idx, (label, score) in enumerate(scores.items()):
+        s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
+        try:
+            fpr, tpr, _ = roc_curve(y_arr, s_arr, sample_weight=w_arr)
+        except TypeError:
+            fpr, tpr, _ = roc_curve(y_arr, s_arr)
+        auc_val = auc(fpr, tpr)
+        color = style.palette[idx % len(style.palette)]
+        ax.plot(fpr, tpr, color=color, label=f"{label} (AUC={auc_val:.3f})")
+    ax.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1)
+    ax.set_xlabel("False Positive Rate", fontsize=style.label_size)
+    ax.set_ylabel("True Positive Rate", fontsize=style.label_size)
+    ax.set_title(title, fontsize=style.title_size)
+    ax.tick_params(axis="both", labelsize=style.tick_size)
+    if style.grid:
+        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
+    ax.legend(loc="lower right", fontsize=style.legend_size, frameon=False)
+    if created_fig:
+        finalize_figure(fig, save_path=save_path, show=show, style=style)
+    return fig
+def plot_pr_curves(
+    y_true: Sequence[float],
+    scores: Mapping[str, Sequence[float]],
+    *,
+    weight: Optional[Sequence[float]] = None,
+    title: str = "Precision-Recall Curve",
+    ax: Optional[plt.Axes] = None,
+    show: bool = False,
+    save_path: Optional[str] = None,
+    style: Optional[PlotStyle] = None,
+) -> plt.Figure:
+    if precision_recall_curve is None or average_precision_score is None:
+        raise RuntimeError("plot_pr_curves requires scikit-learn to be installed.")
+    style = style or PlotStyle()
+    created_fig = ax is None
+    if created_fig:
+        fig, ax = plt.subplots(figsize=style.figsize)
+    else:
+        fig = ax.figure
+    for idx, (label, score) in enumerate(scores.items()):
+        s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
+        try:
+            precision, recall, _ = precision_recall_curve(
+                y_arr, s_arr, sample_weight=w_arr
+            )
+            ap = average_precision_score(y_arr, s_arr, sample_weight=w_arr)
+        except TypeError:
+            precision, recall, _ = precision_recall_curve(y_arr, s_arr)
+            ap = average_precision_score(y_arr, s_arr)
+        color = style.palette[idx % len(style.palette)]
+        ax.plot(recall, precision, color=color, label=f"{label} (AP={ap:.3f})")
+    ax.set_xlabel("Recall", fontsize=style.label_size)
+    ax.set_ylabel("Precision", fontsize=style.label_size)
+    ax.set_title(title, fontsize=style.title_size)
+    ax.tick_params(axis="both", labelsize=style.tick_size)
+    if style.grid:
+        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
+    ax.legend(loc="lower left", fontsize=style.legend_size, frameon=False)
+    if created_fig:
+        finalize_figure(fig, save_path=save_path, show=show, style=style)
+    return fig
+def plot_ks_curve(
+    y_true: Sequence[float],
+    score: Sequence[float],
+    *,
+    weight: Optional[Sequence[float]] = None,
+    title: str = "KS Curve",
+    ax: Optional[plt.Axes] = None,
+    show: bool = False,
+    save_path: Optional[str] = None,
+    style: Optional[PlotStyle] = None,
+) -> plt.Figure:
+    _require_sklearn("plot_ks_curve")
+    style = style or PlotStyle()
+    s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
+    try:
+        fpr, tpr, thresholds = roc_curve(y_arr, s_arr, sample_weight=w_arr)
+    except TypeError:
+        fpr, tpr, thresholds = roc_curve(y_arr, s_arr)
+    ks_vals = tpr - fpr
+    ks_idx = int(np.argmax(ks_vals))
+    ks_val = float(ks_vals[ks_idx])
+    created_fig = ax is None
+    if created_fig:
+        fig, ax = plt.subplots(figsize=style.figsize)
+    else:
+        fig = ax.figure
+    ax.plot(thresholds, tpr, label="TPR", color=style.palette[0])
+    ax.plot(thresholds, fpr, label="FPR", color=style.palette[1])
+    ax.plot(thresholds, ks_vals, label=f"KS={ks_val:.3f}", color=style.palette[3])
+    ax.set_title(title, fontsize=style.title_size)
+    ax.set_xlabel("Threshold", fontsize=style.label_size)
+    ax.set_ylabel("Rate", fontsize=style.label_size)
+    ax.tick_params(axis="both", labelsize=style.tick_size)
+    if style.grid:
+        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
+    ax.legend(loc="best", fontsize=style.legend_size, frameon=False)
+    if created_fig:
+        finalize_figure(fig, save_path=save_path, show=show, style=style)
+    return fig
+def plot_calibration_curve(
+    y_true: Sequence[float],
+    score: Sequence[float],
+    *,
+    weight: Optional[Sequence[float]] = None,
+    n_bins: int = 10,
+    title: str = "Calibration Curve",
+    ax: Optional[plt.Axes] = None,
+    show: bool = False,
+    save_path: Optional[str] = None,
+    style: Optional[PlotStyle] = None,
+) -> plt.Figure:
+    """Plot a quantile-binned calibration curve (mean observed vs. mean predicted).
+
+    ``n_bins`` is floored at 2. The observed curve is drawn with markers next
+    to a dashed ``y = x`` reference line labelled "Ideal". The figure is only
+    finalized (saved/shown) when it was created here rather than passed in
+    through ``ax``.
+
+    Raises:
+        RuntimeError: if scikit-learn's ``calibration_curve`` is unavailable.
+    """
+    if calibration_curve is None:
+        raise RuntimeError("plot_calibration_curve requires scikit-learn to be installed.")
+    style = style or PlotStyle()
+    s_arr, y_arr, w_arr = _align_arrays(score, y_true, weight)
+    # Try the weighted call first. If the installed calibration_curve rejects
+    # ``sample_weight`` with a TypeError, fall back to the unweighted curve;
+    # in that case the weights are silently ignored.
+    try:
+        prob_true, prob_pred = calibration_curve(
+            y_arr,
+            s_arr,
+            n_bins=max(2, int(n_bins)),
+            strategy="quantile",
+            sample_weight=w_arr,
+        )
+    except TypeError:
+        prob_true, prob_pred = calibration_curve(
+            y_arr,
+            s_arr,
+            n_bins=max(2, int(n_bins)),
+            strategy="quantile",
+        )
+    # Remember whether the figure is ours; only then is it finalized below.
+    created_fig = ax is None
+    if created_fig:
+        fig, ax = plt.subplots(figsize=style.figsize)
+    else:
+        fig = ax.figure
+    ax.plot(prob_pred, prob_true, marker="o", label="Observed")
+    ax.plot([0, 1], [0, 1], linestyle="--", color="gray", linewidth=1, label="Ideal")
+    ax.set_xlabel("Mean Predicted", fontsize=style.label_size)
+    ax.set_ylabel("Mean Observed", fontsize=style.label_size)
+    ax.set_title(title, fontsize=style.title_size)
+    ax.tick_params(axis="both", labelsize=style.tick_size)
+    if style.grid:
+        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
+    ax.legend(loc="best", fontsize=style.legend_size, frameon=False)
+    if created_fig:
+        finalize_figure(fig, save_path=save_path, show=show, style=style)
+    return fig
+def plot_conversion_lift(
+    pred: Sequence[float],
+    actual_binary: Sequence[float],
+    weight: Optional[Sequence[float]] = None,
+    *,
+    n_bins: int = 20,
+    title: str = "Conversion Lift",
+    ax: Optional[plt.Axes] = None,
+    show: bool = False,
+    save_path: Optional[str] = None,
+    style: Optional[PlotStyle] = None,
+) -> plt.Figure:
+    """Plot the weighted conversion rate per score bin against the overall rate.
+
+    Rows are sorted by ``pred`` and their cumulative weight is cut into
+    ``max(2, n_bins)`` equal-width intervals; if the total weight is not above
+    ``EPS`` every row goes to bin 0. Each bin's rate is the weighted sum of
+    ``actual_binary`` divided by the bin weight (floored at ``EPS``). The
+    overall weighted rate is drawn as a dashed horizontal line. The figure is
+    only finalized (saved/shown) when it was created here rather than passed
+    in through ``ax``.
+    """
+    style = style or PlotStyle()
+    pred_arr, actual_arr, weight_arr = _align_arrays(pred, actual_binary, weight)
+    data = pd.DataFrame(
+        {
+            "pred": pred_arr,
+            "actual": actual_arr,
+            "weight": weight_arr,
+        }
+    )
+    data = data.sort_values(by="pred", ascending=True).copy()
+    data["cum_weight"] = data["weight"].cumsum()
+    total_weight = float(data["weight"].sum())
+    if total_weight > EPS:
+        # Equal-width cuts of the cumulative weight, labelled 0..k-1.
+        data["bin"] = pd.cut(
+            data["cum_weight"],
+            bins=max(2, int(n_bins)),
+            labels=False,
+            right=False,
+        )
+    else:
+        data["bin"] = 0
+    data["weighted_actual"] = data["actual"] * data["weight"]
+    lift_agg = data.groupby("bin", observed=True).agg(
+        total_weight=("weight", "sum"),
+        weighted_actual=("weighted_actual", "sum"),
+    )
+    lift_agg = lift_agg.reset_index()
+    lift_agg["conversion_rate"] = lift_agg["weighted_actual"] / np.maximum(
+        lift_agg["total_weight"], EPS
+    )
+    overall_rate = float(lift_agg["weighted_actual"].sum()) / max(total_weight, EPS)
+    # Remember whether the figure is ours; only then is it finalized below.
+    created_fig = ax is None
+    if created_fig:
+        fig, ax = plt.subplots(figsize=style.figsize)
+    else:
+        fig = ax.figure
+    ax.axhline(
+        y=overall_rate,
+        color="gray",
+        linestyle="--",
+        label=f"Overall ({overall_rate:.2%})",
+    )
+    ax.plot(
+        lift_agg["bin"],
+        lift_agg["conversion_rate"],
+        marker="o",
+        linestyle="-",
+        label="Actual Rate",
+    )
+    ax.set_title(title, fontsize=style.title_size)
+    ax.set_xlabel("Score Bin", fontsize=style.label_size)
+    ax.set_ylabel("Conversion Rate", fontsize=style.label_size)
+    ax.tick_params(axis="both", labelsize=style.tick_size)
+    if style.grid:
+        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
+    ax.legend(loc="best", fontsize=style.legend_size, frameon=False)
+    if created_fig:
+        finalize_figure(fig, save_path=save_path, show=show, style=style)
+    return fig

ins_pricing v2/modelling/plotting/diagnostics.py ADDED Viewed

@@ -0,0 +1,139 @@
+from __future__ import annotations
+from typing import Mapping, Optional, Sequence
+import numpy as np
+import pandas as pd
+from .common import EPS, PlotStyle, finalize_figure, plt
+def plot_loss_curve(
+    *,
+    history: Optional[Mapping[str, Sequence[float]]] = None,
+    train: Optional[Sequence[float]] = None,
+    val: Optional[Sequence[float]] = None,
+    title: str = "Loss vs. Epoch",
+    ax: Optional[plt.Axes] = None,
+    show: bool = False,
+    save_path: Optional[str] = None,
+    style: Optional[PlotStyle] = None,
+) -> Optional[plt.Figure]:
+    style = style or PlotStyle()
+    if history is not None:
+        if train is None:
+            train = history.get("train")
+        if val is None:
+            val = history.get("val")
+    train_hist = list(train or [])
+    val_hist = list(val or [])
+    if not train_hist and not val_hist:
+        return None
+    created_fig = ax is None
+    if created_fig:
+        fig, ax = plt.subplots(figsize=style.figsize)
+    else:
+        fig = ax.figure
+    if train_hist:
+        ax.plot(
+            range(1, len(train_hist) + 1),
+            train_hist,
+            label="Train Loss",
+            color="tab:blue",
+        )
+    if val_hist:
+        ax.plot(
+            range(1, len(val_hist) + 1),
+            val_hist,
+            label="Validation Loss",
+            color="tab:orange",
+        )
+    ax.set_xlabel("Epoch", fontsize=style.label_size)
+    ax.set_ylabel("Weighted Loss", fontsize=style.label_size)
+    ax.set_title(title, fontsize=style.title_size)
+    ax.tick_params(axis="both", labelsize=style.tick_size)
+    if style.grid:
+        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
+    ax.legend(loc="best", fontsize=style.legend_size, frameon=False)
+    if created_fig:
+        finalize_figure(fig, save_path=save_path, show=show, style=style)
+    return fig
+def plot_oneway(
+    df: pd.DataFrame,
+    *,
+    feature: str,
+    weight_col: str,
+    target_col: str,
+    n_bins: int = 10,
+    is_categorical: bool = False,
+    title: Optional[str] = None,
+    ax: Optional[plt.Axes] = None,
+    show: bool = False,
+    save_path: Optional[str] = None,
+    style: Optional[PlotStyle] = None,
+) -> Optional[plt.Figure]:
+    """Plot a one-way analysis of ``target_col`` by ``feature``.
+
+    A categorical feature is grouped as is. Otherwise the feature is coerced
+    to numeric and split with ``pd.qcut`` into ``n_bins`` quantile bins
+    (duplicate edges dropped), falling back to ``pd.cut`` on ``ValueError``.
+    Numeric columns are summed per group; the red line shows the summed
+    ``target_col`` divided by the summed ``weight_col`` (floored at ``EPS``)
+    and the bars on the twin axis show the summed weight.
+
+    NOTE(review): the target is summed and then divided by the summed weight,
+    so ``target_col`` appears to be expected as a weight-multiplied total --
+    confirm with callers.
+
+    Raises:
+        KeyError: if ``feature``, ``weight_col`` or ``target_col`` is not a
+            column of ``df``.
+    """
+    if feature not in df.columns:
+        raise KeyError(f"feature '{feature}' not found in data.")
+    if weight_col not in df.columns:
+        raise KeyError(f"weight_col '{weight_col}' not found in data.")
+    if target_col not in df.columns:
+        raise KeyError(f"target_col '{target_col}' not found in data.")
+    style = style or PlotStyle()
+    title = title or f"Analysis of {feature}"
+    if is_categorical:
+        group_col = feature
+        plot_source = df
+    else:
+        group_col = f"{feature}_bins"
+        # Non-numeric entries become NaN and therefore fall outside every bin.
+        series = pd.to_numeric(df[feature], errors="coerce")
+        try:
+            bins = pd.qcut(series, n_bins, duplicates="drop")
+        except ValueError:
+            bins = pd.cut(series, bins=max(1, int(n_bins)), duplicates="drop")
+        # assign() returns a new frame, so the caller's df is left untouched.
+        plot_source = df.assign(**{group_col: bins})
+    plot_data = plot_source.groupby([group_col], observed=True).sum(numeric_only=True)
+    plot_data.reset_index(inplace=True)
+    denom = np.maximum(plot_data[weight_col].to_numpy(dtype=float), EPS)
+    plot_data["act_v"] = plot_data[target_col].to_numpy(dtype=float) / denom
+    # Remember whether the figure is ours; only then is it finalized below.
+    created_fig = ax is None
+    if created_fig:
+        fig, ax = plt.subplots(figsize=style.figsize)
+    else:
+        fig = ax.figure
+    ax.plot(plot_data.index, plot_data["act_v"], label="Actual", color="red")
+    ax.set_title(title, fontsize=style.title_size)
+    ax.set_xticks(plot_data.index)
+    labels = plot_data[group_col].astype(str).tolist()
+    # Shrink the tick font when there are many groups so labels stay legible.
+    tick_size = 3 if len(labels) > 50 else style.tick_size
+    ax.set_xticklabels(labels, rotation=90, fontsize=tick_size)
+    ax.tick_params(axis="y", labelsize=style.tick_size)
+    if style.grid:
+        ax.grid(True, linestyle=style.grid_style, alpha=style.grid_alpha)
+    ax2 = ax.twinx()
+    ax2.bar(
+        plot_data.index,
+        plot_data[weight_col],
+        alpha=0.5,
+        color=style.weight_color,
+    )
+    ax2.tick_params(axis="y", labelsize=style.tick_size)
+    if created_fig:
+        finalize_figure(fig, save_path=save_path, show=show, style=style)
+    return fig