ins_pricing-0.1.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +60 -0
- ins_pricing/__init__.py +102 -0
- ins_pricing/governance/README.md +18 -0
- ins_pricing/governance/__init__.py +20 -0
- ins_pricing/governance/approval.py +93 -0
- ins_pricing/governance/audit.py +37 -0
- ins_pricing/governance/registry.py +99 -0
- ins_pricing/governance/release.py +159 -0
- ins_pricing/modelling/BayesOpt.py +146 -0
- ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
- ins_pricing/modelling/BayesOpt_entry.py +575 -0
- ins_pricing/modelling/BayesOpt_incremental.py +731 -0
- ins_pricing/modelling/Explain_Run.py +36 -0
- ins_pricing/modelling/Explain_entry.py +539 -0
- ins_pricing/modelling/Pricing_Run.py +36 -0
- ins_pricing/modelling/README.md +33 -0
- ins_pricing/modelling/__init__.py +44 -0
- ins_pricing/modelling/bayesopt/__init__.py +98 -0
- ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
- ins_pricing/modelling/bayesopt/core.py +1476 -0
- ins_pricing/modelling/bayesopt/models.py +2196 -0
- ins_pricing/modelling/bayesopt/trainers.py +2446 -0
- ins_pricing/modelling/bayesopt/utils.py +1021 -0
- ins_pricing/modelling/cli_common.py +136 -0
- ins_pricing/modelling/explain/__init__.py +55 -0
- ins_pricing/modelling/explain/gradients.py +334 -0
- ins_pricing/modelling/explain/metrics.py +176 -0
- ins_pricing/modelling/explain/permutation.py +155 -0
- ins_pricing/modelling/explain/shap_utils.py +146 -0
- ins_pricing/modelling/notebook_utils.py +284 -0
- ins_pricing/modelling/plotting/__init__.py +45 -0
- ins_pricing/modelling/plotting/common.py +63 -0
- ins_pricing/modelling/plotting/curves.py +572 -0
- ins_pricing/modelling/plotting/diagnostics.py +139 -0
- ins_pricing/modelling/plotting/geo.py +362 -0
- ins_pricing/modelling/plotting/importance.py +121 -0
- ins_pricing/modelling/run_logging.py +133 -0
- ins_pricing/modelling/tests/conftest.py +8 -0
- ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing/modelling/tests/test_explain.py +56 -0
- ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing/modelling/tests/test_plotting.py +63 -0
- ins_pricing/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing/modelling/watchdog_run.py +211 -0
- ins_pricing/pricing/README.md +44 -0
- ins_pricing/pricing/__init__.py +27 -0
- ins_pricing/pricing/calibration.py +39 -0
- ins_pricing/pricing/data_quality.py +117 -0
- ins_pricing/pricing/exposure.py +85 -0
- ins_pricing/pricing/factors.py +91 -0
- ins_pricing/pricing/monitoring.py +99 -0
- ins_pricing/pricing/rate_table.py +78 -0
- ins_pricing/production/__init__.py +21 -0
- ins_pricing/production/drift.py +30 -0
- ins_pricing/production/monitoring.py +143 -0
- ins_pricing/production/scoring.py +40 -0
- ins_pricing/reporting/README.md +20 -0
- ins_pricing/reporting/__init__.py +11 -0
- ins_pricing/reporting/report_builder.py +72 -0
- ins_pricing/reporting/scheduler.py +45 -0
- ins_pricing/setup.py +41 -0
- ins_pricing v2/__init__.py +23 -0
- ins_pricing v2/governance/__init__.py +20 -0
- ins_pricing v2/governance/approval.py +93 -0
- ins_pricing v2/governance/audit.py +37 -0
- ins_pricing v2/governance/registry.py +99 -0
- ins_pricing v2/governance/release.py +159 -0
- ins_pricing v2/modelling/Explain_Run.py +36 -0
- ins_pricing v2/modelling/Pricing_Run.py +36 -0
- ins_pricing v2/modelling/__init__.py +151 -0
- ins_pricing v2/modelling/cli_common.py +141 -0
- ins_pricing v2/modelling/config.py +249 -0
- ins_pricing v2/modelling/config_preprocess.py +254 -0
- ins_pricing v2/modelling/core.py +741 -0
- ins_pricing v2/modelling/data_container.py +42 -0
- ins_pricing v2/modelling/explain/__init__.py +55 -0
- ins_pricing v2/modelling/explain/gradients.py +334 -0
- ins_pricing v2/modelling/explain/metrics.py +176 -0
- ins_pricing v2/modelling/explain/permutation.py +155 -0
- ins_pricing v2/modelling/explain/shap_utils.py +146 -0
- ins_pricing v2/modelling/features.py +215 -0
- ins_pricing v2/modelling/model_manager.py +148 -0
- ins_pricing v2/modelling/model_plotting.py +463 -0
- ins_pricing v2/modelling/models.py +2203 -0
- ins_pricing v2/modelling/notebook_utils.py +294 -0
- ins_pricing v2/modelling/plotting/__init__.py +45 -0
- ins_pricing v2/modelling/plotting/common.py +63 -0
- ins_pricing v2/modelling/plotting/curves.py +572 -0
- ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
- ins_pricing v2/modelling/plotting/geo.py +362 -0
- ins_pricing v2/modelling/plotting/importance.py +121 -0
- ins_pricing v2/modelling/run_logging.py +133 -0
- ins_pricing v2/modelling/tests/conftest.py +8 -0
- ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing v2/modelling/tests/test_explain.py +56 -0
- ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing v2/modelling/tests/test_plotting.py +63 -0
- ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing v2/modelling/trainers.py +2447 -0
- ins_pricing v2/modelling/utils.py +1020 -0
- ins_pricing v2/modelling/watchdog_run.py +211 -0
- ins_pricing v2/pricing/__init__.py +27 -0
- ins_pricing v2/pricing/calibration.py +39 -0
- ins_pricing v2/pricing/data_quality.py +117 -0
- ins_pricing v2/pricing/exposure.py +85 -0
- ins_pricing v2/pricing/factors.py +91 -0
- ins_pricing v2/pricing/monitoring.py +99 -0
- ins_pricing v2/pricing/rate_table.py +78 -0
- ins_pricing v2/production/__init__.py +21 -0
- ins_pricing v2/production/drift.py +30 -0
- ins_pricing v2/production/monitoring.py +143 -0
- ins_pricing v2/production/scoring.py +40 -0
- ins_pricing v2/reporting/__init__.py +11 -0
- ins_pricing v2/reporting/report_builder.py +72 -0
- ins_pricing v2/reporting/scheduler.py +45 -0
- ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
- ins_pricing v2/scripts/Explain_entry.py +545 -0
- ins_pricing v2/scripts/__init__.py +1 -0
- ins_pricing v2/scripts/train.py +568 -0
- ins_pricing v2/setup.py +55 -0
- ins_pricing v2/smoke_test.py +28 -0
- ins_pricing-0.1.6.dist-info/METADATA +78 -0
- ins_pricing-0.1.6.dist-info/RECORD +169 -0
- ins_pricing-0.1.6.dist-info/WHEEL +5 -0
- ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
- user_packages/__init__.py +105 -0
- user_packages legacy/BayesOpt.py +5659 -0
- user_packages legacy/BayesOpt_entry.py +513 -0
- user_packages legacy/BayesOpt_incremental.py +685 -0
- user_packages legacy/Pricing_Run.py +36 -0
- user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
- user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
- user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
- user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
- user_packages legacy/Try/BayesOpt legacy.py +3280 -0
- user_packages legacy/Try/BayesOpt.py +838 -0
- user_packages legacy/Try/BayesOptAll.py +1569 -0
- user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
- user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
- user_packages legacy/Try/BayesOptSearch.py +830 -0
- user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
- user_packages legacy/Try/BayesOptV1.py +1911 -0
- user_packages legacy/Try/BayesOptV10.py +2973 -0
- user_packages legacy/Try/BayesOptV11.py +3001 -0
- user_packages legacy/Try/BayesOptV12.py +3001 -0
- user_packages legacy/Try/BayesOptV2.py +2065 -0
- user_packages legacy/Try/BayesOptV3.py +2209 -0
- user_packages legacy/Try/BayesOptV4.py +2342 -0
- user_packages legacy/Try/BayesOptV5.py +2372 -0
- user_packages legacy/Try/BayesOptV6.py +2759 -0
- user_packages legacy/Try/BayesOptV7.py +2832 -0
- user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
- user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
- user_packages legacy/Try/BayesOptV9.py +2927 -0
- user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
- user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
- user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
- user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
- user_packages legacy/Try/xgbbayesopt.py +523 -0
- user_packages legacy/__init__.py +19 -0
- user_packages legacy/cli_common.py +124 -0
- user_packages legacy/notebook_utils.py +228 -0
- user_packages legacy/watchdog_run.py +202 -0
pricing/rate_table.py

@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Dict, Optional
+
+import numpy as np
+import pandas as pd
+
+
+def compute_base_rate(
+    df: pd.DataFrame,
+    *,
+    loss_col: str,
+    exposure_col: str,
+    weight_col: Optional[str] = None,
+) -> float:
+    """Compute base rate as loss / exposure."""
+    loss = df[loss_col].to_numpy(dtype=float, copy=False)
+    exposure = df[exposure_col].to_numpy(dtype=float, copy=False)
+    if weight_col and weight_col in df.columns:
+        weight = df[weight_col].to_numpy(dtype=float, copy=False)
+        loss = loss * weight
+        exposure = exposure * weight
+    total_exposure = float(np.sum(exposure))
+    if total_exposure <= 0:
+        return 0.0
+    return float(np.sum(loss) / total_exposure)
+
+
+def apply_factor_tables(
+    df: pd.DataFrame,
+    factor_tables: Dict[str, pd.DataFrame],
+    *,
+    default_relativity: float = 1.0,
+) -> np.ndarray:
+    """Apply factor relativities and return a multiplicative factor."""
+    multiplier = np.ones(len(df), dtype=float)
+    for factor, table in factor_tables.items():
+        if factor not in df.columns:
+            raise ValueError(f"Missing factor column: {factor}")
+        if "level" not in table.columns or "relativity" not in table.columns:
+            raise ValueError("Factor table must include 'level' and 'relativity'.")
+        mapping = table.set_index("level")["relativity"]
+        rel = df[factor].map(mapping).fillna(default_relativity).to_numpy(dtype=float)
+        multiplier *= rel
+    return multiplier
+
+
+def rate_premium(
+    df: pd.DataFrame,
+    *,
+    exposure_col: str,
+    base_rate: float,
+    factor_tables: Dict[str, pd.DataFrame],
+    default_relativity: float = 1.0,
+) -> np.ndarray:
+    """Compute premium using base rate and factor tables."""
+    exposure = df[exposure_col].to_numpy(dtype=float, copy=False)
+    factors = apply_factor_tables(
+        df, factor_tables, default_relativity=default_relativity
+    )
+    return exposure * float(base_rate) * factors
+
+
+@dataclass
+class RateTable:
+    base_rate: float
+    factor_tables: Dict[str, pd.DataFrame]
+    default_relativity: float = 1.0
+
+    def score(self, df: pd.DataFrame, *, exposure_col: str) -> np.ndarray:
+        return rate_premium(
+            df,
+            exposure_col=exposure_col,
+            base_rate=self.base_rate,
+            factor_tables=self.factor_tables,
+            default_relativity=self.default_relativity,
+        )
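A minimal usage sketch for the rate-table API above, assuming the `ins_pricing.pricing.rate_table` module path from the file listing; the policy data and relativities are hypothetical:

    import pandas as pd
    from ins_pricing.pricing.rate_table import RateTable

    # Hypothetical policy frame; column names are illustrative only.
    policies = pd.DataFrame({
        "exposure": [1.0, 0.5, 1.0],
        "vehicle_age": ["new", "old", "new"],
    })
    # One relativity table per rating factor; must carry 'level' and 'relativity'.
    factor_tables = {
        "vehicle_age": pd.DataFrame({"level": ["new", "old"], "relativity": [1.2, 0.9]}),
    }
    table = RateTable(base_rate=250.0, factor_tables=factor_tables)
    premium = table.score(policies, exposure_col="exposure")
    # premium = exposure * base_rate * relativity -> [300.0, 112.5, 300.0]

Unknown levels fall back to `default_relativity` (1.0), so unseen categories rate at the base level rather than failing.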
production/__init__.py

@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from .drift import psi_report
+from .monitoring import (
+    classification_metrics,
+    group_metrics,
+    loss_ratio,
+    metrics_report,
+    regression_metrics,
+)
+from .scoring import batch_score
+
+__all__ = [
+    "psi_report",
+    "classification_metrics",
+    "group_metrics",
+    "loss_ratio",
+    "metrics_report",
+    "regression_metrics",
+    "batch_score",
+]
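Since the subpackage `__init__` re-exports these helpers, callers can import them from `ins_pricing.production` directly; a sketch with made-up numbers:

    from ins_pricing.production import batch_score, loss_ratio, metrics_report, psi_report

    lr = loss_ratio([120.0, 80.0], [100.0, 100.0])  # 200 / 200 = 1.0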
production/drift.py

@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Iterable, Optional
+
+import pandas as pd
+
+try:
+    from ins_pricing.pricing.monitoring import psi_report as _psi_report
+except Exception:  # pragma: no cover - optional import
+    _psi_report = None
+
+
+def psi_report(
+    expected_df: pd.DataFrame,
+    actual_df: pd.DataFrame,
+    *,
+    features: Optional[Iterable[str]] = None,
+    bins: int = 10,
+    strategy: str = "quantile",
+) -> pd.DataFrame:
+    """Population Stability Index report for drift monitoring."""
+    if _psi_report is None:
+        raise RuntimeError("psi_report requires ins_pricing.pricing.monitoring.")
+    return _psi_report(
+        expected_df,
+        actual_df,
+        features=features,
+        bins=bins,
+        strategy=strategy,
+    )
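A hedged sketch of calling the drift wrapper above; it simply forwards to `ins_pricing.pricing.monitoring.psi_report`, so the exact report layout is defined there. The frames below are made-up:

    import pandas as pd
    from ins_pricing.production.drift import psi_report

    train = pd.DataFrame({"premium": [100.0, 110.0, 120.0, 130.0]})
    recent = pd.DataFrame({"premium": [150.0, 160.0, 170.0, 180.0]})
    # PSI over quantile-binned distributions, feature by feature; raises
    # RuntimeError if the pricing monitoring module was unavailable at import.
    report = psi_report(train, recent, features=["premium"], bins=4, strategy="quantile")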
production/monitoring.py

@@ -0,0 +1,143 @@
+from __future__ import annotations
+
+from typing import Dict, Iterable, Optional
+
+import numpy as np
+import pandas as pd
+
+
+def _safe_div(numer: float, denom: float, default: float = 0.0) -> float:
+    if denom == 0:
+        return default
+    return numer / denom
+
+
+def regression_metrics(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    *,
+    weight: Optional[np.ndarray] = None,
+) -> Dict[str, float]:
+    y_true = np.asarray(y_true, dtype=float).reshape(-1)
+    y_pred = np.asarray(y_pred, dtype=float).reshape(-1)
+    if weight is not None:
+        weight = np.asarray(weight, dtype=float).reshape(-1)
+        if weight.shape[0] != y_true.shape[0]:
+            raise ValueError("weight length must match y_true.")
+    err = y_true - y_pred
+    if weight is None:
+        mse = float(np.mean(err ** 2))
+        mae = float(np.mean(np.abs(err)))
+    else:
+        w_sum = float(np.sum(weight))
+        mse = float(np.sum(weight * (err ** 2)) / max(w_sum, 1.0))
+        mae = float(np.sum(weight * np.abs(err)) / max(w_sum, 1.0))
+    rmse = float(np.sqrt(mse))
+    denom = float(np.mean(y_true)) if np.mean(y_true) != 0 else 1.0
+    mape = float(np.mean(np.abs(err) / np.clip(np.abs(y_true), 1e-9, None)))
+    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
+    ss_res = float(np.sum(err ** 2))
+    r2 = 1.0 - _safe_div(ss_res, ss_tot, default=0.0)
+    return {"rmse": rmse, "mae": mae, "mape": mape, "r2": r2}
+
+
+def loss_ratio(
+    actual_loss: np.ndarray,
+    predicted_premium: np.ndarray,
+    *,
+    weight: Optional[np.ndarray] = None,
+) -> float:
+    actual_loss = np.asarray(actual_loss, dtype=float).reshape(-1)
+    predicted_premium = np.asarray(predicted_premium, dtype=float).reshape(-1)
+    if weight is not None:
+        weight = np.asarray(weight, dtype=float).reshape(-1)
+        actual_loss = actual_loss * weight
+        predicted_premium = predicted_premium * weight
+    return _safe_div(float(np.sum(actual_loss)), float(np.sum(predicted_premium)), default=0.0)
+
+
+def classification_metrics(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    *,
+    threshold: float = 0.5,
+) -> Dict[str, float]:
+    y_true = np.asarray(y_true, dtype=float).reshape(-1)
+    y_pred = np.asarray(y_pred, dtype=float).reshape(-1)
+    pred_label = (y_pred >= threshold).astype(float)
+    acc = float(np.mean(pred_label == y_true))
+    precision = _safe_div(float(np.sum((pred_label == 1) & (y_true == 1))),
+                          float(np.sum(pred_label == 1)), default=0.0)
+    recall = _safe_div(float(np.sum((pred_label == 1) & (y_true == 1))),
+                       float(np.sum(y_true == 1)), default=0.0)
+    return {"accuracy": acc, "precision": precision, "recall": recall}
+
+
+def metrics_report(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    *,
+    task_type: str = "regression",
+    weight: Optional[np.ndarray] = None,
+) -> Dict[str, float]:
+    if task_type == "classification":
+        metrics = classification_metrics(y_true, y_pred)
+    else:
+        metrics = regression_metrics(y_true, y_pred, weight=weight)
+    return metrics
+
+
+def group_metrics(
+    df: pd.DataFrame,
+    *,
+    actual_col: str,
+    pred_col: str,
+    group_cols: Iterable[str],
+    weight_col: Optional[str] = None,
+) -> pd.DataFrame:
+    group_cols = list(group_cols)
+    work = df[group_cols].copy()
+    y_true = df[actual_col].to_numpy(dtype=float)
+    y_pred = df[pred_col].to_numpy(dtype=float)
+    err = y_true - y_pred
+    work["_y_true"] = y_true
+    work["_y_pred"] = y_pred
+    work["_err"] = err
+    work["_abs_err"] = np.abs(err)
+    work["_err_sq"] = err ** 2
+    work["_abs_ratio"] = work["_abs_err"] / np.clip(np.abs(work["_y_true"]), 1e-9, None)
+    work["_y_true_sq"] = work["_y_true"] ** 2
+
+    if weight_col:
+        w = df[weight_col].to_numpy(dtype=float)
+        work["_w"] = w
+        work["_w_err_sq"] = w * work["_err_sq"]
+        work["_w_abs_err"] = w * work["_abs_err"]
+
+    grouped = work.groupby(group_cols, dropna=False)
+    count = grouped["_y_true"].count().replace(0, 1.0)
+    sum_y = grouped["_y_true"].sum()
+    sum_y2 = grouped["_y_true_sq"].sum()
+    ss_tot = sum_y2 - (sum_y ** 2) / count
+    ss_tot = ss_tot.clip(lower=0.0)
+    ss_res = grouped["_err_sq"].sum()
+    r2 = 1.0 - (ss_res / ss_tot.replace(0.0, np.nan))
+    r2 = r2.fillna(0.0)
+
+    mape = grouped["_abs_ratio"].mean()
+    if weight_col:
+        sum_w = grouped["_w"].sum().replace(0, 1.0)
+        mse = grouped["_w_err_sq"].sum() / sum_w
+        mae = grouped["_w_abs_err"].sum() / sum_w
+    else:
+        mse = grouped["_err_sq"].sum() / count
+        mae = grouped["_abs_err"].sum() / count
+
+    rmse = np.sqrt(mse)
+    result = pd.DataFrame({
+        "rmse": rmse.astype(float),
+        "mae": mae.astype(float),
+        "mape": mape.astype(float),
+        "r2": r2.astype(float),
+    })
+    return result.reset_index()
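A small worked example of the metric helpers above, with made-up actuals and predictions:

    import numpy as np
    import pandas as pd
    from ins_pricing.production.monitoring import group_metrics, regression_metrics

    y_true = np.array([100.0, 200.0, 300.0])
    y_pred = np.array([110.0, 190.0, 310.0])
    # err = [-10, 10, -10] -> rmse = 10.0, mae = 10.0
    print(regression_metrics(y_true, y_pred))

    scored = pd.DataFrame({
        "region": ["north", "north", "south"],
        "actual": y_true,
        "pred": y_pred,
    })
    # One row per region with rmse / mae / mape / r2 columns.
    per_region = group_metrics(scored, actual_col="actual", pred_col="pred", group_cols=["region"])

Note that the weighted branch divides by `max(w_sum, 1.0)`, so weight vectors summing to less than 1 are effectively floored at 1.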
production/scoring.py

@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Callable, Optional
+
+import numpy as np
+import pandas as pd
+
+
+def batch_score(
+    predict_fn: Callable[[pd.DataFrame], np.ndarray],
+    data: pd.DataFrame,
+    *,
+    output_col: str = "prediction",
+    batch_size: int = 10000,
+    output_path: Optional[str | Path] = None,
+    keep_input: bool = True,
+) -> pd.DataFrame:
+    """Batch scoring for large datasets."""
+    if batch_size <= 0:
+        raise ValueError("batch_size must be positive.")
+    n_rows = len(data)
+    prediction = np.empty(n_rows, dtype=float)
+    for start in range(0, n_rows, batch_size):
+        end = min(start + batch_size, n_rows)
+        chunk = data.iloc[start:end]
+        pred = np.asarray(predict_fn(chunk)).reshape(-1)
+        if pred.shape[0] != (end - start):
+            raise ValueError("predict_fn output length must match batch size.")
+        prediction[start:end] = pred
+    result = data.copy() if keep_input else pd.DataFrame(index=data.index)
+    result[output_col] = prediction
+    if output_path:
+        output_path = Path(output_path)
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        if output_path.suffix.lower() in {".parquet", ".pq"}:
+            result.to_parquet(output_path, index=False)
+        else:
+            result.to_csv(output_path, index=False)
+    return result
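A sketch of batch scoring with a stand-in predictor; any callable that maps a DataFrame chunk to a 1-D array of the same length works:

    import numpy as np
    import pandas as pd
    from ins_pricing.production.scoring import batch_score

    frame = pd.DataFrame({"x": np.arange(25_000, dtype=float)})

    def predict_fn(chunk: pd.DataFrame) -> np.ndarray:
        return chunk["x"].to_numpy() * 2.0  # stand-in for model.predict

    # Scores in three chunks (10k, 10k, 5k). A .parquet/.pq suffix selects
    # DataFrame.to_parquet (which needs a Parquet engine such as pyarrow);
    # any other suffix falls back to CSV.
    scored = batch_score(predict_fn, frame, batch_size=10_000, output_path="scores.parquet")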
reporting/report_builder.py

@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Optional
+
+import pandas as pd
+
+
+def _df_to_markdown(df: pd.DataFrame, max_rows: int = 20) -> str:
+    if df is None or df.empty:
+        return "_(no data)_"
+    data = df.copy()
+    if len(data) > max_rows:
+        data = data.head(max_rows)
+    headers = list(data.columns)
+    rows = data.astype(str).values.tolist()
+    lines = []
+    lines.append("| " + " | ".join(headers) + " |")
+    lines.append("| " + " | ".join(["---"] * len(headers)) + " |")
+    for row in rows:
+        lines.append("| " + " | ".join(row) + " |")
+    return "\n".join(lines)
+
+
+@dataclass
+class ReportPayload:
+    model_name: str
+    model_version: str
+    metrics: Dict[str, float]
+    risk_trend: Optional[pd.DataFrame] = None
+    drift_report: Optional[pd.DataFrame] = None
+    validation_table: Optional[pd.DataFrame] = None
+    extra_notes: Optional[str] = None
+
+
+def build_report(payload: ReportPayload) -> str:
+    now = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC")
+    metrics_lines = [f"- {k}: {v:.6f}" for k, v in payload.metrics.items()]
+    metrics_block = "\n".join(metrics_lines) if metrics_lines else "_(no metrics)_"
+
+    report = [
+        f"# Model Report: {payload.model_name} ({payload.model_version})",
+        "",
+        f"Generated at: {now}",
+        "",
+        "## Model Validation",
+        metrics_block,
+    ]
+
+    if payload.validation_table is not None:
+        report.extend(["", "### Validation Details", _df_to_markdown(payload.validation_table)])
+
+    report.extend(["", "## Drift / Stability"])
+    report.append(_df_to_markdown(payload.drift_report))
+
+    report.extend(["", "## Risk Trend"])
+    report.append(_df_to_markdown(payload.risk_trend))
+
+    if payload.extra_notes:
+        report.extend(["", "## Notes", payload.extra_notes])
+
+    return "\n".join(report).strip() + "\n"
+
+
+def write_report(payload: ReportPayload, output_path: str | Path) -> Path:
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    content = build_report(payload)
+    output_path.write_text(content, encoding="utf-8")
+    return output_path
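A sketch of building a Markdown report with the API above; the model name, metrics, and drift rows are hypothetical:

    import pandas as pd
    from ins_pricing.reporting.report_builder import ReportPayload, write_report

    payload = ReportPayload(
        model_name="motor_bi",      # hypothetical model name
        model_version="0.1.6",
        metrics={"rmse": 10.0, "r2": 0.985},
        drift_report=pd.DataFrame({"feature": ["premium"], "psi": [0.02]}),
    )
    # Renders the Validation / Drift / Risk Trend sections as Markdown and
    # writes the file UTF-8 encoded, creating parent directories as needed.
    path = write_report(payload, "reports/motor_bi.md")

Sections without data render as "_(no data)_" rather than being omitted, so report layout stays stable across runs.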
reporting/scheduler.py

@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+import threading
+import time
+from datetime import datetime, timedelta
+from typing import Callable, Optional
+
+
+def _next_run(run_time: str, now: Optional[datetime] = None) -> datetime:
+    if now is None:
+        now = datetime.now()
+    hour, minute = [int(x) for x in run_time.split(":")]
+    candidate = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
+    if candidate <= now:
+        candidate = candidate + timedelta(days=1)
+    return candidate
+
+
+def schedule_daily(
+    job_fn: Callable[[], None],
+    *,
+    run_time: str = "01:00",
+    stop_event: Optional[threading.Event] = None,
+) -> threading.Thread:
+    """Run job_fn daily at local time HH:MM in a background thread."""
+    if stop_event is None:
+        stop_event = threading.Event()
+
+    def _loop():
+        while not stop_event.is_set():
+            next_time = _next_run(run_time)
+            sleep_seconds = (next_time - datetime.now()).total_seconds()
+            if sleep_seconds > 0:
+                stop_event.wait(timeout=sleep_seconds)
+            if stop_event.is_set():
+                break
+            try:
+                job_fn()
+            except Exception:
+                pass
+            time.sleep(1)
+
+    thread = threading.Thread(target=_loop, daemon=True)
+    thread.start()
+    return thread
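A sketch of wiring the scheduler above to a nightly job; `nightly_job` is a placeholder:

    import threading
    from ins_pricing.reporting.scheduler import schedule_daily

    stop = threading.Event()

    def nightly_job() -> None:
        print("refreshing pricing report...")  # placeholder job body

    thread = schedule_daily(nightly_job, run_time="01:30", stop_event=stop)
    # ... on shutdown: wake the waiting loop and let it exit.
    stop.set()
    thread.join(timeout=5)

The loop runs in a daemon thread (it will not keep the interpreter alive) and swallows exceptions from job_fn, so failures must be logged inside the job itself.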