ins-pricing 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +60 -0
- ins_pricing/__init__.py +102 -0
- ins_pricing/governance/README.md +18 -0
- ins_pricing/governance/__init__.py +20 -0
- ins_pricing/governance/approval.py +93 -0
- ins_pricing/governance/audit.py +37 -0
- ins_pricing/governance/registry.py +99 -0
- ins_pricing/governance/release.py +159 -0
- ins_pricing/modelling/BayesOpt.py +146 -0
- ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
- ins_pricing/modelling/BayesOpt_entry.py +575 -0
- ins_pricing/modelling/BayesOpt_incremental.py +731 -0
- ins_pricing/modelling/Explain_Run.py +36 -0
- ins_pricing/modelling/Explain_entry.py +539 -0
- ins_pricing/modelling/Pricing_Run.py +36 -0
- ins_pricing/modelling/README.md +33 -0
- ins_pricing/modelling/__init__.py +44 -0
- ins_pricing/modelling/bayesopt/__init__.py +98 -0
- ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
- ins_pricing/modelling/bayesopt/core.py +1476 -0
- ins_pricing/modelling/bayesopt/models.py +2196 -0
- ins_pricing/modelling/bayesopt/trainers.py +2446 -0
- ins_pricing/modelling/bayesopt/utils.py +1021 -0
- ins_pricing/modelling/cli_common.py +136 -0
- ins_pricing/modelling/explain/__init__.py +55 -0
- ins_pricing/modelling/explain/gradients.py +334 -0
- ins_pricing/modelling/explain/metrics.py +176 -0
- ins_pricing/modelling/explain/permutation.py +155 -0
- ins_pricing/modelling/explain/shap_utils.py +146 -0
- ins_pricing/modelling/notebook_utils.py +284 -0
- ins_pricing/modelling/plotting/__init__.py +45 -0
- ins_pricing/modelling/plotting/common.py +63 -0
- ins_pricing/modelling/plotting/curves.py +572 -0
- ins_pricing/modelling/plotting/diagnostics.py +139 -0
- ins_pricing/modelling/plotting/geo.py +362 -0
- ins_pricing/modelling/plotting/importance.py +121 -0
- ins_pricing/modelling/run_logging.py +133 -0
- ins_pricing/modelling/tests/conftest.py +8 -0
- ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing/modelling/tests/test_explain.py +56 -0
- ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing/modelling/tests/test_plotting.py +63 -0
- ins_pricing/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing/modelling/watchdog_run.py +211 -0
- ins_pricing/pricing/README.md +44 -0
- ins_pricing/pricing/__init__.py +27 -0
- ins_pricing/pricing/calibration.py +39 -0
- ins_pricing/pricing/data_quality.py +117 -0
- ins_pricing/pricing/exposure.py +85 -0
- ins_pricing/pricing/factors.py +91 -0
- ins_pricing/pricing/monitoring.py +99 -0
- ins_pricing/pricing/rate_table.py +78 -0
- ins_pricing/production/__init__.py +21 -0
- ins_pricing/production/drift.py +30 -0
- ins_pricing/production/monitoring.py +143 -0
- ins_pricing/production/scoring.py +40 -0
- ins_pricing/reporting/README.md +20 -0
- ins_pricing/reporting/__init__.py +11 -0
- ins_pricing/reporting/report_builder.py +72 -0
- ins_pricing/reporting/scheduler.py +45 -0
- ins_pricing/setup.py +41 -0
- ins_pricing v2/__init__.py +23 -0
- ins_pricing v2/governance/__init__.py +20 -0
- ins_pricing v2/governance/approval.py +93 -0
- ins_pricing v2/governance/audit.py +37 -0
- ins_pricing v2/governance/registry.py +99 -0
- ins_pricing v2/governance/release.py +159 -0
- ins_pricing v2/modelling/Explain_Run.py +36 -0
- ins_pricing v2/modelling/Pricing_Run.py +36 -0
- ins_pricing v2/modelling/__init__.py +151 -0
- ins_pricing v2/modelling/cli_common.py +141 -0
- ins_pricing v2/modelling/config.py +249 -0
- ins_pricing v2/modelling/config_preprocess.py +254 -0
- ins_pricing v2/modelling/core.py +741 -0
- ins_pricing v2/modelling/data_container.py +42 -0
- ins_pricing v2/modelling/explain/__init__.py +55 -0
- ins_pricing v2/modelling/explain/gradients.py +334 -0
- ins_pricing v2/modelling/explain/metrics.py +176 -0
- ins_pricing v2/modelling/explain/permutation.py +155 -0
- ins_pricing v2/modelling/explain/shap_utils.py +146 -0
- ins_pricing v2/modelling/features.py +215 -0
- ins_pricing v2/modelling/model_manager.py +148 -0
- ins_pricing v2/modelling/model_plotting.py +463 -0
- ins_pricing v2/modelling/models.py +2203 -0
- ins_pricing v2/modelling/notebook_utils.py +294 -0
- ins_pricing v2/modelling/plotting/__init__.py +45 -0
- ins_pricing v2/modelling/plotting/common.py +63 -0
- ins_pricing v2/modelling/plotting/curves.py +572 -0
- ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
- ins_pricing v2/modelling/plotting/geo.py +362 -0
- ins_pricing v2/modelling/plotting/importance.py +121 -0
- ins_pricing v2/modelling/run_logging.py +133 -0
- ins_pricing v2/modelling/tests/conftest.py +8 -0
- ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing v2/modelling/tests/test_explain.py +56 -0
- ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing v2/modelling/tests/test_plotting.py +63 -0
- ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing v2/modelling/trainers.py +2447 -0
- ins_pricing v2/modelling/utils.py +1020 -0
- ins_pricing v2/modelling/watchdog_run.py +211 -0
- ins_pricing v2/pricing/__init__.py +27 -0
- ins_pricing v2/pricing/calibration.py +39 -0
- ins_pricing v2/pricing/data_quality.py +117 -0
- ins_pricing v2/pricing/exposure.py +85 -0
- ins_pricing v2/pricing/factors.py +91 -0
- ins_pricing v2/pricing/monitoring.py +99 -0
- ins_pricing v2/pricing/rate_table.py +78 -0
- ins_pricing v2/production/__init__.py +21 -0
- ins_pricing v2/production/drift.py +30 -0
- ins_pricing v2/production/monitoring.py +143 -0
- ins_pricing v2/production/scoring.py +40 -0
- ins_pricing v2/reporting/__init__.py +11 -0
- ins_pricing v2/reporting/report_builder.py +72 -0
- ins_pricing v2/reporting/scheduler.py +45 -0
- ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
- ins_pricing v2/scripts/Explain_entry.py +545 -0
- ins_pricing v2/scripts/__init__.py +1 -0
- ins_pricing v2/scripts/train.py +568 -0
- ins_pricing v2/setup.py +55 -0
- ins_pricing v2/smoke_test.py +28 -0
- ins_pricing-0.1.6.dist-info/METADATA +78 -0
- ins_pricing-0.1.6.dist-info/RECORD +169 -0
- ins_pricing-0.1.6.dist-info/WHEEL +5 -0
- ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
- user_packages/__init__.py +105 -0
- user_packages legacy/BayesOpt.py +5659 -0
- user_packages legacy/BayesOpt_entry.py +513 -0
- user_packages legacy/BayesOpt_incremental.py +685 -0
- user_packages legacy/Pricing_Run.py +36 -0
- user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
- user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
- user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
- user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
- user_packages legacy/Try/BayesOpt legacy.py +3280 -0
- user_packages legacy/Try/BayesOpt.py +838 -0
- user_packages legacy/Try/BayesOptAll.py +1569 -0
- user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
- user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
- user_packages legacy/Try/BayesOptSearch.py +830 -0
- user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
- user_packages legacy/Try/BayesOptV1.py +1911 -0
- user_packages legacy/Try/BayesOptV10.py +2973 -0
- user_packages legacy/Try/BayesOptV11.py +3001 -0
- user_packages legacy/Try/BayesOptV12.py +3001 -0
- user_packages legacy/Try/BayesOptV2.py +2065 -0
- user_packages legacy/Try/BayesOptV3.py +2209 -0
- user_packages legacy/Try/BayesOptV4.py +2342 -0
- user_packages legacy/Try/BayesOptV5.py +2372 -0
- user_packages legacy/Try/BayesOptV6.py +2759 -0
- user_packages legacy/Try/BayesOptV7.py +2832 -0
- user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
- user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
- user_packages legacy/Try/BayesOptV9.py +2927 -0
- user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
- user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
- user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
- user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
- user_packages legacy/Try/xgbbayesopt.py +523 -0
- user_packages legacy/__init__.py +19 -0
- user_packages legacy/cli_common.py +124 -0
- user_packages legacy/notebook_utils.py +228 -0
- user_packages legacy/watchdog_run.py +202 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Dict, Iterable, Optional
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _safe_div(numer: float, denom: float, default: float = 0.0) -> float:
    """Divide ``numer`` by ``denom``, returning ``default`` when ``denom`` is zero."""
    return default if denom == 0 else numer / denom
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def regression_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    *,
    weight: Optional[np.ndarray] = None,
) -> Dict[str, float]:
    """Compute rmse / mae / mape / r2 for a regression prediction.

    Parameters
    ----------
    y_true, y_pred:
        Array-likes, flattened to 1-D floats.
    weight:
        Optional per-row weights. Weights apply to rmse and mae only;
        mape and r2 are always unweighted.

    Raises
    ------
    ValueError
        If ``y_pred`` or ``weight`` length does not match ``y_true``.
    """
    y_true = np.asarray(y_true, dtype=float).reshape(-1)
    y_pred = np.asarray(y_pred, dtype=float).reshape(-1)
    if y_pred.shape[0] != y_true.shape[0]:
        raise ValueError("y_pred length must match y_true.")
    if weight is not None:
        weight = np.asarray(weight, dtype=float).reshape(-1)
        if weight.shape[0] != y_true.shape[0]:
            raise ValueError("weight length must match y_true.")
    err = y_true - y_pred
    if weight is None:
        mse = float(np.mean(err ** 2))
        mae = float(np.mean(np.abs(err)))
    else:
        w_sum = float(np.sum(weight))
        # max(w_sum, 1.0) guards against an all-zero weight vector.
        mse = float(np.sum(weight * (err ** 2)) / max(w_sum, 1.0))
        mae = float(np.sum(weight * np.abs(err)) / max(w_sum, 1.0))
    rmse = float(np.sqrt(mse))
    # Clip |y_true| away from zero so rows with y_true == 0 do not blow up mape.
    mape = float(np.mean(np.abs(err) / np.clip(np.abs(y_true), 1e-9, None)))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    ss_res = float(np.sum(err ** 2))
    # Constant y_true makes ss_tot == 0; r2 is defined as 0.0 in that case.
    r2 = 1.0 - _safe_div(ss_res, ss_tot, default=0.0)
    return {"rmse": rmse, "mae": mae, "mape": mape, "r2": r2}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def loss_ratio(
    actual_loss: np.ndarray,
    predicted_premium: np.ndarray,
    *,
    weight: Optional[np.ndarray] = None,
) -> float:
    """Return total actual loss over total predicted premium.

    Both inputs are flattened to 1-D floats. When ``weight`` is given, each
    row of both series is scaled by it before summing. A zero premium total
    yields 0.0 rather than a division error.
    """
    losses = np.asarray(actual_loss, dtype=float).reshape(-1)
    premiums = np.asarray(predicted_premium, dtype=float).reshape(-1)
    if weight is not None:
        w = np.asarray(weight, dtype=float).reshape(-1)
        losses = losses * w
        premiums = premiums * w
    return _safe_div(float(np.sum(losses)), float(np.sum(premiums)), default=0.0)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def classification_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    *,
    threshold: float = 0.5,
) -> Dict[str, float]:
    """Accuracy, precision and recall for scores binarised at ``threshold``.

    ``y_pred`` holds scores/probabilities; a score >= threshold is labelled 1.
    Precision and recall fall back to 0.0 when their denominators are empty.
    """
    truth = np.asarray(y_true, dtype=float).reshape(-1)
    scores = np.asarray(y_pred, dtype=float).reshape(-1)
    labels = (scores >= threshold).astype(float)
    true_pos = float(np.sum((labels == 1) & (truth == 1)))
    accuracy = float(np.mean(labels == truth))
    precision = _safe_div(true_pos, float(np.sum(labels == 1)), default=0.0)
    recall = _safe_div(true_pos, float(np.sum(truth == 1)), default=0.0)
    return {"accuracy": accuracy, "precision": precision, "recall": recall}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def metrics_report(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    *,
    task_type: str = "regression",
    weight: Optional[np.ndarray] = None,
) -> Dict[str, float]:
    """Dispatch to classification or regression metrics based on ``task_type``.

    Any ``task_type`` other than "classification" is treated as regression.
    ``weight`` is forwarded only on the regression path, matching the
    ``classification_metrics`` signature.
    """
    if task_type == "classification":
        return classification_metrics(y_true, y_pred)
    return regression_metrics(y_true, y_pred, weight=weight)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def group_metrics(
    df: pd.DataFrame,
    *,
    actual_col: str,
    pred_col: str,
    group_cols: Iterable[str],
    weight_col: Optional[str] = None,
) -> pd.DataFrame:
    """Compute per-group rmse/mae/mape/r2 over ``df``.

    Groups ``df`` by ``group_cols`` (NaN keys form their own group) and
    returns one row per group with columns rmse, mae, mape, r2 plus the
    group key columns. When ``weight_col`` is given, rmse and mae are
    weighted; mape and r2 are always unweighted.
    """
    group_cols = list(group_cols)
    # Work on a narrow scratch frame so added helper columns never touch df.
    work = df[group_cols].copy()
    y_true = df[actual_col].to_numpy(dtype=float)
    y_pred = df[pred_col].to_numpy(dtype=float)
    err = y_true - y_pred
    work["_y_true"] = y_true
    work["_y_pred"] = y_pred
    work["_err"] = err
    work["_abs_err"] = np.abs(err)
    work["_err_sq"] = err ** 2
    # Clip |y_true| away from zero so rows with y_true == 0 don't blow up mape.
    work["_abs_ratio"] = work["_abs_err"] / np.clip(np.abs(work["_y_true"]), 1e-9, None)
    work["_y_true_sq"] = work["_y_true"] ** 2

    if weight_col:
        w = df[weight_col].to_numpy(dtype=float)
        work["_w"] = w
        work["_w_err_sq"] = w * work["_err_sq"]
        work["_w_abs_err"] = w * work["_abs_err"]

    grouped = work.groupby(group_cols, dropna=False)
    # replace(0, 1.0) guards the divisions below against empty groups.
    count = grouped["_y_true"].count().replace(0, 1.0)
    sum_y = grouped["_y_true"].sum()
    sum_y2 = grouped["_y_true_sq"].sum()
    # Per-group total sum of squares via sum(y^2) - (sum y)^2 / n;
    # clip at 0 to absorb tiny negative values from float round-off.
    ss_tot = sum_y2 - (sum_y ** 2) / count
    ss_tot = ss_tot.clip(lower=0.0)
    ss_res = grouped["_err_sq"].sum()
    # Groups with constant y_true (ss_tot == 0) get r2 = 0.0, not +/-inf.
    r2 = 1.0 - (ss_res / ss_tot.replace(0.0, np.nan))
    r2 = r2.fillna(0.0)

    mape = grouped["_abs_ratio"].mean()
    if weight_col:
        sum_w = grouped["_w"].sum().replace(0, 1.0)
        mse = grouped["_w_err_sq"].sum() / sum_w
        mae = grouped["_w_abs_err"].sum() / sum_w
    else:
        mse = grouped["_err_sq"].sum() / count
        mae = grouped["_abs_err"].sum() / count

    rmse = np.sqrt(mse)
    result = pd.DataFrame({
        "rmse": rmse.astype(float),
        "mae": mae.astype(float),
        "mape": mape.astype(float),
        "r2": r2.astype(float),
    })
    # reset_index() turns the group keys back into ordinary columns.
    return result.reset_index()
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Callable, Optional
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def batch_score(
    predict_fn: Callable[[pd.DataFrame], np.ndarray],
    data: pd.DataFrame,
    *,
    output_col: str = "prediction",
    batch_size: int = 10000,
    output_path: Optional[str | Path] = None,
    keep_input: bool = True,
) -> pd.DataFrame:
    """Score ``data`` in fixed-size batches via ``predict_fn``.

    Each batch is passed to ``predict_fn`` as a DataFrame slice; the flattened
    output must have one value per row. The scores are attached as
    ``output_col`` on a copy of the input (or a bare frame sharing the same
    index when ``keep_input`` is False). If ``output_path`` is given, the
    result is also written to disk — parquet for .parquet/.pq, CSV otherwise.

    Raises ValueError for a non-positive batch_size or a length-mismatched
    prediction.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive.")
    total = len(data)
    scores = np.empty(total, dtype=float)
    start = 0
    while start < total:
        stop = min(start + batch_size, total)
        batch = data.iloc[start:stop]
        out = np.asarray(predict_fn(batch)).reshape(-1)
        if out.shape[0] != stop - start:
            raise ValueError("predict_fn output length must match batch size.")
        scores[start:stop] = out
        start = stop
    if keep_input:
        result = data.copy()
    else:
        result = pd.DataFrame(index=data.index)
    result[output_col] = scores
    if output_path:
        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        if target.suffix.lower() in {".parquet", ".pq"}:
            result.to_parquet(target, index=False)
        else:
            result.to_csv(target, index=False)
    return result
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# reporting
|
|
2
|
+
|
|
3
|
+
Report builder and scheduler for model monitoring.
|
|
4
|
+
|
|
5
|
+
Example (`risk_df` and `psi_df` below are pandas DataFrames prepared by the caller, e.g. a risk-trend series and a PSI drift table):
```python
|
|
8
|
+
from ins_pricing.reporting import ReportPayload, write_report, schedule_daily
|
|
9
|
+
|
|
10
|
+
payload = ReportPayload(
|
|
11
|
+
model_name="pricing_ft",
|
|
12
|
+
model_version="v1",
|
|
13
|
+
metrics={"rmse": 0.12, "loss_ratio": 0.63},
|
|
14
|
+
risk_trend=risk_df,
|
|
15
|
+
drift_report=psi_df,
|
|
16
|
+
)
|
|
17
|
+
write_report(payload, "Reports/model_report.md")
|
|
18
|
+
|
|
19
|
+
schedule_daily(lambda: write_report(payload, "Reports/model_report.md"), run_time="02:00")
|
|
20
|
+
```
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, Optional
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _df_to_markdown(df: pd.DataFrame, max_rows: int = 20) -> str:
    """Render ``df`` as a GitHub-style markdown table, truncated to ``max_rows``.

    Returns the placeholder "_(no data)_" when ``df`` is None or empty.
    NOTE(review): cell values containing "|" or newlines are not escaped and
    would break the table layout — assumed acceptable for the numeric
    monitoring tables this is used with.
    """
    if df is None or df.empty:
        return "_(no data)_"
    # head() already returns a new frame; no defensive full copy needed.
    data = df.head(max_rows)
    # Stringify labels so non-string column names don't break str.join.
    headers = [str(col) for col in data.columns]
    rows = data.astype(str).values.tolist()
    lines = [
        "| " + " | ".join(headers) + " |",
        "| " + " | ".join(["---"] * len(headers)) + " |",
    ]
    for row in rows:
        lines.append("| " + " | ".join(row) + " |")
    return "\n".join(lines)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class ReportPayload:
    """Everything needed to build one model monitoring report."""

    model_name: str
    model_version: str
    # Scalar validation metrics, rendered as "- name: value" bullet lines.
    metrics: Dict[str, float]
    # Optional tables, each rendered as a markdown table section when present.
    risk_trend: Optional[pd.DataFrame] = None
    drift_report: Optional[pd.DataFrame] = None
    validation_table: Optional[pd.DataFrame] = None
    # Free-form markdown appended under a "## Notes" heading.
    extra_notes: Optional[str] = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def build_report(payload: ReportPayload) -> str:
    """Assemble the markdown monitoring report for ``payload``.

    Sections: header, validation metrics, optional validation table,
    drift/stability table, risk-trend table, optional notes. The returned
    string is stripped and ends with a single trailing newline.
    """
    generated = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC")
    if payload.metrics:
        metrics_block = "\n".join(
            f"- {name}: {value:.6f}" for name, value in payload.metrics.items()
        )
    else:
        metrics_block = "_(no metrics)_"

    sections = [
        f"# Model Report: {payload.model_name} ({payload.model_version})",
        "",
        f"Generated at: {generated}",
        "",
        "## Model Validation",
        metrics_block,
    ]

    if payload.validation_table is not None:
        sections += ["", "### Validation Details", _df_to_markdown(payload.validation_table)]

    sections += ["", "## Drift / Stability", _df_to_markdown(payload.drift_report)]
    sections += ["", "## Risk Trend", _df_to_markdown(payload.risk_trend)]

    if payload.extra_notes:
        sections += ["", "## Notes", payload.extra_notes]

    return "\n".join(sections).strip() + "\n"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def write_report(payload: ReportPayload, output_path: str | Path) -> Path:
    """Build the markdown report for ``payload``, write it to ``output_path``.

    Parent directories are created as needed; the file is written as UTF-8.
    Returns the resolved output path.
    """
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(build_report(payload), encoding="utf-8")
    return target
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import threading
|
|
4
|
+
import time
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
from typing import Callable, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _next_run(run_time: str, now: Optional[datetime] = None) -> datetime:
    """Return the next datetime at local time "HH:MM" strictly after ``now``.

    ``now`` defaults to the current local time. If today's slot has already
    passed (or is exactly ``now``), the result rolls over to tomorrow.
    """
    reference = datetime.now() if now is None else now
    hour_str, minute_str = run_time.split(":")
    target = reference.replace(
        hour=int(hour_str), minute=int(minute_str), second=0, microsecond=0
    )
    if target <= reference:
        target += timedelta(days=1)
    return target
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def schedule_daily(
    job_fn: Callable[[], None],
    *,
    run_time: str = "01:00",
    stop_event: Optional[threading.Event] = None,
) -> threading.Thread:
    """Run job_fn daily at local time HH:MM in a background thread.

    The returned thread is a daemon, already started. Set ``stop_event``
    (or the one created here) to terminate the loop; a wait in progress
    wakes up immediately.
    """
    if stop_event is None:
        stop_event = threading.Event()

    def _loop():
        # Recompute the next slot every cycle so clock changes are picked up.
        while not stop_event.is_set():
            next_time = _next_run(run_time)
            sleep_seconds = (next_time - datetime.now()).total_seconds()
            if sleep_seconds > 0:
                # Event.wait instead of time.sleep so stopping is immediate.
                stop_event.wait(timeout=sleep_seconds)
            if stop_event.is_set():
                break
            try:
                job_fn()
            except Exception:
                # Deliberate best-effort: a failing job must not kill the
                # scheduler loop. NOTE(review): failures are silently
                # discarded — consider logging them.
                pass
            # Step past the minute boundary so the same slot isn't re-run.
            time.sleep(1)

    thread = threading.Thread(target=_loop, daemon=True)
    thread.start()
    return thread
|
ins_pricing/setup.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _discover_packages() -> list[str]:
    """List ins_pricing plus every discovered subpackage, fully prefixed.

    Scans the repository root for each known subpackage tree and prepends
    the "ins_pricing." namespace so setuptools maps them under the package.
    """
    subpackage_roots = ["modelling", "pricing", "production", "governance", "reporting"]
    discovered = ["ins_pricing"]
    for root in subpackage_roots:
        for pkg in find_packages(where=".", include=[root, f"{root}.*"]):
            discovered.append(f"ins_pricing.{pkg}")
    return discovered
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
setup(
    name="ins_pricing",
    version="0.1.6",
    description="Reusable modelling, pricing, governance, and reporting utilities.",
    author="meishi125478",
    license="Proprietary",
    python_requires=">=3.9",
    # The repository root itself is the ins_pricing package directory.
    package_dir={"ins_pricing": "."},
    packages=_discover_packages(),
    install_requires=[
        "numpy>=1.20",
        "pandas>=1.4",
        "torch>=1.13",
        "optuna>=3.0",
        "xgboost>=1.6",
        "scikit-learn>=1.1",
        "statsmodels>=0.13",
        "joblib>=1.2",
        "matplotlib>=3.5",
        "shap>=0.41",
        "contextily>=1.3",
        "pynndescent>=0.5",
        "torch-geometric>=2.3",
    ],
    include_package_data=True,
    # Ship JSON and markdown data files found anywhere in the package tree.
    package_data={"ins_pricing": ["**/*.json", "**/*.md"]},
    # Demo assets are excluded from the built distribution.
    exclude_package_data={"ins_pricing": ["modelling/demo/*", "modelling/demo/**/*"]},
)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Root package for ins_pricing."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from importlib import import_module
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
# Root subpackages
|
|
8
|
+
# Lazy-import map: attribute name on this package -> fully qualified module.
# NOTE(review): targets point at "ins_pricing.*" although this file ships in
# the "ins_pricing v2" tree — looks copied from v1; confirm intended targets.
_ROOT_SUBPACKAGES = {
    "modelling": "ins_pricing.modelling",
    "pricing": "ins_pricing.pricing",
    "production": "ins_pricing.production",
    "governance": "ins_pricing.governance",
    "reporting": "ins_pricing.reporting",
}

# The public API is exactly the lazily loaded subpackages.
__all__ = sorted(list(_ROOT_SUBPACKAGES.keys()))


def __getattr__(name: str):
    """PEP 562 module __getattr__: import subpackages on first access.

    The imported module is cached in globals() so this hook only fires once
    per attribute name.
    """
    if name in _ROOT_SUBPACKAGES:
        module = import_module(_ROOT_SUBPACKAGES[name])
        globals()[name] = module
        return module
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .approval import ApprovalAction, ApprovalRequest, ApprovalStore
|
|
4
|
+
from .audit import AuditEvent, AuditLogger
|
|
5
|
+
from .registry import ModelArtifact, ModelRegistry, ModelVersion
|
|
6
|
+
from .release import DeploymentState, ModelRef, ReleaseManager
|
|
7
|
+
|
|
8
|
+
# Explicit public surface of the governance package; mirrors the re-exports
# from .approval, .audit, .registry and .release above.
__all__ = [
    "ApprovalAction",
    "ApprovalRequest",
    "ApprovalStore",
    "AuditEvent",
    "AuditLogger",
    "ModelArtifact",
    "ModelRegistry",
    "ModelVersion",
    "DeploymentState",
    "ModelRef",
    "ReleaseManager",
]
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import asdict, dataclass, field
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ApprovalAction:
    """A single reviewer decision recorded against an approval request."""

    actor: str  # who made the decision
    decision: str  # e.g. "approve" / "reject" (see ApprovalStore.act)
    timestamp: str  # ISO-8601 UTC string, set at decision time
    comment: Optional[str] = None  # optional free-form reviewer note
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ApprovalRequest:
    """A pending/decided approval request for one model version."""

    model_name: str
    model_version: str
    requested_by: str
    requested_at: str  # ISO-8601 UTC string, set when the request is created
    # Lifecycle: "pending" -> "approved" or "rejected" (see ApprovalStore.act).
    status: str = "pending"
    # Full decision history, newest appended last.
    actions: List[ApprovalAction] = field(default_factory=list)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ApprovalStore:
    """Simple approval workflow stored as JSON.

    The store is a single JSON file holding a list of ApprovalRequest dicts.
    Not safe for concurrent writers — intended for lightweight, single-process
    governance workflows.
    """

    def __init__(self, store_path: str | Path):
        self.store_path = Path(store_path)
        self.store_path.parent.mkdir(parents=True, exist_ok=True)

    def _load(self) -> List[dict]:
        """Read the raw request list; a missing file means no requests yet."""
        if not self.store_path.exists():
            return []
        with self.store_path.open("r", encoding="utf-8") as fh:
            return json.load(fh)

    def _save(self, payload: List[dict]) -> None:
        """Persist the raw request list, overwriting the store file."""
        with self.store_path.open("w", encoding="utf-8") as fh:
            json.dump(payload, fh, indent=2, ensure_ascii=True)

    @staticmethod
    def _to_request(entry: dict) -> ApprovalRequest:
        """Rebuild an ApprovalRequest, rehydrating nested action dicts.

        Fixes the previous behavior where ApprovalRequest(**entry) left
        `actions` as raw dicts, violating the declared List[ApprovalAction].
        """
        data = dict(entry)
        data["actions"] = [ApprovalAction(**a) for a in data.get("actions", [])]
        return ApprovalRequest(**data)

    def request(self, model_name: str, model_version: str, requested_by: str) -> ApprovalRequest:
        """Create, persist and return a new pending approval request."""
        payload = self._load()
        req = ApprovalRequest(
            model_name=model_name,
            model_version=model_version,
            requested_by=requested_by,
            requested_at=datetime.utcnow().isoformat(),
        )
        payload.append(asdict(req))
        self._save(payload)
        return req

    def list_requests(self, model_name: Optional[str] = None) -> List[ApprovalRequest]:
        """Return all stored requests, optionally filtered by model name."""
        requests = [self._to_request(entry) for entry in self._load()]
        if model_name is None:
            return requests
        return [req for req in requests if req.model_name == model_name]

    def act(
        self,
        model_name: str,
        model_version: str,
        *,
        actor: str,
        decision: str,
        comment: Optional[str] = None,
    ) -> ApprovalRequest:
        """Record a decision against the request for (model_name, model_version).

        Appends an ApprovalAction to the first matching request and updates
        its status when the decision is approve/reject (case-insensitive);
        any other decision only adds to the action history.

        Raises ValueError when no matching request exists.
        """
        payload = self._load()
        found = None
        # First match wins; duplicate requests for the same version keep
        # their history on the earliest entry.
        for entry in payload:
            if entry["model_name"] == model_name and entry["model_version"] == model_version:
                found = entry
                break
        if found is None:
            raise ValueError("Approval request not found.")
        action = ApprovalAction(
            actor=actor,
            decision=decision,
            timestamp=datetime.utcnow().isoformat(),
            comment=comment,
        )
        found["actions"].append(asdict(action))
        if decision.lower() in {"approve", "approved"}:
            found["status"] = "approved"
        elif decision.lower() in {"reject", "rejected"}:
            found["status"] = "rejected"
        self._save(payload)
        return self._to_request(found)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import asdict, dataclass
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class AuditEvent:
    """One immutable entry in the audit log."""

    action: str  # what happened, e.g. "register", "promote"
    actor: str  # who performed the action
    timestamp: str  # ISO-8601 UTC string, set by AuditLogger.log
    metadata: Dict[str, Any]  # arbitrary JSON-serializable context
    note: Optional[str] = None  # optional free-form remark
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AuditLogger:
    """Append-only JSONL audit log.

    Each call to :meth:`log` appends exactly one JSON object per line to
    ``log_path``; existing entries are never modified.
    """

    def __init__(self, log_path: str | Path):
        self.log_path = Path(log_path)
        self.log_path.parent.mkdir(parents=True, exist_ok=True)

    def log(self, action: str, actor: str, *, metadata: Optional[Dict[str, Any]] = None,
            note: Optional[str] = None) -> AuditEvent:
        """Append one audit event as a JSON line and return the event."""
        event = AuditEvent(
            action=action,
            actor=actor,
            timestamp=datetime.utcnow().isoformat(),
            metadata=metadata or {},
            note=note,
        )
        record = json.dumps(asdict(event), ensure_ascii=True)
        with self.log_path.open("a", encoding="utf-8") as fh:
            fh.write(record + "\n")
        return event
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import asdict, dataclass, field
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, List, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ModelArtifact:
    """A file-system artifact attached to a registered model version."""

    path: str  # location of the artifact (stored as-is, not validated here)
    description: Optional[str] = None  # optional human-readable label
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class ModelVersion:
    """One registered version of a model, as stored in the registry JSON."""

    name: str
    version: str
    created_at: str  # ISO-8601 UTC string, set at registration time
    metrics: Dict[str, float] = field(default_factory=dict)  # evaluation scores
    tags: Dict[str, str] = field(default_factory=dict)  # free-form labels
    artifacts: List[ModelArtifact] = field(default_factory=list)
    # Lifecycle: "candidate" -> "production" -> "archived" (see ModelRegistry.promote).
    status: str = "candidate"
    notes: Optional[str] = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ModelRegistry:
    """Lightweight JSON-based model registry.

    On-disk layout is a single JSON file mapping model name to a list of
    ModelVersion dicts. Not safe for concurrent writers.
    """

    def __init__(self, registry_path: str | Path):
        self.registry_path = Path(registry_path)
        self.registry_path.parent.mkdir(parents=True, exist_ok=True)

    def _load(self) -> Dict[str, List[dict]]:
        """Read the raw registry mapping; a missing file means empty registry."""
        if not self.registry_path.exists():
            return {}
        with self.registry_path.open("r", encoding="utf-8") as fh:
            return json.load(fh)

    def _save(self, payload: Dict[str, List[dict]]) -> None:
        """Persist the raw registry mapping, overwriting the file."""
        with self.registry_path.open("w", encoding="utf-8") as fh:
            json.dump(payload, fh, indent=2, ensure_ascii=True)

    @staticmethod
    def _to_version(entry: dict) -> ModelVersion:
        """Rebuild a ModelVersion, rehydrating nested artifact dicts.

        Fixes the previous behavior where ModelVersion(**entry) left
        `artifacts` as raw dicts, violating the declared List[ModelArtifact].
        """
        data = dict(entry)
        data["artifacts"] = [ModelArtifact(**a) for a in data.get("artifacts", [])]
        return ModelVersion(**data)

    def register(
        self,
        name: str,
        version: str,
        *,
        metrics: Optional[Dict[str, float]] = None,
        tags: Optional[Dict[str, str]] = None,
        artifacts: Optional[List[ModelArtifact]] = None,
        status: str = "candidate",
        notes: Optional[str] = None,
    ) -> ModelVersion:
        """Append a new version entry for ``name`` and return it.

        NOTE: duplicate (name, version) pairs are not rejected; later lookups
        return the first match.
        """
        payload = self._load()
        created_at = datetime.utcnow().isoformat()
        entry = ModelVersion(
            name=name,
            version=version,
            created_at=created_at,
            metrics=metrics or {},
            tags=tags or {},
            artifacts=artifacts or [],
            status=status,
            notes=notes,
        )
        # asdict() recurses into nested ModelArtifact dataclasses.
        payload.setdefault(name, []).append(asdict(entry))
        self._save(payload)
        return entry

    def list_versions(self, name: str) -> List[ModelVersion]:
        """Return every registered version of ``name`` (empty list if unknown)."""
        return [self._to_version(v) for v in self._load().get(name, [])]

    def get_version(self, name: str, version: str) -> Optional[ModelVersion]:
        """Return the first entry matching ``version``, or None."""
        for entry in self.list_versions(name):
            if entry.version == version:
                return entry
        return None

    def promote(
        self, name: str, version: str, *, new_status: str = "production"
    ) -> None:
        """Set ``version`` of ``name`` to ``new_status``.

        When promoting to "production", any other version currently in
        production is demoted to "archived" so at most one production
        version exists per model.

        Raises ValueError when the model or version is unknown.
        """
        payload = self._load()
        if name not in payload:
            raise ValueError("Model not found in registry.")
        updated = False
        for entry in payload[name]:
            if entry["version"] == version:
                entry["status"] = new_status
                updated = True
            elif new_status == "production":
                if entry.get("status") == "production":
                    entry["status"] = "archived"
        if not updated:
            raise ValueError("Version not found in registry.")
        self._save(payload)
|