ins-pricing 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. ins_pricing/README.md +60 -0
  2. ins_pricing/__init__.py +102 -0
  3. ins_pricing/governance/README.md +18 -0
  4. ins_pricing/governance/__init__.py +20 -0
  5. ins_pricing/governance/approval.py +93 -0
  6. ins_pricing/governance/audit.py +37 -0
  7. ins_pricing/governance/registry.py +99 -0
  8. ins_pricing/governance/release.py +159 -0
  9. ins_pricing/modelling/BayesOpt.py +146 -0
  10. ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
  11. ins_pricing/modelling/BayesOpt_entry.py +575 -0
  12. ins_pricing/modelling/BayesOpt_incremental.py +731 -0
  13. ins_pricing/modelling/Explain_Run.py +36 -0
  14. ins_pricing/modelling/Explain_entry.py +539 -0
  15. ins_pricing/modelling/Pricing_Run.py +36 -0
  16. ins_pricing/modelling/README.md +33 -0
  17. ins_pricing/modelling/__init__.py +44 -0
  18. ins_pricing/modelling/bayesopt/__init__.py +98 -0
  19. ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
  20. ins_pricing/modelling/bayesopt/core.py +1476 -0
  21. ins_pricing/modelling/bayesopt/models.py +2196 -0
  22. ins_pricing/modelling/bayesopt/trainers.py +2446 -0
  23. ins_pricing/modelling/bayesopt/utils.py +1021 -0
  24. ins_pricing/modelling/cli_common.py +136 -0
  25. ins_pricing/modelling/explain/__init__.py +55 -0
  26. ins_pricing/modelling/explain/gradients.py +334 -0
  27. ins_pricing/modelling/explain/metrics.py +176 -0
  28. ins_pricing/modelling/explain/permutation.py +155 -0
  29. ins_pricing/modelling/explain/shap_utils.py +146 -0
  30. ins_pricing/modelling/notebook_utils.py +284 -0
  31. ins_pricing/modelling/plotting/__init__.py +45 -0
  32. ins_pricing/modelling/plotting/common.py +63 -0
  33. ins_pricing/modelling/plotting/curves.py +572 -0
  34. ins_pricing/modelling/plotting/diagnostics.py +139 -0
  35. ins_pricing/modelling/plotting/geo.py +362 -0
  36. ins_pricing/modelling/plotting/importance.py +121 -0
  37. ins_pricing/modelling/run_logging.py +133 -0
  38. ins_pricing/modelling/tests/conftest.py +8 -0
  39. ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
  40. ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
  41. ins_pricing/modelling/tests/test_explain.py +56 -0
  42. ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
  43. ins_pricing/modelling/tests/test_graph_cache.py +33 -0
  44. ins_pricing/modelling/tests/test_plotting.py +63 -0
  45. ins_pricing/modelling/tests/test_plotting_library.py +150 -0
  46. ins_pricing/modelling/tests/test_preprocessor.py +48 -0
  47. ins_pricing/modelling/watchdog_run.py +211 -0
  48. ins_pricing/pricing/README.md +44 -0
  49. ins_pricing/pricing/__init__.py +27 -0
  50. ins_pricing/pricing/calibration.py +39 -0
  51. ins_pricing/pricing/data_quality.py +117 -0
  52. ins_pricing/pricing/exposure.py +85 -0
  53. ins_pricing/pricing/factors.py +91 -0
  54. ins_pricing/pricing/monitoring.py +99 -0
  55. ins_pricing/pricing/rate_table.py +78 -0
  56. ins_pricing/production/__init__.py +21 -0
  57. ins_pricing/production/drift.py +30 -0
  58. ins_pricing/production/monitoring.py +143 -0
  59. ins_pricing/production/scoring.py +40 -0
  60. ins_pricing/reporting/README.md +20 -0
  61. ins_pricing/reporting/__init__.py +11 -0
  62. ins_pricing/reporting/report_builder.py +72 -0
  63. ins_pricing/reporting/scheduler.py +45 -0
  64. ins_pricing/setup.py +41 -0
  65. ins_pricing v2/__init__.py +23 -0
  66. ins_pricing v2/governance/__init__.py +20 -0
  67. ins_pricing v2/governance/approval.py +93 -0
  68. ins_pricing v2/governance/audit.py +37 -0
  69. ins_pricing v2/governance/registry.py +99 -0
  70. ins_pricing v2/governance/release.py +159 -0
  71. ins_pricing v2/modelling/Explain_Run.py +36 -0
  72. ins_pricing v2/modelling/Pricing_Run.py +36 -0
  73. ins_pricing v2/modelling/__init__.py +151 -0
  74. ins_pricing v2/modelling/cli_common.py +141 -0
  75. ins_pricing v2/modelling/config.py +249 -0
  76. ins_pricing v2/modelling/config_preprocess.py +254 -0
  77. ins_pricing v2/modelling/core.py +741 -0
  78. ins_pricing v2/modelling/data_container.py +42 -0
  79. ins_pricing v2/modelling/explain/__init__.py +55 -0
  80. ins_pricing v2/modelling/explain/gradients.py +334 -0
  81. ins_pricing v2/modelling/explain/metrics.py +176 -0
  82. ins_pricing v2/modelling/explain/permutation.py +155 -0
  83. ins_pricing v2/modelling/explain/shap_utils.py +146 -0
  84. ins_pricing v2/modelling/features.py +215 -0
  85. ins_pricing v2/modelling/model_manager.py +148 -0
  86. ins_pricing v2/modelling/model_plotting.py +463 -0
  87. ins_pricing v2/modelling/models.py +2203 -0
  88. ins_pricing v2/modelling/notebook_utils.py +294 -0
  89. ins_pricing v2/modelling/plotting/__init__.py +45 -0
  90. ins_pricing v2/modelling/plotting/common.py +63 -0
  91. ins_pricing v2/modelling/plotting/curves.py +572 -0
  92. ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
  93. ins_pricing v2/modelling/plotting/geo.py +362 -0
  94. ins_pricing v2/modelling/plotting/importance.py +121 -0
  95. ins_pricing v2/modelling/run_logging.py +133 -0
  96. ins_pricing v2/modelling/tests/conftest.py +8 -0
  97. ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
  98. ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
  99. ins_pricing v2/modelling/tests/test_explain.py +56 -0
  100. ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
  101. ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
  102. ins_pricing v2/modelling/tests/test_plotting.py +63 -0
  103. ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
  104. ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
  105. ins_pricing v2/modelling/trainers.py +2447 -0
  106. ins_pricing v2/modelling/utils.py +1020 -0
  107. ins_pricing v2/modelling/watchdog_run.py +211 -0
  108. ins_pricing v2/pricing/__init__.py +27 -0
  109. ins_pricing v2/pricing/calibration.py +39 -0
  110. ins_pricing v2/pricing/data_quality.py +117 -0
  111. ins_pricing v2/pricing/exposure.py +85 -0
  112. ins_pricing v2/pricing/factors.py +91 -0
  113. ins_pricing v2/pricing/monitoring.py +99 -0
  114. ins_pricing v2/pricing/rate_table.py +78 -0
  115. ins_pricing v2/production/__init__.py +21 -0
  116. ins_pricing v2/production/drift.py +30 -0
  117. ins_pricing v2/production/monitoring.py +143 -0
  118. ins_pricing v2/production/scoring.py +40 -0
  119. ins_pricing v2/reporting/__init__.py +11 -0
  120. ins_pricing v2/reporting/report_builder.py +72 -0
  121. ins_pricing v2/reporting/scheduler.py +45 -0
  122. ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
  123. ins_pricing v2/scripts/Explain_entry.py +545 -0
  124. ins_pricing v2/scripts/__init__.py +1 -0
  125. ins_pricing v2/scripts/train.py +568 -0
  126. ins_pricing v2/setup.py +55 -0
  127. ins_pricing v2/smoke_test.py +28 -0
  128. ins_pricing-0.1.6.dist-info/METADATA +78 -0
  129. ins_pricing-0.1.6.dist-info/RECORD +169 -0
  130. ins_pricing-0.1.6.dist-info/WHEEL +5 -0
  131. ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
  132. user_packages/__init__.py +105 -0
  133. user_packages legacy/BayesOpt.py +5659 -0
  134. user_packages legacy/BayesOpt_entry.py +513 -0
  135. user_packages legacy/BayesOpt_incremental.py +685 -0
  136. user_packages legacy/Pricing_Run.py +36 -0
  137. user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
  138. user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
  139. user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
  140. user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
  141. user_packages legacy/Try/BayesOpt legacy.py +3280 -0
  142. user_packages legacy/Try/BayesOpt.py +838 -0
  143. user_packages legacy/Try/BayesOptAll.py +1569 -0
  144. user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
  145. user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
  146. user_packages legacy/Try/BayesOptSearch.py +830 -0
  147. user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
  148. user_packages legacy/Try/BayesOptV1.py +1911 -0
  149. user_packages legacy/Try/BayesOptV10.py +2973 -0
  150. user_packages legacy/Try/BayesOptV11.py +3001 -0
  151. user_packages legacy/Try/BayesOptV12.py +3001 -0
  152. user_packages legacy/Try/BayesOptV2.py +2065 -0
  153. user_packages legacy/Try/BayesOptV3.py +2209 -0
  154. user_packages legacy/Try/BayesOptV4.py +2342 -0
  155. user_packages legacy/Try/BayesOptV5.py +2372 -0
  156. user_packages legacy/Try/BayesOptV6.py +2759 -0
  157. user_packages legacy/Try/BayesOptV7.py +2832 -0
  158. user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
  159. user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
  160. user_packages legacy/Try/BayesOptV9.py +2927 -0
  161. user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
  162. user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
  163. user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
  164. user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
  165. user_packages legacy/Try/xgbbayesopt.py +523 -0
  166. user_packages legacy/__init__.py +19 -0
  167. user_packages legacy/cli_common.py +124 -0
  168. user_packages legacy/notebook_utils.py +228 -0
  169. user_packages legacy/watchdog_run.py +202 -0
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict, Optional
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+
10
def compute_base_rate(
    df: pd.DataFrame,
    *,
    loss_col: str,
    exposure_col: str,
    weight_col: Optional[str] = None,
) -> float:
    """Return the aggregate base rate: total loss divided by total exposure.

    When ``weight_col`` names an existing column, both loss and exposure are
    scaled by that weight before summing.  A non-positive exposure total
    yields 0.0 instead of dividing by zero.
    """
    losses = df[loss_col].to_numpy(dtype=float, copy=False)
    exposures = df[exposure_col].to_numpy(dtype=float, copy=False)
    if weight_col and weight_col in df.columns:
        weights = df[weight_col].to_numpy(dtype=float, copy=False)
        losses = losses * weights
        exposures = exposures * weights
    exposure_total = float(np.sum(exposures))
    if exposure_total <= 0:
        return 0.0
    return float(np.sum(losses) / exposure_total)
28
+
29
+
30
def apply_factor_tables(
    df: pd.DataFrame,
    factor_tables: Dict[str, pd.DataFrame],
    *,
    default_relativity: float = 1.0,
) -> np.ndarray:
    """Return the per-row product of relativities across all factor tables.

    Each table must carry 'level' and 'relativity' columns; levels not found
    in a table fall back to ``default_relativity``.  Raises ValueError for a
    missing factor column or a malformed table.
    """
    combined = np.ones(len(df), dtype=float)
    for name, table in factor_tables.items():
        if name not in df.columns:
            raise ValueError(f"Missing factor column: {name}")
        if "level" not in table.columns or "relativity" not in table.columns:
            raise ValueError("Factor table must include 'level' and 'relativity'.")
        lookup = table.set_index("level")["relativity"]
        relativities = (
            df[name].map(lookup).fillna(default_relativity).to_numpy(dtype=float)
        )
        combined *= relativities
    return combined
47
+
48
+
49
def rate_premium(
    df: pd.DataFrame,
    *,
    exposure_col: str,
    base_rate: float,
    factor_tables: Dict[str, pd.DataFrame],
    default_relativity: float = 1.0,
) -> np.ndarray:
    """Per-row premium: exposure * base_rate * product of factor relativities."""
    relativity = apply_factor_tables(
        df, factor_tables, default_relativity=default_relativity
    )
    exposures = df[exposure_col].to_numpy(dtype=float, copy=False)
    return exposures * float(base_rate) * relativity
63
+
64
+
65
@dataclass
class RateTable:
    """Portable bundle of a fitted rating structure.

    Holds one scalar base rate plus one relativity table per rating factor
    (each table needs 'level' and 'relativity' columns, per apply_factor_tables).
    """

    # Aggregate loss per unit of exposure.
    base_rate: float
    # Factor name -> relativity table.
    factor_tables: Dict[str, pd.DataFrame]
    # Relativity used for levels absent from a table.
    default_relativity: float = 1.0

    def score(self, df: pd.DataFrame, *, exposure_col: str) -> np.ndarray:
        """Compute the per-row premium for ``df`` using the stored rate structure."""
        return rate_premium(
            df,
            exposure_col=exposure_col,
            base_rate=self.base_rate,
            factor_tables=self.factor_tables,
            default_relativity=self.default_relativity,
        )
@@ -0,0 +1,21 @@
1
+ from __future__ import annotations
2
+
3
+ from .drift import psi_report
4
+ from .monitoring import (
5
+ classification_metrics,
6
+ group_metrics,
7
+ loss_ratio,
8
+ metrics_report,
9
+ regression_metrics,
10
+ )
11
+ from .scoring import batch_score
12
+
13
# Public API of the production subpackage: drift, monitoring and scoring helpers.
__all__ = [
    "psi_report",
    "classification_metrics",
    "group_metrics",
    "loss_ratio",
    "metrics_report",
    "regression_metrics",
    "batch_score",
]
@@ -0,0 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable, Optional
4
+
5
+ import pandas as pd
6
+
7
try:
    from ins_pricing.pricing.monitoring import psi_report as _psi_report
except Exception:  # pragma: no cover - optional import
    _psi_report = None


def psi_report(
    expected_df: pd.DataFrame,
    actual_df: pd.DataFrame,
    *,
    features: Optional[Iterable[str]] = None,
    bins: int = 10,
    strategy: str = "quantile",
) -> pd.DataFrame:
    """Population Stability Index report for drift monitoring."""
    # Thin delegation: the actual PSI computation lives in
    # ins_pricing.pricing.monitoring, imported optionally above.
    if _psi_report is None:
        raise RuntimeError("psi_report requires ins_pricing.pricing.monitoring.")
    options = {"features": features, "bins": bins, "strategy": strategy}
    return _psi_report(expected_df, actual_df, **options)
@@ -0,0 +1,143 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, Iterable, Optional
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+
9
+ def _safe_div(numer: float, denom: float, default: float = 0.0) -> float:
10
+ if denom == 0:
11
+ return default
12
+ return numer / denom
13
+
14
+
15
def regression_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    *,
    weight: Optional[np.ndarray] = None,
) -> Dict[str, float]:
    """Compute rmse, mae, mape and r2 for a regression fit.

    ``weight`` (if given) applies to rmse and mae only; mape and r2 are
    always unweighted.  Raises ValueError when the weight length does not
    match ``y_true``.
    """
    y_true = np.asarray(y_true, dtype=float).reshape(-1)
    y_pred = np.asarray(y_pred, dtype=float).reshape(-1)
    if weight is not None:
        weight = np.asarray(weight, dtype=float).reshape(-1)
        if weight.shape[0] != y_true.shape[0]:
            raise ValueError("weight length must match y_true.")
    err = y_true - y_pred
    if weight is None:
        mse = float(np.mean(err ** 2))
        mae = float(np.mean(np.abs(err)))
    else:
        # NOTE(review): max(w_sum, 1.0) distorts the weighted average when the
        # weights sum to less than 1; kept for backward compatibility, but a
        # plain zero-guard (w_sum or 1.0) looks like the intent — confirm.
        w_sum = float(np.sum(weight))
        mse = float(np.sum(weight * (err ** 2)) / max(w_sum, 1.0))
        mae = float(np.sum(weight * np.abs(err)) / max(w_sum, 1.0))
    rmse = float(np.sqrt(mse))
    # Clip |y_true| away from zero so rows with a zero target don't divide by 0.
    mape = float(np.mean(np.abs(err) / np.clip(np.abs(y_true), 1e-9, None)))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    ss_res = float(np.sum(err ** 2))
    # Constant targets (ss_tot == 0) report r2 = 1.0, matching the previous
    # safe-division fallback.  (Removed an unused `denom` local computed here.)
    if ss_tot == 0:
        r2 = 1.0
    else:
        r2 = 1.0 - ss_res / ss_tot
    return {"rmse": rmse, "mae": mae, "mape": mape, "r2": r2}
42
+
43
+
44
def loss_ratio(
    actual_loss: np.ndarray,
    predicted_premium: np.ndarray,
    *,
    weight: Optional[np.ndarray] = None,
) -> float:
    """Ratio of total (optionally weighted) actual loss to total predicted premium.

    Returns 0.0 when the premium total is zero.
    """
    losses = np.asarray(actual_loss, dtype=float).reshape(-1)
    premiums = np.asarray(predicted_premium, dtype=float).reshape(-1)
    if weight is not None:
        w = np.asarray(weight, dtype=float).reshape(-1)
        losses = losses * w
        premiums = premiums * w
    total_loss = float(np.sum(losses))
    total_premium = float(np.sum(premiums))
    return 0.0 if total_premium == 0 else total_loss / total_premium
57
+
58
+
59
def classification_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    *,
    threshold: float = 0.5,
) -> Dict[str, float]:
    """Accuracy, precision and recall after thresholding scores at ``threshold``.

    Precision/recall fall back to 0.0 when their denominator is empty.
    """
    truth = np.asarray(y_true, dtype=float).reshape(-1)
    scores = np.asarray(y_pred, dtype=float).reshape(-1)
    labels = (scores >= threshold).astype(float)
    true_pos = float(np.sum((labels == 1) & (truth == 1)))
    pred_pos = float(np.sum(labels == 1))
    actual_pos = float(np.sum(truth == 1))
    accuracy = float(np.mean(labels == truth))
    precision = true_pos / pred_pos if pred_pos else 0.0
    recall = true_pos / actual_pos if actual_pos else 0.0
    return {"accuracy": accuracy, "precision": precision, "recall": recall}
74
+
75
+
76
def metrics_report(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    *,
    task_type: str = "regression",
    weight: Optional[np.ndarray] = None,
) -> Dict[str, float]:
    """Dispatch to classification or regression metrics by ``task_type``.

    Any value other than "classification" falls through to regression.
    The optional ``weight`` is honoured only on the regression path.
    """
    if task_type == "classification":
        return classification_metrics(y_true, y_pred)
    return regression_metrics(y_true, y_pred, weight=weight)
88
+
89
+
90
def group_metrics(
    df: pd.DataFrame,
    *,
    actual_col: str,
    pred_col: str,
    group_cols: Iterable[str],
    weight_col: Optional[str] = None,
) -> pd.DataFrame:
    """Per-group regression metrics (rmse, mae, mape, r2).

    Groups ``df`` by ``group_cols`` and returns one row per group.  When
    ``weight_col`` is given, rmse and mae are weight-averaged; mape and r2
    stay unweighted (mirroring regression_metrics).
    """
    group_cols = list(group_cols)
    # Work on a narrow copy so the temporary "_"-prefixed columns never touch df.
    work = df[group_cols].copy()
    y_true = df[actual_col].to_numpy(dtype=float)
    y_pred = df[pred_col].to_numpy(dtype=float)
    err = y_true - y_pred
    work["_y_true"] = y_true
    work["_y_pred"] = y_pred
    work["_err"] = err
    work["_abs_err"] = np.abs(err)
    work["_err_sq"] = err ** 2
    # Clip |y_true| away from zero so zero targets cannot divide by zero.
    work["_abs_ratio"] = work["_abs_err"] / np.clip(np.abs(work["_y_true"]), 1e-9, None)
    work["_y_true_sq"] = work["_y_true"] ** 2

    if weight_col:
        w = df[weight_col].to_numpy(dtype=float)
        work["_w"] = w
        work["_w_err_sq"] = w * work["_err_sq"]
        work["_w_abs_err"] = w * work["_abs_err"]

    # dropna=False keeps groups whose key contains NaN.
    grouped = work.groupby(group_cols, dropna=False)
    count = grouped["_y_true"].count().replace(0, 1.0)
    sum_y = grouped["_y_true"].sum()
    sum_y2 = grouped["_y_true_sq"].sum()
    # Total sum of squares via sum(y^2) - (sum y)^2 / n; clip the tiny negative
    # values that floating-point cancellation can produce.
    ss_tot = sum_y2 - (sum_y ** 2) / count
    ss_tot = ss_tot.clip(lower=0.0)
    ss_res = grouped["_err_sq"].sum()
    # Constant groups (ss_tot == 0) would divide by zero; report r2 = 0 there.
    r2 = 1.0 - (ss_res / ss_tot.replace(0.0, np.nan))
    r2 = r2.fillna(0.0)

    mape = grouped["_abs_ratio"].mean()
    if weight_col:
        # Guard a zero weight total per group to avoid division by zero.
        sum_w = grouped["_w"].sum().replace(0, 1.0)
        mse = grouped["_w_err_sq"].sum() / sum_w
        mae = grouped["_w_abs_err"].sum() / sum_w
    else:
        mse = grouped["_err_sq"].sum() / count
        mae = grouped["_abs_err"].sum() / count

    rmse = np.sqrt(mse)
    result = pd.DataFrame({
        "rmse": rmse.astype(float),
        "mae": mae.astype(float),
        "mape": mape.astype(float),
        "r2": r2.astype(float),
    })
    # reset_index turns the group keys back into ordinary columns.
    return result.reset_index()
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Callable, Optional
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+
10
def batch_score(
    predict_fn: Callable[[pd.DataFrame], np.ndarray],
    data: pd.DataFrame,
    *,
    output_col: str = "prediction",
    batch_size: int = 10000,
    output_path: Optional[str | Path] = None,
    keep_input: bool = True,
) -> pd.DataFrame:
    """Score ``data`` in fixed-size chunks and optionally persist the result.

    ``predict_fn`` is called once per chunk and must return one value per row.
    When ``output_path`` is given the frame is written there (parquet for
    .parquet/.pq suffixes, CSV otherwise).  Raises ValueError for a
    non-positive batch size or a mismatched prediction length.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive.")
    total = len(data)
    scores = np.empty(total, dtype=float)
    start = 0
    while start < total:
        stop = min(start + batch_size, total)
        batch = data.iloc[start:stop]
        out = np.asarray(predict_fn(batch)).reshape(-1)
        if out.shape[0] != stop - start:
            raise ValueError("predict_fn output length must match batch size.")
        scores[start:stop] = out
        start = stop
    if keep_input:
        result = data.copy()
    else:
        result = pd.DataFrame(index=data.index)
    result[output_col] = scores
    if output_path:
        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        if target.suffix.lower() in {".parquet", ".pq"}:
            result.to_parquet(target, index=False)
        else:
            result.to_csv(target, index=False)
    return result
@@ -0,0 +1,11 @@
1
+ from __future__ import annotations
2
+
3
+ from .report_builder import ReportPayload, build_report, write_report
4
+ from .scheduler import schedule_daily
5
+
6
# Public reporting API: markdown report building plus a daily scheduler.
__all__ = [
    "ReportPayload",
    "build_report",
    "write_report",
    "schedule_daily",
]
@@ -0,0 +1,72 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Dict, Optional
7
+
8
+ import pandas as pd
9
+
10
+
11
+ def _df_to_markdown(df: pd.DataFrame, max_rows: int = 20) -> str:
12
+ if df is None or df.empty:
13
+ return "_(no data)_"
14
+ data = df.copy()
15
+ if len(data) > max_rows:
16
+ data = data.head(max_rows)
17
+ headers = list(data.columns)
18
+ rows = data.astype(str).values.tolist()
19
+ lines = []
20
+ lines.append("| " + " | ".join(headers) + " |")
21
+ lines.append("| " + " | ".join(["---"] * len(headers)) + " |")
22
+ for row in rows:
23
+ lines.append("| " + " | ".join(row) + " |")
24
+ return "\n".join(lines)
25
+
26
+
27
@dataclass
class ReportPayload:
    """Inputs for build_report: model identity, metrics and optional detail tables."""

    # Model identity shown in the report title.
    model_name: str
    model_version: str
    # Scalar metrics rendered as a bullet list (formatted with %.6f).
    metrics: Dict[str, float]
    # Optional sections; absent tables render as "_(no data)_".
    risk_trend: Optional[pd.DataFrame] = None
    drift_report: Optional[pd.DataFrame] = None
    validation_table: Optional[pd.DataFrame] = None
    extra_notes: Optional[str] = None
36
+
37
+
38
def build_report(payload: ReportPayload) -> str:
    """Render a ReportPayload as a markdown document and return it as a string.

    Sections: validation metrics (always), optional validation details,
    drift/stability, risk trend, and free-form notes.  Ends with exactly
    one trailing newline.
    """
    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
    # timestamp formats identically under this strftime pattern.
    from datetime import timezone
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    metrics_lines = [f"- {k}: {v:.6f}" for k, v in payload.metrics.items()]
    metrics_block = "\n".join(metrics_lines) if metrics_lines else "_(no metrics)_"

    report = [
        f"# Model Report: {payload.model_name} ({payload.model_version})",
        "",
        f"Generated at: {now}",
        "",
        "## Model Validation",
        metrics_block,
    ]

    if payload.validation_table is not None:
        report.extend(["", "### Validation Details", _df_to_markdown(payload.validation_table)])

    report.extend(["", "## Drift / Stability"])
    report.append(_df_to_markdown(payload.drift_report))

    report.extend(["", "## Risk Trend"])
    report.append(_df_to_markdown(payload.risk_trend))

    if payload.extra_notes:
        report.extend(["", "## Notes", payload.extra_notes])

    return "\n".join(report).strip() + "\n"
65
+
66
+
67
def write_report(payload: ReportPayload, output_path: str | Path) -> Path:
    """Build the markdown report for ``payload`` and write it to ``output_path``.

    Parent directories are created as needed; the file is written UTF-8.
    Returns the destination as a Path.
    """
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(build_report(payload), encoding="utf-8")
    return target
@@ -0,0 +1,45 @@
1
+ from __future__ import annotations
2
+
3
+ import threading
4
+ import time
5
+ from datetime import datetime, timedelta
6
+ from typing import Callable, Optional
7
+
8
+
9
+ def _next_run(run_time: str, now: Optional[datetime] = None) -> datetime:
10
+ if now is None:
11
+ now = datetime.now()
12
+ hour, minute = [int(x) for x in run_time.split(":")]
13
+ candidate = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
14
+ if candidate <= now:
15
+ candidate = candidate + timedelta(days=1)
16
+ return candidate
17
+
18
+
19
def schedule_daily(
    job_fn: Callable[[], None],
    *,
    run_time: str = "01:00",
    stop_event: Optional[threading.Event] = None,
) -> threading.Thread:
    """Run job_fn daily at local time HH:MM in a background thread.

    Returns the already-started daemon thread.  Setting ``stop_event`` ends
    the loop; when one is not supplied an internal event is created, so the
    loop then runs until process exit (daemon threads do not block shutdown).
    """
    if stop_event is None:
        stop_event = threading.Event()

    def _loop():
        while not stop_event.is_set():
            next_time = _next_run(run_time)
            sleep_seconds = (next_time - datetime.now()).total_seconds()
            if sleep_seconds > 0:
                # Event.wait doubles as an interruptible sleep: it returns
                # early if the stop event is set while waiting.
                stop_event.wait(timeout=sleep_seconds)
            if stop_event.is_set():
                break
            try:
                job_fn()
            except Exception:
                # Deliberate best-effort: a failing job must not kill the
                # scheduler loop.  NOTE(review): failures are silent —
                # consider logging here.
                pass
            # Step past the scheduled minute so the same run_time is not
            # re-triggered immediately within the same minute.
            time.sleep(1)

    thread = threading.Thread(target=_loop, daemon=True)
    thread.start()
    return thread