ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
@@ -1,143 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Dict, Iterable, Optional
4
-
5
- import numpy as np
6
- import pandas as pd
7
-
8
-
9
- def _safe_div(numer: float, denom: float, default: float = 0.0) -> float:
10
- if denom == 0:
11
- return default
12
- return numer / denom
13
-
14
-
15
def regression_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    *,
    weight: Optional[np.ndarray] = None,
) -> Dict[str, float]:
    """Compute RMSE, MAE, MAPE and R^2 for flattened targets and predictions.

    Args:
        y_true: Observed values; flattened to 1-D floats.
        y_pred: Predicted values; flattened to 1-D floats.
        weight: Optional per-observation weights. When given, RMSE and MAE
            are weighted averages; MAPE and R^2 are always unweighted.

    Returns:
        Dict with keys ``"rmse"``, ``"mae"``, ``"mape"`` and ``"r2"``.

    Raises:
        ValueError: If ``weight`` does not have the same length as ``y_true``.
    """
    y_true = np.asarray(y_true, dtype=float).reshape(-1)
    y_pred = np.asarray(y_pred, dtype=float).reshape(-1)
    if weight is not None:
        weight = np.asarray(weight, dtype=float).reshape(-1)
        if weight.shape[0] != y_true.shape[0]:
            raise ValueError("weight length must match y_true.")

    err = y_true - y_pred
    abs_err = np.abs(err)
    sq_err = err ** 2

    if weight is None:
        mse = float(np.mean(sq_err))
        mae = float(np.mean(abs_err))
    else:
        w_sum = float(np.sum(weight))
        # Normalise by the actual total weight so that rescaling the weights
        # does not change the result; fall back to 1.0 only when the total is
        # not positive, to avoid dividing by zero.
        norm = w_sum if w_sum > 0 else 1.0
        mse = float(np.sum(weight * sq_err) / norm)
        mae = float(np.sum(weight * abs_err) / norm)

    rmse = float(np.sqrt(mse))
    # Clip |y_true| away from zero so exact-zero targets do not divide by zero.
    mape = float(np.mean(abs_err / np.clip(np.abs(y_true), 1e-9, None)))
    ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
    ss_res = float(np.sum(sq_err))
    # A constant target has zero total variance; the ratio is then taken as
    # 0.0, which yields r2 == 1.0.
    r2 = 1.0 - (ss_res / ss_tot if ss_tot != 0 else 0.0)
    return {"rmse": rmse, "mae": mae, "mape": mape, "r2": r2}
42
-
43
-
44
def loss_ratio(
    actual_loss: np.ndarray,
    predicted_premium: np.ndarray,
    *,
    weight: Optional[np.ndarray] = None,
) -> float:
    """Return total (optionally weighted) loss divided by total premium.

    Args:
        actual_loss: Observed losses; flattened to 1-D floats.
        predicted_premium: Predicted premiums; flattened to 1-D floats.
        weight: Optional per-observation weights applied to both inputs
            before summing.

    Returns:
        ``sum(actual_loss) / sum(predicted_premium)``, or ``0.0`` when the
        premium total is zero.

    Raises:
        ValueError: If the inputs (or ``weight``) differ in length.
    """
    actual_loss = np.asarray(actual_loss, dtype=float).reshape(-1)
    predicted_premium = np.asarray(predicted_premium, dtype=float).reshape(-1)
    # The two totals are summed independently, so without this check a length
    # mismatch would silently produce a meaningless ratio.
    if predicted_premium.shape[0] != actual_loss.shape[0]:
        raise ValueError("predicted_premium length must match actual_loss.")
    if weight is not None:
        weight = np.asarray(weight, dtype=float).reshape(-1)
        if weight.shape[0] != actual_loss.shape[0]:
            raise ValueError("weight length must match actual_loss.")
        actual_loss = actual_loss * weight
        predicted_premium = predicted_premium * weight

    total_premium = float(np.sum(predicted_premium))
    if total_premium == 0:
        return 0.0
    return float(np.sum(actual_loss)) / total_premium
57
-
58
-
59
def classification_metrics(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    *,
    threshold: float = 0.5,
) -> Dict[str, float]:
    """Compute accuracy, precision and recall for thresholded scores.

    Scores in ``y_pred`` at or above ``threshold`` are labelled 1.0, the rest
    0.0. Precision and recall fall back to 0.0 when their denominator is zero.
    """
    actual = np.asarray(y_true, dtype=float).reshape(-1)
    scores = np.asarray(y_pred, dtype=float).reshape(-1)
    predicted = (scores >= threshold).astype(float)

    predicted_pos = predicted == 1
    actual_pos = actual == 1
    true_pos = float(np.sum(predicted_pos & actual_pos))

    return {
        "accuracy": float(np.mean(predicted == actual)),
        "precision": _safe_div(true_pos, float(np.sum(predicted_pos)), default=0.0),
        "recall": _safe_div(true_pos, float(np.sum(actual_pos)), default=0.0),
    }
74
-
75
-
76
def metrics_report(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    *,
    task_type: str = "regression",
    weight: Optional[np.ndarray] = None,
) -> Dict[str, float]:
    """Dispatch to the metric function matching ``task_type``.

    ``"classification"`` selects ``classification_metrics`` (``weight`` is not
    forwarded); any other value selects ``regression_metrics``.
    """
    if task_type != "classification":
        return regression_metrics(y_true, y_pred, weight=weight)
    return classification_metrics(y_true, y_pred)
88
-
89
-
90
def group_metrics(
    df: pd.DataFrame,
    *,
    actual_col: str,
    pred_col: str,
    group_cols: Iterable[str],
    weight_col: Optional[str] = None,
) -> pd.DataFrame:
    """Compute RMSE, MAE, MAPE and R^2 per group of ``group_cols``.

    RMSE and MAE are weighted by ``weight_col`` when it is given; MAPE and
    R^2 are always unweighted. Groups whose target has zero variance get
    ``r2 == 0.0``. The result has one row per group, with the group columns
    followed by ``rmse``, ``mae``, ``mape`` and ``r2``.
    """
    keys = list(group_cols)
    actual = df[actual_col].to_numpy(dtype=float)
    predicted = df[pred_col].to_numpy(dtype=float)
    residual = actual - predicted
    abs_residual = np.abs(residual)
    sq_residual = residual ** 2

    # Scratch frame: group keys plus the per-row quantities to aggregate.
    frame = df[keys].copy()
    frame["_y_true"] = actual
    frame["_y_pred"] = predicted
    frame["_err"] = residual
    frame["_abs_err"] = abs_residual
    frame["_err_sq"] = sq_residual
    # Clip |actual| away from zero so exact-zero targets do not divide by zero.
    frame["_abs_ratio"] = abs_residual / np.clip(np.abs(actual), 1e-9, None)
    frame["_y_true_sq"] = actual ** 2
    if weight_col:
        weights = df[weight_col].to_numpy(dtype=float)
        frame["_w"] = weights
        frame["_w_err_sq"] = weights * sq_residual
        frame["_w_abs_err"] = weights * abs_residual

    buckets = frame.groupby(keys, dropna=False)
    n_obs = buckets["_y_true"].count().replace(0, 1.0)

    # Total sum of squares via sum(y^2) - sum(y)^2 / n, clipped at zero to
    # absorb negative round-off.
    total_y = buckets["_y_true"].sum()
    ss_tot = (buckets["_y_true_sq"].sum() - (total_y ** 2) / n_obs).clip(lower=0.0)
    ss_res = buckets["_err_sq"].sum()
    r2 = (1.0 - (ss_res / ss_tot.replace(0.0, np.nan))).fillna(0.0)

    if weight_col:
        total_w = buckets["_w"].sum().replace(0, 1.0)
        mse = buckets["_w_err_sq"].sum() / total_w
        mae = buckets["_w_abs_err"].sum() / total_w
    else:
        mse = buckets["_err_sq"].sum() / n_obs
        mae = buckets["_abs_err"].sum() / n_obs

    summary = pd.DataFrame({
        "rmse": np.sqrt(mse).astype(float),
        "mae": mae.astype(float),
        "mape": buckets["_abs_ratio"].mean().astype(float),
        "r2": r2.astype(float),
    })
    return summary.reset_index()
@@ -1,40 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from pathlib import Path
4
- from typing import Callable, Optional
5
-
6
- import numpy as np
7
- import pandas as pd
8
-
9
-
10
def batch_score(
    predict_fn: Callable[[pd.DataFrame], np.ndarray],
    data: pd.DataFrame,
    *,
    output_col: str = "prediction",
    batch_size: int = 10000,
    output_path: Optional[str | Path] = None,
    keep_input: bool = True,
) -> pd.DataFrame:
    """Score ``data`` in row batches and optionally persist the result.

    ``predict_fn`` is called on consecutive slices of at most ``batch_size``
    rows and must return one value per row. Predictions are stored as floats
    in ``output_col``, next to a copy of the input when ``keep_input`` is true
    or on an otherwise empty frame sharing the input index. If ``output_path``
    is given, the result is written as Parquet for ``.parquet``/``.pq``
    suffixes and as CSV otherwise, without the index.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``predict_fn``
            returns the wrong number of values for a batch.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive.")

    total = len(data)
    scores = np.empty(total, dtype=float)
    for offset in range(0, total, batch_size):
        stop = min(offset + batch_size, total)
        batch_pred = np.asarray(predict_fn(data.iloc[offset:stop])).reshape(-1)
        if batch_pred.shape[0] != stop - offset:
            raise ValueError("predict_fn output length must match batch size.")
        scores[offset:stop] = batch_pred

    scored = pd.DataFrame(index=data.index) if not keep_input else data.copy()
    scored[output_col] = scores
    if not output_path:
        return scored

    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    is_parquet = target.suffix.lower() in {".parquet", ".pq"}
    writer = scored.to_parquet if is_parquet else scored.to_csv
    writer(target, index=False)
    return scored
@@ -1,11 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from .report_builder import ReportPayload, build_report, write_report
4
- from .scheduler import schedule_daily
5
-
6
- __all__ = [
7
- "ReportPayload",
8
- "build_report",
9
- "write_report",
10
- "schedule_daily",
11
- ]
@@ -1,72 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from dataclasses import dataclass
4
- from datetime import datetime
5
- from pathlib import Path
6
- from typing import Dict, Optional
7
-
8
- import pandas as pd
9
-
10
-
11
- def _df_to_markdown(df: pd.DataFrame, max_rows: int = 20) -> str:
12
- if df is None or df.empty:
13
- return "_(no data)_"
14
- data = df.copy()
15
- if len(data) > max_rows:
16
- data = data.head(max_rows)
17
- headers = list(data.columns)
18
- rows = data.astype(str).values.tolist()
19
- lines = []
20
- lines.append("| " + " | ".join(headers) + " |")
21
- lines.append("| " + " | ".join(["---"] * len(headers)) + " |")
22
- for row in rows:
23
- lines.append("| " + " | ".join(row) + " |")
24
- return "\n".join(lines)
25
-
26
-
27
@dataclass
class ReportPayload:
    """Inputs for a model report: identity, metrics and optional tables."""

    # Model identity, shown in the report title.
    model_name: str
    model_version: str
    # Metric name -> value.
    metrics: Dict[str, float]
    # Optional tables and free-text notes; all default to None.
    risk_trend: Optional[pd.DataFrame] = None
    drift_report: Optional[pd.DataFrame] = None
    validation_table: Optional[pd.DataFrame] = None
    extra_notes: Optional[str] = None
36
-
37
-
38
def build_report(payload: ReportPayload) -> str:
    """Render ``payload`` as a Markdown report.

    The report contains a title, the generation time in UTC, the metrics as a
    bullet list (six decimal places), an optional validation table, the drift
    and risk-trend tables, and optional notes.

    Args:
        payload: Report inputs.

    Returns:
        The Markdown text, ending with exactly one newline.
    """
    # Imported locally so this function does not depend on a change to the
    # module's import block.
    from datetime import timezone

    # datetime.utcnow() is deprecated; a timezone-aware "now" gives the same
    # formatted string.
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    metrics_lines = [f"- {k}: {v:.6f}" for k, v in payload.metrics.items()]
    metrics_block = "\n".join(metrics_lines) if metrics_lines else "_(no metrics)_"

    report = [
        f"# Model Report: {payload.model_name} ({payload.model_version})",
        "",
        f"Generated at: {now}",
        "",
        "## Model Validation",
        metrics_block,
    ]

    if payload.validation_table is not None:
        report.extend(["", "### Validation Details", _df_to_markdown(payload.validation_table)])

    # These two sections are always emitted, including when the table is None.
    report.extend(["", "## Drift / Stability", _df_to_markdown(payload.drift_report)])
    report.extend(["", "## Risk Trend", _df_to_markdown(payload.risk_trend)])

    if payload.extra_notes:
        report.extend(["", "## Notes", payload.extra_notes])

    return "\n".join(report).strip() + "\n"
65
-
66
-
67
def write_report(payload: ReportPayload, output_path: str | Path) -> Path:
    """Build the report for ``payload`` and write it to ``output_path`` as UTF-8.

    Missing parent directories are created. Returns the destination as a
    ``Path``.
    """
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(build_report(payload), encoding="utf-8")
    return destination
@@ -1,45 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import threading
4
- import time
5
- from datetime import datetime, timedelta
6
- from typing import Callable, Optional
7
-
8
-
9
- def _next_run(run_time: str, now: Optional[datetime] = None) -> datetime:
10
- if now is None:
11
- now = datetime.now()
12
- hour, minute = [int(x) for x in run_time.split(":")]
13
- candidate = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
14
- if candidate <= now:
15
- candidate = candidate + timedelta(days=1)
16
- return candidate
17
-
18
-
19
def schedule_daily(
    job_fn: Callable[[], None],
    *,
    run_time: str = "01:00",
    stop_event: Optional[threading.Event] = None,
) -> threading.Thread:
    """Run job_fn daily at local time HH:MM in a background thread.

    Args:
        job_fn: Zero-argument callable to run. Exceptions it raises are
            logged and do not stop the schedule.
        run_time: Local time of day as ``"HH:MM"``.
        stop_event: Event that ends the loop when set. If omitted, a new one
            is created.

    Returns:
        The started daemon thread. The stop event in use is also attached to
        it as ``thread.stop_event`` so callers that did not pass one can still
        stop the loop.

    Raises:
        ValueError: If ``run_time`` cannot be parsed.
    """
    # Imported locally so this function does not depend on a change to the
    # module's import block.
    import logging

    logger = logging.getLogger(__name__)

    if stop_event is None:
        stop_event = threading.Event()

    # Parse once up front: otherwise a malformed run_time would only raise
    # inside the worker thread, which would then die unnoticed.
    _next_run(run_time)

    def _loop():
        while not stop_event.is_set():
            next_time = _next_run(run_time)
            sleep_seconds = (next_time - datetime.now()).total_seconds()
            if sleep_seconds > 0:
                stop_event.wait(timeout=sleep_seconds)
            if stop_event.is_set():
                break
            try:
                job_fn()
            except Exception:
                # Best effort: keep the schedule alive, but record the failure
                # instead of discarding it.
                logger.exception("Scheduled job failed; will retry at the next run time.")
            # Short pause before computing the next run time, so a slightly
            # early wake-up cannot schedule the same slot again. Waiting on
            # the event (rather than sleeping) lets a stop request end it.
            stop_event.wait(timeout=1)

    thread = threading.Thread(target=_loop, daemon=True)
    thread.stop_event = stop_event
    thread.start()
    return thread