ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
ins_pricing/modelling/core/bayesopt/model_explain_mixin.py
@@ -0,0 +1,296 @@
+from __future__ import annotations
+
+from typing import Any, Optional
+
+import numpy as np
+import pandas as pd
+import torch
+import statsmodels.api as sm
+
+try:
+    from ...explain import gradients as explain_gradients
+    from ...explain import permutation as explain_permutation
+    from ...explain import shap_utils as explain_shap
+except Exception:  # pragma: no cover - optional for legacy imports
+    try:  # best-effort for non-package imports
+        from ins_pricing.explain import gradients as explain_gradients
+        from ins_pricing.explain import permutation as explain_permutation
+        from ins_pricing.explain import shap_utils as explain_shap
+    except Exception:  # pragma: no cover
+        explain_gradients = None
+        explain_permutation = None
+        explain_shap = None
+
+
+class BayesOptExplainMixin:
+    def compute_permutation_importance(self,
+                                       model_key: str,
+                                       on_train: bool = True,
+                                       metric: Any = "auto",
+                                       n_repeats: int = 5,
+                                       max_rows: int = 5000,
+                                       random_state: Optional[int] = None):
+        if explain_permutation is None:
+            raise RuntimeError("explain.permutation is not available.")
+
+        model_key = str(model_key)
+        data = self.train_data if on_train else self.test_data
+        if self.resp_nme not in data.columns:
+            raise RuntimeError("Missing response column for permutation importance.")
+        y = data[self.resp_nme]
+        w = data[self.weight_nme] if self.weight_nme in data.columns else None
+
+        if model_key == "resn":
+            if self.resn_best is None:
+                raise RuntimeError("ResNet model not trained.")
+            X = self.train_oht_scl_data if on_train else self.test_oht_scl_data
+            if X is None:
+                raise RuntimeError("Missing standardized features for ResNet.")
+            X = X[self.var_nmes]
+            predict_fn = lambda df: self.resn_best.predict(df)
+        elif model_key == "ft":
+            if self.ft_best is None:
+                raise RuntimeError("FT model not trained.")
+            if str(self.config.ft_role) != "model":
+                raise RuntimeError("FT role is not 'model'; FT predictions unavailable.")
+            X = data[self.factor_nmes]
+            geo_tokens = self.train_geo_tokens if on_train else self.test_geo_tokens
+            geo_np = None
+            if geo_tokens is not None:
+                geo_np = geo_tokens.to_numpy(dtype=np.float32, copy=False)
+            predict_fn = lambda df, geo=geo_np: self.ft_best.predict(df, geo_tokens=geo)
+        elif model_key == "xgb":
+            if self.xgb_best is None:
+                raise RuntimeError("XGB model not trained.")
+            X = data[self.factor_nmes]
+            predict_fn = lambda df: self.xgb_best.predict(df)
+        else:
+            raise ValueError("Unsupported model_key for permutation importance.")
+
+        return explain_permutation.permutation_importance(
+            predict_fn,
+            X,
+            y,
+            sample_weight=w,
+            metric=metric,
+            task_type=self.task_type,
+            n_repeats=n_repeats,
+            random_state=random_state,
+            max_rows=max_rows,
+        )
+
+    # ========= Deep explainability: Integrated Gradients =========
+
+    def compute_integrated_gradients_resn(self,
+                                          on_train: bool = True,
+                                          baseline: Any = None,
+                                          steps: int = 50,
+                                          batch_size: int = 256,
+                                          target: Optional[int] = None):
+        if explain_gradients is None:
+            raise RuntimeError("explain.gradients is not available.")
+        if self.resn_best is None:
+            raise RuntimeError("ResNet model not trained.")
+        X = self.train_oht_scl_data if on_train else self.test_oht_scl_data
+        if X is None:
+            raise RuntimeError("Missing standardized features for ResNet.")
+        X = X[self.var_nmes]
+        return explain_gradients.resnet_integrated_gradients(
+            self.resn_best,
+            X,
+            baseline=baseline,
+            steps=steps,
+            batch_size=batch_size,
+            target=target,
+        )
+
+
+    def compute_integrated_gradients_ft(self,
+                                        on_train: bool = True,
+                                        geo_tokens: Optional[np.ndarray] = None,
+                                        baseline_num: Any = None,
+                                        baseline_geo: Any = None,
+                                        steps: int = 50,
+                                        batch_size: int = 256,
+                                        target: Optional[int] = None):
+        if explain_gradients is None:
+            raise RuntimeError("explain.gradients is not available.")
+        if self.ft_best is None:
+            raise RuntimeError("FT model not trained.")
+        if str(self.config.ft_role) != "model":
+            raise RuntimeError("FT role is not 'model'; FT explanations unavailable.")
+
+        data = self.train_data if on_train else self.test_data
+        X = data[self.factor_nmes]
+
+        if geo_tokens is None and getattr(self.ft_best, "num_geo", 0) > 0:
+            tokens_df = self.train_geo_tokens if on_train else self.test_geo_tokens
+            if tokens_df is not None:
+                geo_tokens = tokens_df.to_numpy(dtype=np.float32, copy=False)
+
+        return explain_gradients.ft_integrated_gradients(
+            self.ft_best,
+            X,
+            geo_tokens=geo_tokens,
+            baseline_num=baseline_num,
+            baseline_geo=baseline_geo,
+            steps=steps,
+            batch_size=batch_size,
+            target=target,
+        )
+
+    def _sample_rows(self, data: pd.DataFrame, n: int) -> pd.DataFrame:
+        if len(data) == 0:
+            return data
+        return data.sample(min(len(data), n), random_state=self.rand_seed)
+
+    @staticmethod
+    def _shap_nsamples(arr: np.ndarray, max_nsamples: int = 300) -> int:
+        min_needed = arr.shape[1] + 2
+        return max(min_needed, min(max_nsamples, arr.shape[0] * arr.shape[1]))
+
+
+    def _build_ft_shap_matrix(self, data: pd.DataFrame) -> np.ndarray:
+        matrices = []
+        for col in self.factor_nmes:
+            s = data[col]
+            if col in self.cate_list:
+                cats = pd.Categorical(
+                    s,
+                    categories=self.cat_categories_for_shap[col]
+                )
+                codes = np.asarray(cats.codes, dtype=np.float64).reshape(-1, 1)
+                matrices.append(codes)
+            else:
+                vals = pd.to_numeric(s, errors="coerce")
+                arr = vals.to_numpy(dtype=np.float64, copy=True).reshape(-1, 1)
+                matrices.append(arr)
+        X_mat = np.concatenate(matrices, axis=1)  # Result shape (N, F)
+        return X_mat
+
+
+    def _decode_ft_shap_matrix_to_df(self, X_mat: np.ndarray) -> pd.DataFrame:
+        data_dict = {}
+        for j, col in enumerate(self.factor_nmes):
+            col_vals = X_mat[:, j]
+            if col in self.cate_list:
+                cats = self.cat_categories_for_shap[col]
+                codes = np.round(col_vals).astype(int)
+                codes = np.clip(codes, -1, len(cats) - 1)
+                cat_series = pd.Categorical.from_codes(
+                    codes,
+                    categories=cats
+                )
+                data_dict[col] = cat_series
+            else:
+                data_dict[col] = col_vals.astype(float)
+
+        df = pd.DataFrame(data_dict, columns=self.factor_nmes)
+        for col in self.cate_list:
+            if col in df.columns:
+                df[col] = df[col].astype("category")
+        return df
+
+
+    def _build_glm_design(self, data: pd.DataFrame) -> pd.DataFrame:
+        X = data[self.var_nmes]
+        return sm.add_constant(X, has_constant='add')
+
+
+    def _compute_shap_core(self,
+                           model_key: str,
+                           n_background: int,
+                           n_samples: int,
+                           on_train: bool,
+                           X_df: pd.DataFrame,
+                           prep_fn,
+                           predict_fn,
+                           cleanup_fn=None):
+        if explain_shap is None:
+            raise RuntimeError("explain.shap_utils is not available.")
+        return explain_shap.compute_shap_core(
+            self,
+            model_key,
+            n_background,
+            n_samples,
+            on_train,
+            X_df=X_df,
+            prep_fn=prep_fn,
+            predict_fn=predict_fn,
+            cleanup_fn=cleanup_fn,
+        )
+
+    # ========= GLM SHAP explainability =========
+
+    def compute_shap_glm(self, n_background: int = 500,
+                         n_samples: int = 200,
+                         on_train: bool = True):
+        if explain_shap is None:
+            raise RuntimeError("explain.shap_utils is not available.")
+        self.shap_glm = explain_shap.compute_shap_glm(
+            self,
+            n_background=n_background,
+            n_samples=n_samples,
+            on_train=on_train,
+        )
+        return self.shap_glm
+
+    # ========= XGBoost SHAP explainability =========
+
+    def compute_shap_xgb(self, n_background: int = 500,
+                         n_samples: int = 200,
+                         on_train: bool = True):
+        if explain_shap is None:
+            raise RuntimeError("explain.shap_utils is not available.")
+        self.shap_xgb = explain_shap.compute_shap_xgb(
+            self,
+            n_background=n_background,
+            n_samples=n_samples,
+            on_train=on_train,
+        )
+        return self.shap_xgb
+
+    # ========= ResNet SHAP explainability =========
+
+    def _resn_predict_wrapper(self, X_np):
+        model = self.resn_best.resnet.to("cpu")
+        with torch.no_grad():
+            X_tensor = torch.tensor(X_np, dtype=torch.float32)
+            y_pred = model(X_tensor).cpu().numpy()
+        y_pred = np.clip(y_pred, 1e-6, None)
+        return y_pred.reshape(-1)
+
+
+    def compute_shap_resn(self, n_background: int = 500,
+                          n_samples: int = 200,
+                          on_train: bool = True):
+        if explain_shap is None:
+            raise RuntimeError("explain.shap_utils is not available.")
+        self.shap_resn = explain_shap.compute_shap_resn(
+            self,
+            n_background=n_background,
+            n_samples=n_samples,
+            on_train=on_train,
+        )
+        return self.shap_resn
+
+    # ========= FT-Transformer SHAP explainability =========
+
+    def _ft_shap_predict_wrapper(self, X_mat: np.ndarray) -> np.ndarray:
+        df_input = self._decode_ft_shap_matrix_to_df(X_mat)
+        y_pred = self.ft_best.predict(df_input)
+        return np.asarray(y_pred, dtype=np.float64).reshape(-1)
+
+
+    def compute_shap_ft(self, n_background: int = 500,
+                        n_samples: int = 200,
+                        on_train: bool = True):
+        if explain_shap is None:
+            raise RuntimeError("explain.shap_utils is not available.")
+        self.shap_ft = explain_shap.compute_shap_ft(
+            self,
+            n_background=n_background,
+            n_samples=n_samples,
+            on_train=on_train,
+        )
+        return self.shap_ft
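
The hunk above adds BayesOptExplainMixin, which routes permutation importance, integrated gradients, and SHAP calls through the ins_pricing explain helpers. A minimal usage sketch follows; `bo` is a hypothetical, already-fitted model object that inherits the mixin (the variable name and fitted state are assumptions, while the method names and default arguments are taken from the diff above):

    # Illustrative only: `bo` is assumed to be a fitted object mixing in BayesOptExplainMixin.
    perm = bo.compute_permutation_importance("xgb", on_train=False, n_repeats=5)
    ig = bo.compute_integrated_gradients_resn(on_train=False, steps=50, batch_size=256)
    shap_vals = bo.compute_shap_xgb(n_background=500, n_samples=200, on_train=True)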