ins-pricing 0.4.5__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. ins_pricing/README.md +48 -22
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +52 -50
  4. ins_pricing/cli/BayesOpt_incremental.py +39 -105
  5. ins_pricing/cli/Explain_Run.py +31 -23
  6. ins_pricing/cli/Explain_entry.py +532 -579
  7. ins_pricing/cli/Pricing_Run.py +31 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +11 -9
  9. ins_pricing/cli/utils/cli_common.py +256 -256
  10. ins_pricing/cli/utils/cli_config.py +375 -375
  11. ins_pricing/cli/utils/import_resolver.py +382 -365
  12. ins_pricing/cli/utils/notebook_utils.py +340 -340
  13. ins_pricing/cli/watchdog_run.py +209 -201
  14. ins_pricing/frontend/__init__.py +10 -10
  15. ins_pricing/frontend/example_workflows.py +1 -1
  16. ins_pricing/governance/__init__.py +20 -20
  17. ins_pricing/governance/release.py +159 -159
  18. ins_pricing/modelling/__init__.py +147 -92
  19. ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +2 -2
  20. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  21. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -562
  22. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -964
  23. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  24. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
  29. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
  32. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
  36. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  37. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  38. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  39. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  40. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  41. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
  42. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  43. ins_pricing/modelling/explain/__init__.py +55 -55
  44. ins_pricing/modelling/explain/metrics.py +27 -174
  45. ins_pricing/modelling/explain/permutation.py +237 -237
  46. ins_pricing/modelling/plotting/__init__.py +40 -36
  47. ins_pricing/modelling/plotting/compat.py +228 -0
  48. ins_pricing/modelling/plotting/curves.py +572 -572
  49. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  50. ins_pricing/modelling/plotting/geo.py +362 -362
  51. ins_pricing/modelling/plotting/importance.py +121 -121
  52. ins_pricing/pricing/__init__.py +27 -27
  53. ins_pricing/production/__init__.py +35 -25
  54. ins_pricing/production/{predict.py → inference.py} +140 -57
  55. ins_pricing/production/monitoring.py +8 -21
  56. ins_pricing/reporting/__init__.py +11 -11
  57. ins_pricing/setup.py +1 -1
  58. ins_pricing/tests/production/test_inference.py +90 -0
  59. ins_pricing/utils/__init__.py +116 -83
  60. ins_pricing/utils/device.py +255 -255
  61. ins_pricing/utils/features.py +53 -0
  62. ins_pricing/utils/io.py +72 -0
  63. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  64. ins_pricing/utils/metrics.py +158 -24
  65. ins_pricing/utils/numerics.py +76 -0
  66. ins_pricing/utils/paths.py +9 -1
  67. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +182 -182
  68. ins_pricing-0.5.0.dist-info/RECORD +131 -0
  69. ins_pricing/modelling/core/BayesOpt.py +0 -146
  70. ins_pricing/modelling/core/__init__.py +0 -1
  71. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  72. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  73. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  74. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  75. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  76. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  77. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  78. ins_pricing/tests/production/test_predict.py +0 -233
  79. ins_pricing-0.4.5.dist-info/RECORD +0 -130
  80. /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
  81. /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
  82. /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
  83. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
  84. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
@@ -1,548 +1,482 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- from typing import List, Optional
5
-
6
- try: # matplotlib is optional; avoid hard import failures in headless/minimal envs
7
- import matplotlib
8
- if os.name != "nt" and not os.environ.get("DISPLAY") and not os.environ.get("MPLBACKEND"):
9
- matplotlib.use("Agg")
10
- import matplotlib.pyplot as plt
11
- _MPL_IMPORT_ERROR: Optional[BaseException] = None
12
- except Exception as exc: # pragma: no cover - optional dependency
13
- plt = None # type: ignore[assignment]
14
- _MPL_IMPORT_ERROR = exc
15
-
16
- import numpy as np
17
- import pandas as pd
18
-
19
- from .utils import EPS, PlotUtils
20
-
21
- try:
22
- from ...plotting import curves as plot_curves
23
- from ...plotting import diagnostics as plot_diagnostics
24
- from ...plotting.common import PlotStyle, finalize_figure
25
- except Exception: # pragma: no cover - optional for legacy imports
26
- try: # best-effort for non-package imports
27
- from ins_pricing.plotting import curves as plot_curves
28
- from ins_pricing.plotting import diagnostics as plot_diagnostics
29
- from ins_pricing.plotting.common import PlotStyle, finalize_figure
30
- except Exception: # pragma: no cover
31
- plot_curves = None
32
- plot_diagnostics = None
33
- PlotStyle = None
34
- finalize_figure = None
35
-
36
-
37
- def _plot_skip(label: str) -> None:
38
- if _MPL_IMPORT_ERROR is not None:
39
- print(f"[Plot] Skip {label}: matplotlib unavailable ({_MPL_IMPORT_ERROR}).", flush=True)
40
- else:
41
- print(f"[Plot] Skip {label}: matplotlib unavailable.", flush=True)
42
-
43
-
44
- class BayesOptPlottingMixin:
45
- def plot_oneway(
46
- self,
47
- n_bins=10,
48
- pred_col: Optional[str] = None,
49
- pred_label: Optional[str] = None,
50
- pred_weighted: Optional[bool] = None,
51
- plot_subdir: Optional[str] = None,
52
- ):
53
- if plt is None and plot_diagnostics is None:
54
- _plot_skip("oneway plot")
55
- return
56
- if pred_col is not None and pred_col not in self.train_data.columns:
57
- print(
58
- f"[Oneway] Missing prediction column '{pred_col}'; skip predicted line.",
59
- flush=True,
60
- )
61
- pred_col = None
62
- if pred_weighted is None and pred_col is not None:
63
- pred_weighted = pred_col.startswith("w_pred_")
64
- if pred_weighted is None:
65
- pred_weighted = False
66
- plot_subdir = plot_subdir.strip("/\\") if plot_subdir else "oneway"
67
- plot_prefix = f"{self.model_nme}/{plot_subdir}"
68
-
69
- def _safe_tag(value: str) -> str:
70
- return (
71
- value.strip()
72
- .replace(" ", "_")
73
- .replace("/", "_")
74
- .replace("\\", "_")
75
- .replace(":", "_")
76
- )
77
-
78
- if plot_diagnostics is None:
79
- for c in self.factor_nmes:
80
- fig = plt.figure(figsize=(7, 5))
81
- if c in self.cate_list:
82
- group_col = c
83
- plot_source = self.train_data
84
- else:
85
- group_col = f'{c}_bins'
86
- bins = pd.qcut(
87
- self.train_data[c],
88
- n_bins,
89
- duplicates='drop' # Drop duplicate quantiles to avoid errors.
90
- )
91
- plot_source = self.train_data.assign(**{group_col: bins})
92
- if pred_col is not None and pred_col in plot_source.columns:
93
- if pred_weighted:
94
- plot_source = plot_source.assign(
95
- _pred_w=plot_source[pred_col]
96
- )
97
- else:
98
- plot_source = plot_source.assign(
99
- _pred_w=plot_source[pred_col] * plot_source[self.weight_nme]
100
- )
101
- plot_data = plot_source.groupby(
102
- [group_col], observed=True).sum(numeric_only=True)
103
- plot_data.reset_index(inplace=True)
104
- plot_data['act_v'] = plot_data['w_act'] / \
105
- plot_data[self.weight_nme]
106
- if pred_col is not None and "_pred_w" in plot_data.columns:
107
- plot_data["pred_v"] = plot_data["_pred_w"] / plot_data[self.weight_nme]
108
- ax = fig.add_subplot(111)
109
- ax.plot(plot_data.index, plot_data['act_v'],
110
- label='Actual', color='red')
111
- if pred_col is not None and "pred_v" in plot_data.columns:
112
- ax.plot(
113
- plot_data.index,
114
- plot_data["pred_v"],
115
- label=pred_label or "Predicted",
116
- color="tab:blue",
117
- )
118
- ax.set_title(
119
- 'Analysis of %s : Train Data' % group_col,
120
- fontsize=8)
121
- plt.xticks(plot_data.index,
122
- list(plot_data[group_col].astype(str)),
123
- rotation=90)
124
- if len(list(plot_data[group_col].astype(str))) > 50:
125
- plt.xticks(fontsize=3)
126
- else:
127
- plt.xticks(fontsize=6)
128
- plt.yticks(fontsize=6)
129
- ax2 = ax.twinx()
130
- ax2.bar(plot_data.index,
131
- plot_data[self.weight_nme],
132
- alpha=0.5, color='seagreen')
133
- plt.yticks(fontsize=6)
134
- plt.margins(0.05)
135
- plt.subplots_adjust(wspace=0.3)
136
- if pred_col is not None and "pred_v" in plot_data.columns:
137
- ax.legend(fontsize=6)
138
- pred_tag = _safe_tag(pred_label or pred_col) if pred_col else None
139
- if pred_tag:
140
- filename = f'00_{self.model_nme}_{group_col}_oneway_{pred_tag}.png'
141
- else:
142
- filename = f'00_{self.model_nme}_{group_col}_oneway.png'
143
- save_path = self._resolve_plot_path(plot_prefix, filename)
144
- plt.savefig(save_path, dpi=300)
145
- plt.close(fig)
146
- return
147
-
148
- if "w_act" not in self.train_data.columns:
149
- print("[Oneway] Missing w_act column; skip plotting.", flush=True)
150
- return
151
-
152
- for c in self.factor_nmes:
153
- is_cat = c in (self.cate_list or [])
154
- group_col = c if is_cat else f"{c}_bins"
155
- title = f"Analysis of {group_col} : Train Data"
156
- pred_tag = _safe_tag(pred_label or pred_col) if pred_col else None
157
- if pred_tag:
158
- filename = f"00_{self.model_nme}_{group_col}_oneway_{pred_tag}.png"
159
- else:
160
- filename = f"00_{self.model_nme}_{group_col}_oneway.png"
161
- save_path = self._resolve_plot_path(plot_prefix, filename)
162
- plot_diagnostics.plot_oneway(
163
- self.train_data,
164
- feature=c,
165
- weight_col=self.weight_nme,
166
- target_col="w_act",
167
- pred_col=pred_col,
168
- pred_weighted=pred_weighted,
169
- pred_label=pred_label,
170
- n_bins=n_bins,
171
- is_categorical=is_cat,
172
- title=title,
173
- save_path=save_path,
174
- show=False,
175
- )
176
-
177
-
178
- def _resolve_plot_path(self, subdir: Optional[str], filename: str) -> str:
179
- style = str(getattr(self.config, "plot_path_style", "nested") or "nested").strip().lower()
180
- if style in {"flat", "root"}:
181
- return self.output_manager.plot_path(filename)
182
- if subdir:
183
- return self.output_manager.plot_path(f"{subdir}/{filename}")
184
- return self.output_manager.plot_path(filename)
185
-
186
-
187
- def plot_lift(self, model_label, pred_nme, n_bins=10):
188
- if plt is None:
189
- _plot_skip("lift plot")
190
- return
191
- model_map = {
192
- 'Xgboost': 'pred_xgb',
193
- 'ResNet': 'pred_resn',
194
- 'ResNetClassifier': 'pred_resn',
195
- 'GLM': 'pred_glm',
196
- 'GNN': 'pred_gnn',
197
- }
198
- if str(self.config.ft_role) == "model":
199
- model_map.update({
200
- 'FTTransformer': 'pred_ft',
201
- 'FTTransformerClassifier': 'pred_ft',
202
- })
203
- for k, v in model_map.items():
204
- if model_label.startswith(k):
205
- pred_nme = v
206
- break
207
- safe_label = (
208
- str(model_label)
209
- .replace(" ", "_")
210
- .replace("/", "_")
211
- .replace("\\", "_")
212
- .replace(":", "_")
213
- )
214
- plot_prefix = f"{self.model_nme}/lift"
215
- filename = f"01_{self.model_nme}_{safe_label}_lift.png"
216
-
217
- datasets = []
218
- for title, data in [
219
- ('Lift Chart on Train Data', self.train_data),
220
- ('Lift Chart on Test Data', self.test_data),
221
- ]:
222
- if 'w_act' not in data.columns or data['w_act'].isna().all():
223
- print(
224
- f"[Lift] Missing labels for {title}; skip.",
225
- flush=True,
226
- )
227
- continue
228
- datasets.append((title, data))
229
-
230
- if not datasets:
231
- print("[Lift] No labeled data available; skip plotting.", flush=True)
232
- return
233
-
234
- if plot_curves is None:
235
- fig = plt.figure(figsize=(11, 5))
236
- positions = [111] if len(datasets) == 1 else [121, 122]
237
- for pos, (title, data) in zip(positions, datasets):
238
- if pred_nme not in data.columns or f'w_{pred_nme}' not in data.columns:
239
- print(
240
- f"[Lift] Missing prediction columns in {title}; skip.",
241
- flush=True,
242
- )
243
- continue
244
- lift_df = pd.DataFrame({
245
- 'pred': data[pred_nme].values,
246
- 'w_pred': data[f'w_{pred_nme}'].values,
247
- 'act': data['w_act'].values,
248
- 'weight': data[self.weight_nme].values
249
- })
250
- plot_data = PlotUtils.split_data(lift_df, 'pred', 'weight', n_bins)
251
- denom = np.maximum(plot_data['weight'], EPS)
252
- plot_data['exp_v'] = plot_data['w_pred'] / denom
253
- plot_data['act_v'] = plot_data['act'] / denom
254
- plot_data = plot_data.reset_index()
255
-
256
- ax = fig.add_subplot(pos)
257
- PlotUtils.plot_lift_ax(ax, plot_data, title)
258
-
259
- plt.subplots_adjust(wspace=0.3)
260
- save_path = self._resolve_plot_path(plot_prefix, filename)
261
- plt.savefig(save_path, dpi=300)
262
- plt.show()
263
- plt.close(fig)
264
- return
265
-
266
- style = PlotStyle() if PlotStyle else None
267
- fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
268
- if len(datasets) == 1:
269
- axes = [axes]
270
-
271
- for ax, (title, data) in zip(axes, datasets):
272
- pred_vals = None
273
- if pred_nme in data.columns:
274
- pred_vals = data[pred_nme].values
275
- else:
276
- w_pred_col = f"w_{pred_nme}"
277
- if w_pred_col in data.columns:
278
- denom = np.maximum(data[self.weight_nme].values, EPS)
279
- pred_vals = data[w_pred_col].values / denom
280
- if pred_vals is None:
281
- print(
282
- f"[Lift] Missing prediction columns in {title}; skip.",
283
- flush=True,
284
- )
285
- continue
286
-
287
- plot_curves.plot_lift_curve(
288
- pred_vals,
289
- data['w_act'].values,
290
- data[self.weight_nme].values,
291
- n_bins=n_bins,
292
- title=title,
293
- pred_label="Predicted",
294
- act_label="Actual",
295
- weight_label="Earned Exposure",
296
- pred_weighted=False,
297
- actual_weighted=True,
298
- ax=ax,
299
- show=False,
300
- style=style,
301
- )
302
-
303
- plt.subplots_adjust(wspace=0.3)
304
- save_path = self._resolve_plot_path(plot_prefix, filename)
305
- if finalize_figure:
306
- finalize_figure(fig, save_path=save_path, show=True, style=style)
307
- else:
308
- plt.savefig(save_path, dpi=300)
309
- plt.show()
310
- plt.close(fig)
311
-
312
- # Double lift curve plot.
313
-
314
- def plot_dlift(self, model_comp: List[str] = ['xgb', 'resn'], n_bins: int = 10) -> None:
315
- # Compare two models across bins.
316
- # Args:
317
- # model_comp: model keys to compare (e.g., ['xgb', 'resn']).
318
- # n_bins: number of bins for lift curves.
319
- if plt is None:
320
- _plot_skip("double lift plot")
321
- return
322
- if len(model_comp) != 2:
323
- raise ValueError("`model_comp` must contain two models to compare.")
324
-
325
- model_name_map = {
326
- 'xgb': 'Xgboost',
327
- 'resn': 'ResNet',
328
- 'glm': 'GLM',
329
- 'gnn': 'GNN',
330
- }
331
- if str(self.config.ft_role) == "model":
332
- model_name_map['ft'] = 'FTTransformer'
333
-
334
- name1, name2 = model_comp
335
- if name1 not in model_name_map or name2 not in model_name_map:
336
- raise ValueError(f"Unsupported model key. Choose from {list(model_name_map.keys())}.")
337
- plot_prefix = f"{self.model_nme}/double_lift"
338
- filename = f"02_{self.model_nme}_dlift_{name1}_vs_{name2}.png"
339
-
340
- datasets = []
341
- for data_name, data in [('Train Data', self.train_data),
342
- ('Test Data', self.test_data)]:
343
- if 'w_act' not in data.columns or data['w_act'].isna().all():
344
- print(
345
- f"[Double Lift] Missing labels for {data_name}; skip.",
346
- flush=True,
347
- )
348
- continue
349
- datasets.append((data_name, data))
350
-
351
- if not datasets:
352
- print("[Double Lift] No labeled data available; skip plotting.", flush=True)
353
- return
354
-
355
- if plot_curves is None:
356
- fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
357
- if len(datasets) == 1:
358
- axes = [axes]
359
-
360
- for ax, (data_name, data) in zip(axes, datasets):
361
- pred1_col = f'w_pred_{name1}'
362
- pred2_col = f'w_pred_{name2}'
363
-
364
- if pred1_col not in data.columns or pred2_col not in data.columns:
365
- print(
366
- f"Warning: missing prediction columns {pred1_col} or {pred2_col} in {data_name}. Skip plot.")
367
- continue
368
-
369
- lift_data = pd.DataFrame({
370
- 'pred1': data[pred1_col].values,
371
- 'pred2': data[pred2_col].values,
372
- 'diff_ly': data[pred1_col].values / np.maximum(data[pred2_col].values, EPS),
373
- 'act': data['w_act'].values,
374
- 'weight': data[self.weight_nme].values
375
- })
376
- plot_data = PlotUtils.split_data(
377
- lift_data, 'diff_ly', 'weight', n_bins)
378
- denom = np.maximum(plot_data['act'], EPS)
379
- plot_data['exp_v1'] = plot_data['pred1'] / denom
380
- plot_data['exp_v2'] = plot_data['pred2'] / denom
381
- plot_data['act_v'] = plot_data['act'] / denom
382
- plot_data.reset_index(inplace=True)
383
-
384
- label1 = model_name_map[name1]
385
- label2 = model_name_map[name2]
386
-
387
- PlotUtils.plot_dlift_ax(
388
- ax, plot_data, f'Double Lift Chart on {data_name}', label1, label2)
389
-
390
- plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8, wspace=0.3)
391
- save_path = self._resolve_plot_path(plot_prefix, filename)
392
- plt.savefig(save_path, dpi=300)
393
- plt.show()
394
- plt.close(fig)
395
- return
396
-
397
- style = PlotStyle() if PlotStyle else None
398
- fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
399
- if len(datasets) == 1:
400
- axes = [axes]
401
-
402
- label1 = model_name_map[name1]
403
- label2 = model_name_map[name2]
404
-
405
- for ax, (data_name, data) in zip(axes, datasets):
406
- weight_vals = data[self.weight_nme].values
407
- pred1 = None
408
- pred2 = None
409
-
410
- pred1_col = f"pred_{name1}"
411
- pred2_col = f"pred_{name2}"
412
- if pred1_col in data.columns:
413
- pred1 = data[pred1_col].values
414
- else:
415
- w_pred1_col = f"w_pred_{name1}"
416
- if w_pred1_col in data.columns:
417
- pred1 = data[w_pred1_col].values / np.maximum(weight_vals, EPS)
418
-
419
- if pred2_col in data.columns:
420
- pred2 = data[pred2_col].values
421
- else:
422
- w_pred2_col = f"w_pred_{name2}"
423
- if w_pred2_col in data.columns:
424
- pred2 = data[w_pred2_col].values / np.maximum(weight_vals, EPS)
425
-
426
- if pred1 is None or pred2 is None:
427
- print(
428
- f"Warning: missing pred_{name1}/pred_{name2} or w_pred columns in {data_name}. Skip plot.")
429
- continue
430
-
431
- plot_curves.plot_double_lift_curve(
432
- pred1,
433
- pred2,
434
- data['w_act'].values,
435
- weight_vals,
436
- n_bins=n_bins,
437
- title=f"Double Lift Chart on {data_name}",
438
- label1=label1,
439
- label2=label2,
440
- pred1_weighted=False,
441
- pred2_weighted=False,
442
- actual_weighted=True,
443
- ax=ax,
444
- show=False,
445
- style=style,
446
- )
447
-
448
- plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8, wspace=0.3)
449
- save_path = self._resolve_plot_path(plot_prefix, filename)
450
- if finalize_figure:
451
- finalize_figure(fig, save_path=save_path, show=True, style=style)
452
- else:
453
- plt.savefig(save_path, dpi=300)
454
- plt.show()
455
- plt.close(fig)
456
-
457
- # Conversion lift curve plot.
458
-
459
- def plot_conversion_lift(self, model_pred_col: str, n_bins: int = 20):
460
- if plt is None:
461
- _plot_skip("conversion lift plot")
462
- return
463
- if not self.binary_resp_nme:
464
- print("Error: `binary_resp_nme` not provided at BayesOptModel init; cannot plot conversion lift.")
465
- return
466
-
467
- if plot_curves is None:
468
- fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
469
- datasets = {
470
- 'Train Data': self.train_data,
471
- 'Test Data': self.test_data
472
- }
473
-
474
- for ax, (data_name, data) in zip(axes, datasets.items()):
475
- if model_pred_col not in data.columns:
476
- print(f"Warning: missing prediction column '{model_pred_col}' in {data_name}. Skip plot.")
477
- continue
478
-
479
- # Sort by model prediction and compute bins.
480
- plot_data = data.sort_values(by=model_pred_col).copy()
481
- plot_data['cum_weight'] = plot_data[self.weight_nme].cumsum()
482
- total_weight = plot_data[self.weight_nme].sum()
483
-
484
- if total_weight > EPS:
485
- plot_data['bin'] = pd.cut(
486
- plot_data['cum_weight'],
487
- bins=n_bins,
488
- labels=False,
489
- right=False
490
- )
491
- else:
492
- plot_data['bin'] = 0
493
-
494
- # Aggregate by bins.
495
- lift_agg = plot_data.groupby('bin').agg(
496
- total_weight=(self.weight_nme, 'sum'),
497
- actual_conversions=(self.binary_resp_nme, 'sum'),
498
- weighted_conversions=('w_binary_act', 'sum'),
499
- avg_pred=(model_pred_col, 'mean')
500
- ).reset_index()
501
-
502
- # Compute conversion rate.
503
- lift_agg['conversion_rate'] = lift_agg['weighted_conversions'] / \
504
- lift_agg['total_weight']
505
-
506
- # Compute overall average conversion rate.
507
- overall_conversion_rate = data['w_binary_act'].sum(
508
- ) / data[self.weight_nme].sum()
509
- ax.axhline(y=overall_conversion_rate, color='gray', linestyle='--',
510
- label=f'Overall Avg Rate ({overall_conversion_rate:.2%})')
511
-
512
- ax.plot(lift_agg['bin'], lift_agg['conversion_rate'],
513
- marker='o', linestyle='-', label='Actual Conversion Rate')
514
- ax.set_title(f'Conversion Rate Lift Chart on {data_name}')
515
- ax.set_xlabel(f'Model Score Decile (based on {model_pred_col})')
516
- ax.set_ylabel('Conversion Rate')
517
- ax.grid(True, linestyle='--', alpha=0.6)
518
- ax.legend()
519
-
520
- plt.tight_layout()
521
- plt.show()
522
- return
523
-
524
- fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
525
- datasets = {
526
- 'Train Data': self.train_data,
527
- 'Test Data': self.test_data
528
- }
529
-
530
- for ax, (data_name, data) in zip(axes, datasets.items()):
531
- if model_pred_col not in data.columns:
532
- print(f"Warning: missing prediction column '{model_pred_col}' in {data_name}. Skip plot.")
533
- continue
534
-
535
- plot_curves.plot_conversion_lift(
536
- data[model_pred_col].values,
537
- data[self.binary_resp_nme].values,
538
- data[self.weight_nme].values,
539
- n_bins=n_bins,
540
- title=f'Conversion Rate Lift Chart on {data_name}',
541
- ax=ax,
542
- show=False,
543
- )
544
-
545
- plt.tight_layout()
546
- plt.show()
547
-
548
- # ========= Lightweight explainability: Permutation Importance =========
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import List, Optional
5
+
6
+ try: # matplotlib is optional; avoid hard import failures in headless/minimal envs
7
+ import matplotlib
8
+ if os.name != "nt" and not os.environ.get("DISPLAY") and not os.environ.get("MPLBACKEND"):
9
+ matplotlib.use("Agg")
10
+ import matplotlib.pyplot as plt
11
+ _MPL_IMPORT_ERROR: Optional[BaseException] = None
12
+ except Exception as exc: # pragma: no cover - optional dependency
13
+ plt = None # type: ignore[assignment]
14
+ _MPL_IMPORT_ERROR = exc
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+
19
+ from ins_pricing.utils import EPS
20
+
21
+ try:
22
+ from ins_pricing.modelling.plotting import curves as plot_curves
23
+ from ins_pricing.modelling.plotting import diagnostics as plot_diagnostics
24
+ from ins_pricing.modelling.plotting.common import PlotStyle, finalize_figure
25
+ except Exception: # pragma: no cover - optional for legacy imports
26
+ try: # best-effort for non-package imports
27
+ from ins_pricing.plotting import curves as plot_curves
28
+ from ins_pricing.plotting import diagnostics as plot_diagnostics
29
+ from ins_pricing.plotting.common import PlotStyle, finalize_figure
30
+ except Exception: # pragma: no cover
31
+ plot_curves = None
32
+ plot_diagnostics = None
33
+ PlotStyle = None
34
+ finalize_figure = None
35
+
36
+
37
+ def _plot_skip(label: str) -> None:
38
+ if _MPL_IMPORT_ERROR is not None:
39
+ print(f"[Plot] Skip {label}: matplotlib unavailable ({_MPL_IMPORT_ERROR}).", flush=True)
40
+ else:
41
+ print(f"[Plot] Skip {label}: matplotlib unavailable.", flush=True)
42
+
43
+
44
+ class BayesOptPlottingMixin:
45
+ def plot_oneway(
46
+ self,
47
+ n_bins=10,
48
+ pred_col: Optional[str] = None,
49
+ pred_label: Optional[str] = None,
50
+ pred_weighted: Optional[bool] = None,
51
+ plot_subdir: Optional[str] = None,
52
+ ):
53
+ if plt is None and plot_diagnostics is None:
54
+ _plot_skip("oneway plot")
55
+ return
56
+ if pred_col is not None and pred_col not in self.train_data.columns:
57
+ print(
58
+ f"[Oneway] Missing prediction column '{pred_col}'; skip predicted line.",
59
+ flush=True,
60
+ )
61
+ pred_col = None
62
+ if pred_weighted is None and pred_col is not None:
63
+ pred_weighted = pred_col.startswith("w_pred_")
64
+ if pred_weighted is None:
65
+ pred_weighted = False
66
+ plot_subdir = plot_subdir.strip("/\\") if plot_subdir else "oneway"
67
+ plot_prefix = f"{self.model_nme}/{plot_subdir}"
68
+
69
+ def _safe_tag(value: str) -> str:
70
+ return (
71
+ value.strip()
72
+ .replace(" ", "_")
73
+ .replace("/", "_")
74
+ .replace("\\", "_")
75
+ .replace(":", "_")
76
+ )
77
+
78
+ if plot_diagnostics is None:
79
+ for c in self.factor_nmes:
80
+ fig = plt.figure(figsize=(7, 5))
81
+ if c in self.cate_list:
82
+ group_col = c
83
+ plot_source = self.train_data
84
+ else:
85
+ group_col = f'{c}_bins'
86
+ bins = pd.qcut(
87
+ self.train_data[c],
88
+ n_bins,
89
+ duplicates='drop' # Drop duplicate quantiles to avoid errors.
90
+ )
91
+ plot_source = self.train_data.assign(**{group_col: bins})
92
+ if pred_col is not None and pred_col in plot_source.columns:
93
+ if pred_weighted:
94
+ plot_source = plot_source.assign(
95
+ _pred_w=plot_source[pred_col]
96
+ )
97
+ else:
98
+ plot_source = plot_source.assign(
99
+ _pred_w=plot_source[pred_col] * plot_source[self.weight_nme]
100
+ )
101
+ plot_data = plot_source.groupby(
102
+ [group_col], observed=True).sum(numeric_only=True)
103
+ plot_data.reset_index(inplace=True)
104
+ plot_data['act_v'] = plot_data['w_act'] / \
105
+ plot_data[self.weight_nme]
106
+ if pred_col is not None and "_pred_w" in plot_data.columns:
107
+ plot_data["pred_v"] = plot_data["_pred_w"] / plot_data[self.weight_nme]
108
+ ax = fig.add_subplot(111)
109
+ ax.plot(plot_data.index, plot_data['act_v'],
110
+ label='Actual', color='red')
111
+ if pred_col is not None and "pred_v" in plot_data.columns:
112
+ ax.plot(
113
+ plot_data.index,
114
+ plot_data["pred_v"],
115
+ label=pred_label or "Predicted",
116
+ color="tab:blue",
117
+ )
118
+ ax.set_title(
119
+ 'Analysis of %s : Train Data' % group_col,
120
+ fontsize=8)
121
+ plt.xticks(plot_data.index,
122
+ list(plot_data[group_col].astype(str)),
123
+ rotation=90)
124
+ if len(list(plot_data[group_col].astype(str))) > 50:
125
+ plt.xticks(fontsize=3)
126
+ else:
127
+ plt.xticks(fontsize=6)
128
+ plt.yticks(fontsize=6)
129
+ ax2 = ax.twinx()
130
+ ax2.bar(plot_data.index,
131
+ plot_data[self.weight_nme],
132
+ alpha=0.5, color='seagreen')
133
+ plt.yticks(fontsize=6)
134
+ plt.margins(0.05)
135
+ plt.subplots_adjust(wspace=0.3)
136
+ if pred_col is not None and "pred_v" in plot_data.columns:
137
+ ax.legend(fontsize=6)
138
+ pred_tag = _safe_tag(pred_label or pred_col) if pred_col else None
139
+ if pred_tag:
140
+ filename = f'00_{self.model_nme}_{group_col}_oneway_{pred_tag}.png'
141
+ else:
142
+ filename = f'00_{self.model_nme}_{group_col}_oneway.png'
143
+ save_path = self._resolve_plot_path(plot_prefix, filename)
144
+ plt.savefig(save_path, dpi=300)
145
+ plt.close(fig)
146
+ return
147
+
148
+ if "w_act" not in self.train_data.columns:
149
+ print("[Oneway] Missing w_act column; skip plotting.", flush=True)
150
+ return
151
+
152
+ for c in self.factor_nmes:
153
+ is_cat = c in (self.cate_list or [])
154
+ group_col = c if is_cat else f"{c}_bins"
155
+ title = f"Analysis of {group_col} : Train Data"
156
+ pred_tag = _safe_tag(pred_label or pred_col) if pred_col else None
157
+ if pred_tag:
158
+ filename = f"00_{self.model_nme}_{group_col}_oneway_{pred_tag}.png"
159
+ else:
160
+ filename = f"00_{self.model_nme}_{group_col}_oneway.png"
161
+ save_path = self._resolve_plot_path(plot_prefix, filename)
162
+ plot_diagnostics.plot_oneway(
163
+ self.train_data,
164
+ feature=c,
165
+ weight_col=self.weight_nme,
166
+ target_col="w_act",
167
+ pred_col=pred_col,
168
+ pred_weighted=pred_weighted,
169
+ pred_label=pred_label,
170
+ n_bins=n_bins,
171
+ is_categorical=is_cat,
172
+ title=title,
173
+ save_path=save_path,
174
+ show=False,
175
+ )
176
+
177
+
178
+ def _resolve_plot_path(self, subdir: Optional[str], filename: str) -> str:
179
+ style = str(getattr(self.config, "plot_path_style", "nested") or "nested").strip().lower()
180
+ if style in {"flat", "root"}:
181
+ return self.output_manager.plot_path(filename)
182
+ if subdir:
183
+ return self.output_manager.plot_path(f"{subdir}/{filename}")
184
+ return self.output_manager.plot_path(filename)
185
+
186
+
187
def plot_lift(self, model_label, pred_nme, n_bins=10):
    """Draw lift charts for one model on the train and test data.

    Args:
        model_label: model display name. When it starts with one of the
            known model names, the matching prediction column replaces
            ``pred_nme``.
        pred_nme: prediction column used when no known name matches.
        n_bins: number of bins forwarded to ``plot_curves.plot_lift_curve``.

    A dataset is skipped when it has no usable ``w_act`` column. Nothing is
    drawn when plotting dependencies are unavailable or no dataset is left.
    """
    if plt is None:
        _plot_skip("lift plot")
        return

    known_columns = {
        'Xgboost': 'pred_xgb',
        'ResNet': 'pred_resn',
        'ResNetClassifier': 'pred_resn',
        'GLM': 'pred_glm',
        'GNN': 'pred_gnn',
    }
    if str(self.config.ft_role) == "model":
        known_columns['FTTransformer'] = 'pred_ft'
        known_columns['FTTransformerClassifier'] = 'pred_ft'
    # First known prefix wins; otherwise keep the caller-supplied column.
    pred_nme = next(
        (
            column
            for prefix, column in known_columns.items()
            if model_label.startswith(prefix)
        ),
        pred_nme,
    )

    # Make the label safe to embed in a file name.
    safe_label = str(model_label)
    for unsafe_char in (" ", "/", "\\", ":"):
        safe_label = safe_label.replace(unsafe_char, "_")
    plot_prefix = f"{self.model_nme}/lift"
    filename = f"01_{self.model_nme}_{safe_label}_lift.png"

    labeled = []
    for title, frame in (
        ('Lift Chart on Train Data', self.train_data),
        ('Lift Chart on Test Data', self.test_data),
    ):
        has_labels = 'w_act' in frame.columns and not frame['w_act'].isna().all()
        if has_labels:
            labeled.append((title, frame))
        else:
            print(
                f"[Lift] Missing labels for {title}; skip.",
                flush=True,
            )

    if not labeled:
        print("[Lift] No labeled data available; skip plotting.", flush=True)
        return

    if plot_curves is None:
        _plot_skip("lift plot")
        return

    style = PlotStyle() if PlotStyle else None
    fig, axes = plt.subplots(1, len(labeled), figsize=(11, 5))
    # With a single column, subplots returns one Axes rather than a sequence.
    axes_seq = [axes] if len(labeled) == 1 else axes

    for ax, (title, frame) in zip(axes_seq, labeled):
        weighted_col = f"w_{pred_nme}"
        if pred_nme in frame.columns:
            pred_vals = frame[pred_nme].values
        elif weighted_col in frame.columns:
            # Only the weighted prediction exists: divide the weight back out.
            denom = np.maximum(frame[self.weight_nme].values, EPS)
            pred_vals = frame[weighted_col].values / denom
        else:
            print(
                f"[Lift] Missing prediction columns in {title}; skip.",
                flush=True,
            )
            continue

        plot_curves.plot_lift_curve(
            pred_vals,
            frame['w_act'].values,
            frame[self.weight_nme].values,
            n_bins=n_bins,
            title=title,
            pred_label="Predicted",
            act_label="Actual",
            weight_label="Earned Exposure",
            pred_weighted=False,
            actual_weighted=True,
            ax=ax,
            show=False,
            style=style,
        )

    plt.subplots_adjust(wspace=0.3)
    save_path = self._resolve_plot_path(plot_prefix, filename)
    if finalize_figure:
        finalize_figure(fig, save_path=save_path, show=True, style=style)
        return
    plt.savefig(save_path, dpi=300)
    plt.show()
    plt.close(fig)
283
+
284
+ # Double lift curve plot.
285
+
286
def plot_dlift(self, model_comp: Optional[List[str]] = None, n_bins: int = 10) -> None:
    """Draw double lift charts comparing two models on train and test data.

    Args:
        model_comp: the two model keys to compare, e.g. ``['xgb', 'resn']``.
            ``None`` (the default) means ``['xgb', 'resn']``; a ``None``
            sentinel is used so no mutable list is shared between calls.
        n_bins: number of bins for the lift curves.

    Raises:
        ValueError: if ``model_comp`` does not hold exactly two keys, or a
            key is not one of the supported model keys.

    A dataset is skipped when it has no usable ``w_act`` column. Nothing is
    drawn when plotting dependencies are unavailable or no dataset is left.
    """
    if plt is None:
        _plot_skip("double lift plot")
        return
    if model_comp is None:
        model_comp = ['xgb', 'resn']
    if len(model_comp) != 2:
        raise ValueError("`model_comp` must contain two models to compare.")

    model_name_map = {
        'xgb': 'Xgboost',
        'resn': 'ResNet',
        'glm': 'GLM',
        'gnn': 'GNN',
    }
    if str(self.config.ft_role) == "model":
        model_name_map['ft'] = 'FTTransformer'

    name1, name2 = model_comp
    if name1 not in model_name_map or name2 not in model_name_map:
        raise ValueError(f"Unsupported model key. Choose from {list(model_name_map.keys())}.")
    plot_prefix = f"{self.model_nme}/double_lift"
    filename = f"02_{self.model_nme}_dlift_{name1}_vs_{name2}.png"

    datasets = []
    for data_name, data in [('Train Data', self.train_data),
                            ('Test Data', self.test_data)]:
        if 'w_act' not in data.columns or data['w_act'].isna().all():
            print(
                f"[Double Lift] Missing labels for {data_name}; skip.",
                flush=True,
            )
            continue
        datasets.append((data_name, data))

    if not datasets:
        print("[Double Lift] No labeled data available; skip plotting.", flush=True)
        return

    if plot_curves is None:
        _plot_skip("double lift plot")
        return

    def _prediction_for(data, key, weight_vals):
        # Prefer the plain prediction column; otherwise divide the weight
        # out of the weighted prediction column. None when neither exists.
        plain_col = f"pred_{key}"
        if plain_col in data.columns:
            return data[plain_col].values
        weighted_col = f"w_pred_{key}"
        if weighted_col in data.columns:
            return data[weighted_col].values / np.maximum(weight_vals, EPS)
        return None

    style = PlotStyle() if PlotStyle else None
    fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
    if len(datasets) == 1:
        # With a single column, subplots returns one Axes rather than a sequence.
        axes = [axes]

    label1 = model_name_map[name1]
    label2 = model_name_map[name2]

    for ax, (data_name, data) in zip(axes, datasets):
        weight_vals = data[self.weight_nme].values
        pred1 = _prediction_for(data, name1, weight_vals)
        pred2 = _prediction_for(data, name2, weight_vals)

        if pred1 is None or pred2 is None:
            print(
                f"Warning: missing pred_{name1}/pred_{name2} or w_pred columns in {data_name}. Skip plot.")
            continue

        plot_curves.plot_double_lift_curve(
            pred1,
            pred2,
            data['w_act'].values,
            weight_vals,
            n_bins=n_bins,
            title=f"Double Lift Chart on {data_name}",
            label1=label1,
            label2=label2,
            pred1_weighted=False,
            pred2_weighted=False,
            actual_weighted=True,
            ax=ax,
            show=False,
            style=style,
        )

    plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8, wspace=0.3)
    save_path = self._resolve_plot_path(plot_prefix, filename)
    if finalize_figure:
        finalize_figure(fig, save_path=save_path, show=True, style=style)
    else:
        plt.savefig(save_path, dpi=300)
        plt.show()
        plt.close(fig)
390
+
391
+ # Conversion lift curve plot.
392
+
393
def plot_conversion_lift(self, model_pred_col: str, n_bins: int = 20):
    """Draw conversion-rate lift charts on the train and test data.

    Args:
        model_pred_col: column holding the model score used for ordering.
        n_bins: number of bins for the lift curve.

    Requires ``self.binary_resp_nme`` to be set; otherwise an error is
    printed and nothing is drawn. When ``plot_curves`` is available each
    panel is delegated to ``plot_curves.plot_conversion_lift``; otherwise a
    built-in fallback bins rows by cumulative weight and plots the weighted
    conversion rate per bin (it reads the ``w_binary_act`` column). A
    dataset lacking ``model_pred_col`` is skipped with a warning.

    The figure is closed after being shown so repeated calls do not
    accumulate open figures.
    """
    if plt is None:
        _plot_skip("conversion lift plot")
        return
    if not self.binary_resp_nme:
        print("Error: `binary_resp_nme` not provided at BayesOptModel init; cannot plot conversion lift.")
        return

    fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
    datasets = {
        'Train Data': self.train_data,
        'Test Data': self.test_data
    }

    for ax, (data_name, data) in zip(axes, datasets.items()):
        if model_pred_col not in data.columns:
            print(f"Warning: missing prediction column '{model_pred_col}' in {data_name}. Skip plot.")
            continue

        if plot_curves is not None:
            plot_curves.plot_conversion_lift(
                data[model_pred_col].values,
                data[self.binary_resp_nme].values,
                data[self.weight_nme].values,
                n_bins=n_bins,
                title=f'Conversion Rate Lift Chart on {data_name}',
                ax=ax,
                show=False,
            )
            continue

        # Fallback path: sort by model prediction and bin by cumulative weight.
        plot_data = data.sort_values(by=model_pred_col).copy()
        plot_data['cum_weight'] = plot_data[self.weight_nme].cumsum()
        total_weight = plot_data[self.weight_nme].sum()

        if total_weight > EPS:
            plot_data['bin'] = pd.cut(
                plot_data['cum_weight'],
                bins=n_bins,
                labels=False,
                right=False
            )
        else:
            # No usable weight: put every row in a single bin.
            plot_data['bin'] = 0

        # Aggregate by bins.
        lift_agg = plot_data.groupby('bin').agg(
            total_weight=(self.weight_nme, 'sum'),
            actual_conversions=(self.binary_resp_nme, 'sum'),
            weighted_conversions=('w_binary_act', 'sum'),
            avg_pred=(model_pred_col, 'mean')
        ).reset_index()

        # Compute conversion rate per bin.
        lift_agg['conversion_rate'] = (
            lift_agg['weighted_conversions'] / lift_agg['total_weight']
        )

        # Overall average conversion rate, drawn as a reference line.
        overall_conversion_rate = (
            data['w_binary_act'].sum() / data[self.weight_nme].sum()
        )
        ax.axhline(y=overall_conversion_rate, color='gray', linestyle='--',
                   label=f'Overall Avg Rate ({overall_conversion_rate:.2%})')

        ax.plot(lift_agg['bin'], lift_agg['conversion_rate'],
                marker='o', linestyle='-', label='Actual Conversion Rate')
        ax.set_title(f'Conversion Rate Lift Chart on {data_name}')
        ax.set_xlabel(f'Model Score Decile (based on {model_pred_col})')
        ax.set_ylabel('Conversion Rate')
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend()

    plt.tight_layout()
    plt.show()
    # Release the figure, as the other plot methods in this file do.
    plt.close(fig)
481
+
482
+ # ========= Lightweight explainability: Permutation Importance =========