ins-pricing 0.1.11-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
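Taken together, the rename map above shows three structural moves: the BayesOpt internals shift from ins_pricing/modelling/bayesopt/ to ins_pricing/modelling/core/bayesopt/ (with the monolithic models.py and trainers.py split into per-model and per-trainer modules), the entry scripts move under ins_pricing/cli/, and the parallel ins_pricing_gemini tree is removed. A minimal sketch of what that implies for import paths, with module names taken from the paths above; whether 0.2.0 ships extra compatibility shims is not shown in this diff, and actual importability depends on optional dependencies (torch, xgboost, etc.) being installed:

    # Old layout (0.1.11), per the left-hand side of the rename entries above:
    #   from ins_pricing.modelling.bayesopt import core, utils
    # New layout (0.2.0), per the right-hand side:
    from ins_pricing.modelling.core.bayesopt import core, utils            # moved package
    from ins_pricing.modelling.core.bayesopt.trainers import trainer_xgb   # split out of trainers.py
    from ins_pricing.cli import bayesopt_entry_runner                      # new CLI home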
ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py (new file, +548 lines)
@@ -0,0 +1,548 @@
+from __future__ import annotations
+
+import os
+from typing import List, Optional
+
+try:  # matplotlib is optional; avoid hard import failures in headless/minimal envs
+    import matplotlib
+    if os.name != "nt" and not os.environ.get("DISPLAY") and not os.environ.get("MPLBACKEND"):
+        matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    _MPL_IMPORT_ERROR: Optional[BaseException] = None
+except Exception as exc:  # pragma: no cover - optional dependency
+    plt = None  # type: ignore[assignment]
+    _MPL_IMPORT_ERROR = exc
+
+import numpy as np
+import pandas as pd
+
+from .utils import EPS, PlotUtils
+
+try:
+    from ...plotting import curves as plot_curves
+    from ...plotting import diagnostics as plot_diagnostics
+    from ...plotting.common import PlotStyle, finalize_figure
+except Exception:  # pragma: no cover - optional for legacy imports
+    try:  # best-effort for non-package imports
+        from ins_pricing.plotting import curves as plot_curves
+        from ins_pricing.plotting import diagnostics as plot_diagnostics
+        from ins_pricing.plotting.common import PlotStyle, finalize_figure
+    except Exception:  # pragma: no cover
+        plot_curves = None
+        plot_diagnostics = None
+        PlotStyle = None
+        finalize_figure = None
+
+
+def _plot_skip(label: str) -> None:
+    if _MPL_IMPORT_ERROR is not None:
+        print(f"[Plot] Skip {label}: matplotlib unavailable ({_MPL_IMPORT_ERROR}).", flush=True)
+    else:
+        print(f"[Plot] Skip {label}: matplotlib unavailable.", flush=True)
+
+
+class BayesOptPlottingMixin:
+    def plot_oneway(
+        self,
+        n_bins=10,
+        pred_col: Optional[str] = None,
+        pred_label: Optional[str] = None,
+        pred_weighted: Optional[bool] = None,
+        plot_subdir: Optional[str] = None,
+    ):
+        if plt is None and plot_diagnostics is None:
+            _plot_skip("oneway plot")
+            return
+        if pred_col is not None and pred_col not in self.train_data.columns:
+            print(
+                f"[Oneway] Missing prediction column '{pred_col}'; skip predicted line.",
+                flush=True,
+            )
+            pred_col = None
+        if pred_weighted is None and pred_col is not None:
+            pred_weighted = pred_col.startswith("w_pred_")
+        if pred_weighted is None:
+            pred_weighted = False
+        plot_subdir = plot_subdir.strip("/\\") if plot_subdir else "oneway"
+        plot_prefix = f"{self.model_nme}/{plot_subdir}"
+
+        def _safe_tag(value: str) -> str:
+            return (
+                value.strip()
+                .replace(" ", "_")
+                .replace("/", "_")
+                .replace("\\", "_")
+                .replace(":", "_")
+            )
+
+        if plot_diagnostics is None:
+            for c in self.factor_nmes:
+                fig = plt.figure(figsize=(7, 5))
+                if c in self.cate_list:
+                    group_col = c
+                    plot_source = self.train_data
+                else:
+                    group_col = f'{c}_bins'
+                    bins = pd.qcut(
+                        self.train_data[c],
+                        n_bins,
+                        duplicates='drop'  # Drop duplicate quantiles to avoid errors.
+                    )
+                    plot_source = self.train_data.assign(**{group_col: bins})
+                if pred_col is not None and pred_col in plot_source.columns:
+                    if pred_weighted:
+                        plot_source = plot_source.assign(
+                            _pred_w=plot_source[pred_col]
+                        )
+                    else:
+                        plot_source = plot_source.assign(
+                            _pred_w=plot_source[pred_col] * plot_source[self.weight_nme]
+                        )
+                plot_data = plot_source.groupby(
+                    [group_col], observed=True).sum(numeric_only=True)
+                plot_data.reset_index(inplace=True)
+                plot_data['act_v'] = plot_data['w_act'] / \
+                    plot_data[self.weight_nme]
+                if pred_col is not None and "_pred_w" in plot_data.columns:
+                    plot_data["pred_v"] = plot_data["_pred_w"] / plot_data[self.weight_nme]
+                ax = fig.add_subplot(111)
+                ax.plot(plot_data.index, plot_data['act_v'],
+                        label='Actual', color='red')
+                if pred_col is not None and "pred_v" in plot_data.columns:
+                    ax.plot(
+                        plot_data.index,
+                        plot_data["pred_v"],
+                        label=pred_label or "Predicted",
+                        color="tab:blue",
+                    )
+                ax.set_title(
+                    'Analysis of %s : Train Data' % group_col,
+                    fontsize=8)
+                plt.xticks(plot_data.index,
+                           list(plot_data[group_col].astype(str)),
+                           rotation=90)
+                if len(list(plot_data[group_col].astype(str))) > 50:
+                    plt.xticks(fontsize=3)
+                else:
+                    plt.xticks(fontsize=6)
+                plt.yticks(fontsize=6)
+                ax2 = ax.twinx()
+                ax2.bar(plot_data.index,
+                        plot_data[self.weight_nme],
+                        alpha=0.5, color='seagreen')
+                plt.yticks(fontsize=6)
+                plt.margins(0.05)
+                plt.subplots_adjust(wspace=0.3)
+                if pred_col is not None and "pred_v" in plot_data.columns:
+                    ax.legend(fontsize=6)
+                pred_tag = _safe_tag(pred_label or pred_col) if pred_col else None
+                if pred_tag:
+                    filename = f'00_{self.model_nme}_{group_col}_oneway_{pred_tag}.png'
+                else:
+                    filename = f'00_{self.model_nme}_{group_col}_oneway.png'
+                save_path = self._resolve_plot_path(plot_prefix, filename)
+                plt.savefig(save_path, dpi=300)
+                plt.close(fig)
+            return
+
+        if "w_act" not in self.train_data.columns:
+            print("[Oneway] Missing w_act column; skip plotting.", flush=True)
+            return
+
+        for c in self.factor_nmes:
+            is_cat = c in (self.cate_list or [])
+            group_col = c if is_cat else f"{c}_bins"
+            title = f"Analysis of {group_col} : Train Data"
+            pred_tag = _safe_tag(pred_label or pred_col) if pred_col else None
+            if pred_tag:
+                filename = f"00_{self.model_nme}_{group_col}_oneway_{pred_tag}.png"
+            else:
+                filename = f"00_{self.model_nme}_{group_col}_oneway.png"
+            save_path = self._resolve_plot_path(plot_prefix, filename)
+            plot_diagnostics.plot_oneway(
+                self.train_data,
+                feature=c,
+                weight_col=self.weight_nme,
+                target_col="w_act",
+                pred_col=pred_col,
+                pred_weighted=pred_weighted,
+                pred_label=pred_label,
+                n_bins=n_bins,
+                is_categorical=is_cat,
+                title=title,
+                save_path=save_path,
+                show=False,
+            )
+
+
+    def _resolve_plot_path(self, subdir: Optional[str], filename: str) -> str:
+        style = str(getattr(self.config, "plot_path_style", "nested") or "nested").strip().lower()
+        if style in {"flat", "root"}:
+            return self.output_manager.plot_path(filename)
+        if subdir:
+            return self.output_manager.plot_path(f"{subdir}/{filename}")
+        return self.output_manager.plot_path(filename)
+
+
+    def plot_lift(self, model_label, pred_nme, n_bins=10):
+        if plt is None:
+            _plot_skip("lift plot")
+            return
+        model_map = {
+            'Xgboost': 'pred_xgb',
+            'ResNet': 'pred_resn',
+            'ResNetClassifier': 'pred_resn',
+            'GLM': 'pred_glm',
+            'GNN': 'pred_gnn',
+        }
+        if str(self.config.ft_role) == "model":
+            model_map.update({
+                'FTTransformer': 'pred_ft',
+                'FTTransformerClassifier': 'pred_ft',
+            })
+        for k, v in model_map.items():
+            if model_label.startswith(k):
+                pred_nme = v
+                break
+        safe_label = (
+            str(model_label)
+            .replace(" ", "_")
+            .replace("/", "_")
+            .replace("\\", "_")
+            .replace(":", "_")
+        )
+        plot_prefix = f"{self.model_nme}/lift"
+        filename = f"01_{self.model_nme}_{safe_label}_lift.png"
+
+        datasets = []
+        for title, data in [
+            ('Lift Chart on Train Data', self.train_data),
+            ('Lift Chart on Test Data', self.test_data),
+        ]:
+            if 'w_act' not in data.columns or data['w_act'].isna().all():
+                print(
+                    f"[Lift] Missing labels for {title}; skip.",
+                    flush=True,
+                )
+                continue
+            datasets.append((title, data))
+
+        if not datasets:
+            print("[Lift] No labeled data available; skip plotting.", flush=True)
+            return
+
+        if plot_curves is None:
+            fig = plt.figure(figsize=(11, 5))
+            positions = [111] if len(datasets) == 1 else [121, 122]
+            for pos, (title, data) in zip(positions, datasets):
+                if pred_nme not in data.columns or f'w_{pred_nme}' not in data.columns:
+                    print(
+                        f"[Lift] Missing prediction columns in {title}; skip.",
+                        flush=True,
+                    )
+                    continue
+                lift_df = pd.DataFrame({
+                    'pred': data[pred_nme].values,
+                    'w_pred': data[f'w_{pred_nme}'].values,
+                    'act': data['w_act'].values,
+                    'weight': data[self.weight_nme].values
+                })
+                plot_data = PlotUtils.split_data(lift_df, 'pred', 'weight', n_bins)
+                denom = np.maximum(plot_data['weight'], EPS)
+                plot_data['exp_v'] = plot_data['w_pred'] / denom
+                plot_data['act_v'] = plot_data['act'] / denom
+                plot_data = plot_data.reset_index()
+
+                ax = fig.add_subplot(pos)
+                PlotUtils.plot_lift_ax(ax, plot_data, title)
+
+            plt.subplots_adjust(wspace=0.3)
+            save_path = self._resolve_plot_path(plot_prefix, filename)
+            plt.savefig(save_path, dpi=300)
+            plt.show()
+            plt.close(fig)
+            return
+
+        style = PlotStyle() if PlotStyle else None
+        fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
+        if len(datasets) == 1:
+            axes = [axes]
+
+        for ax, (title, data) in zip(axes, datasets):
+            pred_vals = None
+            if pred_nme in data.columns:
+                pred_vals = data[pred_nme].values
+            else:
+                w_pred_col = f"w_{pred_nme}"
+                if w_pred_col in data.columns:
+                    denom = np.maximum(data[self.weight_nme].values, EPS)
+                    pred_vals = data[w_pred_col].values / denom
+            if pred_vals is None:
+                print(
+                    f"[Lift] Missing prediction columns in {title}; skip.",
+                    flush=True,
+                )
+                continue
+
+            plot_curves.plot_lift_curve(
+                pred_vals,
+                data['w_act'].values,
+                data[self.weight_nme].values,
+                n_bins=n_bins,
+                title=title,
+                pred_label="Predicted",
+                act_label="Actual",
+                weight_label="Earned Exposure",
+                pred_weighted=False,
+                actual_weighted=True,
+                ax=ax,
+                show=False,
+                style=style,
+            )
+
+        plt.subplots_adjust(wspace=0.3)
+        save_path = self._resolve_plot_path(plot_prefix, filename)
+        if finalize_figure:
+            finalize_figure(fig, save_path=save_path, show=True, style=style)
+        else:
+            plt.savefig(save_path, dpi=300)
+            plt.show()
+        plt.close(fig)
+
+    # Double lift curve plot.
+
+    def plot_dlift(self, model_comp: List[str] = ['xgb', 'resn'], n_bins: int = 10) -> None:
+        # Compare two models across bins.
+        # Args:
+        #     model_comp: model keys to compare (e.g., ['xgb', 'resn']).
+        #     n_bins: number of bins for lift curves.
+        if plt is None:
+            _plot_skip("double lift plot")
+            return
+        if len(model_comp) != 2:
+            raise ValueError("`model_comp` must contain two models to compare.")
+
+        model_name_map = {
+            'xgb': 'Xgboost',
+            'resn': 'ResNet',
+            'glm': 'GLM',
+            'gnn': 'GNN',
+        }
+        if str(self.config.ft_role) == "model":
+            model_name_map['ft'] = 'FTTransformer'
+
+        name1, name2 = model_comp
+        if name1 not in model_name_map or name2 not in model_name_map:
+            raise ValueError(f"Unsupported model key. Choose from {list(model_name_map.keys())}.")
+        plot_prefix = f"{self.model_nme}/double_lift"
+        filename = f"02_{self.model_nme}_dlift_{name1}_vs_{name2}.png"
+
+        datasets = []
+        for data_name, data in [('Train Data', self.train_data),
+                                ('Test Data', self.test_data)]:
+            if 'w_act' not in data.columns or data['w_act'].isna().all():
+                print(
+                    f"[Double Lift] Missing labels for {data_name}; skip.",
+                    flush=True,
+                )
+                continue
+            datasets.append((data_name, data))
+
+        if not datasets:
+            print("[Double Lift] No labeled data available; skip plotting.", flush=True)
+            return
+
+        if plot_curves is None:
+            fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
+            if len(datasets) == 1:
+                axes = [axes]
+
+            for ax, (data_name, data) in zip(axes, datasets):
+                pred1_col = f'w_pred_{name1}'
+                pred2_col = f'w_pred_{name2}'
+
+                if pred1_col not in data.columns or pred2_col not in data.columns:
+                    print(
+                        f"Warning: missing prediction columns {pred1_col} or {pred2_col} in {data_name}. Skip plot.")
+                    continue
+
+                lift_data = pd.DataFrame({
+                    'pred1': data[pred1_col].values,
+                    'pred2': data[pred2_col].values,
+                    'diff_ly': data[pred1_col].values / np.maximum(data[pred2_col].values, EPS),
+                    'act': data['w_act'].values,
+                    'weight': data[self.weight_nme].values
+                })
+                plot_data = PlotUtils.split_data(
+                    lift_data, 'diff_ly', 'weight', n_bins)
+                denom = np.maximum(plot_data['act'], EPS)
+                plot_data['exp_v1'] = plot_data['pred1'] / denom
+                plot_data['exp_v2'] = plot_data['pred2'] / denom
+                plot_data['act_v'] = plot_data['act'] / denom
+                plot_data.reset_index(inplace=True)
+
+                label1 = model_name_map[name1]
+                label2 = model_name_map[name2]
+
+                PlotUtils.plot_dlift_ax(
+                    ax, plot_data, f'Double Lift Chart on {data_name}', label1, label2)
+
+            plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8, wspace=0.3)
+            save_path = self._resolve_plot_path(plot_prefix, filename)
+            plt.savefig(save_path, dpi=300)
+            plt.show()
+            plt.close(fig)
+            return
+
+        style = PlotStyle() if PlotStyle else None
+        fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
+        if len(datasets) == 1:
+            axes = [axes]
+
+        label1 = model_name_map[name1]
+        label2 = model_name_map[name2]
+
+        for ax, (data_name, data) in zip(axes, datasets):
+            weight_vals = data[self.weight_nme].values
+            pred1 = None
+            pred2 = None
+
+            pred1_col = f"pred_{name1}"
+            pred2_col = f"pred_{name2}"
+            if pred1_col in data.columns:
+                pred1 = data[pred1_col].values
+            else:
+                w_pred1_col = f"w_pred_{name1}"
+                if w_pred1_col in data.columns:
+                    pred1 = data[w_pred1_col].values / np.maximum(weight_vals, EPS)
+
+            if pred2_col in data.columns:
+                pred2 = data[pred2_col].values
+            else:
+                w_pred2_col = f"w_pred_{name2}"
+                if w_pred2_col in data.columns:
+                    pred2 = data[w_pred2_col].values / np.maximum(weight_vals, EPS)
+
+            if pred1 is None or pred2 is None:
+                print(
+                    f"Warning: missing pred_{name1}/pred_{name2} or w_pred columns in {data_name}. Skip plot.")
+                continue
+
+            plot_curves.plot_double_lift_curve(
+                pred1,
+                pred2,
+                data['w_act'].values,
+                weight_vals,
+                n_bins=n_bins,
+                title=f"Double Lift Chart on {data_name}",
+                label1=label1,
+                label2=label2,
+                pred1_weighted=False,
+                pred2_weighted=False,
+                actual_weighted=True,
+                ax=ax,
+                show=False,
+                style=style,
+            )
+
+        plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8, wspace=0.3)
+        save_path = self._resolve_plot_path(plot_prefix, filename)
+        if finalize_figure:
+            finalize_figure(fig, save_path=save_path, show=True, style=style)
+        else:
+            plt.savefig(save_path, dpi=300)
+            plt.show()
+        plt.close(fig)
+
+    # Conversion lift curve plot.
+
+    def plot_conversion_lift(self, model_pred_col: str, n_bins: int = 20):
+        if plt is None:
+            _plot_skip("conversion lift plot")
+            return
+        if not self.binary_resp_nme:
+            print("Error: `binary_resp_nme` not provided at BayesOptModel init; cannot plot conversion lift.")
+            return
+
+        if plot_curves is None:
+            fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
+            datasets = {
+                'Train Data': self.train_data,
+                'Test Data': self.test_data
+            }
+
+            for ax, (data_name, data) in zip(axes, datasets.items()):
+                if model_pred_col not in data.columns:
+                    print(f"Warning: missing prediction column '{model_pred_col}' in {data_name}. Skip plot.")
+                    continue
+
+                # Sort by model prediction and compute bins.
+                plot_data = data.sort_values(by=model_pred_col).copy()
+                plot_data['cum_weight'] = plot_data[self.weight_nme].cumsum()
+                total_weight = plot_data[self.weight_nme].sum()
+
+                if total_weight > EPS:
+                    plot_data['bin'] = pd.cut(
+                        plot_data['cum_weight'],
+                        bins=n_bins,
+                        labels=False,
+                        right=False
+                    )
+                else:
+                    plot_data['bin'] = 0
+
+                # Aggregate by bins.
+                lift_agg = plot_data.groupby('bin').agg(
+                    total_weight=(self.weight_nme, 'sum'),
+                    actual_conversions=(self.binary_resp_nme, 'sum'),
+                    weighted_conversions=('w_binary_act', 'sum'),
+                    avg_pred=(model_pred_col, 'mean')
+                ).reset_index()
+
+                # Compute conversion rate.
+                lift_agg['conversion_rate'] = lift_agg['weighted_conversions'] / \
+                    lift_agg['total_weight']
+
+                # Compute overall average conversion rate.
+                overall_conversion_rate = data['w_binary_act'].sum(
+                ) / data[self.weight_nme].sum()
+                ax.axhline(y=overall_conversion_rate, color='gray', linestyle='--',
+                           label=f'Overall Avg Rate ({overall_conversion_rate:.2%})')
+
+                ax.plot(lift_agg['bin'], lift_agg['conversion_rate'],
+                        marker='o', linestyle='-', label='Actual Conversion Rate')
+                ax.set_title(f'Conversion Rate Lift Chart on {data_name}')
+                ax.set_xlabel(f'Model Score Decile (based on {model_pred_col})')
+                ax.set_ylabel('Conversion Rate')
+                ax.grid(True, linestyle='--', alpha=0.6)
+                ax.legend()
+
+            plt.tight_layout()
+            plt.show()
+            return
+
+        fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
+        datasets = {
+            'Train Data': self.train_data,
+            'Test Data': self.test_data
+        }
+
+        for ax, (data_name, data) in zip(axes, datasets.items()):
+            if model_pred_col not in data.columns:
+                print(f"Warning: missing prediction column '{model_pred_col}' in {data_name}. Skip plot.")
+                continue
+
+            plot_curves.plot_conversion_lift(
+                data[model_pred_col].values,
+                data[self.binary_resp_nme].values,
+                data[self.weight_nme].values,
+                n_bins=n_bins,
+                title=f'Conversion Rate Lift Chart on {data_name}',
+                ax=ax,
+                show=False,
+            )
+
+        plt.tight_layout()
+        plt.show()
+
+    # ========= Lightweight explainability: Permutation Importance =========
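For orientation, a minimal usage sketch of the mixin above, assuming matplotlib is installed. The TinyModel host class, the _DemoConfig/_DemoOutput helpers, and the toy DataFrame are hypothetical stand-ins for the real BayesOpt model plumbing; the attribute names (train_data, factor_nmes, weight_nme, the w_act column, output_manager.plot_path, config.plot_path_style) are taken from the code above, and the module path from the file list:

    import os
    import numpy as np
    import pandas as pd
    from ins_pricing.modelling.core.bayesopt.model_plotting_mixin import BayesOptPlottingMixin

    class _DemoOutput:
        # Stand-in for the real output manager: maps a relative plot name to ./plots/...
        def plot_path(self, name: str) -> str:
            path = os.path.join("plots", name)
            os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
            return path

    class _DemoConfig:
        plot_path_style = "nested"   # "flat"/"root" would drop the per-model subfolder
        ft_role = "feature"          # "model" adds FTTransformer entries to the label maps

    class TinyModel(BayesOptPlottingMixin):
        def __init__(self, df: pd.DataFrame) -> None:
            self.train_data = df
            self.test_data = df
            self.factor_nmes = ["age"]
            self.cate_list = []
            self.weight_nme = "exposure"
            self.model_nme = "demo"
            self.binary_resp_nme = None
            self.config = _DemoConfig()
            self.output_manager = _DemoOutput()

    rng = np.random.default_rng(0)
    df = pd.DataFrame({
        "age": rng.uniform(18, 80, 500),
        "exposure": np.ones(500),
        "w_act": rng.gamma(2.0, 100.0, 500),  # weighted actuals, as plot_oneway expects
    })
    TinyModel(df).plot_oneway(n_bins=5)  # expected to write plots/demo/oneway/00_demo_age_bins_oneway.png

plot_lift, plot_dlift, and plot_conversion_lift additionally look for pred_*/w_pred_* (and w_binary_act) columns produced by the trainers, so they are not exercised in this toy setup.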
ins_pricing/modelling/core/bayesopt/models/__init__.py (new file, +27 lines)
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from .model_ft_components import (
+    FeatureTokenizer,
+    FTTransformerCore,
+    MaskedTabularDataset,
+    ScaledTransformerEncoderLayer,
+    TabularDataset,
+)
+from .model_ft_trainer import FTTransformerSklearn
+from .model_gnn import GraphNeuralNetSklearn, SimpleGNN, SimpleGraphLayer
+from .model_resn import ResBlock, ResNetSequential, ResNetSklearn
+
+__all__ = [
+    "FeatureTokenizer",
+    "FTTransformerCore",
+    "MaskedTabularDataset",
+    "ScaledTransformerEncoderLayer",
+    "TabularDataset",
+    "FTTransformerSklearn",
+    "GraphNeuralNetSklearn",
+    "SimpleGNN",
+    "SimpleGraphLayer",
+    "ResBlock",
+    "ResNetSequential",
+    "ResNetSklearn",
+]
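Because this __init__ re-exports every class, downstream code can import the split model implementations from the subpackage root rather than the individual model_* modules. A one-line sketch (importing these classes in practice depends on optional dependencies such as torch being installed):

    from ins_pricing.modelling.core.bayesopt.models import FTTransformerSklearn, ResNetSklearn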