ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
@@ -1,463 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import pandas as pd
4
- import numpy as np
5
- import os
6
- from typing import Optional, List, TYPE_CHECKING, Any
7
-
8
- if TYPE_CHECKING:
9
- from .core import BayesOptModel
10
-
11
- try:
12
- import matplotlib
13
- if os.name != "nt" and not os.environ.get("DISPLAY") and not os.environ.get("MPLBACKEND"):
14
- matplotlib.use("Agg")
15
- import matplotlib.pyplot as plt
16
- _MPL_IMPORT_ERROR: Optional[BaseException] = None
17
- except Exception as exc:
18
- plt = None
19
- _MPL_IMPORT_ERROR = exc
20
-
21
- from .utils import PlotUtils, EPS
22
-
23
- try:
24
- from .plotting import curves as plot_curves
25
- from .plotting import diagnostics as plot_diagnostics
26
- from .plotting.common import PlotStyle, finalize_figure
27
- except Exception:
28
- # Fallback if imports fail (e.g. running from wrong dir)
29
- try:
30
- from ins_pricing.plotting import curves as plot_curves
31
- from ins_pricing.plotting import diagnostics as plot_diagnostics
32
- from ins_pricing.plotting.common import PlotStyle, finalize_figure
33
- except Exception:
34
- plot_curves = None
35
- plot_diagnostics = None
36
- PlotStyle = None
37
- finalize_figure = None
38
-
39
-
40
def _plot_skip(label: str) -> None:
    """Report on stdout that the plot named ``label`` is being skipped.

    The recorded matplotlib import error (module-level
    ``_MPL_IMPORT_ERROR``) is appended in parentheses when one was captured.
    """
    if _MPL_IMPORT_ERROR is None:
        message = f"[Plot] Skip {label}: matplotlib unavailable."
    else:
        message = f"[Plot] Skip {label}: matplotlib unavailable ({_MPL_IMPORT_ERROR})."
    print(message, flush=True)
45
-
46
-
47
def _plot_oneway_legacy(train_data, feature, group_col, weight_col,
                        n_bins, is_categorical, title, save_path):
    """Draw one one-way chart with plain matplotlib and save it to ``save_path``.

    Used only when the ``plotting.diagnostics`` helpers could not be imported.
    The caller guarantees that ``plt`` is available and that ``train_data``
    has a ``w_act`` column.
    """
    if is_categorical:
        plot_source = train_data
    else:
        # Quantile-bin numeric features; duplicate edges are dropped so
        # heavily tied columns do not raise.
        bins = pd.qcut(train_data[feature], n_bins, duplicates='drop')
        plot_source = train_data.assign(**{group_col: bins})

    plot_data = plot_source.groupby(
        [group_col], observed=True).sum(numeric_only=True)
    plot_data.reset_index(inplace=True)
    # Guard the denominator the same way the lift charts do, so a group with
    # zero total weight does not produce inf/NaN.
    plot_data['act_v'] = plot_data['w_act'] / \
        np.maximum(plot_data[weight_col], EPS)
    tick_labels = list(plot_data[group_col].astype(str))

    fig = plt.figure(figsize=(7, 5))
    try:
        ax = fig.add_subplot(111)
        ax.plot(plot_data.index, plot_data['act_v'],
                label='Actual', color='red')
        ax.set_title(title, fontsize=8)
        plt.xticks(plot_data.index, tick_labels, rotation=90)
        # Shrink tick labels when there are many groups.
        plt.xticks(fontsize=3 if len(tick_labels) > 50 else 6)
        plt.yticks(fontsize=6)
        ax2 = ax.twinx()
        ax2.bar(plot_data.index, plot_data[weight_col],
                alpha=0.5, color='seagreen')
        plt.yticks(fontsize=6)
        plt.margins(0.05)
        plt.subplots_adjust(wspace=0.3)
        fig.savefig(save_path, dpi=300)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)


def plot_oneway(model: "BayesOptModel", n_bins=10):
    """Save one one-way chart per feature in ``model.factor_nmes``.

    For every feature the training data is grouped (by category, or by
    ``n_bins`` quantile bins for non-categorical features) and written to
    ``00_<model_nme>_<group_col>_oneway.png`` under the path returned by
    ``model.output_manager.plot_path``.

    Plotting is delegated to ``plot_diagnostics.plot_oneway`` when that
    module was imported; otherwise an inline matplotlib fallback is used.

    Args:
        model: Object exposing ``factor_nmes``, ``cate_list``, ``train_data``,
            ``weight_nme``, ``model_nme`` and ``output_manager``.
        n_bins: Number of quantile bins for non-categorical features.

    Returns:
        None. Prints a notice and returns early when neither plotting
        backend is available or when ``train_data`` has no ``w_act`` column.
    """
    if plt is None and plot_diagnostics is None:
        _plot_skip("oneway plot")
        return

    train_data = model.train_data
    # Both code paths need the weighted-actual column; check once up front so
    # the fallback path no longer fails with a KeyError mid-plot.
    if "w_act" not in train_data.columns:
        print("[Oneway] Missing w_act column; skip plotting.", flush=True)
        return

    # ``cate_list`` may be None; treat that as "no categorical features".
    cate_cols = model.cate_list or []

    for c in model.factor_nmes:
        is_cat = c in cate_cols
        group_col = c if is_cat else f"{c}_bins"
        title = f"Analysis of {group_col} : Train Data"
        save_path = model.output_manager.plot_path(
            f"00_{model.model_nme}_{group_col}_oneway.png"
        )
        if plot_diagnostics is not None:
            plot_diagnostics.plot_oneway(
                train_data,
                feature=c,
                weight_col=model.weight_nme,
                target_col="w_act",
                n_bins=n_bins,
                is_categorical=is_cat,
                title=title,
                save_path=save_path,
                show=False,
            )
            continue

        # Fallback: ``plt`` is guaranteed non-None here by the first guard.
        _plot_oneway_legacy(
            train_data,
            feature=c,
            group_col=group_col,
            weight_col=model.weight_nme,
            n_bins=n_bins,
            is_categorical=is_cat,
            title=title,
            save_path=save_path,
        )
120
-
121
def plot_lift(model: "BayesOptModel", model_label, pred_nme, n_bins=10):
    """Save a lift chart for the train and test data of ``model``.

    The output file is ``01_<model_nme>_<model_label>_lift.png`` under the
    path returned by ``model.output_manager.plot_path``.

    Args:
        model: Object exposing ``config.ft_role``, ``train_data``,
            ``test_data``, ``weight_nme``, ``model_nme`` and
            ``output_manager``.
        model_label: Display label of the model. When it starts with a known
            prefix (``Xgboost``, ``ResNet``, ``GLM``, ``GNN``, and
            ``FTTransformer`` when ``ft_role == "model"``), the prediction
            column is derived from it and ``pred_nme`` is overridden.
        pred_nme: Prediction column name, used when ``model_label`` matches
            no known prefix.
        n_bins: Number of bins for the lift curve.

    Returns:
        None. Datasets without labels (``w_act``) or without prediction
        columns are reported and left out; if nothing remains, no figure is
        created or saved.
    """
    if plt is None:
        _plot_skip("lift plot")
        return

    model_map = {
        'Xgboost': 'pred_xgb',
        'ResNet': 'pred_resn',
        'ResNetClassifier': 'pred_resn',
        'GLM': 'pred_glm',
        'GNN': 'pred_gnn',
    }
    if str(model.config.ft_role) == "model":
        model_map.update({
            'FTTransformer': 'pred_ft',
            'FTTransformerClassifier': 'pred_ft',
        })
    for prefix, column in model_map.items():
        if model_label.startswith(prefix):
            pred_nme = column
            break

    weight_col = model.weight_nme
    w_pred_col = f"w_{pred_nme}"
    # Without the plotting.curves helpers, fall back to PlotUtils.
    use_legacy = plot_curves is None

    datasets = []
    for title, data in [
        ('Lift Chart on Train Data', model.train_data),
        ('Lift Chart on Test Data', model.test_data),
    ]:
        if 'w_act' not in data.columns or data['w_act'].isna().all():
            print(
                f"[Lift] Missing labels for {title}; skip.",
                flush=True,
            )
            continue
        datasets.append((title, data))

    if not datasets:
        print("[Lift] No labeled data available; skip plotting.", flush=True)
        return

    # Decide which datasets can be drawn BEFORE laying out the figure, so a
    # dataset without predictions no longer leaves a blank panel (or an
    # entirely blank PNG) behind.
    panels = []
    for title, data in datasets:
        has_pred = pred_nme in data.columns
        has_w_pred = w_pred_col in data.columns
        # The fallback needs both columns; the curves helper needs either.
        plottable = (has_pred and has_w_pred) if use_legacy \
            else (has_pred or has_w_pred)
        if not plottable:
            print(
                f"[Lift] Missing prediction columns in {title}; skip.",
                flush=True,
            )
            continue
        panels.append((title, data))

    if not panels:
        print("[Lift] No plottable data available; skip plotting.", flush=True)
        return

    style = PlotStyle() if (PlotStyle and not use_legacy) else None
    # squeeze=False always yields a 2-D axes array, so the single-panel case
    # needs no special handling.
    fig, axes = plt.subplots(1, len(panels), figsize=(11, 5), squeeze=False)
    try:
        for ax, (title, data) in zip(axes[0], panels):
            if use_legacy:
                lift_df = pd.DataFrame({
                    'pred': data[pred_nme].values,
                    'w_pred': data[w_pred_col].values,
                    'act': data['w_act'].values,
                    'weight': data[weight_col].values,
                })
                plot_data = PlotUtils.split_data(
                    lift_df, 'pred', 'weight', n_bins)
                denom = np.maximum(plot_data['weight'], EPS)
                plot_data['exp_v'] = plot_data['w_pred'] / denom
                plot_data['act_v'] = plot_data['act'] / denom
                plot_data = plot_data.reset_index()
                PlotUtils.plot_lift_ax(ax, plot_data, title)
                continue

            if pred_nme in data.columns:
                pred_vals = data[pred_nme].values
            else:
                # Only the weighted prediction exists: divide the weight out.
                denom = np.maximum(data[weight_col].values, EPS)
                pred_vals = data[w_pred_col].values / denom

            plot_curves.plot_lift_curve(
                pred_vals,
                data['w_act'].values,
                data[weight_col].values,
                n_bins=n_bins,
                title=title,
                pred_label="Predicted",
                act_label="Actual",
                weight_label="Earned Exposure",
                pred_weighted=False,
                actual_weighted=True,
                ax=ax,
                show=False,
                style=style,
            )

        fig.subplots_adjust(wspace=0.3)
        save_path = model.output_manager.plot_path(
            f'01_{model.model_nme}_{model_label}_lift.png')
        if not use_legacy and finalize_figure:
            finalize_figure(fig, save_path=save_path, show=True, style=style)
        else:
            fig.savefig(save_path, dpi=300)
    finally:
        # Release the figure on every path, including failures while plotting.
        plt.close(fig)
237
-
238
def _resolve_dlift_pred(data, name, weight_vals):
    """Return per-row predictions for model key ``name``, or None if absent.

    Prefers the ``pred_<name>`` column; otherwise divides ``w_pred_<name>``
    by the (EPS-floored) weights.
    """
    pred_col = f"pred_{name}"
    if pred_col in data.columns:
        return data[pred_col].values
    w_pred_col = f"w_pred_{name}"
    if w_pred_col in data.columns:
        return data[w_pred_col].values / np.maximum(weight_vals, EPS)
    return None


def plot_dlift(model: "BayesOptModel", model_comp: Optional[List[str]] = None, n_bins: int = 10) -> None:
    """Save a double-lift chart comparing two models on train and test data.

    The output file is ``02_<model_nme>_dlift_<name1>_vs_<name2>.png`` under
    the path returned by ``model.output_manager.plot_path``.

    Args:
        model: Object exposing ``config.ft_role``, ``train_data``,
            ``test_data``, ``weight_nme``, ``model_nme`` and
            ``output_manager``.
        model_comp: Two model keys to compare, chosen from ``xgb``, ``resn``,
            ``glm``, ``gnn`` (and ``ft`` when ``ft_role == "model"``).
            Defaults to ``['xgb', 'resn']``.
        n_bins: Number of bins for the double-lift curve.

    Raises:
        ValueError: If ``model_comp`` does not contain exactly two entries or
            contains an unsupported key.

    Returns:
        None. Datasets without labels (``w_act``) or without prediction
        columns are reported and left out; if nothing remains, no figure is
        created or saved.
    """
    if plt is None:
        _plot_skip("double lift plot")
        return
    # None sentinel instead of a mutable list default.
    if model_comp is None:
        model_comp = ['xgb', 'resn']
    if len(model_comp) != 2:
        raise ValueError("`model_comp` must contain two models to compare.")

    model_name_map = {
        'xgb': 'Xgboost',
        'resn': 'ResNet',
        'glm': 'GLM',
        'gnn': 'GNN',
    }
    if str(model.config.ft_role) == "model":
        model_name_map['ft'] = 'FTTransformer'

    name1, name2 = model_comp
    if name1 not in model_name_map or name2 not in model_name_map:
        raise ValueError(f"Unsupported model key. Choose from {list(model_name_map.keys())}.")
    label1 = model_name_map[name1]
    label2 = model_name_map[name2]

    datasets = []
    for data_name, data in [('Train Data', model.train_data),
                            ('Test Data', model.test_data)]:
        if 'w_act' not in data.columns or data['w_act'].isna().all():
            print(
                f"[Double Lift] Missing labels for {data_name}; skip.",
                flush=True,
            )
            continue
        datasets.append((data_name, data))

    if not datasets:
        print("[Double Lift] No labeled data available; skip plotting.", flush=True)
        return

    # Without the plotting.curves helpers, fall back to PlotUtils.
    use_legacy = plot_curves is None

    # Resolve predictions BEFORE laying out the figure, so a dataset without
    # them no longer leaves a blank panel (or an entirely blank PNG) behind.
    panels = []
    for data_name, data in datasets:
        if use_legacy:
            # The fallback works on the weighted prediction columns only.
            pred1_col = f'w_pred_{name1}'
            pred2_col = f'w_pred_{name2}'
            if pred1_col not in data.columns or pred2_col not in data.columns:
                print(
                    f"Warning: missing prediction columns {pred1_col} or {pred2_col} in {data_name}. Skip plot.")
                continue
            weight_vals = data[model.weight_nme].values
            pred1 = data[pred1_col].values
            pred2 = data[pred2_col].values
        else:
            weight_vals = data[model.weight_nme].values
            pred1 = _resolve_dlift_pred(data, name1, weight_vals)
            pred2 = _resolve_dlift_pred(data, name2, weight_vals)
            if pred1 is None or pred2 is None:
                print(
                    f"Warning: missing pred_{name1}/pred_{name2} or w_pred columns in {data_name}. Skip plot.")
                continue
        panels.append((data_name, data, pred1, pred2, weight_vals))

    if not panels:
        print("[Double Lift] No plottable data available; skip plotting.", flush=True)
        return

    style = PlotStyle() if (PlotStyle and not use_legacy) else None
    # squeeze=False always yields a 2-D axes array, so the single-panel case
    # needs no special handling.
    fig, axes = plt.subplots(1, len(panels), figsize=(11, 5), squeeze=False)
    try:
        for ax, (data_name, data, pred1, pred2, weight_vals) in zip(axes[0], panels):
            if use_legacy:
                lift_data = pd.DataFrame({
                    'pred1': pred1,
                    'pred2': pred2,
                    # Rows are binned by the ratio of the two predictions.
                    'diff_ly': pred1 / np.maximum(pred2, EPS),
                    'act': data['w_act'].values,
                    'weight': weight_vals,
                })
                plot_data = PlotUtils.split_data(
                    lift_data, 'diff_ly', 'weight', n_bins)
                denom = np.maximum(plot_data['act'], EPS)
                plot_data['exp_v1'] = plot_data['pred1'] / denom
                plot_data['exp_v2'] = plot_data['pred2'] / denom
                plot_data['act_v'] = plot_data['act'] / denom
                plot_data.reset_index(inplace=True)
                PlotUtils.plot_dlift_ax(
                    ax, plot_data, f'Double Lift Chart on {data_name}', label1, label2)
                continue

            plot_curves.plot_double_lift_curve(
                pred1,
                pred2,
                data['w_act'].values,
                weight_vals,
                n_bins=n_bins,
                title=f"Double Lift Chart on {data_name}",
                label1=label1,
                label2=label2,
                pred1_weighted=False,
                pred2_weighted=False,
                actual_weighted=True,
                ax=ax,
                show=False,
                style=style,
            )

        fig.subplots_adjust(bottom=0.25, top=0.95, right=0.8, wspace=0.3)
        save_path = model.output_manager.plot_path(
            f'02_{model.model_nme}_dlift_{name1}_vs_{name2}.png')
        if not use_legacy and finalize_figure:
            finalize_figure(fig, save_path=save_path, show=True, style=style)
        else:
            fig.savefig(save_path, dpi=300)
    finally:
        # Release the figure on every path, including failures while plotting.
        plt.close(fig)
375
-
376
def _plot_conversion_lift_legacy(ax, data, data_name, model_pred_col,
                                 weight_col, binary_col, n_bins):
    """Draw one conversion-rate lift panel on ``ax`` with plain matplotlib.

    Used only when the ``plotting.curves`` helpers could not be imported.
    Expects ``data`` to contain ``model_pred_col``, ``weight_col``,
    ``binary_col`` and ``w_binary_act``.
    """
    # Order rows by model score and bin them on cumulative weight.
    plot_data = data.sort_values(by=model_pred_col).copy()
    plot_data['cum_weight'] = plot_data[weight_col].cumsum()
    total_weight = plot_data[weight_col].sum()

    if total_weight > EPS:
        plot_data['bin'] = pd.cut(
            plot_data['cum_weight'],
            bins=n_bins,
            labels=False,
            right=False
        )
    else:
        # Degenerate case (no weight): put everything into a single bin.
        plot_data['bin'] = 0

    lift_agg = plot_data.groupby('bin').agg(
        total_weight=(weight_col, 'sum'),
        actual_conversions=(binary_col, 'sum'),
        weighted_conversions=('w_binary_act', 'sum'),
        avg_pred=(model_pred_col, 'mean')
    ).reset_index()

    # Floor the denominators with EPS so zero-weight bins/data do not yield
    # inf/NaN rates.
    lift_agg['conversion_rate'] = lift_agg['weighted_conversions'] / \
        np.maximum(lift_agg['total_weight'], EPS)
    overall_conversion_rate = data['w_binary_act'].sum() / \
        np.maximum(data[weight_col].sum(), EPS)

    ax.axhline(y=overall_conversion_rate, color='gray', linestyle='--',
               label=f'Overall Avg Rate ({overall_conversion_rate:.2%})')
    ax.plot(lift_agg['bin'], lift_agg['conversion_rate'],
            marker='o', linestyle='-', label='Actual Conversion Rate')
    ax.set_title(f'Conversion Rate Lift Chart on {data_name}')
    ax.set_xlabel(f'Model Score Decile (based on {model_pred_col})')
    ax.set_ylabel('Conversion Rate')
    ax.grid(True, linestyle='--', alpha=0.6)
    ax.legend()


def plot_conversion_lift(model: "BayesOptModel", model_pred_col: str, n_bins: int = 20):
    """Show side-by-side conversion-rate lift charts for train and test data.

    The figure is displayed with ``plt.show()`` and then closed; nothing is
    written to disk.

    Args:
        model: Object exposing ``binary_resp_nme``, ``train_data``,
            ``test_data`` and ``weight_nme``.
        model_pred_col: Name of the prediction/score column to rank by.
        n_bins: Number of bins along the score axis.

    Returns:
        None. Prints a notice and returns early when matplotlib is
        unavailable or ``model.binary_resp_nme`` is not set. A dataset
        missing the required columns is reported and its panel left empty.
    """
    if plt is None:
        _plot_skip("conversion lift plot")
        return
    if not model.binary_resp_nme:
        print("Error: `binary_resp_nme` not provided at BayesOptModel init; cannot plot conversion lift.")
        return

    # Without the plotting.curves helpers, fall back to the inline version.
    use_legacy = plot_curves is None
    datasets = {
        'Train Data': model.train_data,
        'Test Data': model.test_data
    }

    fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
    try:
        for ax, (data_name, data) in zip(axes, datasets.items()):
            if model_pred_col not in data.columns:
                print(f"Warning: missing prediction column '{model_pred_col}' in {data_name}. Skip plot.")
                continue

            if use_legacy:
                # The fallback aggregates this column directly; report its
                # absence instead of failing with a KeyError.
                if 'w_binary_act' not in data.columns:
                    print(f"Warning: missing column 'w_binary_act' in {data_name}. Skip plot.")
                    continue
                _plot_conversion_lift_legacy(
                    ax,
                    data,
                    data_name,
                    model_pred_col,
                    model.weight_nme,
                    model.binary_resp_nme,
                    n_bins,
                )
                continue

            plot_curves.plot_conversion_lift(
                data[model_pred_col].values,
                data[model.binary_resp_nme].values,
                data[model.weight_nme].values,
                n_bins=n_bins,
                title=f'Conversion Rate Lift Chart on {data_name}',
                ax=ax,
                show=False,
            )

        plt.tight_layout()
        plt.show()
    finally:
        # The sibling plot functions close their figures; do the same here so
        # repeated calls do not accumulate open figures.
        plt.close(fig)