ins-pricing 0.4.5__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +48 -22
- ins_pricing/__init__.py +142 -90
- ins_pricing/cli/BayesOpt_entry.py +52 -50
- ins_pricing/cli/BayesOpt_incremental.py +39 -105
- ins_pricing/cli/Explain_Run.py +31 -23
- ins_pricing/cli/Explain_entry.py +532 -579
- ins_pricing/cli/Pricing_Run.py +31 -23
- ins_pricing/cli/bayesopt_entry_runner.py +11 -9
- ins_pricing/cli/utils/cli_common.py +256 -256
- ins_pricing/cli/utils/cli_config.py +375 -375
- ins_pricing/cli/utils/import_resolver.py +382 -365
- ins_pricing/cli/utils/notebook_utils.py +340 -340
- ins_pricing/cli/watchdog_run.py +209 -201
- ins_pricing/frontend/__init__.py +10 -10
- ins_pricing/frontend/example_workflows.py +1 -1
- ins_pricing/governance/__init__.py +20 -20
- ins_pricing/governance/release.py +159 -159
- ins_pricing/modelling/__init__.py +147 -92
- ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +2 -2
- ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -562
- ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -964
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
- ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
- ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
- ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
- ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
- ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
- ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
- ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
- ins_pricing/modelling/explain/__init__.py +55 -55
- ins_pricing/modelling/explain/metrics.py +27 -174
- ins_pricing/modelling/explain/permutation.py +237 -237
- ins_pricing/modelling/plotting/__init__.py +40 -36
- ins_pricing/modelling/plotting/compat.py +228 -0
- ins_pricing/modelling/plotting/curves.py +572 -572
- ins_pricing/modelling/plotting/diagnostics.py +163 -163
- ins_pricing/modelling/plotting/geo.py +362 -362
- ins_pricing/modelling/plotting/importance.py +121 -121
- ins_pricing/pricing/__init__.py +27 -27
- ins_pricing/production/__init__.py +35 -25
- ins_pricing/production/{predict.py → inference.py} +140 -57
- ins_pricing/production/monitoring.py +8 -21
- ins_pricing/reporting/__init__.py +11 -11
- ins_pricing/setup.py +1 -1
- ins_pricing/tests/production/test_inference.py +90 -0
- ins_pricing/utils/__init__.py +116 -83
- ins_pricing/utils/device.py +255 -255
- ins_pricing/utils/features.py +53 -0
- ins_pricing/utils/io.py +72 -0
- ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
- ins_pricing/utils/metrics.py +158 -24
- ins_pricing/utils/numerics.py +76 -0
- ins_pricing/utils/paths.py +9 -1
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +182 -182
- ins_pricing-0.5.0.dist-info/RECORD +131 -0
- ins_pricing/modelling/core/BayesOpt.py +0 -146
- ins_pricing/modelling/core/__init__.py +0 -1
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
- ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
- ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
- ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
- ins_pricing/modelling/core/bayesopt/utils.py +0 -105
- ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
- ins_pricing/tests/production/test_predict.py +0 -233
- ins_pricing-0.4.5.dist-info/RECORD +0 -130
- /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
- /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
- /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,548 +1,482 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
from typing import List, Optional
|
|
5
|
-
|
|
6
|
-
try: # matplotlib is optional; avoid hard import failures in headless/minimal envs
|
|
7
|
-
import matplotlib
|
|
8
|
-
if os.name != "nt" and not os.environ.get("DISPLAY") and not os.environ.get("MPLBACKEND"):
|
|
9
|
-
matplotlib.use("Agg")
|
|
10
|
-
import matplotlib.pyplot as plt
|
|
11
|
-
_MPL_IMPORT_ERROR: Optional[BaseException] = None
|
|
12
|
-
except Exception as exc: # pragma: no cover - optional dependency
|
|
13
|
-
plt = None # type: ignore[assignment]
|
|
14
|
-
_MPL_IMPORT_ERROR = exc
|
|
15
|
-
|
|
16
|
-
import numpy as np
|
|
17
|
-
import pandas as pd
|
|
18
|
-
|
|
19
|
-
from .utils import EPS
|
|
20
|
-
|
|
21
|
-
try:
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
except Exception: # pragma: no cover - optional for legacy imports
|
|
26
|
-
try: # best-effort for non-package imports
|
|
27
|
-
from ins_pricing.plotting import curves as plot_curves
|
|
28
|
-
from ins_pricing.plotting import diagnostics as plot_diagnostics
|
|
29
|
-
from ins_pricing.plotting.common import PlotStyle, finalize_figure
|
|
30
|
-
except Exception: # pragma: no cover
|
|
31
|
-
plot_curves = None
|
|
32
|
-
plot_diagnostics = None
|
|
33
|
-
PlotStyle = None
|
|
34
|
-
finalize_figure = None
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def _plot_skip(label: str) -> None:
|
|
38
|
-
if _MPL_IMPORT_ERROR is not None:
|
|
39
|
-
print(f"[Plot] Skip {label}: matplotlib unavailable ({_MPL_IMPORT_ERROR}).", flush=True)
|
|
40
|
-
else:
|
|
41
|
-
print(f"[Plot] Skip {label}: matplotlib unavailable.", flush=True)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class BayesOptPlottingMixin:
|
|
45
|
-
def plot_oneway(
|
|
46
|
-
self,
|
|
47
|
-
n_bins=10,
|
|
48
|
-
pred_col: Optional[str] = None,
|
|
49
|
-
pred_label: Optional[str] = None,
|
|
50
|
-
pred_weighted: Optional[bool] = None,
|
|
51
|
-
plot_subdir: Optional[str] = None,
|
|
52
|
-
):
|
|
53
|
-
if plt is None and plot_diagnostics is None:
|
|
54
|
-
_plot_skip("oneway plot")
|
|
55
|
-
return
|
|
56
|
-
if pred_col is not None and pred_col not in self.train_data.columns:
|
|
57
|
-
print(
|
|
58
|
-
f"[Oneway] Missing prediction column '{pred_col}'; skip predicted line.",
|
|
59
|
-
flush=True,
|
|
60
|
-
)
|
|
61
|
-
pred_col = None
|
|
62
|
-
if pred_weighted is None and pred_col is not None:
|
|
63
|
-
pred_weighted = pred_col.startswith("w_pred_")
|
|
64
|
-
if pred_weighted is None:
|
|
65
|
-
pred_weighted = False
|
|
66
|
-
plot_subdir = plot_subdir.strip("/\\") if plot_subdir else "oneway"
|
|
67
|
-
plot_prefix = f"{self.model_nme}/{plot_subdir}"
|
|
68
|
-
|
|
69
|
-
def _safe_tag(value: str) -> str:
|
|
70
|
-
return (
|
|
71
|
-
value.strip()
|
|
72
|
-
.replace(" ", "_")
|
|
73
|
-
.replace("/", "_")
|
|
74
|
-
.replace("\\", "_")
|
|
75
|
-
.replace(":", "_")
|
|
76
|
-
)
|
|
77
|
-
|
|
78
|
-
if plot_diagnostics is None:
|
|
79
|
-
for c in self.factor_nmes:
|
|
80
|
-
fig = plt.figure(figsize=(7, 5))
|
|
81
|
-
if c in self.cate_list:
|
|
82
|
-
group_col = c
|
|
83
|
-
plot_source = self.train_data
|
|
84
|
-
else:
|
|
85
|
-
group_col = f'{c}_bins'
|
|
86
|
-
bins = pd.qcut(
|
|
87
|
-
self.train_data[c],
|
|
88
|
-
n_bins,
|
|
89
|
-
duplicates='drop' # Drop duplicate quantiles to avoid errors.
|
|
90
|
-
)
|
|
91
|
-
plot_source = self.train_data.assign(**{group_col: bins})
|
|
92
|
-
if pred_col is not None and pred_col in plot_source.columns:
|
|
93
|
-
if pred_weighted:
|
|
94
|
-
plot_source = plot_source.assign(
|
|
95
|
-
_pred_w=plot_source[pred_col]
|
|
96
|
-
)
|
|
97
|
-
else:
|
|
98
|
-
plot_source = plot_source.assign(
|
|
99
|
-
_pred_w=plot_source[pred_col] * plot_source[self.weight_nme]
|
|
100
|
-
)
|
|
101
|
-
plot_data = plot_source.groupby(
|
|
102
|
-
[group_col], observed=True).sum(numeric_only=True)
|
|
103
|
-
plot_data.reset_index(inplace=True)
|
|
104
|
-
plot_data['act_v'] = plot_data['w_act'] / \
|
|
105
|
-
plot_data[self.weight_nme]
|
|
106
|
-
if pred_col is not None and "_pred_w" in plot_data.columns:
|
|
107
|
-
plot_data["pred_v"] = plot_data["_pred_w"] / plot_data[self.weight_nme]
|
|
108
|
-
ax = fig.add_subplot(111)
|
|
109
|
-
ax.plot(plot_data.index, plot_data['act_v'],
|
|
110
|
-
label='Actual', color='red')
|
|
111
|
-
if pred_col is not None and "pred_v" in plot_data.columns:
|
|
112
|
-
ax.plot(
|
|
113
|
-
plot_data.index,
|
|
114
|
-
plot_data["pred_v"],
|
|
115
|
-
label=pred_label or "Predicted",
|
|
116
|
-
color="tab:blue",
|
|
117
|
-
)
|
|
118
|
-
ax.set_title(
|
|
119
|
-
'Analysis of %s : Train Data' % group_col,
|
|
120
|
-
fontsize=8)
|
|
121
|
-
plt.xticks(plot_data.index,
|
|
122
|
-
list(plot_data[group_col].astype(str)),
|
|
123
|
-
rotation=90)
|
|
124
|
-
if len(list(plot_data[group_col].astype(str))) > 50:
|
|
125
|
-
plt.xticks(fontsize=3)
|
|
126
|
-
else:
|
|
127
|
-
plt.xticks(fontsize=6)
|
|
128
|
-
plt.yticks(fontsize=6)
|
|
129
|
-
ax2 = ax.twinx()
|
|
130
|
-
ax2.bar(plot_data.index,
|
|
131
|
-
plot_data[self.weight_nme],
|
|
132
|
-
alpha=0.5, color='seagreen')
|
|
133
|
-
plt.yticks(fontsize=6)
|
|
134
|
-
plt.margins(0.05)
|
|
135
|
-
plt.subplots_adjust(wspace=0.3)
|
|
136
|
-
if pred_col is not None and "pred_v" in plot_data.columns:
|
|
137
|
-
ax.legend(fontsize=6)
|
|
138
|
-
pred_tag = _safe_tag(pred_label or pred_col) if pred_col else None
|
|
139
|
-
if pred_tag:
|
|
140
|
-
filename = f'00_{self.model_nme}_{group_col}_oneway_{pred_tag}.png'
|
|
141
|
-
else:
|
|
142
|
-
filename = f'00_{self.model_nme}_{group_col}_oneway.png'
|
|
143
|
-
save_path = self._resolve_plot_path(plot_prefix, filename)
|
|
144
|
-
plt.savefig(save_path, dpi=300)
|
|
145
|
-
plt.close(fig)
|
|
146
|
-
return
|
|
147
|
-
|
|
148
|
-
if "w_act" not in self.train_data.columns:
|
|
149
|
-
print("[Oneway] Missing w_act column; skip plotting.", flush=True)
|
|
150
|
-
return
|
|
151
|
-
|
|
152
|
-
for c in self.factor_nmes:
|
|
153
|
-
is_cat = c in (self.cate_list or [])
|
|
154
|
-
group_col = c if is_cat else f"{c}_bins"
|
|
155
|
-
title = f"Analysis of {group_col} : Train Data"
|
|
156
|
-
pred_tag = _safe_tag(pred_label or pred_col) if pred_col else None
|
|
157
|
-
if pred_tag:
|
|
158
|
-
filename = f"00_{self.model_nme}_{group_col}_oneway_{pred_tag}.png"
|
|
159
|
-
else:
|
|
160
|
-
filename = f"00_{self.model_nme}_{group_col}_oneway.png"
|
|
161
|
-
save_path = self._resolve_plot_path(plot_prefix, filename)
|
|
162
|
-
plot_diagnostics.plot_oneway(
|
|
163
|
-
self.train_data,
|
|
164
|
-
feature=c,
|
|
165
|
-
weight_col=self.weight_nme,
|
|
166
|
-
target_col="w_act",
|
|
167
|
-
pred_col=pred_col,
|
|
168
|
-
pred_weighted=pred_weighted,
|
|
169
|
-
pred_label=pred_label,
|
|
170
|
-
n_bins=n_bins,
|
|
171
|
-
is_categorical=is_cat,
|
|
172
|
-
title=title,
|
|
173
|
-
save_path=save_path,
|
|
174
|
-
show=False,
|
|
175
|
-
)
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
def _resolve_plot_path(self, subdir: Optional[str], filename: str) -> str:
|
|
179
|
-
style = str(getattr(self.config, "plot_path_style", "nested") or "nested").strip().lower()
|
|
180
|
-
if style in {"flat", "root"}:
|
|
181
|
-
return self.output_manager.plot_path(filename)
|
|
182
|
-
if subdir:
|
|
183
|
-
return self.output_manager.plot_path(f"{subdir}/{filename}")
|
|
184
|
-
return self.output_manager.plot_path(filename)
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
def plot_lift(self, model_label, pred_nme, n_bins=10):
|
|
188
|
-
if plt is None:
|
|
189
|
-
_plot_skip("lift plot")
|
|
190
|
-
return
|
|
191
|
-
model_map = {
|
|
192
|
-
'Xgboost': 'pred_xgb',
|
|
193
|
-
'ResNet': 'pred_resn',
|
|
194
|
-
'ResNetClassifier': 'pred_resn',
|
|
195
|
-
'GLM': 'pred_glm',
|
|
196
|
-
'GNN': 'pred_gnn',
|
|
197
|
-
}
|
|
198
|
-
if str(self.config.ft_role) == "model":
|
|
199
|
-
model_map.update({
|
|
200
|
-
'FTTransformer': 'pred_ft',
|
|
201
|
-
'FTTransformerClassifier': 'pred_ft',
|
|
202
|
-
})
|
|
203
|
-
for k, v in model_map.items():
|
|
204
|
-
if model_label.startswith(k):
|
|
205
|
-
pred_nme = v
|
|
206
|
-
break
|
|
207
|
-
safe_label = (
|
|
208
|
-
str(model_label)
|
|
209
|
-
.replace(" ", "_")
|
|
210
|
-
.replace("/", "_")
|
|
211
|
-
.replace("\\", "_")
|
|
212
|
-
.replace(":", "_")
|
|
213
|
-
)
|
|
214
|
-
plot_prefix = f"{self.model_nme}/lift"
|
|
215
|
-
filename = f"01_{self.model_nme}_{safe_label}_lift.png"
|
|
216
|
-
|
|
217
|
-
datasets = []
|
|
218
|
-
for title, data in [
|
|
219
|
-
('Lift Chart on Train Data', self.train_data),
|
|
220
|
-
('Lift Chart on Test Data', self.test_data),
|
|
221
|
-
]:
|
|
222
|
-
if 'w_act' not in data.columns or data['w_act'].isna().all():
|
|
223
|
-
print(
|
|
224
|
-
f"[Lift] Missing labels for {title}; skip.",
|
|
225
|
-
flush=True,
|
|
226
|
-
)
|
|
227
|
-
continue
|
|
228
|
-
datasets.append((title, data))
|
|
229
|
-
|
|
230
|
-
if not datasets:
|
|
231
|
-
print("[Lift] No labeled data available; skip plotting.", flush=True)
|
|
232
|
-
return
|
|
233
|
-
|
|
234
|
-
if plot_curves is None:
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
ax=
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
plt.
|
|
455
|
-
plt.
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
if total_weight > EPS:
|
|
485
|
-
plot_data['bin'] = pd.cut(
|
|
486
|
-
plot_data['cum_weight'],
|
|
487
|
-
bins=n_bins,
|
|
488
|
-
labels=False,
|
|
489
|
-
right=False
|
|
490
|
-
)
|
|
491
|
-
else:
|
|
492
|
-
plot_data['bin'] = 0
|
|
493
|
-
|
|
494
|
-
# Aggregate by bins.
|
|
495
|
-
lift_agg = plot_data.groupby('bin').agg(
|
|
496
|
-
total_weight=(self.weight_nme, 'sum'),
|
|
497
|
-
actual_conversions=(self.binary_resp_nme, 'sum'),
|
|
498
|
-
weighted_conversions=('w_binary_act', 'sum'),
|
|
499
|
-
avg_pred=(model_pred_col, 'mean')
|
|
500
|
-
).reset_index()
|
|
501
|
-
|
|
502
|
-
# Compute conversion rate.
|
|
503
|
-
lift_agg['conversion_rate'] = lift_agg['weighted_conversions'] / \
|
|
504
|
-
lift_agg['total_weight']
|
|
505
|
-
|
|
506
|
-
# Compute overall average conversion rate.
|
|
507
|
-
overall_conversion_rate = data['w_binary_act'].sum(
|
|
508
|
-
) / data[self.weight_nme].sum()
|
|
509
|
-
ax.axhline(y=overall_conversion_rate, color='gray', linestyle='--',
|
|
510
|
-
label=f'Overall Avg Rate ({overall_conversion_rate:.2%})')
|
|
511
|
-
|
|
512
|
-
ax.plot(lift_agg['bin'], lift_agg['conversion_rate'],
|
|
513
|
-
marker='o', linestyle='-', label='Actual Conversion Rate')
|
|
514
|
-
ax.set_title(f'Conversion Rate Lift Chart on {data_name}')
|
|
515
|
-
ax.set_xlabel(f'Model Score Decile (based on {model_pred_col})')
|
|
516
|
-
ax.set_ylabel('Conversion Rate')
|
|
517
|
-
ax.grid(True, linestyle='--', alpha=0.6)
|
|
518
|
-
ax.legend()
|
|
519
|
-
|
|
520
|
-
plt.tight_layout()
|
|
521
|
-
plt.show()
|
|
522
|
-
return
|
|
523
|
-
|
|
524
|
-
fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
|
|
525
|
-
datasets = {
|
|
526
|
-
'Train Data': self.train_data,
|
|
527
|
-
'Test Data': self.test_data
|
|
528
|
-
}
|
|
529
|
-
|
|
530
|
-
for ax, (data_name, data) in zip(axes, datasets.items()):
|
|
531
|
-
if model_pred_col not in data.columns:
|
|
532
|
-
print(f"Warning: missing prediction column '{model_pred_col}' in {data_name}. Skip plot.")
|
|
533
|
-
continue
|
|
534
|
-
|
|
535
|
-
plot_curves.plot_conversion_lift(
|
|
536
|
-
data[model_pred_col].values,
|
|
537
|
-
data[self.binary_resp_nme].values,
|
|
538
|
-
data[self.weight_nme].values,
|
|
539
|
-
n_bins=n_bins,
|
|
540
|
-
title=f'Conversion Rate Lift Chart on {data_name}',
|
|
541
|
-
ax=ax,
|
|
542
|
-
show=False,
|
|
543
|
-
)
|
|
544
|
-
|
|
545
|
-
plt.tight_layout()
|
|
546
|
-
plt.show()
|
|
547
|
-
|
|
548
|
-
# ========= Lightweight explainability: Permutation Importance =========
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
try: # matplotlib is optional; avoid hard import failures in headless/minimal envs
|
|
7
|
+
import matplotlib
|
|
8
|
+
if os.name != "nt" and not os.environ.get("DISPLAY") and not os.environ.get("MPLBACKEND"):
|
|
9
|
+
matplotlib.use("Agg")
|
|
10
|
+
import matplotlib.pyplot as plt
|
|
11
|
+
_MPL_IMPORT_ERROR: Optional[BaseException] = None
|
|
12
|
+
except Exception as exc: # pragma: no cover - optional dependency
|
|
13
|
+
plt = None # type: ignore[assignment]
|
|
14
|
+
_MPL_IMPORT_ERROR = exc
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
from ins_pricing.utils import EPS
|
|
20
|
+
|
|
21
|
+
try:
|
|
22
|
+
from ins_pricing.modelling.plotting import curves as plot_curves
|
|
23
|
+
from ins_pricing.modelling.plotting import diagnostics as plot_diagnostics
|
|
24
|
+
from ins_pricing.modelling.plotting.common import PlotStyle, finalize_figure
|
|
25
|
+
except Exception: # pragma: no cover - optional for legacy imports
|
|
26
|
+
try: # best-effort for non-package imports
|
|
27
|
+
from ins_pricing.plotting import curves as plot_curves
|
|
28
|
+
from ins_pricing.plotting import diagnostics as plot_diagnostics
|
|
29
|
+
from ins_pricing.plotting.common import PlotStyle, finalize_figure
|
|
30
|
+
except Exception: # pragma: no cover
|
|
31
|
+
plot_curves = None
|
|
32
|
+
plot_diagnostics = None
|
|
33
|
+
PlotStyle = None
|
|
34
|
+
finalize_figure = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _plot_skip(label: str) -> None:
|
|
38
|
+
if _MPL_IMPORT_ERROR is not None:
|
|
39
|
+
print(f"[Plot] Skip {label}: matplotlib unavailable ({_MPL_IMPORT_ERROR}).", flush=True)
|
|
40
|
+
else:
|
|
41
|
+
print(f"[Plot] Skip {label}: matplotlib unavailable.", flush=True)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class BayesOptPlottingMixin:
|
|
45
|
+
def plot_oneway(
|
|
46
|
+
self,
|
|
47
|
+
n_bins=10,
|
|
48
|
+
pred_col: Optional[str] = None,
|
|
49
|
+
pred_label: Optional[str] = None,
|
|
50
|
+
pred_weighted: Optional[bool] = None,
|
|
51
|
+
plot_subdir: Optional[str] = None,
|
|
52
|
+
):
|
|
53
|
+
if plt is None and plot_diagnostics is None:
|
|
54
|
+
_plot_skip("oneway plot")
|
|
55
|
+
return
|
|
56
|
+
if pred_col is not None and pred_col not in self.train_data.columns:
|
|
57
|
+
print(
|
|
58
|
+
f"[Oneway] Missing prediction column '{pred_col}'; skip predicted line.",
|
|
59
|
+
flush=True,
|
|
60
|
+
)
|
|
61
|
+
pred_col = None
|
|
62
|
+
if pred_weighted is None and pred_col is not None:
|
|
63
|
+
pred_weighted = pred_col.startswith("w_pred_")
|
|
64
|
+
if pred_weighted is None:
|
|
65
|
+
pred_weighted = False
|
|
66
|
+
plot_subdir = plot_subdir.strip("/\\") if plot_subdir else "oneway"
|
|
67
|
+
plot_prefix = f"{self.model_nme}/{plot_subdir}"
|
|
68
|
+
|
|
69
|
+
def _safe_tag(value: str) -> str:
|
|
70
|
+
return (
|
|
71
|
+
value.strip()
|
|
72
|
+
.replace(" ", "_")
|
|
73
|
+
.replace("/", "_")
|
|
74
|
+
.replace("\\", "_")
|
|
75
|
+
.replace(":", "_")
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
if plot_diagnostics is None:
|
|
79
|
+
for c in self.factor_nmes:
|
|
80
|
+
fig = plt.figure(figsize=(7, 5))
|
|
81
|
+
if c in self.cate_list:
|
|
82
|
+
group_col = c
|
|
83
|
+
plot_source = self.train_data
|
|
84
|
+
else:
|
|
85
|
+
group_col = f'{c}_bins'
|
|
86
|
+
bins = pd.qcut(
|
|
87
|
+
self.train_data[c],
|
|
88
|
+
n_bins,
|
|
89
|
+
duplicates='drop' # Drop duplicate quantiles to avoid errors.
|
|
90
|
+
)
|
|
91
|
+
plot_source = self.train_data.assign(**{group_col: bins})
|
|
92
|
+
if pred_col is not None and pred_col in plot_source.columns:
|
|
93
|
+
if pred_weighted:
|
|
94
|
+
plot_source = plot_source.assign(
|
|
95
|
+
_pred_w=plot_source[pred_col]
|
|
96
|
+
)
|
|
97
|
+
else:
|
|
98
|
+
plot_source = plot_source.assign(
|
|
99
|
+
_pred_w=plot_source[pred_col] * plot_source[self.weight_nme]
|
|
100
|
+
)
|
|
101
|
+
plot_data = plot_source.groupby(
|
|
102
|
+
[group_col], observed=True).sum(numeric_only=True)
|
|
103
|
+
plot_data.reset_index(inplace=True)
|
|
104
|
+
plot_data['act_v'] = plot_data['w_act'] / \
|
|
105
|
+
plot_data[self.weight_nme]
|
|
106
|
+
if pred_col is not None and "_pred_w" in plot_data.columns:
|
|
107
|
+
plot_data["pred_v"] = plot_data["_pred_w"] / plot_data[self.weight_nme]
|
|
108
|
+
ax = fig.add_subplot(111)
|
|
109
|
+
ax.plot(plot_data.index, plot_data['act_v'],
|
|
110
|
+
label='Actual', color='red')
|
|
111
|
+
if pred_col is not None and "pred_v" in plot_data.columns:
|
|
112
|
+
ax.plot(
|
|
113
|
+
plot_data.index,
|
|
114
|
+
plot_data["pred_v"],
|
|
115
|
+
label=pred_label or "Predicted",
|
|
116
|
+
color="tab:blue",
|
|
117
|
+
)
|
|
118
|
+
ax.set_title(
|
|
119
|
+
'Analysis of %s : Train Data' % group_col,
|
|
120
|
+
fontsize=8)
|
|
121
|
+
plt.xticks(plot_data.index,
|
|
122
|
+
list(plot_data[group_col].astype(str)),
|
|
123
|
+
rotation=90)
|
|
124
|
+
if len(list(plot_data[group_col].astype(str))) > 50:
|
|
125
|
+
plt.xticks(fontsize=3)
|
|
126
|
+
else:
|
|
127
|
+
plt.xticks(fontsize=6)
|
|
128
|
+
plt.yticks(fontsize=6)
|
|
129
|
+
ax2 = ax.twinx()
|
|
130
|
+
ax2.bar(plot_data.index,
|
|
131
|
+
plot_data[self.weight_nme],
|
|
132
|
+
alpha=0.5, color='seagreen')
|
|
133
|
+
plt.yticks(fontsize=6)
|
|
134
|
+
plt.margins(0.05)
|
|
135
|
+
plt.subplots_adjust(wspace=0.3)
|
|
136
|
+
if pred_col is not None and "pred_v" in plot_data.columns:
|
|
137
|
+
ax.legend(fontsize=6)
|
|
138
|
+
pred_tag = _safe_tag(pred_label or pred_col) if pred_col else None
|
|
139
|
+
if pred_tag:
|
|
140
|
+
filename = f'00_{self.model_nme}_{group_col}_oneway_{pred_tag}.png'
|
|
141
|
+
else:
|
|
142
|
+
filename = f'00_{self.model_nme}_{group_col}_oneway.png'
|
|
143
|
+
save_path = self._resolve_plot_path(plot_prefix, filename)
|
|
144
|
+
plt.savefig(save_path, dpi=300)
|
|
145
|
+
plt.close(fig)
|
|
146
|
+
return
|
|
147
|
+
|
|
148
|
+
if "w_act" not in self.train_data.columns:
|
|
149
|
+
print("[Oneway] Missing w_act column; skip plotting.", flush=True)
|
|
150
|
+
return
|
|
151
|
+
|
|
152
|
+
for c in self.factor_nmes:
|
|
153
|
+
is_cat = c in (self.cate_list or [])
|
|
154
|
+
group_col = c if is_cat else f"{c}_bins"
|
|
155
|
+
title = f"Analysis of {group_col} : Train Data"
|
|
156
|
+
pred_tag = _safe_tag(pred_label or pred_col) if pred_col else None
|
|
157
|
+
if pred_tag:
|
|
158
|
+
filename = f"00_{self.model_nme}_{group_col}_oneway_{pred_tag}.png"
|
|
159
|
+
else:
|
|
160
|
+
filename = f"00_{self.model_nme}_{group_col}_oneway.png"
|
|
161
|
+
save_path = self._resolve_plot_path(plot_prefix, filename)
|
|
162
|
+
plot_diagnostics.plot_oneway(
|
|
163
|
+
self.train_data,
|
|
164
|
+
feature=c,
|
|
165
|
+
weight_col=self.weight_nme,
|
|
166
|
+
target_col="w_act",
|
|
167
|
+
pred_col=pred_col,
|
|
168
|
+
pred_weighted=pred_weighted,
|
|
169
|
+
pred_label=pred_label,
|
|
170
|
+
n_bins=n_bins,
|
|
171
|
+
is_categorical=is_cat,
|
|
172
|
+
title=title,
|
|
173
|
+
save_path=save_path,
|
|
174
|
+
show=False,
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _resolve_plot_path(self, subdir: Optional[str], filename: str) -> str:
|
|
179
|
+
style = str(getattr(self.config, "plot_path_style", "nested") or "nested").strip().lower()
|
|
180
|
+
if style in {"flat", "root"}:
|
|
181
|
+
return self.output_manager.plot_path(filename)
|
|
182
|
+
if subdir:
|
|
183
|
+
return self.output_manager.plot_path(f"{subdir}/{filename}")
|
|
184
|
+
return self.output_manager.plot_path(filename)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def plot_lift(self, model_label, pred_nme, n_bins=10):
|
|
188
|
+
if plt is None:
|
|
189
|
+
_plot_skip("lift plot")
|
|
190
|
+
return
|
|
191
|
+
model_map = {
|
|
192
|
+
'Xgboost': 'pred_xgb',
|
|
193
|
+
'ResNet': 'pred_resn',
|
|
194
|
+
'ResNetClassifier': 'pred_resn',
|
|
195
|
+
'GLM': 'pred_glm',
|
|
196
|
+
'GNN': 'pred_gnn',
|
|
197
|
+
}
|
|
198
|
+
if str(self.config.ft_role) == "model":
|
|
199
|
+
model_map.update({
|
|
200
|
+
'FTTransformer': 'pred_ft',
|
|
201
|
+
'FTTransformerClassifier': 'pred_ft',
|
|
202
|
+
})
|
|
203
|
+
for k, v in model_map.items():
|
|
204
|
+
if model_label.startswith(k):
|
|
205
|
+
pred_nme = v
|
|
206
|
+
break
|
|
207
|
+
safe_label = (
|
|
208
|
+
str(model_label)
|
|
209
|
+
.replace(" ", "_")
|
|
210
|
+
.replace("/", "_")
|
|
211
|
+
.replace("\\", "_")
|
|
212
|
+
.replace(":", "_")
|
|
213
|
+
)
|
|
214
|
+
plot_prefix = f"{self.model_nme}/lift"
|
|
215
|
+
filename = f"01_{self.model_nme}_{safe_label}_lift.png"
|
|
216
|
+
|
|
217
|
+
datasets = []
|
|
218
|
+
for title, data in [
|
|
219
|
+
('Lift Chart on Train Data', self.train_data),
|
|
220
|
+
('Lift Chart on Test Data', self.test_data),
|
|
221
|
+
]:
|
|
222
|
+
if 'w_act' not in data.columns or data['w_act'].isna().all():
|
|
223
|
+
print(
|
|
224
|
+
f"[Lift] Missing labels for {title}; skip.",
|
|
225
|
+
flush=True,
|
|
226
|
+
)
|
|
227
|
+
continue
|
|
228
|
+
datasets.append((title, data))
|
|
229
|
+
|
|
230
|
+
if not datasets:
|
|
231
|
+
print("[Lift] No labeled data available; skip plotting.", flush=True)
|
|
232
|
+
return
|
|
233
|
+
|
|
234
|
+
if plot_curves is None:
|
|
235
|
+
_plot_skip("lift plot")
|
|
236
|
+
return
|
|
237
|
+
|
|
238
|
+
style = PlotStyle() if PlotStyle else None
|
|
239
|
+
fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
|
|
240
|
+
if len(datasets) == 1:
|
|
241
|
+
axes = [axes]
|
|
242
|
+
|
|
243
|
+
for ax, (title, data) in zip(axes, datasets):
|
|
244
|
+
pred_vals = None
|
|
245
|
+
if pred_nme in data.columns:
|
|
246
|
+
pred_vals = data[pred_nme].values
|
|
247
|
+
else:
|
|
248
|
+
w_pred_col = f"w_{pred_nme}"
|
|
249
|
+
if w_pred_col in data.columns:
|
|
250
|
+
denom = np.maximum(data[self.weight_nme].values, EPS)
|
|
251
|
+
pred_vals = data[w_pred_col].values / denom
|
|
252
|
+
if pred_vals is None:
|
|
253
|
+
print(
|
|
254
|
+
f"[Lift] Missing prediction columns in {title}; skip.",
|
|
255
|
+
flush=True,
|
|
256
|
+
)
|
|
257
|
+
continue
|
|
258
|
+
|
|
259
|
+
plot_curves.plot_lift_curve(
|
|
260
|
+
pred_vals,
|
|
261
|
+
data['w_act'].values,
|
|
262
|
+
data[self.weight_nme].values,
|
|
263
|
+
n_bins=n_bins,
|
|
264
|
+
title=title,
|
|
265
|
+
pred_label="Predicted",
|
|
266
|
+
act_label="Actual",
|
|
267
|
+
weight_label="Earned Exposure",
|
|
268
|
+
pred_weighted=False,
|
|
269
|
+
actual_weighted=True,
|
|
270
|
+
ax=ax,
|
|
271
|
+
show=False,
|
|
272
|
+
style=style,
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
plt.subplots_adjust(wspace=0.3)
|
|
276
|
+
save_path = self._resolve_plot_path(plot_prefix, filename)
|
|
277
|
+
if finalize_figure:
|
|
278
|
+
finalize_figure(fig, save_path=save_path, show=True, style=style)
|
|
279
|
+
else:
|
|
280
|
+
plt.savefig(save_path, dpi=300)
|
|
281
|
+
plt.show()
|
|
282
|
+
plt.close(fig)
|
|
283
|
+
|
|
284
|
+
# Double lift curve plot.
|
|
285
|
+
|
|
286
|
+
def plot_dlift(self, model_comp: List[str] = ['xgb', 'resn'], n_bins: int = 10) -> None:
    """Draw double lift charts comparing two models on the train/test data.

    One panel is drawn for each of `self.train_data` / `self.test_data` that
    has labels (`w_act`) and predictions for both models. Datasets that
    cannot be plotted are skipped *before* the figure is created, so the
    saved figure never contains an empty panel.

    Args:
        model_comp: exactly two model keys to compare (e.g. ['xgb', 'resn']).
            The default list is only read, never mutated.
        n_bins: number of bins forwarded to the double lift curve plotter.

    Raises:
        ValueError: if `model_comp` does not contain exactly two keys, or a
            key is not one of the supported model keys.
    """
    if plt is None:
        _plot_skip("double lift plot")
        return
    if len(model_comp) != 2:
        raise ValueError("`model_comp` must contain two models to compare.")

    model_name_map = {
        'xgb': 'Xgboost',
        'resn': 'ResNet',
        'glm': 'GLM',
        'gnn': 'GNN',
    }
    # The FT key is only comparable when the config says FT acts as a model.
    if str(self.config.ft_role) == "model":
        model_name_map['ft'] = 'FTTransformer'

    name1, name2 = model_comp
    if name1 not in model_name_map or name2 not in model_name_map:
        raise ValueError(f"Unsupported model key. Choose from {list(model_name_map.keys())}.")
    plot_prefix = f"{self.model_nme}/double_lift"
    filename = f"02_{self.model_nme}_dlift_{name1}_vs_{name2}.png"

    # Keep only datasets that actually carry labels.
    datasets = []
    for data_name, data in [('Train Data', self.train_data),
                            ('Test Data', self.test_data)]:
        if 'w_act' not in data.columns or data['w_act'].isna().all():
            print(
                f"[Double Lift] Missing labels for {data_name}; skip.",
                flush=True,
            )
            continue
        datasets.append((data_name, data))

    if not datasets:
        print("[Double Lift] No labeled data available; skip plotting.", flush=True)
        return

    if plot_curves is None:
        _plot_skip("double lift plot")
        return

    def _resolve_pred(data, weight_vals, name):
        # Prefer the plain `pred_<name>` column; otherwise divide the
        # `w_pred_<name>` column by the weights (floored at EPS to avoid
        # division by zero). Returns None when neither column exists.
        pred_col = f"pred_{name}"
        if pred_col in data.columns:
            return data[pred_col].values
        w_pred_col = f"w_pred_{name}"
        if w_pred_col in data.columns:
            return data[w_pred_col].values / np.maximum(weight_vals, EPS)
        return None

    # Resolve predictions up front so the subplot grid matches what is drawn.
    panels = []
    for data_name, data in datasets:
        weight_vals = data[self.weight_nme].values
        pred1 = _resolve_pred(data, weight_vals, name1)
        pred2 = _resolve_pred(data, weight_vals, name2)
        if pred1 is None or pred2 is None:
            print(
                f"Warning: missing pred_{name1}/pred_{name2} or w_pred columns in {data_name}. Skip plot.",
                flush=True,
            )
            continue
        panels.append((data_name, data, weight_vals, pred1, pred2))

    if not panels:
        print("[Double Lift] No prediction columns available; skip plotting.", flush=True)
        return

    style = PlotStyle() if PlotStyle else None
    fig, axes = plt.subplots(1, len(panels), figsize=(11, 5))
    if len(panels) == 1:
        # plt.subplots returns a bare Axes (not an array) for a 1x1 grid.
        axes = [axes]

    label1 = model_name_map[name1]
    label2 = model_name_map[name2]

    for ax, (data_name, data, weight_vals, pred1, pred2) in zip(axes, panels):
        plot_curves.plot_double_lift_curve(
            pred1,
            pred2,
            data['w_act'].values,
            weight_vals,
            n_bins=n_bins,
            title=f"Double Lift Chart on {data_name}",
            label1=label1,
            label2=label2,
            pred1_weighted=False,
            pred2_weighted=False,
            actual_weighted=True,
            ax=ax,
            show=False,
            style=style,
        )

    plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8, wspace=0.3)
    save_path = self._resolve_plot_path(plot_prefix, filename)
    if finalize_figure:
        finalize_figure(fig, save_path=save_path, show=True, style=style)
    else:
        plt.savefig(save_path, dpi=300)
        plt.show()
    # Closing an already-closed figure is a no-op, so this is safe whether or
    # not finalize_figure released the figure itself.
    plt.close(fig)
|
|
390
|
+
|
|
391
|
+
# Conversion lift curve plot.
|
|
392
|
+
|
|
393
|
+
def plot_conversion_lift(self, model_pred_col: str, n_bins: int = 20):
    """Plot conversion-rate lift charts for the train and test data.

    A 1x2 figure is created (train on the left, test on the right). Each
    panel is drawn by `plot_curves.plot_conversion_lift` when `plot_curves`
    is available, otherwise by a built-in matplotlib/pandas fallback. The
    figure is shown and then closed so repeated calls do not accumulate
    open figures.

    Args:
        model_pred_col: name of the prediction column used to rank rows.
        n_bins: number of bins along the ranked, cumulative-weight axis.
    """
    if plt is None:
        _plot_skip("conversion lift plot")
        return
    if not self.binary_resp_nme:
        print("Error: `binary_resp_nme` not provided at BayesOptModel init; cannot plot conversion lift.")
        return

    def _draw_fallback(ax, data_name, data):
        # Fallback used when the shared `plot_curves` helpers are missing.
        # Sort by model prediction and compute bins.
        plot_data = data.sort_values(by=model_pred_col).copy()
        plot_data['cum_weight'] = plot_data[self.weight_nme].cumsum()
        total_weight = plot_data[self.weight_nme].sum()

        if total_weight > EPS:
            plot_data['bin'] = pd.cut(
                plot_data['cum_weight'],
                bins=n_bins,
                labels=False,
                right=False
            )
        else:
            # No usable weight: put every row into a single bin.
            plot_data['bin'] = 0

        # Aggregate by bins.
        lift_agg = plot_data.groupby('bin').agg(
            total_weight=(self.weight_nme, 'sum'),
            actual_conversions=(self.binary_resp_nme, 'sum'),
            weighted_conversions=('w_binary_act', 'sum'),
            avg_pred=(model_pred_col, 'mean')
        ).reset_index()

        # Compute conversion rate.
        lift_agg['conversion_rate'] = lift_agg['weighted_conversions'] / \
            lift_agg['total_weight']

        # Compute overall average conversion rate.
        overall_conversion_rate = data['w_binary_act'].sum(
        ) / data[self.weight_nme].sum()
        ax.axhline(y=overall_conversion_rate, color='gray', linestyle='--',
                   label=f'Overall Avg Rate ({overall_conversion_rate:.2%})')

        ax.plot(lift_agg['bin'], lift_agg['conversion_rate'],
                marker='o', linestyle='-', label='Actual Conversion Rate')
        ax.set_title(f'Conversion Rate Lift Chart on {data_name}')
        ax.set_xlabel(f'Model Score Decile (based on {model_pred_col})')
        ax.set_ylabel('Conversion Rate')
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend()

    fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
    datasets = {
        'Train Data': self.train_data,
        'Test Data': self.test_data
    }

    for ax, (data_name, data) in zip(axes, datasets.items()):
        if model_pred_col not in data.columns:
            print(f"Warning: missing prediction column '{model_pred_col}' in {data_name}. Skip plot.")
            continue

        if plot_curves is None:
            _draw_fallback(ax, data_name, data)
            continue

        plot_curves.plot_conversion_lift(
            data[model_pred_col].values,
            data[self.binary_resp_nme].values,
            data[self.weight_nme].values,
            n_bins=n_bins,
            title=f'Conversion Rate Lift Chart on {data_name}',
            ax=ax,
            show=False,
        )

    plt.tight_layout()
    plt.show()
    # Release the figure after display, as the double lift plot does;
    # otherwise each call leaves one more figure open in pyplot.
    plt.close(fig)
|
|
481
|
+
|
|
482
|
+
# ========= Lightweight explainability: Permutation Importance =========
|