ins_pricing-0.1.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +60 -0
- ins_pricing/__init__.py +102 -0
- ins_pricing/governance/README.md +18 -0
- ins_pricing/governance/__init__.py +20 -0
- ins_pricing/governance/approval.py +93 -0
- ins_pricing/governance/audit.py +37 -0
- ins_pricing/governance/registry.py +99 -0
- ins_pricing/governance/release.py +159 -0
- ins_pricing/modelling/BayesOpt.py +146 -0
- ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
- ins_pricing/modelling/BayesOpt_entry.py +575 -0
- ins_pricing/modelling/BayesOpt_incremental.py +731 -0
- ins_pricing/modelling/Explain_Run.py +36 -0
- ins_pricing/modelling/Explain_entry.py +539 -0
- ins_pricing/modelling/Pricing_Run.py +36 -0
- ins_pricing/modelling/README.md +33 -0
- ins_pricing/modelling/__init__.py +44 -0
- ins_pricing/modelling/bayesopt/__init__.py +98 -0
- ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
- ins_pricing/modelling/bayesopt/core.py +1476 -0
- ins_pricing/modelling/bayesopt/models.py +2196 -0
- ins_pricing/modelling/bayesopt/trainers.py +2446 -0
- ins_pricing/modelling/bayesopt/utils.py +1021 -0
- ins_pricing/modelling/cli_common.py +136 -0
- ins_pricing/modelling/explain/__init__.py +55 -0
- ins_pricing/modelling/explain/gradients.py +334 -0
- ins_pricing/modelling/explain/metrics.py +176 -0
- ins_pricing/modelling/explain/permutation.py +155 -0
- ins_pricing/modelling/explain/shap_utils.py +146 -0
- ins_pricing/modelling/notebook_utils.py +284 -0
- ins_pricing/modelling/plotting/__init__.py +45 -0
- ins_pricing/modelling/plotting/common.py +63 -0
- ins_pricing/modelling/plotting/curves.py +572 -0
- ins_pricing/modelling/plotting/diagnostics.py +139 -0
- ins_pricing/modelling/plotting/geo.py +362 -0
- ins_pricing/modelling/plotting/importance.py +121 -0
- ins_pricing/modelling/run_logging.py +133 -0
- ins_pricing/modelling/tests/conftest.py +8 -0
- ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing/modelling/tests/test_explain.py +56 -0
- ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing/modelling/tests/test_plotting.py +63 -0
- ins_pricing/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing/modelling/watchdog_run.py +211 -0
- ins_pricing/pricing/README.md +44 -0
- ins_pricing/pricing/__init__.py +27 -0
- ins_pricing/pricing/calibration.py +39 -0
- ins_pricing/pricing/data_quality.py +117 -0
- ins_pricing/pricing/exposure.py +85 -0
- ins_pricing/pricing/factors.py +91 -0
- ins_pricing/pricing/monitoring.py +99 -0
- ins_pricing/pricing/rate_table.py +78 -0
- ins_pricing/production/__init__.py +21 -0
- ins_pricing/production/drift.py +30 -0
- ins_pricing/production/monitoring.py +143 -0
- ins_pricing/production/scoring.py +40 -0
- ins_pricing/reporting/README.md +20 -0
- ins_pricing/reporting/__init__.py +11 -0
- ins_pricing/reporting/report_builder.py +72 -0
- ins_pricing/reporting/scheduler.py +45 -0
- ins_pricing/setup.py +41 -0
- ins_pricing v2/__init__.py +23 -0
- ins_pricing v2/governance/__init__.py +20 -0
- ins_pricing v2/governance/approval.py +93 -0
- ins_pricing v2/governance/audit.py +37 -0
- ins_pricing v2/governance/registry.py +99 -0
- ins_pricing v2/governance/release.py +159 -0
- ins_pricing v2/modelling/Explain_Run.py +36 -0
- ins_pricing v2/modelling/Pricing_Run.py +36 -0
- ins_pricing v2/modelling/__init__.py +151 -0
- ins_pricing v2/modelling/cli_common.py +141 -0
- ins_pricing v2/modelling/config.py +249 -0
- ins_pricing v2/modelling/config_preprocess.py +254 -0
- ins_pricing v2/modelling/core.py +741 -0
- ins_pricing v2/modelling/data_container.py +42 -0
- ins_pricing v2/modelling/explain/__init__.py +55 -0
- ins_pricing v2/modelling/explain/gradients.py +334 -0
- ins_pricing v2/modelling/explain/metrics.py +176 -0
- ins_pricing v2/modelling/explain/permutation.py +155 -0
- ins_pricing v2/modelling/explain/shap_utils.py +146 -0
- ins_pricing v2/modelling/features.py +215 -0
- ins_pricing v2/modelling/model_manager.py +148 -0
- ins_pricing v2/modelling/model_plotting.py +463 -0
- ins_pricing v2/modelling/models.py +2203 -0
- ins_pricing v2/modelling/notebook_utils.py +294 -0
- ins_pricing v2/modelling/plotting/__init__.py +45 -0
- ins_pricing v2/modelling/plotting/common.py +63 -0
- ins_pricing v2/modelling/plotting/curves.py +572 -0
- ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
- ins_pricing v2/modelling/plotting/geo.py +362 -0
- ins_pricing v2/modelling/plotting/importance.py +121 -0
- ins_pricing v2/modelling/run_logging.py +133 -0
- ins_pricing v2/modelling/tests/conftest.py +8 -0
- ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing v2/modelling/tests/test_explain.py +56 -0
- ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing v2/modelling/tests/test_plotting.py +63 -0
- ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing v2/modelling/trainers.py +2447 -0
- ins_pricing v2/modelling/utils.py +1020 -0
- ins_pricing v2/modelling/watchdog_run.py +211 -0
- ins_pricing v2/pricing/__init__.py +27 -0
- ins_pricing v2/pricing/calibration.py +39 -0
- ins_pricing v2/pricing/data_quality.py +117 -0
- ins_pricing v2/pricing/exposure.py +85 -0
- ins_pricing v2/pricing/factors.py +91 -0
- ins_pricing v2/pricing/monitoring.py +99 -0
- ins_pricing v2/pricing/rate_table.py +78 -0
- ins_pricing v2/production/__init__.py +21 -0
- ins_pricing v2/production/drift.py +30 -0
- ins_pricing v2/production/monitoring.py +143 -0
- ins_pricing v2/production/scoring.py +40 -0
- ins_pricing v2/reporting/__init__.py +11 -0
- ins_pricing v2/reporting/report_builder.py +72 -0
- ins_pricing v2/reporting/scheduler.py +45 -0
- ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
- ins_pricing v2/scripts/Explain_entry.py +545 -0
- ins_pricing v2/scripts/__init__.py +1 -0
- ins_pricing v2/scripts/train.py +568 -0
- ins_pricing v2/setup.py +55 -0
- ins_pricing v2/smoke_test.py +28 -0
- ins_pricing-0.1.6.dist-info/METADATA +78 -0
- ins_pricing-0.1.6.dist-info/RECORD +169 -0
- ins_pricing-0.1.6.dist-info/WHEEL +5 -0
- ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
- user_packages/__init__.py +105 -0
- user_packages legacy/BayesOpt.py +5659 -0
- user_packages legacy/BayesOpt_entry.py +513 -0
- user_packages legacy/BayesOpt_incremental.py +685 -0
- user_packages legacy/Pricing_Run.py +36 -0
- user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
- user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
- user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
- user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
- user_packages legacy/Try/BayesOpt legacy.py +3280 -0
- user_packages legacy/Try/BayesOpt.py +838 -0
- user_packages legacy/Try/BayesOptAll.py +1569 -0
- user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
- user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
- user_packages legacy/Try/BayesOptSearch.py +830 -0
- user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
- user_packages legacy/Try/BayesOptV1.py +1911 -0
- user_packages legacy/Try/BayesOptV10.py +2973 -0
- user_packages legacy/Try/BayesOptV11.py +3001 -0
- user_packages legacy/Try/BayesOptV12.py +3001 -0
- user_packages legacy/Try/BayesOptV2.py +2065 -0
- user_packages legacy/Try/BayesOptV3.py +2209 -0
- user_packages legacy/Try/BayesOptV4.py +2342 -0
- user_packages legacy/Try/BayesOptV5.py +2372 -0
- user_packages legacy/Try/BayesOptV6.py +2759 -0
- user_packages legacy/Try/BayesOptV7.py +2832 -0
- user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
- user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
- user_packages legacy/Try/BayesOptV9.py +2927 -0
- user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
- user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
- user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
- user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
- user_packages legacy/Try/xgbbayesopt.py +523 -0
- user_packages legacy/__init__.py +19 -0
- user_packages legacy/cli_common.py +124 -0
- user_packages legacy/notebook_utils.py +228 -0
- user_packages legacy/watchdog_run.py +202 -0
@@ -0,0 +1,463 @@
from __future__ import annotations

import pandas as pd
import numpy as np
import os
from typing import Optional, List, TYPE_CHECKING, Any

if TYPE_CHECKING:
    from .core import BayesOptModel

try:
    import matplotlib
    if os.name != "nt" and not os.environ.get("DISPLAY") and not os.environ.get("MPLBACKEND"):
        matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    _MPL_IMPORT_ERROR: Optional[BaseException] = None
except Exception as exc:
    plt = None
    _MPL_IMPORT_ERROR = exc

from .utils import PlotUtils, EPS

try:
    from .plotting import curves as plot_curves
    from .plotting import diagnostics as plot_diagnostics
    from .plotting.common import PlotStyle, finalize_figure
except Exception:
    # Fallback if imports fail (e.g. running from wrong dir)
    try:
        from ins_pricing.plotting import curves as plot_curves
        from ins_pricing.plotting import diagnostics as plot_diagnostics
        from ins_pricing.plotting.common import PlotStyle, finalize_figure
    except Exception:
        plot_curves = None
        plot_diagnostics = None
        PlotStyle = None
        finalize_figure = None


def _plot_skip(label: str) -> None:
    if _MPL_IMPORT_ERROR is not None:
        print(f"[Plot] Skip {label}: matplotlib unavailable ({_MPL_IMPORT_ERROR}).", flush=True)
    else:
        print(f"[Plot] Skip {label}: matplotlib unavailable.", flush=True)


def plot_oneway(model: "BayesOptModel", n_bins=10):
    if plt is None and plot_diagnostics is None:
        _plot_skip("oneway plot")
        return
    if plot_diagnostics is None:
        # Legacy oneway implementation (inline)
        for c in model.factor_nmes:
            fig = plt.figure(figsize=(7, 5))
            if c in model.cate_list:
                group_col = c
                plot_source = model.train_data
            else:
                group_col = f'{c}_bins'
                bins = pd.qcut(
                    model.train_data[c],
                    n_bins,
                    duplicates='drop'
                )
                plot_source = model.train_data.assign(**{group_col: bins})
            plot_data = plot_source.groupby(
                [group_col], observed=True).sum(numeric_only=True)
            plot_data.reset_index(inplace=True)
            plot_data['act_v'] = plot_data['w_act'] / \
                plot_data[model.weight_nme]
            ax = fig.add_subplot(111)
            ax.plot(plot_data.index, plot_data['act_v'],
                    label='Actual', color='red')
            ax.set_title(
                'Analysis of %s : Train Data' % group_col,
                fontsize=8)
            plt.xticks(plot_data.index,
                       list(plot_data[group_col].astype(str)),
                       rotation=90)
            if len(list(plot_data[group_col].astype(str))) > 50:
                plt.xticks(fontsize=3)
            else:
                plt.xticks(fontsize=6)
            plt.yticks(fontsize=6)
            ax2 = ax.twinx()
            ax2.bar(plot_data.index,
                    plot_data[model.weight_nme],
                    alpha=0.5, color='seagreen')
            plt.yticks(fontsize=6)
            plt.margins(0.05)
            plt.subplots_adjust(wspace=0.3)
            save_path = model.output_manager.plot_path(
                f'00_{model.model_nme}_{group_col}_oneway.png')
            plt.savefig(save_path, dpi=300)
            plt.close(fig)
        return

    if "w_act" not in model.train_data.columns:
        print("[Oneway] Missing w_act column; skip plotting.", flush=True)
        return

    for c in model.factor_nmes:
        is_cat = c in (model.cate_list or [])
        group_col = c if is_cat else f"{c}_bins"
        title = f"Analysis of {group_col} : Train Data"
        save_path = model.output_manager.plot_path(
            f"00_{model.model_nme}_{group_col}_oneway.png"
        )
        plot_diagnostics.plot_oneway(
            model.train_data,
            feature=c,
            weight_col=model.weight_nme,
            target_col="w_act",
            n_bins=n_bins,
            is_categorical=is_cat,
            title=title,
            save_path=save_path,
            show=False,
        )

def plot_lift(model: "BayesOptModel", model_label, pred_nme, n_bins=10):
    if plt is None:
        _plot_skip("lift plot")
        return
    model_map = {
        'Xgboost': 'pred_xgb',
        'ResNet': 'pred_resn',
        'ResNetClassifier': 'pred_resn',
        'GLM': 'pred_glm',
        'GNN': 'pred_gnn',
    }
    if str(model.config.ft_role) == "model":
        model_map.update({
            'FTTransformer': 'pred_ft',
            'FTTransformerClassifier': 'pred_ft',
        })
    for k, v in model_map.items():
        if model_label.startswith(k):
            pred_nme = v
            break

    datasets = []
    for title, data in [
        ('Lift Chart on Train Data', model.train_data),
        ('Lift Chart on Test Data', model.test_data),
    ]:
        if 'w_act' not in data.columns or data['w_act'].isna().all():
            print(
                f"[Lift] Missing labels for {title}; skip.",
                flush=True,
            )
            continue
        datasets.append((title, data))

    if not datasets:
        print("[Lift] No labeled data available; skip plotting.", flush=True)
        return

    if plot_curves is None:
        # Legacy inline plotting
        fig = plt.figure(figsize=(11, 5))
        positions = [111] if len(datasets) == 1 else [121, 122]
        for pos, (title, data) in zip(positions, datasets):
            if pred_nme not in data.columns or f'w_{pred_nme}' not in data.columns:
                print(
                    f"[Lift] Missing prediction columns in {title}; skip.",
                    flush=True
                )
                continue
            lift_df = pd.DataFrame({
                'pred': data[pred_nme].values,
                'w_pred': data[f'w_{pred_nme}'].values,
                'act': data['w_act'].values,
                'weight': data[model.weight_nme].values
            })
            plot_data = PlotUtils.split_data(lift_df, 'pred', 'weight', n_bins)
            denom = np.maximum(plot_data['weight'], EPS)
            plot_data['exp_v'] = plot_data['w_pred'] / denom
            plot_data['act_v'] = plot_data['act'] / denom
            plot_data = plot_data.reset_index()

            ax = fig.add_subplot(pos)
            PlotUtils.plot_lift_ax(ax, plot_data, title)

        plt.subplots_adjust(wspace=0.3)
        save_path = model.output_manager.plot_path(
            f'01_{model.model_nme}_{model_label}_lift.png')
        plt.savefig(save_path, dpi=300)
        plt.close(fig)
        return

    style = PlotStyle() if PlotStyle else None
    fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
    if len(datasets) == 1:
        axes = [axes]

    for ax, (title, data) in zip(axes, datasets):
        pred_vals = None
        if pred_nme in data.columns:
            pred_vals = data[pred_nme].values
        else:
            w_pred_col = f"w_{pred_nme}"
            if w_pred_col in data.columns:
                denom = np.maximum(data[model.weight_nme].values, EPS)
                pred_vals = data[w_pred_col].values / denom
        if pred_vals is None:
            print(
                f"[Lift] Missing prediction columns in {title}; skip.",
                flush=True,
            )
            continue

        plot_curves.plot_lift_curve(
            pred_vals,
            data['w_act'].values,
            data[model.weight_nme].values,
            n_bins=n_bins,
            title=title,
            pred_label="Predicted",
            act_label="Actual",
            weight_label="Earned Exposure",
            pred_weighted=False,
            actual_weighted=True,
            ax=ax,
            show=False,
            style=style,
        )

    plt.subplots_adjust(wspace=0.3)
    save_path = model.output_manager.plot_path(
        f'01_{model.model_nme}_{model_label}_lift.png')
    if finalize_figure:
        finalize_figure(fig, save_path=save_path, show=True, style=style)
    else:
        plt.savefig(save_path, dpi=300)
        plt.close(fig)

def plot_dlift(model: "BayesOptModel", model_comp: List[str] = ['xgb', 'resn'], n_bins: int = 10) -> None:
    if plt is None:
        _plot_skip("double lift plot")
        return
    if len(model_comp) != 2:
        raise ValueError("`model_comp` must contain two models to compare.")

    model_name_map = {
        'xgb': 'Xgboost',
        'resn': 'ResNet',
        'glm': 'GLM',
        'gnn': 'GNN',
    }
    if str(model.config.ft_role) == "model":
        model_name_map['ft'] = 'FTTransformer'

    name1, name2 = model_comp
    if name1 not in model_name_map or name2 not in model_name_map:
        raise ValueError(f"Unsupported model key. Choose from {list(model_name_map.keys())}.")

    datasets = []
    for data_name, data in [('Train Data', model.train_data),
                            ('Test Data', model.test_data)]:
        if 'w_act' not in data.columns or data['w_act'].isna().all():
            print(
                f"[Double Lift] Missing labels for {data_name}; skip.",
                flush=True,
            )
            continue
        datasets.append((data_name, data))

    if not datasets:
        print("[Double Lift] No labeled data available; skip plotting.", flush=True)
        return

    if plot_curves is None:
        # Legacy
        fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
        if len(datasets) == 1:
            axes = [axes]

        for ax, (data_name, data) in zip(axes, datasets):
            pred1_col = f'w_pred_{name1}'
            pred2_col = f'w_pred_{name2}'

            if pred1_col not in data.columns or pred2_col not in data.columns:
                print(
                    f"Warning: missing prediction columns {pred1_col} or {pred2_col} in {data_name}. Skip plot.")
                continue

            lift_data = pd.DataFrame({
                'pred1': data[pred1_col].values,
                'pred2': data[pred2_col].values,
                'diff_ly': data[pred1_col].values / np.maximum(data[pred2_col].values, EPS),
                'act': data['w_act'].values,
                'weight': data[model.weight_nme].values
            })
            plot_data = PlotUtils.split_data(
                lift_data, 'diff_ly', 'weight', n_bins)
            denom = np.maximum(plot_data['act'], EPS)
            plot_data['exp_v1'] = plot_data['pred1'] / denom
            plot_data['exp_v2'] = plot_data['pred2'] / denom
            plot_data['act_v'] = plot_data['act'] / denom
            plot_data.reset_index(inplace=True)

            label1 = model_name_map[name1]
            label2 = model_name_map[name2]

            PlotUtils.plot_dlift_ax(
                ax, plot_data, f'Double Lift Chart on {data_name}', label1, label2)

        plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8, wspace=0.3)
        save_path = model.output_manager.plot_path(
            f'02_{model.model_nme}_dlift_{name1}_vs_{name2}.png')
        plt.savefig(save_path, dpi=300)
        plt.close(fig)
        return

    style = PlotStyle() if PlotStyle else None
    fig, axes = plt.subplots(1, len(datasets), figsize=(11, 5))
    if len(datasets) == 1:
        axes = [axes]

    label1 = model_name_map[name1]
    label2 = model_name_map[name2]

    for ax, (data_name, data) in zip(axes, datasets):
        weight_vals = data[model.weight_nme].values
        pred1 = None
        pred2 = None

        pred1_col = f"pred_{name1}"
        pred2_col = f"pred_{name2}"
        if pred1_col in data.columns:
            pred1 = data[pred1_col].values
        else:
            w_pred1_col = f"w_pred_{name1}"
            if w_pred1_col in data.columns:
                pred1 = data[w_pred1_col].values / np.maximum(weight_vals, EPS)

        if pred2_col in data.columns:
            pred2 = data[pred2_col].values
        else:
            w_pred2_col = f"w_pred_{name2}"
            if w_pred2_col in data.columns:
                pred2 = data[w_pred2_col].values / np.maximum(weight_vals, EPS)

        if pred1 is None or pred2 is None:
            print(
                f"Warning: missing pred_{name1}/pred_{name2} or w_pred columns in {data_name}. Skip plot.")
            continue

        plot_curves.plot_double_lift_curve(
            pred1,
            pred2,
            data['w_act'].values,
            weight_vals,
            n_bins=n_bins,
            title=f"Double Lift Chart on {data_name}",
            label1=label1,
            label2=label2,
            pred1_weighted=False,
            pred2_weighted=False,
            actual_weighted=True,
            ax=ax,
            show=False,
            style=style,
        )

    plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8, wspace=0.3)
    save_path = model.output_manager.plot_path(
        f'02_{model.model_nme}_dlift_{name1}_vs_{name2}.png')
    if finalize_figure:
        finalize_figure(fig, save_path=save_path, show=True, style=style)
    else:
        plt.savefig(save_path, dpi=300)
        plt.close(fig)

def plot_conversion_lift(model: "BayesOptModel", model_pred_col: str, n_bins: int = 20):
    if plt is None:
        _plot_skip("conversion lift plot")
        return
    if not model.binary_resp_nme:
        print("Error: `binary_resp_nme` not provided at BayesOptModel init; cannot plot conversion lift.")
        return

    if plot_curves is None:
        fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
        datasets = {
            'Train Data': model.train_data,
            'Test Data': model.test_data
        }

        for ax, (data_name, data) in zip(axes, datasets.items()):
            if model_pred_col not in data.columns:
                print(f"Warning: missing prediction column '{model_pred_col}' in {data_name}. Skip plot.")
                continue

            # Sort by model prediction and compute bins.
            plot_data = data.sort_values(by=model_pred_col).copy()
            plot_data['cum_weight'] = plot_data[model.weight_nme].cumsum()
            total_weight = plot_data[model.weight_nme].sum()

            if total_weight > EPS:
                plot_data['bin'] = pd.cut(
                    plot_data['cum_weight'],
                    bins=n_bins,
                    labels=False,
                    right=False
                )
            else:
                plot_data['bin'] = 0

            # Aggregate by bins.
            lift_agg = plot_data.groupby('bin').agg(
                total_weight=(model.weight_nme, 'sum'),
                actual_conversions=(model.binary_resp_nme, 'sum'),
                weighted_conversions=('w_binary_act', 'sum'),
                avg_pred=(model_pred_col, 'mean')
            ).reset_index()

            # Compute conversion rate.
            lift_agg['conversion_rate'] = lift_agg['weighted_conversions'] / \
                lift_agg['total_weight']

            # Compute overall average conversion rate.
            overall_conversion_rate = data['w_binary_act'].sum(
            ) / data[model.weight_nme].sum()
            ax.axhline(y=overall_conversion_rate, color='gray', linestyle='--',
                       label=f'Overall Avg Rate ({overall_conversion_rate:.2%})')

            ax.plot(lift_agg['bin'], lift_agg['conversion_rate'],
                    marker='o', linestyle='-', label='Actual Conversion Rate')
            ax.set_title(f'Conversion Rate Lift Chart on {data_name}')
            ax.set_xlabel(f'Model Score Decile (based on {model_pred_col})')
            ax.set_ylabel('Conversion Rate')
            ax.grid(True, linestyle='--', alpha=0.6)
            ax.legend()

        plt.tight_layout()
        plt.show()
        return

    fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)
    datasets = {
        'Train Data': model.train_data,
        'Test Data': model.test_data
    }

    for ax, (data_name, data) in zip(axes, datasets.items()):
        if model_pred_col not in data.columns:
            print(f"Warning: missing prediction column '{model_pred_col}' in {data_name}. Skip plot.")
            continue

        plot_curves.plot_conversion_lift(
            data[model_pred_col].values,
            data[model.binary_resp_nme].values,
            data[model.weight_nme].values,
            n_bins=n_bins,
            title=f'Conversion Rate Lift Chart on {data_name}',
            ax=ax,
            show=False,
        )

    plt.tight_layout()
    plt.show()