ins_pricing-0.1.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. ins_pricing/README.md +60 -0
  2. ins_pricing/__init__.py +102 -0
  3. ins_pricing/governance/README.md +18 -0
  4. ins_pricing/governance/__init__.py +20 -0
  5. ins_pricing/governance/approval.py +93 -0
  6. ins_pricing/governance/audit.py +37 -0
  7. ins_pricing/governance/registry.py +99 -0
  8. ins_pricing/governance/release.py +159 -0
  9. ins_pricing/modelling/BayesOpt.py +146 -0
  10. ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
  11. ins_pricing/modelling/BayesOpt_entry.py +575 -0
  12. ins_pricing/modelling/BayesOpt_incremental.py +731 -0
  13. ins_pricing/modelling/Explain_Run.py +36 -0
  14. ins_pricing/modelling/Explain_entry.py +539 -0
  15. ins_pricing/modelling/Pricing_Run.py +36 -0
  16. ins_pricing/modelling/README.md +33 -0
  17. ins_pricing/modelling/__init__.py +44 -0
  18. ins_pricing/modelling/bayesopt/__init__.py +98 -0
  19. ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
  20. ins_pricing/modelling/bayesopt/core.py +1476 -0
  21. ins_pricing/modelling/bayesopt/models.py +2196 -0
  22. ins_pricing/modelling/bayesopt/trainers.py +2446 -0
  23. ins_pricing/modelling/bayesopt/utils.py +1021 -0
  24. ins_pricing/modelling/cli_common.py +136 -0
  25. ins_pricing/modelling/explain/__init__.py +55 -0
  26. ins_pricing/modelling/explain/gradients.py +334 -0
  27. ins_pricing/modelling/explain/metrics.py +176 -0
  28. ins_pricing/modelling/explain/permutation.py +155 -0
  29. ins_pricing/modelling/explain/shap_utils.py +146 -0
  30. ins_pricing/modelling/notebook_utils.py +284 -0
  31. ins_pricing/modelling/plotting/__init__.py +45 -0
  32. ins_pricing/modelling/plotting/common.py +63 -0
  33. ins_pricing/modelling/plotting/curves.py +572 -0
  34. ins_pricing/modelling/plotting/diagnostics.py +139 -0
  35. ins_pricing/modelling/plotting/geo.py +362 -0
  36. ins_pricing/modelling/plotting/importance.py +121 -0
  37. ins_pricing/modelling/run_logging.py +133 -0
  38. ins_pricing/modelling/tests/conftest.py +8 -0
  39. ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
  40. ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
  41. ins_pricing/modelling/tests/test_explain.py +56 -0
  42. ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
  43. ins_pricing/modelling/tests/test_graph_cache.py +33 -0
  44. ins_pricing/modelling/tests/test_plotting.py +63 -0
  45. ins_pricing/modelling/tests/test_plotting_library.py +150 -0
  46. ins_pricing/modelling/tests/test_preprocessor.py +48 -0
  47. ins_pricing/modelling/watchdog_run.py +211 -0
  48. ins_pricing/pricing/README.md +44 -0
  49. ins_pricing/pricing/__init__.py +27 -0
  50. ins_pricing/pricing/calibration.py +39 -0
  51. ins_pricing/pricing/data_quality.py +117 -0
  52. ins_pricing/pricing/exposure.py +85 -0
  53. ins_pricing/pricing/factors.py +91 -0
  54. ins_pricing/pricing/monitoring.py +99 -0
  55. ins_pricing/pricing/rate_table.py +78 -0
  56. ins_pricing/production/__init__.py +21 -0
  57. ins_pricing/production/drift.py +30 -0
  58. ins_pricing/production/monitoring.py +143 -0
  59. ins_pricing/production/scoring.py +40 -0
  60. ins_pricing/reporting/README.md +20 -0
  61. ins_pricing/reporting/__init__.py +11 -0
  62. ins_pricing/reporting/report_builder.py +72 -0
  63. ins_pricing/reporting/scheduler.py +45 -0
  64. ins_pricing/setup.py +41 -0
  65. ins_pricing v2/__init__.py +23 -0
  66. ins_pricing v2/governance/__init__.py +20 -0
  67. ins_pricing v2/governance/approval.py +93 -0
  68. ins_pricing v2/governance/audit.py +37 -0
  69. ins_pricing v2/governance/registry.py +99 -0
  70. ins_pricing v2/governance/release.py +159 -0
  71. ins_pricing v2/modelling/Explain_Run.py +36 -0
  72. ins_pricing v2/modelling/Pricing_Run.py +36 -0
  73. ins_pricing v2/modelling/__init__.py +151 -0
  74. ins_pricing v2/modelling/cli_common.py +141 -0
  75. ins_pricing v2/modelling/config.py +249 -0
  76. ins_pricing v2/modelling/config_preprocess.py +254 -0
  77. ins_pricing v2/modelling/core.py +741 -0
  78. ins_pricing v2/modelling/data_container.py +42 -0
  79. ins_pricing v2/modelling/explain/__init__.py +55 -0
  80. ins_pricing v2/modelling/explain/gradients.py +334 -0
  81. ins_pricing v2/modelling/explain/metrics.py +176 -0
  82. ins_pricing v2/modelling/explain/permutation.py +155 -0
  83. ins_pricing v2/modelling/explain/shap_utils.py +146 -0
  84. ins_pricing v2/modelling/features.py +215 -0
  85. ins_pricing v2/modelling/model_manager.py +148 -0
  86. ins_pricing v2/modelling/model_plotting.py +463 -0
  87. ins_pricing v2/modelling/models.py +2203 -0
  88. ins_pricing v2/modelling/notebook_utils.py +294 -0
  89. ins_pricing v2/modelling/plotting/__init__.py +45 -0
  90. ins_pricing v2/modelling/plotting/common.py +63 -0
  91. ins_pricing v2/modelling/plotting/curves.py +572 -0
  92. ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
  93. ins_pricing v2/modelling/plotting/geo.py +362 -0
  94. ins_pricing v2/modelling/plotting/importance.py +121 -0
  95. ins_pricing v2/modelling/run_logging.py +133 -0
  96. ins_pricing v2/modelling/tests/conftest.py +8 -0
  97. ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
  98. ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
  99. ins_pricing v2/modelling/tests/test_explain.py +56 -0
  100. ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
  101. ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
  102. ins_pricing v2/modelling/tests/test_plotting.py +63 -0
  103. ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
  104. ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
  105. ins_pricing v2/modelling/trainers.py +2447 -0
  106. ins_pricing v2/modelling/utils.py +1020 -0
  107. ins_pricing v2/modelling/watchdog_run.py +211 -0
  108. ins_pricing v2/pricing/__init__.py +27 -0
  109. ins_pricing v2/pricing/calibration.py +39 -0
  110. ins_pricing v2/pricing/data_quality.py +117 -0
  111. ins_pricing v2/pricing/exposure.py +85 -0
  112. ins_pricing v2/pricing/factors.py +91 -0
  113. ins_pricing v2/pricing/monitoring.py +99 -0
  114. ins_pricing v2/pricing/rate_table.py +78 -0
  115. ins_pricing v2/production/__init__.py +21 -0
  116. ins_pricing v2/production/drift.py +30 -0
  117. ins_pricing v2/production/monitoring.py +143 -0
  118. ins_pricing v2/production/scoring.py +40 -0
  119. ins_pricing v2/reporting/__init__.py +11 -0
  120. ins_pricing v2/reporting/report_builder.py +72 -0
  121. ins_pricing v2/reporting/scheduler.py +45 -0
  122. ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
  123. ins_pricing v2/scripts/Explain_entry.py +545 -0
  124. ins_pricing v2/scripts/__init__.py +1 -0
  125. ins_pricing v2/scripts/train.py +568 -0
  126. ins_pricing v2/setup.py +55 -0
  127. ins_pricing v2/smoke_test.py +28 -0
  128. ins_pricing-0.1.6.dist-info/METADATA +78 -0
  129. ins_pricing-0.1.6.dist-info/RECORD +169 -0
  130. ins_pricing-0.1.6.dist-info/WHEEL +5 -0
  131. ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
  132. user_packages/__init__.py +105 -0
  133. user_packages legacy/BayesOpt.py +5659 -0
  134. user_packages legacy/BayesOpt_entry.py +513 -0
  135. user_packages legacy/BayesOpt_incremental.py +685 -0
  136. user_packages legacy/Pricing_Run.py +36 -0
  137. user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
  138. user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
  139. user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
  140. user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
  141. user_packages legacy/Try/BayesOpt legacy.py +3280 -0
  142. user_packages legacy/Try/BayesOpt.py +838 -0
  143. user_packages legacy/Try/BayesOptAll.py +1569 -0
  144. user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
  145. user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
  146. user_packages legacy/Try/BayesOptSearch.py +830 -0
  147. user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
  148. user_packages legacy/Try/BayesOptV1.py +1911 -0
  149. user_packages legacy/Try/BayesOptV10.py +2973 -0
  150. user_packages legacy/Try/BayesOptV11.py +3001 -0
  151. user_packages legacy/Try/BayesOptV12.py +3001 -0
  152. user_packages legacy/Try/BayesOptV2.py +2065 -0
  153. user_packages legacy/Try/BayesOptV3.py +2209 -0
  154. user_packages legacy/Try/BayesOptV4.py +2342 -0
  155. user_packages legacy/Try/BayesOptV5.py +2372 -0
  156. user_packages legacy/Try/BayesOptV6.py +2759 -0
  157. user_packages legacy/Try/BayesOptV7.py +2832 -0
  158. user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
  159. user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
  160. user_packages legacy/Try/BayesOptV9.py +2927 -0
  161. user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
  162. user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
  163. user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
  164. user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
  165. user_packages legacy/Try/xgbbayesopt.py +523 -0
  166. user_packages legacy/__init__.py +19 -0
  167. user_packages legacy/cli_common.py +124 -0
  168. user_packages legacy/notebook_utils.py +228 -0
  169. user_packages legacy/watchdog_run.py +202 -0
@@ -0,0 +1,1569 @@
# Transferring data between CPU and GPU carries significant overhead, but
# multiple CUDA streams can overlap transfers with compute, enabling work on
# larger datasets.

import copy
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np  # 1.26.2
import optuna  # 4.3.0
import pandas as pd  # 2.2.3
import torch  # 1.10.1+cu111
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as cp
import xgboost as xgb  # 1.7.0

from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.cuda.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
from sklearn.model_selection import KFold, ShuffleSplit, cross_val_score  # 1.2.2
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import make_scorer, mean_tweedie_deviance

# Tweedie deviance loss for torch.
# Reference: https://scikit-learn.org/stable/modules/model_evaluation.html#mean-poisson-gamma-and-tweedie-deviances


def tweedie_loss(pred, target, p=1.5):
    # Clamp predictions away from zero for numerical stability.
    eps = 1e-6
    pred_clamped = torch.clamp(pred, min=eps)
    # Compute the Tweedie deviance components.
    if p == 1:
        # Poisson case
        term1 = target * torch.log(target / pred_clamped + eps)
        term2 = target - pred_clamped
        term3 = 0
    elif p == 0:
        # Gaussian case
        term1 = 0.5 * torch.pow(target - pred_clamped, 2)
        term2 = 0
        term3 = 0
    elif p == 2:
        # Gamma case
        term1 = torch.log(pred_clamped / target + eps)
        term2 = -target / pred_clamped + 1
        term3 = 0
    else:
        term1 = torch.pow(target, 2 - p) / ((1 - p) * (2 - p))
        term2 = target * torch.pow(pred_clamped, 1 - p) / (1 - p)
        term3 = torch.pow(pred_clamped, 2 - p) / (2 - p)
    # Tweedie negative log-likelihood (up to a constant).
    return 2 * (term1 - term2 + term3)

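# A minimal sanity check for the loss above (illustrative sketch, not part of
# the original module): for the generic 1 < p < 2 branch the elementwise mean
# should agree with sklearn's mean_tweedie_deviance.
def _check_tweedie_loss(p=1.5):
    y = torch.tensor([1.0, 2.0, 0.5])
    mu = torch.tensor([0.8, 2.5, 0.4])
    torch_dev = tweedie_loss(mu, y, p=p).mean().item()
    skl_dev = mean_tweedie_deviance(y.numpy(), mu.numpy(), power=p)
    assert abs(torch_dev - skl_dev) < 1e-4
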
# Weighted binning helper.


def split_data(data, col_nme, wgt_nme, n_bins=10):
    data.sort_values(by=col_nme, ascending=True, inplace=True)
    data['cum_weight'] = data[wgt_nme].cumsum()
    w_sum = data[wgt_nme].sum()
    data.loc[:, 'bins'] = np.floor(data['cum_weight'] * float(n_bins) / w_sum)
    data.loc[(data['bins'] == n_bins), 'bins'] = n_bins - 1
    return data.groupby(['bins'], observed=True).sum(numeric_only=True)

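# Usage sketch for split_data (hypothetical toy frame): rows are sorted by the
# score column and cut into buckets of roughly equal cumulative weight, so each
# lift-chart bucket carries comparable exposure.
def _demo_split_data():
    df = pd.DataFrame({'pred': [0.1, 0.2, 0.4, 0.9],
                       'w':    [1.0, 1.0, 1.0, 1.0]})
    # cum_weight = 1,2,3,4 -> floor(cum_weight * 2 / 4) = 0,1,1,2, with the
    # last row clamped back into bin 1; the result is weight-summed per bin.
    return split_data(df, 'pred', 'w', n_bins=2)
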
# Lift-chart plotting helper.


def plot_lift_list(pred_model, w_pred_list, w_act_list,
                   weight_list, tgt_nme, n_bins=10,
                   fig_nme='Lift Chart'):
    lift_data = pd.DataFrame()
    lift_data.loc[:, 'pred'] = pred_model
    lift_data.loc[:, 'w_pred'] = w_pred_list
    lift_data.loc[:, 'act'] = w_act_list
    lift_data.loc[:, 'weight'] = weight_list
    plot_data = split_data(lift_data, 'pred', 'weight', n_bins)
    plot_data['exp_v'] = plot_data['w_pred'] / plot_data['weight']
    plot_data['act_v'] = plot_data['act'] / plot_data['weight']
    plot_data.reset_index(inplace=True)
    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_subplot(111)
    ax.plot(plot_data.index, plot_data['act_v'],
            label='Actual', color='red')
    ax.plot(plot_data.index, plot_data['exp_v'],
            label='Predicted', color='blue')
    ax.set_title(
        'Lift Chart of %s' % tgt_nme, fontsize=8)
    plt.xticks(plot_data.index,
               plot_data.index,
               rotation=90, fontsize=6)
    plt.yticks(fontsize=6)
    plt.legend(loc='upper left',
               fontsize=5, frameon=False)
    plt.margins(0.05)
    ax2 = ax.twinx()
    ax2.bar(plot_data.index, plot_data['weight'],
            alpha=0.5, color='seagreen',
            label='Earned Exposure')
    plt.yticks(fontsize=6)
    plt.legend(loc='upper right',
               fontsize=5, frameon=False)
    plt.subplots_adjust(wspace=0.3)
    save_path = os.path.join(
        os.getcwd(), 'plot', f'05_{tgt_nme}_{fig_nme}.png')
    plt.savefig(save_path, dpi=300)
    plt.close(fig)

# Double-lift chart plotting helper.


def plot_dlift_list(pred_model_1, pred_model_2,
                    model_nme_1, model_nme_2,
                    tgt_nme,
                    w_list, w_act_list, n_bins=10,
                    fig_nme='Double Lift Chart'):
    lift_data = pd.DataFrame()
    lift_data.loc[:, 'pred1'] = pred_model_1
    lift_data.loc[:, 'pred2'] = pred_model_2
    lift_data.loc[:, 'diff_ly'] = lift_data['pred1'] / lift_data['pred2']
    lift_data.loc[:, 'act'] = w_act_list
    lift_data.loc[:, 'weight'] = w_list
    lift_data.loc[:, 'w_pred1'] = lift_data['pred1'] * lift_data['weight']
    lift_data.loc[:, 'w_pred2'] = lift_data['pred2'] * lift_data['weight']
    plot_data = split_data(lift_data, 'diff_ly', 'weight', n_bins)
    plot_data['exp_v1'] = plot_data['w_pred1'] / plot_data['act']
    plot_data['exp_v2'] = plot_data['w_pred2'] / plot_data['act']
    plot_data['act_v'] = plot_data['act'] / plot_data['act']
    plot_data.reset_index(inplace=True)
    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_subplot(111)
    ax.plot(plot_data.index, plot_data['act_v'],
            label='Actual', color='red')
    ax.plot(plot_data.index, plot_data['exp_v1'],
            label=model_nme_1, color='blue')
    ax.plot(plot_data.index, plot_data['exp_v2'],
            label=model_nme_2, color='black')
    ax.set_title(
        'Double Lift Chart of %s' % tgt_nme, fontsize=8)
    plt.xticks(plot_data.index,
               plot_data.index,
               rotation=90, fontsize=6)
    plt.xlabel('%s / %s' % (model_nme_1, model_nme_2), fontsize=6)
    plt.yticks(fontsize=6)
    plt.legend(loc='upper left',
               fontsize=5, frameon=False)
    plt.margins(0.1)
    plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8)
    ax2 = ax.twinx()
    ax2.bar(plot_data.index, plot_data['weight'],
            alpha=0.5, color='seagreen',
            label='Earned Exposure')
    plt.yticks(fontsize=6)
    plt.legend(loc='upper right',
               fontsize=5, frameon=False)
    plt.subplots_adjust(wspace=0.3)
    save_path = os.path.join(
        os.getcwd(), 'plot', f'06_{tgt_nme}_{fig_nme}.png')
    plt.savefig(save_path, dpi=300)
    plt.close(fig)

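# Both plotting helpers above write PNGs into ./plot under the current working
# directory; a small guard like this sketch (an assumption about the intended
# project layout, not original code) avoids a FileNotFoundError on first use.
def _ensure_plot_dir():
    os.makedirs(os.path.join(os.getcwd(), 'plot'), exist_ok=True)
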

# ResNet model definition.
# Residual block: two linear layers + ReLU + a skip connection.
# ResBlock subclasses nn.Module.
class ResBlock(nn.Module):
    def __init__(self, dim: int, dropout: float = 0.1,
                 use_layernorm: bool = False, residual_scale: float = 0.1
                 ):
        super().__init__()
        self.use_layernorm = use_layernorm

        if use_layernorm:
            Norm = nn.LayerNorm  # normalise over the last dimension
        else:
            def Norm(d): return nn.BatchNorm1d(d)  # keep a switch for trying BN

        self.norm1 = Norm(dim)
        self.fc1 = nn.Linear(dim, dim, bias=True)
        self.act = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(dropout) if dropout > 0.0 else nn.Identity()
        self.norm2 = Norm(dim)
        self.fc2 = nn.Linear(dim, dim, bias=True)

        # Residual scaling keeps the trunk from blowing up early in training.
        self.res_scale = nn.Parameter(
            torch.tensor(residual_scale, dtype=torch.float32)
        )

    def forward(self, x):
        # pre-activation
        out = self.norm1(x)
        out = self.fc1(out)
        out = self.act(out)
        out = self.dropout(out)
        out = self.norm2(out)
        out = self.fc2(out)
        # scale the residual branch, then add
        return F.relu(x + self.res_scale * out)

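# Quick shape check for ResBlock (illustrative sketch): the block maps
# (batch, dim) -> (batch, dim), with the residual branch scaled down at init.
def _demo_resblock():
    blk = ResBlock(dim=8, dropout=0.0, use_layernorm=True)
    out = blk(torch.randn(4, 8))
    assert out.shape == (4, 8)
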
# ResNetSequential subclasses nn.Module and defines the full network.


class ResNetSequential(nn.Module):
    # Input: (batch, input_dim)
    # Structure:
    #   fc1 -> LN/BN -> ReLU -> ResBlock * block_num -> fc_out -> Softplus

    def __init__(self, input_dim: int, hidden_dim: int = 64, block_num: int = 2,
                 use_layernorm: bool = True, dropout: float = 0.1,
                 residual_scale: float = 0.1):
        super(ResNetSequential, self).__init__()

        self.net = nn.Sequential()
        self.net.add_module('fc1', nn.Linear(input_dim, hidden_dim))

        if use_layernorm:
            self.net.add_module('norm1', nn.LayerNorm(hidden_dim))
        else:
            self.net.add_module('norm1', nn.BatchNorm1d(hidden_dim))

        self.net.add_module('relu1', nn.ReLU(inplace=True))

        # Stack the residual blocks.
        for i in range(block_num):
            self.net.add_module(
                f'ResBlk_{i+1}',
                ResBlock(
                    hidden_dim,
                    dropout=dropout,
                    use_layernorm=use_layernorm,
                    residual_scale=residual_scale)
            )

        self.net.add_module('fc_out', nn.Linear(hidden_dim, 1))
        self.net.add_module('softplus', nn.Softplus())

    def forward(self, x):
        return self.net(x)

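# Illustrative sketch: the Softplus head guarantees strictly positive outputs,
# which the Tweedie deviance above requires.
def _demo_resnet_sequential():
    net = ResNetSequential(input_dim=5, hidden_dim=16, block_num=2)
    out = net(torch.randn(3, 5))
    assert out.shape == (3, 1) and bool((out > 0).all())
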
# Scikit-learn style interface for the ResNet model.


class ResNetScikitLearn(nn.Module):
    def __init__(self, model_nme: str, input_dim: int, hidden_dim: int = 64,
                 block_num: int = 2, batch_num: int = 100, epochs: int = 100,
                 tweedie_power: float = 1.5, learning_rate: float = 0.01, patience: int = 10,
                 use_layernorm: bool = True, dropout: float = 0.1,
                 residual_scale: float = 0.1):
        super(ResNetScikitLearn, self).__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.block_num = block_num
        self.batch_num = batch_num
        self.epochs = epochs
        self.model_nme = model_nme
        self.learning_rate = learning_rate
        self.patience = patience
        self.use_layernorm = use_layernorm
        self.dropout = dropout
        self.residual_scale = residual_scale

        # Device preference: cuda > mps > cpu.
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        elif torch.backends.mps.is_available():
            self.device = torch.device('mps')
        else:
            self.device = torch.device('cpu')

        # Tweedie power from the naming convention:
        # 'f' (frequency) -> Poisson, 's' (severity) -> Gamma.
        if 'f' in self.model_nme:
            self.tw_power = 1
        elif 's' in self.model_nme:
            self.tw_power = 2
        else:
            self.tw_power = tweedie_power

        # Build the network.
        self.resnet = ResNetSequential(
            self.input_dim,
            self.hidden_dim,
            self.block_num,
            use_layernorm=self.use_layernorm,
            dropout=self.dropout,
            residual_scale=self.residual_scale
        ).to(self.device)

    def fit(self, X_train, y_train, w_train=None,
            X_val=None, y_val=None, w_val=None):

        # === 1. Training set: keep on CPU; the DataLoader moves batches to GPU. ===
        X_tensor = torch.tensor(X_train.values, dtype=torch.float32)
        y_tensor = torch.tensor(
            y_train.values, dtype=torch.float32).view(-1, 1)
        if w_train is not None:
            w_tensor = torch.tensor(
                w_train.values, dtype=torch.float32).view(-1, 1)
        else:
            w_tensor = torch.ones_like(y_tensor)

        # === 2. Validation set: build on CPU, move to the device once later. ===
        has_val = X_val is not None and y_val is not None
        if has_val:
            X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
            y_val_tensor = torch.tensor(
                y_val.values, dtype=torch.float32).view(-1, 1)
            if w_val is not None:
                w_val_tensor = torch.tensor(
                    w_val.values, dtype=torch.float32).view(-1, 1)
            else:
                w_val_tensor = torch.ones_like(y_val_tensor)
        else:
            X_val_tensor = y_val_tensor = w_val_tensor = None

        # === 3. DataLoader ===
        # Batch size scales with the learning rate, floored at 4096.
        dataset = TensorDataset(X_tensor, y_tensor, w_tensor)
        batch_size = max(
            4096,
            int((self.learning_rate / (1e-4)) ** 0.5 *
                (X_train.shape[0] / self.batch_num))
        )

        dataloader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=1,  # 0-1 workers is usually enough for tabular data
            pin_memory=(self.device.type == 'cuda')
        )

        # === 4. Optimiser & AMP ===
        optimizer = torch.optim.Adam(
            self.resnet.parameters(), lr=self.learning_rate)
        scaler = GradScaler(enabled=(self.device.type == 'cuda'))

        # === 5. Early stopping ===
        best_loss, patience_counter = float('inf'), 0
        best_model_state = None

        # Move the validation set to the device once, if present.
        if has_val:
            X_val_dev = X_val_tensor.to(self.device, non_blocking=True)
            y_val_dev = y_val_tensor.to(self.device, non_blocking=True)
            w_val_dev = w_val_tensor.to(self.device, non_blocking=True)

        # === 6. Training loop ===
        for epoch in range(1, self.epochs + 1):
            self.resnet.train()
            for X_batch, y_batch, w_batch in dataloader:
                optimizer.zero_grad()

                X_batch = X_batch.to(self.device, non_blocking=True)
                y_batch = y_batch.to(self.device, non_blocking=True)
                w_batch = w_batch.to(self.device, non_blocking=True)

                with autocast(enabled=(self.device.type == 'cuda')):
                    y_pred = self.resnet(X_batch)
                    y_pred = torch.clamp(y_pred, min=1e-6)

                    losses = tweedie_loss(
                        y_pred, y_batch, p=self.tw_power).view(-1)
                    weighted_loss = (losses * w_batch.view(-1)
                                     ).sum() / w_batch.sum()

                scaler.scale(weighted_loss).backward()

                if self.device.type == 'cuda':
                    scaler.unscale_(optimizer)
                    clip_grad_norm_(self.resnet.parameters(), max_norm=1.0)

                scaler.step(optimizer)
                scaler.update()

            # === 7. Validation loss & early stopping ===
            if has_val:
                self.resnet.eval()
                with torch.no_grad(), autocast(enabled=(self.device.type == 'cuda')):
                    y_val_pred = self.resnet(X_val_dev)
                    y_val_pred = torch.clamp(y_val_pred, min=1e-6)

                    val_loss_values = tweedie_loss(
                        y_val_pred, y_val_dev, p=self.tw_power
                    ).view(-1)
                    val_weighted_loss = (
                        val_loss_values * w_val_dev.view(-1)
                    ).sum() / w_val_dev.sum()

                if val_weighted_loss < best_loss:
                    best_loss = val_weighted_loss
                    patience_counter = 0
                    best_model_state = copy.deepcopy(self.resnet.state_dict())
                else:
                    patience_counter += 1

                if patience_counter >= self.patience and best_model_state is not None:
                    self.resnet.load_state_dict(best_model_state)
                    break

    # ---------------- predict ----------------

    def predict(self, X_test):
        self.resnet.eval()
        with torch.no_grad():
            X_tensor = torch.tensor(
                X_test.values, dtype=torch.float32).to(self.device)
            y_pred = self.resnet(X_tensor).cpu().numpy()

        y_pred = np.clip(y_pred, 1e-6, None)
        return y_pred.flatten()

    # ---------------- set_params ----------------

    def set_params(self, params):
        for key, value in params.items():
            if hasattr(self, key):
                setattr(self, key, value)
            else:
                raise ValueError(f"Parameter {key} not found in model.")
        # Rebuild the network so structural parameters (hidden_dim, block_num,
        # dropout, ...) actually take effect before the next fit.
        self.resnet = ResNetSequential(
            self.input_dim, self.hidden_dim, self.block_num,
            use_layernorm=self.use_layernorm, dropout=self.dropout,
            residual_scale=self.residual_scale).to(self.device)
        return self

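# Minimal end-to-end sketch for the wrapper above (hypothetical random data;
# a name containing neither 'f' nor 's' selects the generic Tweedie branch):
def _demo_resnet_sklearn():
    X = pd.DataFrame(np.random.rand(64, 5),
                     columns=[f'x{i}' for i in range(5)])
    y = pd.Series(np.random.rand(64) + 0.1)
    model = ResNetScikitLearn('demo_bc', input_dim=5, epochs=2)
    model.fit(X, y)
    return model.predict(X)  # strictly positive predictions
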
# FT-Transformer model definition.


class FeatureTokenizer(nn.Module):
    # Maps numeric & categorical features to tokens of shape (batch, n_tokens, d_model).
    # Assumes:
    #   - X_num: (batch, num_numeric)
    #   - X_cat: (batch, num_categorical), each column already label-encoded to [0, card-1]

    def __init__(self, num_numeric: int, cat_cardinalities, d_model: int):
        super().__init__()

        self.num_numeric = num_numeric
        self.has_numeric = num_numeric > 0

        if self.has_numeric:
            self.num_linear = nn.Linear(num_numeric, d_model)

        self.embeddings = nn.ModuleList([
            nn.Embedding(card, d_model) for card in cat_cardinalities
        ])

    def forward(self, X_num, X_cat):
        tokens = []

        if self.has_numeric:
            # All numeric features collapse into a single token.
            num_token = self.num_linear(X_num)  # (batch, d_model)
            tokens.append(num_token)

        # One embedding token per categorical feature.
        for i, emb in enumerate(self.embeddings):
            # X_cat[:, i]: (batch,) long
            tok = emb(X_cat[:, i])  # (batch, d_model)
            tokens.append(tok)

        # (batch, n_tokens, d_model)
        x = torch.stack(tokens, dim=1)
        return x

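# Token layout sketch (illustrative): two numeric features collapse into one
# token, and each categorical feature contributes one embedding token.
def _demo_tokenizer():
    tok = FeatureTokenizer(num_numeric=2, cat_cardinalities=[3, 5], d_model=16)
    x = tok(torch.randn(4, 2), torch.randint(0, 3, (4, 2)))
    assert x.shape == (4, 3, 16)  # 1 numeric token + 2 categorical tokens
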
# FT-Transformer core model.


class FTTransformerCore(nn.Module):
    # A minimal working FT-Transformer:
    #   - FeatureTokenizer: numeric & categorical features -> tokens
    #   - TransformerEncoder: captures feature interactions
    #   - pooling + MLP + Softplus: positive outputs (suits Tweedie/Gamma)

    def __init__(self, num_numeric: int, cat_cardinalities, d_model: int = 64,
                 n_heads: int = 8, n_layers: int = 4, dropout: float = 0.1,
                 ):
        super().__init__()

        self.tokenizer = FeatureTokenizer(
            num_numeric=num_numeric,
            cat_cardinalities=cat_cardinalities,
            d_model=d_model
        )

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            activation="gelu",
            batch_first=True,
            norm_first=True,  # pre-norm is more stable
        )
        self.encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=n_layers
        )

        self.head = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, d_model),
            nn.GELU(),
            nn.Linear(d_model, 1),
            nn.Softplus()  # keeps outputs positive for Tweedie / Gamma
        )

    def forward(self, X_num, X_cat):

        # X_num: (batch, num_numeric) float32
        # X_cat: (batch, num_categorical) long

        tokens = self.tokenizer(X_num, X_cat)  # (batch, tokens, d_model)
        x = self.encoder(tokens)  # (batch, tokens, d_model)

        # Simple mean pooling over tokens.
        x = x.mean(dim=1)  # (batch, d_model)

        out = self.head(x)  # (batch, 1), Softplus already applied
        return out

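# Forward-pass sketch for the core model (illustrative): the output is
# (batch, 1) and strictly positive thanks to the Softplus head.
def _demo_ft_core():
    core = FTTransformerCore(num_numeric=2, cat_cardinalities=[4],
                             d_model=32, n_heads=4, n_layers=1)
    out = core(torch.randn(3, 2), torch.randint(0, 4, (3, 1)))
    assert out.shape == (3, 1) and bool((out > 0).all())
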
# Dataset wrapper for mixed numeric/categorical tensors.


class TabularDataset(Dataset):
    def __init__(self, X_num, X_cat, y, w):

        # X_num: torch.float32, (N, num_numeric)
        # X_cat: torch.long, (N, num_categorical)
        # y: torch.float32, (N, 1)
        # w: torch.float32, (N, 1)

        self.X_num = X_num
        self.X_cat = X_cat
        self.y = y
        self.w = w

    def __len__(self):
        return self.y.shape[0]

    def __getitem__(self, idx):
        return (
            self.X_num[idx],
            self.X_cat[idx],
            self.y[idx],
            self.w[idx],
        )

# Scikit-learn style interface for the FT-Transformer.


class FTTransformerSklearn(nn.Module):

    # sklearn-style wrapper:
    #   - num_cols: list of numeric feature column names
    #   - cat_cols: list of categorical feature column names
    #     (already label-encoded to values in [0, n_classes-1])

    def __init__(self, model_nme: str, num_cols, cat_cols, d_model: int = 64, n_heads: int = 8,
                 n_layers: int = 4, dropout: float = 0.1, batch_num: int = 100, epochs: int = 100,
                 tweedie_power: float = 1.5, learning_rate: float = 1e-3, patience: int = 10,
                 ):
        super().__init__()

        self.model_nme = model_nme
        self.num_cols = list(num_cols)
        self.cat_cols = list(cat_cols)
        self.d_model = d_model
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.dropout = dropout
        self.batch_num = batch_num
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.patience = patience
        if 'f' in self.model_nme:
            self.tw_power = 1.0
        elif 's' in self.model_nme:
            self.tw_power = 2.0
        else:
            self.tw_power = tweedie_power
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        elif torch.backends.mps.is_available():
            self.device = torch.device("mps")
        else:
            self.device = torch.device("cpu")
        self.cat_cardinalities = None
        self.cat_categories = {}
        self.ft = None

    def _build_model(self, X_train):
        num_numeric = len(self.num_cols)
        cat_cardinalities = []

        for col in self.cat_cols:
            cats = X_train[col].astype('category')
            categories = cats.cat.categories
            self.cat_categories[col] = categories  # remember the training-set levels

            card = len(categories) + 1  # reserve one extra bucket for unknown/missing
            cat_cardinalities.append(card)

        self.cat_cardinalities = cat_cardinalities

        self.ft = FTTransformerCore(
            num_numeric=num_numeric,
            cat_cardinalities=cat_cardinalities,
            d_model=self.d_model,
            n_heads=self.n_heads,
            n_layers=self.n_layers,
            dropout=self.dropout,
        ).to(self.device)

    def _encode_cats(self, X):
        # X: DataFrame containing at least self.cat_cols.
        # Returns: np.ndarray, shape (N, num_categorical), dtype=int64.

        if not self.cat_cols:
            return np.zeros((len(X), 0), dtype='int64')

        X_cat_list = []
        for col in self.cat_cols:
            # Use the categories recorded at training time.
            categories = self.cat_categories[col]
            # Build a Categorical against the fixed training categories.
            cats = pd.Categorical(X[col], categories=categories)
            codes = cats.codes.astype('int64', copy=True)  # -1 marks unknown or missing
            # Map unknown / missing to the reserved bucket at index len(categories).
            codes[codes < 0] = len(categories)
            X_cat_list.append(codes)

        X_cat_np = np.stack(X_cat_list, axis=1)  # (N, num_categorical)
        return X_cat_np

    def fit(self, X_train, y_train, w_train=None,
            X_val=None, y_val=None, w_val=None):

        # Build the network on the first fit.
        if self.ft is None:
            self._build_model(X_train)

        # --- Training tensors (all on CPU; batches move to GPU later) ---
        X_num_train = X_train[self.num_cols].to_numpy(
            dtype=np.float32, copy=True)
        X_num_train = torch.tensor(
            X_num_train,
            dtype=torch.float32
        )

        if self.cat_cols:
            X_cat_train_np = self._encode_cats(X_train)
            X_cat_train = torch.tensor(X_cat_train_np, dtype=torch.long)
        else:
            X_cat_train = torch.zeros(
                (X_num_train.size(0), 0), dtype=torch.long)

        y_tensor = torch.tensor(
            y_train.values,
            dtype=torch.float32
        ).view(-1, 1)

        if w_train is not None:
            w_tensor = torch.tensor(
                w_train.values,
                dtype=torch.float32
            ).view(-1, 1)
        else:
            w_tensor = torch.ones_like(y_tensor)

        # --- Validation tensors (moved to the device in one go later) ---
        has_val = X_val is not None and y_val is not None
        if has_val:
            # ---------- numeric features ----------
            X_num_val_np = X_val[self.num_cols].to_numpy(
                dtype=np.float32, copy=True)
            X_num_val = torch.tensor(X_num_val_np, dtype=torch.float32)

            # ---------- categorical features ----------
            if self.cat_cols:
                X_cat_val_np = self._encode_cats(X_val)
                X_cat_val = torch.tensor(X_cat_val_np, dtype=torch.long)
            else:
                X_cat_val = torch.zeros(
                    (X_num_val.shape[0], 0), dtype=torch.long)

            # ---------- target & weights ----------
            y_val_np = y_val.values.astype(np.float32, copy=True)
            y_val_tensor = torch.tensor(
                y_val_np, dtype=torch.float32).view(-1, 1)

            if w_val is not None:
                w_val_np = w_val.values.astype(np.float32, copy=True)
                w_val_tensor = torch.tensor(
                    w_val_np, dtype=torch.float32).view(-1, 1)
            else:
                w_val_tensor = torch.ones_like(y_val_tensor)

        else:
            X_num_val = X_cat_val = y_val_tensor = w_val_tensor = None

        # --- DataLoader ---
        dataset = TabularDataset(
            X_num_train, X_cat_train, y_tensor, w_tensor
        )

        batch_size = max(
            32,
            int((self.learning_rate / 1e-4) ** 0.5 *
                (X_train.shape[0] / self.batch_num))
        )

        dataloader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=1,
            pin_memory=(self.device.type == 'cuda')
        )

        # --- Optimiser & AMP ---
        optimizer = torch.optim.Adam(
            self.ft.parameters(),
            lr=self.learning_rate
        )
        scaler = GradScaler(enabled=(self.device.type == 'cuda'))

        # --- Early stopping ---
        best_loss = float('inf')
        patience_counter = 0
        best_model_state = None

        # Move the whole validation set to the device (if present).
        if has_val:
            X_num_val_dev = X_num_val.to(self.device, non_blocking=True)
            X_cat_val_dev = X_cat_val.to(self.device, non_blocking=True)
            y_val_dev = y_val_tensor.to(self.device, non_blocking=True)
            w_val_dev = w_val_tensor.to(self.device, non_blocking=True)

        # --- Training loop ---
        for epoch in range(1, self.epochs + 1):
            self.ft.train()
            for X_num_b, X_cat_b, y_b, w_b in dataloader:
                optimizer.zero_grad()

                X_num_b = X_num_b.to(self.device, non_blocking=True)
                X_cat_b = X_cat_b.to(self.device, non_blocking=True)
                y_b = y_b.to(self.device, non_blocking=True)
                w_b = w_b.to(self.device, non_blocking=True)

                with autocast(enabled=(self.device.type == 'cuda')):
                    y_pred = self.ft(X_num_b, X_cat_b)
                    y_pred = torch.clamp(y_pred, min=1e-6)

                    losses = tweedie_loss(
                        y_pred, y_b, p=self.tw_power
                    ).view(-1)

                    weighted_loss = (losses * w_b.view(-1)).sum() / w_b.sum()

                scaler.scale(weighted_loss).backward()

                if self.device.type == 'cuda':
                    scaler.unscale_(optimizer)
                    clip_grad_norm_(self.ft.parameters(), max_norm=1.0)

                scaler.step(optimizer)
                scaler.update()

            # --- Validation & early stopping ---
            if has_val:
                self.ft.eval()
                with torch.no_grad(), autocast(enabled=(self.device.type == 'cuda')):
                    y_val_pred = self.ft(X_num_val_dev, X_cat_val_dev)
                    y_val_pred = torch.clamp(y_val_pred, min=1e-6)

                    val_losses = tweedie_loss(
                        y_val_pred, y_val_dev, p=self.tw_power
                    ).view(-1)

                    val_weighted_loss = (
                        val_losses * w_val_dev.view(-1)
                    ).sum() / w_val_dev.sum()

                if val_weighted_loss < best_loss:
                    best_loss = val_weighted_loss
                    patience_counter = 0
                    best_model_state = copy.deepcopy(self.ft.state_dict())
                else:
                    patience_counter += 1

                if patience_counter >= self.patience and best_model_state is not None:
                    self.ft.load_state_dict(best_model_state)
                    break

    def predict(self, X_test):
        # X_test: DataFrame containing num_cols + cat_cols.

        self.ft.eval()
        X_num = X_test[self.num_cols].to_numpy(dtype=np.float32, copy=True)
        X_num = torch.tensor(
            X_num,
            dtype=torch.float32
        )
        if self.cat_cols:
            X_cat_np = self._encode_cats(X_test)
            X_cat = torch.tensor(X_cat_np, dtype=torch.long)
        else:
            X_cat = torch.zeros((X_num.size(0), 0), dtype=torch.long)

        with torch.no_grad():
            X_num = X_num.to(self.device, non_blocking=True)
            X_cat = X_cat.to(self.device, non_blocking=True)
            y_pred = self.ft(X_num, X_cat).cpu().numpy()

        y_pred = np.clip(y_pred, 1e-6, None)
        return y_pred.ravel()

    def set_params(self, params: dict):

        # Mirrors the sklearn convention. Note: structural parameters
        # (d_model, n_heads, ...) only take effect on the next fit, and only
        # while self.ft is still None (the network is built lazily in fit).

        for key, value in params.items():
            if hasattr(self, key):
                setattr(self, key, value)
            else:
                raise ValueError(f"Parameter {key} not found in model.")
        return self


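# Sketch of the reserved unknown-category bucket (illustrative, hypothetical
# data): a level unseen at training time encodes to index len(categories),
# exactly the extra slot _build_model reserved in each embedding table.
def _demo_unknown_bucket():
    ft = FTTransformerSklearn('demo_bc', num_cols=['x'], cat_cols=['c'])
    ft._build_model(pd.DataFrame({'x': [0.1, 0.2], 'c': ['a', 'b']}))
    codes = ft._encode_cats(pd.DataFrame({'x': [0.3], 'c': ['zzz']}))
    assert codes[0, 0] == 2  # == len(['a', 'b'])
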
# Bayesian-optimisation driver covering the XGBoost, ResNet and FT-Transformer models.

class BayesOptModel:
    def __init__(self, train_data, test_data,
                 model_nme, resp_nme, weight_nme, factor_nmes,
                 cate_list=[], prop_test=0.25, rand_seed=None, epochs=100):
        # Inputs:
        #   train_data / test_data: DataFrames
        #   model_nme: model name
        #   resp_nme: response column, weight_nme: weight column
        #   factor_nmes: list of factor names
        #   cate_list: list of categorical columns
        #   prop_test: holdout proportion, rand_seed: random seed
        self.train_data = train_data
        self.test_data = test_data
        self.resp_nme = resp_nme
        self.weight_nme = weight_nme
        self.train_data.loc[:, 'w_act'] = self.train_data[self.resp_nme] * \
            self.train_data[self.weight_nme]
        self.test_data.loc[:, 'w_act'] = self.test_data[self.resp_nme] * \
            self.test_data[self.weight_nme]
        self.factor_nmes = factor_nmes
        self.cate_list = cate_list
        self.rand_seed = rand_seed if rand_seed is not None else np.random.randint(
            1, 10000)
        if self.cate_list != []:
            for cate in self.cate_list:
                self.train_data[cate] = self.train_data[cate].astype(
                    'category')
                self.test_data[cate] = self.test_data[cate].astype('category')
        self.prop_test = prop_test
        self.cv = ShuffleSplit(n_splits=int(1/self.prop_test),
                               test_size=self.prop_test,
                               random_state=self.rand_seed)
        self.model_nme = model_nme
        # Objective from the naming convention: 'f' -> frequency (Poisson),
        # 's' -> severity (Gamma), 'bc' -> burning cost (Tweedie).
        if self.model_nme.find('f') != -1:
            self.obj = 'count:poisson'
        elif self.model_nme.find('s') != -1:
            self.obj = 'reg:gamma'
        else:
            self.obj = 'reg:tweedie'
        self.fit_params = {
            'sample_weight': self.train_data[self.weight_nme].values
        }
        self.num_features = [
            nme for nme in self.factor_nmes if nme not in self.cate_list]
        self.train_oht_scl_data = self.train_data[self.factor_nmes +
                                                  [self.weight_nme]+[self.resp_nme]].copy()
        self.test_oht_scl_data = self.test_data[self.factor_nmes +
                                                [self.weight_nme]+[self.resp_nme]].copy()
        self.train_oht_scl_data = pd.get_dummies(
            self.train_oht_scl_data,
            columns=self.cate_list,
            drop_first=True,
            dtype=np.int8
        )
        self.test_oht_scl_data = pd.get_dummies(
            self.test_oht_scl_data,
            columns=self.cate_list,
            drop_first=True,
            dtype=np.int8
        )
        for num_chr in self.num_features:
            scaler = StandardScaler()
            self.train_oht_scl_data[num_chr] = scaler.fit_transform(
                self.train_oht_scl_data[num_chr].values.reshape(-1, 1))
            self.test_oht_scl_data[num_chr] = scaler.transform(
                self.test_oht_scl_data[num_chr].values.reshape(-1, 1))
        # Align test columns with the training columns.
        self.test_oht_scl_data = self.test_oht_scl_data.reindex(
            columns=self.train_oht_scl_data.columns,
            fill_value=0
        )
        self.var_nmes = list(
            set(list(self.train_oht_scl_data.columns)) -
            set([self.weight_nme, self.resp_nme])
        )
        self.epochs = epochs
        self.model_label = []

    # One-way analysis plots.
    def plot_oneway(self, n_bins=10):
        for c in self.factor_nmes:
            fig = plt.figure(figsize=(7, 5))
            if c in self.cate_list:
                strs = c
            else:
                strs = c+'_bins'
                self.train_data.loc[:, strs] = pd.qcut(
                    self.train_data[c],
                    n_bins,
                    duplicates='drop'
                )
            plot_data = self.train_data.groupby(
                [strs], observed=True).sum(numeric_only=True)
            plot_data.reset_index(inplace=True)
            plot_data['act_v'] = plot_data['w_act'] / \
                plot_data[self.weight_nme]
            ax = fig.add_subplot(111)
            ax.plot(plot_data.index, plot_data['act_v'],
                    label='Actual', color='red')
            ax.set_title(
                'Analysis of %s : Train Data' % strs,
                fontsize=8)
            plt.xticks(plot_data.index,
                       list(plot_data[strs].astype(str)),
                       rotation=90)
            if len(list(plot_data[strs].astype(str))) > 50:
                plt.xticks(fontsize=3)
            else:
                plt.xticks(fontsize=6)
            plt.yticks(fontsize=6)
            ax2 = ax.twinx()
            ax2.bar(plot_data.index,
                    plot_data[self.weight_nme],
                    alpha=0.5, color='seagreen')
            plt.yticks(fontsize=6)
            plt.margins(0.05)
            plt.subplots_adjust(wspace=0.3)
            save_path = os.path.join(
                os.getcwd(), 'plot',
                f'00_{self.model_nme}_{strs}_oneway.png')
            plt.savefig(save_path, dpi=300)
            plt.close(fig)

    # XGBoost cross-validation objective.
    def cross_val_xgb(self, trial):
        learning_rate = trial.suggest_float(
            'learning_rate', 1e-5, 1e-1, log=True)
        gamma = trial.suggest_float(
            'gamma', 0, 10000)
        max_depth = trial.suggest_int(
            'max_depth', 3, 25)
        n_estimators = trial.suggest_int(
            'n_estimators', 10, 500, step=10)
        min_child_weight = trial.suggest_int(
            'min_child_weight', 100, 10000, step=100)
        reg_alpha = trial.suggest_float(
            'reg_alpha', 1e-10, 1, log=True)
        reg_lambda = trial.suggest_float(
            'reg_lambda', 1e-10, 1, log=True)
        if self.obj == 'reg:tweedie':
            tweedie_variance_power = trial.suggest_float(
                'tweedie_variance_power', 1, 2)
        elif self.obj == 'count:poisson':
            tweedie_variance_power = 1
        elif self.obj == 'reg:gamma':
            tweedie_variance_power = 2
        clf = xgb.XGBRegressor(
            objective=self.obj,
            random_state=self.rand_seed,
            subsample=0.9,
            tree_method='gpu_hist' if torch.cuda.is_available() else 'hist',
            gpu_id=0,
            enable_categorical=True,
            predictor='gpu_predictor' if torch.cuda.is_available() else 'cpu_predictor'
        )
        params = {
            'learning_rate': learning_rate,
            'gamma': gamma,
            'max_depth': max_depth,
            'n_estimators': n_estimators,
            'min_child_weight': min_child_weight,
            'reg_alpha': reg_alpha,
            'reg_lambda': reg_lambda
        }
        if self.obj == 'reg:tweedie':
            params['tweedie_variance_power'] = tweedie_variance_power
        clf.set_params(**params)
        acc = cross_val_score(
            clf,
            self.train_data[self.factor_nmes],
            self.train_data[self.resp_nme].values,
            fit_params=self.fit_params,
            cv=self.cv,
            scoring=make_scorer(
                mean_tweedie_deviance,
                power=tweedie_variance_power,
                greater_is_better=False),
            error_score='raise',
            n_jobs=int(1/self.prop_test)).mean()
        return -acc

    # XGBoost Bayesian optimisation.
    def bayesopt_xgb(self, max_evals=100):
        study = optuna.create_study(
            direction='minimize',
            sampler=optuna.samplers.TPESampler(seed=self.rand_seed))
        study.optimize(self.cross_val_xgb, n_trials=max_evals)
        self.best_xgb_params = study.best_params
        pd.DataFrame(self.best_xgb_params, index=[0]).to_csv(
            os.getcwd() + '/Results/' + self.model_nme + '_bestparams_xgb.csv')
        self.best_xgb_trial = study.best_trial
        self.xgb_best = xgb.XGBRegressor(
            objective=self.obj,
            random_state=self.rand_seed,
            subsample=0.9,
            tree_method='gpu_hist' if torch.cuda.is_available() else 'hist',
            gpu_id=0,
            enable_categorical=True,
            predictor='gpu_predictor' if torch.cuda.is_available() else 'cpu_predictor'
        )
        self.xgb_best.set_params(**self.best_xgb_params)
        self.xgb_best.fit(self.train_data[self.factor_nmes],
                          self.train_data[self.resp_nme].values,
                          **self.fit_params)
        self.model_label += ['Xgboost']
        self.train_data['pred_xgb'] = self.xgb_best.predict(
            self.train_data[self.factor_nmes])
        self.test_data['pred_xgb'] = self.xgb_best.predict(
            self.test_data[self.factor_nmes])
        self.train_data.loc[:, 'w_pred_xgb'] = self.train_data['pred_xgb'] * \
            self.train_data[self.weight_nme]
        self.test_data.loc[:, 'w_pred_xgb'] = self.test_data['pred_xgb'] * \
            self.test_data[self.weight_nme]

    # ResNet cross-validation objective.
    def cross_val_resn(self, trial):

        learning_rate = trial.suggest_float(
            'learning_rate', 1e-6, 1e-2, log=True)  # kept low to avoid exploding gradients
        hidden_dim = trial.suggest_int(
            'hidden_dim', 32, 256, step=32)
        block_num = trial.suggest_int(
            'block_num', 2, 10)
        batch_num = trial.suggest_int(
            'batch_num',
            10 if self.obj == 'reg:gamma' else 100,
            100 if self.obj == 'reg:gamma' else 1000,
            step=10 if self.obj == 'reg:gamma' else 100)
        if self.obj == 'reg:tweedie':
            tw_power = trial.suggest_float(
                'tw_power', 1, 2.0)
        elif self.obj == 'count:poisson':
            tw_power = 1
        elif self.obj == 'reg:gamma':
            tw_power = 2
        loss = 0
        for fold, (train_idx, test_idx) in enumerate(self.cv.split(self.train_oht_scl_data[self.var_nmes])):
            # Build the model.
            cv_net = ResNetScikitLearn(
                model_nme=self.model_nme,
                input_dim=self.train_oht_scl_data[self.var_nmes].shape[1],
                epochs=self.epochs,
                learning_rate=learning_rate,
                hidden_dim=hidden_dim,
                block_num=block_num,
                # batch_num keeps per-batch weight variance comparable
                batch_num=batch_num,
                tweedie_power=tw_power
                # use_layernorm, dropout and residual_scale could also be tuned here
            )
            # Train.
            cv_net.fit(
                self.train_oht_scl_data[self.var_nmes].iloc[train_idx],
                self.train_oht_scl_data[self.resp_nme].iloc[train_idx],
                self.train_oht_scl_data[self.weight_nme].iloc[train_idx],
                self.train_oht_scl_data[self.var_nmes].iloc[test_idx],
                self.train_oht_scl_data[self.resp_nme].iloc[test_idx],
                self.train_oht_scl_data[self.weight_nme].iloc[test_idx]
            )
            # Predict on the holdout fold.
            y_pred_fold = cv_net.predict(
                self.train_oht_scl_data[self.var_nmes].iloc[test_idx]
            )
            # Accumulate the fold loss.
            loss += mean_tweedie_deviance(
                self.train_oht_scl_data[self.resp_nme].iloc[test_idx],
                y_pred_fold,
                sample_weight=self.train_oht_scl_data[self.weight_nme].iloc[test_idx],
                power=tw_power
            )
        return loss / int(1/self.prop_test)

    # ResNet Bayesian optimisation.
    def bayesopt_resnet(self, max_evals=100):
        study = optuna.create_study(
            direction='minimize',
            sampler=optuna.samplers.TPESampler(seed=self.rand_seed))
        study.optimize(self.cross_val_resn, n_trials=max_evals)
        self.best_resn_params = study.best_params
        pd.DataFrame(self.best_resn_params, index=[0]).to_csv(
            os.getcwd() + '/Results/' + self.model_nme + '_bestparams_resn.csv')
        self.best_resn_trial = study.best_trial
        self.resn_best = ResNetScikitLearn(
            model_nme=self.model_nme,
            input_dim=self.train_oht_scl_data[self.var_nmes].shape[1]
        )
        self.resn_best.set_params(self.best_resn_params)
        self.resn_best.fit(self.train_oht_scl_data[self.var_nmes],
                           self.train_oht_scl_data[self.resp_nme],
                           self.train_oht_scl_data[self.weight_nme])
        self.model_label += ['ResNet']
        self.train_data['pred_resn'] = self.resn_best.predict(
            self.train_oht_scl_data[self.var_nmes])
        self.test_data['pred_resn'] = self.resn_best.predict(
            self.test_oht_scl_data[self.var_nmes])
        self.train_data.loc[:, 'w_pred_resn'] = self.train_data['pred_resn'] * \
            self.train_data[self.weight_nme]
        self.test_data.loc[:, 'w_pred_resn'] = self.test_data['pred_resn'] * \
            self.test_data[self.weight_nme]

    # FT-Transformer cross-validation objective.
    def cross_val_ft(self, trial):

        # Learning rate.
        learning_rate = trial.suggest_float(
            'learning_rate', 1e-5, 1e-2, log=True
        )

        # Transformer width and depth.
        d_model = trial.suggest_int(
            'd_model', 32, 128, step=32
        )
        n_heads = trial.suggest_categorical(
            'n_heads', [2, 4, 8]
        )
        n_layers = trial.suggest_int(
            'n_layers', 2, 6
        )

        dropout = trial.suggest_float(
            'dropout', 0.0, 0.3
        )

        batch_num = trial.suggest_int(
            'batch_num',
            10 if self.obj == 'reg:gamma' else 100,
            100 if self.obj == 'reg:gamma' else 1000,
            step=10 if self.obj == 'reg:gamma' else 100
        )

        # Tweedie power.
        if self.obj == 'reg:tweedie':
            tw_power = trial.suggest_float('tw_power', 1.0, 2.0)
        elif self.obj == 'count:poisson':
            tw_power = 1.0
        elif self.obj == 'reg:gamma':
            tw_power = 2.0

        loss = 0.0

        # Note: the FT model consumes the raw features (self.factor_nmes),
        # not the one-hot encoded self.train_oht_scl_data.
        for fold, (train_idx, test_idx) in enumerate(
                self.cv.split(self.train_data[self.factor_nmes])):

            X_train_fold = self.train_data.iloc[train_idx][self.factor_nmes]
            y_train_fold = self.train_data.iloc[train_idx][self.resp_nme]
            w_train_fold = self.train_data.iloc[train_idx][self.weight_nme]

            X_val_fold = self.train_data.iloc[test_idx][self.factor_nmes]
            y_val_fold = self.train_data.iloc[test_idx][self.resp_nme]
            w_val_fold = self.train_data.iloc[test_idx][self.weight_nme]

            # Build the FT-Transformer model.
            cv_ft = FTTransformerSklearn(
                model_nme=self.model_nme,
                num_cols=self.num_features,  # numeric feature names
                cat_cols=self.cate_list,  # categorical names (integer- or category-encoded)
                d_model=d_model,
                n_heads=n_heads,
                n_layers=n_layers,
                dropout=dropout,
                batch_num=batch_num,
                epochs=self.epochs,
                tweedie_power=tw_power,
                learning_rate=learning_rate,
                patience=5  # adjust as needed
            )

            # Train.
            cv_ft.fit(
                X_train_fold,
                y_train_fold,
                w_train_fold,
                X_val_fold,
                y_val_fold,
                w_val_fold
            )

            # Predict on the holdout fold.
            y_pred_fold = cv_ft.predict(X_val_fold)

            # Fold loss (same metric as ResNet: mean_tweedie_deviance).
            loss += mean_tweedie_deviance(
                y_val_fold,
                y_pred_fold,
                sample_weight=w_val_fold,
                power=tw_power
            )

        return loss / int(1 / self.prop_test)

    # FT-Transformer Bayesian optimisation.
    def bayesopt_ft(self, max_evals=50):
        study = optuna.create_study(
            direction='minimize',
            sampler=optuna.samplers.TPESampler(seed=self.rand_seed)
        )
        study.optimize(self.cross_val_ft, n_trials=max_evals)

        self.best_ft_params = study.best_params
        pd.DataFrame(self.best_ft_params, index=[0]).to_csv(
            os.getcwd() + '/Results/' + self.model_nme + '_bestparams_ft.csv'
        )
        self.best_ft_trial = study.best_trial

        # Rebuild an FT model with the best parameters and fit on the full training set.
        self.ft_best = FTTransformerSklearn(
            model_nme=self.model_nme,
            num_cols=self.num_features,
            cat_cols=self.cate_list
        )
        # Apply the best hyper-parameters.
        self.ft_best.set_params(self.best_ft_params)

        # Full training run.
        self.ft_best.fit(
            self.train_data[self.factor_nmes],
            self.train_data[self.resp_nme],
            self.train_data[self.weight_nme]
        )

        # Record the model label.
        self.model_label += ['FTTransformer']

        # Train-set predictions.
        self.train_data['pred_ft'] = self.ft_best.predict(
            self.train_data[self.factor_nmes]
        )
        # Test-set predictions.
        self.test_data['pred_ft'] = self.ft_best.predict(
            self.test_data[self.factor_nmes]
        )

        # Weighted predictions (same style as XGB / ResNet).
        self.train_data.loc[:, 'w_pred_ft'] = (
            self.train_data['pred_ft'] * self.train_data[self.weight_nme]
        )
        self.test_data.loc[:, 'w_pred_ft'] = (
            self.test_data['pred_ft'] * self.test_data[self.weight_nme]
        )

    # Weighted binning helper (method version).

    def _split_data(self, data, col_nme, wgt_nme, n_bins=10):
        data.sort_values(by=col_nme, ascending=True, inplace=True)
        data['cum_weight'] = data[wgt_nme].cumsum()
        w_sum = data[wgt_nme].sum()
        data.loc[:, 'bins'] = np.floor(
            data['cum_weight']*float(n_bins)/w_sum)
        data.loc[(data['bins'] == n_bins), 'bins'] = n_bins-1
        return data.groupby(['bins'], observed=True).sum(numeric_only=True)

    # Build the dataset behind a lift chart.
    def _plot_data_lift(self,
                        pred_list, w_pred_list,
                        w_act_list, weight_list, n_bins=10):
        lift_data = pd.DataFrame()
        lift_data.loc[:, 'pred'] = pred_list
        lift_data.loc[:, 'w_pred'] = w_pred_list
        lift_data.loc[:, 'act'] = w_act_list
        lift_data.loc[:, 'weight'] = weight_list
        plot_data = self._split_data(
            lift_data, 'pred', 'weight', n_bins)
        plot_data['exp_v'] = plot_data['w_pred'] / plot_data['weight']
        plot_data['act_v'] = plot_data['act'] / plot_data['weight']
        plot_data.reset_index(inplace=True)
        return plot_data

    # Lift-chart plotting.
    def plot_lift(self, model_label, pred_nme=None, n_bins=10):
        # Plot the train and test panels side by side.
        figpos_list = [121, 122]
        plot_dict = {
            121: self.train_data,
            122: self.test_data
        }
        name_list = {
            121: 'Train Data',
            122: 'Test Data'
        }
        if model_label == 'Xgboost':
            pred_nme = 'pred_xgb'
        elif model_label == 'ResNet':
            pred_nme = 'pred_resn'
        elif model_label == 'FTTransformer':
            pred_nme = 'pred_ft'

        fig = plt.figure(figsize=(11, 5))
        for figpos in figpos_list:
            plot_data = self._plot_data_lift(
                plot_dict[figpos][pred_nme].values,
                plot_dict[figpos]['w_'+pred_nme].values,
                plot_dict[figpos]['w_act'].values,
                plot_dict[figpos][self.weight_nme].values,
                n_bins)
            ax = fig.add_subplot(figpos)
            ax.plot(plot_data.index, plot_data['act_v'],
                    label='Actual', color='red')
            ax.plot(plot_data.index, plot_data['exp_v'],
                    label='Predicted', color='blue')
            ax.set_title(
                'Lift Chart on %s' % name_list[figpos], fontsize=8)
            plt.xticks(plot_data.index,
                       plot_data.index,
                       rotation=90, fontsize=6)
            plt.yticks(fontsize=6)
            plt.legend(loc='upper left',
                       fontsize=5, frameon=False)
            plt.margins(0.05)
            ax2 = ax.twinx()
            ax2.bar(plot_data.index, plot_data['weight'],
                    alpha=0.5, color='seagreen',
                    label='Earned Exposure')
            plt.yticks(fontsize=6)
            plt.legend(loc='upper right',
                       fontsize=5, frameon=False)
        plt.subplots_adjust(wspace=0.3)
        save_path = os.path.join(
            os.getcwd(), 'plot', f'01_{self.model_nme}_{model_label}_lift.png')
        plt.savefig(save_path, dpi=300)
        plt.show()
        plt.close(fig)

    # Build the dataset behind a double-lift chart.
    def _plot_data_dlift(self,
                         pred_list_model1, pred_list_model2,
                         w_list, w_act_list, n_bins=10):
        lift_data = pd.DataFrame()
        lift_data.loc[:, 'pred1'] = pred_list_model1
        lift_data.loc[:, 'pred2'] = pred_list_model2
        lift_data.loc[:, 'diff_ly'] = lift_data['pred1'] / lift_data['pred2']
        lift_data.loc[:, 'act'] = w_act_list
        lift_data.loc[:, 'weight'] = w_list
        plot_data = self._split_data(lift_data, 'diff_ly', 'weight', n_bins)
        plot_data['exp_v1'] = plot_data['pred1'] / plot_data['act']
        plot_data['exp_v2'] = plot_data['pred2'] / plot_data['act']
        plot_data['act_v'] = plot_data['act'] / plot_data['act']
        plot_data.reset_index(inplace=True)
        return plot_data

    # Double-lift chart plotting.
    def plot_dlift(self, model_comp=['xgb', 'resn'], n_bins=10):
        # Map the short model codes to display labels.
        label_map = {'xgb': 'Xgboost', 'resn': 'ResNet', 'ft': 'FTTransformer'}
        tt1 = label_map.get(model_comp[0], model_comp[0])
        tt2 = label_map.get(model_comp[1], model_comp[1])
        figpos_list = [121, 122]
        plot_dict = {
            121: self.train_data,
            122: self.test_data
        }
        name_list = {
            121: 'Train Data',
            122: 'Test Data'
        }
        fig = plt.figure(figsize=(11, 5))
        for figpos in figpos_list:
            plot_data = self._plot_data_dlift(
                plot_dict[figpos]['w_pred_'+model_comp[0]].values,
                plot_dict[figpos]['w_pred_'+model_comp[1]].values,
                plot_dict[figpos][self.weight_nme].values,
                plot_dict[figpos]['w_act'].values,
                n_bins)
            ax = fig.add_subplot(figpos)
            ax.plot(plot_data.index, plot_data['act_v'],
                    label='Actual', color='red')
            ax.plot(plot_data.index, plot_data['exp_v1'],
                    label=tt1, color='blue')
            ax.plot(plot_data.index, plot_data['exp_v2'],
                    label=tt2, color='black')
            ax.set_title(
                'Double Lift Chart on %s' % name_list[figpos], fontsize=8)
            plt.xticks(plot_data.index,
                       plot_data.index,
                       rotation=90, fontsize=6)
            plt.xlabel('%s / %s' % (tt1, tt2), fontsize=6)
            plt.yticks(fontsize=6)
            plt.legend(loc='upper left',
                       fontsize=5, frameon=False)
            plt.margins(0.1)
            plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8)
            ax2 = ax.twinx()
            ax2.bar(plot_data.index, plot_data['weight'],
                    alpha=0.5, color='seagreen',
                    label='Earned Exposure')
            plt.yticks(fontsize=6)
            plt.legend(loc='upper right',
                       fontsize=5, frameon=False)
        plt.subplots_adjust(wspace=0.3)
        save_path = os.path.join(
            os.getcwd(), 'plot', f'02_{self.model_nme}_dlift.png')
        plt.savefig(save_path, dpi=300)
        plt.show()
        plt.close(fig)

    # Model persistence.

    def save_model(self, model_name=None):

        # model_name can be:
        #   - None: save every available model
        #   - 'xgb': save only Xgboost
        #   - 'resn': save only ResNet
        #   - 'ft': save only FT-Transformer

        model_dir = os.path.join(os.getcwd(), 'model')
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)

        save_path_xgb = os.path.join(
            model_dir, f'01_{self.model_nme}_Xgboost.pkl'
        )
        save_path_resn = os.path.join(
            model_dir, f'01_{self.model_nme}_ResNet.pth'
        )
        save_path_ft = os.path.join(
            model_dir, f'01_{self.model_nme}_FTTransformer.pth'
        )

        # Save XGBoost.
        if model_name in (None, 'xgb'):
            if hasattr(self, 'xgb_best'):
                joblib.dump(self.xgb_best, save_path_xgb)
            else:
                print("[save_model] Warning: xgb_best does not exist; Xgboost model not saved.")

        # Save ResNet (only the core network's state_dict).
        if model_name in (None, 'resn'):
            if hasattr(self, 'resn_best'):
                torch.save(self.resn_best.resnet.state_dict(), save_path_resn)
            else:
                print("[save_model] Warning: resn_best does not exist; ResNet model not saved.")

        # Save FT-Transformer (the whole sklearn-style wrapper, so the
        # architecture and best hyper-parameters can be restored directly).
        if model_name in (None, 'ft'):
            if hasattr(self, 'ft_best'):
                torch.save(self.ft_best, save_path_ft)
            else:
                print("[save_model] Warning: ft_best does not exist; FT-Transformer model not saved.")

    def load_model(self, model_name=None):
        # model_name can be:
        #   - None: load every model that can be found
        #   - 'xgb': load only Xgboost
        #   - 'resn': load only ResNet
        #   - 'ft': load only FT-Transformer

        model_dir = os.path.join(os.getcwd(), 'model')
        save_path_xgb = os.path.join(
            model_dir, f'01_{self.model_nme}_Xgboost.pkl'
        )
        save_path_resn = os.path.join(
            model_dir, f'01_{self.model_nme}_ResNet.pth'
        )
        save_path_ft = os.path.join(
            model_dir, f'01_{self.model_nme}_FTTransformer.pth'
        )

        # Load XGBoost.
        if model_name in (None, 'xgb'):
            if os.path.exists(save_path_xgb):
                self.xgb_load = joblib.load(save_path_xgb)
            else:
                print(
                    f"[load_model] Warning: Xgboost model file not found: {save_path_xgb}")

        # Load ResNet (rebuild the wrapper, then load the state_dict).
        # NOTE: the wrapper is rebuilt with default hyper-parameters, so this
        # only matches if the saved network used the default architecture;
        # apply the saved best params via set_params first otherwise.
        if model_name in (None, 'resn'):
            if os.path.exists(save_path_resn):
                self.resn_load = ResNetScikitLearn(
                    model_nme=self.model_nme,
                    input_dim=self.train_oht_scl_data[self.var_nmes].shape[1]
                )
                state_dict = torch.load(
                    save_path_resn, map_location=self.resn_load.device)
                self.resn_load.resnet.load_state_dict(state_dict)
            else:
                print(
                    f"[load_model] Warning: ResNet model file not found: {save_path_resn}")

        # Load FT-Transformer (deserialise the sklearn-style wrapper directly).
        if model_name in (None, 'ft'):
            if os.path.exists(save_path_ft):
                # Load onto CPU first, then migrate to the current environment.
                ft_loaded = torch.load(save_path_ft, map_location='cpu')
                # Pick the device for this environment and move the core model.
                if torch.cuda.is_available():
                    ft_loaded.device = torch.device('cuda')
                elif torch.backends.mps.is_available():
                    ft_loaded.device = torch.device('mps')
                else:
                    ft_loaded.device = torch.device('cpu')
                ft_loaded.ft.to(ft_loaded.device)

                self.ft_load = ft_loaded
            else:
                print(
                    f"[load_model] Warning: FT-Transformer model file not found: {save_path_ft}")