ins-pricing 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. ins_pricing/README.md +60 -0
  2. ins_pricing/__init__.py +102 -0
  3. ins_pricing/governance/README.md +18 -0
  4. ins_pricing/governance/__init__.py +20 -0
  5. ins_pricing/governance/approval.py +93 -0
  6. ins_pricing/governance/audit.py +37 -0
  7. ins_pricing/governance/registry.py +99 -0
  8. ins_pricing/governance/release.py +159 -0
  9. ins_pricing/modelling/BayesOpt.py +146 -0
  10. ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
  11. ins_pricing/modelling/BayesOpt_entry.py +575 -0
  12. ins_pricing/modelling/BayesOpt_incremental.py +731 -0
  13. ins_pricing/modelling/Explain_Run.py +36 -0
  14. ins_pricing/modelling/Explain_entry.py +539 -0
  15. ins_pricing/modelling/Pricing_Run.py +36 -0
  16. ins_pricing/modelling/README.md +33 -0
  17. ins_pricing/modelling/__init__.py +44 -0
  18. ins_pricing/modelling/bayesopt/__init__.py +98 -0
  19. ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
  20. ins_pricing/modelling/bayesopt/core.py +1476 -0
  21. ins_pricing/modelling/bayesopt/models.py +2196 -0
  22. ins_pricing/modelling/bayesopt/trainers.py +2446 -0
  23. ins_pricing/modelling/bayesopt/utils.py +1021 -0
  24. ins_pricing/modelling/cli_common.py +136 -0
  25. ins_pricing/modelling/explain/__init__.py +55 -0
  26. ins_pricing/modelling/explain/gradients.py +334 -0
  27. ins_pricing/modelling/explain/metrics.py +176 -0
  28. ins_pricing/modelling/explain/permutation.py +155 -0
  29. ins_pricing/modelling/explain/shap_utils.py +146 -0
  30. ins_pricing/modelling/notebook_utils.py +284 -0
  31. ins_pricing/modelling/plotting/__init__.py +45 -0
  32. ins_pricing/modelling/plotting/common.py +63 -0
  33. ins_pricing/modelling/plotting/curves.py +572 -0
  34. ins_pricing/modelling/plotting/diagnostics.py +139 -0
  35. ins_pricing/modelling/plotting/geo.py +362 -0
  36. ins_pricing/modelling/plotting/importance.py +121 -0
  37. ins_pricing/modelling/run_logging.py +133 -0
  38. ins_pricing/modelling/tests/conftest.py +8 -0
  39. ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
  40. ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
  41. ins_pricing/modelling/tests/test_explain.py +56 -0
  42. ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
  43. ins_pricing/modelling/tests/test_graph_cache.py +33 -0
  44. ins_pricing/modelling/tests/test_plotting.py +63 -0
  45. ins_pricing/modelling/tests/test_plotting_library.py +150 -0
  46. ins_pricing/modelling/tests/test_preprocessor.py +48 -0
  47. ins_pricing/modelling/watchdog_run.py +211 -0
  48. ins_pricing/pricing/README.md +44 -0
  49. ins_pricing/pricing/__init__.py +27 -0
  50. ins_pricing/pricing/calibration.py +39 -0
  51. ins_pricing/pricing/data_quality.py +117 -0
  52. ins_pricing/pricing/exposure.py +85 -0
  53. ins_pricing/pricing/factors.py +91 -0
  54. ins_pricing/pricing/monitoring.py +99 -0
  55. ins_pricing/pricing/rate_table.py +78 -0
  56. ins_pricing/production/__init__.py +21 -0
  57. ins_pricing/production/drift.py +30 -0
  58. ins_pricing/production/monitoring.py +143 -0
  59. ins_pricing/production/scoring.py +40 -0
  60. ins_pricing/reporting/README.md +20 -0
  61. ins_pricing/reporting/__init__.py +11 -0
  62. ins_pricing/reporting/report_builder.py +72 -0
  63. ins_pricing/reporting/scheduler.py +45 -0
  64. ins_pricing/setup.py +41 -0
  65. ins_pricing v2/__init__.py +23 -0
  66. ins_pricing v2/governance/__init__.py +20 -0
  67. ins_pricing v2/governance/approval.py +93 -0
  68. ins_pricing v2/governance/audit.py +37 -0
  69. ins_pricing v2/governance/registry.py +99 -0
  70. ins_pricing v2/governance/release.py +159 -0
  71. ins_pricing v2/modelling/Explain_Run.py +36 -0
  72. ins_pricing v2/modelling/Pricing_Run.py +36 -0
  73. ins_pricing v2/modelling/__init__.py +151 -0
  74. ins_pricing v2/modelling/cli_common.py +141 -0
  75. ins_pricing v2/modelling/config.py +249 -0
  76. ins_pricing v2/modelling/config_preprocess.py +254 -0
  77. ins_pricing v2/modelling/core.py +741 -0
  78. ins_pricing v2/modelling/data_container.py +42 -0
  79. ins_pricing v2/modelling/explain/__init__.py +55 -0
  80. ins_pricing v2/modelling/explain/gradients.py +334 -0
  81. ins_pricing v2/modelling/explain/metrics.py +176 -0
  82. ins_pricing v2/modelling/explain/permutation.py +155 -0
  83. ins_pricing v2/modelling/explain/shap_utils.py +146 -0
  84. ins_pricing v2/modelling/features.py +215 -0
  85. ins_pricing v2/modelling/model_manager.py +148 -0
  86. ins_pricing v2/modelling/model_plotting.py +463 -0
  87. ins_pricing v2/modelling/models.py +2203 -0
  88. ins_pricing v2/modelling/notebook_utils.py +294 -0
  89. ins_pricing v2/modelling/plotting/__init__.py +45 -0
  90. ins_pricing v2/modelling/plotting/common.py +63 -0
  91. ins_pricing v2/modelling/plotting/curves.py +572 -0
  92. ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
  93. ins_pricing v2/modelling/plotting/geo.py +362 -0
  94. ins_pricing v2/modelling/plotting/importance.py +121 -0
  95. ins_pricing v2/modelling/run_logging.py +133 -0
  96. ins_pricing v2/modelling/tests/conftest.py +8 -0
  97. ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
  98. ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
  99. ins_pricing v2/modelling/tests/test_explain.py +56 -0
  100. ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
  101. ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
  102. ins_pricing v2/modelling/tests/test_plotting.py +63 -0
  103. ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
  104. ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
  105. ins_pricing v2/modelling/trainers.py +2447 -0
  106. ins_pricing v2/modelling/utils.py +1020 -0
  107. ins_pricing v2/modelling/watchdog_run.py +211 -0
  108. ins_pricing v2/pricing/__init__.py +27 -0
  109. ins_pricing v2/pricing/calibration.py +39 -0
  110. ins_pricing v2/pricing/data_quality.py +117 -0
  111. ins_pricing v2/pricing/exposure.py +85 -0
  112. ins_pricing v2/pricing/factors.py +91 -0
  113. ins_pricing v2/pricing/monitoring.py +99 -0
  114. ins_pricing v2/pricing/rate_table.py +78 -0
  115. ins_pricing v2/production/__init__.py +21 -0
  116. ins_pricing v2/production/drift.py +30 -0
  117. ins_pricing v2/production/monitoring.py +143 -0
  118. ins_pricing v2/production/scoring.py +40 -0
  119. ins_pricing v2/reporting/__init__.py +11 -0
  120. ins_pricing v2/reporting/report_builder.py +72 -0
  121. ins_pricing v2/reporting/scheduler.py +45 -0
  122. ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
  123. ins_pricing v2/scripts/Explain_entry.py +545 -0
  124. ins_pricing v2/scripts/__init__.py +1 -0
  125. ins_pricing v2/scripts/train.py +568 -0
  126. ins_pricing v2/setup.py +55 -0
  127. ins_pricing v2/smoke_test.py +28 -0
  128. ins_pricing-0.1.6.dist-info/METADATA +78 -0
  129. ins_pricing-0.1.6.dist-info/RECORD +169 -0
  130. ins_pricing-0.1.6.dist-info/WHEEL +5 -0
  131. ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
  132. user_packages/__init__.py +105 -0
  133. user_packages legacy/BayesOpt.py +5659 -0
  134. user_packages legacy/BayesOpt_entry.py +513 -0
  135. user_packages legacy/BayesOpt_incremental.py +685 -0
  136. user_packages legacy/Pricing_Run.py +36 -0
  137. user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
  138. user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
  139. user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
  140. user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
  141. user_packages legacy/Try/BayesOpt legacy.py +3280 -0
  142. user_packages legacy/Try/BayesOpt.py +838 -0
  143. user_packages legacy/Try/BayesOptAll.py +1569 -0
  144. user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
  145. user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
  146. user_packages legacy/Try/BayesOptSearch.py +830 -0
  147. user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
  148. user_packages legacy/Try/BayesOptV1.py +1911 -0
  149. user_packages legacy/Try/BayesOptV10.py +2973 -0
  150. user_packages legacy/Try/BayesOptV11.py +3001 -0
  151. user_packages legacy/Try/BayesOptV12.py +3001 -0
  152. user_packages legacy/Try/BayesOptV2.py +2065 -0
  153. user_packages legacy/Try/BayesOptV3.py +2209 -0
  154. user_packages legacy/Try/BayesOptV4.py +2342 -0
  155. user_packages legacy/Try/BayesOptV5.py +2372 -0
  156. user_packages legacy/Try/BayesOptV6.py +2759 -0
  157. user_packages legacy/Try/BayesOptV7.py +2832 -0
  158. user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
  159. user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
  160. user_packages legacy/Try/BayesOptV9.py +2927 -0
  161. user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
  162. user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
  163. user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
  164. user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
  165. user_packages legacy/Try/xgbbayesopt.py +523 -0
  166. user_packages legacy/__init__.py +19 -0
  167. user_packages legacy/cli_common.py +124 -0
  168. user_packages legacy/notebook_utils.py +228 -0
  169. user_packages legacy/watchdog_run.py +202 -0
@@ -0,0 +1,1911 @@
+ # Transferring data between the CPU and GPU carries significant overhead, but multiple CUDA streams can overlap transfers with compute, making it possible to work with larger datasets.
+
+ import numpy as np  # 1.26.2
+ import pandas as pd  # 2.2.3
+ import torch  # 1.10.1+cu111
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import optuna  # 4.3.0
+ import xgboost as xgb  # 1.7.0
+ import matplotlib.pyplot as plt
+ import os
+ import joblib
+ import copy
+ import shap
+ import math
+ import gc
+
+ from torch.utils.data import Dataset, DataLoader, TensorDataset
+ from torch.cuda.amp import autocast, GradScaler
+ from torch.nn.utils import clip_grad_norm_
+ from sklearn.model_selection import ShuffleSplit, cross_val_score  # 1.2.2
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.metrics import make_scorer, mean_tweedie_deviance
+
+
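For reference, a minimal sketch of the multi-stream pattern the header comment describes (illustrative only, not part of the packaged code; assumes a CUDA device and pinned host memory):

    if torch.cuda.is_available():
        copy_stream = torch.cuda.Stream()
        # Pinned (page-locked) host memory is required for truly async copies.
        x_cpu = torch.randn(1_000_000, pin_memory=True)
        with torch.cuda.stream(copy_stream):
            x_gpu = x_cpu.to('cuda', non_blocking=True)  # copy on the side stream
        # ... kernels issued on the default stream can run concurrently here ...
        torch.cuda.current_stream().wait_stream(copy_stream)  # sync before using x_gpu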
32
+ def ensure_parent_dir(file_path: str) -> None:
+     """Create parent directory for the provided file path if it is missing."""
+     directory = os.path.dirname(file_path)
+     if directory:
+         os.makedirs(directory, exist_ok=True)
+
+
+ # Tweedie deviance loss for torch.
+ # Reference: https://scikit-learn.org/stable/modules/model_evaluation.html#mean-poisson-gamma-and-tweedie-deviances
+
+
+ def tweedie_loss(pred, target, p=1.5, eps=1e-6, max_clip=1e6):
+     # Ensure predictions are positive for stability
+     pred_clamped = torch.clamp(pred, min=eps)
+     # Compute Tweedie deviance components
+     if p == 1:
+         # Poisson case: deviance = 2 * (y * log(y / mu) - y + mu)
+         term1 = target * torch.log(target / pred_clamped + eps)
+         term2 = target - pred_clamped  # sign fixed so 2 * (term1 - term2) matches the deviance
+         term3 = 0
+     elif p == 0:
+         # Gaussian case
+         term1 = 0.5 * torch.pow(target - pred_clamped, 2)
+         term2 = 0
+         term3 = 0
+     elif p == 2:
+         # Gamma case
+         term1 = torch.log(pred_clamped / target + eps)
+         term2 = -target / pred_clamped + 1
+         term3 = 0
+     else:
+         term1 = torch.pow(target, 2 - p) / ((1 - p) * (2 - p))
+         term2 = target * torch.pow(pred_clamped, 1 - p) / (1 - p)
+         term3 = torch.pow(pred_clamped, 2 - p) / (2 - p)
+     # Tweedie negative log-likelihood (up to a constant)
+     return torch.nan_to_num(2 * (term1 - term2 + term3), nan=eps, posinf=max_clip, neginf=-max_clip)
+
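As a quick sanity check (illustrative, not part of the package), the torch loss can be compared against sklearn's mean_tweedie_deviance; for a power like 1.5 that avoids the eps-in-log branches, the two agree up to float precision:

    y = torch.tensor([1.0, 2.0, 0.5])
    mu = torch.tensor([1.2, 1.8, 0.7])
    print(tweedie_loss(mu, y, p=1.5).mean().item())
    print(mean_tweedie_deviance(y.numpy(), mu.numpy(), power=1.5))  # same value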
69
+ # Helper to free CUDA memory.
+
+
+ def free_cuda():
+     print(">>> Moving all models to CPU...")
+     for obj in gc.get_objects():
+         try:
+             if hasattr(obj, "to") and callable(obj.to):
+                 # skip torch.device
+                 obj.to("cpu")
+         except Exception:
+             pass
+
+     print(">>> Deleting tensors, optimizers, dataloaders...")
+     gc.collect()
+
+     print(">>> Emptying CUDA cache...")
+     torch.cuda.empty_cache()
+     torch.cuda.synchronize()
+
+     print(">>> CUDA memory freed.")
+
+
+ # Weighted binning helper: splits rows into n_bins buckets of roughly equal total weight.
+
+
+ def split_data(data, col_nme, wgt_nme, n_bins=10):
+     data.sort_values(by=col_nme, ascending=True, inplace=True)
+     data['cum_weight'] = data[wgt_nme].cumsum()
+     w_sum = data[wgt_nme].sum()
+     data.loc[:, 'bins'] = np.floor(data['cum_weight'] * float(n_bins) / w_sum)
+     data.loc[(data['bins'] == n_bins), 'bins'] = n_bins - 1
+     return data.groupby(['bins'], observed=True).sum(numeric_only=True)
+
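A toy usage example (illustrative; the frame and column names are made up): with random weights, each of the ten bins ends up holding roughly one tenth of the total weight.

    toy = pd.DataFrame({'score': np.random.rand(1000),
                        'w': np.random.rand(1000)})
    binned = split_data(toy, 'score', 'w', n_bins=10)
    print(binned['w'])  # per-bin weight sums, roughly equal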
103
+ # Lift chart plotting helper.
+
+
+ def plot_lift_list(pred_model, w_pred_list, w_act_list,
+                    weight_list, tgt_nme, n_bins=10,
+                    fig_nme='Lift Chart'):
+     lift_data = pd.DataFrame()
+     lift_data.loc[:, 'pred'] = pred_model
+     lift_data.loc[:, 'w_pred'] = w_pred_list
+     lift_data.loc[:, 'act'] = w_act_list
+     lift_data.loc[:, 'weight'] = weight_list
+     plot_data = split_data(lift_data, 'pred', 'weight', n_bins)
+     plot_data['exp_v'] = plot_data['w_pred'] / plot_data['weight']
+     plot_data['act_v'] = plot_data['act'] / plot_data['weight']
+     plot_data.reset_index(inplace=True)
+     fig = plt.figure(figsize=(7, 5))
+     ax = fig.add_subplot(111)
+     ax.plot(plot_data.index, plot_data['act_v'],
+             label='Actual', color='red')
+     ax.plot(plot_data.index, plot_data['exp_v'],
+             label='Predicted', color='blue')
+     ax.set_title(
+         'Lift Chart of %s' % tgt_nme, fontsize=8)
+     plt.xticks(plot_data.index,
+                plot_data.index,
+                rotation=90, fontsize=6)
+     plt.yticks(fontsize=6)
+     plt.legend(loc='upper left',
+                fontsize=5, frameon=False)
+     plt.margins(0.05)
+     ax2 = ax.twinx()
+     ax2.bar(plot_data.index, plot_data['weight'],
+             alpha=0.5, color='seagreen',
+             label='Earned Exposure')
+     plt.yticks(fontsize=6)
+     plt.legend(loc='upper right',
+                fontsize=5, frameon=False)
+     plt.subplots_adjust(wspace=0.3)
+     save_path = os.path.join(
+         os.getcwd(), 'plot', f'05_{tgt_nme}_{fig_nme}.png')
+     ensure_parent_dir(save_path)
+     plt.savefig(save_path, dpi=300)
+     plt.close(fig)
+
+ # Double lift chart plotting helper.
+
+
+ def plot_dlift_list(pred_model_1, pred_model_2,
+                     model_nme_1, model_nme_2,
+                     tgt_nme,
+                     w_list, w_act_list, n_bins=10,
+                     fig_nme='Double Lift Chart'):
+     lift_data = pd.DataFrame()
+     lift_data.loc[:, 'pred1'] = pred_model_1
+     lift_data.loc[:, 'pred2'] = pred_model_2
+     lift_data.loc[:, 'diff_ly'] = lift_data['pred1'] / lift_data['pred2']
+     lift_data.loc[:, 'act'] = w_act_list
+     lift_data.loc[:, 'weight'] = w_list
+     lift_data.loc[:, 'w_pred1'] = lift_data['pred1'] * lift_data['weight']
+     lift_data.loc[:, 'w_pred2'] = lift_data['pred2'] * lift_data['weight']
+     plot_data = split_data(lift_data, 'diff_ly', 'weight', n_bins)
+     plot_data['exp_v1'] = plot_data['w_pred1'] / plot_data['act']
+     plot_data['exp_v2'] = plot_data['w_pred2'] / plot_data['act']
+     plot_data['act_v'] = plot_data['act'] / plot_data['act']
+     plot_data.reset_index(inplace=True)
+     fig = plt.figure(figsize=(7, 5))
+     ax = fig.add_subplot(111)
+     ax.plot(plot_data.index, plot_data['act_v'],
+             label='Actual', color='red')
+     ax.plot(plot_data.index, plot_data['exp_v1'],
+             label=model_nme_1, color='blue')
+     ax.plot(plot_data.index, plot_data['exp_v2'],
+             label=model_nme_2, color='black')
+     ax.set_title(
+         'Double Lift Chart of %s' % tgt_nme, fontsize=8)
+     plt.xticks(plot_data.index,
+                plot_data.index,
+                rotation=90, fontsize=6)
+     plt.xlabel('%s / %s' % (model_nme_1, model_nme_2), fontsize=6)
+     plt.yticks(fontsize=6)
+     plt.legend(loc='upper left',
+                fontsize=5, frameon=False)
+     plt.margins(0.1)
+     plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8)
+     ax2 = ax.twinx()
+     ax2.bar(plot_data.index, plot_data['weight'],
+             alpha=0.5, color='seagreen',
+             label='Earned Exposure')
+     plt.yticks(fontsize=6)
+     plt.legend(loc='upper right',
+                fontsize=5, frameon=False)
+     plt.subplots_adjust(wspace=0.3)
+     save_path = os.path.join(
+         os.getcwd(), 'plot', f'06_{tgt_nme}_{fig_nme}.png')
+     ensure_parent_dir(save_path)
+     plt.savefig(save_path, dpi=300)
+     plt.close(fig)
+
+
201
+
202
+ # 开始定义ResNet模型结构
203
+ # 残差块:两层线性 + ReLU + 残差连接
204
+ # ResBlock 继承 nn.Module
205
+ class ResBlock(nn.Module):
206
+ def __init__(self, dim: int, dropout: float = 0.1,
207
+ use_layernorm: bool = False, residual_scale: float = 0.1
208
+ ):
209
+ super().__init__()
210
+ self.use_layernorm = use_layernorm
211
+
212
+ if use_layernorm:
213
+ Norm = nn.LayerNorm # 对最后一维做归一化
214
+ else:
215
+ def Norm(d): return nn.BatchNorm1d(d) # 保留一个开关,想试 BN 时也能用
216
+
217
+ self.norm1 = Norm(dim)
218
+ self.fc1 = nn.Linear(dim, dim, bias=True)
219
+ self.act = nn.ReLU(inplace=True)
220
+ self.dropout = nn.Dropout(dropout) if dropout > 0.0 else nn.Identity()
221
+ self.norm2 = Norm(dim)
222
+ self.fc2 = nn.Linear(dim, dim, bias=True)
223
+
224
+ # 残差缩放,防止一开始就把主干搞炸
225
+ self.res_scale = nn.Parameter(
226
+ torch.tensor(residual_scale, dtype=torch.float32)
227
+ )
228
+
229
+ def forward(self, x):
230
+ # pre-activation
231
+ out = self.norm1(x)
232
+ out = self.fc1(out)
233
+ out = self.act(out)
234
+ out = self.dropout(out)
235
+ out = self.norm2(out)
236
+ out = self.fc2(out)
237
+ # 残差缩放再相加
238
+ return F.relu(x + self.res_scale * out)
239
+
240
+ # ResNetSequential 继承 nn.Module,定义整个网络结构
241
+
242
+
243
+ class ResNetSequential(nn.Module):
244
+ # 输入: (batch, input_dim)
245
+ # 结构:
246
+ # fc1 -> LN/Bn -> ReLU -> ResBlock * block_num -> fc_out -> Softplus
247
+
248
+ def __init__(self, input_dim: int, hidden_dim: int = 64, block_num: int = 2,
249
+ use_layernorm: bool = True, dropout: float = 0.1,
250
+ residual_scale: float = 0.1):
251
+ super(ResNetSequential, self).__init__()
252
+
253
+ self.net = nn.Sequential()
254
+ self.net.add_module('fc1', nn.Linear(input_dim, hidden_dim))
255
+
256
+ if use_layernorm:
257
+ self.net.add_module('norm1', nn.LayerNorm(hidden_dim))
258
+ else:
259
+ self.net.add_module('norm1', nn.BatchNorm1d(hidden_dim))
260
+
261
+ self.net.add_module('relu1', nn.ReLU(inplace=True))
262
+
263
+ # 多个残差块
264
+ for i in range(block_num):
265
+ self.net.add_module(
266
+ f'ResBlk_{i+1}',
267
+ ResBlock(
268
+ hidden_dim,
269
+ dropout=dropout,
270
+ use_layernorm=use_layernorm,
271
+ residual_scale=residual_scale)
272
+ )
273
+
274
+ self.net.add_module('fc_out', nn.Linear(hidden_dim, 1))
275
+ self.net.add_module('softplus', nn.Softplus())
276
+
277
+ def forward(self, x):
278
+ return self.net(x)
279
+
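A quick shape and positivity check (illustrative only; toy dimensions): the Softplus head keeps outputs strictly positive, which the Tweedie loss requires.

    net = ResNetSequential(input_dim=8, hidden_dim=32, block_num=2)
    out = net(torch.randn(4, 8))
    assert out.shape == (4, 1) and (out > 0).all()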
280
+ # Scikit-learn style wrapper for the ResNet model.
+
+
+ class ResNetSklearn(nn.Module):
+     def __init__(self, model_nme: str, input_dim: int, hidden_dim: int = 64,
+                  block_num: int = 2, batch_num: int = 100, epochs: int = 100,
+                  tweedie_power: float = 1.5, learning_rate: float = 0.01, patience: int = 10,
+                  use_layernorm: bool = True, dropout: float = 0.1,
+                  residual_scale: float = 0.1):
+         super(ResNetSklearn, self).__init__()
+
+         self.input_dim = input_dim
+         self.hidden_dim = hidden_dim
+         self.block_num = block_num
+         self.batch_num = batch_num
+         self.epochs = epochs
+         self.model_nme = model_nme
+         self.learning_rate = learning_rate
+         self.patience = patience
+         self.use_layernorm = use_layernorm
+         self.dropout = dropout
+         self.residual_scale = residual_scale
+
+         # Device selection: cuda > mps > cpu
+         if torch.cuda.is_available():
+             self.device = torch.device('cuda')
+         elif torch.backends.mps.is_available():
+             self.device = torch.device('mps')
+         else:
+             self.device = torch.device('cpu')
+
+         # Tweedie power, inferred from the model name
+         if 'f' in self.model_nme:
+             self.tw_power = 1
+         elif 's' in self.model_nme:
+             self.tw_power = 2
+         else:
+             self.tw_power = tweedie_power
+
+         # Build the network
+         self.resnet = ResNetSequential(
+             self.input_dim,
+             self.hidden_dim,
+             self.block_num,
+             use_layernorm=self.use_layernorm,
+             dropout=self.dropout,
+             residual_scale=self.residual_scale
+         ).to(self.device)
+
+     def fit(self, X_train, y_train, w_train=None,
+             X_val=None, y_val=None, w_val=None):
+
+         # === 1. Training set: keep on CPU; the DataLoader moves batches to the GPU ===
+         X_tensor = torch.tensor(X_train.values, dtype=torch.float32)
+         y_tensor = torch.tensor(
+             y_train.values, dtype=torch.float32).view(-1, 1)
+         if w_train is not None:
+             w_tensor = torch.tensor(
+                 w_train.values, dtype=torch.float32).view(-1, 1)
+         else:
+             w_tensor = torch.ones_like(y_tensor)
+
+         # === 2. Validation set: build on CPU, moved to the device once later ===
+         has_val = X_val is not None and y_val is not None
+         if has_val:
+             X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
+             y_val_tensor = torch.tensor(
+                 y_val.values, dtype=torch.float32).view(-1, 1)
+             if w_val is not None:
+                 w_val_tensor = torch.tensor(
+                     w_val.values, dtype=torch.float32).view(-1, 1)
+             else:
+                 w_val_tensor = torch.ones_like(y_val_tensor)
+         else:
+             X_val_tensor = y_val_tensor = w_val_tensor = None
+
+         # === 3. DataLoader ===
+         dataset = TensorDataset(X_tensor, y_tensor, w_tensor)
+         # Heuristic: scale the batch size with sqrt of the learning-rate ratio around
+         # a base of N / batch_num, floored at 4096.
+         batch_size = max(
+             4096,
+             int((self.learning_rate / (1e-4)) ** 0.5 *
+                 (X_train.shape[0] / self.batch_num))
+         )
+
+         dataloader = DataLoader(
+             dataset,
+             batch_size=batch_size,
+             shuffle=True,
+             num_workers=1,  # 0-1 workers is usually enough for tabular data
+             pin_memory=(self.device.type == 'cuda')
+         )
+
+         # === 4. Optimizer & AMP ===
+         optimizer = torch.optim.Adam(
+             self.resnet.parameters(), lr=self.learning_rate)
+         scaler = GradScaler(enabled=(self.device.type == 'cuda'))
+
+         # === 5. Early stopping ===
+         best_loss, patience_counter = float('inf'), 0
+         best_model_state = None
+
+         # If a validation set exists, move it to the device once up front.
+         if has_val:
+             X_val_dev = X_val_tensor.to(self.device, non_blocking=True)
+             y_val_dev = y_val_tensor.to(self.device, non_blocking=True)
+             w_val_dev = w_val_tensor.to(self.device, non_blocking=True)
+
+         # === 6. Training loop ===
+         for epoch in range(1, self.epochs + 1):
+             self.resnet.train()
+             for X_batch, y_batch, w_batch in dataloader:
+                 optimizer.zero_grad()
+
+                 X_batch = X_batch.to(self.device, non_blocking=True)
+                 y_batch = y_batch.to(self.device, non_blocking=True)
+                 w_batch = w_batch.to(self.device, non_blocking=True)
+
+                 with autocast(enabled=(self.device.type == 'cuda')):
+                     y_pred = self.resnet(X_batch)
+                     y_pred = torch.clamp(y_pred, min=1e-6)
+
+                     losses = tweedie_loss(
+                         y_pred, y_batch, p=self.tw_power).view(-1)
+                     weighted_loss = (losses * w_batch.view(-1)
+                                      ).sum() / w_batch.sum()
+
+                 scaler.scale(weighted_loss).backward()
+
+                 if self.device.type == 'cuda':
+                     scaler.unscale_(optimizer)
+                     clip_grad_norm_(self.resnet.parameters(), max_norm=1.0)
+
+                 scaler.step(optimizer)
+                 scaler.update()
+
+             # === 7. Validation loss & early stopping ===
+             if has_val:
+                 self.resnet.eval()
+                 with torch.no_grad(), autocast(enabled=(self.device.type == 'cuda')):
+                     y_val_pred = self.resnet(X_val_dev)
+                     y_val_pred = torch.clamp(y_val_pred, min=1e-6)
+
+                     val_loss_values = tweedie_loss(
+                         y_val_pred, y_val_dev, p=self.tw_power
+                     ).view(-1)
+                     val_weighted_loss = (
+                         val_loss_values * w_val_dev.view(-1)
+                     ).sum() / w_val_dev.sum()
+
+                 if val_weighted_loss < best_loss:
+                     best_loss = val_weighted_loss
+                     patience_counter = 0
+                     best_model_state = copy.deepcopy(self.resnet.state_dict())
+                 else:
+                     patience_counter += 1
+
+                 if patience_counter >= self.patience and best_model_state is not None:
+                     self.resnet.load_state_dict(best_model_state)
+                     break
+         if has_val and best_model_state is not None:
+             self.resnet.load_state_dict(best_model_state)
+
+     # ---------------- predict ----------------
+
+     def predict(self, X_test):
+         self.resnet.eval()
+         with torch.no_grad():
+             X_tensor = torch.tensor(
+                 X_test.values, dtype=torch.float32).to(self.device)
+             y_pred = self.resnet(X_tensor).cpu().numpy()
+
+         y_pred = np.clip(y_pred, 1e-6, None)
+         return y_pred.flatten()
+
+     # ---------------- set_params ----------------
+
+     def set_params(self, params):
+         for key, value in params.items():
+             if hasattr(self, key):
+                 setattr(self, key, value)
+             else:
+                 raise ValueError(f"Parameter {key} not found in model.")
+
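A minimal usage sketch on synthetic data (illustrative; names and sizes are invented, and 'bc_demo' contains neither 'f' nor 's', so the default Tweedie power applies):

    X = pd.DataFrame(np.random.rand(256, 5),
                     columns=[f'x{i}' for i in range(5)])
    y = pd.Series(np.random.gamma(2.0, 1.0, size=256))
    w = pd.Series(np.ones(256))
    resn = ResNetSklearn(model_nme='bc_demo', input_dim=5, epochs=5)
    resn.fit(X, y, w)
    preds = resn.predict(X)  # positive array of shape (256,)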
463
+ # FT-Transformer model definition.
+
+
+ class FeatureTokenizer(nn.Module):
+     # Maps numeric & categorical features to tokens of shape (batch, n_tokens, d_model).
+     # Assumptions:
+     #   - X_num: (batch, num_numeric)
+     #   - X_cat: (batch, num_categorical), each column already label-encoded to integers in [0, card-1]
+
+     def __init__(self, num_numeric: int, cat_cardinalities, d_model: int):
+         super().__init__()
+
+         self.num_numeric = num_numeric
+         self.has_numeric = num_numeric > 0
+
+         if self.has_numeric:
+             self.num_linear = nn.Linear(num_numeric, d_model)
+
+         self.embeddings = nn.ModuleList([
+             nn.Embedding(card, d_model) for card in cat_cardinalities
+         ])
+
+     def forward(self, X_num, X_cat):
+         tokens = []
+
+         if self.has_numeric:
+             # All numeric features are projected into a single token.
+             num_token = self.num_linear(X_num)  # (batch, d_model)
+             tokens.append(num_token)
+
+         # One embedding token per categorical feature.
+         for i, emb in enumerate(self.embeddings):
+             tok = emb(X_cat[:, i])  # (batch, d_model)
+             tokens.append(tok)
+
+         # (batch, n_tokens, d_model)
+         x = torch.stack(tokens, dim=1)
+         return x
+
+ # Encoder layer with residual scaling.
+
+
+ class ScaledTransformerEncoderLayer(nn.Module):
+     def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048,
+                  dropout: float = 0.1, residual_scale_attn: float = 1.0,
+                  residual_scale_ffn: float = 1.0, norm_first: bool = True,
+                  ):
+         super().__init__()
+         self.self_attn = nn.MultiheadAttention(
+             embed_dim=d_model,
+             num_heads=nhead,
+             dropout=dropout,
+             batch_first=True
+         )
+
+         # FFN
+         self.linear1 = nn.Linear(d_model, dim_feedforward)
+         self.dropout = nn.Dropout(dropout)
+         self.linear2 = nn.Linear(dim_feedforward, d_model)
+
+         # Norm & Dropout
+         self.norm1 = nn.LayerNorm(d_model)
+         self.norm2 = nn.LayerNorm(d_model)
+         self.dropout1 = nn.Dropout(dropout)
+         self.dropout2 = nn.Dropout(dropout)
+
+         self.activation = nn.GELU()
+         self.norm_first = norm_first
+
+         # Residual scaling coefficients
+         self.res_scale_attn = residual_scale_attn
+         self.res_scale_ffn = residual_scale_ffn
+
+     def forward(self, src, src_mask=None, src_key_padding_mask=None):
+         """
+         src: (B, T, d_model)
+         """
+         x = src
+
+         if self.norm_first:
+             # pre-norm
+             x = x + self._sa_block(self.norm1(x), src_mask,
+                                    src_key_padding_mask)
+             x = x + self._ff_block(self.norm2(x))
+         else:
+             # post-norm (generally unused here)
+             x = self.norm1(
+                 x + self._sa_block(x, src_mask, src_key_padding_mask))
+             x = self.norm2(x + self._ff_block(x))
+
+         return x
+
+     def _sa_block(self, x, attn_mask, key_padding_mask):
+         # Self-attention + residual scaling
+         attn_out, _ = self.self_attn(
+             x, x, x,
+             attn_mask=attn_mask,
+             key_padding_mask=key_padding_mask,
+             need_weights=False
+         )
+         return self.res_scale_attn * self.dropout1(attn_out)
+
+     def _ff_block(self, x):
+         # FFN + residual scaling
+         x2 = self.linear2(self.dropout(self.activation(self.linear1(x))))
+         return self.res_scale_ffn * self.dropout2(x2)
+
+ # FT-Transformer core model.
+
+
+ class FTTransformerCore(nn.Module):
+     # A minimal working FT-Transformer:
+     #   - FeatureTokenizer: numeric & categorical features -> tokens
+     #   - TransformerEncoder: captures feature interactions
+     #   - pooling + MLP + Softplus: positive outputs (suits Tweedie/Gamma)
+
+     def __init__(self, num_numeric: int, cat_cardinalities, d_model: int = 64,
+                  n_heads: int = 8, n_layers: int = 4, dropout: float = 0.1,
+                  ):
+         super().__init__()
+
+         self.tokenizer = FeatureTokenizer(
+             num_numeric=num_numeric,
+             cat_cardinalities=cat_cardinalities,
+             d_model=d_model
+         )
+         scale = 1.0 / math.sqrt(n_layers)  # a sensible default for the residual scale
+         encoder_layer = ScaledTransformerEncoderLayer(
+             d_model=d_model,
+             nhead=n_heads,
+             dim_feedforward=d_model * 4,
+             dropout=dropout,
+             residual_scale_attn=scale,
+             residual_scale_ffn=scale,
+             norm_first=True,
+         )
+         self.encoder = nn.TransformerEncoder(
+             encoder_layer,
+             num_layers=n_layers
+         )
+         self.n_layers = n_layers
+
+         self.head = nn.Sequential(
+             nn.LayerNorm(d_model),
+             nn.Linear(d_model, d_model),
+             nn.GELU(),
+             nn.Linear(d_model, 1),
+             nn.Softplus()  # keeps outputs positive, suitable for Tweedie / Gamma
+         )
+
+     def forward(self, X_num, X_cat):
+
+         # X_num: (batch, num_numeric) float32
+         # X_cat: (batch, num_categorical) long
+
+         tokens = self.tokenizer(X_num, X_cat)  # (batch, tokens, d_model)
+         x = self.encoder(tokens)  # (batch, tokens, d_model)
+
+         # Simple mean pooling over tokens
+         x = x.mean(dim=1)  # (batch, d_model)
+
+         out = self.head(x)  # (batch, 1), Softplus already applied
+         return out
+
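A quick shape check (illustrative; toy dimensions, assuming the pinned torch version): three numeric features become one token, and two categorical columns add two more.

    core = FTTransformerCore(num_numeric=3, cat_cardinalities=[5, 7],
                             d_model=32, n_heads=4, n_layers=2)
    out = core(torch.randn(4, 3), torch.randint(0, 5, (4, 2)))
    assert out.shape == (4, 1) and (out > 0).all()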
627
+ # TabularDataset definition.
+
+
+ class TabularDataset(Dataset):
+     def __init__(self, X_num, X_cat, y, w):
+
+         # X_num: torch.float32, (N, num_numeric)
+         # X_cat: torch.long, (N, num_categorical)
+         # y: torch.float32, (N, 1)
+         # w: torch.float32, (N, 1)
+
+         self.X_num = X_num
+         self.X_cat = X_cat
+         self.y = y
+         self.w = w
+
+     def __len__(self):
+         return self.y.shape[0]
+
+     def __getitem__(self, idx):
+         return (
+             self.X_num[idx],
+             self.X_cat[idx],
+             self.y[idx],
+             self.w[idx],
+         )
+
+ # Scikit-learn style wrapper for FTTransformer.
+
+
+ class FTTransformerSklearn(nn.Module):
+
+     # sklearn-style wrapper:
+     #   - num_cols: list of numeric feature column names
+     #   - cat_cols: list of categorical feature column names (label encoded, values in [0, n_classes-1])
+
+     def __init__(self, model_nme: str, num_cols, cat_cols, d_model: int = 64, n_heads: int = 8,
+                  n_layers: int = 4, dropout: float = 0.1, batch_num: int = 100, epochs: int = 100,
+                  tweedie_power: float = 1.5, learning_rate: float = 1e-3, patience: int = 10,
+                  ):
+         super().__init__()
+
+         self.model_nme = model_nme
+         self.num_cols = list(num_cols)
+         self.cat_cols = list(cat_cols)
+         self.d_model = d_model
+         self.n_heads = n_heads
+         self.n_layers = n_layers
+         self.dropout = dropout
+         self.batch_num = batch_num
+         self.epochs = epochs
+         self.learning_rate = learning_rate
+         self.patience = patience
+         if 'f' in self.model_nme:
+             self.tw_power = 1.0
+         elif 's' in self.model_nme:
+             self.tw_power = 2.0
+         else:
+             self.tw_power = tweedie_power
+         if torch.cuda.is_available():
+             self.device = torch.device("cuda")
+         elif torch.backends.mps.is_available():
+             self.device = torch.device("mps")
+         else:
+             self.device = torch.device("cpu")
+         self.cat_cardinalities = None
+         self.cat_categories = {}
+         self.ft = None
+
+     def _build_model(self, X_train):
+         num_numeric = len(self.num_cols)
+         cat_cardinalities = []
+
+         for col in self.cat_cols:
+             cats = X_train[col].astype('category')
+             categories = cats.cat.categories
+             self.cat_categories[col] = categories  # remember the training-set category universe
+
+             card = len(categories) + 1  # reserve one extra class for unknown/missing
+             cat_cardinalities.append(card)
+
+         self.cat_cardinalities = cat_cardinalities
+
+         self.ft = FTTransformerCore(
+             num_numeric=num_numeric,
+             cat_cardinalities=cat_cardinalities,
+             d_model=self.d_model,
+             n_heads=self.n_heads,
+             n_layers=self.n_layers,
+             dropout=self.dropout,
+         ).to(self.device)
+
+     def _encode_cats(self, X):
+         # X: DataFrame containing at least self.cat_cols
+         # Returns: np.ndarray, shape (N, num_categorical), dtype=int64
+
+         if not self.cat_cols:
+             return np.zeros((len(X), 0), dtype='int64')
+
+         X_cat_list = []
+         for col in self.cat_cols:
+             # Use the categories recorded at training time.
+             categories = self.cat_categories[col]
+             # Build a Categorical with the fixed category set.
+             cats = pd.Categorical(X[col], categories=categories)
+             codes = cats.codes.astype('int64', copy=True)  # -1 marks unknown or missing
+             # Map unknown / missing values to the "unknown" bucket at index len(categories).
+             codes[codes < 0] = len(categories)
+             X_cat_list.append(codes)
+
+         X_cat_np = np.stack(X_cat_list, axis=1)  # (N, num_categorical)
+         return X_cat_np
+
+     def fit(self, X_train, y_train, w_train=None,
+             X_val=None, y_val=None, w_val=None):
+
+         # Build the model structure on the first fit.
+         if self.ft is None:
+             self._build_model(X_train)
+
+         # --- Build training tensors (all on CPU; batches are moved to the GPU later) ---
+         X_num_train = X_train[self.num_cols].to_numpy(
+             dtype=np.float32, copy=True)
+         X_num_train = torch.tensor(
+             X_num_train,
+             dtype=torch.float32
+         )
+
+         if self.cat_cols:
+             X_cat_train_np = self._encode_cats(X_train)
+             X_cat_train = torch.tensor(X_cat_train_np, dtype=torch.long)
+         else:
+             X_cat_train = torch.zeros(
+                 (X_num_train.size(0), 0), dtype=torch.long)
+
+         y_tensor = torch.tensor(
+             y_train.values,
+             dtype=torch.float32
+         ).view(-1, 1)
+
+         if w_train is not None:
+             w_tensor = torch.tensor(
+                 w_train.values,
+                 dtype=torch.float32
+             ).view(-1, 1)
+         else:
+             w_tensor = torch.ones_like(y_tensor)
+
+         # --- Validation tensors (moved to the device in one go later) ---
+         has_val = X_val is not None and y_val is not None
+         if has_val:
+             # ---------- numeric features ----------
+             X_num_val_np = X_val[self.num_cols].to_numpy(
+                 dtype=np.float32, copy=True)
+             X_num_val = torch.tensor(X_num_val_np, dtype=torch.float32)
+
+             # ---------- categorical features ----------
+             if self.cat_cols:
+                 X_cat_val_np = self._encode_cats(X_val)
+                 X_cat_val = torch.tensor(X_cat_val_np, dtype=torch.long)
+             else:
+                 X_cat_val = torch.zeros(
+                     (X_num_val.shape[0], 0), dtype=torch.long)
+
+             # ---------- target & weights ----------
+             y_val_np = y_val.values.astype(np.float32, copy=True)
+             y_val_tensor = torch.tensor(
+                 y_val_np, dtype=torch.float32).view(-1, 1)
+
+             if w_val is not None:
+                 w_val_np = w_val.values.astype(np.float32, copy=True)
+                 w_val_tensor = torch.tensor(
+                     w_val_np, dtype=torch.float32).view(-1, 1)
+             else:
+                 w_val_tensor = torch.ones_like(y_val_tensor)
+
+         else:
+             X_num_val = X_cat_val = y_val_tensor = w_val_tensor = None
+
+         # --- DataLoader ---
+         dataset = TabularDataset(
+             X_num_train, X_cat_train, y_tensor, w_tensor
+         )
+
+         # Same batch-size heuristic as the ResNet wrapper, with a lower floor of 32.
+         batch_size = max(
+             32,
+             int((self.learning_rate / 1e-4) ** 0.5 *
+                 (X_train.shape[0] / self.batch_num))
+         )
+
+         dataloader = DataLoader(
+             dataset,
+             batch_size=batch_size,
+             shuffle=True,
+             num_workers=1,
+             pin_memory=(self.device.type == 'cuda')
+         )
+
+         # --- Optimizer & AMP ---
+         optimizer = torch.optim.Adam(
+             self.ft.parameters(),
+             lr=self.learning_rate
+         )
+         scaler = GradScaler(enabled=(self.device.type == 'cuda'))
+
+         # --- Early stopping ---
+         best_loss = float('inf')
+         patience_counter = 0
+         best_model_state = None
+
+         # Move the whole validation set to the device once (if present).
+         if has_val:
+             X_num_val_dev = X_num_val.to(self.device, non_blocking=True)
+             X_cat_val_dev = X_cat_val.to(self.device, non_blocking=True)
+             y_val_dev = y_val_tensor.to(self.device, non_blocking=True)
+             w_val_dev = w_val_tensor.to(self.device, non_blocking=True)
+
+         # --- Training loop ---
+         for epoch in range(1, self.epochs + 1):
+             self.ft.train()
+             for X_num_b, X_cat_b, y_b, w_b in dataloader:
+                 optimizer.zero_grad()
+
+                 X_num_b = X_num_b.to(self.device, non_blocking=True)
+                 X_cat_b = X_cat_b.to(self.device, non_blocking=True)
+                 y_b = y_b.to(self.device, non_blocking=True)
+                 w_b = w_b.to(self.device, non_blocking=True)
+
+                 with autocast(enabled=(self.device.type == 'cuda')):
+                     y_pred = self.ft(X_num_b, X_cat_b)
+                     y_pred = torch.clamp(y_pred, min=1e-6)
+
+                     losses = tweedie_loss(
+                         y_pred, y_b, p=self.tw_power
+                     ).view(-1)
+
+                     weighted_loss = (losses * w_b.view(-1)).sum() / w_b.sum()
+
+                 scaler.scale(weighted_loss).backward()
+
+                 if self.device.type == 'cuda':
+                     scaler.unscale_(optimizer)
+                     clip_grad_norm_(self.ft.parameters(), max_norm=1.0)
+
+                 scaler.step(optimizer)
+                 scaler.update()
+
+             # --- Validation & early stopping ---
+             if has_val:
+                 self.ft.eval()
+                 with torch.no_grad(), autocast(enabled=(self.device.type == 'cuda')):
+                     y_val_pred = self.ft(X_num_val_dev, X_cat_val_dev)
+                     y_val_pred = torch.clamp(y_val_pred, min=1e-6)
+
+                     val_losses = tweedie_loss(
+                         y_val_pred, y_val_dev, p=self.tw_power
+                     ).view(-1)
+
+                     val_weighted_loss = (
+                         val_losses * w_val_dev.view(-1)
+                     ).sum() / w_val_dev.sum()
+
+                 if val_weighted_loss < best_loss:
+                     best_loss = val_weighted_loss
+                     patience_counter = 0
+                     best_model_state = copy.deepcopy(self.ft.state_dict())
+                 else:
+                     patience_counter += 1
+
+                 if patience_counter >= self.patience and best_model_state is not None:
+                     self.ft.load_state_dict(best_model_state)
+                     break
+         if has_val and best_model_state is not None:
+             self.ft.load_state_dict(best_model_state)
+
+     def predict(self, X_test):
+         # X_test: DataFrame containing num_cols + cat_cols
+
+         self.ft.eval()
+         X_num = X_test[self.num_cols].to_numpy(dtype=np.float32, copy=True)
+         X_num = torch.tensor(
+             X_num,
+             dtype=torch.float32
+         )
+         if self.cat_cols:
+             X_cat_np = self._encode_cats(X_test)
+             X_cat = torch.tensor(X_cat_np, dtype=torch.long)
+         else:
+             X_cat = torch.zeros((X_num.size(0), 0), dtype=torch.long)
+
+         with torch.no_grad():
+             X_num = X_num.to(self.device, non_blocking=True)
+             X_cat = X_cat.to(self.device, non_blocking=True)
+             y_pred = self.ft(X_num, X_cat).cpu().numpy()
+
+         y_pred = np.clip(y_pred, 1e-6, None)
+         return y_pred.ravel()
+
+     def set_params(self, params: dict):
+
+         # Kept consistent with the sklearn style.
+         # Note: changes to structural parameters (e.g. d_model/n_heads) only take effect after refitting.
+
+         for key, value in params.items():
+             if hasattr(self, key):
+                 setattr(self, key, value)
+             else:
+                 raise ValueError(f"Parameter {key} not found in model.")
+         return self
+
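A minimal usage sketch (illustrative; the toy frame and column names are invented):

    df = pd.DataFrame({
        'age': np.random.rand(256) * 60,
        'veh_value': np.random.rand(256) * 10,
        'region': np.random.choice(['N', 'S', 'E', 'W'], size=256),
    })
    y = pd.Series(np.random.gamma(2.0, 1.0, size=256))
    ft = FTTransformerSklearn(model_nme='bc_demo',
                              num_cols=['age', 'veh_value'],
                              cat_cols=['region'], epochs=5)
    ft.fit(df, y)
    preds = ft.predict(df)  # positive array of shape (256,)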
937
+
938
+ # 定义贝叶斯优化模型类,包含XGBoost和ResNet模型
939
+
940
+ class BayesOptModel:
941
+ def __init__(self, train_data, test_data,
942
+ model_nme, resp_nme, weight_nme, factor_nmes,
943
+ cate_list=None, prop_test=0.25, rand_seed=None,
944
+ epochs=100, use_gpu=True):
945
+ # 初始化数据
946
+ # train_data: 训练数据, test_data: 测试数据 格式需为DataFrame
947
+ # model_nme: 模型名称
948
+ # resp_nme: 因变量名称, weight_nme: 权重名称
949
+ # factor_nmes: 因子名称列表, space_params: 参数空间
950
+ # cate_list: 类别变量列表
951
+ # prop_test: 测试集比例, rand_seed
952
+ self.train_data = train_data.copy(deep=True)
953
+ self.test_data = test_data.copy(deep=True)
954
+ self.resp_nme = resp_nme
955
+ self.weight_nme = weight_nme
956
+ self.train_data.loc[:, 'w_act'] = self.train_data[self.resp_nme] * \
957
+ self.train_data[self.weight_nme]
958
+ self.test_data.loc[:, 'w_act'] = self.test_data[self.resp_nme] * \
959
+ self.test_data[self.weight_nme]
960
+ q99 = self.train_data[self.resp_nme].quantile(0.999)
961
+ self.train_data[self.resp_nme] = self.train_data[self.resp_nme].clip(
962
+ upper=q99)
963
+ self.factor_nmes = list(factor_nmes)
964
+ self.cate_list = list(cate_list) if cate_list else []
965
+ self.rand_seed = rand_seed if rand_seed is not None else np.random.randint(
966
+ 1, 10000)
967
+ if self.cate_list:
968
+ for cate in self.cate_list:
969
+ self.train_data[cate] = self.train_data[cate].astype(
970
+ 'category')
971
+ self.test_data[cate] = self.test_data[cate].astype('category')
972
+ self.prop_test = prop_test
973
+ self.cv = ShuffleSplit(n_splits=int(1/self.prop_test),
974
+ test_size=self.prop_test,
975
+ random_state=self.rand_seed)
976
+ self.model_nme = model_nme
977
+ if self.model_nme.find('f') != -1:
978
+ self.obj = 'count:poisson'
979
+ elif self.model_nme.find('s') != -1:
980
+ self.obj = 'reg:gamma'
981
+ elif self.model_nme.find('bc') != -1:
982
+ self.obj = 'reg:tweedie'
983
+ self.fit_params = {
984
+ 'sample_weight': self.train_data[self.weight_nme].values
985
+ }
986
+ self.use_gpu = bool(use_gpu and torch.cuda.is_available())
987
+ self.num_features = [
988
+ nme for nme in self.factor_nmes if nme not in self.cate_list]
989
+ self.train_oht_scl_data = self.train_data[self.factor_nmes +
990
+ [self.weight_nme]+[self.resp_nme]].copy()
991
+ self.test_oht_scl_data = self.test_data[self.factor_nmes +
992
+ [self.weight_nme]+[self.resp_nme]].copy()
993
+ self.train_oht_scl_data = pd.get_dummies(
994
+ self.train_oht_scl_data,
995
+ columns=self.cate_list,
996
+ drop_first=True,
997
+ dtype=np.int8
998
+ )
999
+ self.test_oht_scl_data = pd.get_dummies(
1000
+ self.test_oht_scl_data,
1001
+ columns=self.cate_list,
1002
+ drop_first=True,
1003
+ dtype=np.int8
1004
+ )
1005
+ for num_chr in self.num_features:
1006
+ scaler = StandardScaler()
1007
+ self.train_oht_scl_data[num_chr] = scaler.fit_transform(
1008
+ self.train_oht_scl_data[num_chr].values.reshape(-1, 1))
1009
+ self.test_oht_scl_data[num_chr] = scaler.transform(
1010
+ self.test_oht_scl_data[num_chr].values.reshape(-1, 1))
1011
+ # 对测试集进行列对齐
1012
+ self.test_oht_scl_data = self.test_oht_scl_data.reindex(
1013
+ columns=self.train_oht_scl_data.columns,
1014
+ fill_value=0
1015
+ )
1016
+ self.var_nmes = list(
1017
+ set(list(self.train_oht_scl_data.columns)) -
1018
+ set([self.weight_nme, self.resp_nme])
1019
+ )
1020
+ self.epochs = epochs
1021
+ self.model_label = []
1022
+ self.cat_categories_for_shap = {}
1023
+ for col in self.cate_list:
1024
+ cats = self.train_data[col].astype('category')
1025
+ self.cat_categories_for_shap[col] = list(cats.cat.categories)
1026
+
1027
+ # 定义单因素画图函数
1028
+ def plot_oneway(self, n_bins=10):
1029
+ for c in self.factor_nmes:
1030
+ fig = plt.figure(figsize=(7, 5))
1031
+ if c in self.cate_list:
1032
+ group_col = c
1033
+ plot_source = self.train_data
1034
+ else:
1035
+ group_col = f'{c}_bins'
1036
+ bins = pd.qcut(
1037
+ self.train_data[c],
1038
+ n_bins,
1039
+ duplicates='drop'
1040
+ )
1041
+ plot_source = self.train_data.assign(**{group_col: bins})
1042
+ plot_data = plot_source.groupby(
1043
+ [group_col], observed=True).sum(numeric_only=True)
1044
+ plot_data.reset_index(inplace=True)
1045
+ plot_data['act_v'] = plot_data['w_act'] / \
1046
+ plot_data[self.weight_nme]
1047
+ plot_data.head()
1048
+ ax = fig.add_subplot(111)
1049
+ ax.plot(plot_data.index, plot_data['act_v'],
1050
+ label='Actual', color='red')
1051
+ ax.set_title(
1052
+ 'Analysis of %s : Train Data' % group_col,
1053
+ fontsize=8)
1054
+ plt.xticks(plot_data.index,
1055
+ list(plot_data[group_col].astype(str)),
1056
+ rotation=90)
1057
+ if len(list(plot_data[group_col].astype(str))) > 50:
1058
+ plt.xticks(fontsize=3)
1059
+ else:
1060
+ plt.xticks(fontsize=6)
1061
+ plt.yticks(fontsize=6)
1062
+ ax2 = ax.twinx()
1063
+ ax2.bar(plot_data.index,
1064
+ plot_data[self.weight_nme],
1065
+ alpha=0.5, color='seagreen')
1066
+ plt.yticks(fontsize=6)
1067
+ plt.margins(0.05)
1068
+ plt.subplots_adjust(wspace=0.3)
1069
+ save_path = os.path.join(
1070
+ os.getcwd(), 'plot',
1071
+ f'00_{self.model_nme}_{group_col}_oneway.png')
1072
+ ensure_parent_dir(save_path)
1073
+ plt.savefig(save_path, dpi=300)
1074
+ plt.close(fig)
1075
+
1076
+ # Xgboost交叉验证函数
1077
+ def cross_val_xgb(self, trial):
1078
+ learning_rate = trial.suggest_float(
1079
+ 'learning_rate', 1e-5, 1e-1, log=True)
1080
+ gamma = trial.suggest_float(
1081
+ 'gamma', 0, 10000)
1082
+ max_depth = trial.suggest_int(
1083
+ 'max_depth', 3, 25)
1084
+ n_estimators = trial.suggest_int(
1085
+ 'n_estimators', 10, 500, step=10)
1086
+ min_child_weight = trial.suggest_int(
1087
+ 'min_child_weight', 100, 10000, step=100)
1088
+ reg_alpha = trial.suggest_float(
1089
+ 'reg_alpha', 1e-10, 1, log=True)
1090
+ reg_lambda = trial.suggest_float(
1091
+ 'reg_lambda', 1e-10, 1, log=True)
1092
+ if self.obj == 'reg:tweedie':
1093
+ tweedie_variance_power = trial.suggest_float(
1094
+ 'tweedie_variance_power', 1, 2)
1095
+ elif self.obj == 'count:poisson':
1096
+ tweedie_variance_power = 1
1097
+ elif self.obj == 'reg:gamma':
1098
+ tweedie_variance_power = 2
1099
+ xgb_kwargs = dict(
1100
+ objective=self.obj,
1101
+ random_state=self.rand_seed,
1102
+ subsample=0.9,
1103
+ tree_method='gpu_hist' if self.use_gpu else 'hist',
1104
+ enable_categorical=True,
1105
+ predictor='gpu_predictor' if self.use_gpu else 'cpu_predictor'
1106
+ )
1107
+ if self.use_gpu:
1108
+ xgb_kwargs['gpu_id'] = 0
1109
+ clf = xgb.XGBRegressor(**xgb_kwargs)
1110
+ params = {
1111
+ 'learning_rate': learning_rate,
1112
+ 'gamma': gamma,
1113
+ 'max_depth': max_depth,
1114
+ 'n_estimators': n_estimators,
1115
+ 'min_child_weight': min_child_weight,
1116
+ 'reg_alpha': reg_alpha,
1117
+ 'reg_lambda': reg_lambda
1118
+ }
1119
+ if self.obj == 'reg:tweedie':
1120
+ params['tweedie_variance_power'] = tweedie_variance_power
1121
+ clf.set_params(**params)
1122
+ cv_jobs = 1 if self.use_gpu else int(1/self.prop_test)
1123
+ acc = cross_val_score(
1124
+ clf,
1125
+ self.train_data[self.factor_nmes],
1126
+ self.train_data[self.resp_nme].values,
1127
+ fit_params=self.fit_params,
1128
+ cv=self.cv,
1129
+ scoring=make_scorer(
1130
+ mean_tweedie_deviance,
1131
+ power=tweedie_variance_power,
1132
+ greater_is_better=False),
1133
+ error_score='raise',
1134
+ n_jobs=cv_jobs).mean()
1135
+ return -acc
1136
+
1137
+ # 定义Xgboost贝叶斯优化函数
1138
+ def bayesopt_xgb(self, max_evals=100):
1139
+ study = optuna.create_study(
1140
+ direction='minimize',
1141
+ sampler=optuna.samplers.TPESampler(seed=self.rand_seed))
1142
+ study.optimize(self.cross_val_xgb, n_trials=max_evals)
1143
+ self.best_xgb_params = study.best_params
1144
+ xgb_params_path = os.path.join(
1145
+ os.getcwd(), 'Results', f'{self.model_nme}_bestparams_xgb.csv'
1146
+ )
1147
+ ensure_parent_dir(xgb_params_path)
1148
+ pd.DataFrame(self.best_xgb_params, index=[0]).to_csv(xgb_params_path)
1149
+ self.best_xgb_trial = study.best_trial
1150
+ best_xgb_kwargs = dict(
1151
+ objective=self.obj,
1152
+ random_state=self.rand_seed,
1153
+ subsample=0.9,
1154
+ tree_method='gpu_hist' if self.use_gpu else 'hist',
1155
+ enable_categorical=True,
1156
+ predictor='gpu_predictor' if self.use_gpu else 'cpu_predictor'
1157
+ )
1158
+ if self.use_gpu:
1159
+ best_xgb_kwargs['gpu_id'] = 0
1160
+ self.xgb_best = xgb.XGBRegressor(**best_xgb_kwargs)
1161
+ self.xgb_best.set_params(**self.best_xgb_params)
1162
+ self.xgb_best.fit(self.train_data[self.factor_nmes],
1163
+ self.train_data[self.resp_nme].values,
1164
+ **self.fit_params)
1165
+ self.model_label += ['Xgboost']
1166
+ self.train_data['pred_xgb'] = self.xgb_best.predict(
1167
+ self.train_data[self.factor_nmes])
1168
+ self.test_data['pred_xgb'] = self.xgb_best.predict(
1169
+ self.test_data[self.factor_nmes])
1170
+ self.train_data.loc[:, 'w_pred_xgb'] = self.train_data['pred_xgb'] * \
1171
+ self.train_data[self.weight_nme]
1172
+ self.test_data.loc[:, 'w_pred_xgb'] = self.test_data['pred_xgb'] * \
1173
+ self.test_data[self.weight_nme]
1174
+
1175
+ # ResNet交叉验证函数
1176
+ def cross_val_resn(self, trial):
1177
+
1178
+ learning_rate = trial.suggest_float(
1179
+ 'learning_rate', 1e-6, 1e-2, log=True) # 较低learning rate为了保证不会出险梯度爆炸
1180
+ hidden_dim = trial.suggest_int(
1181
+ 'hidden_dim', 32, 256, step=32)
1182
+ block_num = trial.suggest_int(
1183
+ 'block_num', 2, 10)
1184
+ batch_num = trial.suggest_int(
1185
+ 'batch_num',
1186
+ 10 if self.obj == 'reg:gamma' else 100,
1187
+ 100 if self.obj == 'reg:gamma' else 1000,
1188
+ step=10 if self.obj == 'reg:gamma' else 100)
1189
+ if self.obj == 'reg:tweedie':
1190
+ tw_power = trial.suggest_float(
1191
+ 'tw_power', 1, 2.0)
1192
+ elif self.obj == 'count:poisson':
1193
+ tw_power = 1
1194
+ elif self.obj == 'reg:gamma':
1195
+ tw_power = 2
1196
+ loss = 0
1197
+ for fold, (train_idx, test_idx) in enumerate(self.cv.split(self.train_oht_scl_data[self.var_nmes])):
1198
+ # 创建模型
1199
+ cv_net = ResNetSklearn(
1200
+ model_nme=self.model_nme,
1201
+ input_dim=self.train_oht_scl_data[self.var_nmes].shape[1],
1202
+ epochs=self.epochs,
1203
+ learning_rate=learning_rate,
1204
+ hidden_dim=hidden_dim,
1205
+ block_num=block_num,
1206
+ # 保证权重方差不变
1207
+ batch_num=batch_num,
1208
+ tweedie_power=tw_power
1209
+ # 再此可以调整normlayer,dropout,residual_scale等参数
1210
+ )
1211
+ # 训练模型
1212
+ cv_net.fit(
1213
+ self.train_oht_scl_data[self.var_nmes].iloc[train_idx],
1214
+ self.train_oht_scl_data[self.resp_nme].iloc[train_idx],
1215
+ self.train_oht_scl_data[self.weight_nme].iloc[train_idx],
1216
+ self.train_oht_scl_data[self.var_nmes].iloc[test_idx],
1217
+ self.train_oht_scl_data[self.resp_nme].iloc[test_idx],
1218
+ self.train_oht_scl_data[self.weight_nme].iloc[test_idx]
1219
+ )
1220
+ # 预测
1221
+ y_pred_fold = cv_net.predict(
1222
+ self.train_oht_scl_data[self.var_nmes].iloc[test_idx]
1223
+ )
1224
+ # 计算损失
1225
+ loss += mean_tweedie_deviance(
1226
+ self.train_oht_scl_data[self.resp_nme].iloc[test_idx],
1227
+ y_pred_fold,
1228
+ sample_weight=self.train_oht_scl_data[self.weight_nme].iloc[test_idx],
1229
+ power=tw_power
1230
+ )
1231
+ return loss / int(1/self.prop_test)
1232
+
1233
+ # 定义ResNet贝叶斯优化函数
1234
+ def bayesopt_resnet(self, max_evals=100):
1235
+ study = optuna.create_study(
1236
+ direction='minimize',
1237
+ sampler=optuna.samplers.TPESampler(seed=self.rand_seed))
1238
+ study.optimize(self.cross_val_resn, n_trials=max_evals)
1239
+ self.best_resn_params = study.best_params
1240
+ resn_params_path = os.path.join(
1241
+ os.getcwd(), 'Results', f'{self.model_nme}_bestparams_resn.csv'
1242
+ )
1243
+ ensure_parent_dir(resn_params_path)
1244
+ pd.DataFrame(self.best_resn_params, index=[0]).to_csv(resn_params_path)
1245
+ self.best_resn_trial = study.best_trial
1246
+ self.resn_best = ResNetSklearn(
1247
+ model_nme=self.model_nme,
1248
+ input_dim=self.train_oht_scl_data[self.var_nmes].shape[1]
1249
+ )
1250
+ self.resn_best.set_params(self.best_resn_params)
1251
+ self.resn_best.fit(self.train_oht_scl_data[self.var_nmes],
1252
+ self.train_oht_scl_data[self.resp_nme],
1253
+ self.train_oht_scl_data[self.weight_nme])
1254
+ self.model_label += ['ResNet']
1255
+ self.train_data['pred_resn'] = self.resn_best.predict(
1256
+ self.train_oht_scl_data[self.var_nmes])
1257
+ self.test_data['pred_resn'] = self.resn_best.predict(
1258
+ self.test_oht_scl_data[self.var_nmes])
1259
+ self.train_data.loc[:, 'w_pred_resn'] = self.train_data['pred_resn'] * \
1260
+ self.train_data[self.weight_nme]
1261
+ self.test_data.loc[:, 'w_pred_resn'] = self.test_data['pred_resn'] * \
1262
+ self.test_data[self.weight_nme]
1263
+
1264
+ # FT-Transformer 交叉验证函数
1265
+ def cross_val_ft(self, trial):
1266
+
1267
+ # 学习率
1268
+ learning_rate = trial.suggest_float(
1269
+ 'learning_rate', 1e-6, 1e-4, log=True
1270
+ )
1271
+
1272
+ # Transformer 维度与层数
1273
+ d_model = trial.suggest_int(
1274
+ 'd_model', 32, 128, step=32
1275
+ )
1276
+ n_heads = trial.suggest_categorical(
1277
+ 'n_heads', [2, 4, 8]
1278
+ )
1279
+ n_layers = trial.suggest_int(
1280
+ 'n_layers', 2, 6
1281
+ )
1282
+
1283
+ dropout = trial.suggest_float(
1284
+ 'dropout', 0.0, 0.2
1285
+ )
1286
+
1287
+ batch_num = trial.suggest_int(
1288
+ 'batch_num',
1289
+ 5 if self.obj == 'reg:gamma' else 10,
1290
+ 10 if self.obj == 'reg:gamma' else 100,
1291
+ step=1 if self.obj == 'reg:gamma' else 10
1292
+ )
1293
+
1294
+ # Tweedie power
1295
+ if self.obj == 'reg:tweedie':
1296
+ tw_power = trial.suggest_float('tw_power', 1.0, 2.0)
1297
+ elif self.obj == 'count:poisson':
1298
+ tw_power = 1.0
1299
+ elif self.obj == 'reg:gamma':
1300
+ tw_power = 2.0
1301
+
1302
+ loss = 0.0
1303
+
1304
+ # 这里注意:FT 使用的是“原始特征”(self.factor_nmes),
1305
+ # 而不是 one-hot 之后的 self.train_oht_scl_data
1306
+ for fold, (train_idx, test_idx) in enumerate(
1307
+ self.cv.split(self.train_data[self.factor_nmes])):
1308
+
1309
+ X_train_fold = self.train_data.iloc[train_idx][self.factor_nmes]
1310
+ y_train_fold = self.train_data.iloc[train_idx][self.resp_nme]
1311
+ w_train_fold = self.train_data.iloc[train_idx][self.weight_nme]
1312
+
1313
+ X_val_fold = self.train_data.iloc[test_idx][self.factor_nmes]
1314
+ y_val_fold = self.train_data.iloc[test_idx][self.resp_nme]
1315
+ w_val_fold = self.train_data.iloc[test_idx][self.weight_nme]
1316
+
1317
+ # 创建 FT-Transformer 模型
1318
+ cv_ft = FTTransformerSklearn(
1319
+ model_nme=self.model_nme,
1320
+ num_cols=self.num_features, # 数值特征列表
1321
+ cat_cols=self.cate_list, # 类别特征列表(需是编码好的整数或category)
1322
+ d_model=d_model,
1323
+ n_heads=n_heads,
1324
+ n_layers=n_layers,
1325
+ dropout=dropout,
1326
+ batch_num=batch_num,
1327
+ epochs=self.epochs,
1328
+ tweedie_power=tw_power,
1329
+ learning_rate=learning_rate,
1330
+ patience=5 # 可以根据需要调整
1331
+ )
1332
+
1333
+ # 训练
1334
+ cv_ft.fit(
1335
+ X_train_fold,
1336
+ y_train_fold,
1337
+ w_train_fold,
1338
+ X_val_fold,
1339
+ y_val_fold,
1340
+ w_val_fold
1341
+ )
1342
+
1343
+ # 预测
1344
+ y_pred_fold = cv_ft.predict(X_val_fold)
1345
+
1346
+ # 计算损失(与 ResNet 一致:mean_tweedie_deviance)
1347
+ loss += mean_tweedie_deviance(
1348
+ y_val_fold,
1349
+ y_pred_fold,
1350
+ sample_weight=w_val_fold,
1351
+ power=tw_power
1352
+ )
1353
+
1354
+ return loss / int(1 / self.prop_test)
1355
+
1356
+ # 定义 FT-Transformer 贝叶斯优化函数
1357
+ def bayesopt_ft(self, max_evals=50):
1358
+ study = optuna.create_study(
1359
+ direction='minimize',
1360
+ sampler=optuna.samplers.TPESampler(seed=self.rand_seed)
1361
+ )
1362
+ study.optimize(self.cross_val_ft, n_trials=max_evals)
1363
+
1364
+ self.best_ft_params = study.best_params
1365
+ ft_params_path = os.path.join(
1366
+ os.getcwd(), 'Results', f'{self.model_nme}_bestparams_ft.csv'
1367
+ )
1368
+ ensure_parent_dir(ft_params_path)
1369
+ pd.DataFrame(self.best_ft_params, index=[0]).to_csv(ft_params_path)
1370
+ self.best_ft_trial = study.best_trial
1371
+
1372
+ # 用最优参数重新建一个 FT 模型,在全量训练集上拟合
1373
+ self.ft_best = FTTransformerSklearn(
1374
+ model_nme=self.model_nme,
1375
+ num_cols=self.num_features,
1376
+ cat_cols=self.cate_list
1377
+ )
1378
+ # 设置最优超参
1379
+ self.ft_best.set_params(self.best_ft_params)
1380
+
1381
+ # 全量训练
1382
+ self.ft_best.fit(
1383
+ self.train_data[self.factor_nmes],
1384
+ self.train_data[self.resp_nme],
1385
+ self.train_data[self.weight_nme]
1386
+ )
1387
+
1388
+ # 记录模型标签
1389
+ self.model_label += ['FTTransformer']
1390
+
1391
+ # 训练集预测
1392
+ self.train_data['pred_ft'] = self.ft_best.predict(
1393
+ self.train_data[self.factor_nmes]
1394
+ )
1395
+ # 测试集预测
1396
+ self.test_data['pred_ft'] = self.ft_best.predict(
1397
+ self.test_data[self.factor_nmes]
1398
+ )
1399
+
1400
+ # 加权预测(和 XGB / ResNet 风格一致)
1401
+ self.train_data.loc[:, 'w_pred_ft'] = (
1402
+ self.train_data['pred_ft'] * self.train_data[self.weight_nme]
1403
+ )
1404
+ self.test_data.loc[:, 'w_pred_ft'] = (
1405
+ self.test_data['pred_ft'] * self.test_data[self.weight_nme]
1406
+ )
1407
+
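An end-to-end sketch of the class (illustrative only; the tiny synthetic frame, column names, and trial counts are invented). 'freq_demo' contains 'f', so the objective resolves to count:poisson:

    n = 512
    df = pd.DataFrame({
        'x1': np.random.rand(n),
        'x2': np.random.rand(n),
        'region': np.random.choice(['N', 'S'], size=n),
    })
    df['w'] = 1.0
    df['y'] = np.random.poisson(1.0, size=n).astype(float)
    bo = BayesOptModel(df.iloc[:400], df.iloc[400:], model_nme='freq_demo',
                       resp_nme='y', weight_nme='w',
                       factor_nmes=['x1', 'x2', 'region'],
                       cate_list=['region'], epochs=3, use_gpu=False)
    bo.bayesopt_xgb(max_evals=5)  # writes Results/freq_demo_bestparams_xgb.csv
    bo.plot_lift('Xgboost', 'pred_xgb')  # writes plot/01_freq_demo_Xgboost_lift.png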
1408
+ # 定义分箱函数
1409
+
1410
+ def _split_data(self, data, col_nme, wgt_nme, n_bins=10):
1411
+ data.sort_values(by=col_nme, ascending=True, inplace=True)
1412
+ data['cum_weight'] = data[wgt_nme].cumsum()
1413
+ w_sum = data[wgt_nme].sum()
1414
+ data.loc[:, 'bins'] = np.floor(
1415
+ data['cum_weight']*float(n_bins)/w_sum)
1416
+ data.loc[(data['bins'] == n_bins), 'bins'] = n_bins-1
1417
+ return data.groupby(['bins'], observed=True).sum(numeric_only=True)
1418
+
1419
+    # Build the data set behind a lift chart
+    def _plot_data_lift(self,
+                        pred_list, w_pred_list,
+                        w_act_list, weight_list, n_bins=10):
+        lift_data = pd.DataFrame()
+        lift_data.loc[:, 'pred'] = pred_list
+        lift_data.loc[:, 'w_pred'] = w_pred_list
+        lift_data.loc[:, 'act'] = w_act_list
+        lift_data.loc[:, 'weight'] = weight_list
+        plot_data = self._split_data(
+            lift_data, 'pred', 'weight', n_bins)
+        plot_data['exp_v'] = plot_data['w_pred'] / plot_data['weight']
+        plot_data['act_v'] = plot_data['act'] / plot_data['weight']
+        plot_data.reset_index(inplace=True)
+        return plot_data
+
+    # Draw the lift chart
+    def plot_lift(self, model_label, pred_nme, n_bins=10):
+        # Plot results on the modelling (train) and test sets
+        figpos_list = [121, 122]
+        plot_dict = {
+            121: self.train_data,
+            122: self.test_data
+        }
+        name_list = {
+            121: 'Train Data',
+            122: 'Test Data'
+        }
+        if model_label == 'Xgboost':
+            pred_nme = 'pred_xgb'
+        elif model_label == 'ResNet':
+            pred_nme = 'pred_resn'
+        elif model_label == 'FTTransformer':
+            pred_nme = 'pred_ft'
+
+        fig = plt.figure(figsize=(11, 5))
+        for figpos in figpos_list:
+            plot_data = self._plot_data_lift(
+                plot_dict[figpos][pred_nme].values,
+                plot_dict[figpos]['w_' + pred_nme].values,
+                plot_dict[figpos]['w_act'].values,
+                plot_dict[figpos][self.weight_nme].values,
+                n_bins)
+            ax = fig.add_subplot(figpos)
+            ax.plot(plot_data.index, plot_data['act_v'],
+                    label='Actual', color='red')
+            ax.plot(plot_data.index, plot_data['exp_v'],
+                    label='Predicted', color='blue')
+            ax.set_title(
+                'Lift Chart on %s' % name_list[figpos], fontsize=8)
+            plt.xticks(plot_data.index,
+                       plot_data.index,
+                       rotation=90, fontsize=6)
+            plt.yticks(fontsize=6)
+            plt.legend(loc='upper left',
+                       fontsize=5, frameon=False)
+            plt.margins(0.05)
+            ax2 = ax.twinx()
+            ax2.bar(plot_data.index, plot_data['weight'],
+                    alpha=0.5, color='seagreen',
+                    label='Earned Exposure')
+            plt.yticks(fontsize=6)
+            plt.legend(loc='upper right',
+                       fontsize=5, frameon=False)
+        plt.subplots_adjust(wspace=0.3)
+        save_path = os.path.join(
+            os.getcwd(), 'plot', f'01_{self.model_nme}_{model_label}_lift.png')
+        ensure_parent_dir(save_path)
+        plt.savefig(save_path, dpi=300)
+        plt.show()
+        plt.close(fig)
+
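A hypothetical call, again with `bc` as a trained instance of this class:

```python
# Draws the train/test lift charts for the FT model and saves
# plot/01_<model_nme>_FTTransformer_lift.png under the working directory.
bc.plot_lift('FTTransformer', pred_nme=None)
```

For the three known labels `pred_nme` is overwritten internally, so any placeholder value works; it only matters for custom labels.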
+    # Build the data set behind a double lift chart
+    def _plot_data_dlift(self,
+                         pred_list_model1, pred_list_model2,
+                         w_list, w_act_list, n_bins=10):
+        lift_data = pd.DataFrame()
+        lift_data.loc[:, 'pred1'] = pred_list_model1
+        lift_data.loc[:, 'pred2'] = pred_list_model2
+        lift_data.loc[:, 'diff_ly'] = lift_data['pred1'] / lift_data['pred2']
+        lift_data.loc[:, 'act'] = w_act_list
+        lift_data.loc[:, 'weight'] = w_list
+        plot_data = self._split_data(lift_data, 'diff_ly', 'weight', n_bins)
+        plot_data['exp_v1'] = plot_data['pred1'] / plot_data['act']
+        plot_data['exp_v2'] = plot_data['pred2'] / plot_data['act']
+        plot_data['act_v'] = plot_data['act'] / plot_data['act']
+        plot_data.reset_index(inplace=True)
+        return plot_data
+
+    # Draw the double lift chart
+    def plot_dlift(self, model_comp=['xgb', 'resn'], n_bins=10):
+        # Display names for the model keys
+        label_map = {'xgb': 'Xgboost', 'resn': 'ResNet', 'ft': 'FTTransformer'}
+        figpos_list = [121, 122]
+        plot_dict = {
+            121: self.train_data,
+            122: self.test_data
+        }
+        name_list = {
+            121: 'Train Data',
+            122: 'Test Data'
+        }
+        fig = plt.figure(figsize=(11, 5))
+        for figpos in figpos_list:
+            plot_data = self._plot_data_dlift(
+                plot_dict[figpos]['w_pred_' + model_comp[0]].values,
+                plot_dict[figpos]['w_pred_' + model_comp[1]].values,
+                plot_dict[figpos][self.weight_nme].values,
+                plot_dict[figpos]['w_act'].values,
+                n_bins)
+            ax = fig.add_subplot(figpos)
+            tt1 = label_map.get(model_comp[0], model_comp[0])
+            tt2 = label_map.get(model_comp[1], model_comp[1])
+            ax.plot(plot_data.index, plot_data['act_v'],
+                    label='Actual', color='red')
+            ax.plot(plot_data.index, plot_data['exp_v1'],
+                    label=tt1, color='blue')
+            ax.plot(plot_data.index, plot_data['exp_v2'],
+                    label=tt2, color='black')
+            ax.set_title(
+                'Double Lift Chart on %s' % name_list[figpos], fontsize=8)
+            plt.xticks(plot_data.index,
+                       plot_data.index,
+                       rotation=90, fontsize=6)
+            plt.xlabel('%s / %s' % (tt1, tt2), fontsize=6)
+            plt.yticks(fontsize=6)
+            plt.legend(loc='upper left',
+                       fontsize=5, frameon=False)
+            plt.margins(0.1)
+            plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8)
+            ax2 = ax.twinx()
+            ax2.bar(plot_data.index, plot_data['weight'],
+                    alpha=0.5, color='seagreen',
+                    label='Earned Exposure')
+            plt.yticks(fontsize=6)
+            plt.legend(loc='upper right',
+                       fontsize=5, frameon=False)
+        plt.subplots_adjust(wspace=0.3)
+        save_path = os.path.join(
+            os.getcwd(), 'plot', f'02_{self.model_nme}_dlift.png')
+        ensure_parent_dir(save_path)
+        plt.savefig(save_path, dpi=300)
+        plt.show()
+        plt.close(fig)
+
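The comparison pair is configurable; for example, to judge the FT-Transformer against XGBoost (hypothetical call, same `bc` as above):

```python
# Requires the w_pred_xgb / w_pred_ft columns, i.e. both models already trained.
bc.plot_dlift(model_comp=['xgb', 'ft'])
```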
+    # Persist trained models
+
+    def save_model(self, model_name=None):
+
+        # model_name may be:
+        # - None: save every available model
+        # - 'xgb': save only the Xgboost model
+        # - 'resn': save only the ResNet model
+        # - 'ft': save only the FT-Transformer model
+
+        model_dir = os.path.join(os.getcwd(), 'model')
+        if not os.path.exists(model_dir):
+            os.makedirs(model_dir)
+
+        save_path_xgb = os.path.join(
+            model_dir, f'01_{self.model_nme}_Xgboost.pkl'
+        )
+        save_path_resn = os.path.join(
+            model_dir, f'01_{self.model_nme}_ResNet.pth'
+        )
+        save_path_ft = os.path.join(
+            model_dir, f'01_{self.model_nme}_FTTransformer.pth'
+        )
+
+        # Save XGBoost
+        if model_name in (None, 'xgb'):
+            if hasattr(self, 'xgb_best'):
+                joblib.dump(self.xgb_best, save_path_xgb)
+            else:
+                print("[save_model] Warning: xgb_best does not exist; Xgboost model not saved.")
+
+        # Save ResNet (only the core network's state_dict)
+        if model_name in (None, 'resn'):
+            if hasattr(self, 'resn_best'):
+                torch.save(self.resn_best.resnet.state_dict(), save_path_resn)
+            else:
+                print("[save_model] Warning: resn_best does not exist; ResNet model not saved.")
+
+        # Save FT-Transformer (persist the whole sklearn-style wrapper so the
+        # architecture and parameters can be restored together)
+        if model_name in (None, 'ft'):
+            if hasattr(self, 'ft_best'):
+                # Saving the whole object keeps structure, weights and best hyper-parameters in one file
+                torch.save(self.ft_best, save_path_ft)
+            else:
+                print("[save_model] Warning: ft_best does not exist; FT-Transformer model not saved.")
+
+    def load_model(self, model_name=None):
+        # model_name may be:
+        # - None: load every model that can be found
+        # - 'xgb': load only the Xgboost model
+        # - 'resn': load only the ResNet model
+        # - 'ft': load only the FT-Transformer model
+
+        model_dir = os.path.join(os.getcwd(), 'model')
+        save_path_xgb = os.path.join(
+            model_dir, f'01_{self.model_nme}_Xgboost.pkl'
+        )
+        save_path_resn = os.path.join(
+            model_dir, f'01_{self.model_nme}_ResNet.pth'
+        )
+        save_path_ft = os.path.join(
+            model_dir, f'01_{self.model_nme}_FTTransformer.pth'
+        )
+
+        # Load XGBoost
+        if model_name in (None, 'xgb'):
+            if os.path.exists(save_path_xgb):
+                self.xgb_load = joblib.load(save_path_xgb)
+            else:
+                print(
+                    f"[load_model] Warning: Xgboost model file not found: {save_path_xgb}")
+
+        # Load ResNet (rebuild the wrapper, then load the state_dict)
+        if model_name in (None, 'resn'):
+            if os.path.exists(save_path_resn):
+                self.resn_load = ResNetSklearn(
+                    model_nme=self.model_nme,
+                    input_dim=self.train_oht_scl_data[self.var_nmes].shape[1]
+                )
+                state_dict = torch.load(
+                    save_path_resn, map_location=self.resn_load.device)
+                self.resn_load.resnet.load_state_dict(state_dict)
+            else:
+                print(
+                    f"[load_model] Warning: ResNet model file not found: {save_path_resn}")
+
+        # Load FT-Transformer (deserialise the sklearn-style wrapper directly)
+        if model_name in (None, 'ft'):
+            if os.path.exists(save_path_ft):
+                # Load onto CPU first, then migrate to the current environment
+                ft_loaded = torch.load(save_path_ft, map_location='cpu')
+                # Pick the device for this environment and move the core model onto it
+                if torch.cuda.is_available():
+                    ft_loaded.device = torch.device('cuda')
+                elif torch.backends.mps.is_available():
+                    ft_loaded.device = torch.device('mps')
+                else:
+                    ft_loaded.device = torch.device('cpu')
+                ft_loaded.ft.to(ft_loaded.device)
+
+                self.ft_load = ft_loaded
+            else:
+                print(
+                    f"[load_model] Warning: FT-Transformer model file not found: {save_path_ft}")
+
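A hypothetical save/load round trip with the same `bc` instance:

```python
bc.save_model()       # writes the Xgboost .pkl plus ResNet / FT .pth files under model/
bc.load_model('ft')   # repopulates bc.ft_load on the best available device
preds = bc.ft_load.predict(bc.test_data[bc.factor_nmes])
```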
+    def _build_ft_shap_matrix(self, data: pd.DataFrame) -> np.ndarray:
+
+        # Convert a raw feature DataFrame (containing self.factor_nmes) into a
+        # purely numeric matrix: numeric columns as float64, categorical
+        # columns as integer codes (stored as float64).
+        # Column order matches self.factor_nmes.
+
+        matrices = []
+
+        for col in self.factor_nmes:
+            s = data[col]
+
+            if col in self.cate_list:
+                # Categorical column: encode against the full training-time category set
+                cats = pd.Categorical(
+                    s,
+                    categories=self.cat_categories_for_shap[col]
+                )
+                # cats.codes is an Index / ndarray; wrap with np.asarray before reshaping
+                codes = np.asarray(cats.codes, dtype=np.float64).reshape(-1, 1)
+                matrices.append(codes)
+            else:
+                # Numeric column: Series -> numpy -> reshape
+                vals = pd.to_numeric(s, errors="coerce")
+                arr = vals.to_numpy(dtype=np.float64, copy=True).reshape(-1, 1)
+                matrices.append(arr)
+
+        X_mat = np.concatenate(matrices, axis=1)  # (N, F)
+        return X_mat
+
+    def _decode_ft_shap_matrix_to_df(self, X_mat: np.ndarray) -> pd.DataFrame:
+
+        # Restore a SHAP numeric matrix (N, F) to the raw feature DataFrame:
+        # numeric columns become float, categorical columns become pandas
+        # category dtype, so the result is compatible both with XGBoost
+        # (enable_categorical=True) and with the FT-Transformer input.
+        # Column order = self.factor_nmes.
+
+        data_dict = {}
+
+        for j, col in enumerate(self.factor_nmes):
+            col_vals = X_mat[:, j]
+
+            if col in self.cate_list:
+                cats = self.cat_categories_for_shap[col]
+
+                # SHAP perturbs the codes into fractions; round back to integer codes
+                codes = np.round(col_vals).astype(int)
+                # Clamp to [-1, len(cats) - 1]
+                codes = np.clip(codes, -1, len(cats) - 1)
+
+                # pandas.Categorical.from_codes:
+                # - code -1 is treated as missing (NaN)
+                # - other codes map to the corresponding category in cats
+                cat_series = pd.Categorical.from_codes(
+                    codes,
+                    categories=cats
+                )
+                # Stored as a Categorical, not object
+                data_dict[col] = cat_series
+            else:
+                # Numeric column: plain float
+                data_dict[col] = col_vals.astype(float)
+
+        df = pd.DataFrame(data_dict, columns=self.factor_nmes)
+
+        # Belt and braces: make sure every categorical column really is category dtype
+        for col in self.cate_list:
+            if col in df.columns:
+                df[col] = df[col].astype("category")
+        return df
+
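A standalone sketch of the encode/decode convention these two helpers implement, for a single categorical column (toy data, not package code):

```python
import numpy as np
import pandas as pd

cats = ['A', 'B', 'C']                        # training-time category universe
s = pd.Series(['B', 'C', 'A', 'D'])           # 'D' was never seen in training
codes = np.asarray(pd.Categorical(s, categories=cats).codes,
                   dtype=np.float64)          # -> [1., 2., 0., -1.]

# SHAP perturbs these floats; round/clip and map back, -1 becoming NaN:
back = pd.Categorical.from_codes(
    np.clip(np.round(codes).astype(int), -1, len(cats) - 1),
    categories=cats)
print(list(back))                             # ['B', 'C', 'A', nan]
```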
+    # ========= XGBoost SHAP =========
+
+    def compute_shap_xgb(self, n_background: int = 500,
+                         n_samples: int = 200,
+                         on_train: bool = True):
+        # Compute SHAP values for XGBoost with KernelExplainer (model-agnostic).
+        #
+        # - SHAP sees a purely numeric matrix:
+        #   * numeric features: float64
+        #   * categorical features: integer codes from _build_ft_shap_matrix (float64)
+        # - The model still receives the raw DataFrame via xgb_best.predict(...)
+
+        if not hasattr(self, "xgb_best"):
+            raise RuntimeError(
+                "Run bayesopt_xgb() first so that self.xgb_best is trained")
+
+        # 1) Pick the data source: train or test set (raw feature space)
+        data = self.train_data if on_train else self.test_data
+        X_raw = data[self.factor_nmes]
+
+        # 2) Build the background matrix (same numeric encoding as FT)
+        background_raw = X_raw.sample(
+            min(len(X_raw), n_background),
+            random_state=self.rand_seed
+        )
+        background_mat = self._build_ft_shap_matrix(
+            background_raw
+        ).astype(np.float64, copy=True)
+
+        # 3) Black-box prediction function: numeric matrix -> DataFrame -> xgb_best.predict
+        def f_predict(x_mat: np.ndarray) -> np.ndarray:
+            # Decode the encoded matrix back into the raw DataFrame (numeric + categorical)
+            df_input = self._decode_ft_shap_matrix_to_df(x_mat)
+            # Note: this uses self.xgb_best.predict, matching training/prediction
+            y_pred = self.xgb_best.predict(df_input)
+            return y_pred
+
+        explainer = shap.KernelExplainer(f_predict, background_mat)
+
+        # 4) Samples to explain: raw features + numeric encoding
+        X_explain_raw = X_raw.sample(
+            min(len(X_raw), n_samples),
+            random_state=self.rand_seed
+        )
+        X_explain_mat = self._build_ft_shap_matrix(
+            X_explain_raw
+        ).astype(np.float64, copy=True)
+
+        # 5) Compute SHAP values (nsamples='auto' keeps the cost manageable)
+        shap_values = explainer.shap_values(X_explain_mat, nsamples="auto")
+
+        # 6) Store the results:
+        #    - shap_values: in the encoded numeric space, one column per factor_nmes entry
+        #    - X_explain_raw: raw DataFrame, handy for showing real category names in plots
+        self.shap_xgb = {
+            "explainer": explainer,
+            "X_explain": X_explain_raw,
+            "shap_values": shap_values,
+            "base_value": explainer.expected_value,
+        }
+        return self.shap_xgb
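The stored results plug straight into shap's standard plots; a hypothetical follow-up, using the encoded matrix so feature values are numeric as the plot expects:

```python
import shap

res = bc.compute_shap_xgb(n_background=200, n_samples=100)
X_disp = bc._build_ft_shap_matrix(res['X_explain'])   # numeric view matching shap_values
shap.summary_plot(res['shap_values'], X_disp, feature_names=bc.factor_nmes)
```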
+    # ========= ResNet SHAP =========
+
+    def _resn_predict_wrapper(self, X_np: np.ndarray) -> np.ndarray:
+        # ResNet prediction wrapper for SHAP.
+        # X_np: numpy array, shape = (N, n_features), columns ordered as self.var_nmes
+        X_df = pd.DataFrame(X_np, columns=self.var_nmes)
+        return self.resn_best.predict(X_df)
+
+    def compute_shap_resn(self, n_background: int = 500,
+                          n_samples: int = 200,
+                          on_train: bool = True):
+
+        # Compute SHAP values for the ResNet with KernelExplainer.
+        # Explanation space: the one-hot encoded and standardised features self.var_nmes.
+
+        if not hasattr(self, 'resn_best'):
+            raise RuntimeError(
+                "Run bayesopt_resnet() first so that resn_best is trained")
+
+        # Pick the data set (already one-hot encoded and standardised)
+        data = self.train_oht_scl_data if on_train else self.test_oht_scl_data
+        X = data[self.var_nmes]
+
+        # Background sample: float64 numpy
+        background_df = X.sample(
+            min(len(X), n_background),
+            random_state=self.rand_seed
+        )
+        background_np = background_df.to_numpy(dtype=np.float64, copy=True)
+
+        # Black-box prediction function
+        def f_predict(x):
+            return self._resn_predict_wrapper(x)
+
+        explainer = shap.KernelExplainer(f_predict, background_np)
+
+        # Samples to explain
+        X_explain_df = X.sample(
+            min(len(X), n_samples),
+            random_state=self.rand_seed
+        )
+        X_explain_np = X_explain_df.to_numpy(dtype=np.float64, copy=True)
+
+        shap_values = explainer.shap_values(X_explain_np, nsamples="auto")
+
+        self.shap_resn = {
+            "explainer": explainer,
+            "X_explain": X_explain_df,   # DataFrame: for plotting (has column names)
+            "shap_values": shap_values,  # numpy: (n_samples, n_features)
+            "base_value": explainer.expected_value,
+        }
+        return self.shap_resn
+
+    # ========= FT-Transformer SHAP =========
+
+    def _ft_shap_predict_wrapper(self, X_mat: np.ndarray) -> np.ndarray:
+
+        # Prediction wrapper for SHAP:
+        # numeric matrix -> restore the raw feature DataFrame -> call ft_best.predict
+
+        df_input = self._decode_ft_shap_matrix_to_df(X_mat)
+        y_pred = self.ft_best.predict(df_input)
+        return y_pred
+
+    def compute_shap_ft(self, n_background: int = 500,
+                        n_samples: int = 200,
+                        on_train: bool = True):
+
+        # Compute SHAP values for the FT-Transformer with KernelExplainer.
+        # Explanation space: the mixed numeric matrix of numeric values and
+        # category codes (float64); plots still use the raw feature
+        # names/values via X_explain.
+
+        if not hasattr(self, "ft_best"):
+            raise RuntimeError(
+                "Run bayesopt_ft() first so that ft_best is trained")
+
+        # Pick the data source (raw feature space)
+        data = self.train_data if on_train else self.test_data
+        X_raw = data[self.factor_nmes]
+
+        # Background matrix
+        background_raw = X_raw.sample(
+            min(len(X_raw), n_background),
+            random_state=self.rand_seed
+        )
+        background_mat = self._build_ft_shap_matrix(
+            background_raw
+        ).astype(np.float64, copy=True)
+
+        # Black-box prediction function (numeric matrix -> DataFrame -> FT model)
+        def f_predict(x):
+            return self._ft_shap_predict_wrapper(x)
+
+        explainer = shap.KernelExplainer(f_predict, background_mat)
+
+        # Samples to explain (raw feature space)
+        X_explain_raw = X_raw.sample(
+            min(len(X_raw), n_samples),
+            random_state=self.rand_seed
+        )
+        X_explain_mat = self._build_ft_shap_matrix(
+            X_explain_raw
+        ).astype(np.float64, copy=True)
+
+        shap_values = explainer.shap_values(X_explain_mat, nsamples="auto")
+
+        self.shap_ft = {
+            "explainer": explainer,
+            "X_explain": X_explain_raw,  # raw feature DataFrame, used for plotting
+            "shap_values": shap_values,  # numpy: (n_samples, n_features)
+            "base_value": explainer.expected_value,
+        }
+        return self.shap_ft
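All three explainers share one storage convention, so downstream code can treat them uniformly; a hypothetical sketch (note the ResNet result is in the one-hot `var_nmes` space, the other two in `factor_nmes`):

```python
import numpy as np

for res in (bc.compute_shap_xgb(), bc.compute_shap_resn(), bc.compute_shap_ft()):
    # Mean absolute SHAP value per feature as a crude importance ranking
    mean_abs = np.abs(res['shap_values']).mean(axis=0)
    print(dict(zip(res['X_explain'].columns, mean_abs.round(4))))
```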