ins-pricing 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +60 -0
- ins_pricing/__init__.py +102 -0
- ins_pricing/governance/README.md +18 -0
- ins_pricing/governance/__init__.py +20 -0
- ins_pricing/governance/approval.py +93 -0
- ins_pricing/governance/audit.py +37 -0
- ins_pricing/governance/registry.py +99 -0
- ins_pricing/governance/release.py +159 -0
- ins_pricing/modelling/BayesOpt.py +146 -0
- ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
- ins_pricing/modelling/BayesOpt_entry.py +575 -0
- ins_pricing/modelling/BayesOpt_incremental.py +731 -0
- ins_pricing/modelling/Explain_Run.py +36 -0
- ins_pricing/modelling/Explain_entry.py +539 -0
- ins_pricing/modelling/Pricing_Run.py +36 -0
- ins_pricing/modelling/README.md +33 -0
- ins_pricing/modelling/__init__.py +44 -0
- ins_pricing/modelling/bayesopt/__init__.py +98 -0
- ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
- ins_pricing/modelling/bayesopt/core.py +1476 -0
- ins_pricing/modelling/bayesopt/models.py +2196 -0
- ins_pricing/modelling/bayesopt/trainers.py +2446 -0
- ins_pricing/modelling/bayesopt/utils.py +1021 -0
- ins_pricing/modelling/cli_common.py +136 -0
- ins_pricing/modelling/explain/__init__.py +55 -0
- ins_pricing/modelling/explain/gradients.py +334 -0
- ins_pricing/modelling/explain/metrics.py +176 -0
- ins_pricing/modelling/explain/permutation.py +155 -0
- ins_pricing/modelling/explain/shap_utils.py +146 -0
- ins_pricing/modelling/notebook_utils.py +284 -0
- ins_pricing/modelling/plotting/__init__.py +45 -0
- ins_pricing/modelling/plotting/common.py +63 -0
- ins_pricing/modelling/plotting/curves.py +572 -0
- ins_pricing/modelling/plotting/diagnostics.py +139 -0
- ins_pricing/modelling/plotting/geo.py +362 -0
- ins_pricing/modelling/plotting/importance.py +121 -0
- ins_pricing/modelling/run_logging.py +133 -0
- ins_pricing/modelling/tests/conftest.py +8 -0
- ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing/modelling/tests/test_explain.py +56 -0
- ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing/modelling/tests/test_plotting.py +63 -0
- ins_pricing/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing/modelling/watchdog_run.py +211 -0
- ins_pricing/pricing/README.md +44 -0
- ins_pricing/pricing/__init__.py +27 -0
- ins_pricing/pricing/calibration.py +39 -0
- ins_pricing/pricing/data_quality.py +117 -0
- ins_pricing/pricing/exposure.py +85 -0
- ins_pricing/pricing/factors.py +91 -0
- ins_pricing/pricing/monitoring.py +99 -0
- ins_pricing/pricing/rate_table.py +78 -0
- ins_pricing/production/__init__.py +21 -0
- ins_pricing/production/drift.py +30 -0
- ins_pricing/production/monitoring.py +143 -0
- ins_pricing/production/scoring.py +40 -0
- ins_pricing/reporting/README.md +20 -0
- ins_pricing/reporting/__init__.py +11 -0
- ins_pricing/reporting/report_builder.py +72 -0
- ins_pricing/reporting/scheduler.py +45 -0
- ins_pricing/setup.py +41 -0
- ins_pricing v2/__init__.py +23 -0
- ins_pricing v2/governance/__init__.py +20 -0
- ins_pricing v2/governance/approval.py +93 -0
- ins_pricing v2/governance/audit.py +37 -0
- ins_pricing v2/governance/registry.py +99 -0
- ins_pricing v2/governance/release.py +159 -0
- ins_pricing v2/modelling/Explain_Run.py +36 -0
- ins_pricing v2/modelling/Pricing_Run.py +36 -0
- ins_pricing v2/modelling/__init__.py +151 -0
- ins_pricing v2/modelling/cli_common.py +141 -0
- ins_pricing v2/modelling/config.py +249 -0
- ins_pricing v2/modelling/config_preprocess.py +254 -0
- ins_pricing v2/modelling/core.py +741 -0
- ins_pricing v2/modelling/data_container.py +42 -0
- ins_pricing v2/modelling/explain/__init__.py +55 -0
- ins_pricing v2/modelling/explain/gradients.py +334 -0
- ins_pricing v2/modelling/explain/metrics.py +176 -0
- ins_pricing v2/modelling/explain/permutation.py +155 -0
- ins_pricing v2/modelling/explain/shap_utils.py +146 -0
- ins_pricing v2/modelling/features.py +215 -0
- ins_pricing v2/modelling/model_manager.py +148 -0
- ins_pricing v2/modelling/model_plotting.py +463 -0
- ins_pricing v2/modelling/models.py +2203 -0
- ins_pricing v2/modelling/notebook_utils.py +294 -0
- ins_pricing v2/modelling/plotting/__init__.py +45 -0
- ins_pricing v2/modelling/plotting/common.py +63 -0
- ins_pricing v2/modelling/plotting/curves.py +572 -0
- ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
- ins_pricing v2/modelling/plotting/geo.py +362 -0
- ins_pricing v2/modelling/plotting/importance.py +121 -0
- ins_pricing v2/modelling/run_logging.py +133 -0
- ins_pricing v2/modelling/tests/conftest.py +8 -0
- ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing v2/modelling/tests/test_explain.py +56 -0
- ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing v2/modelling/tests/test_plotting.py +63 -0
- ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing v2/modelling/trainers.py +2447 -0
- ins_pricing v2/modelling/utils.py +1020 -0
- ins_pricing v2/modelling/watchdog_run.py +211 -0
- ins_pricing v2/pricing/__init__.py +27 -0
- ins_pricing v2/pricing/calibration.py +39 -0
- ins_pricing v2/pricing/data_quality.py +117 -0
- ins_pricing v2/pricing/exposure.py +85 -0
- ins_pricing v2/pricing/factors.py +91 -0
- ins_pricing v2/pricing/monitoring.py +99 -0
- ins_pricing v2/pricing/rate_table.py +78 -0
- ins_pricing v2/production/__init__.py +21 -0
- ins_pricing v2/production/drift.py +30 -0
- ins_pricing v2/production/monitoring.py +143 -0
- ins_pricing v2/production/scoring.py +40 -0
- ins_pricing v2/reporting/__init__.py +11 -0
- ins_pricing v2/reporting/report_builder.py +72 -0
- ins_pricing v2/reporting/scheduler.py +45 -0
- ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
- ins_pricing v2/scripts/Explain_entry.py +545 -0
- ins_pricing v2/scripts/__init__.py +1 -0
- ins_pricing v2/scripts/train.py +568 -0
- ins_pricing v2/setup.py +55 -0
- ins_pricing v2/smoke_test.py +28 -0
- ins_pricing-0.1.6.dist-info/METADATA +78 -0
- ins_pricing-0.1.6.dist-info/RECORD +169 -0
- ins_pricing-0.1.6.dist-info/WHEEL +5 -0
- ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
- user_packages/__init__.py +105 -0
- user_packages legacy/BayesOpt.py +5659 -0
- user_packages legacy/BayesOpt_entry.py +513 -0
- user_packages legacy/BayesOpt_incremental.py +685 -0
- user_packages legacy/Pricing_Run.py +36 -0
- user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
- user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
- user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
- user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
- user_packages legacy/Try/BayesOpt legacy.py +3280 -0
- user_packages legacy/Try/BayesOpt.py +838 -0
- user_packages legacy/Try/BayesOptAll.py +1569 -0
- user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
- user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
- user_packages legacy/Try/BayesOptSearch.py +830 -0
- user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
- user_packages legacy/Try/BayesOptV1.py +1911 -0
- user_packages legacy/Try/BayesOptV10.py +2973 -0
- user_packages legacy/Try/BayesOptV11.py +3001 -0
- user_packages legacy/Try/BayesOptV12.py +3001 -0
- user_packages legacy/Try/BayesOptV2.py +2065 -0
- user_packages legacy/Try/BayesOptV3.py +2209 -0
- user_packages legacy/Try/BayesOptV4.py +2342 -0
- user_packages legacy/Try/BayesOptV5.py +2372 -0
- user_packages legacy/Try/BayesOptV6.py +2759 -0
- user_packages legacy/Try/BayesOptV7.py +2832 -0
- user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
- user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
- user_packages legacy/Try/BayesOptV9.py +2927 -0
- user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
- user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
- user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
- user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
- user_packages legacy/Try/xgbbayesopt.py +523 -0
- user_packages legacy/__init__.py +19 -0
- user_packages legacy/cli_common.py +124 -0
- user_packages legacy/notebook_utils.py +228 -0
- user_packages legacy/watchdog_run.py +202 -0
@@ -0,0 +1,1569 @@

```python
# Data transfer between CPU and GPU carries significant overhead, but multiple
# CUDA streams can overlap transfers with computation, enabling work on larger
# datasets.

import copy
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np  # 1.26.2
import optuna  # 4.3.0
import pandas as pd  # 2.2.3
import torch  # 1.10.1+cu111
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as cp
import xgboost as xgb  # 1.7.0

from random import sample
from sklearn.metrics import make_scorer, mean_tweedie_deviance
from sklearn.model_selection import KFold, ShuffleSplit, cross_val_score  # 1.2.2
from sklearn.preprocessing import StandardScaler
from torch.cuda.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import Dataset, DataLoader, TensorDataset

# Tweedie deviance loss in torch
# Reference: https://scikit-learn.org/stable/modules/model_evaluation.html#mean-poisson-gamma-and-tweedie-deviances


def tweedie_loss(pred, target, p=1.5):
    # Ensure predictions are positive for stability
    eps = 1e-6
    pred_clamped = torch.clamp(pred, min=eps)
    # Compute Tweedie deviance components
    if p == 1:
        # Poisson case: 2 * (y * log(y / mu) - y + mu)
        term1 = target * torch.log(target / pred_clamped + eps)
        term2 = target - pred_clamped
        term3 = 0
    elif p == 0:
        # Gaussian case
        term1 = 0.5 * torch.pow(target - pred_clamped, 2)
        term2 = 0
        term3 = 0
    elif p == 2:
        # Gamma case
        term1 = torch.log(pred_clamped / target + eps)
        term2 = -target / pred_clamped + 1
        term3 = 0
    else:
        term1 = torch.pow(target, 2 - p) / ((1 - p) * (2 - p))
        term2 = target * torch.pow(pred_clamped, 1 - p) / (1 - p)
        term3 = torch.pow(pred_clamped, 2 - p) / (2 - p)
    # Tweedie deviance (negative log-likelihood up to a constant)
    return 2 * (term1 - term2 + term3)
```
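As a quick sanity check (illustrative snippet, not part of the packaged file): for a non-special power such as 1.5, the general branch above reproduces scikit-learn's per-sample Tweedie deviance.

```python
# Illustrative check: tweedie_loss (general branch) vs sklearn, power=1.5.
import torch
from sklearn.metrics import mean_tweedie_deviance

y = torch.tensor([1.0, 0.0, 3.0, 2.5])
mu = torch.tensor([1.2, 0.4, 2.0, 2.5])
print(tweedie_loss(mu, y, p=1.5).mean().item())                 # torch version
print(mean_tweedie_deviance(y.numpy(), mu.numpy(), power=1.5))  # sklearn
# The two values agree to float32 precision.
```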
```python
# Weighted binning helper


def split_data(data, col_nme, wgt_nme, n_bins=10):
    data.sort_values(by=col_nme, ascending=True, inplace=True)
    data['cum_weight'] = data[wgt_nme].cumsum()
    w_sum = data[wgt_nme].sum()
    data.loc[:, 'bins'] = np.floor(data['cum_weight'] * float(n_bins) / w_sum)
    data.loc[(data['bins'] == n_bins), 'bins'] = n_bins - 1
    return data.groupby(['bins'], observed=True).sum(numeric_only=True)
```
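For example (illustrative toy data, hypothetical column names), `split_data` buckets rows into ten bins that each carry roughly a tenth of the total weight:

```python
# Illustrative use: bin 1,000 rows into ten buckets of roughly equal exposure.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
toy = pd.DataFrame({'pred': rng.gamma(2.0, 1.0, 1000),
                    'exposure': rng.uniform(0.5, 1.5, 1000)})
binned = split_data(toy, 'pred', 'exposure', n_bins=10)
print(binned['exposure'])  # each bin holds ~10% of total exposure
```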
```python
# Lift chart plotting helper


def plot_lift_list(pred_model, w_pred_list, w_act_list,
                   weight_list, tgt_nme, n_bins=10,
                   fig_nme='Lift Chart'):
    lift_data = pd.DataFrame()
    lift_data.loc[:, 'pred'] = pred_model
    lift_data.loc[:, 'w_pred'] = w_pred_list
    lift_data.loc[:, 'act'] = w_act_list
    lift_data.loc[:, 'weight'] = weight_list
    plot_data = split_data(lift_data, 'pred', 'weight', n_bins)
    plot_data['exp_v'] = plot_data['w_pred'] / plot_data['weight']
    plot_data['act_v'] = plot_data['act'] / plot_data['weight']
    plot_data.reset_index(inplace=True)
    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_subplot(111)
    ax.plot(plot_data.index, plot_data['act_v'],
            label='Actual', color='red')
    ax.plot(plot_data.index, plot_data['exp_v'],
            label='Predicted', color='blue')
    ax.set_title(
        'Lift Chart of %s' % tgt_nme, fontsize=8)
    plt.xticks(plot_data.index,
               plot_data.index,
               rotation=90, fontsize=6)
    plt.yticks(fontsize=6)
    plt.legend(loc='upper left',
               fontsize=5, frameon=False)
    plt.margins(0.05)
    ax2 = ax.twinx()
    ax2.bar(plot_data.index, plot_data['weight'],
            alpha=0.5, color='seagreen',
            label='Earned Exposure')
    plt.yticks(fontsize=6)
    plt.legend(loc='upper right',
               fontsize=5, frameon=False)
    plt.subplots_adjust(wspace=0.3)
    save_path = os.path.join(
        os.getcwd(), 'plot', f'05_{tgt_nme}_{fig_nme}.png')
    plt.savefig(save_path, dpi=300)
    plt.close(fig)

# Double lift chart plotting helper


def plot_dlift_list(pred_model_1, pred_model_2,
                    model_nme_1, model_nme_2,
                    tgt_nme,
                    w_list, w_act_list, n_bins=10,
                    fig_nme='Double Lift Chart'):
    lift_data = pd.DataFrame()
    lift_data.loc[:, 'pred1'] = pred_model_1
    lift_data.loc[:, 'pred2'] = pred_model_2
    lift_data.loc[:, 'diff_ly'] = lift_data['pred1'] / lift_data['pred2']
    lift_data.loc[:, 'act'] = w_act_list
    lift_data.loc[:, 'weight'] = w_list
    lift_data.loc[:, 'w_pred1'] = lift_data['pred1'] * lift_data['weight']
    lift_data.loc[:, 'w_pred2'] = lift_data['pred2'] * lift_data['weight']
    plot_data = split_data(lift_data, 'diff_ly', 'weight', n_bins)
    plot_data['exp_v1'] = plot_data['w_pred1'] / plot_data['act']
    plot_data['exp_v2'] = plot_data['w_pred2'] / plot_data['act']
    plot_data['act_v'] = plot_data['act'] / plot_data['act']
    plot_data.reset_index(inplace=True)
    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_subplot(111)
    ax.plot(plot_data.index, plot_data['act_v'],
            label='Actual', color='red')
    ax.plot(plot_data.index, plot_data['exp_v1'],
            label=model_nme_1, color='blue')
    ax.plot(plot_data.index, plot_data['exp_v2'],
            label=model_nme_2, color='black')
    ax.set_title(
        'Double Lift Chart of %s' % tgt_nme, fontsize=8)
    plt.xticks(plot_data.index,
               plot_data.index,
               rotation=90, fontsize=6)
    plt.xlabel('%s / %s' % (model_nme_1, model_nme_2), fontsize=6)
    plt.yticks(fontsize=6)
    plt.legend(loc='upper left',
               fontsize=5, frameon=False)
    plt.margins(0.1)
    plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8)
    ax2 = ax.twinx()
    ax2.bar(plot_data.index, plot_data['weight'],
            alpha=0.5, color='seagreen',
            label='Earned Exposure')
    plt.yticks(fontsize=6)
    plt.legend(loc='upper right',
               fontsize=5, frameon=False)
    plt.subplots_adjust(wspace=0.3)
    save_path = os.path.join(
        os.getcwd(), 'plot', f'06_{tgt_nme}_{fig_nme}.png')
    plt.savefig(save_path, dpi=300)
    plt.close(fig)


# ResNet model structure
# Residual block: two linear layers + ReLU + residual connection
# ResBlock subclasses nn.Module
class ResBlock(nn.Module):
    def __init__(self, dim: int, dropout: float = 0.1,
                 use_layernorm: bool = False, residual_scale: float = 0.1
                 ):
        super().__init__()
        self.use_layernorm = use_layernorm

        if use_layernorm:
            Norm = nn.LayerNorm  # normalizes over the last dimension
        else:
            def Norm(d): return nn.BatchNorm1d(d)  # switch kept so BatchNorm can be tried too

        self.norm1 = Norm(dim)
        self.fc1 = nn.Linear(dim, dim, bias=True)
        self.act = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(dropout) if dropout > 0.0 else nn.Identity()
        self.norm2 = Norm(dim)
        self.fc2 = nn.Linear(dim, dim, bias=True)

        # Residual scaling keeps the trunk from blowing up early in training
        self.res_scale = nn.Parameter(
            torch.tensor(residual_scale, dtype=torch.float32)
        )

    def forward(self, x):
        # pre-activation
        out = self.norm1(x)
        out = self.fc1(out)
        out = self.act(out)
        out = self.dropout(out)
        out = self.norm2(out)
        out = self.fc2(out)
        # Scale the residual branch, then add
        return F.relu(x + self.res_scale * out)

# ResNetSequential subclasses nn.Module and defines the full network


class ResNetSequential(nn.Module):
    # Input: (batch, input_dim)
    # Structure:
    #   fc1 -> LN/BN -> ReLU -> ResBlock * block_num -> fc_out -> Softplus

    def __init__(self, input_dim: int, hidden_dim: int = 64, block_num: int = 2,
                 use_layernorm: bool = True, dropout: float = 0.1,
                 residual_scale: float = 0.1):
        super(ResNetSequential, self).__init__()

        self.net = nn.Sequential()
        self.net.add_module('fc1', nn.Linear(input_dim, hidden_dim))

        if use_layernorm:
            self.net.add_module('norm1', nn.LayerNorm(hidden_dim))
        else:
            self.net.add_module('norm1', nn.BatchNorm1d(hidden_dim))

        self.net.add_module('relu1', nn.ReLU(inplace=True))

        # Stack of residual blocks
        for i in range(block_num):
            self.net.add_module(
                f'ResBlk_{i+1}',
                ResBlock(
                    hidden_dim,
                    dropout=dropout,
                    use_layernorm=use_layernorm,
                    residual_scale=residual_scale)
            )

        self.net.add_module('fc_out', nn.Linear(hidden_dim, 1))
        self.net.add_module('softplus', nn.Softplus())

    def forward(self, x):
        return self.net(x)
```
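A quick shape check of the network (illustrative, not part of the file): the final Softplus guarantees a strictly positive `(batch, 1)` output, which is what `tweedie_loss` expects.

```python
# Illustrative shape check of ResNetSequential.
import torch

net = ResNetSequential(input_dim=12, hidden_dim=64, block_num=2)
out = net(torch.randn(8, 12))
print(out.shape, bool((out > 0).all()))  # torch.Size([8, 1]) True
```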
```python
# Scikit-learn-style wrapper for the ResNet model


class ResNetScikitLearn(nn.Module):
    def __init__(self, model_nme: str, input_dim: int, hidden_dim: int = 64,
                 block_num: int = 2, batch_num: int = 100, epochs: int = 100,
                 tweedie_power: float = 1.5, learning_rate: float = 0.01, patience: int = 10,
                 use_layernorm: bool = True, dropout: float = 0.1,
                 residual_scale: float = 0.1):
        super(ResNetScikitLearn, self).__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.block_num = block_num
        self.batch_num = batch_num
        self.epochs = epochs
        self.model_nme = model_nme
        self.learning_rate = learning_rate
        self.patience = patience
        self.use_layernorm = use_layernorm
        self.dropout = dropout
        self.residual_scale = residual_scale

        # Device selection: cuda > mps > cpu
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        elif torch.backends.mps.is_available():
            self.device = torch.device('mps')
        else:
            self.device = torch.device('cpu')

        # Tweedie power: 'f' (frequency model) -> Poisson, 's' (severity model) -> Gamma
        if 'f' in self.model_nme:
            self.tw_power = 1
        elif 's' in self.model_nme:
            self.tw_power = 2
        else:
            self.tw_power = tweedie_power

        # Build the network
        self.resnet = ResNetSequential(
            self.input_dim,
            self.hidden_dim,
            self.block_num,
            use_layernorm=self.use_layernorm,
            dropout=self.dropout,
            residual_scale=self.residual_scale
        ).to(self.device)

    def fit(self, X_train, y_train, w_train=None,
            X_val=None, y_val=None, w_val=None):

        # === 1. Training set: keep on CPU; the DataLoader moves batches to the GPU ===
        X_tensor = torch.tensor(X_train.values, dtype=torch.float32)
        y_tensor = torch.tensor(
            y_train.values, dtype=torch.float32).view(-1, 1)
        if w_train is not None:
            w_tensor = torch.tensor(
                w_train.values, dtype=torch.float32).view(-1, 1)
        else:
            w_tensor = torch.ones_like(y_tensor)

        # === 2. Validation set: build on CPU, moved to the device once below ===
        has_val = X_val is not None and y_val is not None
        if has_val:
            X_val_tensor = torch.tensor(X_val.values, dtype=torch.float32)
            y_val_tensor = torch.tensor(
                y_val.values, dtype=torch.float32).view(-1, 1)
            if w_val is not None:
                w_val_tensor = torch.tensor(
                    w_val.values, dtype=torch.float32).view(-1, 1)
            else:
                w_val_tensor = torch.ones_like(y_val_tensor)
        else:
            X_val_tensor = y_val_tensor = w_val_tensor = None

        # === 3. DataLoader ===
        dataset = TensorDataset(X_tensor, y_tensor, w_tensor)
        # Heuristic: grow the batch with sqrt(learning rate) so the update
        # noise stays comparable as the learning rate is tuned
        batch_size = max(
            4096,
            int((self.learning_rate / (1e-4)) ** 0.5 *
                (X_train.shape[0] / self.batch_num))
        )

        dataloader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=1,  # 0-1 workers is usually enough for tabular data
            pin_memory=(self.device.type == 'cuda')
        )

        # === 4. Optimizer & AMP ===
        optimizer = torch.optim.Adam(
            self.resnet.parameters(), lr=self.learning_rate)
        scaler = GradScaler(enabled=(self.device.type == 'cuda'))

        # === 5. Early stopping ===
        best_loss, patience_counter = float('inf'), 0
        best_model_state = None

        # If a validation set exists, move it to the device once up front
        if has_val:
            X_val_dev = X_val_tensor.to(self.device, non_blocking=True)
            y_val_dev = y_val_tensor.to(self.device, non_blocking=True)
            w_val_dev = w_val_tensor.to(self.device, non_blocking=True)

        # === 6. Training loop ===
        for epoch in range(1, self.epochs + 1):
            self.resnet.train()
            for X_batch, y_batch, w_batch in dataloader:
                optimizer.zero_grad()

                X_batch = X_batch.to(self.device, non_blocking=True)
                y_batch = y_batch.to(self.device, non_blocking=True)
                w_batch = w_batch.to(self.device, non_blocking=True)

                with autocast(enabled=(self.device.type == 'cuda')):
                    y_pred = self.resnet(X_batch)
                    y_pred = torch.clamp(y_pred, min=1e-6)

                    losses = tweedie_loss(
                        y_pred, y_batch, p=self.tw_power).view(-1)
                    weighted_loss = (losses * w_batch.view(-1)
                                     ).sum() / w_batch.sum()

                scaler.scale(weighted_loss).backward()

                if self.device.type == 'cuda':
                    scaler.unscale_(optimizer)
                    clip_grad_norm_(self.resnet.parameters(), max_norm=1.0)

                scaler.step(optimizer)
                scaler.update()

            # === 7. Validation loss & early stopping ===
            if has_val:
                self.resnet.eval()
                with torch.no_grad(), autocast(enabled=(self.device.type == 'cuda')):
                    y_val_pred = self.resnet(X_val_dev)
                    y_val_pred = torch.clamp(y_val_pred, min=1e-6)

                    val_loss_values = tweedie_loss(
                        y_val_pred, y_val_dev, p=self.tw_power
                    ).view(-1)
                    val_weighted_loss = (
                        val_loss_values * w_val_dev.view(-1)
                    ).sum() / w_val_dev.sum()

                if val_weighted_loss < best_loss:
                    best_loss = val_weighted_loss
                    patience_counter = 0
                    best_model_state = copy.deepcopy(self.resnet.state_dict())
                else:
                    patience_counter += 1

                if patience_counter >= self.patience and best_model_state is not None:
                    self.resnet.load_state_dict(best_model_state)
                    break

    # ---------------- predict ----------------

    def predict(self, X_test):
        self.resnet.eval()
        with torch.no_grad():
            X_tensor = torch.tensor(
                X_test.values, dtype=torch.float32).to(self.device)
            y_pred = self.resnet(X_tensor).cpu().numpy()

        y_pred = np.clip(y_pred, 1e-6, None)
        return y_pred.flatten()

    # ---------------- set_params ----------------

    def set_params(self, params):
        for key, value in params.items():
            if hasattr(self, key):
                setattr(self, key, value)
            else:
                raise ValueError(f"Parameter {key} not found in model.")
```
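A minimal usage sketch on toy data (hypothetical names; CPU works since the device is auto-detected). The wrapper expects pandas inputs because `fit()` and `predict()` read `.values`:

```python
# Illustrative fit/predict round trip on random data.
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
X = pd.DataFrame(rng.normal(size=(512, 6)), columns=[f'x{i}' for i in range(6)])
y = pd.Series(rng.gamma(2.0, 1.0, size=512))

model = ResNetScikitLearn(model_nme='bc_demo', input_dim=6, epochs=5)
model.fit(X, y)              # no validation set -> runs all 5 epochs
print(model.predict(X)[:3])  # strictly positive predictions, shape (512,)
```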
```python
# FT-Transformer model structure


class FeatureTokenizer(nn.Module):
    # Maps numeric & categorical features to tokens: (batch, n_tokens, d_model)
    # Assumes:
    # - X_num: (batch, num_numeric)
    # - X_cat: (batch, num_categorical), each column integer-encoded in [0, card-1]

    def __init__(self, num_numeric: int, cat_cardinalities, d_model: int):
        super().__init__()

        self.num_numeric = num_numeric
        self.has_numeric = num_numeric > 0

        if self.has_numeric:
            self.num_linear = nn.Linear(num_numeric, d_model)

        self.embeddings = nn.ModuleList([
            nn.Embedding(card, d_model) for card in cat_cardinalities
        ])

    def forward(self, X_num, X_cat):
        tokens = []

        if self.has_numeric:
            # All numeric features map to a single token
            num_token = self.num_linear(X_num)  # (batch, d_model)
            tokens.append(num_token)

        # One embedding token per categorical feature
        for i, emb in enumerate(self.embeddings):
            # X_cat[:, i]: (batch,) long
            tok = emb(X_cat[:, i])  # (batch, d_model)
            tokens.append(tok)

        # (batch, n_tokens, d_model)
        x = torch.stack(tokens, dim=1)
        return x
```
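So the token count is one (for all numeric features together) plus one per categorical column; a tiny illustrative check:

```python
# Illustrative shape check: 3 numeric features + 1 categorical column
# -> 2 tokens of width d_model.
import torch

tok = FeatureTokenizer(num_numeric=3, cat_cardinalities=[4], d_model=16)
out = tok(torch.randn(2, 3), torch.randint(0, 4, (2, 1)))
print(out.shape)  # torch.Size([2, 2, 16])
```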
```python
# FT-Transformer core model


class FTTransformerCore(nn.Module):
    # A minimal working FT-Transformer:
    # - FeatureTokenizer: numeric / categorical features -> tokens
    # - TransformerEncoder: captures feature interactions
    # - Pooling + MLP + Softplus: positive output (suits Tweedie / Gamma)

    def __init__(self, num_numeric: int, cat_cardinalities, d_model: int = 64,
                 n_heads: int = 8, n_layers: int = 4, dropout: float = 0.1,
                 ):
        super().__init__()

        self.tokenizer = FeatureTokenizer(
            num_numeric=num_numeric,
            cat_cardinalities=cat_cardinalities,
            d_model=d_model
        )

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            activation="gelu",
            batch_first=True,
            norm_first=True,  # pre-norm is more stable
        )
        self.encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=n_layers
        )

        self.head = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, d_model),
            nn.GELU(),
            nn.Linear(d_model, 1),
            nn.Softplus()  # keeps the output positive for Tweedie / Gamma
        )

    def forward(self, X_num, X_cat):

        # X_num: (batch, num_numeric) float32
        # X_cat: (batch, num_categorical) long

        tokens = self.tokenizer(X_num, X_cat)  # (batch, tokens, d_model)
        x = self.encoder(tokens)               # (batch, tokens, d_model)

        # Simple mean pooling over tokens
        x = x.mean(dim=1)                      # (batch, d_model)

        out = self.head(x)                     # (batch, 1), Softplus applied
        return out
```
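An illustrative forward pass (note `d_model` must be divisible by `n_heads` for the attention layers):

```python
# Illustrative forward pass through FTTransformerCore.
import torch

core = FTTransformerCore(num_numeric=2, cat_cardinalities=[5, 3],
                         d_model=32, n_heads=4, n_layers=2)
X_num = torch.randn(8, 2)
X_cat = torch.stack([torch.randint(0, 5, (8,)),
                     torch.randint(0, 3, (8,))], dim=1)
print(core(X_num, X_cat).shape)  # torch.Size([8, 1])
```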
```python
# TabularDataset class


class TabularDataset(Dataset):
    def __init__(self, X_num, X_cat, y, w):

        # X_num: torch.float32, (N, num_numeric)
        # X_cat: torch.long, (N, num_categorical)
        # y: torch.float32, (N, 1)
        # w: torch.float32, (N, 1)

        self.X_num = X_num
        self.X_cat = X_cat
        self.y = y
        self.w = w

    def __len__(self):
        return self.y.shape[0]

    def __getitem__(self, idx):
        return (
            self.X_num[idx],
            self.X_cat[idx],
            self.y[idx],
            self.w[idx],
        )

# Scikit-learn-style wrapper for the FT-Transformer


class FTTransformerSklearn(nn.Module):

    # sklearn-style wrapper:
    # - num_cols: list of numeric feature column names
    # - cat_cols: list of categorical column names (label-encoded to [0, n_classes-1])

    def __init__(self, model_nme: str, num_cols, cat_cols, d_model: int = 64, n_heads: int = 8,
                 n_layers: int = 4, dropout: float = 0.1, batch_num: int = 100, epochs: int = 100,
                 tweedie_power: float = 1.5, learning_rate: float = 1e-3, patience: int = 10,
                 ):
        super().__init__()

        self.model_nme = model_nme
        self.num_cols = list(num_cols)
        self.cat_cols = list(cat_cols)
        self.d_model = d_model
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.dropout = dropout
        self.batch_num = batch_num
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.patience = patience
        if 'f' in self.model_nme:
            self.tw_power = 1.0
        elif 's' in self.model_nme:
            self.tw_power = 2.0
        else:
            self.tw_power = tweedie_power
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        elif torch.backends.mps.is_available():
            self.device = torch.device("mps")
        else:
            self.device = torch.device("cpu")
        self.cat_cardinalities = None
        self.cat_categories = {}
        self.ft = None

    def _build_model(self, X_train):
        num_numeric = len(self.num_cols)
        cat_cardinalities = []

        for col in self.cat_cols:
            cats = X_train[col].astype('category')
            categories = cats.cat.categories
            self.cat_categories[col] = categories  # remember the training-set category levels

            card = len(categories) + 1  # reserve one extra class for unknown / missing
            cat_cardinalities.append(card)

        self.cat_cardinalities = cat_cardinalities

        self.ft = FTTransformerCore(
            num_numeric=num_numeric,
            cat_cardinalities=cat_cardinalities,
            d_model=self.d_model,
            n_heads=self.n_heads,
            n_layers=self.n_layers,
            dropout=self.dropout,
        ).to(self.device)

    def _encode_cats(self, X):
        # X: DataFrame containing at least self.cat_cols
        # Returns: np.ndarray, shape (N, num_categorical), dtype=int64

        if not self.cat_cols:
            return np.zeros((len(X), 0), dtype='int64')

        X_cat_list = []
        for col in self.cat_cols:
            # Use the categories recorded at training time
            categories = self.cat_categories[col]
            # Build a Categorical with the fixed category levels
            cats = pd.Categorical(X[col], categories=categories)
            codes = cats.codes.astype('int64', copy=True)  # -1 marks unknown or missing
            # Map unknown / missing to the "unknown" bucket at index len(categories)
            codes[codes < 0] = len(categories)
            X_cat_list.append(codes)

        X_cat_np = np.stack(X_cat_list, axis=1)  # (N, num_categorical)
        return X_cat_np

    def fit(self, X_train, y_train, w_train=None,
            X_val=None, y_val=None, w_val=None):

        # Build the model structure on the first fit
        if self.ft is None:
            self._build_model(X_train)

        # --- Build training tensors (all on CPU; batches move to the GPU later) ---
        X_num_train = X_train[self.num_cols].to_numpy(
            dtype=np.float32, copy=True)
        X_num_train = torch.tensor(
            X_num_train,
            dtype=torch.float32
        )

        if self.cat_cols:
            # X_cat_train_list = []
            # for col in self.cat_cols:
            #     cats = X_train[col].astype('category')
            #     codes = np.array(cats.cat.codes, dtype='int64',
            #                      copy=True)  # -1 marks missing
            #     # Map missing values to the last class (beyond the cardinality)
            #     codes[codes < 0] = cats.cat.categories.size
            #     X_cat_train_list.append(codes)
            # X_cat_train = torch.tensor(
            #     np.vstack(X_cat_train_list).T,  # (N, num_categorical)
            #     dtype=torch.long
            # )
            X_cat_train_np = self._encode_cats(X_train)
            X_cat_train = torch.tensor(X_cat_train_np, dtype=torch.long)
        else:
            X_cat_train = torch.zeros(
                (X_num_train.size(0), 0), dtype=torch.long)

        y_tensor = torch.tensor(
            y_train.values,
            dtype=torch.float32
        ).view(-1, 1)

        if w_train is not None:
            w_tensor = torch.tensor(
                w_train.values,
                dtype=torch.float32
            ).view(-1, 1)
        else:
            w_tensor = torch.ones_like(y_tensor)

        # --- Validation tensors (moved to the device once below) ---
        has_val = X_val is not None and y_val is not None
        if has_val:
            # ---------- Numeric features ----------
            X_num_val_np = X_val[self.num_cols].to_numpy(
                dtype=np.float32, copy=True)
            X_num_val = torch.tensor(X_num_val_np, dtype=torch.float32)

            # ---------- Categorical features ----------
            if self.cat_cols:
                # X_cat_val_list = []
                # for col in self.cat_cols:
                #     cats = X_val[col].astype('category')
                #     codes = np.array(cats.cat.codes, dtype='int64', copy=True)
                #     codes[codes < 0] = cats.cat.categories.size
                #     X_cat_val_list.append(codes)
                #
                # X_cat_val_np = np.stack(
                #     X_cat_val_list, axis=1).astype('int64', copy=False)
                # X_cat_val = torch.tensor(X_cat_val_np, dtype=torch.long)
                X_cat_val_np = self._encode_cats(X_val)
                X_cat_val = torch.tensor(X_cat_val_np, dtype=torch.long)
            else:
                X_cat_val = torch.zeros(
                    (X_num_val.shape[0], 0), dtype=torch.long)

            # ---------- Target & weights ----------
            y_val_np = y_val.values.astype(np.float32, copy=True)
            y_val_tensor = torch.tensor(
                y_val_np, dtype=torch.float32).view(-1, 1)

            if w_val is not None:
                w_val_np = w_val.values.astype(np.float32, copy=True)
                w_val_tensor = torch.tensor(
                    w_val_np, dtype=torch.float32).view(-1, 1)
            else:
                w_val_tensor = torch.ones_like(y_val_tensor)

        else:
            X_num_val = X_cat_val = y_val_tensor = w_val_tensor = None

        # --- DataLoader ---
        dataset = TabularDataset(
            X_num_train, X_cat_train, y_tensor, w_tensor
        )

        batch_size = max(
            32,
            int((self.learning_rate / 1e-4) ** 0.5 *
                (X_train.shape[0] / self.batch_num))
        )

        dataloader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=1,
            pin_memory=(self.device.type == 'cuda')
        )

        # --- Optimizer & AMP ---
        optimizer = torch.optim.Adam(
            self.ft.parameters(),
            lr=self.learning_rate
        )
        scaler = GradScaler(enabled=(self.device.type == 'cuda'))

        # --- Early stopping ---
        best_loss = float('inf')
        patience_counter = 0
        best_model_state = None

        # Move the whole validation set to the device (if present)
        if has_val:
            X_num_val_dev = X_num_val.to(self.device, non_blocking=True)
            X_cat_val_dev = X_cat_val.to(self.device, non_blocking=True)
            y_val_dev = y_val_tensor.to(self.device, non_blocking=True)
            w_val_dev = w_val_tensor.to(self.device, non_blocking=True)

        # --- Training loop ---
        for epoch in range(1, self.epochs + 1):
            self.ft.train()
            for X_num_b, X_cat_b, y_b, w_b in dataloader:
                optimizer.zero_grad()

                X_num_b = X_num_b.to(self.device, non_blocking=True)
                X_cat_b = X_cat_b.to(self.device, non_blocking=True)
                y_b = y_b.to(self.device, non_blocking=True)
                w_b = w_b.to(self.device, non_blocking=True)

                with autocast(enabled=(self.device.type == 'cuda')):
                    y_pred = self.ft(X_num_b, X_cat_b)
                    y_pred = torch.clamp(y_pred, min=1e-6)

                    losses = tweedie_loss(
                        y_pred, y_b, p=self.tw_power
                    ).view(-1)

                    weighted_loss = (losses * w_b.view(-1)).sum() / w_b.sum()

                scaler.scale(weighted_loss).backward()

                if self.device.type == 'cuda':
                    scaler.unscale_(optimizer)
                    clip_grad_norm_(self.ft.parameters(), max_norm=1.0)

                scaler.step(optimizer)
                scaler.update()

            # --- Validation & early stopping ---
            if has_val:
                self.ft.eval()
                with torch.no_grad(), autocast(enabled=(self.device.type == 'cuda')):
                    y_val_pred = self.ft(X_num_val_dev, X_cat_val_dev)
                    y_val_pred = torch.clamp(y_val_pred, min=1e-6)

                    val_losses = tweedie_loss(
                        y_val_pred, y_val_dev, p=self.tw_power
                    ).view(-1)

                    val_weighted_loss = (
                        val_losses * w_val_dev.view(-1)
                    ).sum() / w_val_dev.sum()

                if val_weighted_loss < best_loss:
                    best_loss = val_weighted_loss
                    patience_counter = 0
                    best_model_state = copy.deepcopy(self.ft.state_dict())
                else:
                    patience_counter += 1

                if patience_counter >= self.patience and best_model_state is not None:
                    self.ft.load_state_dict(best_model_state)
                    break

    def predict(self, X_test):
        # X_test: DataFrame containing num_cols + cat_cols

        self.ft.eval()
        X_num = X_test[self.num_cols].to_numpy(dtype=np.float32, copy=True)
        X_num = torch.tensor(
            X_num,
            dtype=torch.float32
        )
        if self.cat_cols:
            # X_cat_list = []
            # for col in self.cat_cols:
            #     cats = X_test[col].astype('category')
            #     codes = np.array(cats.cat.codes, dtype='int64',
            #                      copy=True)  # -1 marks missing
            #     codes[codes < 0] = cats.cat.categories.size
            #     X_cat_list.append(codes)
            # X_cat = torch.tensor(
            #     np.vstack(X_cat_list).T,
            #     dtype=torch.long
            # )
            X_cat_np = self._encode_cats(X_test)
            X_cat = torch.tensor(X_cat_np, dtype=torch.long)
        else:
            X_cat = torch.zeros((X_num.size(0), 0), dtype=torch.long)

        with torch.no_grad():
            X_num = X_num.to(self.device, non_blocking=True)
            X_cat = X_cat.to(self.device, non_blocking=True)
            y_pred = self.ft(X_num, X_cat).cpu().numpy()

        y_pred = np.clip(y_pred, 1e-6, None)
        return y_pred.ravel()

    def set_params(self, params: dict):

        # Kept consistent with sklearn conventions.
        # Note: structural parameters (e.g. d_model / n_heads) require a
        # re-fit to take effect.

        for key, value in params.items():
            if hasattr(self, key):
                setattr(self, key, value)
            else:
                raise ValueError(f"Parameter {key} not found in model.")
        return self
```
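Because `_encode_cats` reuses the category levels recorded at fit time, levels never seen in training land in the reserved bucket. A tiny illustration (toy frames, hypothetical names; `_build_model` is called directly here just to record the levels):

```python
# Illustrative check of the unknown-category bucket.
import pandas as pd

train = pd.DataFrame({'x1': [0.1, 0.2, 0.3], 'region': ['N', 'S', 'N']})
score = pd.DataFrame({'x1': [0.4], 'region': ['E']})   # level unseen at fit time

ft = FTTransformerSklearn(model_nme='bc_demo', num_cols=['x1'], cat_cols=['region'])
ft._build_model(train)         # records region's training levels ['N', 'S']
print(ft._encode_cats(score))  # [[2]] -> index len(categories), the unknown bucket
```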
# 定义贝叶斯优化模型类,包含XGBoost和ResNet模型
|
|
866
|
+
|
|
867
|
+
class BayesOptModel:
|
|
868
|
+
def __init__(self, train_data, test_data,
|
|
869
|
+
model_nme, resp_nme, weight_nme, factor_nmes,
|
|
870
|
+
cate_list=[], prop_test=0.25, rand_seed=None, epochs=100):
|
|
871
|
+
# 初始化数据
|
|
872
|
+
# train_data: 训练数据, test_data: 测试数据 格式需为DataFrame
|
|
873
|
+
# model_nme: 模型名称
|
|
874
|
+
# resp_nme: 因变量名称, weight_nme: 权重名称
|
|
875
|
+
# factor_nmes: 因子名称列表, space_params: 参数空间
|
|
876
|
+
# cate_list: 类别变量列表
|
|
877
|
+
# prop_test: 测试集比例, rand_seed
|
|
878
|
+
self.train_data = train_data
|
|
879
|
+
self.test_data = test_data
|
|
880
|
+
self.resp_nme = resp_nme
|
|
881
|
+
self.weight_nme = weight_nme
|
|
882
|
+
self.train_data.loc[:, 'w_act'] = self.train_data[self.resp_nme] * \
|
|
883
|
+
self.train_data[self.weight_nme]
|
|
884
|
+
self.test_data.loc[:, 'w_act'] = self.test_data[self.resp_nme] * \
|
|
885
|
+
self.test_data[self.weight_nme]
|
|
886
|
+
self.factor_nmes = factor_nmes
|
|
887
|
+
self.cate_list = cate_list
|
|
888
|
+
self.rand_seed = rand_seed if rand_seed is not None else np.random.randint(
|
|
889
|
+
1, 10000)
|
|
890
|
+
if self.cate_list != []:
|
|
891
|
+
for cate in self.cate_list:
|
|
892
|
+
self.train_data[cate] = self.train_data[cate].astype(
|
|
893
|
+
'category')
|
|
894
|
+
self.test_data[cate] = self.test_data[cate].astype('category')
|
|
895
|
+
self.prop_test = prop_test
|
|
896
|
+
self.cv = ShuffleSplit(n_splits=int(1/self.prop_test),
|
|
897
|
+
test_size=self.prop_test,
|
|
898
|
+
random_state=self.rand_seed)
|
|
899
|
+
self.model_nme = model_nme
|
|
900
|
+
if self.model_nme.find('f') != -1:
|
|
901
|
+
self.obj = 'count:poisson'
|
|
902
|
+
elif self.model_nme.find('s') != -1:
|
|
903
|
+
self.obj = 'reg:gamma'
|
|
904
|
+
elif self.model_nme.find('bc') != -1:
|
|
905
|
+
self.obj = 'reg:tweedie'
|
|
906
|
+
self.fit_params = {
|
|
907
|
+
'sample_weight': self.train_data[self.weight_nme].values
|
|
908
|
+
}
|
|
909
|
+
self.num_features = [
|
|
910
|
+
nme for nme in self.factor_nmes if nme not in self.cate_list]
|
|
911
|
+
self.train_oht_scl_data = self.train_data[self.factor_nmes +
|
|
912
|
+
[self.weight_nme]+[self.resp_nme]].copy()
|
|
913
|
+
self.test_oht_scl_data = self.test_data[self.factor_nmes +
|
|
914
|
+
[self.weight_nme]+[self.resp_nme]].copy()
|
|
915
|
+
self.train_oht_scl_data = pd.get_dummies(
|
|
916
|
+
self.train_oht_scl_data,
|
|
917
|
+
columns=self.cate_list,
|
|
918
|
+
drop_first=True,
|
|
919
|
+
dtype=np.int8
|
|
920
|
+
)
|
|
921
|
+
self.test_oht_scl_data = pd.get_dummies(
|
|
922
|
+
self.test_oht_scl_data,
|
|
923
|
+
columns=self.cate_list,
|
|
924
|
+
drop_first=True,
|
|
925
|
+
dtype=np.int8
|
|
926
|
+
)
|
|
927
|
+
for num_chr in self.num_features:
|
|
928
|
+
scaler = StandardScaler()
|
|
929
|
+
self.train_oht_scl_data[num_chr] = scaler.fit_transform(
|
|
930
|
+
self.train_oht_scl_data[num_chr].values.reshape(-1, 1))
|
|
931
|
+
self.test_oht_scl_data[num_chr] = scaler.transform(
|
|
932
|
+
self.test_oht_scl_data[num_chr].values.reshape(-1, 1))
|
|
933
|
+
# 对测试集进行列对齐
|
|
934
|
+
self.test_oht_scl_data = self.test_oht_scl_data.reindex(
|
|
935
|
+
columns=self.train_oht_scl_data.columns,
|
|
936
|
+
fill_value=0
|
|
937
|
+
)
|
|
938
|
+
self.var_nmes = list(
|
|
939
|
+
set(list(self.train_oht_scl_data.columns)) -
|
|
940
|
+
set([self.weight_nme, self.resp_nme])
|
|
941
|
+
)
|
|
942
|
+
self.epochs = epochs
|
|
943
|
+
self.model_label = []
|
|
944
|
+
|
|
945
|
+
# 定义单因素画图函数
|
|
946
|
+
def plot_oneway(self, n_bins=10):
|
|
947
|
+
for c in self.factor_nmes:
|
|
948
|
+
fig = plt.figure(figsize=(7, 5))
|
|
949
|
+
if c in self.cate_list:
|
|
950
|
+
strs = c
|
|
951
|
+
else:
|
|
952
|
+
strs = c+'_bins'
|
|
953
|
+
self.train_data.loc[:, strs] = pd.qcut(
|
|
954
|
+
self.train_data[c],
|
|
955
|
+
n_bins,
|
|
956
|
+
duplicates='drop'
|
|
957
|
+
)
|
|
958
|
+
plot_data = self.train_data.groupby(
|
|
959
|
+
[strs], observed=True).sum(numeric_only=True)
|
|
960
|
+
plot_data.reset_index(inplace=True)
|
|
961
|
+
plot_data['act_v'] = plot_data['w_act'] / \
|
|
962
|
+
plot_data[self.weight_nme]
|
|
963
|
+
plot_data.head()
|
|
964
|
+
ax = fig.add_subplot(111)
|
|
965
|
+
ax.plot(plot_data.index, plot_data['act_v'],
|
|
966
|
+
label='Actual', color='red')
|
|
967
|
+
ax.set_title(
|
|
968
|
+
'Analysis of %s : Train Data' % strs,
|
|
969
|
+
fontsize=8)
|
|
970
|
+
plt.xticks(plot_data.index,
|
|
971
|
+
list(plot_data[strs].astype(str)),
|
|
972
|
+
rotation=90)
|
|
973
|
+
if len(list(plot_data[strs].astype(str))) > 50:
|
|
974
|
+
plt.xticks(fontsize=3)
|
|
975
|
+
else:
|
|
976
|
+
plt.xticks(fontsize=6)
|
|
977
|
+
plt.yticks(fontsize=6)
|
|
978
|
+
ax2 = ax.twinx()
|
|
979
|
+
ax2.bar(plot_data.index,
|
|
980
|
+
plot_data[self.weight_nme],
|
|
981
|
+
alpha=0.5, color='seagreen')
|
|
982
|
+
plt.yticks(fontsize=6)
|
|
983
|
+
plt.margins(0.05)
|
|
984
|
+
plt.subplots_adjust(wspace=0.3)
|
|
985
|
+
save_path = os.path.join(
|
|
986
|
+
os.getcwd(), 'plot',
|
|
987
|
+
f'00_{self.model_nme}_{strs}_oneway.png')
|
|
988
|
+
plt.savefig(save_path, dpi=300)
|
|
989
|
+
plt.close(fig)
|
|
990
|
+
|
|
991
|
+
# Xgboost交叉验证函数
|
|
992
|
+
def cross_val_xgb(self, trial):
|
|
993
|
+
learning_rate = trial.suggest_float(
|
|
994
|
+
'learning_rate', 1e-5, 1e-1, log=True)
|
|
995
|
+
gamma = trial.suggest_float(
|
|
996
|
+
'gamma', 0, 10000)
|
|
997
|
+
max_depth = trial.suggest_int(
|
|
998
|
+
'max_depth', 3, 25)
|
|
999
|
+
n_estimators = trial.suggest_int(
|
|
1000
|
+
'n_estimators', 10, 500, step=10)
|
|
1001
|
+
min_child_weight = trial.suggest_int(
|
|
1002
|
+
'min_child_weight', 100, 10000, step=100)
|
|
1003
|
+
reg_alpha = trial.suggest_float(
|
|
1004
|
+
'reg_alpha', 1e-10, 1, log=True)
|
|
1005
|
+
reg_lambda = trial.suggest_float(
|
|
1006
|
+
'reg_lambda', 1e-10, 1, log=True)
|
|
1007
|
+
if self.obj == 'reg:tweedie':
|
|
1008
|
+
tweedie_variance_power = trial.suggest_float(
|
|
1009
|
+
'tweedie_variance_power', 1, 2)
|
|
1010
|
+
elif self.obj == 'count:poisson':
|
|
1011
|
+
tweedie_variance_power = 1
|
|
1012
|
+
elif self.obj == 'reg:gamma':
|
|
1013
|
+
tweedie_variance_power = 2
|
|
1014
|
+
clf = xgb.XGBRegressor(
|
|
1015
|
+
objective=self.obj,
|
|
1016
|
+
random_state=self.rand_seed,
|
|
1017
|
+
subsample=0.9,
|
|
1018
|
+
tree_method='gpu_hist' if torch.cuda.is_available() else 'hist',
|
|
1019
|
+
gpu_id=0,
|
|
1020
|
+
enable_categorical=True,
|
|
1021
|
+
predictor='gpu_predictor' if torch.cuda.is_available() else 'cpu_predictor'
|
|
1022
|
+
)
|
|
1023
|
+
params = {
|
|
1024
|
+
'learning_rate': learning_rate,
|
|
1025
|
+
'gamma': gamma,
|
|
1026
|
+
'max_depth': max_depth,
|
|
1027
|
+
'n_estimators': n_estimators,
|
|
1028
|
+
'min_child_weight': min_child_weight,
|
|
1029
|
+
'reg_alpha': reg_alpha,
|
|
1030
|
+
'reg_lambda': reg_lambda
|
|
1031
|
+
}
|
|
1032
|
+
if self.obj == 'reg:tweedie':
|
|
1033
|
+
params['tweedie_variance_power'] = tweedie_variance_power
|
|
1034
|
+
clf.set_params(**params)
|
|
1035
|
+
acc = cross_val_score(
|
|
1036
|
+
clf,
|
|
1037
|
+
self.train_data[self.factor_nmes],
|
|
1038
|
+
self.train_data[self.resp_nme].values,
|
|
1039
|
+
fit_params=self.fit_params,
|
|
1040
|
+
cv=self.cv,
|
|
1041
|
+
scoring=make_scorer(
|
|
1042
|
+
mean_tweedie_deviance,
|
|
1043
|
+
power=tweedie_variance_power,
|
|
1044
|
+
greater_is_better=False),
|
|
1045
|
+
error_score='raise',
|
|
1046
|
+
n_jobs=int(1/self.prop_test)).mean()
|
|
1047
|
+
return -acc
|
|
1048
|
+
|
|
1049
|
+
# 定义Xgboost贝叶斯优化函数
|
|
1050
|
+
def bayesopt_xgb(self, max_evals=100):
|
|
1051
|
+
study = optuna.create_study(
|
|
1052
|
+
direction='minimize',
|
|
1053
|
+
sampler=optuna.samplers.TPESampler(seed=self.rand_seed))
|
|
1054
|
+
study.optimize(self.cross_val_xgb, n_trials=max_evals)
|
|
1055
|
+
self.best_xgb_params = study.best_params
|
|
1056
|
+
pd.DataFrame(self.best_xgb_params, index=[0]).to_csv(
|
|
1057
|
+
os.getcwd() + '/Results/' + self.model_nme + '_bestparams_xgb.csv')
|
|
1058
|
+
self.best_xgb_trial = study.best_trial
|
|
1059
|
+
self.xgb_best = xgb.XGBRegressor(
|
|
1060
|
+
objective=self.obj,
|
|
1061
|
+
random_state=self.rand_seed,
|
|
1062
|
+
subsample=0.9,
|
|
1063
|
+
tree_method='gpu_hist' if torch.cuda.is_available() else 'hist',
|
|
1064
|
+
gpu_id=0,
|
|
1065
|
+
enable_categorical=True,
|
|
1066
|
+
predictor='gpu_predictor' if torch.cuda.is_available() else 'cpu_predictor'
|
|
1067
|
+
)
|
|
1068
|
+
self.xgb_best.set_params(**self.best_xgb_params)
|
|
1069
|
+
self.xgb_best.fit(self.train_data[self.factor_nmes],
|
|
1070
|
+
self.train_data[self.resp_nme].values,
|
|
1071
|
+
**self.fit_params)
|
|
1072
|
+
self.model_label += ['Xgboost']
|
|
1073
|
+
self.train_data['pred_xgb'] = self.xgb_best.predict(
|
|
1074
|
+
self.train_data[self.factor_nmes])
|
|
1075
|
+
self.test_data['pred_xgb'] = self.xgb_best.predict(
|
|
1076
|
+
self.test_data[self.factor_nmes])
|
|
1077
|
+
self.train_data.loc[:, 'w_pred_xgb'] = self.train_data['pred_xgb'] * \
|
|
1078
|
+
self.train_data[self.weight_nme]
|
|
1079
|
+
self.test_data.loc[:, 'w_pred_xgb'] = self.test_data['pred_xgb'] * \
|
|
1080
|
+
self.test_data[self.weight_nme]
|
|
1081
|
+
|
|
1082
|
+
# ResNet交叉验证函数
|
|
1083
|
+
def cross_val_resn(self, trial):
|
|
1084
|
+
|
|
1085
|
+
learning_rate = trial.suggest_float(
|
|
1086
|
+
'learning_rate', 1e-6, 1e-2, log=True) # 较低learning rate为了保证不会出险梯度爆炸
|
|
1087
|
+
hidden_dim = trial.suggest_int(
|
|
1088
|
+
'hidden_dim', 32, 256, step=32)
|
|
1089
|
+
block_num = trial.suggest_int(
|
|
1090
|
+
'block_num', 2, 10)
|
|
1091
|
+
batch_num = trial.suggest_int(
|
|
1092
|
+
'batch_num',
|
|
1093
|
+
10 if self.obj == 'reg:gamma' else 100,
|
|
1094
|
+
100 if self.obj == 'reg:gamma' else 1000,
|
|
1095
|
+
step=10 if self.obj == 'reg:gamma' else 100)
|
|
1096
|
+
if self.obj == 'reg:tweedie':
|
|
1097
|
+
tw_power = trial.suggest_float(
|
|
1098
|
+
'tw_power', 1, 2.0)
|
|
1099
|
+
elif self.obj == 'count:poisson':
|
|
1100
|
+
tw_power = 1
|
|
1101
|
+
elif self.obj == 'reg:gamma':
|
|
1102
|
+
tw_power = 2
|
|
1103
|
+
loss = 0
|
|
1104
|
+
for fold, (train_idx, test_idx) in enumerate(self.cv.split(self.train_oht_scl_data[self.var_nmes])):
|
|
1105
|
+
# 创建模型
|
|
1106
|
+
cv_net = ResNetScikitLearn(
|
|
1107
|
+
model_nme=self.model_nme,
|
|
1108
|
+
input_dim=self.train_oht_scl_data[self.var_nmes].shape[1],
|
|
1109
|
+
epochs=self.epochs,
|
|
1110
|
+
learning_rate=learning_rate,
|
|
1111
|
+
hidden_dim=hidden_dim,
|
|
1112
|
+
block_num=block_num,
|
|
1113
|
+
# 保证权重方差不变
|
|
1114
|
+
batch_num=batch_num,
|
|
1115
|
+
tweedie_power=tw_power
|
|
1116
|
+
# 再此可以调整normlayer,dropout,residual_scale等参数
|
|
1117
|
+
)
|
|
1118
|
+
# 训练模型
|
|
1119
|
+
cv_net.fit(
|
|
1120
|
+
self.train_oht_scl_data[self.var_nmes].iloc[train_idx],
|
|
1121
|
+
self.train_oht_scl_data[self.resp_nme].iloc[train_idx],
|
|
1122
|
+
self.train_oht_scl_data[self.weight_nme].iloc[train_idx],
|
|
1123
|
+
self.train_oht_scl_data[self.var_nmes].iloc[test_idx],
|
|
1124
|
+
self.train_oht_scl_data[self.resp_nme].iloc[test_idx],
|
|
1125
|
+
self.train_oht_scl_data[self.weight_nme].iloc[test_idx]
|
|
1126
|
+
)
|
|
1127
|
+
# 预测
|
|
1128
|
+
y_pred_fold = cv_net.predict(
|
|
1129
|
+
self.train_oht_scl_data[self.var_nmes].iloc[test_idx]
|
|
1130
|
+
)
|
|
1131
|
+
# 计算损失
|
|
1132
|
+
loss += mean_tweedie_deviance(
|
|
1133
|
+
self.train_oht_scl_data[self.resp_nme].iloc[test_idx],
|
|
1134
|
+
y_pred_fold,
|
|
1135
|
+
sample_weight=self.train_oht_scl_data[self.weight_nme].iloc[test_idx],
|
|
1136
|
+
power=tw_power
|
|
1137
|
+
)
|
|
1138
|
+
return loss / int(1/self.prop_test)
|
|
1139
|
+
|
|
1140
|
+
# 定义ResNet贝叶斯优化函数
|
|
1141
|
+
def bayesopt_resnet(self, max_evals=100):
|
|
1142
|
+
study = optuna.create_study(
|
|
1143
|
+
direction='minimize',
|
|
1144
|
+
sampler=optuna.samplers.TPESampler(seed=self.rand_seed))
|
|
1145
|
+
study.optimize(self.cross_val_resn, n_trials=max_evals)
|
|
1146
|
+
self.best_resn_params = study.best_params
|
|
1147
|
+
pd.DataFrame(self.best_resn_params, index=[0]).to_csv(
|
|
1148
|
+
os.getcwd() + '/Results/' + self.model_nme + '_bestparams_resn.csv')
|
|
1149
|
+
self.best_resn_trial = study.best_trial
|
|
1150
|
+
self.resn_best = ResNetScikitLearn(
|
|
1151
|
+
model_nme=self.model_nme,
|
|
1152
|
+
input_dim=self.train_oht_scl_data[self.var_nmes].shape[1]
|
|
1153
|
+
)
|
|
1154
|
+
self.resn_best.set_params(self.best_resn_params)
|
|
1155
|
+
self.resn_best.fit(self.train_oht_scl_data[self.var_nmes],
|
|
1156
|
+
self.train_oht_scl_data[self.resp_nme],
|
|
1157
|
+
self.train_oht_scl_data[self.weight_nme])
|
|
1158
|
+
self.model_label += ['ResNet']
|
|
1159
|
+
self.train_data['pred_resn'] = self.resn_best.predict(
|
|
1160
|
+
self.train_oht_scl_data[self.var_nmes])
|
|
1161
|
+
self.test_data['pred_resn'] = self.resn_best.predict(
|
|
1162
|
+
self.test_oht_scl_data[self.var_nmes])
|
|
1163
|
+
self.train_data.loc[:, 'w_pred_resn'] = self.train_data['pred_resn'] * \
|
|
1164
|
+
self.train_data[self.weight_nme]
|
|
1165
|
+
self.test_data.loc[:, 'w_pred_resn'] = self.test_data['pred_resn'] * \
|
|
1166
|
+
self.test_data[self.weight_nme]
|
|
1167
|
+
|
|
+    # FT-Transformer cross-validation objective
+    def cross_val_ft(self, trial):
+
+        # Learning rate
+        learning_rate = trial.suggest_float(
+            'learning_rate', 1e-5, 1e-2, log=True
+        )
+
+        # Transformer width and depth
+        d_model = trial.suggest_int(
+            'd_model', 32, 128, step=32
+        )
+        n_heads = trial.suggest_categorical(
+            'n_heads', [2, 4, 8]
+        )
+        n_layers = trial.suggest_int(
+            'n_layers', 2, 6
+        )
+
+        dropout = trial.suggest_float(
+            'dropout', 0.0, 0.3
+        )
+
+        batch_num = trial.suggest_int(
+            'batch_num',
+            10 if self.obj == 'reg:gamma' else 100,
+            100 if self.obj == 'reg:gamma' else 1000,
+            step=10 if self.obj == 'reg:gamma' else 100
+        )
+
+        # Tweedie power
+        if self.obj == 'reg:tweedie':
+            tw_power = trial.suggest_float('tw_power', 1.0, 2.0)
+        elif self.obj == 'count:poisson':
+            tw_power = 1.0
+        elif self.obj == 'reg:gamma':
+            tw_power = 2.0
+
+        loss = 0.0
+
+        # Note: FT consumes the raw features (self.factor_nmes),
+        # not the one-hot encoded self.train_oht_scl_data
+        for fold, (train_idx, test_idx) in enumerate(
+                self.cv.split(self.train_data[self.factor_nmes])):
+
+            X_train_fold = self.train_data.iloc[train_idx][self.factor_nmes]
+            y_train_fold = self.train_data.iloc[train_idx][self.resp_nme]
+            w_train_fold = self.train_data.iloc[train_idx][self.weight_nme]
+
+            X_val_fold = self.train_data.iloc[test_idx][self.factor_nmes]
+            y_val_fold = self.train_data.iloc[test_idx][self.resp_nme]
+            w_val_fold = self.train_data.iloc[test_idx][self.weight_nme]
+
+            # Build the FT-Transformer model
+            cv_ft = FTTransformerSklearn(
+                model_nme=self.model_nme,
+                num_cols=self.num_features,  # numeric feature columns
+                cat_cols=self.cate_list,     # categorical columns (pre-encoded ints or category dtype)
+                d_model=d_model,
+                n_heads=n_heads,
+                n_layers=n_layers,
+                dropout=dropout,
+                batch_num=batch_num,
+                epochs=self.epochs,
+                tweedie_power=tw_power,
+                learning_rate=learning_rate,
+                patience=5  # adjust as needed
+            )
+
+            # Train
+            cv_ft.fit(
+                X_train_fold,
+                y_train_fold,
+                w_train_fold,
+                X_val_fold,
+                y_val_fold,
+                w_val_fold
+            )
+
+            # Predict
+            y_pred_fold = cv_ft.predict(X_val_fold)
+
+            # Compute the loss (same as ResNet: mean_tweedie_deviance)
+            loss += mean_tweedie_deviance(
+                y_val_fold,
+                y_pred_fold,
+                sample_weight=w_val_fold,
+                power=tw_power
+            )
+
+        return loss / int(1 / self.prop_test)
+
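The objective-to-power mapping above (Tweedie searched in [1, 2], Poisson pinned to 1.0, gamma pinned to 2.0) feeds straight into scikit-learn's weighted deviance; note the chain has no `else` branch, so the three listed objectives are the only ones supported. A minimal standalone check of the metric itself (the arrays are invented for illustration):

```python
import numpy as np
from sklearn.metrics import mean_tweedie_deviance

y_true = np.array([0.0, 1.2, 3.5, 0.0, 2.1])  # zeros are legal for 1 < power < 2
y_pred = np.array([0.4, 1.0, 2.8, 0.3, 1.9])  # predictions must stay positive
w = np.array([1.0, 0.5, 2.0, 1.0, 1.5])

# power=1.5 sits inside the 'reg:tweedie' search range; power=1.0 and 2.0
# reproduce the Poisson and gamma special cases used for the other objectives.
print(mean_tweedie_deviance(y_true, y_pred, sample_weight=w, power=1.5))
```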
+    # Bayesian optimisation for the FT-Transformer
+    def bayesopt_ft(self, max_evals=50):
+        study = optuna.create_study(
+            direction='minimize',
+            sampler=optuna.samplers.TPESampler(seed=self.rand_seed)
+        )
+        study.optimize(self.cross_val_ft, n_trials=max_evals)
+
+        self.best_ft_params = study.best_params
+        pd.DataFrame(self.best_ft_params, index=[0]).to_csv(
+            os.getcwd() + '/Results/' + self.model_nme + '_bestparams_ft.csv'
+        )
+        self.best_ft_trial = study.best_trial
+
+        # Rebuild an FT model with the best parameters and fit it on the full training set
+        self.ft_best = FTTransformerSklearn(
+            model_nme=self.model_nme,
+            num_cols=self.num_features,
+            cat_cols=self.cate_list
+        )
+        # Apply the best hyper-parameters
+        self.ft_best.set_params(self.best_ft_params)
+
+        # Fit on the full training set
+        self.ft_best.fit(
+            self.train_data[self.factor_nmes],
+            self.train_data[self.resp_nme],
+            self.train_data[self.weight_nme]
+        )
+
+        # Record the model label
+        self.model_label += ['FTTransformer']
+
+        # Training-set predictions
+        self.train_data['pred_ft'] = self.ft_best.predict(
+            self.train_data[self.factor_nmes]
+        )
+        # Test-set predictions
+        self.test_data['pred_ft'] = self.ft_best.predict(
+            self.test_data[self.factor_nmes]
+        )
+
+        # Weighted predictions (consistent with the XGB / ResNet style)
+        self.train_data.loc[:, 'w_pred_ft'] = (
+            self.train_data['pred_ft'] * self.train_data[self.weight_nme]
+        )
+        self.test_data.loc[:, 'w_pred_ft'] = (
+            self.test_data['pred_ft'] * self.test_data[self.weight_nme]
+        )
+
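A hypothetical driver for the two search routines, assuming `bo` is an already-constructed instance of this class with its training frames populated (`bo` itself is an assumption; the attribute names mirror those used above):

```python
# Hypothetical usage; `bo` is an instance of the surrounding class.
bo.bayesopt_resnet(max_evals=100)  # writes Results/<model_nme>_bestparams_resn.csv
bo.bayesopt_ft(max_evals=50)       # writes Results/<model_nme>_bestparams_ft.csv

# After either call, the refit model and its predictions are attached:
print(bo.best_ft_params)
print(bo.test_data[['pred_ft', 'w_pred_ft']].head())
```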
+    # Weighted binning helper
+
+    def _split_data(self, data, col_nme, wgt_nme, n_bins=10):
+        data.sort_values(by=col_nme, ascending=True, inplace=True)
+        data['cum_weight'] = data[wgt_nme].cumsum()
+        w_sum = data[wgt_nme].sum()
+        data.loc[:, 'bins'] = np.floor(
+            data['cum_weight'] * float(n_bins) / w_sum)
+        data.loc[(data['bins'] == n_bins), 'bins'] = n_bins - 1
+        return data.groupby(['bins'], observed=True).sum(numeric_only=True)
+
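The binning here is by cumulative weight, not by row count: after sorting on the ranking column, each bucket receives roughly equal total exposure, and the final clamp keeps the row holding exactly 100% of the weight out of a phantom extra bin. A toy re-enactment (the data frame is invented for illustration):

```python
import numpy as np
import pandas as pd

# Five rows, unequal weights, split into 2 equal-weight bins.
df = pd.DataFrame({'pred': [0.2, 0.5, 0.1, 0.9, 0.4],
                   'weight': [1.0, 2.0, 1.0, 0.5, 0.5]})
n_bins = 2
df = df.sort_values('pred')
df['cum_weight'] = df['weight'].cumsum()
df['bins'] = np.floor(df['cum_weight'] * n_bins / df['weight'].sum())
df.loc[df['bins'] == n_bins, 'bins'] = n_bins - 1  # clamp the 100% row
print(df.groupby('bins').sum(numeric_only=True))
```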
+    # Build the plotting dataset for the lift chart
+    def _plot_data_lift(self,
+                        pred_list, w_pred_list,
+                        w_act_list, weight_list, n_bins=10):
+        lift_data = pd.DataFrame()
+        lift_data.loc[:, 'pred'] = pred_list
+        lift_data.loc[:, 'w_pred'] = w_pred_list
+        lift_data.loc[:, 'act'] = w_act_list
+        lift_data.loc[:, 'weight'] = weight_list
+        plot_data = self._split_data(
+            lift_data, 'pred', 'weight', n_bins)
+        plot_data['exp_v'] = plot_data['w_pred'] / plot_data['weight']
+        plot_data['act_v'] = plot_data['act'] / plot_data['weight']
+        plot_data.reset_index(inplace=True)
+        return plot_data
+
+    # Plot the lift chart
+    def plot_lift(self, model_label, pred_nme, n_bins=10):
+        # Plot results on the modelling data
+        figpos_list = [121, 122]
+        plot_dict = {
+            121: self.train_data,
+            122: self.test_data
+        }
+        name_list = {
+            121: 'Train Data',
+            122: 'Test Data'
+        }
+        if model_label == 'Xgboost':
+            pred_nme = 'pred_xgb'
+        elif model_label == 'ResNet':
+            pred_nme = 'pred_resn'
+        elif model_label == 'FTTransformer':
+            pred_nme = 'pred_ft'
+
+        fig = plt.figure(figsize=(11, 5))
+        for figpos in figpos_list:
+            plot_data = self._plot_data_lift(
+                plot_dict[figpos][pred_nme].values,
+                plot_dict[figpos]['w_' + pred_nme].values,
+                plot_dict[figpos]['w_act'].values,
+                plot_dict[figpos][self.weight_nme].values,
+                n_bins)
+            ax = fig.add_subplot(figpos)
+            ax.plot(plot_data.index, plot_data['act_v'],
+                    label='Actual', color='red')
+            ax.plot(plot_data.index, plot_data['exp_v'],
+                    label='Predicted', color='blue')
+            ax.set_title(
+                'Lift Chart on %s' % name_list[figpos], fontsize=8)
+            plt.xticks(plot_data.index,
+                       plot_data.index,
+                       rotation=90, fontsize=6)
+            plt.yticks(fontsize=6)
+            plt.legend(loc='upper left',
+                       fontsize=5, frameon=False)
+            plt.margins(0.05)
+            ax2 = ax.twinx()
+            ax2.bar(plot_data.index, plot_data['weight'],
+                    alpha=0.5, color='seagreen',
+                    label='Earned Exposure')
+            plt.yticks(fontsize=6)
+            plt.legend(loc='upper right',
+                       fontsize=5, frameon=False)
+        plt.subplots_adjust(wspace=0.3)
+        save_path = os.path.join(
+            os.getcwd(), 'plot', f'01_{self.model_nme}_{model_label}_lift.png')
+        plt.savefig(save_path, dpi=300)
+        plt.show()
+        plt.close(fig)
+
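For the three known labels, `plot_lift` overrides `pred_nme` itself, so the caller's value only matters for other labels; note also that, unlike `save_model`, the plotting methods assume a `./plot` directory already exists. A hypothetical call (again assuming an instance `bo`):

```python
# Hypothetical usage; `bo` is an instance of the surrounding class.
# The second argument is ignored for the three known labels:
bo.plot_lift('FTTransformer', None, n_bins=10)
# -> saves plot/01_<model_nme>_FTTransformer_lift.png at 300 dpi
```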
+    # Build the plotting dataset for the double lift chart
+    def _plot_data_dlift(self,
+                         pred_list_model1, pred_list_model2,
+                         w_list, w_act_list, n_bins=10):
+        lift_data = pd.DataFrame()
+        lift_data.loc[:, 'pred1'] = pred_list_model1
+        lift_data.loc[:, 'pred2'] = pred_list_model2
+        lift_data.loc[:, 'diff_ly'] = lift_data['pred1'] / lift_data['pred2']
+        lift_data.loc[:, 'act'] = w_act_list
+        lift_data.loc[:, 'weight'] = w_list
+        plot_data = self._split_data(lift_data, 'diff_ly', 'weight', n_bins)
+        plot_data['exp_v1'] = plot_data['pred1'] / plot_data['act']
+        plot_data['exp_v2'] = plot_data['pred2'] / plot_data['act']
+        plot_data['act_v'] = plot_data['act'] / plot_data['act']
+        plot_data.reset_index(inplace=True)
+        return plot_data
+
+    # Plot the double lift chart
+    def plot_dlift(self, model_comp=['xgb', 'resn'], n_bins=10):
+        # Display names for the model codes accepted in model_comp
+        label_map = {'xgb': 'Xgboost',
+                     'resn': 'ResNet',
+                     'ft': 'FTTransformer'}
+        figpos_list = [121, 122]
+        plot_dict = {
+            121: self.train_data,
+            122: self.test_data
+        }
+        name_list = {
+            121: 'Train Data',
+            122: 'Test Data'
+        }
+        fig = plt.figure(figsize=(11, 5))
+        for figpos in figpos_list:
+            plot_data = self._plot_data_dlift(
+                plot_dict[figpos]['w_pred_' + model_comp[0]].values,
+                plot_dict[figpos]['w_pred_' + model_comp[1]].values,
+                plot_dict[figpos][self.weight_nme].values,
+                plot_dict[figpos]['w_act'].values,
+                n_bins)
+            ax = fig.add_subplot(figpos)
+            tt1 = label_map[model_comp[0]]
+            tt2 = label_map[model_comp[1]]
+            ax.plot(plot_data.index, plot_data['act_v'],
+                    label='Actual', color='red')
+            ax.plot(plot_data.index, plot_data['exp_v1'],
+                    label=tt1, color='blue')
+            ax.plot(plot_data.index, plot_data['exp_v2'],
+                    label=tt2, color='black')
+            ax.set_title(
+                'Double Lift Chart on %s' % name_list[figpos], fontsize=8)
+            plt.xticks(plot_data.index,
+                       plot_data.index,
+                       rotation=90, fontsize=6)
+            plt.xlabel('%s / %s' % (tt1, tt2), fontsize=6)
+            plt.yticks(fontsize=6)
+            plt.legend(loc='upper left',
+                       fontsize=5, frameon=False)
+            plt.margins(0.1)
+            plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8)
+            ax2 = ax.twinx()
+            ax2.bar(plot_data.index, plot_data['weight'],
+                    alpha=0.5, color='seagreen',
+                    label='Earned Exposure')
+            plt.yticks(fontsize=6)
+            plt.legend(loc='upper right',
+                       fontsize=5, frameon=False)
+        plt.subplots_adjust(wspace=0.3)
+        save_path = os.path.join(
+            os.getcwd(), 'plot', f'02_{self.model_nme}_dlift.png')
+        plt.savefig(save_path, dpi=300)
+        plt.show()
+        plt.close(fig)
+
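Reading the double lift chart: rows are ranked by the ratio of the two models' weighted predictions, each bin's curves are normalised by that bin's actual losses, so the 'Actual' reference line is identically 1 and the model whose curve hugs it more closely segments better where the two disagree. (The `tt1`/`tt2` labels now come from the `label_map` lookup so the legend always matches `model_comp`; the original hard-coded 'Xgboost'/'ResNet'.) A hypothetical comparison of XGBoost against the FT-Transformer:

```python
# Hypothetical usage; `bo` is an instance of the surrounding class.
bo.plot_dlift(model_comp=['xgb', 'ft'], n_bins=10)
# -> saves plot/02_<model_nme>_dlift.png
```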
+    # Model persistence
+
+    def save_model(self, model_name=None):
+
+        # model_name may be:
+        # - None: save every available model
+        # - 'xgb': save only the Xgboost model
+        # - 'resn': save only the ResNet model
+        # - 'ft': save only the FT-Transformer model
+
+        model_dir = os.path.join(os.getcwd(), 'model')
+        if not os.path.exists(model_dir):
+            os.makedirs(model_dir)
+
+        save_path_xgb = os.path.join(
+            model_dir, f'01_{self.model_nme}_Xgboost.pkl'
+        )
+        save_path_resn = os.path.join(
+            model_dir, f'01_{self.model_nme}_ResNet.pth'
+        )
+        save_path_ft = os.path.join(
+            model_dir, f'01_{self.model_nme}_FTTransformer.pth'
+        )
+
+        # Save XGBoost
+        if model_name in (None, 'xgb'):
+            if hasattr(self, 'xgb_best'):
+                joblib.dump(self.xgb_best, save_path_xgb)
+            else:
+                print("[save_model] Warning: xgb_best not found; Xgboost model was not saved.")
+
+        # Save ResNet (only the core network's state_dict)
+        if model_name in (None, 'resn'):
+            if hasattr(self, 'resn_best'):
+                torch.save(self.resn_best.resnet.state_dict(), save_path_resn)
+            else:
+                print("[save_model] Warning: resn_best not found; ResNet model was not saved.")
+
+        # Save FT-Transformer (persist the whole sklearn-style wrapper so
+        # both the structure and the parameters can be restored)
+        if model_name in (None, 'ft'):
+            if hasattr(self, 'ft_best'):
+                # Save the entire object: architecture, weights, best hyper-parameters
+                torch.save(self.ft_best, save_path_ft)
+            else:
+                print("[save_model] Warning: ft_best not found; FT-Transformer model was not saved.")
+
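A hypothetical round trip through the two persistence methods (assuming a fitted instance `bo`; the paths follow the templates above):

```python
# Hypothetical usage; `bo` is a fitted instance of the surrounding class.
bo.save_model()        # model/01_<model_nme>_Xgboost.pkl, _ResNet.pth, _FTTransformer.pth
bo.save_model('resn')  # only the ResNet state_dict
bo.load_model('ft')    # repopulates bo.ft_load, moved to the best available device
```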
+    def load_model(self, model_name=None):
+        # model_name may be:
+        # - None: load every model that can be found
+        # - 'xgb': load only the Xgboost model
+        # - 'resn': load only the ResNet model
+        # - 'ft': load only the FT-Transformer model
+
+        model_dir = os.path.join(os.getcwd(), 'model')
+        save_path_xgb = os.path.join(
+            model_dir, f'01_{self.model_nme}_Xgboost.pkl'
+        )
+        save_path_resn = os.path.join(
+            model_dir, f'01_{self.model_nme}_ResNet.pth'
+        )
+        save_path_ft = os.path.join(
+            model_dir, f'01_{self.model_nme}_FTTransformer.pth'
+        )
+
+        # Load XGBoost
+        if model_name in (None, 'xgb'):
+            if os.path.exists(save_path_xgb):
+                self.xgb_load = joblib.load(save_path_xgb)
+            else:
+                print(
+                    f"[load_model] Warning: Xgboost model file not found: {save_path_xgb}")
+
+        # Load ResNet (rebuild the wrapper, then load the state_dict)
+        if model_name in (None, 'resn'):
+            if os.path.exists(save_path_resn):
+                self.resn_load = ResNetScikitLearn(
+                    model_nme=self.model_nme,
+                    input_dim=self.train_oht_scl_data[self.var_nmes].shape[1]
+                )
+                state_dict = torch.load(
+                    save_path_resn, map_location=self.resn_load.device)
+                self.resn_load.resnet.load_state_dict(state_dict)
+            else:
+                print(
+                    f"[load_model] Warning: ResNet model file not found: {save_path_resn}")
+
+        # Load FT-Transformer (deserialise the sklearn-style wrapper directly)
+        if model_name in (None, 'ft'):
+            if os.path.exists(save_path_ft):
+                # Load onto CPU first, then migrate to the current environment
+                ft_loaded = torch.load(save_path_ft, map_location='cpu')
+                # Pick the device for the current environment and move the core model onto it
+                if torch.cuda.is_available():
+                    ft_loaded.device = torch.device('cuda')
+                elif torch.backends.mps.is_available():
+                    ft_loaded.device = torch.device('mps')
+                else:
+                    ft_loaded.device = torch.device('cpu')
+                ft_loaded.ft.to(ft_loaded.device)
+
+                self.ft_load = ft_loaded
+            else:
+                print(
+                    f"[load_model] Warning: FT-Transformer model file not found: {save_path_ft}")
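One portability caveat, since `save_model` pickles the whole FT wrapper rather than a bare state_dict: PyTorch 2.6 and later default `torch.load` to `weights_only=True`, which refuses to unpickle arbitrary objects. If the FT branch of `load_model` fails on such a version, the historical behaviour can be requested explicitly; a sketch, safe only for checkpoints you produced yourself:

```python
# Full unpickling can execute arbitrary code from the file, so pass
# weights_only=False only for self-produced checkpoints.
ft_loaded = torch.load(save_path_ft, map_location='cpu', weights_only=False)
```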