ins_pricing-0.1.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. ins_pricing/README.md +60 -0
  2. ins_pricing/__init__.py +102 -0
  3. ins_pricing/governance/README.md +18 -0
  4. ins_pricing/governance/__init__.py +20 -0
  5. ins_pricing/governance/approval.py +93 -0
  6. ins_pricing/governance/audit.py +37 -0
  7. ins_pricing/governance/registry.py +99 -0
  8. ins_pricing/governance/release.py +159 -0
  9. ins_pricing/modelling/BayesOpt.py +146 -0
  10. ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
  11. ins_pricing/modelling/BayesOpt_entry.py +575 -0
  12. ins_pricing/modelling/BayesOpt_incremental.py +731 -0
  13. ins_pricing/modelling/Explain_Run.py +36 -0
  14. ins_pricing/modelling/Explain_entry.py +539 -0
  15. ins_pricing/modelling/Pricing_Run.py +36 -0
  16. ins_pricing/modelling/README.md +33 -0
  17. ins_pricing/modelling/__init__.py +44 -0
  18. ins_pricing/modelling/bayesopt/__init__.py +98 -0
  19. ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
  20. ins_pricing/modelling/bayesopt/core.py +1476 -0
  21. ins_pricing/modelling/bayesopt/models.py +2196 -0
  22. ins_pricing/modelling/bayesopt/trainers.py +2446 -0
  23. ins_pricing/modelling/bayesopt/utils.py +1021 -0
  24. ins_pricing/modelling/cli_common.py +136 -0
  25. ins_pricing/modelling/explain/__init__.py +55 -0
  26. ins_pricing/modelling/explain/gradients.py +334 -0
  27. ins_pricing/modelling/explain/metrics.py +176 -0
  28. ins_pricing/modelling/explain/permutation.py +155 -0
  29. ins_pricing/modelling/explain/shap_utils.py +146 -0
  30. ins_pricing/modelling/notebook_utils.py +284 -0
  31. ins_pricing/modelling/plotting/__init__.py +45 -0
  32. ins_pricing/modelling/plotting/common.py +63 -0
  33. ins_pricing/modelling/plotting/curves.py +572 -0
  34. ins_pricing/modelling/plotting/diagnostics.py +139 -0
  35. ins_pricing/modelling/plotting/geo.py +362 -0
  36. ins_pricing/modelling/plotting/importance.py +121 -0
  37. ins_pricing/modelling/run_logging.py +133 -0
  38. ins_pricing/modelling/tests/conftest.py +8 -0
  39. ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
  40. ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
  41. ins_pricing/modelling/tests/test_explain.py +56 -0
  42. ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
  43. ins_pricing/modelling/tests/test_graph_cache.py +33 -0
  44. ins_pricing/modelling/tests/test_plotting.py +63 -0
  45. ins_pricing/modelling/tests/test_plotting_library.py +150 -0
  46. ins_pricing/modelling/tests/test_preprocessor.py +48 -0
  47. ins_pricing/modelling/watchdog_run.py +211 -0
  48. ins_pricing/pricing/README.md +44 -0
  49. ins_pricing/pricing/__init__.py +27 -0
  50. ins_pricing/pricing/calibration.py +39 -0
  51. ins_pricing/pricing/data_quality.py +117 -0
  52. ins_pricing/pricing/exposure.py +85 -0
  53. ins_pricing/pricing/factors.py +91 -0
  54. ins_pricing/pricing/monitoring.py +99 -0
  55. ins_pricing/pricing/rate_table.py +78 -0
  56. ins_pricing/production/__init__.py +21 -0
  57. ins_pricing/production/drift.py +30 -0
  58. ins_pricing/production/monitoring.py +143 -0
  59. ins_pricing/production/scoring.py +40 -0
  60. ins_pricing/reporting/README.md +20 -0
  61. ins_pricing/reporting/__init__.py +11 -0
  62. ins_pricing/reporting/report_builder.py +72 -0
  63. ins_pricing/reporting/scheduler.py +45 -0
  64. ins_pricing/setup.py +41 -0
  65. ins_pricing v2/__init__.py +23 -0
  66. ins_pricing v2/governance/__init__.py +20 -0
  67. ins_pricing v2/governance/approval.py +93 -0
  68. ins_pricing v2/governance/audit.py +37 -0
  69. ins_pricing v2/governance/registry.py +99 -0
  70. ins_pricing v2/governance/release.py +159 -0
  71. ins_pricing v2/modelling/Explain_Run.py +36 -0
  72. ins_pricing v2/modelling/Pricing_Run.py +36 -0
  73. ins_pricing v2/modelling/__init__.py +151 -0
  74. ins_pricing v2/modelling/cli_common.py +141 -0
  75. ins_pricing v2/modelling/config.py +249 -0
  76. ins_pricing v2/modelling/config_preprocess.py +254 -0
  77. ins_pricing v2/modelling/core.py +741 -0
  78. ins_pricing v2/modelling/data_container.py +42 -0
  79. ins_pricing v2/modelling/explain/__init__.py +55 -0
  80. ins_pricing v2/modelling/explain/gradients.py +334 -0
  81. ins_pricing v2/modelling/explain/metrics.py +176 -0
  82. ins_pricing v2/modelling/explain/permutation.py +155 -0
  83. ins_pricing v2/modelling/explain/shap_utils.py +146 -0
  84. ins_pricing v2/modelling/features.py +215 -0
  85. ins_pricing v2/modelling/model_manager.py +148 -0
  86. ins_pricing v2/modelling/model_plotting.py +463 -0
  87. ins_pricing v2/modelling/models.py +2203 -0
  88. ins_pricing v2/modelling/notebook_utils.py +294 -0
  89. ins_pricing v2/modelling/plotting/__init__.py +45 -0
  90. ins_pricing v2/modelling/plotting/common.py +63 -0
  91. ins_pricing v2/modelling/plotting/curves.py +572 -0
  92. ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
  93. ins_pricing v2/modelling/plotting/geo.py +362 -0
  94. ins_pricing v2/modelling/plotting/importance.py +121 -0
  95. ins_pricing v2/modelling/run_logging.py +133 -0
  96. ins_pricing v2/modelling/tests/conftest.py +8 -0
  97. ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
  98. ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
  99. ins_pricing v2/modelling/tests/test_explain.py +56 -0
  100. ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
  101. ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
  102. ins_pricing v2/modelling/tests/test_plotting.py +63 -0
  103. ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
  104. ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
  105. ins_pricing v2/modelling/trainers.py +2447 -0
  106. ins_pricing v2/modelling/utils.py +1020 -0
  107. ins_pricing v2/modelling/watchdog_run.py +211 -0
  108. ins_pricing v2/pricing/__init__.py +27 -0
  109. ins_pricing v2/pricing/calibration.py +39 -0
  110. ins_pricing v2/pricing/data_quality.py +117 -0
  111. ins_pricing v2/pricing/exposure.py +85 -0
  112. ins_pricing v2/pricing/factors.py +91 -0
  113. ins_pricing v2/pricing/monitoring.py +99 -0
  114. ins_pricing v2/pricing/rate_table.py +78 -0
  115. ins_pricing v2/production/__init__.py +21 -0
  116. ins_pricing v2/production/drift.py +30 -0
  117. ins_pricing v2/production/monitoring.py +143 -0
  118. ins_pricing v2/production/scoring.py +40 -0
  119. ins_pricing v2/reporting/__init__.py +11 -0
  120. ins_pricing v2/reporting/report_builder.py +72 -0
  121. ins_pricing v2/reporting/scheduler.py +45 -0
  122. ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
  123. ins_pricing v2/scripts/Explain_entry.py +545 -0
  124. ins_pricing v2/scripts/__init__.py +1 -0
  125. ins_pricing v2/scripts/train.py +568 -0
  126. ins_pricing v2/setup.py +55 -0
  127. ins_pricing v2/smoke_test.py +28 -0
  128. ins_pricing-0.1.6.dist-info/METADATA +78 -0
  129. ins_pricing-0.1.6.dist-info/RECORD +169 -0
  130. ins_pricing-0.1.6.dist-info/WHEEL +5 -0
  131. ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
  132. user_packages/__init__.py +105 -0
  133. user_packages legacy/BayesOpt.py +5659 -0
  134. user_packages legacy/BayesOpt_entry.py +513 -0
  135. user_packages legacy/BayesOpt_incremental.py +685 -0
  136. user_packages legacy/Pricing_Run.py +36 -0
  137. user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
  138. user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
  139. user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
  140. user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
  141. user_packages legacy/Try/BayesOpt legacy.py +3280 -0
  142. user_packages legacy/Try/BayesOpt.py +838 -0
  143. user_packages legacy/Try/BayesOptAll.py +1569 -0
  144. user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
  145. user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
  146. user_packages legacy/Try/BayesOptSearch.py +830 -0
  147. user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
  148. user_packages legacy/Try/BayesOptV1.py +1911 -0
  149. user_packages legacy/Try/BayesOptV10.py +2973 -0
  150. user_packages legacy/Try/BayesOptV11.py +3001 -0
  151. user_packages legacy/Try/BayesOptV12.py +3001 -0
  152. user_packages legacy/Try/BayesOptV2.py +2065 -0
  153. user_packages legacy/Try/BayesOptV3.py +2209 -0
  154. user_packages legacy/Try/BayesOptV4.py +2342 -0
  155. user_packages legacy/Try/BayesOptV5.py +2372 -0
  156. user_packages legacy/Try/BayesOptV6.py +2759 -0
  157. user_packages legacy/Try/BayesOptV7.py +2832 -0
  158. user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
  159. user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
  160. user_packages legacy/Try/BayesOptV9.py +2927 -0
  161. user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
  162. user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
  163. user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
  164. user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
  165. user_packages legacy/Try/xgbbayesopt.py +523 -0
  166. user_packages legacy/__init__.py +19 -0
  167. user_packages legacy/cli_common.py +124 -0
  168. user_packages legacy/notebook_utils.py +228 -0
  169. user_packages legacy/watchdog_run.py +202 -0
user_packages legacy/Try/BayesOpt.py
@@ -0,0 +1,838 @@
+ # No data transfer between GPU and CPU: all tensors are created directly
+ # on the target device.
+
+ import numpy as np  # 1.26.2
+ import pandas as pd  # 2.2.3
+ import torch  # 1.10.1+cu111
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import optuna  # 4.3.0
+ import xgboost as xgb  # 1.7.0
+ import matplotlib.pyplot as plt
+ import os
+ import joblib
+ import torch.utils.checkpoint as cp
+ import copy
+
+ from torch.utils.data import DataLoader, TensorDataset
+ from torch.cuda.amp import autocast, GradScaler
+ from torch.nn.utils import clip_grad_norm_
+ from sklearn.model_selection import KFold, ShuffleSplit, cross_val_score  # 1.2.2
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.metrics import make_scorer, mean_tweedie_deviance
+
+ # Tweedie deviance loss for torch.
+ # Reference: https://scikit-learn.org/stable/modules/model_evaluation.html#mean-poisson-gamma-and-tweedie-deviances
+
+
+ def tweedie_loss(pred, target, p=1.5):
+     # Ensure predictions are positive for stability
+     eps = 1e-6
+     pred_clamped = torch.clamp(pred, min=eps)
+     # Compute Tweedie deviance components
+     if p == 1:
+         # Poisson case: 2 * (y * log(y / mu) - y + mu)
+         term1 = target * torch.log(target / pred_clamped + eps)
+         term2 = target - pred_clamped  # sign fixed: deviance needs -y + mu
+         term3 = 0
+     elif p == 0:
+         # Gaussian case
+         term1 = 0.5 * torch.pow(target - pred_clamped, 2)
+         term2 = 0
+         term3 = 0
+     elif p == 2:
+         # Gamma case
+         term1 = torch.log(pred_clamped / target + eps)
+         term2 = -target / pred_clamped + 1
+         term3 = 0
+     else:
+         term1 = torch.pow(target, 2 - p) / ((1 - p) * (2 - p))
+         term2 = target * torch.pow(pred_clamped, 1 - p) / (1 - p)
+         term3 = torch.pow(pred_clamped, 2 - p) / (2 - p)
+     # Tweedie deviance (negative log-likelihood up to a constant)
+     return 2 * (term1 - term2 + term3)
+
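A quick sanity check for the loss above is to compare it with scikit-learn's mean_tweedie_deviance at the supported powers; a minimal sketch on synthetic values (not part of the package), which should agree up to the eps used for numerical stability:

    import torch
    from sklearn.metrics import mean_tweedie_deviance

    y = torch.tensor([0.5, 1.0, 2.0, 4.0])    # synthetic targets
    mu = torch.tensor([0.8, 1.2, 1.5, 3.0])   # synthetic predictions

    for p in (0.0, 1.0, 1.5, 2.0):
        ours = tweedie_loss(mu, y, p=p).mean().item()
        ref = mean_tweedie_deviance(y.numpy(), mu.numpy(), power=p)
        print(f"p={p}: torch={ours:.6f}  sklearn={ref:.6f}")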
+ # Weighted binning helper: split rows into n_bins buckets of (roughly)
+ # equal total weight, ordered by col_nme.
+
+
+ def split_data(data, col_nme, wgt_nme, n_bins=10):
+     data.sort_values(by=col_nme, ascending=True, inplace=True)
+     data['cum_weight'] = data[wgt_nme].cumsum()
+     w_sum = data[wgt_nme].sum()
+     data.loc[:, 'bins'] = np.floor(data['cum_weight'] * float(n_bins) / w_sum)
+     data.loc[(data['bins'] == n_bins), 'bins'] = n_bins - 1
+     return data.groupby(['bins'], observed=True).sum(numeric_only=True)
+
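On a toy frame, the helper yields buckets of roughly equal total weight (column names here are illustrative):

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    toy = pd.DataFrame({'pred': rng.gamma(2.0, 1.0, 1000),
                        'weight': rng.uniform(0.5, 1.5, 1000)})
    binned = split_data(toy, 'pred', 'weight', n_bins=4)
    print(binned['weight'])  # four sums, each near toy['weight'].sum() / 4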
+ # Lift chart plotting helper.
+
+
+ def plot_lift_list(pred_model, w_pred_list, w_act_list,
+                    weight_list, tgt_nme, n_bins=10,
+                    fig_nme='Lift Chart'):
+     lift_data = pd.DataFrame()
+     lift_data.loc[:, 'pred'] = pred_model
+     lift_data.loc[:, 'w_pred'] = w_pred_list
+     lift_data.loc[:, 'act'] = w_act_list
+     lift_data.loc[:, 'weight'] = weight_list
+     plot_data = split_data(lift_data, 'pred', 'weight', n_bins)
+     plot_data['exp_v'] = plot_data['w_pred'] / plot_data['weight']
+     plot_data['act_v'] = plot_data['act'] / plot_data['weight']
+     plot_data.reset_index(inplace=True)
+     fig = plt.figure(figsize=(7, 5))
+     ax = fig.add_subplot(111)
+     ax.plot(plot_data.index, plot_data['act_v'],
+             label='Actual', color='red')
+     ax.plot(plot_data.index, plot_data['exp_v'],
+             label='Predicted', color='blue')
+     ax.set_title(
+         'Lift Chart of %s' % tgt_nme, fontsize=8)
+     plt.xticks(plot_data.index,
+                plot_data.index,
+                rotation=90, fontsize=6)
+     plt.yticks(fontsize=6)
+     plt.legend(loc='upper left',
+                fontsize=5, frameon=False)
+     plt.margins(0.05)
+     ax2 = ax.twinx()
+     ax2.bar(plot_data.index, plot_data['weight'],
+             alpha=0.5, color='seagreen',
+             label='Earned Exposure')
+     plt.yticks(fontsize=6)
+     plt.legend(loc='upper right',
+                fontsize=5, frameon=False)
+     plt.subplots_adjust(wspace=0.3)
+     save_path = os.path.join(
+         os.getcwd(), 'plot', f'05_{tgt_nme}_{fig_nme}.png')
+     plt.savefig(save_path, dpi=300)
+     plt.close(fig)
+
+ # Double lift chart plotting helper: bins by the ratio of the two
+ # model predictions and compares both against actuals.
+
+
+ def plot_dlift_list(pred_model_1, pred_model_2,
+                     model_nme_1, model_nme_2,
+                     tgt_nme,
+                     w_list, w_act_list, n_bins=10,
+                     fig_nme='Double Lift Chart'):
+     lift_data = pd.DataFrame()
+     lift_data.loc[:, 'pred1'] = pred_model_1
+     lift_data.loc[:, 'pred2'] = pred_model_2
+     lift_data.loc[:, 'diff_ly'] = lift_data['pred1'] / lift_data['pred2']
+     lift_data.loc[:, 'act'] = w_act_list
+     lift_data.loc[:, 'weight'] = w_list
+     lift_data.loc[:, 'w_pred1'] = lift_data['pred1'] * lift_data['weight']
+     lift_data.loc[:, 'w_pred2'] = lift_data['pred2'] * lift_data['weight']
+     plot_data = split_data(lift_data, 'diff_ly', 'weight', n_bins)
+     plot_data['exp_v1'] = plot_data['w_pred1'] / plot_data['act']
+     plot_data['exp_v2'] = plot_data['w_pred2'] / plot_data['act']
+     plot_data['act_v'] = plot_data['act'] / plot_data['act']
+     plot_data.reset_index(inplace=True)
+     fig = plt.figure(figsize=(7, 5))
+     ax = fig.add_subplot(111)
+     ax.plot(plot_data.index, plot_data['act_v'],
+             label='Actual', color='red')
+     ax.plot(plot_data.index, plot_data['exp_v1'],
+             label=model_nme_1, color='blue')
+     ax.plot(plot_data.index, plot_data['exp_v2'],
+             label=model_nme_2, color='black')
+     ax.set_title(
+         'Double Lift Chart of %s' % tgt_nme, fontsize=8)
+     plt.xticks(plot_data.index,
+                plot_data.index,
+                rotation=90, fontsize=6)
+     plt.xlabel('%s / %s' % (model_nme_1, model_nme_2), fontsize=6)
+     plt.yticks(fontsize=6)
+     plt.legend(loc='upper left',
+                fontsize=5, frameon=False)
+     plt.margins(0.1)
+     plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8)
+     ax2 = ax.twinx()
+     ax2.bar(plot_data.index, plot_data['weight'],
+             alpha=0.5, color='seagreen',
+             label='Earned Exposure')
+     plt.yticks(fontsize=6)
+     plt.legend(loc='upper right',
+                fontsize=5, frameon=False)
+     plt.subplots_adjust(wspace=0.3)
+     save_path = os.path.join(
+         os.getcwd(), 'plot', f'06_{tgt_nme}_{fig_nme}.png')
+     plt.savefig(save_path, dpi=300)
+     plt.close(fig)
+
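Called with synthetic arrays (and assuming a plot/ directory exists under the working directory, since the function saves there), the usage looks like:

    import os
    import numpy as np

    os.makedirs('plot', exist_ok=True)
    rng = np.random.default_rng(1)
    n = 5000
    weight = rng.uniform(0.5, 2.0, n)
    pred = rng.gamma(2.0, 0.5, n)
    act = pred * rng.lognormal(0.0, 0.3, n)   # noisy synthetic actuals
    plot_lift_list(pred, pred * weight, act * weight, weight,
                   tgt_nme='toy_target', n_bins=10)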
+
+ # Residual block: two linear layers + ReLU + skip connection.
+ # dropout and use_checkpoint are accepted for signature compatibility
+ # but currently unused.
+ class ResBlock(nn.Module):
+     def __init__(self, dim: int, dropout: float = 0.1, use_layernorm: bool = False, use_checkpoint: bool = False):
+         super().__init__()
+         self.block = nn.Sequential(
+             nn.Linear(dim, dim, bias=True),
+             nn.LayerNorm(dim) if use_layernorm else nn.BatchNorm1d(dim),
+             nn.ReLU(inplace=True),
+             # nn.Dropout(dropout) if dropout > 0.0 else nn.Identity(),
+             # dropped: Dropout interacts badly with BatchNorm
+             nn.Linear(dim, dim, bias=True)
+         )
+
+     def forward(self, x):
+         # An earlier gradient-checkpointing branch was removed here.
+         return F.relu(self.block(x) + x)
+
+ # ResNetSequential subclasses nn.Module and defines the full network.
+
+ class ResNetSequential(nn.Module):
+     # The network is chained with nn.Sequential:
+     # input -> fc + BatchNorm -> ResBlock * block_num -> fc -> Softplus
+     def __init__(self, input_dim, hidden_dim=64, block_num=2):
+         super(ResNetSequential, self).__init__()
+         self.net = nn.Sequential()
+         self.net.add_module('fc1', nn.Linear(input_dim, hidden_dim))
+         self.net.add_module('bn1', nn.BatchNorm1d(hidden_dim))
+         # self.net.add_module('ReLU1', nn.ReLU())
+         for i in range(block_num):
+             self.net.add_module('ResBlk_' + str(i + 1), ResBlock(hidden_dim))
+         self.net.add_module('fc2', nn.Linear(hidden_dim, 1))
+         self.net.add_module('softplus', nn.Softplus())
+
+     def forward(self, x):
+         return self.net(x)
+
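Because the head is a Softplus, the network's output is strictly positive, which is what the Tweedie loss requires; a minimal shape check (BatchNorm1d needs batches larger than one in training mode):

    import torch

    net = ResNetSequential(input_dim=12, hidden_dim=64, block_num=2)
    x = torch.randn(8, 12)
    out = net(x)
    print(out.shape, bool((out > 0).all()))   # torch.Size([8, 1]) True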
+ # Scikit-learn style wrapper around ResNetSequential (fit / predict /
+ # set_params). The hyper-parameter search itself happens in BayesOptModel
+ # below, via Optuna's TPE sampler.
+
+
+ class ResNetScikitLearn(nn.Module):
+     def __init__(self, model_nme, input_dim, hidden_dim=64,
+                  block_num=2, batch_num=100, epochs=100,
+                  tweedie_power=1.5, learning_rate=0.01, patience=10):
+         super(ResNetScikitLearn, self).__init__()
+         self.input_dim = input_dim
+         self.hidden_dim = hidden_dim
+         self.block_num = block_num
+         if torch.cuda.is_available():
+             self.device = torch.device('cuda')
+         elif torch.backends.mps.is_available():
+             self.device = torch.device('mps')
+         else:
+             self.device = torch.device('cpu')
+         self.resnet = ResNetSequential(
+             self.input_dim,
+             self.hidden_dim,
+             self.block_num
+         ).to(self.device)
+         '''if torch.cuda.device_count() > 1:
+             self.resnet = nn.DataParallel(
+                 self.resnet,
+                 device_ids=list(range(torch.cuda.device_count()))
+             )'''
+         self.batch_num = batch_num
+         self.epochs = epochs
+         self.model_nme = model_nme
+         # Infer the Tweedie power from the model name:
+         # 'f' (frequency) -> Poisson, 's' (severity) -> Gamma,
+         # otherwise use the tweedie_power argument.
+         if self.model_nme.find('f') != -1:
+             self.tw_power = 1
+         elif self.model_nme.find('s') != -1:
+             self.tw_power = 2
+         else:
+             self.tw_power = tweedie_power
+         self.learning_rate = learning_rate
+         self.patience = patience  # Early stopping patience
+
+     def fit(self, X_train, y_train, w_train=None, X_val=None, y_val=None, w_val=None):
+         # Convert the data to PyTorch tensors, created directly on the device
+         X_tensor = torch.tensor(
+             X_train.values, dtype=torch.float32, device=self.device)
+         y_tensor = torch.tensor(
+             y_train.values, dtype=torch.float32, device=self.device).view(-1, 1)
+         w_tensor = torch.tensor(
+             w_train.values, dtype=torch.float32, device=self.device).view(-1, 1) if w_train is not None else torch.ones_like(y_tensor, device=self.device)
+         # Validation tensors
+         if X_val is not None:
+             X_val_tensor = torch.tensor(
+                 X_val.values, dtype=torch.float32, device=self.device)
+             y_val_tensor = torch.tensor(
+                 y_val.values, dtype=torch.float32, device=self.device).view(-1, 1)
+             w_val_tensor = torch.tensor(
+                 w_val.values, dtype=torch.float32, device=self.device).view(-1, 1) if w_val is not None else torch.ones_like(y_val_tensor, device=self.device)
+         # Dataset and data loader
+         dataset = TensorDataset(
+             X_tensor, y_tensor, w_tensor
+         )
+         dataloader = DataLoader(
+             dataset,
+             batch_size=max(32, int((self.learning_rate / (1e-4)) ** 0.5 *
+                                    (X_train.shape[0] / self.batch_num))),
+             shuffle=True,
+             num_workers=0,
+             pin_memory=False
+         )
+         # Loss function and optimizer
+         optimizer = torch.optim.Adam(
+             self.resnet.parameters(), lr=self.learning_rate)
+         scaler = GradScaler(enabled=(self.device.type == 'cuda'))
+
+         # Early stopping state
+         best_loss, patience_counter = float('inf'), 0
+         best_model_state = None  # Initialize best_model_state
+
+         # Training loop
+         for epoch in range(1, self.epochs + 1):
+             self.resnet.train()
+             for X_batch, y_batch, w_batch in dataloader:
+                 optimizer.zero_grad()
+                 X_batch = X_batch.to(self.device, non_blocking=True)
+                 y_batch = y_batch.to(self.device, non_blocking=True)
+                 w_batch = w_batch.to(self.device, non_blocking=True)
+                 # Enable mixed precision when running on CUDA
+                 with autocast(enabled=(self.device.type == 'cuda')):
+                     y_pred = self.resnet(X_batch)
+                     y_pred = torch.clamp(y_pred, min=1e-6)
+                     losses = tweedie_loss(
+                         y_pred, y_batch, p=self.tw_power).view(-1)
+                     weighted_loss = (losses * w_batch.view(-1)
+                                      ).sum() / w_batch.sum()
+                 scaler.scale(weighted_loss).backward()
+                 # Gradient clipping: under CUDA mixed precision the
+                 # gradients must be unscaled before clipping
+                 if self.device.type == 'cuda':
+                     scaler.unscale_(optimizer)
+                     clip_grad_norm_(
+                         self.resnet.parameters(),
+                         max_norm=1.0
+                     )
+                 scaler.step(optimizer)
+                 scaler.update()
+
+             # Validation loss
+             if X_val is not None and y_val is not None:
+                 self.resnet.eval()
+                 with torch.no_grad(), autocast(enabled=(self.device.type == 'cuda')):
+                     y_val_pred = self.resnet(X_val_tensor)
+                     y_val_pred = torch.clamp(y_val_pred, min=1e-6)
+                     val_loss_values = tweedie_loss(
+                         y_val_pred, y_val_tensor, p=self.tw_power).view(-1)
+                     val_weighted_loss = (
+                         val_loss_values * w_val_tensor.view(-1)).sum() / w_val_tensor.sum()
+
+                 # Early stopping check
+                 if val_weighted_loss < best_loss:
+                     best_loss, patience_counter = val_weighted_loss, 0
+                     # Keep a copy of the best model so far
+                     best_model_state = copy.deepcopy(self.resnet.state_dict())
+                 else:
+                     patience_counter += 1
+                     if patience_counter >= self.patience:
+                         self.resnet.load_state_dict(best_model_state)  # restore best model
+                         break
+
+     def predict(self, X_test):
+         self.resnet.eval()
+
+         with torch.no_grad():
+             X_tensor = torch.tensor(
+                 X_test.values, dtype=torch.float32).to(self.device)
+             y_pred = self.resnet(X_tensor).cpu().numpy()
+
+         y_pred = np.clip(y_pred, 1e-6, None)
+         return y_pred.flatten()
+
+     def set_params(self, params):
+         # Set model hyper-parameters, then rebuild the network so that
+         # structural parameters (hidden_dim, block_num) actually take
+         # effect; without the rebuild they would only change attributes.
+         for key, value in params.items():
+             if hasattr(self, key):
+                 setattr(self, key, value)
+             else:
+                 raise ValueError(f"Parameter {key} not found in model.")
+         self.resnet = ResNetSequential(
+             self.input_dim, self.hidden_dim, self.block_num).to(self.device)
+
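The wrapper is used like any scikit-learn regressor; a small sketch on synthetic pandas data (names and sizes are illustrative):

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(42)
    X = pd.DataFrame(rng.normal(size=(512, 8)),
                     columns=[f'x{i}' for i in range(8)])
    y = pd.Series(np.exp(0.3 * X['x0']) * rng.gamma(2.0, 0.5, 512))
    w = pd.Series(np.ones(512))

    net = ResNetScikitLearn('toy_bc', input_dim=8, epochs=5, batch_num=4)
    net.fit(X[:400], y[:400], w[:400], X[400:], y[400:], w[400:])
    pred = net.predict(X[400:])   # positive predictions, shape (112,)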
+ # Bayesian-optimisation model class wrapping both the XGBoost and the
+ # ResNet model.
+
+
+ class BayesOptModel:
+     def __init__(self, train_data, test_data,
+                  model_nme, resp_nme, weight_nme, factor_nmes,
+                  cate_list=[], prop_test=0.25, rand_seed=None, epochs=100):
+         # train_data / test_data: modelling and holdout DataFrames
+         # model_nme: model name (also drives the objective, see below)
+         # resp_nme: response column, weight_nme: weight column
+         # factor_nmes: list of feature names
+         # cate_list: list of categorical features
+         # prop_test: holdout proportion for CV, rand_seed: random seed
+         self.train_data = train_data
+         self.test_data = test_data
+         self.resp_nme = resp_nme
+         self.weight_nme = weight_nme
+         self.train_data.loc[:, 'w_act'] = self.train_data[self.resp_nme] * \
+             self.train_data[self.weight_nme]
+         self.test_data.loc[:, 'w_act'] = self.test_data[self.resp_nme] * \
+             self.test_data[self.weight_nme]
+         self.factor_nmes = factor_nmes
+         self.cate_list = cate_list
+         self.rand_seed = rand_seed if rand_seed is not None else np.random.randint(
+             1, 10000)
+         if self.cate_list != []:
+             for cate in self.cate_list:
+                 self.train_data[cate] = self.train_data[cate].astype(
+                     'category')
+                 self.test_data[cate] = self.test_data[cate].astype('category')
+         self.prop_test = prop_test
+         self.cv = ShuffleSplit(n_splits=int(1 / self.prop_test),
+                                test_size=self.prop_test,
+                                random_state=self.rand_seed)
+         self.model_nme = model_nme
+         # Objective from the model name: 'f' -> frequency (Poisson),
+         # 's' -> severity (Gamma), anything else ('bc' burning cost
+         # included) -> Tweedie.
+         if self.model_nme.find('f') != -1:
+             self.obj = 'count:poisson'
+         elif self.model_nme.find('s') != -1:
+             self.obj = 'reg:gamma'
+         else:
+             self.obj = 'reg:tweedie'
+         self.fit_params = {
+             'sample_weight': self.train_data[self.weight_nme].values
+         }
+         self.num_features = [
+             nme for nme in self.factor_nmes if nme not in self.cate_list]
+         self.train_oht_scl_data = self.train_data[self.factor_nmes +
+                                                   [self.weight_nme] + [self.resp_nme]].copy()
+         self.test_oht_scl_data = self.test_data[self.factor_nmes +
+                                                 [self.weight_nme] + [self.resp_nme]].copy()
+         self.train_oht_scl_data = pd.get_dummies(
+             self.train_oht_scl_data,
+             columns=self.cate_list,
+             drop_first=True,
+             dtype=np.int8
+         )
+         self.test_oht_scl_data = pd.get_dummies(
+             self.test_oht_scl_data,
+             columns=self.cate_list,
+             drop_first=True,
+             dtype=np.int8
+         )
+         for num_chr in self.num_features:
+             scaler = StandardScaler()
+             self.train_oht_scl_data[num_chr] = scaler.fit_transform(
+                 self.train_oht_scl_data[num_chr].values.reshape(-1, 1))
+             self.test_oht_scl_data[num_chr] = scaler.transform(
+                 self.test_oht_scl_data[num_chr].values.reshape(-1, 1))
+         # Align the test columns with the train columns (levels missing
+         # from the test set become all-zero indicator columns).
+         self.test_oht_scl_data = self.test_oht_scl_data.reindex(
+             columns=self.train_oht_scl_data.columns,
+             fill_value=0
+         )
+         self.var_nmes = list(
+             set(list(self.train_oht_scl_data.columns)) -
+             set([self.weight_nme, self.resp_nme])
+         )
+         self.epochs = epochs
+         self.model_label = []
+
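The reindex step above is what keeps the train and test design matrices aligned when a category level is missing from the test set; in isolation:

    import numpy as np
    import pandas as pd

    train = pd.DataFrame({'brand': ['a', 'b', 'c']})
    test = pd.DataFrame({'brand': ['a', 'a']})          # level 'c' absent
    train_oht = pd.get_dummies(train, columns=['brand'],
                               drop_first=True, dtype=np.int8)
    test_oht = pd.get_dummies(test, columns=['brand'],
                              drop_first=True, dtype=np.int8)
    test_oht = test_oht.reindex(columns=train_oht.columns, fill_value=0)
    print(list(test_oht.columns))   # ['brand_b', 'brand_c']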
+     # One-way plots: actual response by factor level, with exposure bars.
+     def plot_oneway(self, n_bins=10):
+         for c in self.factor_nmes:
+             fig = plt.figure(figsize=(7, 5))
+             if c in self.cate_list:
+                 strs = c
+             else:
+                 strs = c + '_bins'
+                 self.train_data.loc[:, strs] = pd.qcut(
+                     self.train_data[c],
+                     n_bins,
+                     duplicates='drop'
+                 )
+             plot_data = self.train_data.groupby(
+                 [strs], observed=True).sum(numeric_only=True)
+             plot_data.reset_index(inplace=True)
+             plot_data['act_v'] = plot_data['w_act'] / \
+                 plot_data[self.weight_nme]
+             ax = fig.add_subplot(111)
+             ax.plot(plot_data.index, plot_data['act_v'],
+                     label='Actual', color='red')
+             ax.set_title(
+                 'Analysis of %s : Train Data' % strs,
+                 fontsize=8)
+             plt.xticks(plot_data.index,
+                        list(plot_data[strs].astype(str)),
+                        rotation=90)
+             if len(list(plot_data[strs].astype(str))) > 50:
+                 plt.xticks(fontsize=3)
+             else:
+                 plt.xticks(fontsize=6)
+             plt.yticks(fontsize=6)
+             ax2 = ax.twinx()
+             ax2.bar(plot_data.index,
+                     plot_data[self.weight_nme],
+                     alpha=0.5, color='seagreen')
+             plt.yticks(fontsize=6)
+             plt.margins(0.05)
+             plt.subplots_adjust(wspace=0.3)
+             save_path = os.path.join(
+                 os.getcwd(), 'plot',
+                 f'00_{self.model_nme}_{strs}_oneway.png')
+             plt.savefig(save_path, dpi=300)
+             plt.close(fig)
+
+     # XGBoost cross-validation objective for Optuna.
+     def cross_val_xgb(self, trial):
+         learning_rate = trial.suggest_float(
+             'learning_rate', 1e-5, 1e-1, log=True)
+         gamma = trial.suggest_float(
+             'gamma', 0, 10000)
+         max_depth = trial.suggest_int(
+             'max_depth', 3, 25)
+         n_estimators = trial.suggest_int(
+             'n_estimators', 10, 500, step=10)
+         min_child_weight = trial.suggest_int(
+             'min_child_weight', 100, 10000, step=100)
+         reg_alpha = trial.suggest_float(
+             'reg_alpha', 1e-10, 1, log=True)
+         reg_lambda = trial.suggest_float(
+             'reg_lambda', 1e-10, 1, log=True)
+         if self.obj == 'reg:tweedie':
+             tweedie_variance_power = trial.suggest_float(
+                 'tweedie_variance_power', 1, 2)
+         elif self.obj == 'count:poisson':
+             tweedie_variance_power = 1
+         elif self.obj == 'reg:gamma':
+             tweedie_variance_power = 2
+         clf = xgb.XGBRegressor(
+             objective=self.obj,
+             random_state=self.rand_seed,
+             subsample=0.9,
+             tree_method='gpu_hist' if torch.cuda.is_available() else 'hist',
+             gpu_id=0,
+             enable_categorical=True,
+             predictor='gpu_predictor' if torch.cuda.is_available() else 'cpu_predictor'
+         )
+         params = {
+             'learning_rate': learning_rate,
+             'gamma': gamma,
+             'max_depth': max_depth,
+             'n_estimators': n_estimators,
+             'min_child_weight': min_child_weight,
+             'reg_alpha': reg_alpha,
+             'reg_lambda': reg_lambda
+         }
+         if self.obj == 'reg:tweedie':
+             params['tweedie_variance_power'] = tweedie_variance_power
+         clf.set_params(**params)
+         # cross_val_score returns the negated deviance (greater_is_better
+         # is False), so negate again to get a loss for Optuna to minimise.
+         acc = cross_val_score(
+             clf,
+             self.train_data[self.factor_nmes],
+             self.train_data[self.resp_nme].values,
+             fit_params=self.fit_params,
+             cv=self.cv,
+             scoring=make_scorer(
+                 mean_tweedie_deviance,
+                 power=tweedie_variance_power,
+                 greater_is_better=False),
+             error_score='raise',
+             n_jobs=int(1 / self.prop_test)).mean()
+         return -acc
+
+     # XGBoost Bayesian optimisation driver.
+     def bayesopt_xgb(self, max_evals=100):
+         study = optuna.create_study(
+             direction='minimize',
+             sampler=optuna.samplers.TPESampler(seed=self.rand_seed))
+         study.optimize(self.cross_val_xgb, n_trials=max_evals)
+         self.best_xgb_params = study.best_params
+         pd.DataFrame(self.best_xgb_params, index=[0]).to_csv(
+             os.getcwd() + '/Results/' + self.model_nme + '_bestparams_xgb.csv')
+         self.best_xgb_trial = study.best_trial
+         self.xgb_best = xgb.XGBRegressor(
+             objective=self.obj,
+             random_state=self.rand_seed,
+             subsample=0.9,
+             tree_method='gpu_hist' if torch.cuda.is_available() else 'hist',
+             gpu_id=0,
+             enable_categorical=True,
+             predictor='gpu_predictor' if torch.cuda.is_available() else 'cpu_predictor'
+         )
+         self.xgb_best.set_params(**self.best_xgb_params)
+         self.xgb_best.fit(self.train_data[self.factor_nmes],
+                           self.train_data[self.resp_nme].values,
+                           **self.fit_params)
+         self.model_label += ['Xgboost']
+         self.train_data['pred_xgb'] = self.xgb_best.predict(
+             self.train_data[self.factor_nmes])
+         self.test_data['pred_xgb'] = self.xgb_best.predict(
+             self.test_data[self.factor_nmes])
+         self.train_data.loc[:, 'w_pred_xgb'] = self.train_data['pred_xgb'] * \
+             self.train_data[self.weight_nme]
+         self.test_data.loc[:, 'w_pred_xgb'] = self.test_data['pred_xgb'] * \
+             self.test_data[self.weight_nme]
+
+     # ResNet cross-validation objective for Optuna.
+     def cross_val_resn(self, trial):
+
+         learning_rate = trial.suggest_float(
+             'learning_rate', 1e-6, 1e-2, log=True)  # kept low to guard against exploding gradients
+         hidden_dim = trial.suggest_int(
+             'hidden_dim', 32, 256, step=32)
+         block_num = trial.suggest_int(
+             'block_num', 2, 10)
+         batch_num = trial.suggest_int(
+             'batch_num',
+             10 if self.obj == 'reg:gamma' else 100,
+             100 if self.obj == 'reg:gamma' else 1000,
+             step=10 if self.obj == 'reg:gamma' else 100)
+         if self.obj == 'reg:tweedie':
+             tw_power = trial.suggest_float(
+                 'tw_power', 1, 2.0)
+         elif self.obj == 'count:poisson':
+             tw_power = 1
+         elif self.obj == 'reg:gamma':
+             tw_power = 2
+         loss = 0
+         for fold, (train_idx, test_idx) in enumerate(self.cv.split(self.train_oht_scl_data[self.var_nmes])):
+             # Build the model for this fold
+             cv_net = ResNetScikitLearn(
+                 model_nme=self.model_nme,
+                 input_dim=self.train_oht_scl_data[self.var_nmes].shape[1],
+                 epochs=self.epochs,
+                 learning_rate=learning_rate,
+                 hidden_dim=hidden_dim,
+                 block_num=block_num,
+                 # batch_num keeps the per-batch weight variance stable
+                 batch_num=batch_num,
+                 tweedie_power=tw_power
+             )
+             # Train on the fold's training split, validate on its holdout
+             cv_net.fit(
+                 self.train_oht_scl_data[self.var_nmes].iloc[train_idx],
+                 self.train_oht_scl_data[self.resp_nme].iloc[train_idx],
+                 self.train_oht_scl_data[self.weight_nme].iloc[train_idx],
+                 self.train_oht_scl_data[self.var_nmes].iloc[test_idx],
+                 self.train_oht_scl_data[self.resp_nme].iloc[test_idx],
+                 self.train_oht_scl_data[self.weight_nme].iloc[test_idx]
+             )
+             # Predict on the held-out split
+             y_pred_fold = cv_net.predict(
+                 self.train_oht_scl_data[self.var_nmes].iloc[test_idx]
+             )
+             # Accumulate the weighted Tweedie deviance
+             loss += mean_tweedie_deviance(
+                 self.train_oht_scl_data[self.resp_nme].iloc[test_idx],
+                 y_pred_fold,
+                 sample_weight=self.train_oht_scl_data[self.weight_nme].iloc[test_idx],
+                 power=tw_power
+             )
+         return loss / int(1 / self.prop_test)
+
631
+ # 定义ResNet贝叶斯优化函数
632
+ def bayesopt_resnet(self, max_evals=100):
633
+ study = optuna.create_study(
634
+ direction='minimize',
635
+ sampler=optuna.samplers.TPESampler(seed=self.rand_seed))
636
+ study.optimize(self.cross_val_resn, n_trials=max_evals)
637
+ self.best_resn_params = study.best_params
638
+ pd.DataFrame(self.best_resn_params, index=[0]).to_csv(
639
+ os.getcwd() + '/Results/' + self.model_nme + '_bestparams_resn.csv')
640
+ self.best_resn_trial = study.best_trial
641
+ self.resn_best = ResNetScikitLearn(
642
+ model_nme=self.model_nme,
643
+ input_dim=self.train_oht_scl_data[self.var_nmes].shape[1]
644
+ )
645
+ self.resn_best.set_params(self.best_resn_params)
646
+ self.resn_best.fit(self.train_oht_scl_data[self.var_nmes],
647
+ self.train_oht_scl_data[self.resp_nme],
648
+ self.train_oht_scl_data[self.weight_nme])
649
+ self.model_label += ['ResNet']
650
+ self.train_data['pred_resn'] = self.resn_best.predict(
651
+ self.train_oht_scl_data[self.var_nmes])
652
+ self.test_data['pred_resn'] = self.resn_best.predict(
653
+ self.test_oht_scl_data[self.var_nmes])
654
+ self.train_data.loc[:, 'w_pred_resn'] = self.train_data['pred_resn'] * \
655
+ self.train_data[self.weight_nme]
656
+ self.test_data.loc[:, 'w_pred_resn'] = self.test_data['pred_resn'] * \
657
+ self.test_data[self.weight_nme]
658
+
+     # Weighted binning helper (method version of split_data above).
+     def _split_data(self, data, col_nme, wgt_nme, n_bins=10):
+         data.sort_values(by=col_nme, ascending=True, inplace=True)
+         data['cum_weight'] = data[wgt_nme].cumsum()
+         w_sum = data[wgt_nme].sum()
+         data.loc[:, 'bins'] = np.floor(
+             data['cum_weight'] * float(n_bins) / w_sum)
+         data.loc[(data['bins'] == n_bins), 'bins'] = n_bins - 1
+         return data.groupby(['bins'], observed=True).sum(numeric_only=True)
+
+     # Build the binned data behind a lift chart.
+     def _plot_data_lift(self,
+                         pred_list, w_pred_list,
+                         w_act_list, weight_list, n_bins=10):
+         lift_data = pd.DataFrame()
+         lift_data.loc[:, 'pred'] = pred_list
+         lift_data.loc[:, 'w_pred'] = w_pred_list
+         lift_data.loc[:, 'act'] = w_act_list
+         lift_data.loc[:, 'weight'] = weight_list
+         plot_data = self._split_data(
+             lift_data, 'pred', 'weight', n_bins)
+         plot_data['exp_v'] = plot_data['w_pred'] / plot_data['weight']
+         plot_data['act_v'] = plot_data['act'] / plot_data['weight']
+         plot_data.reset_index(inplace=True)
+         return plot_data
+
+     # Plot lift charts on train and test data side by side.
+     def plot_lift(self, model_label, pred_nme, n_bins=10):
+         figpos_list = [121, 122]
+         plot_dict = {
+             121: self.train_data,
+             122: self.test_data
+         }
+         name_list = {
+             121: 'Train Data',
+             122: 'Test Data'
+         }
+         if model_label == 'Xgboost':
+             pred_nme = 'pred_xgb'
+         elif model_label == 'ResNet':
+             pred_nme = 'pred_resn'
+
+         fig = plt.figure(figsize=(11, 5))
+         for figpos in figpos_list:
+             plot_data = self._plot_data_lift(
+                 plot_dict[figpos][pred_nme].values,
+                 plot_dict[figpos]['w_' + pred_nme].values,
+                 plot_dict[figpos]['w_act'].values,
+                 plot_dict[figpos][self.weight_nme].values,
+                 n_bins)
+             ax = fig.add_subplot(figpos)
+             ax.plot(plot_data.index, plot_data['act_v'],
+                     label='Actual', color='red')
+             ax.plot(plot_data.index, plot_data['exp_v'],
+                     label='Predicted', color='blue')
+             ax.set_title(
+                 'Lift Chart on %s' % name_list[figpos], fontsize=8)
+             plt.xticks(plot_data.index,
+                        plot_data.index,
+                        rotation=90, fontsize=6)
+             plt.yticks(fontsize=6)
+             plt.legend(loc='upper left',
+                        fontsize=5, frameon=False)
+             plt.margins(0.05)
+             ax2 = ax.twinx()
+             ax2.bar(plot_data.index, plot_data['weight'],
+                     alpha=0.5, color='seagreen',
+                     label='Earned Exposure')
+             plt.yticks(fontsize=6)
+             plt.legend(loc='upper right',
+                        fontsize=5, frameon=False)
+         plt.subplots_adjust(wspace=0.3)
+         save_path = os.path.join(
+             os.getcwd(), 'plot', f'01_{self.model_nme}_{model_label}_lift.png')
+         plt.savefig(save_path, dpi=300)
+         plt.show()
+         plt.close(fig)
+
+     # Build the binned data behind a double lift chart.
+     def _plot_data_dlift(self,
+                          pred_list_model1, pred_list_model2,
+                          w_list, w_act_list, n_bins=10):
+         lift_data = pd.DataFrame()
+         lift_data.loc[:, 'pred1'] = pred_list_model1
+         lift_data.loc[:, 'pred2'] = pred_list_model2
+         lift_data.loc[:, 'diff_ly'] = lift_data['pred1'] / lift_data['pred2']
+         lift_data.loc[:, 'act'] = w_act_list
+         lift_data.loc[:, 'weight'] = w_list
+         plot_data = self._split_data(lift_data, 'diff_ly', 'weight', n_bins)
+         plot_data['exp_v1'] = plot_data['pred1'] / plot_data['act']
+         plot_data['exp_v2'] = plot_data['pred2'] / plot_data['act']
+         plot_data['act_v'] = plot_data['act'] / plot_data['act']
+         plot_data.reset_index(inplace=True)
+         return plot_data
+
+     # Plot double lift charts (XGBoost vs ResNet) on train and test data.
+     def plot_dlift(self, n_bins=10):
+         figpos_list = [121, 122]
+         plot_dict = {
+             121: self.train_data,
+             122: self.test_data
+         }
+         name_list = {
+             121: 'Train Data',
+             122: 'Test Data'
+         }
+         fig = plt.figure(figsize=(11, 5))
+         for figpos in figpos_list:
+             plot_data = self._plot_data_dlift(
+                 plot_dict[figpos]['w_pred_xgb'].values,
+                 plot_dict[figpos]['w_pred_resn'].values,
+                 plot_dict[figpos][self.weight_nme].values,
+                 plot_dict[figpos]['w_act'].values,
+                 n_bins)
+             ax = fig.add_subplot(figpos)
+             tt1 = 'Xgboost'
+             tt2 = 'ResNet'
+             ax.plot(plot_data.index, plot_data['act_v'],
+                     label='Actual', color='red')
+             ax.plot(plot_data.index, plot_data['exp_v1'],
+                     label=tt1, color='blue')
+             ax.plot(plot_data.index, plot_data['exp_v2'],
+                     label=tt2, color='black')
+             ax.set_title(
+                 'Double Lift Chart on %s' % name_list[figpos], fontsize=8)
+             plt.xticks(plot_data.index,
+                        plot_data.index,
+                        rotation=90, fontsize=6)
+             plt.xlabel('%s / %s' % (tt1, tt2), fontsize=6)
+             plt.yticks(fontsize=6)
+             plt.legend(loc='upper left',
+                        fontsize=5, frameon=False)
+             plt.margins(0.1)
+             plt.subplots_adjust(bottom=0.25, top=0.95, right=0.8)
+             ax2 = ax.twinx()
+             ax2.bar(plot_data.index, plot_data['weight'],
+                     alpha=0.5, color='seagreen',
+                     label='Earned Exposure')
+             plt.yticks(fontsize=6)
+             plt.legend(loc='upper right',
+                        fontsize=5, frameon=False)
+         plt.subplots_adjust(wspace=0.3)
+         save_path = os.path.join(
+             os.getcwd(), 'plot', f'02_{self.model_nme}_dlift.png')
+         plt.savefig(save_path, dpi=300)
+         plt.show()
+         plt.close(fig)
+
+     # Persist the fitted models to ./model.
+     def save_model(self, model_name=None):
+         # model_name can be 'xgb', 'resn' or None (save both)
+         save_path_xgb = os.path.join(
+             os.getcwd(), 'model', f'01_{self.model_nme}_Xgboost.pkl')
+         save_path_resn = os.path.join(
+             os.getcwd(), 'model', f'01_{self.model_nme}_ResNet.pth')
+         if not os.path.exists(os.path.dirname(save_path_xgb)):
+             os.makedirs(os.path.dirname(save_path_xgb))
+         # self.xgb_best.save_model(save_path_xgb)
+         if model_name != 'resn':
+             joblib.dump(self.xgb_best, save_path_xgb)
+         if model_name != 'xgb':
+             torch.save(self.resn_best.resnet.state_dict(), save_path_resn)
+
+     def load_model(self, model_name=None):
+         # model_name can be 'xgb', 'resn' or None (load both)
+         save_path_xgb = os.path.join(
+             os.getcwd(), 'model', f'01_{self.model_nme}_Xgboost.pkl')
+         save_path_resn = os.path.join(
+             os.getcwd(), 'model', f'01_{self.model_nme}_ResNet.pth')
+         if model_name != 'resn':
+             self.xgb_load = joblib.load(save_path_xgb)
+         if model_name != 'xgb':
+             self.resn_load = ResNetScikitLearn(
+                 model_nme=self.model_nme,
+                 input_dim=self.train_oht_scl_data[self.var_nmes].shape[1]
+             )
+             self.resn_load.resnet.load_state_dict(
+                 torch.load(save_path_resn, map_location=self.resn_load.device))
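
End to end, a typical session might look like the following smoke run on synthetic data (the Results/, plot/ and model/ directories are assumed to exist since the methods write into them; names and budgets are illustrative):

    import os
    import numpy as np
    import pandas as pd

    for d in ('Results', 'plot', 'model'):
        os.makedirs(d, exist_ok=True)

    rng = np.random.default_rng(7)
    n = 2000
    df = pd.DataFrame({
        'age': rng.integers(18, 80, n).astype(float),
        'region': rng.choice(['north', 'south', 'east'], n),
        'exposure': rng.uniform(0.1, 1.0, n),
    })
    df['freq'] = rng.poisson(0.1 * df['exposure']) / df['exposure']
    train, test = df.iloc[:1600].copy(), df.iloc[1600:].copy()

    opt = BayesOptModel(train, test, model_nme='toy_f',
                        resp_nme='freq', weight_nme='exposure',
                        factor_nmes=['age', 'region'], cate_list=['region'],
                        epochs=10)
    opt.bayesopt_xgb(max_evals=5)     # tiny budgets for a smoke test
    opt.bayesopt_resnet(max_evals=5)
    opt.plot_lift('Xgboost', None)
    opt.plot_dlift()
    opt.save_model()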