ins-pricing 0.4.5-py3-none-any.whl → 0.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +48 -22
- ins_pricing/__init__.py +142 -90
- ins_pricing/cli/BayesOpt_entry.py +58 -46
- ins_pricing/cli/BayesOpt_incremental.py +77 -110
- ins_pricing/cli/Explain_Run.py +42 -23
- ins_pricing/cli/Explain_entry.py +551 -577
- ins_pricing/cli/Pricing_Run.py +42 -23
- ins_pricing/cli/bayesopt_entry_runner.py +51 -16
- ins_pricing/cli/utils/bootstrap.py +23 -0
- ins_pricing/cli/utils/cli_common.py +256 -256
- ins_pricing/cli/utils/cli_config.py +379 -360
- ins_pricing/cli/utils/import_resolver.py +375 -358
- ins_pricing/cli/utils/notebook_utils.py +256 -242
- ins_pricing/cli/watchdog_run.py +216 -198
- ins_pricing/frontend/__init__.py +10 -10
- ins_pricing/frontend/app.py +132 -61
- ins_pricing/frontend/config_builder.py +33 -0
- ins_pricing/frontend/example_config.json +11 -0
- ins_pricing/frontend/example_workflows.py +1 -1
- ins_pricing/frontend/runner.py +340 -388
- ins_pricing/governance/__init__.py +20 -20
- ins_pricing/governance/release.py +159 -159
- ins_pricing/modelling/README.md +1 -1
- ins_pricing/modelling/__init__.py +147 -92
- ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +31 -13
- ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +12 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +589 -552
- ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +987 -958
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +488 -548
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +349 -342
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +921 -913
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +794 -785
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +454 -446
- ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1294 -1282
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +64 -56
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +203 -198
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +333 -325
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +279 -267
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +515 -313
- ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
- ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +193 -186
- ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
- ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
- ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +636 -623
- ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
- ins_pricing/modelling/explain/__init__.py +55 -55
- ins_pricing/modelling/explain/metrics.py +27 -174
- ins_pricing/modelling/explain/permutation.py +237 -237
- ins_pricing/modelling/plotting/__init__.py +40 -36
- ins_pricing/modelling/plotting/compat.py +228 -0
- ins_pricing/modelling/plotting/curves.py +572 -572
- ins_pricing/modelling/plotting/diagnostics.py +163 -163
- ins_pricing/modelling/plotting/geo.py +362 -362
- ins_pricing/modelling/plotting/importance.py +121 -121
- ins_pricing/pricing/__init__.py +27 -27
- ins_pricing/pricing/factors.py +67 -56
- ins_pricing/production/__init__.py +35 -25
- ins_pricing/production/{predict.py → inference.py} +140 -57
- ins_pricing/production/monitoring.py +8 -21
- ins_pricing/reporting/__init__.py +11 -11
- ins_pricing/setup.py +1 -1
- ins_pricing/tests/production/test_inference.py +90 -0
- ins_pricing/utils/__init__.py +112 -78
- ins_pricing/utils/device.py +258 -237
- ins_pricing/utils/features.py +53 -0
- ins_pricing/utils/io.py +72 -0
- ins_pricing/utils/logging.py +34 -1
- ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
- ins_pricing/utils/metrics.py +158 -24
- ins_pricing/utils/numerics.py +76 -0
- ins_pricing/utils/paths.py +9 -1
- ins_pricing/utils/profiling.py +8 -4
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/METADATA +1 -1
- ins_pricing-0.5.1.dist-info/RECORD +132 -0
- ins_pricing/modelling/core/BayesOpt.py +0 -146
- ins_pricing/modelling/core/__init__.py +0 -1
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
- ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
- ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
- ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
- ins_pricing/modelling/core/bayesopt/utils.py +0 -105
- ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
- ins_pricing/tests/production/test_predict.py +0 -233
- ins_pricing-0.4.5.dist-info/RECORD +0 -130
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/WHEEL +0 -0
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/top_level.txt +0 -0
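The renames above move the bayesopt package out of modelling/core and rename production/predict.py to inference.py. A hedged compatibility sketch for downstream code that imports these submodules directly (whether they are imported this way is an assumption; the aliases are illustrative):

# Hypothetical shim supporting both 0.4.5 and 0.5.1 import paths.
try:  # ins-pricing >= 0.5.1
    from ins_pricing.modelling.bayesopt import core as bayesopt_core
    from ins_pricing.production import inference as production_api
except ImportError:  # ins-pricing 0.4.5
    from ins_pricing.modelling.core.bayesopt import core as bayesopt_core
    from ins_pricing.production import predict as production_api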
ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py

The XGBoost trainer gains a DMatrix-backed training path in 0.5.1: a private _XGBDMatrixWrapper drives xgb.train with (Quantile)DMatrix inputs, retries on CPU after a GPU out-of-memory error, and is returned by XGBTrainer._build_estimator whenever config.xgb_use_dmatrix is enabled (the default). Module-level logging also moves from print() to a logger obtained via ins_pricing.utils.get_logger.

@@ -1,65 +1,247 @@

from __future__ import annotations

import inspect
import os
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import optuna
import torch
import xgboost as xgb
from sklearn.metrics import log_loss

from ins_pricing.modelling.bayesopt.trainers.trainer_base import TrainerBase
from ins_pricing.utils import EPS, get_logger, log_print
from ins_pricing.utils.losses import regression_loss

_logger = get_logger("ins_pricing.trainer.xgb")


def _log(*args, **kwargs) -> None:
    log_print(_logger, *args, **kwargs)


_XGB_CUDA_CHECKED = False
_XGB_HAS_CUDA = False


def _is_oom_error(exc: Exception) -> bool:
    msg = str(exc).lower()
    return "out of memory" in msg or ("cuda" in msg and "memory" in msg)


class _XGBDMatrixWrapper:
    """Sklearn-like wrapper that uses xgb.train + (Quantile)DMatrix internally."""

    def __init__(
        self,
        params: Dict[str, Any],
        *,
        task_type: str,
        use_gpu: bool,
        allow_cpu_fallback: bool = True,
    ) -> None:
        self.params = dict(params)
        self.task_type = task_type
        self.use_gpu = bool(use_gpu)
        self.allow_cpu_fallback = allow_cpu_fallback
        self._booster: Optional[xgb.Booster] = None
        self.best_iteration: Optional[int] = None

    def set_params(self, **params: Any) -> "_XGBDMatrixWrapper":
        self.params.update(params)
        return self

    def get_params(self, deep: bool = True) -> Dict[str, Any]:
        _ = deep
        return dict(self.params)

    def _select_dmatrix_class(self) -> Any:
        if self.use_gpu and hasattr(xgb, "DeviceQuantileDMatrix"):
            return xgb.DeviceQuantileDMatrix
        if hasattr(xgb, "QuantileDMatrix"):
            return xgb.QuantileDMatrix
        return xgb.DMatrix

    def _build_dmatrix(self, X, y=None, weight=None) -> xgb.DMatrix:
        if isinstance(X, (str, os.PathLike)):
            raise ValueError(
                "External-memory DMatrix is disabled; pass in-memory data instead."
            )
        if isinstance(X, xgb.DMatrix):
            raise ValueError(
                "DMatrix inputs are disabled; pass raw in-memory data instead."
            )
        dmatrix_cls = self._select_dmatrix_class()
        kwargs: Dict[str, Any] = {}
        if y is not None:
            kwargs["label"] = y
        if weight is not None:
            kwargs["weight"] = weight
        if bool(self.params.get("enable_categorical", False)):
            kwargs["enable_categorical"] = True
        try:
            return dmatrix_cls(X, **kwargs)
        except TypeError:
            kwargs.pop("enable_categorical", None)
            return dmatrix_cls(X, **kwargs)
        except Exception:
            if dmatrix_cls is not xgb.DMatrix:
                return xgb.DMatrix(X, **kwargs)
            raise

    def _resolve_train_params(self) -> Dict[str, Any]:
        params = dict(self.params)
        if not self.use_gpu:
            params["tree_method"] = "hist"
            params["predictor"] = "cpu_predictor"
            params.pop("gpu_id", None)
        return params

    def _train_booster(
        self,
        X,
        y,
        *,
        sample_weight=None,
        eval_set=None,
        sample_weight_eval_set=None,
        early_stopping_rounds: Optional[int] = None,
        verbose: bool = False,
    ) -> None:
        params = self._resolve_train_params()
        num_boost_round = int(params.pop("n_estimators", 100))
        dtrain = self._build_dmatrix(X, y, sample_weight)
        evals = []
        if eval_set:
            weights = sample_weight_eval_set or []
            for idx, (X_val, y_val) in enumerate(eval_set):
                w_val = weights[idx] if idx < len(weights) else None
                dval = self._build_dmatrix(X_val, y_val, w_val)
                evals.append((dval, f"val{idx}"))
        self._booster = xgb.train(
            params,
            dtrain,
            num_boost_round=num_boost_round,
            evals=evals,
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=verbose,
        )
        self.best_iteration = getattr(self._booster, "best_iteration", None)

    def fit(self, X, y, **fit_kwargs) -> "_XGBDMatrixWrapper":
        sample_weight = fit_kwargs.pop("sample_weight", None)
        eval_set = fit_kwargs.pop("eval_set", None)
        sample_weight_eval_set = fit_kwargs.pop("sample_weight_eval_set", None)
        early_stopping_rounds = fit_kwargs.pop("early_stopping_rounds", None)
        verbose = bool(fit_kwargs.pop("verbose", False))
        fit_kwargs.pop("eval_metric", None)
        try:
            self._train_booster(
                X,
                y,
                sample_weight=sample_weight,
                eval_set=eval_set,
                sample_weight_eval_set=sample_weight_eval_set,
                early_stopping_rounds=early_stopping_rounds,
                verbose=verbose,
            )
        except Exception as exc:
            if self.use_gpu and self.allow_cpu_fallback and _is_oom_error(exc):
                _log("[XGBoost] GPU OOM detected; retrying with CPU.", flush=True)
                self.use_gpu = False
                self._train_booster(
                    X,
                    y,
                    sample_weight=sample_weight,
                    eval_set=eval_set,
                    sample_weight_eval_set=sample_weight_eval_set,
                    early_stopping_rounds=early_stopping_rounds,
                    verbose=verbose,
                )
            else:
                raise
        return self

    def _resolve_iteration_range(self) -> Optional[Tuple[int, int]]:
        if self.best_iteration is None:
            return None
        return (0, int(self.best_iteration) + 1)

    def _predict_raw(self, X) -> np.ndarray:
        if self._booster is None:
            raise RuntimeError("Booster not trained.")
        dtest = self._build_dmatrix(X)
        iteration_range = self._resolve_iteration_range()
        if iteration_range is None:
            return self._booster.predict(dtest)
        try:
            return self._booster.predict(dtest, iteration_range=iteration_range)
        except TypeError:
            return self._booster.predict(dtest, ntree_limit=iteration_range[1])

    def predict(self, X, **_kwargs) -> np.ndarray:
        pred = self._predict_raw(X)
        if self.task_type == "classification":
            if pred.ndim == 1:
                return (pred > 0.5).astype(int)
            return np.argmax(pred, axis=1)
        return pred

    def predict_proba(self, X, **_kwargs) -> np.ndarray:
        pred = self._predict_raw(X)
        if pred.ndim == 1:
            return np.column_stack([1 - pred, pred])
        return pred

    def get_booster(self) -> Optional[xgb.Booster]:
        return self._booster


def _xgb_cuda_available() -> bool:
    # Best-effort check for XGBoost CUDA build; cached to avoid repeated checks.
    global _XGB_CUDA_CHECKED, _XGB_HAS_CUDA
    if _XGB_CUDA_CHECKED:
        return _XGB_HAS_CUDA
    _XGB_CUDA_CHECKED = True
    if not torch.cuda.is_available():
        _XGB_HAS_CUDA = False
        return False
    try:
        build_info = getattr(xgb, "build_info", None)
        if callable(build_info):
            info = build_info()
            for key in ("USE_CUDA", "use_cuda", "cuda"):
                if key in info:
                    val = info[key]
                    if isinstance(val, str):
                        _XGB_HAS_CUDA = val.strip().upper() in (
                            "ON", "YES", "TRUE", "1")
                    else:
                        _XGB_HAS_CUDA = bool(val)
                    return _XGB_HAS_CUDA
    except Exception:
        pass
    try:
        has_cuda = getattr(getattr(xgb, "core", None), "_has_cuda_support", None)
        if callable(has_cuda):
            _XGB_HAS_CUDA = bool(has_cuda())
            return _XGB_HAS_CUDA
    except Exception:
        pass
    _XGB_HAS_CUDA = False
    return False


class XGBTrainer(TrainerBase):
    def __init__(self, context: "BayesOptModel") -> None:
        super().__init__(context, 'Xgboost', 'Xgboost')
        self.model: Optional[xgb.XGBModel] = None
        self._xgb_use_gpu = False
        self._xgb_gpu_warned = False

    def _build_sklearn_estimator(self, params: Dict[str, Any]) -> xgb.XGBModel:
        if self.ctx.task_type == 'classification':
            return xgb.XGBClassifier(**params)
        return xgb.XGBRegressor(**params)

    def _build_estimator(self) -> xgb.XGBModel:
        use_gpu = bool(self.ctx.use_gpu and _xgb_cuda_available())

@@ -72,276 +254,296 @@ class XGBTrainer(TrainerBase):

            enable_categorical=True,
            predictor='gpu_predictor' if use_gpu else 'cpu_predictor'
        )
        if self.ctx.use_gpu and not use_gpu and not self._xgb_gpu_warned:
            _log(
                "[XGBoost] CUDA requested but not available; falling back to CPU.",
                flush=True,
            )
            self._xgb_gpu_warned = True
        if use_gpu:
            gpu_id = self._resolve_gpu_id()
            params['gpu_id'] = gpu_id
            _log(f">>> XGBoost using GPU ID: {gpu_id}")
        eval_metric = self._resolve_eval_metric()
        if eval_metric is not None:
            params.setdefault("eval_metric", eval_metric)
        use_dmatrix = bool(getattr(self.config, "xgb_use_dmatrix", True))
        if use_dmatrix:
            return _XGBDMatrixWrapper(
                params,
                task_type=self.ctx.task_type,
                use_gpu=use_gpu,
            )
        return self._build_sklearn_estimator(params)

    def _resolve_gpu_id(self) -> int:
        gpu_id = getattr(self.config, "xgb_gpu_id", None)
        if gpu_id is None:
            return 0
        try:
            return int(gpu_id)
        except (TypeError, ValueError):
            return 0

    def _maybe_cleanup_gpu(self) -> None:
        if not bool(getattr(self.config, "xgb_cleanup_per_fold", False)):
            return
        synchronize = bool(getattr(self.config, "xgb_cleanup_synchronize", False))
        self._clean_gpu(synchronize=synchronize)

    def _resolve_eval_metric(self) -> Optional[Any]:
        fit_params = self.ctx.fit_params or {}
        eval_metric = fit_params.get("eval_metric")
        if eval_metric is None:
            return "logloss" if self.ctx.task_type == 'classification' else "rmse"
        return eval_metric

    def _fit_supports_param(self, name: str) -> bool:
        try:
            fit = xgb.XGBClassifier.fit if self.ctx.task_type == 'classification' else xgb.XGBRegressor.fit
            return name in inspect.signature(fit).parameters
        except (TypeError, ValueError):
            return True

    def _resolve_early_stopping_rounds(self, n_estimators: int) -> int:
        n_estimators = max(1, int(n_estimators))
        base = max(5, n_estimators // 10)
        return min(50, base)

(The remainder of the hunk covers _build_fit_kwargs, ensemble_predict, cross_val and train, carried over from 0.4.5 with print() output routed through _log() and a self._maybe_cleanup_gpu() call after each cross-validation fold in ensemble_predict and cross_val; train() ends by caching predictions and storing the fitted model on self.ctx.xgb_best.)