ins-pricing 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ins_pricing/README.md +74 -56
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +52 -50
  4. ins_pricing/cli/BayesOpt_incremental.py +832 -898
  5. ins_pricing/cli/Explain_Run.py +31 -23
  6. ins_pricing/cli/Explain_entry.py +532 -579
  7. ins_pricing/cli/Pricing_Run.py +31 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +1440 -1438
  9. ins_pricing/cli/utils/cli_common.py +256 -256
  10. ins_pricing/cli/utils/cli_config.py +375 -375
  11. ins_pricing/cli/utils/import_resolver.py +382 -365
  12. ins_pricing/cli/utils/notebook_utils.py +340 -340
  13. ins_pricing/cli/watchdog_run.py +209 -201
  14. ins_pricing/frontend/README.md +573 -419
  15. ins_pricing/frontend/__init__.py +10 -10
  16. ins_pricing/frontend/config_builder.py +1 -0
  17. ins_pricing/frontend/example_workflows.py +1 -1
  18. ins_pricing/governance/__init__.py +20 -20
  19. ins_pricing/governance/release.py +159 -159
  20. ins_pricing/modelling/README.md +67 -0
  21. ins_pricing/modelling/__init__.py +147 -92
  22. ins_pricing/modelling/bayesopt/README.md +59 -0
  23. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  24. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -550
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -962
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  29. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
  32. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
  36. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
  37. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
  38. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
  39. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  40. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  41. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  42. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  43. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  44. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
  45. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  46. ins_pricing/modelling/explain/__init__.py +55 -55
  47. ins_pricing/modelling/explain/metrics.py +27 -174
  48. ins_pricing/modelling/explain/permutation.py +237 -237
  49. ins_pricing/modelling/plotting/__init__.py +40 -36
  50. ins_pricing/modelling/plotting/compat.py +228 -0
  51. ins_pricing/modelling/plotting/curves.py +572 -572
  52. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  53. ins_pricing/modelling/plotting/geo.py +362 -362
  54. ins_pricing/modelling/plotting/importance.py +121 -121
  55. ins_pricing/pricing/__init__.py +27 -27
  56. ins_pricing/production/__init__.py +35 -25
  57. ins_pricing/production/{predict.py → inference.py} +140 -57
  58. ins_pricing/production/monitoring.py +8 -21
  59. ins_pricing/reporting/__init__.py +11 -11
  60. ins_pricing/setup.py +1 -1
  61. ins_pricing/tests/production/test_inference.py +90 -0
  62. ins_pricing/utils/__init__.py +116 -83
  63. ins_pricing/utils/device.py +255 -255
  64. ins_pricing/utils/features.py +53 -0
  65. ins_pricing/utils/io.py +72 -0
  66. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  67. ins_pricing/utils/metrics.py +158 -24
  68. ins_pricing/utils/numerics.py +76 -0
  69. ins_pricing/utils/paths.py +9 -1
  70. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +55 -35
  71. ins_pricing-0.5.0.dist-info/RECORD +131 -0
  72. ins_pricing/CHANGELOG.md +0 -272
  73. ins_pricing/RELEASE_NOTES_0.2.8.md +0 -344
  74. ins_pricing/docs/LOSS_FUNCTIONS.md +0 -78
  75. ins_pricing/docs/modelling/BayesOpt_USAGE.md +0 -945
  76. ins_pricing/docs/modelling/README.md +0 -34
  77. ins_pricing/frontend/QUICKSTART.md +0 -152
  78. ins_pricing/modelling/core/BayesOpt.py +0 -146
  79. ins_pricing/modelling/core/__init__.py +0 -1
  80. ins_pricing/modelling/core/bayesopt/PHASE2_REFACTORING_SUMMARY.md +0 -449
  81. ins_pricing/modelling/core/bayesopt/PHASE3_REFACTORING_SUMMARY.md +0 -406
  82. ins_pricing/modelling/core/bayesopt/REFACTORING_SUMMARY.md +0 -247
  83. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  84. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  85. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  86. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  87. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  88. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  89. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  90. ins_pricing/tests/production/test_predict.py +0 -233
  91. ins_pricing-0.4.4.dist-info/RECORD +0 -137
  92. /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
  93. /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
  94. /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
  95. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
  96. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
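The moves listed above relocate the BayesOpt stack from ins_pricing.modelling.core.bayesopt to ins_pricing.modelling.bayesopt, hoist shared helpers (constants, losses, metrics, IO) into ins_pricing.utils, and rename production/predict.py to production/inference.py. As a minimal sketch of the resulting import migration, assuming downstream code imported these modules by their old paths (module paths are taken from the file list and from the hunk below; anything else is illustrative):

    # Import-path migration sketch for 0.4.4 -> 0.5.0 (paths from the file list above).
    # Old layout (0.4.4), shown for comparison:
    #   from ins_pricing.modelling.core.bayesopt.trainers.trainer_base import TrainerBase
    #   from ins_pricing.modelling.core.bayesopt.utils import EPS
    #   from ins_pricing.production import predict
    # New layout (0.5.0):
    from ins_pricing.modelling.bayesopt.trainers.trainer_base import TrainerBase
    from ins_pricing.utils import EPS                     # shared constants now live under ins_pricing.utils
    from ins_pricing.utils.losses import regression_loss  # losses hoisted out of modelling/core/bayesopt/utils
    from ins_pricing.production import inference          # predict.py renamed to inference.py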
@@ -1,347 +1,346 @@
from __future__ import annotations

import inspect
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import optuna
import torch
import xgboost as xgb
from sklearn.metrics import log_loss

- from .trainer_base import TrainerBase
- from ..utils import EPS
- from ..utils.losses import regression_loss
+ from ins_pricing.modelling.bayesopt.trainers.trainer_base import TrainerBase
+ from ins_pricing.utils import EPS
+ from ins_pricing.utils.losses import regression_loss

_XGB_CUDA_CHECKED = False
_XGB_HAS_CUDA = False

_XGB_CUDA_CHECKED = False
_XGB_HAS_CUDA = False


def _xgb_cuda_available() -> bool:
    # Best-effort check for XGBoost CUDA build; cached to avoid repeated checks.
    global _XGB_CUDA_CHECKED, _XGB_HAS_CUDA
    if _XGB_CUDA_CHECKED:
        return _XGB_HAS_CUDA
    _XGB_CUDA_CHECKED = True
    if not torch.cuda.is_available():
        _XGB_HAS_CUDA = False
        return False
    try:
        build_info = getattr(xgb, "build_info", None)
        if callable(build_info):
            info = build_info()
            for key in ("USE_CUDA", "use_cuda", "cuda"):
                if key in info:
                    val = info[key]
                    if isinstance(val, str):
                        _XGB_HAS_CUDA = val.strip().upper() in (
                            "ON", "YES", "TRUE", "1")
                    else:
                        _XGB_HAS_CUDA = bool(val)
                    return _XGB_HAS_CUDA
    except Exception:
        pass
    try:
        has_cuda = getattr(getattr(xgb, "core", None), "_has_cuda_support", None)
        if callable(has_cuda):
            _XGB_HAS_CUDA = bool(has_cuda())
            return _XGB_HAS_CUDA
    except Exception:
        pass
    _XGB_HAS_CUDA = False
    return False

class XGBTrainer(TrainerBase):
    def __init__(self, context: "BayesOptModel") -> None:
        super().__init__(context, 'Xgboost', 'Xgboost')
        self.model: Optional[xgb.XGBModel] = None
        self._xgb_use_gpu = False
        self._xgb_gpu_warned = False

    def _build_estimator(self) -> xgb.XGBModel:
        use_gpu = bool(self.ctx.use_gpu and _xgb_cuda_available())
        self._xgb_use_gpu = use_gpu
        params = dict(
            objective=self.ctx.obj,
            random_state=self.ctx.rand_seed,
            subsample=0.9,
            tree_method='gpu_hist' if use_gpu else 'hist',
            enable_categorical=True,
            predictor='gpu_predictor' if use_gpu else 'cpu_predictor'
        )
        if self.ctx.use_gpu and not use_gpu and not self._xgb_gpu_warned:
            print(
                "[XGBoost] CUDA requested but not available; falling back to CPU.",
                flush=True,
            )
            self._xgb_gpu_warned = True
        if use_gpu:
            params['gpu_id'] = 0
            print(f">>> XGBoost using GPU ID: 0 (Single GPU Mode)")
        eval_metric = self._resolve_eval_metric()
        if eval_metric is not None:
            params.setdefault("eval_metric", eval_metric)
        if self.ctx.task_type == 'classification':
            return xgb.XGBClassifier(**params)
        return xgb.XGBRegressor(**params)

    def _resolve_eval_metric(self) -> Optional[Any]:
        fit_params = self.ctx.fit_params or {}
        eval_metric = fit_params.get("eval_metric")
        if eval_metric is None:
            return "logloss" if self.ctx.task_type == 'classification' else "rmse"
        return eval_metric

    def _fit_supports_param(self, name: str) -> bool:
        try:
            fit = xgb.XGBClassifier.fit if self.ctx.task_type == 'classification' else xgb.XGBRegressor.fit
            return name in inspect.signature(fit).parameters
        except (TypeError, ValueError):
            return True

    def _resolve_early_stopping_rounds(self, n_estimators: int) -> int:
        n_estimators = max(1, int(n_estimators))
        base = max(5, n_estimators // 10)
        return min(50, base)

    def _build_fit_kwargs(self,
                          w_train,
                          X_val=None,
                          y_val=None,
                          w_val=None,
                          n_estimators: Optional[int] = None) -> Dict[str, Any]:
        supports_early = self._fit_supports_param("early_stopping_rounds")
        fit_kwargs = dict(self.ctx.fit_params or {})
        fit_kwargs.pop("sample_weight", None)
        fit_kwargs.pop("eval_metric", None)
        fit_kwargs["sample_weight"] = w_train

        if "eval_set" not in fit_kwargs and X_val is not None and y_val is not None:
            fit_kwargs["eval_set"] = [(X_val, y_val)]
            if w_val is not None:
                fit_kwargs["sample_weight_eval_set"] = [w_val]

        if (
            supports_early
            and "early_stopping_rounds" not in fit_kwargs
            and "eval_set" in fit_kwargs
        ):
            rounds = self._resolve_early_stopping_rounds(n_estimators or 100)
            fit_kwargs["early_stopping_rounds"] = rounds
        if not supports_early:
            fit_kwargs.pop("early_stopping_rounds", None)

        fit_kwargs.setdefault("verbose", False)
        return fit_kwargs

    def ensemble_predict(self, k: int) -> None:
        if not self.best_params:
            raise RuntimeError("Run tune() first to obtain best XGB parameters.")
        k = max(2, int(k))
        X_all = self.ctx.train_data[self.ctx.factor_nmes]
        y_all = self.ctx.train_data[self.ctx.resp_nme].values
        w_all = self.ctx.train_data[self.ctx.weight_nme].values
        X_test = self.ctx.test_data[self.ctx.factor_nmes]
        n_samples = len(X_all)
        split_iter, _ = self._resolve_ensemble_splits(X_all, k=k)
        if split_iter is None:
            print(
                f"[XGB Ensemble] unable to build CV split (n_samples={n_samples}); skip ensemble.",
                flush=True,
            )
            return
        preds_train_sum = np.zeros(n_samples, dtype=np.float64)
        preds_test_sum = np.zeros(len(X_test), dtype=np.float64)

        split_count = 0
        for train_idx, val_idx in split_iter:
            X_train = X_all.iloc[train_idx]
            y_train = y_all[train_idx]
            w_train = w_all[train_idx]
            X_val = X_all.iloc[val_idx]
            y_val = y_all[val_idx]
            w_val = w_all[val_idx]

            clf = self._build_estimator()
            clf.set_params(**self.best_params)
            fit_kwargs = self._build_fit_kwargs(
                w_train=w_train,
                X_val=X_val,
                y_val=y_val,
                w_val=w_val,
                n_estimators=self.best_params.get("n_estimators", 100),
            )
            clf.fit(X_train, y_train, **fit_kwargs)

            if self.ctx.task_type == 'classification':
                pred_train = clf.predict_proba(X_all)[:, 1]
                pred_test = clf.predict_proba(X_test)[:, 1]
            else:
                pred_train = clf.predict(X_all)
                pred_test = clf.predict(X_test)
            preds_train_sum += np.asarray(pred_train, dtype=np.float64)
            preds_test_sum += np.asarray(pred_test, dtype=np.float64)
            self._clean_gpu()
            split_count += 1

        if split_count < 1:
            print(
                f"[XGB Ensemble] no CV splits generated; skip ensemble.",
                flush=True,
            )
            return
        preds_train = preds_train_sum / float(split_count)
        preds_test = preds_test_sum / float(split_count)
        self._cache_predictions("xgb", preds_train, preds_test)

    def cross_val(self, trial: optuna.trial.Trial) -> float:
        learning_rate = trial.suggest_float(
            'learning_rate', 1e-5, 1e-1, log=True)
        gamma = trial.suggest_float('gamma', 0, 10000)
        max_depth_max = max(
            3, int(getattr(self.config, "xgb_max_depth_max", 25)))
        n_estimators_max = max(
            10, int(getattr(self.config, "xgb_n_estimators_max", 500)))
        max_depth = trial.suggest_int('max_depth', 3, max_depth_max)
        n_estimators = trial.suggest_int(
            'n_estimators', 10, n_estimators_max, step=10)
        min_child_weight = trial.suggest_int(
            'min_child_weight', 100, 10000, step=100)
        reg_alpha = trial.suggest_float('reg_alpha', 1e-10, 1, log=True)
        reg_lambda = trial.suggest_float('reg_lambda', 1e-10, 1, log=True)
        if trial is not None:
            print(
                f"[Optuna][Xgboost] trial_id={trial.number} max_depth={max_depth} "
                f"n_estimators={n_estimators}",
                flush=True,
            )
        if max_depth >= 20 and n_estimators >= 300:
            raise optuna.TrialPruned(
                "XGB config is likely too slow (max_depth>=20 & n_estimators>=300)")
        clf = self._build_estimator()
        params = {
            'learning_rate': learning_rate,
            'gamma': gamma,
            'max_depth': max_depth,
            'n_estimators': n_estimators,
            'min_child_weight': min_child_weight,
            'reg_alpha': reg_alpha,
            'reg_lambda': reg_lambda
        }
        loss_name = getattr(self.ctx, "loss_name", "tweedie")
        tweedie_variance_power = None
        if self.ctx.task_type != 'classification':
            if loss_name == "tweedie":
                tweedie_variance_power = trial.suggest_float(
                    'tweedie_variance_power', 1, 2)
                params['tweedie_variance_power'] = tweedie_variance_power
            elif loss_name == "poisson":
                tweedie_variance_power = 1.0
            elif loss_name == "gamma":
                tweedie_variance_power = 2.0
        X_all = self.ctx.train_data[self.ctx.factor_nmes]
        y_all = self.ctx.train_data[self.ctx.resp_nme].values
        w_all = self.ctx.train_data[self.ctx.weight_nme].values

        losses: List[float] = []
        for train_idx, val_idx in self.ctx.cv.split(X_all):
            X_train = X_all.iloc[train_idx]
            y_train = y_all[train_idx]
            w_train = w_all[train_idx]
            X_val = X_all.iloc[val_idx]
            y_val = y_all[val_idx]
            w_val = w_all[val_idx]

            clf = self._build_estimator()
            clf.set_params(**params)
            fit_kwargs = self._build_fit_kwargs(
                w_train=w_train,
                X_val=X_val,
                y_val=y_val,
                w_val=w_val,
                n_estimators=n_estimators,
            )
            clf.fit(X_train, y_train, **fit_kwargs)

            if self.ctx.task_type == 'classification':
                y_pred = clf.predict_proba(X_val)[:, 1]
                y_pred = np.clip(y_pred, EPS, 1 - EPS)
                loss = log_loss(y_val, y_pred, sample_weight=w_val)
            else:
                y_pred = clf.predict(X_val)
                loss = regression_loss(
                    y_val,
                    y_pred,
                    w_val,
                    loss_name=loss_name,
                    tweedie_power=tweedie_variance_power,
                )
            losses.append(float(loss))
            self._clean_gpu()

        return float(np.mean(losses))

    def train(self) -> None:
        if not self.best_params:
            raise RuntimeError("Run tune() first to obtain best XGB parameters.")
        self.model = self._build_estimator()
        self.model.set_params(**self.best_params)
        use_refit = bool(getattr(self.ctx.config, "final_refit", True))
        predict_fn = None
        if self.ctx.task_type == 'classification':
            def _predict_proba(X, **_kwargs):
                return self.model.predict_proba(X)[:, 1]
            predict_fn = _predict_proba
        X_all = self.ctx.train_data[self.ctx.factor_nmes]
        y_all = self.ctx.train_data[self.ctx.resp_nme].values
        w_all = self.ctx.train_data[self.ctx.weight_nme].values

        split = self._resolve_train_val_indices(X_all)
        if split is not None:
            train_idx, val_idx = split
            X_train = X_all.iloc[train_idx]
            y_train = y_all[train_idx]
            w_train = w_all[train_idx]
            X_val = X_all.iloc[val_idx]
            y_val = y_all[val_idx]
            w_val = w_all[val_idx]
            fit_kwargs = self._build_fit_kwargs(
                w_train=w_train,
                X_val=X_val,
                y_val=y_val,
                w_val=w_val,
                n_estimators=self.best_params.get("n_estimators", 100),
            )
            self.model.fit(X_train, y_train, **fit_kwargs)
            best_iter = getattr(self.model, "best_iteration", None)
            if use_refit and best_iter is not None:
                refit_model = self._build_estimator()
                refit_params = dict(self.best_params)
                refit_params["n_estimators"] = int(best_iter) + 1
                refit_model.set_params(**refit_params)
                refit_kwargs = dict(self.ctx.fit_params or {})
                refit_kwargs.setdefault("sample_weight", w_all)
                refit_kwargs.pop("eval_set", None)
                refit_kwargs.pop("sample_weight_eval_set", None)
                refit_kwargs.pop("early_stopping_rounds", None)
                refit_kwargs.pop("eval_metric", None)
                refit_kwargs.setdefault("verbose", False)
                refit_model.fit(X_all, y_all, **refit_kwargs)
                self.model = refit_model
        else:
            fit_kwargs = dict(self.ctx.fit_params or {})
            fit_kwargs.setdefault("sample_weight", w_all)
            fit_kwargs.pop("eval_metric", None)
            self.model.fit(X_all, y_all, **fit_kwargs)

        self.ctx.model_label.append(self.label)
        self._predict_and_cache(
            self.model,
            pred_prefix='xgb',
            predict_fn=predict_fn
        )
        self.ctx.xgb_best = self.model
-
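
The only functional change in this hunk is the switch from relative to absolute imports; the GPU probing and training logic is unchanged. As a standalone illustration of the cached CUDA-capability check that _xgb_cuda_available() performs, here is a minimal sketch (assumes xgboost >= 1.6 for xgboost.build_info() plus a PyTorch install; this is not part of the package):

    # Sketch of the CUDA-capability probe pattern used in trainer_xgb.py.
    import torch
    import xgboost as xgb

    def xgb_cuda_available() -> bool:
        """True when a CUDA device and a CUDA-enabled XGBoost build are both present."""
        if not torch.cuda.is_available():
            return False
        try:
            info = xgb.build_info()  # dict of build flags, e.g. {"USE_CUDA": True, ...}
            val = info.get("USE_CUDA", False)
            if isinstance(val, str):
                return val.strip().upper() in ("ON", "YES", "TRUE", "1")
            return bool(val)
        except Exception:
            return False

    # The trainer then selects the matching XGBoost backend:
    tree_method = "gpu_hist" if xgb_cuda_available() else "hist"
    print(f"tree_method={tree_method}")

Note that newer XGBoost releases (2.x) steer users toward tree_method="hist" combined with device="cuda" rather than the gpu_hist/gpu_predictor settings the packaged code still uses.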