ins-pricing 0.4.5-py3-none-any.whl → 0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. ins_pricing/README.md +48 -22
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +58 -46
  4. ins_pricing/cli/BayesOpt_incremental.py +77 -110
  5. ins_pricing/cli/Explain_Run.py +42 -23
  6. ins_pricing/cli/Explain_entry.py +551 -577
  7. ins_pricing/cli/Pricing_Run.py +42 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +51 -16
  9. ins_pricing/cli/utils/bootstrap.py +23 -0
  10. ins_pricing/cli/utils/cli_common.py +256 -256
  11. ins_pricing/cli/utils/cli_config.py +379 -360
  12. ins_pricing/cli/utils/import_resolver.py +375 -358
  13. ins_pricing/cli/utils/notebook_utils.py +256 -242
  14. ins_pricing/cli/watchdog_run.py +216 -198
  15. ins_pricing/frontend/__init__.py +10 -10
  16. ins_pricing/frontend/app.py +132 -61
  17. ins_pricing/frontend/config_builder.py +33 -0
  18. ins_pricing/frontend/example_config.json +11 -0
  19. ins_pricing/frontend/example_workflows.py +1 -1
  20. ins_pricing/frontend/runner.py +340 -388
  21. ins_pricing/governance/__init__.py +20 -20
  22. ins_pricing/governance/release.py +159 -159
  23. ins_pricing/modelling/README.md +1 -1
  24. ins_pricing/modelling/__init__.py +147 -92
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +31 -13
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +12 -0
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +589 -552
  29. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +987 -958
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +488 -548
  32. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +349 -342
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +921 -913
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +794 -785
  36. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +454 -446
  37. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  38. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1294 -1282
  39. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +64 -56
  40. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +203 -198
  41. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +333 -325
  42. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +279 -267
  43. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +515 -313
  44. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  45. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  46. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +193 -186
  47. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  48. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  49. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  50. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +636 -623
  51. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  52. ins_pricing/modelling/explain/__init__.py +55 -55
  53. ins_pricing/modelling/explain/metrics.py +27 -174
  54. ins_pricing/modelling/explain/permutation.py +237 -237
  55. ins_pricing/modelling/plotting/__init__.py +40 -36
  56. ins_pricing/modelling/plotting/compat.py +228 -0
  57. ins_pricing/modelling/plotting/curves.py +572 -572
  58. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  59. ins_pricing/modelling/plotting/geo.py +362 -362
  60. ins_pricing/modelling/plotting/importance.py +121 -121
  61. ins_pricing/pricing/__init__.py +27 -27
  62. ins_pricing/pricing/factors.py +67 -56
  63. ins_pricing/production/__init__.py +35 -25
  64. ins_pricing/production/{predict.py → inference.py} +140 -57
  65. ins_pricing/production/monitoring.py +8 -21
  66. ins_pricing/reporting/__init__.py +11 -11
  67. ins_pricing/setup.py +1 -1
  68. ins_pricing/tests/production/test_inference.py +90 -0
  69. ins_pricing/utils/__init__.py +112 -78
  70. ins_pricing/utils/device.py +258 -237
  71. ins_pricing/utils/features.py +53 -0
  72. ins_pricing/utils/io.py +72 -0
  73. ins_pricing/utils/logging.py +34 -1
  74. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  75. ins_pricing/utils/metrics.py +158 -24
  76. ins_pricing/utils/numerics.py +76 -0
  77. ins_pricing/utils/paths.py +9 -1
  78. ins_pricing/utils/profiling.py +8 -4
  79. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/METADATA +1 -1
  80. ins_pricing-0.5.1.dist-info/RECORD +132 -0
  81. ins_pricing/modelling/core/BayesOpt.py +0 -146
  82. ins_pricing/modelling/core/__init__.py +0 -1
  83. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  84. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  85. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  86. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  87. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  88. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  89. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  90. ins_pricing/tests/production/test_predict.py +0 -233
  91. ins_pricing-0.4.5.dist-info/RECORD +0 -130
  92. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/WHEEL +0 -0
  93. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/top_level.txt +0 -0
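Most of the churn in this release comes from the renames above: ins_pricing.modelling.core.bayesopt is flattened into ins_pricing.modelling.bayesopt, and shared helpers (losses, metrics, logging, IO) move under ins_pricing.utils. A minimal import-migration sketch, inferred from the rename map and from the new absolute imports visible in the trainer diff below (the old absolute paths are reconstructed from the removed relative imports and are an assumption):

    # 0.4.5 layout (removed in 0.5.1) -- reconstructed, for illustration only
    # from ins_pricing.modelling.core.bayesopt.trainers.trainer_base import TrainerBase
    # from ins_pricing.modelling.core.bayesopt.utils import EPS

    # 0.5.1 layout, exactly as imported by the rewritten trainer_xgb.py below
    from ins_pricing.modelling.bayesopt.trainers.trainer_base import TrainerBase
    from ins_pricing.utils import EPS, get_logger, log_print
    from ins_pricing.utils.losses import regression_loss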
ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py
@@ -1,65 +1,247 @@
- from __future__ import annotations
-
+ from __future__ import annotations
+
  import inspect
+ import os
  from typing import Any, Dict, List, Optional, Tuple
+
+ import numpy as np
+ import optuna
+ import torch
+ import xgboost as xgb
+ from sklearn.metrics import log_loss
+
+ from ins_pricing.modelling.bayesopt.trainers.trainer_base import TrainerBase
+ from ins_pricing.utils import EPS, get_logger, log_print
+ from ins_pricing.utils.losses import regression_loss
+
+ _logger = get_logger("ins_pricing.trainer.xgb")
+
+
+ def _log(*args, **kwargs) -> None:
+     log_print(_logger, *args, **kwargs)
+
+ _XGB_CUDA_CHECKED = False
+ _XGB_HAS_CUDA = False

- import numpy as np
- import optuna
- import torch
- import xgboost as xgb
- from sklearn.metrics import log_loss

- from .trainer_base import TrainerBase
- from ..utils import EPS
- from ..utils.losses import regression_loss
+ def _is_oom_error(exc: Exception) -> bool:
+     msg = str(exc).lower()
+     return "out of memory" in msg or ("cuda" in msg and "memory" in msg)

- _XGB_CUDA_CHECKED = False
- _XGB_HAS_CUDA = False

- _XGB_CUDA_CHECKED = False
- _XGB_HAS_CUDA = False
+ class _XGBDMatrixWrapper:
+     """Sklearn-like wrapper that uses xgb.train + (Quantile)DMatrix internally."""
+
+     def __init__(
+         self,
+         params: Dict[str, Any],
+         *,
+         task_type: str,
+         use_gpu: bool,
+         allow_cpu_fallback: bool = True,
+     ) -> None:
+         self.params = dict(params)
+         self.task_type = task_type
+         self.use_gpu = bool(use_gpu)
+         self.allow_cpu_fallback = allow_cpu_fallback
+         self._booster: Optional[xgb.Booster] = None
+         self.best_iteration: Optional[int] = None
+
+     def set_params(self, **params: Any) -> "_XGBDMatrixWrapper":
+         self.params.update(params)
+         return self

+     def get_params(self, deep: bool = True) -> Dict[str, Any]:
+         _ = deep
+         return dict(self.params)

- def _xgb_cuda_available() -> bool:
-     # Best-effort check for XGBoost CUDA build; cached to avoid repeated checks.
-     global _XGB_CUDA_CHECKED, _XGB_HAS_CUDA
-     if _XGB_CUDA_CHECKED:
-         return _XGB_HAS_CUDA
-     _XGB_CUDA_CHECKED = True
-     if not torch.cuda.is_available():
-         _XGB_HAS_CUDA = False
-         return False
-     try:
-         build_info = getattr(xgb, "build_info", None)
-         if callable(build_info):
-             info = build_info()
-             for key in ("USE_CUDA", "use_cuda", "cuda"):
-                 if key in info:
-                     val = info[key]
-                     if isinstance(val, str):
-                         _XGB_HAS_CUDA = val.strip().upper() in (
-                             "ON", "YES", "TRUE", "1")
-                     else:
-                         _XGB_HAS_CUDA = bool(val)
-                     return _XGB_HAS_CUDA
-     except Exception:
-         pass
-     try:
-         has_cuda = getattr(getattr(xgb, "core", None), "_has_cuda_support", None)
-         if callable(has_cuda):
-             _XGB_HAS_CUDA = bool(has_cuda())
-             return _XGB_HAS_CUDA
-     except Exception:
-         pass
-     _XGB_HAS_CUDA = False
-     return False
+     def _select_dmatrix_class(self) -> Any:
+         if self.use_gpu and hasattr(xgb, "DeviceQuantileDMatrix"):
+             return xgb.DeviceQuantileDMatrix
+         if hasattr(xgb, "QuantileDMatrix"):
+             return xgb.QuantileDMatrix
+         return xgb.DMatrix

+     def _build_dmatrix(self, X, y=None, weight=None) -> xgb.DMatrix:
+         if isinstance(X, (str, os.PathLike)):
+             raise ValueError(
+                 "External-memory DMatrix is disabled; pass in-memory data instead."
+             )
+         if isinstance(X, xgb.DMatrix):
+             raise ValueError(
+                 "DMatrix inputs are disabled; pass raw in-memory data instead."
+             )
+         dmatrix_cls = self._select_dmatrix_class()
+         kwargs: Dict[str, Any] = {}
+         if y is not None:
+             kwargs["label"] = y
+         if weight is not None:
+             kwargs["weight"] = weight
+         if bool(self.params.get("enable_categorical", False)):
+             kwargs["enable_categorical"] = True
+         try:
+             return dmatrix_cls(X, **kwargs)
+         except TypeError:
+             kwargs.pop("enable_categorical", None)
+             return dmatrix_cls(X, **kwargs)
+         except Exception:
+             if dmatrix_cls is not xgb.DMatrix:
+                 return xgb.DMatrix(X, **kwargs)
+             raise
+
+     def _resolve_train_params(self) -> Dict[str, Any]:
+         params = dict(self.params)
+         if not self.use_gpu:
+             params["tree_method"] = "hist"
+             params["predictor"] = "cpu_predictor"
+             params.pop("gpu_id", None)
+         return params
+
+     def _train_booster(
+         self,
+         X,
+         y,
+         *,
+         sample_weight=None,
+         eval_set=None,
+         sample_weight_eval_set=None,
+         early_stopping_rounds: Optional[int] = None,
+         verbose: bool = False,
+     ) -> None:
+         params = self._resolve_train_params()
+         num_boost_round = int(params.pop("n_estimators", 100))
+         dtrain = self._build_dmatrix(X, y, sample_weight)
+         evals = []
+         if eval_set:
+             weights = sample_weight_eval_set or []
+             for idx, (X_val, y_val) in enumerate(eval_set):
+                 w_val = weights[idx] if idx < len(weights) else None
+                 dval = self._build_dmatrix(X_val, y_val, w_val)
+                 evals.append((dval, f"val{idx}"))
+         self._booster = xgb.train(
+             params,
+             dtrain,
+             num_boost_round=num_boost_round,
+             evals=evals,
+             early_stopping_rounds=early_stopping_rounds,
+             verbose_eval=verbose,
+         )
+         self.best_iteration = getattr(self._booster, "best_iteration", None)
+
+     def fit(self, X, y, **fit_kwargs) -> "_XGBDMatrixWrapper":
+         sample_weight = fit_kwargs.pop("sample_weight", None)
+         eval_set = fit_kwargs.pop("eval_set", None)
+         sample_weight_eval_set = fit_kwargs.pop("sample_weight_eval_set", None)
+         early_stopping_rounds = fit_kwargs.pop("early_stopping_rounds", None)
+         verbose = bool(fit_kwargs.pop("verbose", False))
+         fit_kwargs.pop("eval_metric", None)
+         try:
+             self._train_booster(
+                 X,
+                 y,
+                 sample_weight=sample_weight,
+                 eval_set=eval_set,
+                 sample_weight_eval_set=sample_weight_eval_set,
+                 early_stopping_rounds=early_stopping_rounds,
+                 verbose=verbose,
+             )
+         except Exception as exc:
+             if self.use_gpu and self.allow_cpu_fallback and _is_oom_error(exc):
+                 _log("[XGBoost] GPU OOM detected; retrying with CPU.", flush=True)
+                 self.use_gpu = False
+                 self._train_booster(
+                     X,
+                     y,
+                     sample_weight=sample_weight,
+                     eval_set=eval_set,
+                     sample_weight_eval_set=sample_weight_eval_set,
+                     early_stopping_rounds=early_stopping_rounds,
+                     verbose=verbose,
+                 )
+             else:
+                 raise
+         return self
+
+     def _resolve_iteration_range(self) -> Optional[Tuple[int, int]]:
+         if self.best_iteration is None:
+             return None
+         return (0, int(self.best_iteration) + 1)
+
+     def _predict_raw(self, X) -> np.ndarray:
+         if self._booster is None:
+             raise RuntimeError("Booster not trained.")
+         dtest = self._build_dmatrix(X)
+         iteration_range = self._resolve_iteration_range()
+         if iteration_range is None:
+             return self._booster.predict(dtest)
+         try:
+             return self._booster.predict(dtest, iteration_range=iteration_range)
+         except TypeError:
+             return self._booster.predict(dtest, ntree_limit=iteration_range[1])
+
+     def predict(self, X, **_kwargs) -> np.ndarray:
+         pred = self._predict_raw(X)
+         if self.task_type == "classification":
+             if pred.ndim == 1:
+                 return (pred > 0.5).astype(int)
+             return np.argmax(pred, axis=1)
+         return pred
+
+     def predict_proba(self, X, **_kwargs) -> np.ndarray:
+         pred = self._predict_raw(X)
+         if pred.ndim == 1:
+             return np.column_stack([1 - pred, pred])
+         return pred
+
+     def get_booster(self) -> Optional[xgb.Booster]:
+         return self._booster
+
+
+ def _xgb_cuda_available() -> bool:
+     # Best-effort check for XGBoost CUDA build; cached to avoid repeated checks.
+     global _XGB_CUDA_CHECKED, _XGB_HAS_CUDA
+     if _XGB_CUDA_CHECKED:
+         return _XGB_HAS_CUDA
+     _XGB_CUDA_CHECKED = True
+     if not torch.cuda.is_available():
+         _XGB_HAS_CUDA = False
+         return False
+     try:
+         build_info = getattr(xgb, "build_info", None)
+         if callable(build_info):
+             info = build_info()
+             for key in ("USE_CUDA", "use_cuda", "cuda"):
+                 if key in info:
+                     val = info[key]
+                     if isinstance(val, str):
+                         _XGB_HAS_CUDA = val.strip().upper() in (
+                             "ON", "YES", "TRUE", "1")
+                     else:
+                         _XGB_HAS_CUDA = bool(val)
+                     return _XGB_HAS_CUDA
+     except Exception:
+         pass
+     try:
+         has_cuda = getattr(getattr(xgb, "core", None), "_has_cuda_support", None)
+         if callable(has_cuda):
+             _XGB_HAS_CUDA = bool(has_cuda())
+             return _XGB_HAS_CUDA
+     except Exception:
+         pass
+     _XGB_HAS_CUDA = False
+     return False
+
  class XGBTrainer(TrainerBase):
-     def __init__(self, context: "BayesOptModel") -> None:
-         super().__init__(context, 'Xgboost', 'Xgboost')
-         self.model: Optional[xgb.XGBModel] = None
-         self._xgb_use_gpu = False
-         self._xgb_gpu_warned = False
+     def __init__(self, context: "BayesOptModel") -> None:
+         super().__init__(context, 'Xgboost', 'Xgboost')
+         self.model: Optional[xgb.XGBModel] = None
+         self._xgb_use_gpu = False
+         self._xgb_gpu_warned = False
+
+     def _build_sklearn_estimator(self, params: Dict[str, Any]) -> xgb.XGBModel:
+         if self.ctx.task_type == 'classification':
+             return xgb.XGBClassifier(**params)
+         return xgb.XGBRegressor(**params)

      def _build_estimator(self) -> xgb.XGBModel:
          use_gpu = bool(self.ctx.use_gpu and _xgb_cuda_available())
@@ -72,276 +254,296 @@ class XGBTrainer(TrainerBase):
              enable_categorical=True,
              predictor='gpu_predictor' if use_gpu else 'cpu_predictor'
          )
-         if self.ctx.use_gpu and not use_gpu and not self._xgb_gpu_warned:
-             print(
-                 "[XGBoost] CUDA requested but not available; falling back to CPU.",
-                 flush=True,
-             )
-             self._xgb_gpu_warned = True
+         if self.ctx.use_gpu and not use_gpu and not self._xgb_gpu_warned:
+             _log(
+                 "[XGBoost] CUDA requested but not available; falling back to CPU.",
+                 flush=True,
+             )
+             self._xgb_gpu_warned = True
          if use_gpu:
-             params['gpu_id'] = 0
-             print(f">>> XGBoost using GPU ID: 0 (Single GPU Mode)")
+             gpu_id = self._resolve_gpu_id()
+             params['gpu_id'] = gpu_id
+             _log(f">>> XGBoost using GPU ID: {gpu_id}")
          eval_metric = self._resolve_eval_metric()
          if eval_metric is not None:
              params.setdefault("eval_metric", eval_metric)
-         if self.ctx.task_type == 'classification':
-             return xgb.XGBClassifier(**params)
-         return xgb.XGBRegressor(**params)
-
-     def _resolve_eval_metric(self) -> Optional[Any]:
-         fit_params = self.ctx.fit_params or {}
-         eval_metric = fit_params.get("eval_metric")
-         if eval_metric is None:
-             return "logloss" if self.ctx.task_type == 'classification' else "rmse"
-         return eval_metric
+         use_dmatrix = bool(getattr(self.config, "xgb_use_dmatrix", True))
+         if use_dmatrix:
+             return _XGBDMatrixWrapper(
+                 params,
+                 task_type=self.ctx.task_type,
+                 use_gpu=use_gpu,
+             )
+         return self._build_sklearn_estimator(params)

-     def _fit_supports_param(self, name: str) -> bool:
+     def _resolve_gpu_id(self) -> int:
+         gpu_id = getattr(self.config, "xgb_gpu_id", None)
+         if gpu_id is None:
+             return 0
          try:
-             fit = xgb.XGBClassifier.fit if self.ctx.task_type == 'classification' else xgb.XGBRegressor.fit
-             return name in inspect.signature(fit).parameters
+             return int(gpu_id)
          except (TypeError, ValueError):
-             return True
-
-     def _resolve_early_stopping_rounds(self, n_estimators: int) -> int:
-         n_estimators = max(1, int(n_estimators))
-         base = max(5, n_estimators // 10)
-         return min(50, base)
-
-     def _build_fit_kwargs(self,
-                           w_train,
-                           X_val=None,
-                           y_val=None,
-                           w_val=None,
-                           n_estimators: Optional[int] = None) -> Dict[str, Any]:
-         supports_early = self._fit_supports_param("early_stopping_rounds")
-         fit_kwargs = dict(self.ctx.fit_params or {})
-         fit_kwargs.pop("sample_weight", None)
-         fit_kwargs.pop("eval_metric", None)
-         fit_kwargs["sample_weight"] = w_train
-
-         if "eval_set" not in fit_kwargs and X_val is not None and y_val is not None:
-             fit_kwargs["eval_set"] = [(X_val, y_val)]
-             if w_val is not None:
-                 fit_kwargs["sample_weight_eval_set"] = [w_val]
-
-         if (
-             supports_early
-             and "early_stopping_rounds" not in fit_kwargs
-             and "eval_set" in fit_kwargs
-         ):
-             rounds = self._resolve_early_stopping_rounds(n_estimators or 100)
-             fit_kwargs["early_stopping_rounds"] = rounds
-         if not supports_early:
-             fit_kwargs.pop("early_stopping_rounds", None)
-
-         fit_kwargs.setdefault("verbose", False)
-         return fit_kwargs
+             return 0

-     def ensemble_predict(self, k: int) -> None:
-         if not self.best_params:
-             raise RuntimeError("Run tune() first to obtain best XGB parameters.")
-         k = max(2, int(k))
-         X_all = self.ctx.train_data[self.ctx.factor_nmes]
-         y_all = self.ctx.train_data[self.ctx.resp_nme].values
-         w_all = self.ctx.train_data[self.ctx.weight_nme].values
-         X_test = self.ctx.test_data[self.ctx.factor_nmes]
-         n_samples = len(X_all)
-         split_iter, _ = self._resolve_ensemble_splits(X_all, k=k)
-         if split_iter is None:
-             print(
-                 f"[XGB Ensemble] unable to build CV split (n_samples={n_samples}); skip ensemble.",
-                 flush=True,
-             )
+     def _maybe_cleanup_gpu(self) -> None:
+         if not bool(getattr(self.config, "xgb_cleanup_per_fold", False)):
              return
-         preds_train_sum = np.zeros(n_samples, dtype=np.float64)
-         preds_test_sum = np.zeros(len(X_test), dtype=np.float64)
-
-         split_count = 0
-         for train_idx, val_idx in split_iter:
-             X_train = X_all.iloc[train_idx]
-             y_train = y_all[train_idx]
-             w_train = w_all[train_idx]
-             X_val = X_all.iloc[val_idx]
-             y_val = y_all[val_idx]
-             w_val = w_all[val_idx]
-
-             clf = self._build_estimator()
-             clf.set_params(**self.best_params)
-             fit_kwargs = self._build_fit_kwargs(
-                 w_train=w_train,
-                 X_val=X_val,
-                 y_val=y_val,
-                 w_val=w_val,
-                 n_estimators=self.best_params.get("n_estimators", 100),
-             )
-             clf.fit(X_train, y_train, **fit_kwargs)
-
-             if self.ctx.task_type == 'classification':
-                 pred_train = clf.predict_proba(X_all)[:, 1]
-                 pred_test = clf.predict_proba(X_test)[:, 1]
-             else:
-                 pred_train = clf.predict(X_all)
-                 pred_test = clf.predict(X_test)
-             preds_train_sum += np.asarray(pred_train, dtype=np.float64)
-             preds_test_sum += np.asarray(pred_test, dtype=np.float64)
-             self._clean_gpu()
+         synchronize = bool(getattr(self.config, "xgb_cleanup_synchronize", False))
+         self._clean_gpu(synchronize=synchronize)
+
+     def _resolve_eval_metric(self) -> Optional[Any]:
+         fit_params = self.ctx.fit_params or {}
+         eval_metric = fit_params.get("eval_metric")
+         if eval_metric is None:
+             return "logloss" if self.ctx.task_type == 'classification' else "rmse"
+         return eval_metric
+
+     def _fit_supports_param(self, name: str) -> bool:
+         try:
+             fit = xgb.XGBClassifier.fit if self.ctx.task_type == 'classification' else xgb.XGBRegressor.fit
+             return name in inspect.signature(fit).parameters
+         except (TypeError, ValueError):
+             return True
+
+     def _resolve_early_stopping_rounds(self, n_estimators: int) -> int:
+         n_estimators = max(1, int(n_estimators))
+         base = max(5, n_estimators // 10)
+         return min(50, base)
+
+     def _build_fit_kwargs(self,
+                           w_train,
+                           X_val=None,
+                           y_val=None,
+                           w_val=None,
+                           n_estimators: Optional[int] = None) -> Dict[str, Any]:
+         supports_early = self._fit_supports_param("early_stopping_rounds")
+         fit_kwargs = dict(self.ctx.fit_params or {})
+         fit_kwargs.pop("sample_weight", None)
+         fit_kwargs.pop("eval_metric", None)
+         fit_kwargs["sample_weight"] = w_train
+
+         if "eval_set" not in fit_kwargs and X_val is not None and y_val is not None:
+             fit_kwargs["eval_set"] = [(X_val, y_val)]
+             if w_val is not None:
+                 fit_kwargs["sample_weight_eval_set"] = [w_val]
+
+         if (
+             supports_early
+             and "early_stopping_rounds" not in fit_kwargs
+             and "eval_set" in fit_kwargs
+         ):
+             rounds = self._resolve_early_stopping_rounds(n_estimators or 100)
+             fit_kwargs["early_stopping_rounds"] = rounds
+         if not supports_early:
+             fit_kwargs.pop("early_stopping_rounds", None)
+
+         fit_kwargs.setdefault("verbose", False)
+         return fit_kwargs
+
+     def ensemble_predict(self, k: int) -> None:
+         if not self.best_params:
+             raise RuntimeError("Run tune() first to obtain best XGB parameters.")
+         k = max(2, int(k))
+         X_all = self.ctx.train_data[self.ctx.factor_nmes]
+         y_all = self.ctx.train_data[self.ctx.resp_nme].values
+         w_all = self.ctx.train_data[self.ctx.weight_nme].values
+         X_test = self.ctx.test_data[self.ctx.factor_nmes]
+         n_samples = len(X_all)
+         split_iter, _ = self._resolve_ensemble_splits(X_all, k=k)
+         if split_iter is None:
+             _log(
+                 f"[XGB Ensemble] unable to build CV split (n_samples={n_samples}); skip ensemble.",
+                 flush=True,
+             )
+             return
+         preds_train_sum = np.zeros(n_samples, dtype=np.float64)
+         preds_test_sum = np.zeros(len(X_test), dtype=np.float64)
+
+         split_count = 0
+         for train_idx, val_idx in split_iter:
+             X_train = X_all.iloc[train_idx]
+             y_train = y_all[train_idx]
+             w_train = w_all[train_idx]
+             X_val = X_all.iloc[val_idx]
+             y_val = y_all[val_idx]
+             w_val = w_all[val_idx]
+
+             clf = self._build_estimator()
+             clf.set_params(**self.best_params)
+             fit_kwargs = self._build_fit_kwargs(
+                 w_train=w_train,
+                 X_val=X_val,
+                 y_val=y_val,
+                 w_val=w_val,
+                 n_estimators=self.best_params.get("n_estimators", 100),
+             )
+             clf.fit(X_train, y_train, **fit_kwargs)
+
+             if self.ctx.task_type == 'classification':
+                 pred_train = clf.predict_proba(X_all)[:, 1]
+                 pred_test = clf.predict_proba(X_test)[:, 1]
+             else:
+                 pred_train = clf.predict(X_all)
+                 pred_test = clf.predict(X_test)
+             preds_train_sum += np.asarray(pred_train, dtype=np.float64)
+             preds_test_sum += np.asarray(pred_test, dtype=np.float64)
+             self._maybe_cleanup_gpu()
              split_count += 1
-
-         if split_count < 1:
-             print(
-                 f"[XGB Ensemble] no CV splits generated; skip ensemble.",
-                 flush=True,
-             )
-             return
-         preds_train = preds_train_sum / float(split_count)
-         preds_test = preds_test_sum / float(split_count)
-         self._cache_predictions("xgb", preds_train, preds_test)
-
-     def cross_val(self, trial: optuna.trial.Trial) -> float:
-         learning_rate = trial.suggest_float(
-             'learning_rate', 1e-5, 1e-1, log=True)
-         gamma = trial.suggest_float('gamma', 0, 10000)
-         max_depth_max = max(
-             3, int(getattr(self.config, "xgb_max_depth_max", 25)))
-         n_estimators_max = max(
-             10, int(getattr(self.config, "xgb_n_estimators_max", 500)))
-         max_depth = trial.suggest_int('max_depth', 3, max_depth_max)
-         n_estimators = trial.suggest_int(
-             'n_estimators', 10, n_estimators_max, step=10)
-         min_child_weight = trial.suggest_int(
-             'min_child_weight', 100, 10000, step=100)
-         reg_alpha = trial.suggest_float('reg_alpha', 1e-10, 1, log=True)
-         reg_lambda = trial.suggest_float('reg_lambda', 1e-10, 1, log=True)
-         if trial is not None:
-             print(
-                 f"[Optuna][Xgboost] trial_id={trial.number} max_depth={max_depth} "
-                 f"n_estimators={n_estimators}",
-                 flush=True,
-             )
-         if max_depth >= 20 and n_estimators >= 300:
-             raise optuna.TrialPruned(
-                 "XGB config is likely too slow (max_depth>=20 & n_estimators>=300)")
-         clf = self._build_estimator()
-         params = {
-             'learning_rate': learning_rate,
-             'gamma': gamma,
-             'max_depth': max_depth,
-             'n_estimators': n_estimators,
-             'min_child_weight': min_child_weight,
-             'reg_alpha': reg_alpha,
-             'reg_lambda': reg_lambda
-         }
-         loss_name = getattr(self.ctx, "loss_name", "tweedie")
-         tweedie_variance_power = None
-         if self.ctx.task_type != 'classification':
-             if loss_name == "tweedie":
-                 tweedie_variance_power = trial.suggest_float(
-                     'tweedie_variance_power', 1, 2)
-                 params['tweedie_variance_power'] = tweedie_variance_power
-             elif loss_name == "poisson":
-                 tweedie_variance_power = 1.0
-             elif loss_name == "gamma":
-                 tweedie_variance_power = 2.0
-         X_all = self.ctx.train_data[self.ctx.factor_nmes]
-         y_all = self.ctx.train_data[self.ctx.resp_nme].values
-         w_all = self.ctx.train_data[self.ctx.weight_nme].values
-
-         losses: List[float] = []
-         for train_idx, val_idx in self.ctx.cv.split(X_all):
-             X_train = X_all.iloc[train_idx]
-             y_train = y_all[train_idx]
-             w_train = w_all[train_idx]
-             X_val = X_all.iloc[val_idx]
-             y_val = y_all[val_idx]
-             w_val = w_all[val_idx]
-
-             clf = self._build_estimator()
-             clf.set_params(**params)
-             fit_kwargs = self._build_fit_kwargs(
-                 w_train=w_train,
-                 X_val=X_val,
-                 y_val=y_val,
-                 w_val=w_val,
-                 n_estimators=n_estimators,
-             )
-             clf.fit(X_train, y_train, **fit_kwargs)
-
-             if self.ctx.task_type == 'classification':
-                 y_pred = clf.predict_proba(X_val)[:, 1]
-                 y_pred = np.clip(y_pred, EPS, 1 - EPS)
-                 loss = log_loss(y_val, y_pred, sample_weight=w_val)
-             else:
-                 y_pred = clf.predict(X_val)
-                 loss = regression_loss(
-                     y_val,
-                     y_pred,
-                     w_val,
-                     loss_name=loss_name,
-                     tweedie_power=tweedie_variance_power,
-                 )
-             losses.append(float(loss))
-             self._clean_gpu()
+
+         if split_count < 1:
+             _log(
+                 f"[XGB Ensemble] no CV splits generated; skip ensemble.",
+                 flush=True,
+             )
+             return
+         preds_train = preds_train_sum / float(split_count)
+         preds_test = preds_test_sum / float(split_count)
+         self._cache_predictions("xgb", preds_train, preds_test)
+
+     def cross_val(self, trial: optuna.trial.Trial) -> float:
+         learning_rate = trial.suggest_float(
+             'learning_rate', 1e-5, 1e-1, log=True)
+         gamma = trial.suggest_float('gamma', 0, 10000)
+         max_depth_max = max(
+             3, int(getattr(self.config, "xgb_max_depth_max", 25)))
+         n_estimators_max = max(
+             10, int(getattr(self.config, "xgb_n_estimators_max", 500)))
+         max_depth = trial.suggest_int('max_depth', 3, max_depth_max)
+         n_estimators = trial.suggest_int(
+             'n_estimators', 10, n_estimators_max, step=10)
+         min_child_weight = trial.suggest_int(
+             'min_child_weight', 100, 10000, step=100)
+         reg_alpha = trial.suggest_float('reg_alpha', 1e-10, 1, log=True)
+         reg_lambda = trial.suggest_float('reg_lambda', 1e-10, 1, log=True)
+         if trial is not None:
+             _log(
+                 f"[Optuna][Xgboost] trial_id={trial.number} max_depth={max_depth} "
+                 f"n_estimators={n_estimators}",
+                 flush=True,
+             )
+         if max_depth >= 20 and n_estimators >= 300:
+             raise optuna.TrialPruned(
+                 "XGB config is likely too slow (max_depth>=20 & n_estimators>=300)")
+         clf = self._build_estimator()
+         params = {
+             'learning_rate': learning_rate,
+             'gamma': gamma,
+             'max_depth': max_depth,
+             'n_estimators': n_estimators,
+             'min_child_weight': min_child_weight,
+             'reg_alpha': reg_alpha,
+             'reg_lambda': reg_lambda
+         }
+         loss_name = getattr(self.ctx, "loss_name", "tweedie")
+         tweedie_variance_power = None
+         if self.ctx.task_type != 'classification':
+             if loss_name == "tweedie":
+                 tweedie_variance_power = trial.suggest_float(
+                     'tweedie_variance_power', 1, 2)
+                 params['tweedie_variance_power'] = tweedie_variance_power
+             elif loss_name == "poisson":
+                 tweedie_variance_power = 1.0
+             elif loss_name == "gamma":
+                 tweedie_variance_power = 2.0
+         X_all = self.ctx.train_data[self.ctx.factor_nmes]
+         y_all = self.ctx.train_data[self.ctx.resp_nme].values
+         w_all = self.ctx.train_data[self.ctx.weight_nme].values
+
+         losses: List[float] = []
+         for train_idx, val_idx in self.ctx.cv.split(X_all):
+             X_train = X_all.iloc[train_idx]
+             y_train = y_all[train_idx]
+             w_train = w_all[train_idx]
+             X_val = X_all.iloc[val_idx]
+             y_val = y_all[val_idx]
+             w_val = w_all[val_idx]
+
+             clf = self._build_estimator()
+             clf.set_params(**params)
+             fit_kwargs = self._build_fit_kwargs(
+                 w_train=w_train,
+                 X_val=X_val,
+                 y_val=y_val,
+                 w_val=w_val,
+                 n_estimators=n_estimators,
+             )
+             clf.fit(X_train, y_train, **fit_kwargs)
+
+             if self.ctx.task_type == 'classification':
+                 y_pred = clf.predict_proba(X_val)[:, 1]
+                 y_pred = np.clip(y_pred, EPS, 1 - EPS)
+                 loss = log_loss(y_val, y_pred, sample_weight=w_val)
+             else:
+                 y_pred = clf.predict(X_val)
+                 loss = regression_loss(
+                     y_val,
+                     y_pred,
+                     w_val,
+                     loss_name=loss_name,
+                     tweedie_power=tweedie_variance_power,
+                 )
+             losses.append(float(loss))
+             self._maybe_cleanup_gpu()

          return float(np.mean(losses))
-
-     def train(self) -> None:
-         if not self.best_params:
-             raise RuntimeError("Run tune() first to obtain best XGB parameters.")
-         self.model = self._build_estimator()
-         self.model.set_params(**self.best_params)
-         use_refit = bool(getattr(self.ctx.config, "final_refit", True))
-         predict_fn = None
-         if self.ctx.task_type == 'classification':
-             def _predict_proba(X, **_kwargs):
-                 return self.model.predict_proba(X)[:, 1]
-             predict_fn = _predict_proba
-         X_all = self.ctx.train_data[self.ctx.factor_nmes]
-         y_all = self.ctx.train_data[self.ctx.resp_nme].values
-         w_all = self.ctx.train_data[self.ctx.weight_nme].values
-
-         split = self._resolve_train_val_indices(X_all)
-         if split is not None:
-             train_idx, val_idx = split
-             X_train = X_all.iloc[train_idx]
-             y_train = y_all[train_idx]
-             w_train = w_all[train_idx]
-             X_val = X_all.iloc[val_idx]
-             y_val = y_all[val_idx]
-             w_val = w_all[val_idx]
-             fit_kwargs = self._build_fit_kwargs(
-                 w_train=w_train,
-                 X_val=X_val,
-                 y_val=y_val,
-                 w_val=w_val,
-                 n_estimators=self.best_params.get("n_estimators", 100),
-             )
-             self.model.fit(X_train, y_train, **fit_kwargs)
-             best_iter = getattr(self.model, "best_iteration", None)
-             if use_refit and best_iter is not None:
-                 refit_model = self._build_estimator()
-                 refit_params = dict(self.best_params)
-                 refit_params["n_estimators"] = int(best_iter) + 1
-                 refit_model.set_params(**refit_params)
-                 refit_kwargs = dict(self.ctx.fit_params or {})
-                 refit_kwargs.setdefault("sample_weight", w_all)
-                 refit_kwargs.pop("eval_set", None)
-                 refit_kwargs.pop("sample_weight_eval_set", None)
-                 refit_kwargs.pop("early_stopping_rounds", None)
-                 refit_kwargs.pop("eval_metric", None)
-                 refit_kwargs.setdefault("verbose", False)
-                 refit_model.fit(X_all, y_all, **refit_kwargs)
-                 self.model = refit_model
-         else:
-             fit_kwargs = dict(self.ctx.fit_params or {})
-             fit_kwargs.setdefault("sample_weight", w_all)
-             fit_kwargs.pop("eval_metric", None)
-             self.model.fit(X_all, y_all, **fit_kwargs)
-
-         self.ctx.model_label.append(self.label)
-         self._predict_and_cache(
-             self.model,
-             pred_prefix='xgb',
-             predict_fn=predict_fn
-         )
-         self.ctx.xgb_best = self.model
-
+
+     def train(self) -> None:
+         if not self.best_params:
+             raise RuntimeError("Run tune() first to obtain best XGB parameters.")
+         self.model = self._build_estimator()
+         self.model.set_params(**self.best_params)
+         use_refit = bool(getattr(self.ctx.config, "final_refit", True))
+         predict_fn = None
+         if self.ctx.task_type == 'classification':
+             def _predict_proba(X, **_kwargs):
+                 return self.model.predict_proba(X)[:, 1]
+             predict_fn = _predict_proba
+         X_all = self.ctx.train_data[self.ctx.factor_nmes]
+         y_all = self.ctx.train_data[self.ctx.resp_nme].values
+         w_all = self.ctx.train_data[self.ctx.weight_nme].values
+
+         split = self._resolve_train_val_indices(X_all)
+         if split is not None:
+             train_idx, val_idx = split
+             X_train = X_all.iloc[train_idx]
+             y_train = y_all[train_idx]
+             w_train = w_all[train_idx]
+             X_val = X_all.iloc[val_idx]
+             y_val = y_all[val_idx]
+             w_val = w_all[val_idx]
+             fit_kwargs = self._build_fit_kwargs(
+                 w_train=w_train,
+                 X_val=X_val,
+                 y_val=y_val,
+                 w_val=w_val,
+                 n_estimators=self.best_params.get("n_estimators", 100),
+             )
+             self.model.fit(X_train, y_train, **fit_kwargs)
+             best_iter = getattr(self.model, "best_iteration", None)
+             if use_refit and best_iter is not None:
+                 refit_model = self._build_estimator()
+                 refit_params = dict(self.best_params)
+                 refit_params["n_estimators"] = int(best_iter) + 1
+                 refit_model.set_params(**refit_params)
+                 refit_kwargs = dict(self.ctx.fit_params or {})
+                 refit_kwargs.setdefault("sample_weight", w_all)
+                 refit_kwargs.pop("eval_set", None)
+                 refit_kwargs.pop("sample_weight_eval_set", None)
+                 refit_kwargs.pop("early_stopping_rounds", None)
+                 refit_kwargs.pop("eval_metric", None)
+                 refit_kwargs.setdefault("verbose", False)
+                 refit_model.fit(X_all, y_all, **refit_kwargs)
+                 self.model = refit_model
+         else:
+             fit_kwargs = dict(self.ctx.fit_params or {})
+             fit_kwargs.setdefault("sample_weight", w_all)
+             fit_kwargs.pop("eval_metric", None)
+             self.model.fit(X_all, y_all, **fit_kwargs)
+
+         self.ctx.model_label.append(self.label)
+         self._predict_and_cache(
+             self.model,
+             pred_prefix='xgb',
+             predict_fn=predict_fn
+         )
+         self.ctx.xgb_best = self.model
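
Note on the new training path shown above: when the xgb_use_dmatrix config flag is left at its default (True), _build_estimator() returns a _XGBDMatrixWrapper instead of xgb.XGBClassifier / xgb.XGBRegressor, while tune/train/ensemble keep calling set_params, fit, predict and predict_proba unchanged. A rough standalone sketch of that sklearn-like surface (illustration only: _XGBDMatrixWrapper is module-private, the import path is inferred from the new layout, and the data is made up):

    import numpy as np
    from ins_pricing.modelling.bayesopt.trainers.trainer_xgb import _XGBDMatrixWrapper

    rng = np.random.default_rng(0)
    X = rng.random((200, 5))
    y = (X[:, 0] > 0.5).astype(int)

    # Params are plain xgb.train() params; n_estimators is popped into num_boost_round.
    wrapper = _XGBDMatrixWrapper(
        {"objective": "binary:logistic", "max_depth": 3, "n_estimators": 50},
        task_type="classification",
        use_gpu=False,  # CPU path: tree_method="hist", gpu_id stripped
    )
    wrapper.fit(X, y, sample_weight=np.ones(len(y)), verbose=False)
    proba = wrapper.predict_proba(X)[:, 1]  # same slicing as with XGBClassifier
    labels = wrapper.predict(X)             # binary predictions thresholded at 0.5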