ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
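The moves above amount to a repackaging: CLI entry points move from ins_pricing/modelling into ins_pricing/cli, the BayesOpt internals move under ins_pricing/modelling/core/bayesopt with the monolithic models.py and trainers.py split into per-model and per-trainer modules, tests move under ins_pricing/tests, and the parallel ins_pricing_gemini tree is removed. A minimal sketch of import paths implied by the new layout (inferred from the file list above and not verified against the package; the actual public re-exports may differ):

    # Hypothetical 0.2.0 import paths inferred from the file moves; illustrative only.
    from ins_pricing.modelling.core.bayesopt.trainers.trainer_resn import ResNetTrainer
    from ins_pricing.modelling.core.bayesopt.trainers.trainer_xgb import XGBTrainer
    from ins_pricing.cli.utils import cli_common, cli_config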
@@ -0,0 +1,261 @@
+from __future__ import annotations
+
+import os
+from typing import Any, Dict, List, Optional, Tuple
+
+import numpy as np
+import optuna
+import torch
+from sklearn.metrics import log_loss, mean_tweedie_deviance
+
+from .trainer_base import TrainerBase
+from ..models import ResNetSklearn
+
+class ResNetTrainer(TrainerBase):
+    def __init__(self, context: "BayesOptModel") -> None:
+        if context.task_type == 'classification':
+            super().__init__(context, 'ResNetClassifier', 'ResNet')
+        else:
+            super().__init__(context, 'ResNet', 'ResNet')
+        self.model: Optional[ResNetSklearn] = None
+        self.enable_distributed_optuna = bool(context.config.use_resn_ddp)
+
+    def _resolve_input_dim(self) -> int:
+        data = getattr(self.ctx, "train_oht_scl_data", None)
+        if data is not None and getattr(self.ctx, "var_nmes", None):
+            return int(data[self.ctx.var_nmes].shape[1])
+        return int(len(self.ctx.var_nmes or []))
+
+    def _build_model(self, params: Optional[Dict[str, Any]] = None) -> ResNetSklearn:
+        params = params or {}
+        power = params.get("tw_power", self.ctx.default_tweedie_power())
+        if power is not None:
+            power = float(power)
+        resn_weight_decay = float(
+            params.get(
+                "weight_decay",
+                getattr(self.ctx.config, "resn_weight_decay", 1e-4),
+            )
+        )
+        return ResNetSklearn(
+            model_nme=self.ctx.model_nme,
+            input_dim=self._resolve_input_dim(),
+            hidden_dim=int(params.get("hidden_dim", 64)),
+            block_num=int(params.get("block_num", 2)),
+            task_type=self.ctx.task_type,
+            epochs=self.ctx.epochs,
+            tweedie_power=power,
+            learning_rate=float(params.get("learning_rate", 0.01)),
+            patience=int(params.get("patience", 10)),
+            use_layernorm=True,
+            dropout=float(params.get("dropout", 0.1)),
+            residual_scale=float(params.get("residual_scale", 0.1)),
+            stochastic_depth=float(params.get("stochastic_depth", 0.0)),
+            weight_decay=resn_weight_decay,
+            use_data_parallel=self.ctx.config.use_resn_data_parallel,
+            use_ddp=self.ctx.config.use_resn_ddp
+        )
+
+    # ========= Cross-validation (for BayesOpt) =========
+    def cross_val(self, trial: optuna.trial.Trial) -> float:
+        # ResNet CV focuses on memory control:
+        # - Create a ResNetSklearn per fold and release it immediately after.
+        # - Move model to CPU, delete, and call gc/empty_cache after each fold.
+        # - Optionally sample part of training data during BayesOpt to reduce memory.
+
+        base_tw_power = self.ctx.default_tweedie_power()
+
+        def data_provider():
+            data = self.ctx.train_oht_data if self.ctx.train_oht_data is not None else self.ctx.train_oht_scl_data
+            assert data is not None, "Preprocessed training data is missing."
+            return data[self.ctx.var_nmes], data[self.ctx.resp_nme], data[self.ctx.weight_nme]
+
+        metric_ctx: Dict[str, Any] = {}
+
+        def model_builder(params):
+            power = params.get("tw_power", base_tw_power)
+            metric_ctx["tw_power"] = power
+            params_local = dict(params)
+            params_local["tw_power"] = power
+            return self._build_model(params_local)
+
+        def preprocess_fn(X_train, X_val):
+            X_train_s, X_val_s, _ = self._standardize_fold(
+                X_train, X_val, self.ctx.num_features)
+            return X_train_s, X_val_s
+
+        def fit_predict(model, X_train, y_train, w_train, X_val, y_val, w_val, trial_obj):
+            model.fit(
+                X_train, y_train, w_train,
+                X_val, y_val, w_val,
+                trial=trial_obj
+            )
+            return model.predict(X_val)
+
+        def metric_fn(y_true, y_pred, weight):
+            if self.ctx.task_type == 'regression':
+                return mean_tweedie_deviance(
+                    y_true,
+                    y_pred,
+                    sample_weight=weight,
+                    power=metric_ctx.get("tw_power", base_tw_power)
+                )
+            return log_loss(y_true, y_pred, sample_weight=weight)
+
+        sample_cap = data_provider()[0]
+        max_rows_for_resnet_bo = min(100000, int(len(sample_cap)/5))
+
+        return self.cross_val_generic(
+            trial=trial,
+            hyperparameter_space={
+                "learning_rate": lambda t: t.suggest_float('learning_rate', 1e-6, 1e-2, log=True),
+                "hidden_dim": lambda t: t.suggest_int('hidden_dim', 8, 32, step=2),
+                "block_num": lambda t: t.suggest_int('block_num', 2, 10),
+                "dropout": lambda t: t.suggest_float('dropout', 0.0, 0.3, step=0.05),
+                "residual_scale": lambda t: t.suggest_float('residual_scale', 0.05, 0.3, step=0.05),
+                "patience": lambda t: t.suggest_int('patience', 3, 12),
+                "stochastic_depth": lambda t: t.suggest_float('stochastic_depth', 0.0, 0.2, step=0.05),
+                **({"tw_power": lambda t: t.suggest_float('tw_power', 1.0, 2.0)} if self.ctx.task_type == 'regression' and self.ctx.obj == 'reg:tweedie' else {})
+            },
+            data_provider=data_provider,
+            model_builder=model_builder,
+            metric_fn=metric_fn,
+            sample_limit=max_rows_for_resnet_bo if len(
+                sample_cap) > max_rows_for_resnet_bo > 0 else None,
+            preprocess_fn=preprocess_fn,
+            fit_predict_fn=fit_predict,
+            cleanup_fn=lambda m: getattr(
+                getattr(m, "resnet", None), "to", lambda *_args, **_kwargs: None)("cpu")
+        )
+
+    # ========= Train final ResNet with best hyperparameters =========
+    def train(self) -> None:
+        if not self.best_params:
+            raise RuntimeError("Run tune() first to obtain best ResNet parameters.")
+
+        params = dict(self.best_params)
+        use_refit = bool(getattr(self.ctx.config, "final_refit", True))
+        data = self.ctx.train_oht_scl_data
+        if data is None:
+            raise RuntimeError("Missing standardized data for ResNet training.")
+        X_all = data[self.ctx.var_nmes]
+        y_all = data[self.ctx.resp_nme]
+        w_all = data[self.ctx.weight_nme]
+
+        refit_epochs = None
+        split = self._resolve_train_val_indices(X_all)
+        if use_refit and split is not None:
+            train_idx, val_idx = split
+            tmp_model = self._build_model(params)
+            tmp_model.fit(
+                X_all.iloc[train_idx],
+                y_all.iloc[train_idx],
+                w_all.iloc[train_idx],
+                X_all.iloc[val_idx],
+                y_all.iloc[val_idx],
+                w_all.iloc[val_idx],
+                trial=None,
+            )
+            refit_epochs = self._resolve_best_epoch(
+                getattr(tmp_model, "training_history", None),
+                default_epochs=int(self.ctx.epochs),
+            )
+            getattr(getattr(tmp_model, "resnet", None), "to",
+                    lambda *_args, **_kwargs: None)("cpu")
+            self._clean_gpu()
+
+        self.model = self._build_model(params)
+        if refit_epochs is not None:
+            self.model.epochs = int(refit_epochs)
+        self.best_params = params
+        loss_plot_path = self.output.plot_path(
+            f'{self.ctx.model_nme}/loss/loss_{self.ctx.model_nme}_{self.model_name_prefix}.png')
+        self.model.loss_curve_path = loss_plot_path
+
+        self._fit_predict_cache(
+            self.model,
+            X_all,
+            y_all,
+            sample_weight=w_all,
+            pred_prefix='resn',
+            use_oht=True,
+            sample_weight_arg='w_train'
+        )
+
+        # Convenience wrapper for external callers.
+        self.ctx.resn_best = self.model
+
+    def ensemble_predict(self, k: int) -> None:
+        if not self.best_params:
+            raise RuntimeError("Run tune() first to obtain best ResNet parameters.")
+        data = self.ctx.train_oht_scl_data
+        test_data = self.ctx.test_oht_scl_data
+        if data is None or test_data is None:
+            raise RuntimeError("Missing standardized data for ResNet ensemble.")
+        X_all = data[self.ctx.var_nmes]
+        y_all = data[self.ctx.resp_nme]
+        w_all = data[self.ctx.weight_nme]
+        X_test = test_data[self.ctx.var_nmes]
+
+        k = max(2, int(k))
+        n_samples = len(X_all)
+        split_iter, _ = self._resolve_ensemble_splits(X_all, k=k)
+        if split_iter is None:
+            print(
+                f"[ResNet Ensemble] unable to build CV split (n_samples={n_samples}); skip ensemble.",
+                flush=True,
+            )
+            return
+        preds_train_sum = np.zeros(n_samples, dtype=np.float64)
+        preds_test_sum = np.zeros(len(X_test), dtype=np.float64)
+
+        split_count = 0
+        for train_idx, val_idx in split_iter:
+            model = self._build_model(self.best_params)
+            model.fit(
+                X_all.iloc[train_idx],
+                y_all.iloc[train_idx],
+                w_all.iloc[train_idx],
+                X_all.iloc[val_idx],
+                y_all.iloc[val_idx],
+                w_all.iloc[val_idx],
+                trial=None,
+            )
+            pred_train = model.predict(X_all)
+            pred_test = model.predict(X_test)
+            preds_train_sum += np.asarray(pred_train, dtype=np.float64)
+            preds_test_sum += np.asarray(pred_test, dtype=np.float64)
+            getattr(getattr(model, "resnet", None), "to",
+                    lambda *_args, **_kwargs: None)("cpu")
+            self._clean_gpu()
+            split_count += 1
+
+        if split_count < 1:
+            print(
+                f"[ResNet Ensemble] no CV splits generated; skip ensemble.",
+                flush=True,
+            )
+            return
+        preds_train = preds_train_sum / float(split_count)
+        preds_test = preds_test_sum / float(split_count)
+        self._cache_predictions("resn", preds_train, preds_test)
+
+    # ========= Save / Load =========
+    # ResNet is saved as state_dict and needs a custom load path.
+    # Save logic is implemented in TrainerBase (checks .resnet attribute).
+
+    def load(self) -> None:
+        # Load ResNet weights to the current device to match context.
+        path = self.output.model_path(self._get_model_filename())
+        if os.path.exists(path):
+            resn_loaded = self._build_model(self.best_params)
+            state_dict = torch.load(path, map_location='cpu')
+            resn_loaded.resnet.load_state_dict(state_dict)
+
+            self._move_to_device(resn_loaded)
+            self.model = resn_loaded
+            self.ctx.resn_best = self.model
+        else:
+            print(f"[ResNetTrainer.load] Model file not found: {path}")
+
+
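The 261-line hunk above corresponds to the new trainers/trainer_resn.py in the file list. Its cross_val comment describes a per-fold memory-control pattern: build a fresh model for each fold, move it to the CPU when the fold finishes, drop the reference, and clear the CUDA cache. A minimal, self-contained sketch of that pattern in plain PyTorch, illustrative only and not part of the ins_pricing API (run_fold, build_model, and fit_and_score are hypothetical names):

    import gc
    import torch

    def run_fold(build_model, fit_and_score):
        # Train and score one CV fold, then aggressively release GPU memory.
        model = build_model()          # caller supplies a fresh model per fold
        score = fit_and_score(model)   # caller fits on the fold and returns its metric
        if hasattr(model, "to"):
            model.to("cpu")            # move weights off the GPU first
        del model                      # drop the last reference
        gc.collect()                   # reclaim host-side memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()   # return cached CUDA blocks to the driver
        return score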
@@ -0,0 +1,348 @@
+from __future__ import annotations
+
+import inspect
+from typing import Any, Dict, List, Optional, Tuple
+
+import numpy as np
+import optuna
+import torch
+import xgboost as xgb
+from sklearn.metrics import log_loss, mean_tweedie_deviance
+
+from .trainer_base import TrainerBase
+from ..utils import EPS
+
+_XGB_CUDA_CHECKED = False
+_XGB_HAS_CUDA = False
+
+_XGB_CUDA_CHECKED = False
+_XGB_HAS_CUDA = False
+
+
+def _xgb_cuda_available() -> bool:
+    # Best-effort check for XGBoost CUDA build; cached to avoid repeated checks.
+    global _XGB_CUDA_CHECKED, _XGB_HAS_CUDA
+    if _XGB_CUDA_CHECKED:
+        return _XGB_HAS_CUDA
+    _XGB_CUDA_CHECKED = True
+    if not torch.cuda.is_available():
+        _XGB_HAS_CUDA = False
+        return False
+    try:
+        build_info = getattr(xgb, "build_info", None)
+        if callable(build_info):
+            info = build_info()
+            for key in ("USE_CUDA", "use_cuda", "cuda"):
+                if key in info:
+                    val = info[key]
+                    if isinstance(val, str):
+                        _XGB_HAS_CUDA = val.strip().upper() in (
+                            "ON", "YES", "TRUE", "1")
+                    else:
+                        _XGB_HAS_CUDA = bool(val)
+                    return _XGB_HAS_CUDA
+    except Exception:
+        pass
+    try:
+        has_cuda = getattr(getattr(xgb, "core", None), "_has_cuda_support", None)
+        if callable(has_cuda):
+            _XGB_HAS_CUDA = bool(has_cuda())
+            return _XGB_HAS_CUDA
+    except Exception:
+        pass
+    _XGB_HAS_CUDA = False
+    return False
+
+class XGBTrainer(TrainerBase):
+    def __init__(self, context: "BayesOptModel") -> None:
+        super().__init__(context, 'Xgboost', 'Xgboost')
+        self.model: Optional[xgb.XGBModel] = None
+        self._xgb_use_gpu = False
+        self._xgb_gpu_warned = False
+
+    def _build_estimator(self) -> xgb.XGBModel:
+        use_gpu = bool(self.ctx.use_gpu and _xgb_cuda_available())
+        self._xgb_use_gpu = use_gpu
+        params = dict(
+            objective=self.ctx.obj,
+            random_state=self.ctx.rand_seed,
+            subsample=0.9,
+            tree_method='gpu_hist' if use_gpu else 'hist',
+            enable_categorical=True,
+            predictor='gpu_predictor' if use_gpu else 'cpu_predictor'
+        )
+        if self.ctx.use_gpu and not use_gpu and not self._xgb_gpu_warned:
+            print(
+                "[XGBoost] CUDA requested but not available; falling back to CPU.",
+                flush=True,
+            )
+            self._xgb_gpu_warned = True
+        if use_gpu:
+            params['gpu_id'] = 0
+            print(f">>> XGBoost using GPU ID: 0 (Single GPU Mode)")
+        eval_metric = self._resolve_eval_metric()
+        if eval_metric is not None:
+            params.setdefault("eval_metric", eval_metric)
+        if self.ctx.task_type == 'classification':
+            return xgb.XGBClassifier(**params)
+        return xgb.XGBRegressor(**params)
+
+    def _resolve_eval_metric(self) -> Optional[Any]:
+        fit_params = self.ctx.fit_params or {}
+        eval_metric = fit_params.get("eval_metric")
+        if eval_metric is None:
+            return "logloss" if self.ctx.task_type == 'classification' else "rmse"
+        return eval_metric
+
+    def _fit_supports_param(self, name: str) -> bool:
+        try:
+            fit = xgb.XGBClassifier.fit if self.ctx.task_type == 'classification' else xgb.XGBRegressor.fit
+            return name in inspect.signature(fit).parameters
+        except (TypeError, ValueError):
+            return True
+
+    def _resolve_early_stopping_rounds(self, n_estimators: int) -> int:
+        n_estimators = max(1, int(n_estimators))
+        base = max(5, n_estimators // 10)
+        return min(50, base)
+
+    def _build_fit_kwargs(self,
+                          w_train,
+                          X_val=None,
+                          y_val=None,
+                          w_val=None,
+                          n_estimators: Optional[int] = None) -> Dict[str, Any]:
+        supports_early = self._fit_supports_param("early_stopping_rounds")
+        fit_kwargs = dict(self.ctx.fit_params or {})
+        fit_kwargs.pop("sample_weight", None)
+        fit_kwargs.pop("eval_metric", None)
+        fit_kwargs["sample_weight"] = w_train
+
+        if "eval_set" not in fit_kwargs and X_val is not None and y_val is not None:
+            fit_kwargs["eval_set"] = [(X_val, y_val)]
+            if w_val is not None:
+                fit_kwargs["sample_weight_eval_set"] = [w_val]
+
+        if (
+            supports_early
+            and "early_stopping_rounds" not in fit_kwargs
+            and "eval_set" in fit_kwargs
+        ):
+            rounds = self._resolve_early_stopping_rounds(n_estimators or 100)
+            fit_kwargs["early_stopping_rounds"] = rounds
+        if not supports_early:
+            fit_kwargs.pop("early_stopping_rounds", None)
+
+        fit_kwargs.setdefault("verbose", False)
+        return fit_kwargs
+
+    def ensemble_predict(self, k: int) -> None:
+        if not self.best_params:
+            raise RuntimeError("Run tune() first to obtain best XGB parameters.")
+        k = max(2, int(k))
+        X_all = self.ctx.train_data[self.ctx.factor_nmes]
+        y_all = self.ctx.train_data[self.ctx.resp_nme].values
+        w_all = self.ctx.train_data[self.ctx.weight_nme].values
+        X_test = self.ctx.test_data[self.ctx.factor_nmes]
+        n_samples = len(X_all)
+        split_iter, _ = self._resolve_ensemble_splits(X_all, k=k)
+        if split_iter is None:
+            print(
+                f"[XGB Ensemble] unable to build CV split (n_samples={n_samples}); skip ensemble.",
+                flush=True,
+            )
+            return
+        preds_train_sum = np.zeros(n_samples, dtype=np.float64)
+        preds_test_sum = np.zeros(len(X_test), dtype=np.float64)
+
+        split_count = 0
+        for train_idx, val_idx in split_iter:
+            X_train = X_all.iloc[train_idx]
+            y_train = y_all[train_idx]
+            w_train = w_all[train_idx]
+            X_val = X_all.iloc[val_idx]
+            y_val = y_all[val_idx]
+            w_val = w_all[val_idx]
+
+            clf = self._build_estimator()
+            clf.set_params(**self.best_params)
+            fit_kwargs = self._build_fit_kwargs(
+                w_train=w_train,
+                X_val=X_val,
+                y_val=y_val,
+                w_val=w_val,
+                n_estimators=self.best_params.get("n_estimators", 100),
+            )
+            clf.fit(X_train, y_train, **fit_kwargs)
+
+            if self.ctx.task_type == 'classification':
+                pred_train = clf.predict_proba(X_all)[:, 1]
+                pred_test = clf.predict_proba(X_test)[:, 1]
+            else:
+                pred_train = clf.predict(X_all)
+                pred_test = clf.predict(X_test)
+            preds_train_sum += np.asarray(pred_train, dtype=np.float64)
+            preds_test_sum += np.asarray(pred_test, dtype=np.float64)
+            self._clean_gpu()
+            split_count += 1
+
+        if split_count < 1:
+            print(
+                f"[XGB Ensemble] no CV splits generated; skip ensemble.",
+                flush=True,
+            )
+            return
+        preds_train = preds_train_sum / float(split_count)
+        preds_test = preds_test_sum / float(split_count)
+        self._cache_predictions("xgb", preds_train, preds_test)
+
+    def cross_val(self, trial: optuna.trial.Trial) -> float:
+        learning_rate = trial.suggest_float(
+            'learning_rate', 1e-5, 1e-1, log=True)
+        gamma = trial.suggest_float('gamma', 0, 10000)
+        max_depth_max = max(
+            3, int(getattr(self.config, "xgb_max_depth_max", 25)))
+        n_estimators_max = max(
+            10, int(getattr(self.config, "xgb_n_estimators_max", 500)))
+        max_depth = trial.suggest_int('max_depth', 3, max_depth_max)
+        n_estimators = trial.suggest_int(
+            'n_estimators', 10, n_estimators_max, step=10)
+        min_child_weight = trial.suggest_int(
+            'min_child_weight', 100, 10000, step=100)
+        reg_alpha = trial.suggest_float('reg_alpha', 1e-10, 1, log=True)
+        reg_lambda = trial.suggest_float('reg_lambda', 1e-10, 1, log=True)
+        if trial is not None:
+            print(
+                f"[Optuna][Xgboost] trial_id={trial.number} max_depth={max_depth} "
+                f"n_estimators={n_estimators}",
+                flush=True,
+            )
+        if max_depth >= 20 and n_estimators >= 300:
+            raise optuna.TrialPruned(
+                "XGB config is likely too slow (max_depth>=20 & n_estimators>=300)")
+        clf = self._build_estimator()
+        params = {
+            'learning_rate': learning_rate,
+            'gamma': gamma,
+            'max_depth': max_depth,
+            'n_estimators': n_estimators,
+            'min_child_weight': min_child_weight,
+            'reg_alpha': reg_alpha,
+            'reg_lambda': reg_lambda
+        }
+        tweedie_variance_power = None
+        if self.ctx.task_type != 'classification':
+            if self.ctx.obj == 'reg:tweedie':
+                tweedie_variance_power = trial.suggest_float(
+                    'tweedie_variance_power', 1, 2)
+                params['tweedie_variance_power'] = tweedie_variance_power
+            elif self.ctx.obj == 'count:poisson':
+                tweedie_variance_power = 1
+            elif self.ctx.obj == 'reg:gamma':
+                tweedie_variance_power = 2
+            else:
+                tweedie_variance_power = 1.5
+        X_all = self.ctx.train_data[self.ctx.factor_nmes]
+        y_all = self.ctx.train_data[self.ctx.resp_nme].values
+        w_all = self.ctx.train_data[self.ctx.weight_nme].values
+
+        losses: List[float] = []
+        for train_idx, val_idx in self.ctx.cv.split(X_all):
+            X_train = X_all.iloc[train_idx]
+            y_train = y_all[train_idx]
+            w_train = w_all[train_idx]
+            X_val = X_all.iloc[val_idx]
+            y_val = y_all[val_idx]
+            w_val = w_all[val_idx]
+
+            clf = self._build_estimator()
+            clf.set_params(**params)
+            fit_kwargs = self._build_fit_kwargs(
+                w_train=w_train,
+                X_val=X_val,
+                y_val=y_val,
+                w_val=w_val,
+                n_estimators=n_estimators,
+            )
+            clf.fit(X_train, y_train, **fit_kwargs)
+
+            if self.ctx.task_type == 'classification':
+                y_pred = clf.predict_proba(X_val)[:, 1]
+                y_pred = np.clip(y_pred, EPS, 1 - EPS)
+                loss = log_loss(y_val, y_pred, sample_weight=w_val)
+            else:
+                y_pred = clf.predict(X_val)
+                y_pred_safe = np.maximum(y_pred, EPS)
+                loss = mean_tweedie_deviance(
+                    y_val,
+                    y_pred_safe,
+                    sample_weight=w_val,
+                    power=tweedie_variance_power,
+                )
+            losses.append(float(loss))
+            self._clean_gpu()
+
+        return float(np.mean(losses))
+
+    def train(self) -> None:
+        if not self.best_params:
+            raise RuntimeError("Run tune() first to obtain best XGB parameters.")
+        self.model = self._build_estimator()
+        self.model.set_params(**self.best_params)
+        use_refit = bool(getattr(self.ctx.config, "final_refit", True))
+        predict_fn = None
+        if self.ctx.task_type == 'classification':
+            def _predict_proba(X, **_kwargs):
+                return self.model.predict_proba(X)[:, 1]
+            predict_fn = _predict_proba
+        X_all = self.ctx.train_data[self.ctx.factor_nmes]
+        y_all = self.ctx.train_data[self.ctx.resp_nme].values
+        w_all = self.ctx.train_data[self.ctx.weight_nme].values
+
+        split = self._resolve_train_val_indices(X_all)
+        if split is not None:
+            train_idx, val_idx = split
+            X_train = X_all.iloc[train_idx]
+            y_train = y_all[train_idx]
+            w_train = w_all[train_idx]
+            X_val = X_all.iloc[val_idx]
+            y_val = y_all[val_idx]
+            w_val = w_all[val_idx]
+            fit_kwargs = self._build_fit_kwargs(
+                w_train=w_train,
+                X_val=X_val,
+                y_val=y_val,
+                w_val=w_val,
+                n_estimators=self.best_params.get("n_estimators", 100),
+            )
+            self.model.fit(X_train, y_train, **fit_kwargs)
+            best_iter = getattr(self.model, "best_iteration", None)
+            if use_refit and best_iter is not None:
+                refit_model = self._build_estimator()
+                refit_params = dict(self.best_params)
+                refit_params["n_estimators"] = int(best_iter) + 1
+                refit_model.set_params(**refit_params)
+                refit_kwargs = dict(self.ctx.fit_params or {})
+                refit_kwargs.setdefault("sample_weight", w_all)
+                refit_kwargs.pop("eval_set", None)
+                refit_kwargs.pop("sample_weight_eval_set", None)
+                refit_kwargs.pop("early_stopping_rounds", None)
+                refit_kwargs.pop("eval_metric", None)
+                refit_kwargs.setdefault("verbose", False)
+                refit_model.fit(X_all, y_all, **refit_kwargs)
+                self.model = refit_model
+        else:
+            fit_kwargs = dict(self.ctx.fit_params or {})
+            fit_kwargs.setdefault("sample_weight", w_all)
+            fit_kwargs.pop("eval_metric", None)
+            self.model.fit(X_all, y_all, **fit_kwargs)
+
+        self.ctx.model_label.append(self.label)
+        self._predict_and_cache(
+            self.model,
+            pred_prefix='xgb',
+            predict_fn=predict_fn
+        )
+        self.ctx.xgb_best = self.model
+
+
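The 348-line hunk above matches the new trainers/trainer_xgb.py. Its train() method fits against a validation eval_set with early stopping, reads best_iteration, and optionally refits on the full training set with that round count. A standalone sketch of the same early-stop-then-refit idea using only xgboost and scikit-learn (illustrative, with synthetic data; whether early_stopping_rounds belongs on the constructor or on fit() depends on the installed xgboost version):

    import numpy as np
    import xgboost as xgb
    from sklearn.model_selection import train_test_split

    rng = np.random.default_rng(0)
    X = rng.normal(size=(1000, 5))
    y = X @ np.array([0.5, -0.2, 0.1, 0.0, 0.3]) + rng.normal(scale=0.1, size=1000)
    X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=0)

    # Early stopping against a held-out validation set (constructor argument in xgboost >= 1.6).
    model = xgb.XGBRegressor(n_estimators=500, learning_rate=0.05,
                             tree_method="hist", early_stopping_rounds=20)
    model.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], verbose=False)

    best_iter = getattr(model, "best_iteration", None)
    if best_iter is not None:
        # Refit on all rows with the boosting-round count found above.
        final = xgb.XGBRegressor(n_estimators=int(best_iter) + 1,
                                 learning_rate=0.05, tree_method="hist")
        final.fit(X, y)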
@@ -38,8 +38,8 @@ from torch.utils.data import DataLoader, DistributedSampler
 
 # Optional: unify plotting with shared plotting package
 try:
-    from ..plotting import curves as plot_curves_common
-    from ..plotting.diagnostics import plot_loss_curve as plot_loss_curve_common
+    from ...plotting import curves as plot_curves_common
+    from ...plotting.diagnostics import plot_loss_curve as plot_loss_curve_common
 except Exception: # pragma: no cover
     try:
         from ins_pricing.plotting import curves as plot_curves_common
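The only change in the hunk above is one extra leading dot on the relative imports, which is consistent with the importing module moving one package level deeper (modelling/bayesopt to modelling/core/bayesopt) while presumably still resolving to the same shared plotting package, with ins_pricing.plotting as the absolute fallback. A reminder of how the dots count, using a hypothetical package a.b.c purely for illustration:

    # Inside module a/b/c/m.py:
    # from .   import x   ->  a.b.c.x
    # from ..  import x   ->  a.b.x
    # from ... import x   ->  a.x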