ins-pricing 0.4.4-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ins_pricing/README.md +74 -56
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +52 -50
  4. ins_pricing/cli/BayesOpt_incremental.py +832 -898
  5. ins_pricing/cli/Explain_Run.py +31 -23
  6. ins_pricing/cli/Explain_entry.py +532 -579
  7. ins_pricing/cli/Pricing_Run.py +31 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +1440 -1438
  9. ins_pricing/cli/utils/cli_common.py +256 -256
  10. ins_pricing/cli/utils/cli_config.py +375 -375
  11. ins_pricing/cli/utils/import_resolver.py +382 -365
  12. ins_pricing/cli/utils/notebook_utils.py +340 -340
  13. ins_pricing/cli/watchdog_run.py +209 -201
  14. ins_pricing/frontend/README.md +573 -419
  15. ins_pricing/frontend/__init__.py +10 -10
  16. ins_pricing/frontend/config_builder.py +1 -0
  17. ins_pricing/frontend/example_workflows.py +1 -1
  18. ins_pricing/governance/__init__.py +20 -20
  19. ins_pricing/governance/release.py +159 -159
  20. ins_pricing/modelling/README.md +67 -0
  21. ins_pricing/modelling/__init__.py +147 -92
  22. ins_pricing/modelling/bayesopt/README.md +59 -0
  23. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  24. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -550
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -962
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  29. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
  32. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
  36. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
  37. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
  38. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
  39. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  40. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  41. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  42. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  43. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  44. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
  45. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  46. ins_pricing/modelling/explain/__init__.py +55 -55
  47. ins_pricing/modelling/explain/metrics.py +27 -174
  48. ins_pricing/modelling/explain/permutation.py +237 -237
  49. ins_pricing/modelling/plotting/__init__.py +40 -36
  50. ins_pricing/modelling/plotting/compat.py +228 -0
  51. ins_pricing/modelling/plotting/curves.py +572 -572
  52. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  53. ins_pricing/modelling/plotting/geo.py +362 -362
  54. ins_pricing/modelling/plotting/importance.py +121 -121
  55. ins_pricing/pricing/__init__.py +27 -27
  56. ins_pricing/production/__init__.py +35 -25
  57. ins_pricing/production/{predict.py → inference.py} +140 -57
  58. ins_pricing/production/monitoring.py +8 -21
  59. ins_pricing/reporting/__init__.py +11 -11
  60. ins_pricing/setup.py +1 -1
  61. ins_pricing/tests/production/test_inference.py +90 -0
  62. ins_pricing/utils/__init__.py +116 -83
  63. ins_pricing/utils/device.py +255 -255
  64. ins_pricing/utils/features.py +53 -0
  65. ins_pricing/utils/io.py +72 -0
  66. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  67. ins_pricing/utils/metrics.py +158 -24
  68. ins_pricing/utils/numerics.py +76 -0
  69. ins_pricing/utils/paths.py +9 -1
  70. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +55 -35
  71. ins_pricing-0.5.0.dist-info/RECORD +131 -0
  72. ins_pricing/CHANGELOG.md +0 -272
  73. ins_pricing/RELEASE_NOTES_0.2.8.md +0 -344
  74. ins_pricing/docs/LOSS_FUNCTIONS.md +0 -78
  75. ins_pricing/docs/modelling/BayesOpt_USAGE.md +0 -945
  76. ins_pricing/docs/modelling/README.md +0 -34
  77. ins_pricing/frontend/QUICKSTART.md +0 -152
  78. ins_pricing/modelling/core/BayesOpt.py +0 -146
  79. ins_pricing/modelling/core/__init__.py +0 -1
  80. ins_pricing/modelling/core/bayesopt/PHASE2_REFACTORING_SUMMARY.md +0 -449
  81. ins_pricing/modelling/core/bayesopt/PHASE3_REFACTORING_SUMMARY.md +0 -406
  82. ins_pricing/modelling/core/bayesopt/REFACTORING_SUMMARY.md +0 -247
  83. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  84. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  85. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  86. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  87. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  88. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  89. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  90. ins_pricing/tests/production/test_predict.py +0 -233
  91. ins_pricing-0.4.4.dist-info/RECORD +0 -137
  92. /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
  93. /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
  94. /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
  95. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
  96. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
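The headline change in 0.5.0 is structural: `modelling/core/bayesopt` is flattened into `modelling/bayesopt`, `production/predict.py` becomes `production/inference.py`, and the shared loss helpers move up into `ins_pricing.utils`. A minimal migration sketch for downstream imports, assuming the public names simply track the file moves listed above (the packages' `__init__` re-exports are not shown in this diff, so verify before relying on any specific path):

```python
# Hypothetical before/after import paths, inferred from the file moves above.

# 0.4.4 layout (pre-move):
# from ins_pricing.modelling.core.bayesopt.trainers.trainer_glm import GLMTrainer
# from ins_pricing.modelling.core.bayesopt.utils.losses import regression_loss
# from ins_pricing.production import predict

# 0.5.0 layout (post-move):
from ins_pricing.modelling.bayesopt.trainers.trainer_glm import GLMTrainer
from ins_pricing.utils.losses import regression_loss
from ins_pricing.production import inference
```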
ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py
@@ -8,9 +8,9 @@ import pandas as pd
 from sklearn.metrics import log_loss
 from sklearn.model_selection import GroupKFold, TimeSeriesSplit
 
-from .trainer_base import TrainerBase
-from ..models import FTTransformerSklearn
-from ..utils.losses import regression_loss
+from ins_pricing.modelling.bayesopt.trainers.trainer_base import TrainerBase
+from ins_pricing.modelling.bayesopt.models import FTTransformerSklearn
+from ins_pricing.utils.losses import regression_loss
 
 
 class FTTrainer(TrainerBase):
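Both trainer hunks replace package-relative imports with absolute ones, so `regression_loss` now resolves from the shared `ins_pricing.utils.losses` module instead of a bayesopt-local copy. Its call signature can be read off the `metric_fn` call site in the trainer_glm.py diff below; a hedged usage sketch on synthetic data (the scalar, lower-is-better return value is an assumption):

```python
import numpy as np
from ins_pricing.utils.losses import regression_loss

# Toy severity-style data; weights act as exposure.
y_true = np.array([0.0, 120.0, 0.0, 340.0])
y_pred = np.array([35.0, 90.0, 10.0, 300.0])
weight = np.array([1.0, 1.0, 2.0, 1.0])

# Keyword names copied from the metric_fn call in trainer_glm.py.
loss = regression_loss(y_true, y_pred, weight,
                       loss_name="tweedie", tweedie_power=1.5)
print(loss)
```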
ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py
@@ -1,198 +1,197 @@
 from __future__ import annotations
 
 from typing import Any, Dict, List, Optional, Tuple
 
 import numpy as np
 import optuna
 import pandas as pd
 import statsmodels.api as sm
 from sklearn.metrics import log_loss
 
-from .trainer_base import TrainerBase
-from ..utils import EPS
-from ..utils.losses import regression_loss
+from ins_pricing.modelling.bayesopt.trainers.trainer_base import TrainerBase
+from ins_pricing.utils import EPS
+from ins_pricing.utils.losses import regression_loss
 
 class GLMTrainer(TrainerBase):
     def __init__(self, context: "BayesOptModel") -> None:
         super().__init__(context, 'GLM', 'GLM')
         self.model = None
 
     def _select_family(self, tweedie_power: Optional[float] = None):
         if self.ctx.task_type == 'classification':
             return sm.families.Binomial()
         loss_name = getattr(self.ctx, "loss_name", "tweedie")
         if loss_name == "poisson":
             return sm.families.Poisson()
         if loss_name == "gamma":
             return sm.families.Gamma()
         if loss_name in {"mse", "mae"}:
             return sm.families.Gaussian()
         power = tweedie_power if tweedie_power is not None else 1.5
         return sm.families.Tweedie(var_power=power, link=sm.families.links.log())
 
     def _prepare_design(self, data: pd.DataFrame) -> pd.DataFrame:
         # Add intercept to the statsmodels design matrix.
         X = data[self.ctx.var_nmes]
         return sm.add_constant(X, has_constant='add')
 
     def _metric_power(self, family, tweedie_power: Optional[float]) -> float:
         if isinstance(family, sm.families.Poisson):
             return 1.0
         if isinstance(family, sm.families.Gamma):
             return 2.0
         if isinstance(family, sm.families.Tweedie):
             return tweedie_power if tweedie_power is not None else getattr(family, 'var_power', 1.5)
         return 1.5
 
     def cross_val(self, trial: optuna.trial.Trial) -> float:
         param_space = {
             "alpha": lambda t: t.suggest_float('alpha', 1e-6, 1e2, log=True),
             "l1_ratio": lambda t: t.suggest_float('l1_ratio', 0.0, 1.0)
         }
         loss_name = getattr(self.ctx, "loss_name", "tweedie")
         if self.ctx.task_type == 'regression' and loss_name == 'tweedie':
             param_space["tweedie_power"] = lambda t: t.suggest_float(
                 'tweedie_power', 1.0, 2.0)
 
         def data_provider():
             data = self.ctx.train_oht_data if self.ctx.train_oht_data is not None else self.ctx.train_oht_scl_data
             assert data is not None, "Preprocessed training data is missing."
             return data[self.ctx.var_nmes], data[self.ctx.resp_nme], data[self.ctx.weight_nme]
 
         def preprocess_fn(X_train, X_val):
             X_train_s, X_val_s, _ = self._standardize_fold(
                 X_train, X_val, self.ctx.num_features)
             return self._prepare_design(X_train_s), self._prepare_design(X_val_s)
 
         metric_ctx: Dict[str, Any] = {}
 
         def model_builder(params):
             family = self._select_family(params.get("tweedie_power"))
             metric_ctx["family"] = family
             metric_ctx["tweedie_power"] = params.get("tweedie_power")
             return {
                 "family": family,
                 "alpha": params["alpha"],
                 "l1_ratio": params["l1_ratio"],
                 "tweedie_power": params.get("tweedie_power")
             }
 
         def fit_predict(model_cfg, X_train, y_train, w_train, X_val, y_val, w_val, _trial):
             glm = sm.GLM(y_train, X_train,
                          family=model_cfg["family"],
                          freq_weights=w_train)
             result = glm.fit_regularized(
                 alpha=model_cfg["alpha"],
                 L1_wt=model_cfg["l1_ratio"],
                 maxiter=200
             )
             return result.predict(X_val)
 
         def metric_fn(y_true, y_pred, weight):
             if self.ctx.task_type == 'classification':
                 y_pred_clipped = np.clip(y_pred, EPS, 1 - EPS)
                 return log_loss(y_true, y_pred_clipped, sample_weight=weight)
             return regression_loss(
                 y_true,
                 y_pred,
                 weight,
                 loss_name=loss_name,
                 tweedie_power=metric_ctx.get("tweedie_power"),
             )
 
         return self.cross_val_generic(
             trial=trial,
             hyperparameter_space=param_space,
             data_provider=data_provider,
             model_builder=model_builder,
             metric_fn=metric_fn,
             preprocess_fn=preprocess_fn,
             fit_predict_fn=fit_predict
         )
 
     def train(self) -> None:
         if not self.best_params:
             raise RuntimeError("Run tune() first to obtain best GLM parameters.")
         tweedie_power = self.best_params.get('tweedie_power')
         family = self._select_family(tweedie_power)
 
         X_train = self._prepare_design(self.ctx.train_oht_scl_data)
         y_train = self.ctx.train_oht_scl_data[self.ctx.resp_nme]
         w_train = self.ctx.train_oht_scl_data[self.ctx.weight_nme]
 
         glm = sm.GLM(y_train, X_train, family=family,
                      freq_weights=w_train)
         self.model = glm.fit_regularized(
             alpha=self.best_params['alpha'],
             L1_wt=self.best_params['l1_ratio'],
             maxiter=300
         )
 
         self.ctx.glm_best = self.model
         self.ctx.model_label += [self.label]
         self._predict_and_cache(
             self.model,
             'glm',
             design_fn=lambda train: self._prepare_design(
                 self.ctx.train_oht_scl_data if train else self.ctx.test_oht_scl_data
             )
         )
 
     def ensemble_predict(self, k: int) -> None:
         if not self.best_params:
             raise RuntimeError("Run tune() first to obtain best GLM parameters.")
         k = max(2, int(k))
         data = self.ctx.train_oht_scl_data
         if data is None:
             raise RuntimeError("Missing standardized data for GLM ensemble.")
         X_all = data[self.ctx.var_nmes]
         y_all = data[self.ctx.resp_nme]
         w_all = data[self.ctx.weight_nme]
         X_test = self.ctx.test_oht_scl_data
         if X_test is None:
             raise RuntimeError("Missing standardized test data for GLM ensemble.")
 
         n_samples = len(X_all)
         X_all_design = self._prepare_design(data)
         X_test_design = self._prepare_design(X_test)
         tweedie_power = self.best_params.get('tweedie_power')
         family = self._select_family(tweedie_power)
 
         split_iter, _ = self._resolve_ensemble_splits(X_all, k=k)
         if split_iter is None:
             print(
                 f"[GLM Ensemble] unable to build CV split (n_samples={n_samples}); skip ensemble.",
                 flush=True,
             )
             return
         preds_train_sum = np.zeros(n_samples, dtype=np.float64)
         preds_test_sum = np.zeros(len(X_test_design), dtype=np.float64)
 
         split_count = 0
         for train_idx, _val_idx in split_iter:
             X_train = X_all_design.iloc[train_idx]
             y_train = y_all.iloc[train_idx]
             w_train = w_all.iloc[train_idx]
 
             glm = sm.GLM(y_train, X_train, family=family, freq_weights=w_train)
             result = glm.fit_regularized(
                 alpha=self.best_params['alpha'],
                 L1_wt=self.best_params['l1_ratio'],
                 maxiter=300
             )
             pred_train = result.predict(X_all_design)
             pred_test = result.predict(X_test_design)
             preds_train_sum += np.asarray(pred_train, dtype=np.float64)
             preds_test_sum += np.asarray(pred_test, dtype=np.float64)
             split_count += 1
 
         if split_count < 1:
             print(
                 f"[GLM Ensemble] no CV splits generated; skip ensemble.",
                 flush=True,
             )
             return
         preds_train = preds_train_sum / float(split_count)
         preds_test = preds_test_sum / float(split_count)
         self._cache_predictions("glm", preds_train, preds_test)
-
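For readers unfamiliar with the statsmodels calls this trainer wraps: `sm.GLM(..., freq_weights=...)` builds a weighted GLM, and `fit_regularized(alpha=..., L1_wt=...)` fits an elastic-net penalized version (`L1_wt=0` is pure ridge, `1` is pure lasso). A self-contained sketch of the same fit/predict pattern on synthetic data, not the trainer's own API:

```python
import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(0)
n = 500
X = pd.DataFrame({"age": rng.uniform(18, 80, n),
                  "veh_value": rng.lognormal(1.0, 0.5, n)})
mu = np.exp(0.02 * X["age"] + 0.1 * X["veh_value"] - 1.0)
y = rng.poisson(mu)   # stand-in for a Tweedie-like claims response
w = np.ones(n)        # exposure / frequency weights

# Same intercept trick as _prepare_design above.
design = sm.add_constant(X, has_constant="add")
family = sm.families.Tweedie(var_power=1.5, link=sm.families.links.log())

glm = sm.GLM(y, design, family=family, freq_weights=w)
result = glm.fit_regularized(alpha=1e-3, L1_wt=0.5, maxiter=200)  # elastic net
preds = result.predict(design)
```

`ensemble_predict` then repeats this fit once per CV split and averages the k predictions, a simple bagging scheme that trades k refits for more stable coefficients under strong regularization.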