ins-pricing 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/docs/LOSS_FUNCTIONS.md +78 -0
- ins_pricing/docs/modelling/BayesOpt_USAGE.md +3 -3
- ins_pricing/frontend/QUICKSTART.md +152 -0
- ins_pricing/frontend/README.md +388 -0
- ins_pricing/frontend/__init__.py +10 -0
- ins_pricing/frontend/app.py +903 -0
- ins_pricing/frontend/config_builder.py +352 -0
- ins_pricing/frontend/example_config.json +36 -0
- ins_pricing/frontend/example_workflows.py +979 -0
- ins_pricing/frontend/ft_workflow.py +316 -0
- ins_pricing/frontend/runner.py +388 -0
- ins_pricing/modelling/core/bayesopt/config_preprocess.py +12 -0
- ins_pricing/modelling/core/bayesopt/core.py +21 -8
- ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +16 -6
- ins_pricing/modelling/core/bayesopt/models/model_gnn.py +16 -6
- ins_pricing/modelling/core/bayesopt/models/model_resn.py +16 -7
- ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +2 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +25 -8
- ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +14 -11
- ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +29 -10
- ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +28 -12
- ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +13 -14
- ins_pricing/modelling/core/bayesopt/utils/losses.py +129 -0
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +18 -3
- ins_pricing/modelling/core/bayesopt/utils/torch_trainer_mixin.py +24 -3
- ins_pricing/production/predict.py +693 -635
- ins_pricing/setup.py +1 -1
- ins_pricing/utils/metrics.py +27 -3
- {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/METADATA +162 -162
- {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/RECORD +32 -21
- {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/WHEEL +1 -1
- {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/top_level.txt +0 -0
ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py

```diff
@@ -7,10 +7,11 @@ import numpy as np
 import optuna
 import torch
 import xgboost as xgb
-from sklearn.metrics import log_loss, mean_tweedie_deviance
+from sklearn.metrics import log_loss
 
 from .trainer_base import TrainerBase
 from ..utils import EPS
+from ..utils.losses import regression_loss
 
 _XGB_CUDA_CHECKED = False
 _XGB_HAS_CUDA = False
@@ -230,18 +231,17 @@ class XGBTrainer(TrainerBase):
             'reg_alpha': reg_alpha,
             'reg_lambda': reg_lambda
         }
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
         tweedie_variance_power = None
         if self.ctx.task_type != 'classification':
-            if …
+            if loss_name == "tweedie":
                 tweedie_variance_power = trial.suggest_float(
                     'tweedie_variance_power', 1, 2)
                 params['tweedie_variance_power'] = tweedie_variance_power
-            elif …
-                tweedie_variance_power = 1
-            elif …
-                tweedie_variance_power = 2
-            else:
-                tweedie_variance_power = 1.5
+            elif loss_name == "poisson":
+                tweedie_variance_power = 1.0
+            elif loss_name == "gamma":
+                tweedie_variance_power = 2.0
         X_all = self.ctx.train_data[self.ctx.factor_nmes]
         y_all = self.ctx.train_data[self.ctx.resp_nme].values
         w_all = self.ctx.train_data[self.ctx.weight_nme].values
@@ -272,12 +272,12 @@ class XGBTrainer(TrainerBase):
                 loss = log_loss(y_val, y_pred, sample_weight=w_val)
             else:
                 y_pred = clf.predict(X_val)
-                …
-                loss = mean_tweedie_deviance(
+                loss = regression_loss(
                     y_val,
-                    …
-                    …
-                    …
+                    y_pred,
+                    w_val,
+                    loss_name=loss_name,
+                    tweedie_power=tweedie_variance_power,
                 )
             losses.append(float(loss))
             self._clean_gpu()
@@ -345,4 +345,3 @@ class XGBTrainer(TrainerBase):
         )
         self.ctx.xgb_best = self.model
 
-
```
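In the trainer above, the per-trial loss is now driven by a single `loss_name` read from the trainer context instead of branching on the model name. A standalone sketch of the resulting Tweedie-power mapping (illustrative only; in the package the tuned value comes from `trial.suggest_float`):

```python
# Sketch of the loss_name -> Tweedie variance power mapping used in the XGB trial objective.
# Standalone illustration; `suggest` stands in for trial.suggest_float.

def pick_tweedie_power(loss_name: str, suggest=lambda lo, hi: 1.5):
    """Mirror the branch added to XGBTrainer: only 'tweedie' leaves the power tunable."""
    if loss_name == "tweedie":
        return suggest(1, 2)   # searched in (1, 2) by the Optuna trial
    if loss_name == "poisson":
        return 1.0             # Poisson deviance == Tweedie deviance with power 1
    if loss_name == "gamma":
        return 2.0             # Gamma deviance == Tweedie deviance with power 2
    return None                # mse / mae: no Tweedie power needed

for name in ("tweedie", "poisson", "gamma", "mse"):
    print(name, pick_tweedie_power(name))
```

Only the generic Tweedie case keeps the power as a tuned hyperparameter; Poisson and Gamma pin it to their canonical values, and the validation score is then computed by `regression_loss` with the same `loss_name`.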
ins_pricing/modelling/core/bayesopt/utils/losses.py (new file)

```diff
@@ -0,0 +1,129 @@
+"""Loss selection and regression loss utilities."""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import numpy as np
+
+from ....explain.metrics import (
+    gamma_deviance,
+    poisson_deviance,
+    tweedie_deviance,
+)
+
+LOSS_ALIASES = {
+    "poisson_deviance": "poisson",
+    "gamma_deviance": "gamma",
+    "tweedie_deviance": "tweedie",
+    "l2": "mse",
+    "l1": "mae",
+    "absolute": "mae",
+    "gaussian": "mse",
+    "normal": "mse",
+}
+
+REGRESSION_LOSSES = {"tweedie", "poisson", "gamma", "mse", "mae"}
+CLASSIFICATION_LOSSES = {"logloss", "bce"}
+
+
+def normalize_loss_name(loss_name: Optional[str], task_type: str) -> str:
+    """Normalize the loss name and validate against supported values."""
+    name = str(loss_name or "auto").strip().lower()
+    if not name or name == "auto":
+        return "auto"
+    name = LOSS_ALIASES.get(name, name)
+    if task_type == "classification":
+        if name not in CLASSIFICATION_LOSSES:
+            raise ValueError(
+                f"Unsupported classification loss '{loss_name}'. "
+                f"Supported: {sorted(CLASSIFICATION_LOSSES)}"
+            )
+    else:
+        if name not in REGRESSION_LOSSES:
+            raise ValueError(
+                f"Unsupported regression loss '{loss_name}'. "
+                f"Supported: {sorted(REGRESSION_LOSSES)}"
+            )
+    return name
+
+
+def infer_loss_name_from_model_name(model_name: str) -> str:
+    """Preserve legacy heuristic for loss selection based on model name."""
+    name = str(model_name or "")
+    if "f" in name:
+        return "poisson"
+    if "s" in name:
+        return "gamma"
+    return "tweedie"
+
+
+def resolve_tweedie_power(loss_name: str, default: float = 1.5) -> Optional[float]:
+    """Resolve Tweedie power based on loss name."""
+    if loss_name == "poisson":
+        return 1.0
+    if loss_name == "gamma":
+        return 2.0
+    if loss_name == "tweedie":
+        return float(default)
+    return None
+
+
+def resolve_xgb_objective(loss_name: str) -> str:
+    """Map regression loss name to XGBoost objective."""
+    name = loss_name if loss_name != "auto" else "tweedie"
+    mapping = {
+        "tweedie": "reg:tweedie",
+        "poisson": "count:poisson",
+        "gamma": "reg:gamma",
+        "mse": "reg:squarederror",
+        "mae": "reg:absoluteerror",
+    }
+    return mapping.get(name, "reg:tweedie")
+
+
+def regression_loss(
+    y_true,
+    y_pred,
+    sample_weight=None,
+    *,
+    loss_name: str,
+    tweedie_power: Optional[float] = 1.5,
+    eps: float = 1e-8,
+) -> float:
+    """Compute weighted regression loss based on configured loss name."""
+    name = normalize_loss_name(loss_name, task_type="regression")
+    if name == "auto":
+        name = "tweedie"
+
+    y_t = np.asarray(y_true, dtype=float).reshape(-1)
+    y_p = np.asarray(y_pred, dtype=float).reshape(-1)
+    w = None if sample_weight is None else np.asarray(sample_weight, dtype=float).reshape(-1)
+
+    if name == "mse":
+        err = (y_t - y_p) ** 2
+        return _weighted_mean(err, w)
+    if name == "mae":
+        err = np.abs(y_t - y_p)
+        return _weighted_mean(err, w)
+    if name == "poisson":
+        return poisson_deviance(y_t, y_p, sample_weight=w, eps=eps)
+    if name == "gamma":
+        return gamma_deviance(y_t, y_p, sample_weight=w, eps=eps)
+
+    power = 1.5 if tweedie_power is None else float(tweedie_power)
+    return tweedie_deviance(y_t, y_p, sample_weight=w, power=power, eps=eps)
+
+
+def loss_requires_positive(loss_name: str) -> bool:
+    """Return True if the loss requires positive predictions."""
+    return loss_name in {"tweedie", "poisson", "gamma"}
+
+
+def _weighted_mean(values: np.ndarray, weight: Optional[np.ndarray]) -> float:
+    if weight is None:
+        return float(np.mean(values))
+    total = float(np.sum(weight))
+    if total <= 0:
+        return float(np.mean(values))
+    return float(np.sum(values * weight) / total)
```
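The new helpers are pure functions, so they can be exercised directly. A minimal usage sketch, assuming ins_pricing 0.4.0 is installed and that importing the module resolves `ins_pricing.explain.metrics` for the deviance computations (import path taken from the RECORD above):

```python
# Hedged usage sketch of the new loss utilities; numeric outputs depend on
# ins_pricing.explain.metrics and are not reproduced here.
import numpy as np
from ins_pricing.modelling.core.bayesopt.utils.losses import (
    normalize_loss_name,
    regression_loss,
    resolve_tweedie_power,
    resolve_xgb_objective,
)

print(normalize_loss_name("l2", task_type="regression"))               # "mse" via LOSS_ALIASES
print(normalize_loss_name("poisson_deviance", task_type="regression")) # "poisson"
print(resolve_xgb_objective("gamma"))                                  # "reg:gamma"
print(resolve_tweedie_power("poisson"))                                # 1.0

y_true = np.array([0.0, 1.2, 3.4, 0.0])
y_pred = np.array([0.1, 1.0, 3.0, 0.2])
w = np.array([1.0, 2.0, 1.0, 0.5])

# Weighted MSE/MAE are computed in-module; deviance losses delegate to explain.metrics.
print(regression_loss(y_true, y_pred, w, loss_name="mse"))
print(regression_loss(y_true, y_pred, w, loss_name="tweedie", tweedie_power=1.5))
```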
ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py

```diff
@@ -24,7 +24,7 @@ import pandas as pd
 import torch
 import torch.nn as nn
 from torch.nn.parallel import DistributedDataParallel as DDP
-from sklearn.metrics import log_loss, mean_tweedie_deviance
+from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error, mean_tweedie_deviance
 from sklearn.model_selection import KFold, GroupKFold, TimeSeriesSplit, StratifiedKFold
 
 # Try to import plotting dependencies
@@ -112,6 +112,7 @@ class MetricFactory:
         self,
         task_type: str = "regression",
         tweedie_power: float = 1.5,
+        loss_name: str = "tweedie",
         clip_min: float = 1e-8,
         clip_max: float = 1 - 1e-8,
     ):
@@ -120,11 +121,13 @@ class MetricFactory:
         Args:
             task_type: Either 'regression' or 'classification'
             tweedie_power: Power parameter for Tweedie deviance (1.0-2.0)
+            loss_name: Regression loss name ('tweedie', 'poisson', 'gamma', 'mse', 'mae')
             clip_min: Minimum value for clipping predictions
             clip_max: Maximum value for clipping predictions (for classification)
         """
         self.task_type = task_type
         self.tweedie_power = tweedie_power
+        self.loss_name = loss_name
         self.clip_min = clip_min
         self.clip_max = clip_max
 
@@ -151,13 +154,25 @@ class MetricFactory:
             y_pred_clipped = np.clip(y_pred, self.clip_min, self.clip_max)
             return float(log_loss(y_true, y_pred_clipped, sample_weight=sample_weight))
 
-        …
+        loss_name = str(self.loss_name or "tweedie").strip().lower()
+        if loss_name in {"mse", "mae"}:
+            if loss_name == "mse":
+                return float(mean_squared_error(
+                    y_true, y_pred, sample_weight=sample_weight))
+            return float(mean_absolute_error(
+                y_true, y_pred, sample_weight=sample_weight))
+
         y_pred_safe = np.maximum(y_pred, self.clip_min)
+        power = self.tweedie_power
+        if loss_name == "poisson":
+            power = 1.0
+        elif loss_name == "gamma":
+            power = 2.0
         return float(mean_tweedie_deviance(
             y_true,
             y_pred_safe,
             sample_weight=sample_weight,
-            power=self.tweedie_power,
+            power=power,
         ))
 
     def update_power(self, power: float) -> None:
```
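`MetricFactory` keeps `mean_tweedie_deviance` as the default regression metric but now resolves the effective power from `loss_name` and short-circuits to MSE/MAE when requested. A standalone sketch of that dispatch (illustrative function name, not the class's actual method):

```python
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_tweedie_deviance

def regression_metric(y_true, y_pred, sample_weight=None, *,
                      loss_name="tweedie", tweedie_power=1.5, clip_min=1e-8):
    """Illustrative stand-in for MetricFactory's regression scoring path."""
    name = str(loss_name or "tweedie").strip().lower()
    if name == "mse":
        return float(mean_squared_error(y_true, y_pred, sample_weight=sample_weight))
    if name == "mae":
        return float(mean_absolute_error(y_true, y_pred, sample_weight=sample_weight))
    y_pred_safe = np.maximum(y_pred, clip_min)           # deviances need positive predictions
    power = {"poisson": 1.0, "gamma": 2.0}.get(name, tweedie_power)
    return float(mean_tweedie_deviance(y_true, y_pred_safe,
                                       sample_weight=sample_weight, power=power))

print(regression_metric([1.0, 2.0], [1.1, 1.8], loss_name="poisson"))
```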
ins_pricing/modelling/core/bayesopt/utils/torch_trainer_mixin.py

```diff
@@ -52,6 +52,12 @@ except Exception:
 
 # Import from other utils modules
 from .constants import EPS, compute_batch_size, tweedie_loss, ensure_parent_dir
+from .losses import (
+    infer_loss_name_from_model_name,
+    loss_requires_positive,
+    normalize_loss_name,
+    resolve_tweedie_power,
+)
 from .distributed_utils import DistributedUtils
 
 
@@ -359,11 +365,26 @@ class TorchTrainerMixin:
         if task == 'classification':
             loss_fn = nn.BCEWithLogitsLoss(reduction='none')
             return loss_fn(y_pred, y_true).view(-1)
+        loss_name = normalize_loss_name(
+            getattr(self, "loss_name", None), task_type="regression"
+        )
+        if loss_name == "auto":
+            loss_name = infer_loss_name_from_model_name(getattr(self, "model_nme", ""))
         if apply_softplus:
             y_pred = F.softplus(y_pred)
-        …
-        …
-        …
+        if loss_requires_positive(loss_name):
+            y_pred = torch.clamp(y_pred, min=1e-6)
+            power = resolve_tweedie_power(
+                loss_name, default=float(getattr(self, "tw_power", 1.5) or 1.5)
+            )
+            if power is None:
+                power = float(getattr(self, "tw_power", 1.5) or 1.5)
+            return tweedie_loss(y_pred, y_true, p=power).view(-1)
+        if loss_name == "mse":
+            return (y_pred - y_true).pow(2).view(-1)
+        if loss_name == "mae":
+            return (y_pred - y_true).abs().view(-1)
+        raise ValueError(f"Unsupported loss_name '{loss_name}' for regression.")
 
     def _compute_weighted_loss(self, y_pred, y_true, weights, apply_softplus: bool = False):
         """Compute weighted loss."""
```