ins-pricing 0.3.3-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. ins_pricing/docs/LOSS_FUNCTIONS.md +78 -0
  2. ins_pricing/docs/modelling/BayesOpt_USAGE.md +3 -3
  3. ins_pricing/frontend/QUICKSTART.md +152 -0
  4. ins_pricing/frontend/README.md +388 -0
  5. ins_pricing/frontend/__init__.py +10 -0
  6. ins_pricing/frontend/app.py +903 -0
  7. ins_pricing/frontend/config_builder.py +352 -0
  8. ins_pricing/frontend/example_config.json +36 -0
  9. ins_pricing/frontend/example_workflows.py +979 -0
  10. ins_pricing/frontend/ft_workflow.py +316 -0
  11. ins_pricing/frontend/runner.py +388 -0
  12. ins_pricing/modelling/core/bayesopt/config_preprocess.py +12 -0
  13. ins_pricing/modelling/core/bayesopt/core.py +21 -8
  14. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +16 -6
  15. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +16 -6
  16. ins_pricing/modelling/core/bayesopt/models/model_resn.py +16 -7
  17. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +2 -0
  18. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +25 -8
  19. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +14 -11
  20. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +29 -10
  21. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +28 -12
  22. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +13 -14
  23. ins_pricing/modelling/core/bayesopt/utils/losses.py +129 -0
  24. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +18 -3
  25. ins_pricing/modelling/core/bayesopt/utils/torch_trainer_mixin.py +24 -3
  26. ins_pricing/production/predict.py +693 -635
  27. ins_pricing/setup.py +1 -1
  28. ins_pricing/utils/metrics.py +27 -3
  29. {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/METADATA +162 -162
  30. {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/RECORD +32 -21
  31. {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/WHEEL +1 -1
  32. {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/top_level.txt +0 -0
ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py

@@ -7,10 +7,11 @@ import numpy as np
 import optuna
 import torch
 import xgboost as xgb
-from sklearn.metrics import log_loss, mean_tweedie_deviance
+from sklearn.metrics import log_loss

 from .trainer_base import TrainerBase
 from ..utils import EPS
+from ..utils.losses import regression_loss

 _XGB_CUDA_CHECKED = False
 _XGB_HAS_CUDA = False
@@ -230,18 +231,17 @@ class XGBTrainer(TrainerBase):
             'reg_alpha': reg_alpha,
             'reg_lambda': reg_lambda
         }
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
         tweedie_variance_power = None
         if self.ctx.task_type != 'classification':
-            if self.ctx.obj == 'reg:tweedie':
+            if loss_name == "tweedie":
                 tweedie_variance_power = trial.suggest_float(
                     'tweedie_variance_power', 1, 2)
                 params['tweedie_variance_power'] = tweedie_variance_power
-            elif self.ctx.obj == 'count:poisson':
-                tweedie_variance_power = 1
-            elif self.ctx.obj == 'reg:gamma':
-                tweedie_variance_power = 2
-            else:
-                tweedie_variance_power = 1.5
+            elif loss_name == "poisson":
+                tweedie_variance_power = 1.0
+            elif loss_name == "gamma":
+                tweedie_variance_power = 2.0
         X_all = self.ctx.train_data[self.ctx.factor_nmes]
         y_all = self.ctx.train_data[self.ctx.resp_nme].values
         w_all = self.ctx.train_data[self.ctx.weight_nme].values
@@ -272,12 +272,12 @@ class XGBTrainer(TrainerBase):
                 loss = log_loss(y_val, y_pred, sample_weight=w_val)
             else:
                 y_pred = clf.predict(X_val)
-                y_pred_safe = np.maximum(y_pred, EPS)
-                loss = mean_tweedie_deviance(
+                loss = regression_loss(
                     y_val,
-                    y_pred_safe,
-                    sample_weight=w_val,
-                    power=tweedie_variance_power,
+                    y_pred,
+                    w_val,
+                    loss_name=loss_name,
+                    tweedie_power=tweedie_variance_power,
                 )
             losses.append(float(loss))
         self._clean_gpu()
@@ -345,4 +345,3 @@ class XGBTrainer(TrainerBase):
         )
         self.ctx.xgb_best = self.model

-
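Taken together, the trainer_xgb.py changes replace branching on the raw objective string (`self.ctx.obj`) with a single configured `loss_name` read from the context, suggest `tweedie_variance_power` only when the loss actually is Tweedie, and route validation scoring through the new `regression_loss` helper instead of a hand-rolled `mean_tweedie_deviance` call. A minimal sketch of that flow against the installed wheel; the synthetic data and the standalone (non-Optuna) usage are illustrative assumptions, not package code:

```python
import numpy as np
import xgboost as xgb

from ins_pricing.modelling.core.bayesopt.utils.losses import (
    regression_loss,
    resolve_tweedie_power,
    resolve_xgb_objective,
)

loss_name = "poisson"  # e.g. taken from ctx.loss_name

# Illustrative synthetic frequency data (assumption, not package code).
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 4))
y = rng.poisson(lam=np.exp(0.3 * X[:, 0])).astype(float)
w = rng.uniform(0.5, 1.5, size=200)

params = {"objective": resolve_xgb_objective(loss_name)}  # -> "count:poisson"
power = resolve_tweedie_power(loss_name)                  # -> 1.0
if loss_name == "tweedie":
    # Only the Tweedie objective carries a tunable variance power.
    params["tweedie_variance_power"] = power

clf = xgb.XGBRegressor(n_estimators=50, **params)
clf.fit(X, y, sample_weight=w)

# Same call shape as in the diff above: loss_name picks the deviance.
val = regression_loss(y, clf.predict(X), w,
                      loss_name=loss_name, tweedie_power=power)
print(f"{loss_name} validation loss: {val:.4f}")
```

For 'mse'/'mae', `resolve_tweedie_power` returns None and no variance power is set at all, which is why the old `else: tweedie_variance_power = 1.5` fallback could be dropped.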
ins_pricing/modelling/core/bayesopt/utils/losses.py (new file)

@@ -0,0 +1,129 @@
+"""Loss selection and regression loss utilities."""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import numpy as np
+
+from ....explain.metrics import (
+    gamma_deviance,
+    poisson_deviance,
+    tweedie_deviance,
+)
+
+LOSS_ALIASES = {
+    "poisson_deviance": "poisson",
+    "gamma_deviance": "gamma",
+    "tweedie_deviance": "tweedie",
+    "l2": "mse",
+    "l1": "mae",
+    "absolute": "mae",
+    "gaussian": "mse",
+    "normal": "mse",
+}
+
+REGRESSION_LOSSES = {"tweedie", "poisson", "gamma", "mse", "mae"}
+CLASSIFICATION_LOSSES = {"logloss", "bce"}
+
+
+def normalize_loss_name(loss_name: Optional[str], task_type: str) -> str:
+    """Normalize the loss name and validate against supported values."""
+    name = str(loss_name or "auto").strip().lower()
+    if not name or name == "auto":
+        return "auto"
+    name = LOSS_ALIASES.get(name, name)
+    if task_type == "classification":
+        if name not in CLASSIFICATION_LOSSES:
+            raise ValueError(
+                f"Unsupported classification loss '{loss_name}'. "
+                f"Supported: {sorted(CLASSIFICATION_LOSSES)}"
+            )
+    else:
+        if name not in REGRESSION_LOSSES:
+            raise ValueError(
+                f"Unsupported regression loss '{loss_name}'. "
+                f"Supported: {sorted(REGRESSION_LOSSES)}"
+            )
+    return name
+
+
+def infer_loss_name_from_model_name(model_name: str) -> str:
+    """Preserve legacy heuristic for loss selection based on model name."""
+    name = str(model_name or "")
+    if "f" in name:
+        return "poisson"
+    if "s" in name:
+        return "gamma"
+    return "tweedie"
+
+
+def resolve_tweedie_power(loss_name: str, default: float = 1.5) -> Optional[float]:
+    """Resolve Tweedie power based on loss name."""
+    if loss_name == "poisson":
+        return 1.0
+    if loss_name == "gamma":
+        return 2.0
+    if loss_name == "tweedie":
+        return float(default)
+    return None
+
+
+def resolve_xgb_objective(loss_name: str) -> str:
+    """Map regression loss name to XGBoost objective."""
+    name = loss_name if loss_name != "auto" else "tweedie"
+    mapping = {
+        "tweedie": "reg:tweedie",
+        "poisson": "count:poisson",
+        "gamma": "reg:gamma",
+        "mse": "reg:squarederror",
+        "mae": "reg:absoluteerror",
+    }
+    return mapping.get(name, "reg:tweedie")
+
+
+def regression_loss(
+    y_true,
+    y_pred,
+    sample_weight=None,
+    *,
+    loss_name: str,
+    tweedie_power: Optional[float] = 1.5,
+    eps: float = 1e-8,
+) -> float:
+    """Compute weighted regression loss based on configured loss name."""
+    name = normalize_loss_name(loss_name, task_type="regression")
+    if name == "auto":
+        name = "tweedie"
+
+    y_t = np.asarray(y_true, dtype=float).reshape(-1)
+    y_p = np.asarray(y_pred, dtype=float).reshape(-1)
+    w = None if sample_weight is None else np.asarray(sample_weight, dtype=float).reshape(-1)
+
+    if name == "mse":
+        err = (y_t - y_p) ** 2
+        return _weighted_mean(err, w)
+    if name == "mae":
+        err = np.abs(y_t - y_p)
+        return _weighted_mean(err, w)
+    if name == "poisson":
+        return poisson_deviance(y_t, y_p, sample_weight=w, eps=eps)
+    if name == "gamma":
+        return gamma_deviance(y_t, y_p, sample_weight=w, eps=eps)
+
+    power = 1.5 if tweedie_power is None else float(tweedie_power)
+    return tweedie_deviance(y_t, y_p, sample_weight=w, power=power, eps=eps)
+
+
+def loss_requires_positive(loss_name: str) -> bool:
+    """Return True if the loss requires positive predictions."""
+    return loss_name in {"tweedie", "poisson", "gamma"}
+
+
+def _weighted_mean(values: np.ndarray, weight: Optional[np.ndarray]) -> float:
+    if weight is None:
+        return float(np.mean(values))
+    total = float(np.sum(weight))
+    if total <= 0:
+        return float(np.mean(values))
+    return float(np.sum(values * weight) / total)
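The new module is now the single source of truth for loss handling: aliases funnel to five canonical regression losses ('tweedie', 'poisson', 'gamma', 'mse', 'mae'), unknown names fail fast with a ValueError, and `regression_loss` dispatches to a weighted MSE/MAE or to the deviance metrics. A quick usage sketch (the import path follows the file location above; the arithmetic is worked by hand):

```python
from ins_pricing.modelling.core.bayesopt.utils.losses import (
    normalize_loss_name,
    regression_loss,
)

# Aliases and casing funnel to canonical names; None falls back to "auto".
assert normalize_loss_name("l2", task_type="regression") == "mse"
assert normalize_loss_name("Poisson_Deviance", task_type="regression") == "poisson"
assert normalize_loss_name(None, task_type="regression") == "auto"

# Weighted MSE: errors are (1-2)^2 = 1 and (2-4)^2 = 4, so
# (1*1 + 4*3) / (1 + 3) = 3.25.
print(regression_loss([1.0, 2.0], [2.0, 4.0], [1.0, 3.0], loss_name="mse"))
```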
ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py

@@ -24,7 +24,7 @@ import pandas as pd
 import torch
 import torch.nn as nn
 from torch.nn.parallel import DistributedDataParallel as DDP
-from sklearn.metrics import log_loss, mean_tweedie_deviance
+from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error, mean_tweedie_deviance
 from sklearn.model_selection import KFold, GroupKFold, TimeSeriesSplit, StratifiedKFold

 # Try to import plotting dependencies
@@ -112,6 +112,7 @@ class MetricFactory:
         self,
         task_type: str = "regression",
         tweedie_power: float = 1.5,
+        loss_name: str = "tweedie",
         clip_min: float = 1e-8,
         clip_max: float = 1 - 1e-8,
     ):
@@ -120,11 +121,13 @@ class MetricFactory:
         Args:
             task_type: Either 'regression' or 'classification'
             tweedie_power: Power parameter for Tweedie deviance (1.0-2.0)
+            loss_name: Regression loss name ('tweedie', 'poisson', 'gamma', 'mse', 'mae')
             clip_min: Minimum value for clipping predictions
             clip_max: Maximum value for clipping predictions (for classification)
         """
         self.task_type = task_type
         self.tweedie_power = tweedie_power
+        self.loss_name = loss_name
         self.clip_min = clip_min
         self.clip_max = clip_max

@@ -151,13 +154,25 @@ class MetricFactory:
             y_pred_clipped = np.clip(y_pred, self.clip_min, self.clip_max)
             return float(log_loss(y_true, y_pred_clipped, sample_weight=sample_weight))

-        # Regression: use Tweedie deviance
+        loss_name = str(self.loss_name or "tweedie").strip().lower()
+        if loss_name in {"mse", "mae"}:
+            if loss_name == "mse":
+                return float(mean_squared_error(
+                    y_true, y_pred, sample_weight=sample_weight))
+            return float(mean_absolute_error(
+                y_true, y_pred, sample_weight=sample_weight))
+
         y_pred_safe = np.maximum(y_pred, self.clip_min)
+        power = self.tweedie_power
+        if loss_name == "poisson":
+            power = 1.0
+        elif loss_name == "gamma":
+            power = 2.0
         return float(mean_tweedie_deviance(
             y_true,
             y_pred_safe,
             sample_weight=sample_weight,
-            power=self.tweedie_power,
+            power=power,
         ))

     def update_power(self, power: float) -> None:
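`MetricFactory` gains a `loss_name` argument; 'mse'/'mae' bypass the deviance path entirely, while 'poisson' and 'gamma' pin the Tweedie power to 1.0 and 2.0 regardless of the configured `tweedie_power`. The method containing this branch is not named in the hunk, so the sketch below restates just the branch logic against scikit-learn rather than calling into `MetricFactory` itself:

```python
import numpy as np
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    mean_tweedie_deviance,
)

def regression_metric(y_true, y_pred, sample_weight=None, *,
                      loss_name="tweedie", tweedie_power=1.5, clip_min=1e-8):
    name = str(loss_name or "tweedie").strip().lower()
    if name == "mse":
        return float(mean_squared_error(y_true, y_pred, sample_weight=sample_weight))
    if name == "mae":
        return float(mean_absolute_error(y_true, y_pred, sample_weight=sample_weight))
    # Deviance losses need strictly positive predictions.
    y_pred_safe = np.maximum(y_pred, clip_min)
    # Poisson/Gamma pin the power; plain 'tweedie' keeps the configured value.
    power = {"poisson": 1.0, "gamma": 2.0}.get(name, tweedie_power)
    return float(mean_tweedie_deviance(
        y_true, y_pred_safe, sample_weight=sample_weight, power=power))

y = np.array([0.0, 1.0, 3.0])
p = np.array([0.2, 1.5, 2.5])
print(regression_metric(y, p, loss_name="poisson"))  # Poisson deviance, power=1.0
```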
ins_pricing/modelling/core/bayesopt/utils/torch_trainer_mixin.py

@@ -52,6 +52,12 @@ except Exception:

 # Import from other utils modules
 from .constants import EPS, compute_batch_size, tweedie_loss, ensure_parent_dir
+from .losses import (
+    infer_loss_name_from_model_name,
+    loss_requires_positive,
+    normalize_loss_name,
+    resolve_tweedie_power,
+)
 from .distributed_utils import DistributedUtils

@@ -359,11 +365,26 @@ class TorchTrainerMixin:
         if task == 'classification':
             loss_fn = nn.BCEWithLogitsLoss(reduction='none')
             return loss_fn(y_pred, y_true).view(-1)
+        loss_name = normalize_loss_name(
+            getattr(self, "loss_name", None), task_type="regression"
+        )
+        if loss_name == "auto":
+            loss_name = infer_loss_name_from_model_name(getattr(self, "model_nme", ""))
         if apply_softplus:
             y_pred = F.softplus(y_pred)
-        y_pred = torch.clamp(y_pred, min=1e-6)
-        power = getattr(self, "tw_power", 1.5)
-        return tweedie_loss(y_pred, y_true, p=power).view(-1)
+        if loss_requires_positive(loss_name):
+            y_pred = torch.clamp(y_pred, min=1e-6)
+            power = resolve_tweedie_power(
+                loss_name, default=float(getattr(self, "tw_power", 1.5) or 1.5)
+            )
+            if power is None:
+                power = float(getattr(self, "tw_power", 1.5) or 1.5)
+            return tweedie_loss(y_pred, y_true, p=power).view(-1)
+        if loss_name == "mse":
+            return (y_pred - y_true).pow(2).view(-1)
+        if loss_name == "mae":
+            return (y_pred - y_true).abs().view(-1)
+        raise ValueError(f"Unsupported loss_name '{loss_name}' for regression.")

     def _compute_weighted_loss(self, y_pred, y_true, weights, apply_softplus: bool = False):
         """Compute weighted loss."""