ins-pricing 0.3.3-py3-none-any.whl → 0.3.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,10 +6,11 @@ from typing import Any, Dict, List, Optional, Tuple
 import numpy as np
 import optuna
 import torch
-from sklearn.metrics import log_loss, mean_tweedie_deviance
+from sklearn.metrics import log_loss
 
 from .trainer_base import TrainerBase
 from ..models import ResNetSklearn
+from ..utils.losses import regression_loss
 
 class ResNetTrainer(TrainerBase):
     def __init__(self, context: "BayesOptModel") -> None:
@@ -28,9 +29,16 @@ class ResNetTrainer(TrainerBase):
 
     def _build_model(self, params: Optional[Dict[str, Any]] = None) -> ResNetSklearn:
         params = params or {}
-        power = params.get("tw_power", self.ctx.default_tweedie_power())
-        if power is not None:
-            power = float(power)
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
+        power = params.get("tw_power")
+        if self.ctx.task_type == "regression":
+            base_tw = self.ctx.default_tweedie_power()
+            if loss_name == "tweedie":
+                power = base_tw if power is None else float(power)
+            elif loss_name in ("poisson", "gamma"):
+                power = base_tw
+            else:
+                power = None
         resn_weight_decay = float(
             params.get(
                 "weight_decay",
@@ -53,7 +61,8 @@ class ResNetTrainer(TrainerBase):
             stochastic_depth=float(params.get("stochastic_depth", 0.0)),
             weight_decay=resn_weight_decay,
             use_data_parallel=self.ctx.config.use_resn_data_parallel,
-            use_ddp=self.ctx.config.use_resn_ddp
+            use_ddp=self.ctx.config.use_resn_ddp,
+            loss_name=loss_name
         )
 
     # ========= Cross-validation (for BayesOpt) =========
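The net effect of the `_build_model` change: `tw_power` stays a free parameter only when the configured loss is Tweedie; Poisson and Gamma pin it to the context default, and other losses drop it entirely. A standalone sketch of the rule (the `resolve_power` name and the 1.5 default are illustrative, not from the package):

```python
# Illustrative restatement of the new tw_power resolution (hypothetical helper).
def resolve_power(loss_name, tw_power=None, base_tw=1.5):
    if loss_name == "tweedie":
        return base_tw if tw_power is None else float(tw_power)
    if loss_name in ("poisson", "gamma"):
        return base_tw  # pinned; the context default already reflects the loss
    return None  # mse / mae take no Tweedie power

assert resolve_power("tweedie", 1.3) == 1.3
assert resolve_power("mse") is None
```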
@@ -64,6 +73,7 @@ class ResNetTrainer(TrainerBase):
         # - Optionally sample part of training data during BayesOpt to reduce memory.
 
         base_tw_power = self.ctx.default_tweedie_power()
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
 
         def data_provider():
             data = self.ctx.train_oht_data if self.ctx.train_oht_data is not None else self.ctx.train_oht_scl_data
@@ -73,10 +83,16 @@ class ResNetTrainer(TrainerBase):
         metric_ctx: Dict[str, Any] = {}
 
         def model_builder(params):
-            power = params.get("tw_power", base_tw_power)
+            if loss_name == "tweedie":
+                power = params.get("tw_power", base_tw_power)
+            elif loss_name in ("poisson", "gamma"):
+                power = base_tw_power
+            else:
+                power = None
             metric_ctx["tw_power"] = power
             params_local = dict(params)
-            params_local["tw_power"] = power
+            if power is not None:
+                params_local["tw_power"] = power
             return self._build_model(params_local)
 
         def preprocess_fn(X_train, X_val):
@@ -94,11 +110,12 @@ class ResNetTrainer(TrainerBase):
 
         def metric_fn(y_true, y_pred, weight):
             if self.ctx.task_type == 'regression':
-                return mean_tweedie_deviance(
+                return regression_loss(
                     y_true,
                     y_pred,
-                    sample_weight=weight,
-                    power=metric_ctx.get("tw_power", base_tw_power)
+                    weight,
+                    loss_name=loss_name,
+                    tweedie_power=metric_ctx.get("tw_power", base_tw_power),
                 )
             return log_loss(y_true, y_pred, sample_weight=weight)
 
@@ -115,7 +132,7 @@ class ResNetTrainer(TrainerBase):
                 "residual_scale": lambda t: t.suggest_float('residual_scale', 0.05, 0.3, step=0.05),
                 "patience": lambda t: t.suggest_int('patience', 3, 12),
                 "stochastic_depth": lambda t: t.suggest_float('stochastic_depth', 0.0, 0.2, step=0.05),
-                **({"tw_power": lambda t: t.suggest_float('tw_power', 1.0, 2.0)} if self.ctx.task_type == 'regression' and self.ctx.obj == 'reg:tweedie' else {})
+                **({"tw_power": lambda t: t.suggest_float('tw_power', 1.0, 2.0)} if self.ctx.task_type == 'regression' and loss_name == 'tweedie' else {})
             },
             data_provider=data_provider,
             model_builder=model_builder,
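The search space keeps the same conditional-spread pattern but now gates on the resolved `loss_name` instead of the XGBoost-style objective string, so Poisson and Gamma runs never spend Optuna trials on a parameter the loss pins. A minimal sketch of the pattern (`build_space` is illustrative):

```python
# Conditionally include tw_power in an Optuna search space (illustrative).
def build_space(loss_name: str):
    return {
        "patience": lambda t: t.suggest_int("patience", 3, 12),
        **({"tw_power": lambda t: t.suggest_float("tw_power", 1.0, 2.0)}
           if loss_name == "tweedie" else {}),
    }

assert "tw_power" in build_space("tweedie")
assert "tw_power" not in build_space("poisson")
```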
@@ -263,4 +280,3 @@ class ResNetTrainer(TrainerBase):
             self.ctx.resn_best = self.model
         else:
             print(f"[ResNetTrainer.load] Model file not found: {path}")
-
@@ -7,10 +7,11 @@ import numpy as np
 import optuna
 import torch
 import xgboost as xgb
-from sklearn.metrics import log_loss, mean_tweedie_deviance
+from sklearn.metrics import log_loss
 
 from .trainer_base import TrainerBase
 from ..utils import EPS
+from ..utils.losses import regression_loss
 
 _XGB_CUDA_CHECKED = False
 _XGB_HAS_CUDA = False
@@ -230,18 +231,17 @@ class XGBTrainer(TrainerBase):
             'reg_alpha': reg_alpha,
             'reg_lambda': reg_lambda
         }
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
         tweedie_variance_power = None
         if self.ctx.task_type != 'classification':
-            if self.ctx.obj == 'reg:tweedie':
+            if loss_name == "tweedie":
                 tweedie_variance_power = trial.suggest_float(
                     'tweedie_variance_power', 1, 2)
                 params['tweedie_variance_power'] = tweedie_variance_power
-            elif self.ctx.obj == 'count:poisson':
-                tweedie_variance_power = 1
-            elif self.ctx.obj == 'reg:gamma':
-                tweedie_variance_power = 2
-            else:
-                tweedie_variance_power = 1.5
+            elif loss_name == "poisson":
+                tweedie_variance_power = 1.0
+            elif loss_name == "gamma":
+                tweedie_variance_power = 2.0
         X_all = self.ctx.train_data[self.ctx.factor_nmes]
         y_all = self.ctx.train_data[self.ctx.resp_nme].values
         w_all = self.ctx.train_data[self.ctx.weight_nme].values
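This is the same specialization as in the ResNet trainer, restated for XGBoost: Poisson and Gamma are boundary members of the Tweedie family, so their variance powers are fixed rather than tuned, and the old catch-all `else: 1.5` branch is gone (mse/mae now leave the power as `None`). The implied pairing, as a standalone reference:

```python
# Loss-name -> (XGBoost objective, Tweedie variance power) implied by this hunk.
# Objectives match resolve_xgb_objective in the new losses module further down.
PAIRING = {
    "tweedie": ("reg:tweedie", None),   # power tuned in [1, 2] via Optuna
    "poisson": ("count:poisson", 1.0),  # Poisson deviance is the p=1 member
    "gamma": ("reg:gamma", 2.0),        # Gamma deviance is the p=2 member
}
```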
@@ -272,12 +272,12 @@ class XGBTrainer(TrainerBase):
                 loss = log_loss(y_val, y_pred, sample_weight=w_val)
             else:
                 y_pred = clf.predict(X_val)
-                y_pred_safe = np.maximum(y_pred, EPS)
-                loss = mean_tweedie_deviance(
+                loss = regression_loss(
                     y_val,
-                    y_pred_safe,
-                    sample_weight=w_val,
-                    power=tweedie_variance_power,
+                    y_pred,
+                    w_val,
+                    loss_name=loss_name,
+                    tweedie_power=tweedie_variance_power,
                 )
             losses.append(float(loss))
             self._clean_gpu()
@@ -345,4 +345,3 @@ class XGBTrainer(TrainerBase):
         )
         self.ctx.xgb_best = self.model
 
-
@@ -0,0 +1,129 @@
+"""Loss selection and regression loss utilities."""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import numpy as np
+
+from ....explain.metrics import (
+    gamma_deviance,
+    poisson_deviance,
+    tweedie_deviance,
+)
+
+LOSS_ALIASES = {
+    "poisson_deviance": "poisson",
+    "gamma_deviance": "gamma",
+    "tweedie_deviance": "tweedie",
+    "l2": "mse",
+    "l1": "mae",
+    "absolute": "mae",
+    "gaussian": "mse",
+    "normal": "mse",
+}
+
+REGRESSION_LOSSES = {"tweedie", "poisson", "gamma", "mse", "mae"}
+CLASSIFICATION_LOSSES = {"logloss", "bce"}
+
+
+def normalize_loss_name(loss_name: Optional[str], task_type: str) -> str:
+    """Normalize the loss name and validate against supported values."""
+    name = str(loss_name or "auto").strip().lower()
+    if not name or name == "auto":
+        return "auto"
+    name = LOSS_ALIASES.get(name, name)
+    if task_type == "classification":
+        if name not in CLASSIFICATION_LOSSES:
+            raise ValueError(
+                f"Unsupported classification loss '{loss_name}'. "
+                f"Supported: {sorted(CLASSIFICATION_LOSSES)}"
+            )
+    else:
+        if name not in REGRESSION_LOSSES:
+            raise ValueError(
+                f"Unsupported regression loss '{loss_name}'. "
+                f"Supported: {sorted(REGRESSION_LOSSES)}"
+            )
+    return name
+
+
+def infer_loss_name_from_model_name(model_name: str) -> str:
+    """Preserve legacy heuristic for loss selection based on model name."""
+    name = str(model_name or "")
+    if "f" in name:
+        return "poisson"
+    if "s" in name:
+        return "gamma"
+    return "tweedie"
+
+
+def resolve_tweedie_power(loss_name: str, default: float = 1.5) -> Optional[float]:
+    """Resolve Tweedie power based on loss name."""
+    if loss_name == "poisson":
+        return 1.0
+    if loss_name == "gamma":
+        return 2.0
+    if loss_name == "tweedie":
+        return float(default)
+    return None
+
+
+def resolve_xgb_objective(loss_name: str) -> str:
+    """Map regression loss name to XGBoost objective."""
+    name = loss_name if loss_name != "auto" else "tweedie"
+    mapping = {
+        "tweedie": "reg:tweedie",
+        "poisson": "count:poisson",
+        "gamma": "reg:gamma",
+        "mse": "reg:squarederror",
+        "mae": "reg:absoluteerror",
+    }
+    return mapping.get(name, "reg:tweedie")
+
+
+def regression_loss(
+    y_true,
+    y_pred,
+    sample_weight=None,
+    *,
+    loss_name: str,
+    tweedie_power: Optional[float] = 1.5,
+    eps: float = 1e-8,
+) -> float:
+    """Compute weighted regression loss based on configured loss name."""
+    name = normalize_loss_name(loss_name, task_type="regression")
+    if name == "auto":
+        name = "tweedie"
+
+    y_t = np.asarray(y_true, dtype=float).reshape(-1)
+    y_p = np.asarray(y_pred, dtype=float).reshape(-1)
+    w = None if sample_weight is None else np.asarray(sample_weight, dtype=float).reshape(-1)
+
+    if name == "mse":
+        err = (y_t - y_p) ** 2
+        return _weighted_mean(err, w)
+    if name == "mae":
+        err = np.abs(y_t - y_p)
+        return _weighted_mean(err, w)
+    if name == "poisson":
+        return poisson_deviance(y_t, y_p, sample_weight=w, eps=eps)
+    if name == "gamma":
+        return gamma_deviance(y_t, y_p, sample_weight=w, eps=eps)
+
+    power = 1.5 if tweedie_power is None else float(tweedie_power)
+    return tweedie_deviance(y_t, y_p, sample_weight=w, power=power, eps=eps)
+
+
+def loss_requires_positive(loss_name: str) -> bool:
+    """Return True if the loss requires positive predictions."""
+    return loss_name in {"tweedie", "poisson", "gamma"}
+
+
+def _weighted_mean(values: np.ndarray, weight: Optional[np.ndarray]) -> float:
+    if weight is None:
+        return float(np.mean(values))
+    total = float(np.sum(weight))
+    if total <= 0:
+        return float(np.mean(values))
+    return float(np.sum(values * weight) / total)
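A short usage sketch of the new module. The import path is inferred from the relative imports in this diff (the trainers pull it in as `..utils.losses` under `bayesopt`), so treat it as an assumption; the numbers are worked by hand:

```python
# Assumed module path, inferred from the relative imports above.
import numpy as np
from ins_pricing.modelling.core.bayesopt.utils.losses import (
    normalize_loss_name, regression_loss, resolve_xgb_objective,
)

name = normalize_loss_name("l2", task_type="regression")  # alias -> "mse"
y = np.array([1.0, 2.0]); p = np.array([1.5, 1.5]); w = np.array([1.0, 3.0])
# Weighted MSE: (0.25 * 1 + 0.25 * 3) / 4 = 0.25
assert abs(regression_loss(y, p, w, loss_name=name) - 0.25) < 1e-12
assert resolve_xgb_objective("gamma") == "reg:gamma"
```

For `loss_name="tweedie"` the helper delegates to the package's own `tweedie_deviance`, so results should track the `sklearn.metrics.mean_tweedie_deviance` calls it replaces in the trainers, up to the `eps` clamping.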
@@ -24,7 +24,7 @@ import pandas as pd
 import torch
 import torch.nn as nn
 from torch.nn.parallel import DistributedDataParallel as DDP
-from sklearn.metrics import log_loss, mean_tweedie_deviance
+from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error, mean_tweedie_deviance
 from sklearn.model_selection import KFold, GroupKFold, TimeSeriesSplit, StratifiedKFold
 
 # Try to import plotting dependencies
@@ -112,6 +112,7 @@ class MetricFactory:
         self,
         task_type: str = "regression",
         tweedie_power: float = 1.5,
+        loss_name: str = "tweedie",
         clip_min: float = 1e-8,
         clip_max: float = 1 - 1e-8,
     ):
@@ -120,11 +121,13 @@ class MetricFactory:
         Args:
             task_type: Either 'regression' or 'classification'
             tweedie_power: Power parameter for Tweedie deviance (1.0-2.0)
+            loss_name: Regression loss name ('tweedie', 'poisson', 'gamma', 'mse', 'mae')
             clip_min: Minimum value for clipping predictions
             clip_max: Maximum value for clipping predictions (for classification)
         """
         self.task_type = task_type
         self.tweedie_power = tweedie_power
+        self.loss_name = loss_name
         self.clip_min = clip_min
         self.clip_max = clip_max
 
@@ -151,13 +154,25 @@ class MetricFactory:
             y_pred_clipped = np.clip(y_pred, self.clip_min, self.clip_max)
             return float(log_loss(y_true, y_pred_clipped, sample_weight=sample_weight))
 
-        # Regression: use Tweedie deviance
+        loss_name = str(self.loss_name or "tweedie").strip().lower()
+        if loss_name in {"mse", "mae"}:
+            if loss_name == "mse":
+                return float(mean_squared_error(
+                    y_true, y_pred, sample_weight=sample_weight))
+            return float(mean_absolute_error(
+                y_true, y_pred, sample_weight=sample_weight))
+
         y_pred_safe = np.maximum(y_pred, self.clip_min)
+        power = self.tweedie_power
+        if loss_name == "poisson":
+            power = 1.0
+        elif loss_name == "gamma":
+            power = 2.0
         return float(mean_tweedie_deviance(
             y_true,
             y_pred_safe,
             sample_weight=sample_weight,
-            power=self.tweedie_power,
+            power=power,
         ))
 
     def update_power(self, power: float) -> None:
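The override keeps `tweedie_power` as the stored attribute but lets `loss_name` take precedence on the deviance path at evaluation time (mse/mae return before any power is computed). A standalone restatement (the `effective_power` name is hypothetical):

```python
# Effective deviance power used on MetricFactory's deviance path after this change.
def effective_power(loss_name: str, tweedie_power: float = 1.5) -> float:
    name = str(loss_name or "tweedie").strip().lower()
    return {"poisson": 1.0, "gamma": 2.0}.get(name, tweedie_power)

assert effective_power("gamma") == 2.0
assert effective_power("tweedie", 1.3) == 1.3
```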
@@ -52,6 +52,12 @@ except Exception:
 
 # Import from other utils modules
 from .constants import EPS, compute_batch_size, tweedie_loss, ensure_parent_dir
+from .losses import (
+    infer_loss_name_from_model_name,
+    loss_requires_positive,
+    normalize_loss_name,
+    resolve_tweedie_power,
+)
 from .distributed_utils import DistributedUtils
 
 
@@ -359,11 +365,26 @@ class TorchTrainerMixin:
         if task == 'classification':
             loss_fn = nn.BCEWithLogitsLoss(reduction='none')
             return loss_fn(y_pred, y_true).view(-1)
+        loss_name = normalize_loss_name(
+            getattr(self, "loss_name", None), task_type="regression"
+        )
+        if loss_name == "auto":
+            loss_name = infer_loss_name_from_model_name(getattr(self, "model_nme", ""))
         if apply_softplus:
             y_pred = F.softplus(y_pred)
-        y_pred = torch.clamp(y_pred, min=1e-6)
-        power = getattr(self, "tw_power", 1.5)
-        return tweedie_loss(y_pred, y_true, p=power).view(-1)
+        if loss_requires_positive(loss_name):
+            y_pred = torch.clamp(y_pred, min=1e-6)
+            power = resolve_tweedie_power(
+                loss_name, default=float(getattr(self, "tw_power", 1.5) or 1.5)
+            )
+            if power is None:
+                power = float(getattr(self, "tw_power", 1.5) or 1.5)
+            return tweedie_loss(y_pred, y_true, p=power).view(-1)
+        if loss_name == "mse":
+            return (y_pred - y_true).pow(2).view(-1)
+        if loss_name == "mae":
+            return (y_pred - y_true).abs().view(-1)
+        raise ValueError(f"Unsupported loss_name '{loss_name}' for regression.")
 
     def _compute_weighted_loss(self, y_pred, y_true, weights, apply_softplus: bool = False):
         """Compute weighted loss."""
@@ -23,6 +23,11 @@ from .preprocess import (
 from .scoring import batch_score
 from ..modelling.core.bayesopt.models.model_gnn import GraphNeuralNetSklearn
 from ..modelling.core.bayesopt.models.model_resn import ResNetSklearn
+from ..modelling.core.bayesopt.utils.losses import (
+    infer_loss_name_from_model_name,
+    normalize_loss_name,
+    resolve_tweedie_power,
+)
 from ins_pricing.utils import DeviceManager, get_logger
 from ins_pricing.utils.torch_compat import torch_load
 
@@ -50,6 +55,15 @@ def _default_tweedie_power(model_name: str, task_type: str) -> Optional[float]:
     return 1.5
 
 
+def _resolve_loss_name(cfg: Dict[str, Any], model_name: str, task_type: str) -> str:
+    normalized = normalize_loss_name(cfg.get("loss_name"), task_type)
+    if task_type == "classification":
+        return "logloss" if normalized == "auto" else normalized
+    if normalized == "auto":
+        return infer_loss_name_from_model_name(model_name)
+    return normalized
+
+
 def _resolve_value(
     value: Any,
     *,
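Resolution precedence is: explicit `cfg["loss_name"]` first; otherwise, for regression, the legacy model-name heuristic from the losses module (an "f" in the name selects Poisson, an "s" selects Gamma, plausibly frequency/severity conventions); classification defaults to logloss. A local restatement with worked cases (the `resolve` wrapper and model names are illustrative; the helper import path is inferred from this diff):

```python
from ins_pricing.modelling.core.bayesopt.utils.losses import (  # assumed path
    infer_loss_name_from_model_name, normalize_loss_name,
)

def resolve(cfg: dict, model_name: str) -> str:
    name = normalize_loss_name(cfg.get("loss_name"), task_type="regression")
    return infer_loss_name_from_model_name(model_name) if name == "auto" else name

assert resolve({"loss_name": "gamma"}, "bi_f") == "gamma"  # explicit cfg wins
assert resolve({}, "bi_f") == "poisson"                    # "f" in the model name
assert resolve({}, "bi_s") == "gamma"                      # "s", and no "f"
```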
@@ -182,11 +196,14 @@ def _build_resn_model(
     task_type: str,
     epochs: int,
     resn_weight_decay: float,
+    loss_name: str,
     params: Dict[str, Any],
 ) -> ResNetSklearn:
-    power = params.get("tw_power", _default_tweedie_power(model_name, task_type))
-    if power is not None:
-        power = float(power)
+    if loss_name == "tweedie":
+        power = params.get("tw_power", _default_tweedie_power(model_name, task_type))
+        power = float(power) if power is not None else None
+    else:
+        power = resolve_tweedie_power(loss_name, default=1.5)
     weight_decay = float(params.get("weight_decay", resn_weight_decay))
     return ResNetSklearn(
         model_nme=model_name,
@@ -205,6 +222,7 @@ def _build_resn_model(
         weight_decay=weight_decay,
         use_data_parallel=False,
         use_ddp=False,
+        loss_name=loss_name,
     )
 
 
@@ -215,9 +233,15 @@ def _build_gnn_model(
     task_type: str,
     epochs: int,
     cfg: Dict[str, Any],
+    loss_name: str,
     params: Dict[str, Any],
 ) -> GraphNeuralNetSklearn:
     base_tw = _default_tweedie_power(model_name, task_type)
+    if loss_name == "tweedie":
+        tw_power = params.get("tw_power", base_tw)
+        tw_power = float(tw_power) if tw_power is not None else None
+    else:
+        tw_power = resolve_tweedie_power(loss_name, default=1.5)
     return GraphNeuralNetSklearn(
         model_nme=f"{model_name}_gnn",
         input_dim=input_dim,
@@ -229,7 +253,7 @@ def _build_gnn_model(
         epochs=int(params.get("epochs", epochs)),
         patience=int(params.get("patience", 5)),
         task_type=task_type,
-        tweedie_power=float(params.get("tw_power", base_tw or 1.5)),
+        tweedie_power=tw_power,
         weight_decay=float(params.get("weight_decay", 0.0)),
         use_data_parallel=False,
         use_ddp=False,
@@ -239,6 +263,7 @@ def _build_gnn_model(
         max_gpu_knn_nodes=cfg.get("gnn_max_gpu_knn_nodes"),
         knn_gpu_mem_ratio=cfg.get("gnn_knn_gpu_mem_ratio", 0.9),
         knn_gpu_mem_overhead=cfg.get("gnn_knn_gpu_mem_overhead", 2.0),
+        loss_name=loss_name,
     )
 
 
@@ -273,6 +298,9 @@ def load_saved_model(
     from ..modelling.core.bayesopt.models.model_ft_components import FTTransformerCore
 
     # Reconstruct model from config
+    resolved_loss = model_config.get("loss_name")
+    if not resolved_loss:
+        resolved_loss = _resolve_loss_name(cfg, model_name, task_type)
     model = FTTransformerSklearn(
         model_nme=model_config.get("model_nme", ""),
         num_cols=model_config.get("num_cols", []),
@@ -282,6 +310,7 @@ def load_saved_model(
         n_layers=model_config.get("n_layers", 4),
         dropout=model_config.get("dropout", 0.1),
         task_type=model_config.get("task_type", "regression"),
+        loss_name=resolved_loss,
         tweedie_power=model_config.get("tw_power", 1.5),
         num_numeric_tokens=model_config.get("num_numeric_tokens"),
         use_data_parallel=False,
@@ -337,12 +366,14 @@ def load_saved_model(
     params = load_best_params(output_dir, model_name, model_key)
     if params is None:
         raise RuntimeError("Best params not found for resn")
+    loss_name = _resolve_loss_name(cfg, model_name, task_type)
     model = _build_resn_model(
         model_name=model_name,
         input_dim=input_dim,
         task_type=task_type,
         epochs=int(cfg.get("epochs", 50)),
         resn_weight_decay=float(cfg.get("resn_weight_decay", 1e-4)),
+        loss_name=loss_name,
         params=params,
     )
     model.resnet.load_state_dict(state_dict)
@@ -357,12 +388,14 @@ def load_saved_model(
         raise ValueError(f"Invalid GNN checkpoint: {model_path}")
     params = payload.get("best_params") or {}
     state_dict = payload.get("state_dict")
+    loss_name = _resolve_loss_name(cfg, model_name, task_type)
     model = _build_gnn_model(
         model_name=model_name,
         input_dim=input_dim,
         task_type=task_type,
         epochs=int(cfg.get("epochs", 50)),
         cfg=cfg,
+        loss_name=loss_name,
         params=params,
     )
     model.set_params(dict(params))
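Taken together, the loader hunks give every reconstructed model the same loss-name chain: metadata stored with the checkpoint wins, else the run config, else the model-name heuristic. Condensed (the wrapper name is illustrative; `_resolve_loss_name` is the helper added earlier in this file):

```python
# Condensed view of the loader's loss-name fallback (illustrative wrapper).
def checkpoint_loss_name(model_config, cfg, model_name, task_type):
    stored = model_config.get("loss_name")
    return stored if stored else _resolve_loss_name(cfg, model_name, task_type)
```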
@@ -628,8 +661,4 @@ def predict_from_config(
     if output_path:
         output_path = Path(output_path)
         output_path.parent.mkdir(parents=True, exist_ok=True)
-        if output_path.suffix.lower() in {".parquet", ".pq"}:
-            result.to_parquet(output_path, index=False)
-        else:
-            result.to_csv(output_path, index=False)
-    return result
+        if output_path.suffix.lower
ins_pricing/setup.py CHANGED
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
 
 setup(
     name="ins_pricing",
-    version="0.3.3",
+    version="0.3.4",
     description="Reusable modelling, pricing, governance, and reporting utilities.",
     author="meishi125478",
     license="Proprietary",
@@ -22,9 +22,16 @@ import numpy as np
 import pandas as pd
 
 try:
-    from sklearn.metrics import log_loss, mean_tweedie_deviance
+    from sklearn.metrics import (
+        log_loss,
+        mean_absolute_error,
+        mean_squared_error,
+        mean_tweedie_deviance,
+    )
 except ImportError:
     log_loss = None
+    mean_absolute_error = None
+    mean_squared_error = None
     mean_tweedie_deviance = None
 
 
@@ -198,6 +205,7 @@ class MetricFactory:
         self,
         task_type: str = "regression",
         tweedie_power: float = 1.5,
+        loss_name: str = "tweedie",
        clip_min: float = 1e-8,
         clip_max: float = 1 - 1e-8,
     ):
@@ -206,11 +214,13 @@ class MetricFactory:
         Args:
             task_type: Either 'regression' or 'classification'
             tweedie_power: Power parameter for Tweedie deviance (1.0-2.0)
+            loss_name: Regression loss name ('tweedie', 'poisson', 'gamma', 'mse', 'mae')
             clip_min: Minimum value for clipping predictions
             clip_max: Maximum value for clipping predictions (for classification)
         """
         self.task_type = task_type
         self.tweedie_power = tweedie_power
+        self.loss_name = loss_name
         self.clip_min = clip_min
         self.clip_max = clip_max
 
@@ -240,14 +250,28 @@ class MetricFactory:
             y_pred_clipped = np.clip(y_pred, self.clip_min, self.clip_max)
             return float(log_loss(y_true, y_pred_clipped, sample_weight=sample_weight))
 
-        # Regression: use Tweedie deviance
+        loss_name = str(self.loss_name or "tweedie").strip().lower()
+        if loss_name in {"mse", "mae"}:
+            if mean_squared_error is None or mean_absolute_error is None:
+                raise ImportError("sklearn is required for metric computation")
+            if loss_name == "mse":
+                return float(mean_squared_error(
+                    y_true, y_pred, sample_weight=sample_weight))
+            return float(mean_absolute_error(
+                y_true, y_pred, sample_weight=sample_weight))
+
         y_pred_safe = np.maximum(y_pred, self.clip_min)
+        power = self.tweedie_power
+        if loss_name == "poisson":
+            power = 1.0
+        elif loss_name == "gamma":
+            power = 2.0
         return float(
             mean_tweedie_deviance(
                 y_true,
                 y_pred_safe,
                 sample_weight=sample_weight,
-                power=self.tweedie_power,
+                power=power,
             )
         )
 
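This second copy of MetricFactory guards its sklearn usage, so the new metrics follow the module's optional-dependency pattern: import once at load time, fall back to None sentinels, and raise only when a metric is actually requested. A minimal standalone sketch of the pattern:

```python
# Optional-dependency guard, as extended by this hunk (standalone sketch).
try:
    from sklearn.metrics import mean_squared_error
except ImportError:  # sklearn not installed
    mean_squared_error = None

def weighted_mse(y_true, y_pred, weight=None) -> float:
    if mean_squared_error is None:
        raise ImportError("sklearn is required for metric computation")
    return float(mean_squared_error(y_true, y_pred, sample_weight=weight))
```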