ins-pricing 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/docs/LOSS_FUNCTIONS.md +78 -0
- ins_pricing/docs/modelling/BayesOpt_USAGE.md +3 -3
- ins_pricing/frontend/QUICKSTART.md +152 -0
- ins_pricing/frontend/README.md +388 -0
- ins_pricing/frontend/__init__.py +10 -0
- ins_pricing/frontend/app.py +903 -0
- ins_pricing/frontend/config_builder.py +352 -0
- ins_pricing/frontend/example_config.json +36 -0
- ins_pricing/frontend/example_workflows.py +979 -0
- ins_pricing/frontend/ft_workflow.py +316 -0
- ins_pricing/frontend/runner.py +388 -0
- ins_pricing/modelling/core/bayesopt/config_preprocess.py +12 -0
- ins_pricing/modelling/core/bayesopt/core.py +21 -8
- ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +16 -6
- ins_pricing/modelling/core/bayesopt/models/model_gnn.py +16 -6
- ins_pricing/modelling/core/bayesopt/models/model_resn.py +16 -7
- ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +2 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +25 -8
- ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +14 -11
- ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +29 -10
- ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +28 -12
- ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +13 -14
- ins_pricing/modelling/core/bayesopt/utils/losses.py +129 -0
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +18 -3
- ins_pricing/modelling/core/bayesopt/utils/torch_trainer_mixin.py +24 -3
- ins_pricing/production/predict.py +693 -635
- ins_pricing/setup.py +1 -1
- ins_pricing/utils/metrics.py +27 -3
- {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/METADATA +162 -162
- {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/RECORD +32 -21
- {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/WHEEL +1 -1
- {ins_pricing-0.3.3.dist-info → ins_pricing-0.4.0.dist-info}/top_level.txt +0 -0
ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py

```diff
@@ -7,10 +7,11 @@ import numpy as np
 import optuna
 import torch
 import xgboost as xgb
-from sklearn.metrics import log_loss, mean_tweedie_deviance
+from sklearn.metrics import log_loss
 
 from .trainer_base import TrainerBase
 from ..utils import EPS
+from ..utils.losses import regression_loss
 
 _XGB_CUDA_CHECKED = False
 _XGB_HAS_CUDA = False
@@ -230,18 +231,17 @@ class XGBTrainer(TrainerBase):
             'reg_alpha': reg_alpha,
             'reg_lambda': reg_lambda
         }
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
         tweedie_variance_power = None
         if self.ctx.task_type != 'classification':
-            if …
+            if loss_name == "tweedie":
                 tweedie_variance_power = trial.suggest_float(
                     'tweedie_variance_power', 1, 2)
                 params['tweedie_variance_power'] = tweedie_variance_power
-            elif …
-                tweedie_variance_power = 1
-            elif …
-                tweedie_variance_power = 2
-            else:
-                tweedie_variance_power = 1.5
+            elif loss_name == "poisson":
+                tweedie_variance_power = 1.0
+            elif loss_name == "gamma":
+                tweedie_variance_power = 2.0
         X_all = self.ctx.train_data[self.ctx.factor_nmes]
         y_all = self.ctx.train_data[self.ctx.resp_nme].values
         w_all = self.ctx.train_data[self.ctx.weight_nme].values
@@ -272,12 +272,12 @@ class XGBTrainer(TrainerBase):
                 loss = log_loss(y_val, y_pred, sample_weight=w_val)
             else:
                 y_pred = clf.predict(X_val)
-                …
-                loss = mean_tweedie_deviance(
+                loss = regression_loss(
                     y_val,
-                    …
-                    …
-                    …
+                    y_pred,
+                    w_val,
+                    loss_name=loss_name,
+                    tweedie_power=tweedie_variance_power,
                 )
             losses.append(float(loss))
             self._clean_gpu()
@@ -345,4 +345,3 @@ class XGBTrainer(TrainerBase):
         )
         self.ctx.xgb_best = self.model
 
-
```
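In the trainer above, the per-trial loss is now driven by a single `loss_name` read from the trainer context instead of branching on the model name. A standalone sketch of the resulting Tweedie-power mapping (illustrative only; in the package the tuned value comes from `trial.suggest_float`):

```python
# Sketch of the loss_name -> Tweedie variance power mapping used in the XGB trial objective.
# Standalone illustration; `suggest` stands in for trial.suggest_float.

def pick_tweedie_power(loss_name: str, suggest=lambda lo, hi: 1.5):
    """Mirror the branch added to XGBTrainer: only 'tweedie' leaves the power tunable."""
    if loss_name == "tweedie":
        return suggest(1, 2)   # searched in (1, 2) by the Optuna trial
    if loss_name == "poisson":
        return 1.0             # Poisson deviance == Tweedie deviance with power 1
    if loss_name == "gamma":
        return 2.0             # Gamma deviance == Tweedie deviance with power 2
    return None                # mse / mae: no Tweedie power needed

for name in ("tweedie", "poisson", "gamma", "mse"):
    print(name, pick_tweedie_power(name))
```

Only the generic Tweedie case keeps the power as a tuned hyperparameter; Poisson and Gamma pin it to their canonical values, and the validation score is then computed by `regression_loss` with the same `loss_name`.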
ins_pricing/modelling/core/bayesopt/utils/losses.py (new file)

```diff
@@ -0,0 +1,129 @@
+"""Loss selection and regression loss utilities."""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import numpy as np
+
+from ....explain.metrics import (
+    gamma_deviance,
+    poisson_deviance,
+    tweedie_deviance,
+)
+
+LOSS_ALIASES = {
+    "poisson_deviance": "poisson",
+    "gamma_deviance": "gamma",
+    "tweedie_deviance": "tweedie",
+    "l2": "mse",
+    "l1": "mae",
+    "absolute": "mae",
+    "gaussian": "mse",
+    "normal": "mse",
+}
+
+REGRESSION_LOSSES = {"tweedie", "poisson", "gamma", "mse", "mae"}
+CLASSIFICATION_LOSSES = {"logloss", "bce"}
+
+
+def normalize_loss_name(loss_name: Optional[str], task_type: str) -> str:
+    """Normalize the loss name and validate against supported values."""
+    name = str(loss_name or "auto").strip().lower()
+    if not name or name == "auto":
+        return "auto"
+    name = LOSS_ALIASES.get(name, name)
+    if task_type == "classification":
+        if name not in CLASSIFICATION_LOSSES:
+            raise ValueError(
+                f"Unsupported classification loss '{loss_name}'. "
+                f"Supported: {sorted(CLASSIFICATION_LOSSES)}"
+            )
+    else:
+        if name not in REGRESSION_LOSSES:
+            raise ValueError(
+                f"Unsupported regression loss '{loss_name}'. "
+                f"Supported: {sorted(REGRESSION_LOSSES)}"
+            )
+    return name
+
+
+def infer_loss_name_from_model_name(model_name: str) -> str:
+    """Preserve legacy heuristic for loss selection based on model name."""
+    name = str(model_name or "")
+    if "f" in name:
+        return "poisson"
+    if "s" in name:
+        return "gamma"
+    return "tweedie"
+
+
+def resolve_tweedie_power(loss_name: str, default: float = 1.5) -> Optional[float]:
+    """Resolve Tweedie power based on loss name."""
+    if loss_name == "poisson":
+        return 1.0
+    if loss_name == "gamma":
+        return 2.0
+    if loss_name == "tweedie":
+        return float(default)
+    return None
+
+
+def resolve_xgb_objective(loss_name: str) -> str:
+    """Map regression loss name to XGBoost objective."""
+    name = loss_name if loss_name != "auto" else "tweedie"
+    mapping = {
+        "tweedie": "reg:tweedie",
+        "poisson": "count:poisson",
+        "gamma": "reg:gamma",
+        "mse": "reg:squarederror",
+        "mae": "reg:absoluteerror",
+    }
+    return mapping.get(name, "reg:tweedie")
+
+
+def regression_loss(
+    y_true,
+    y_pred,
+    sample_weight=None,
+    *,
+    loss_name: str,
+    tweedie_power: Optional[float] = 1.5,
+    eps: float = 1e-8,
+) -> float:
+    """Compute weighted regression loss based on configured loss name."""
+    name = normalize_loss_name(loss_name, task_type="regression")
+    if name == "auto":
+        name = "tweedie"
+
+    y_t = np.asarray(y_true, dtype=float).reshape(-1)
+    y_p = np.asarray(y_pred, dtype=float).reshape(-1)
+    w = None if sample_weight is None else np.asarray(sample_weight, dtype=float).reshape(-1)
+
+    if name == "mse":
+        err = (y_t - y_p) ** 2
+        return _weighted_mean(err, w)
+    if name == "mae":
+        err = np.abs(y_t - y_p)
+        return _weighted_mean(err, w)
+    if name == "poisson":
+        return poisson_deviance(y_t, y_p, sample_weight=w, eps=eps)
+    if name == "gamma":
+        return gamma_deviance(y_t, y_p, sample_weight=w, eps=eps)
+
+    power = 1.5 if tweedie_power is None else float(tweedie_power)
+    return tweedie_deviance(y_t, y_p, sample_weight=w, power=power, eps=eps)
+
+
+def loss_requires_positive(loss_name: str) -> bool:
+    """Return True if the loss requires positive predictions."""
+    return loss_name in {"tweedie", "poisson", "gamma"}
+
+
+def _weighted_mean(values: np.ndarray, weight: Optional[np.ndarray]) -> float:
+    if weight is None:
+        return float(np.mean(values))
+    total = float(np.sum(weight))
+    if total <= 0:
+        return float(np.mean(values))
+    return float(np.sum(values * weight) / total)
```
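The new helpers are pure functions, so they can be exercised directly. A minimal usage sketch, assuming ins_pricing 0.4.0 is installed and that importing the module resolves `ins_pricing.explain.metrics` for the deviance computations (import path taken from the RECORD above):

```python
# Hedged usage sketch of the new loss utilities; numeric outputs depend on
# ins_pricing.explain.metrics and are not reproduced here.
import numpy as np
from ins_pricing.modelling.core.bayesopt.utils.losses import (
    normalize_loss_name,
    regression_loss,
    resolve_tweedie_power,
    resolve_xgb_objective,
)

print(normalize_loss_name("l2", task_type="regression"))               # "mse" via LOSS_ALIASES
print(normalize_loss_name("poisson_deviance", task_type="regression")) # "poisson"
print(resolve_xgb_objective("gamma"))                                  # "reg:gamma"
print(resolve_tweedie_power("poisson"))                                # 1.0

y_true = np.array([0.0, 1.2, 3.4, 0.0])
y_pred = np.array([0.1, 1.0, 3.0, 0.2])
w = np.array([1.0, 2.0, 1.0, 0.5])

# Weighted MSE/MAE are computed in-module; deviance losses delegate to explain.metrics.
print(regression_loss(y_true, y_pred, w, loss_name="mse"))
print(regression_loss(y_true, y_pred, w, loss_name="tweedie", tweedie_power=1.5))
```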
ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py

```diff
@@ -24,7 +24,7 @@ import pandas as pd
 import torch
 import torch.nn as nn
 from torch.nn.parallel import DistributedDataParallel as DDP
-from sklearn.metrics import log_loss, mean_tweedie_deviance
+from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error, mean_tweedie_deviance
 from sklearn.model_selection import KFold, GroupKFold, TimeSeriesSplit, StratifiedKFold
 
 # Try to import plotting dependencies
@@ -112,6 +112,7 @@ class MetricFactory:
         self,
         task_type: str = "regression",
         tweedie_power: float = 1.5,
+        loss_name: str = "tweedie",
         clip_min: float = 1e-8,
         clip_max: float = 1 - 1e-8,
     ):
@@ -120,11 +121,13 @@ class MetricFactory:
         Args:
             task_type: Either 'regression' or 'classification'
             tweedie_power: Power parameter for Tweedie deviance (1.0-2.0)
+            loss_name: Regression loss name ('tweedie', 'poisson', 'gamma', 'mse', 'mae')
             clip_min: Minimum value for clipping predictions
             clip_max: Maximum value for clipping predictions (for classification)
         """
         self.task_type = task_type
         self.tweedie_power = tweedie_power
+        self.loss_name = loss_name
         self.clip_min = clip_min
         self.clip_max = clip_max
 
@@ -151,13 +154,25 @@ class MetricFactory:
             y_pred_clipped = np.clip(y_pred, self.clip_min, self.clip_max)
             return float(log_loss(y_true, y_pred_clipped, sample_weight=sample_weight))
 
-        …
+        loss_name = str(self.loss_name or "tweedie").strip().lower()
+        if loss_name in {"mse", "mae"}:
+            if loss_name == "mse":
+                return float(mean_squared_error(
+                    y_true, y_pred, sample_weight=sample_weight))
+            return float(mean_absolute_error(
+                y_true, y_pred, sample_weight=sample_weight))
+
         y_pred_safe = np.maximum(y_pred, self.clip_min)
+        power = self.tweedie_power
+        if loss_name == "poisson":
+            power = 1.0
+        elif loss_name == "gamma":
+            power = 2.0
         return float(mean_tweedie_deviance(
             y_true,
             y_pred_safe,
             sample_weight=sample_weight,
-            power=self.tweedie_power,
+            power=power,
         ))
 
     def update_power(self, power: float) -> None:
```
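`MetricFactory` keeps `mean_tweedie_deviance` as the default regression metric but now resolves the effective power from `loss_name` and short-circuits to MSE/MAE when requested. A standalone sketch of that dispatch (illustrative function name, not the class's actual method):

```python
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_tweedie_deviance

def regression_metric(y_true, y_pred, sample_weight=None, *,
                      loss_name="tweedie", tweedie_power=1.5, clip_min=1e-8):
    """Illustrative stand-in for MetricFactory's regression scoring path."""
    name = str(loss_name or "tweedie").strip().lower()
    if name == "mse":
        return float(mean_squared_error(y_true, y_pred, sample_weight=sample_weight))
    if name == "mae":
        return float(mean_absolute_error(y_true, y_pred, sample_weight=sample_weight))
    y_pred_safe = np.maximum(y_pred, clip_min)           # deviances need positive predictions
    power = {"poisson": 1.0, "gamma": 2.0}.get(name, tweedie_power)
    return float(mean_tweedie_deviance(y_true, y_pred_safe,
                                       sample_weight=sample_weight, power=power))

print(regression_metric([1.0, 2.0], [1.1, 1.8], loss_name="poisson"))
```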
ins_pricing/modelling/core/bayesopt/utils/torch_trainer_mixin.py

```diff
@@ -52,6 +52,12 @@ except Exception:
 
 # Import from other utils modules
 from .constants import EPS, compute_batch_size, tweedie_loss, ensure_parent_dir
+from .losses import (
+    infer_loss_name_from_model_name,
+    loss_requires_positive,
+    normalize_loss_name,
+    resolve_tweedie_power,
+)
 from .distributed_utils import DistributedUtils
 
 
@@ -359,11 +365,26 @@ class TorchTrainerMixin:
         if task == 'classification':
             loss_fn = nn.BCEWithLogitsLoss(reduction='none')
             return loss_fn(y_pred, y_true).view(-1)
+        loss_name = normalize_loss_name(
+            getattr(self, "loss_name", None), task_type="regression"
+        )
+        if loss_name == "auto":
+            loss_name = infer_loss_name_from_model_name(getattr(self, "model_nme", ""))
         if apply_softplus:
             y_pred = F.softplus(y_pred)
-        …
-        …
-        …
+        if loss_requires_positive(loss_name):
+            y_pred = torch.clamp(y_pred, min=1e-6)
+            power = resolve_tweedie_power(
+                loss_name, default=float(getattr(self, "tw_power", 1.5) or 1.5)
+            )
+            if power is None:
+                power = float(getattr(self, "tw_power", 1.5) or 1.5)
+            return tweedie_loss(y_pred, y_true, p=power).view(-1)
+        if loss_name == "mse":
+            return (y_pred - y_true).pow(2).view(-1)
+        if loss_name == "mae":
+            return (y_pred - y_true).abs().view(-1)
+        raise ValueError(f"Unsupported loss_name '{loss_name}' for regression.")
 
     def _compute_weighted_loss(self, y_pred, y_true, weights, apply_softplus: bool = False):
         """Compute weighted loss."""
```