ins-pricing 0.4.5-py3-none-any.whl → 0.5.1-py3-none-any.whl
- ins_pricing/README.md +48 -22
- ins_pricing/__init__.py +142 -90
- ins_pricing/cli/BayesOpt_entry.py +58 -46
- ins_pricing/cli/BayesOpt_incremental.py +77 -110
- ins_pricing/cli/Explain_Run.py +42 -23
- ins_pricing/cli/Explain_entry.py +551 -577
- ins_pricing/cli/Pricing_Run.py +42 -23
- ins_pricing/cli/bayesopt_entry_runner.py +51 -16
- ins_pricing/cli/utils/bootstrap.py +23 -0
- ins_pricing/cli/utils/cli_common.py +256 -256
- ins_pricing/cli/utils/cli_config.py +379 -360
- ins_pricing/cli/utils/import_resolver.py +375 -358
- ins_pricing/cli/utils/notebook_utils.py +256 -242
- ins_pricing/cli/watchdog_run.py +216 -198
- ins_pricing/frontend/__init__.py +10 -10
- ins_pricing/frontend/app.py +132 -61
- ins_pricing/frontend/config_builder.py +33 -0
- ins_pricing/frontend/example_config.json +11 -0
- ins_pricing/frontend/example_workflows.py +1 -1
- ins_pricing/frontend/runner.py +340 -388
- ins_pricing/governance/__init__.py +20 -20
- ins_pricing/governance/release.py +159 -159
- ins_pricing/modelling/README.md +1 -1
- ins_pricing/modelling/__init__.py +147 -92
- ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +31 -13
- ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +12 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +589 -552
- ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +987 -958
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +488 -548
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +349 -342
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +921 -913
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +794 -785
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +454 -446
- ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1294 -1282
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +64 -56
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +203 -198
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +333 -325
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +279 -267
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +515 -313
- ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
- ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +193 -186
- ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
- ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
- ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +636 -623
- ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
- ins_pricing/modelling/explain/__init__.py +55 -55
- ins_pricing/modelling/explain/metrics.py +27 -174
- ins_pricing/modelling/explain/permutation.py +237 -237
- ins_pricing/modelling/plotting/__init__.py +40 -36
- ins_pricing/modelling/plotting/compat.py +228 -0
- ins_pricing/modelling/plotting/curves.py +572 -572
- ins_pricing/modelling/plotting/diagnostics.py +163 -163
- ins_pricing/modelling/plotting/geo.py +362 -362
- ins_pricing/modelling/plotting/importance.py +121 -121
- ins_pricing/pricing/__init__.py +27 -27
- ins_pricing/pricing/factors.py +67 -56
- ins_pricing/production/__init__.py +35 -25
- ins_pricing/production/{predict.py → inference.py} +140 -57
- ins_pricing/production/monitoring.py +8 -21
- ins_pricing/reporting/__init__.py +11 -11
- ins_pricing/setup.py +1 -1
- ins_pricing/tests/production/test_inference.py +90 -0
- ins_pricing/utils/__init__.py +112 -78
- ins_pricing/utils/device.py +258 -237
- ins_pricing/utils/features.py +53 -0
- ins_pricing/utils/io.py +72 -0
- ins_pricing/utils/logging.py +34 -1
- ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
- ins_pricing/utils/metrics.py +158 -24
- ins_pricing/utils/numerics.py +76 -0
- ins_pricing/utils/paths.py +9 -1
- ins_pricing/utils/profiling.py +8 -4
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/METADATA +1 -1
- ins_pricing-0.5.1.dist-info/RECORD +132 -0
- ins_pricing/modelling/core/BayesOpt.py +0 -146
- ins_pricing/modelling/core/__init__.py +0 -1
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
- ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
- ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
- ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
- ins_pricing/modelling/core/bayesopt/utils.py +0 -105
- ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
- ins_pricing/tests/production/test_predict.py +0 -233
- ins_pricing-0.4.5.dist-info/RECORD +0 -130
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/WHEEL +0 -0
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/top_level.txt +0 -0
ins_pricing/utils/logging.py
CHANGED
@@ -18,7 +18,7 @@ from __future__ import annotations
 import logging
 import os
 from functools import lru_cache
-from typing import Optional
+from typing import Optional, Union


 @lru_cache(maxsize=1)
@@ -72,3 +72,36 @@ def configure_logging(
     formatter = logging.Formatter(format_string)
     for handler in logger.handlers:
         handler.setFormatter(formatter)
+
+
+def log_print(
+    logger: logging.Logger,
+    *args,
+    level: Optional[Union[int, str]] = None,
+    **kwargs,
+) -> None:
+    """Print-like helper that routes messages to a logger.
+
+    This preserves basic print semantics (sep/end) while ignoring file/flush,
+    and it auto-detects severity when level is not provided.
+    """
+    sep = kwargs.get("sep", " ")
+    msg = sep.join(str(arg) for arg in args)
+    if not msg:
+        return
+
+    if level is None:
+        lowered = msg.lstrip().lower()
+        if lowered.startswith(("warn", "[warn]", "warning")):
+            level_value = logging.WARNING
+        elif lowered.startswith(("error", "[error]", "err")):
+            level_value = logging.ERROR
+        else:
+            level_value = logging.INFO
+    else:
+        if isinstance(level, str):
+            level_value = getattr(logging, level.upper(), logging.INFO)
+        else:
+            level_value = int(level)
+
+    logger.log(level_value, msg)
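The new `log_print` is a drop-in bridge for code migrating from bare `print` calls to the logging module. A minimal usage sketch (logger name and messages are illustrative, not from the package):

import logging

from ins_pricing.utils.logging import log_print

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("demo")  # arbitrary name for this sketch

# With level omitted, severity is inferred from the message prefix.
log_print(logger, "[WARN] falling back to CPU")   # routed at WARNING
log_print(logger, "error: config file missing")   # routed at ERROR
log_print(logger, "fit complete", "in 12s")       # routed at INFO, joined by sep=" "

# An explicit level (int or name) overrides the auto-detection.
log_print(logger, "verbose detail", level="DEBUG")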
ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py
CHANGED

@@ -1,129 +1,125 @@
-"""Loss selection and regression loss utilities."""
-
-from __future__ import annotations
-
-from typing import Optional
-
-import numpy as np
-
-from …
[old lines 10–125 are not legible in this diff view]
-    total = float(np.sum(weight))
-    if total <= 0:
-        return float(np.mean(values))
-    return float(np.sum(values * weight) / total)
+"""Loss selection and regression loss utilities."""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import numpy as np
+
+from ins_pricing.utils.metrics import gamma_deviance, poisson_deviance, tweedie_deviance
+
+LOSS_ALIASES = {
+    "poisson_deviance": "poisson",
+    "gamma_deviance": "gamma",
+    "tweedie_deviance": "tweedie",
+    "l2": "mse",
+    "l1": "mae",
+    "absolute": "mae",
+    "gaussian": "mse",
+    "normal": "mse",
+}
+
+REGRESSION_LOSSES = {"tweedie", "poisson", "gamma", "mse", "mae"}
+CLASSIFICATION_LOSSES = {"logloss", "bce"}
+
+
+def normalize_loss_name(loss_name: Optional[str], task_type: str) -> str:
+    """Normalize the loss name and validate against supported values."""
+    name = str(loss_name or "auto").strip().lower()
+    if not name or name == "auto":
+        return "auto"
+    name = LOSS_ALIASES.get(name, name)
+    if task_type == "classification":
+        if name not in CLASSIFICATION_LOSSES:
+            raise ValueError(
+                f"Unsupported classification loss '{loss_name}'. "
+                f"Supported: {sorted(CLASSIFICATION_LOSSES)}"
+            )
+    else:
+        if name not in REGRESSION_LOSSES:
+            raise ValueError(
+                f"Unsupported regression loss '{loss_name}'. "
+                f"Supported: {sorted(REGRESSION_LOSSES)}"
+            )
+    return name
+
+
+def infer_loss_name_from_model_name(model_name: str) -> str:
+    """Preserve legacy heuristic for loss selection based on model name."""
+    name = str(model_name or "")
+    if "f" in name:
+        return "poisson"
+    if "s" in name:
+        return "gamma"
+    return "tweedie"
+
+
+def resolve_tweedie_power(loss_name: str, default: float = 1.5) -> Optional[float]:
+    """Resolve Tweedie power based on loss name."""
+    if loss_name == "poisson":
+        return 1.0
+    if loss_name == "gamma":
+        return 2.0
+    if loss_name == "tweedie":
+        return float(default)
+    return None
+
+
+def resolve_xgb_objective(loss_name: str) -> str:
+    """Map regression loss name to XGBoost objective."""
+    name = loss_name if loss_name != "auto" else "tweedie"
+    mapping = {
+        "tweedie": "reg:tweedie",
+        "poisson": "count:poisson",
+        "gamma": "reg:gamma",
+        "mse": "reg:squarederror",
+        "mae": "reg:absoluteerror",
+    }
+    return mapping.get(name, "reg:tweedie")
+
+
+def regression_loss(
+    y_true,
+    y_pred,
+    sample_weight=None,
+    *,
+    loss_name: str,
+    tweedie_power: Optional[float] = 1.5,
+    eps: float = 1e-8,
+) -> float:
+    """Compute weighted regression loss based on configured loss name."""
+    name = normalize_loss_name(loss_name, task_type="regression")
+    if name == "auto":
+        name = "tweedie"
+
+    y_t = np.asarray(y_true, dtype=float).reshape(-1)
+    y_p = np.asarray(y_pred, dtype=float).reshape(-1)
+    w = None if sample_weight is None else np.asarray(sample_weight, dtype=float).reshape(-1)
+
+    if name == "mse":
+        err = (y_t - y_p) ** 2
+        return _weighted_mean(err, w)
+    if name == "mae":
+        err = np.abs(y_t - y_p)
+        return _weighted_mean(err, w)
+    if name == "poisson":
+        return poisson_deviance(y_t, y_p, sample_weight=w, eps=eps)
+    if name == "gamma":
+        return gamma_deviance(y_t, y_p, sample_weight=w, eps=eps)
+
+    power = 1.5 if tweedie_power is None else float(tweedie_power)
+    return tweedie_deviance(y_t, y_p, sample_weight=w, power=power, eps=eps)
+
+
+def loss_requires_positive(loss_name: str) -> bool:
+    """Return True if the loss requires positive predictions."""
+    return loss_name in {"tweedie", "poisson", "gamma"}
+
+
+def _weighted_mean(values: np.ndarray, weight: Optional[np.ndarray]) -> float:
+    if weight is None:
+        return float(np.mean(values))
+    total = float(np.sum(weight))
+    if total <= 0:
+        return float(np.mean(values))
+    return float(np.sum(values * weight) / total)
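The relocated module now funnels alias resolution, validation, and evaluation through one path. A usage sketch of how the pieces compose (arrays are illustrative):

import numpy as np

from ins_pricing.utils.losses import (
    normalize_loss_name,
    regression_loss,
    resolve_tweedie_power,
    resolve_xgb_objective,
)

name = normalize_loss_name("poisson_deviance", task_type="regression")
print(name)                         # "poisson", via LOSS_ALIASES
print(resolve_tweedie_power(name))  # 1.0
print(resolve_xgb_objective(name))  # "count:poisson"

y_true = np.array([0.0, 1.0, 3.0])
y_pred = np.array([0.5, 1.2, 2.5])
w = np.array([1.0, 2.0, 1.0])

# Weighted Poisson deviance, delegated to ins_pricing.utils.metrics.
print(regression_loss(y_true, y_pred, sample_weight=w, loss_name="poisson"))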
ins_pricing/utils/metrics.py
CHANGED
@@ -3,7 +3,7 @@
 This module consolidates metric computation used across:
 - pricing/monitoring.py: PSI for feature drift
 - production/drift.py: PSI wrapper for production monitoring
-- modelling/…
+- modelling/bayesopt/: Model evaluation metrics
 
 Example:
     >>> from ins_pricing.utils import psi_report, MetricFactory
@@ -16,23 +16,15 @@ Example:
 
 from __future__ import annotations
 
-from typing import …
+from typing import Iterable, List, Optional
 
 import numpy as np
 import pandas as pd
 
 try:
-    from sklearn.metrics import (
-        log_loss,
-        mean_absolute_error,
-        mean_squared_error,
-        mean_tweedie_deviance,
-    )
-except ImportError:
-    log_loss = None
-    mean_absolute_error = None
-    mean_squared_error = None
-    mean_tweedie_deviance = None
+    from sklearn.metrics import roc_auc_score
+except Exception:  # pragma: no cover - optional dependency
+    roc_auc_score = None
 
 
 # =============================================================================
@@ -190,6 +182,152 @@ def psi_report(
 # =============================================================================
 
 
+def _to_numpy(arr) -> np.ndarray:
+    out = np.asarray(arr, dtype=float)
+    return out.reshape(-1)
+
+
+def _align(y_true, y_pred, sample_weight=None):
+    y_t = _to_numpy(y_true)
+    y_p = _to_numpy(y_pred)
+    if y_t.shape[0] != y_p.shape[0]:
+        raise ValueError("y_true and y_pred must have the same length.")
+    if sample_weight is None:
+        return y_t, y_p, None
+    w = _to_numpy(sample_weight)
+    if w.shape[0] != y_t.shape[0]:
+        raise ValueError("sample_weight must have the same length as y_true.")
+    return y_t, y_p, w
+
+
+def _weighted_mean(values: np.ndarray, weight: Optional[np.ndarray]) -> float:
+    if weight is None:
+        return float(np.mean(values))
+    total = float(np.sum(weight))
+    if total <= 0:
+        return float(np.mean(values))
+    return float(np.sum(values * weight) / total)
+
+
+def rmse(y_true, y_pred, sample_weight=None) -> float:
+    y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+    err = (y_t - y_p) ** 2
+    return float(np.sqrt(_weighted_mean(err, w)))
+
+
+def mae(y_true, y_pred, sample_weight=None) -> float:
+    y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+    err = np.abs(y_t - y_p)
+    return _weighted_mean(err, w)
+
+
+def mape(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
+    y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+    denom = np.maximum(np.abs(y_t), eps)
+    err = np.abs((y_t - y_p) / denom)
+    return _weighted_mean(err, w)
+
+
+def r2_score(y_true, y_pred, sample_weight=None) -> float:
+    y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+    if w is None:
+        y_mean = float(np.mean(y_t))
+        sse = float(np.sum((y_t - y_p) ** 2))
+        sst = float(np.sum((y_t - y_mean) ** 2))
+    else:
+        w_sum = float(np.sum(w))
+        y_mean = float(np.sum(w * y_t) / w_sum) if w_sum > 0 else float(np.mean(y_t))
+        sse = float(np.sum(w * (y_t - y_p) ** 2))
+        sst = float(np.sum(w * (y_t - y_mean) ** 2))
+    if sst <= 0:
+        return 0.0
+    return 1.0 - sse / sst
+
+
+def logloss(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
+    y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+    p = np.clip(y_p, eps, 1 - eps)
+    loss = -(y_t * np.log(p) + (1 - y_t) * np.log(1 - p))
+    return _weighted_mean(loss, w)
+
+
+def tweedie_deviance(
+    y_true,
+    y_pred,
+    sample_weight=None,
+    *,
+    power: float = 1.5,
+    eps: float = 1e-8,
+) -> float:
+    if power < 0:
+        raise ValueError("power must be >= 0.")
+    y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+    y_p = np.clip(y_p, eps, None)
+    y_t_safe = np.clip(y_t, eps, None)
+
+    if power == 0:
+        dev = (y_t - y_p) ** 2
+    elif power == 1:
+        dev = 2 * (y_t_safe * np.log(y_t_safe / y_p) - (y_t_safe - y_p))
+    elif power == 2:
+        ratio = y_t_safe / y_p
+        dev = 2 * ((ratio - 1) - np.log(ratio))
+    else:
+        term1 = np.power(y_t_safe, 2 - power) / ((1 - power) * (2 - power))
+        term2 = y_t_safe * np.power(y_p, 1 - power) / (1 - power)
+        term3 = np.power(y_p, 2 - power) / (2 - power)
+        dev = 2 * (term1 - term2 + term3)
+    return _weighted_mean(dev, w)
+
+
+def poisson_deviance(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
+    return tweedie_deviance(
+        y_true, y_pred, sample_weight=sample_weight, power=1.0, eps=eps
+    )
+
+
+def gamma_deviance(y_true, y_pred, sample_weight=None, eps: float = 1e-8) -> float:
+    return tweedie_deviance(
+        y_true, y_pred, sample_weight=sample_weight, power=2.0, eps=eps
+    )
+
+
+def auc_score(y_true, y_pred, sample_weight=None) -> float:
+    if roc_auc_score is None:
+        raise RuntimeError("auc requires scikit-learn.")
+    y_t, y_p, w = _align(y_true, y_pred, sample_weight)
+    return float(roc_auc_score(y_t, y_p, sample_weight=w))
+
+
+def resolve_metric(metric, *, task_type: Optional[str] = None, higher_is_better: Optional[bool] = None):
+    if callable(metric):
+        if higher_is_better is None:
+            raise ValueError("higher_is_better must be provided for custom metric.")
+        return metric, bool(higher_is_better), getattr(metric, "__name__", "custom")
+
+    name = str(metric or "auto").lower()
+    if name == "auto":
+        name = "logloss" if task_type == "classification" else "rmse"
+
+    mapping = {
+        "rmse": (rmse, False),
+        "mae": (mae, False),
+        "mape": (mape, False),
+        "r2": (r2_score, True),
+        "logloss": (logloss, False),
+        "poisson": (poisson_deviance, False),
+        "gamma": (gamma_deviance, False),
+        "tweedie": (tweedie_deviance, False),
+        "auc": (auc_score, True),
+    }
+    if name not in mapping:
+        raise ValueError(f"Unsupported metric: {metric}")
+    fn, hib = mapping[name]
+    if higher_is_better is not None:
+        hib = bool(higher_is_better)
+    return fn, hib, name
+
+
 class MetricFactory:
     """Factory for computing evaluation metrics consistently across all trainers.
 
@@ -240,25 +378,21 @@ class MetricFactory:
         Returns:
             Computed metric value (lower is better)
         """
-        if log_loss is None or mean_tweedie_deviance is None:
-            raise ImportError("sklearn is required for metric computation")
-
         y_pred = np.asarray(y_pred)
         y_true = np.asarray(y_true)
 
         if self.task_type == "classification":
             y_pred_clipped = np.clip(y_pred, self.clip_min, self.clip_max)
-            return float(…
+            return float(logloss(y_true, y_pred_clipped, sample_weight=sample_weight))
 
         loss_name = str(self.loss_name or "tweedie").strip().lower()
         if loss_name in {"mse", "mae"}:
-            if mean_squared_error is None or mean_absolute_error is None:
-                raise ImportError("sklearn is required for metric computation")
+            y_t, y_p, w = _align(y_true, y_pred, sample_weight)
             if loss_name == "mse":
[4 deleted lines not legible in this diff view]
+                err = (y_t - y_p) ** 2
+                return _weighted_mean(err, w)
+            err = np.abs(y_t - y_p)
+            return _weighted_mean(err, w)
 
         y_pred_safe = np.maximum(y_pred, self.clip_min)
         power = self.tweedie_power
@@ -267,7 +401,7 @@
         elif loss_name == "gamma":
             power = 2.0
         return float(
-            mean_tweedie_deviance(
+            tweedie_deviance(
                 y_true,
                 y_pred_safe,
                 sample_weight=sample_weight,
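For reference, the general branch of `tweedie_deviance` implements d_p(y, mu) = 2 * (y^(2-p) / ((1-p)(2-p)) - y * mu^(1-p) / (1-p) + mu^(2-p) / (2-p)), with p = 1 and p = 2 handled as the Poisson and Gamma limit forms. A usage sketch of the new `resolve_metric` dispatcher (data is illustrative):

import numpy as np

from ins_pricing.utils.metrics import resolve_metric, tweedie_deviance

# "auto" resolves to rmse for regression tasks, logloss for classification.
fn, higher_is_better, name = resolve_metric("auto", task_type="regression")
print(name, higher_is_better)   # rmse False

y_true = np.array([1.0, 2.0, 4.0])
y_pred = np.array([1.5, 2.0, 3.0])
print(fn(y_true, y_pred))       # RMSE over the toy arrays

# Deviance with an explicit Tweedie power (keyword-only argument).
print(tweedie_deviance(y_true, y_pred, power=1.5))

# Custom callables must declare their direction explicitly.
fn, hib, name = resolve_metric(lambda t, p: float(np.max(np.abs(t - p))), higher_is_better=False)
print(name)                     # "<lambda>"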
ins_pricing/utils/numerics.py
ADDED

@@ -0,0 +1,76 @@
+"""Numerical utilities shared across ins_pricing.
+
+This module centralizes small, dependency-light numerical helpers so that
+other subpackages can reuse them without importing bayesopt-specific code.
+"""
+
+from __future__ import annotations
+
+import random
+import numpy as np
+
+try:
+    import torch
+    _TORCH_AVAILABLE = True
+except Exception:  # pragma: no cover - optional dependency
+    torch = None  # type: ignore[assignment]
+    _TORCH_AVAILABLE = False
+
+
+EPS = 1e-8
+"""Small epsilon value for numerical stability."""
+
+
+def set_global_seed(seed: int) -> None:
+    """Set random seed for reproducibility across numpy/python/torch."""
+    random.seed(seed)
+    np.random.seed(seed)
+    if _TORCH_AVAILABLE:
+        torch.manual_seed(seed)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(seed)
+
+
+def compute_batch_size(data_size: int, learning_rate: float, batch_num: int, minimum: int) -> int:
+    """Compute adaptive batch size based on data size and learning rate."""
+    estimated = int((learning_rate / 1e-4) ** 0.5 * (data_size / max(batch_num, 1)))
+    return max(1, min(int(data_size), max(int(minimum), estimated)))
+
+
+def tweedie_loss(
+    pred,
+    target,
+    *,
+    p: float = 1.5,
+    eps: float = 1e-6,
+    max_clip: float = 1e6,
+):
+    """Compute Tweedie deviance loss for PyTorch tensors."""
+    if not _TORCH_AVAILABLE:
+        raise ImportError("tweedie_loss requires torch to be installed.")
+
+    pred_clamped = torch.clamp(pred, min=eps)
+
+    if p == 1:
+        term1 = target * torch.log(target / pred_clamped + eps)
+        term2 = -target + pred_clamped
+        term3 = 0
+    elif p == 0:
+        term1 = 0.5 * torch.pow(target - pred_clamped, 2)
+        term2 = 0
+        term3 = 0
+    elif p == 2:
+        term1 = torch.log(pred_clamped / target + eps)
+        term2 = -target / pred_clamped + 1
+        term3 = 0
+    else:
+        term1 = torch.pow(target, 2 - p) / ((1 - p) * (2 - p))
+        term2 = target * torch.pow(pred_clamped, 1 - p) / (1 - p)
+        term3 = torch.pow(pred_clamped, 2 - p) / (2 - p)
+
+    return torch.nan_to_num(
+        2 * (term1 - term2 + term3),
+        nan=eps,
+        posinf=max_clip,
+        neginf=-max_clip,
+    )
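A usage sketch for the new module (argument values are illustrative; the printed batch size follows from the formula above):

from ins_pricing.utils.numerics import compute_batch_size, set_global_seed, tweedie_loss

set_global_seed(42)  # seeds python's random, numpy, and torch (when installed)

# Heuristic batch size: scales with sqrt(lr / 1e-4) and rows-per-batch,
# floored at `minimum` and capped at the dataset size.
print(compute_batch_size(data_size=100_000, learning_rate=1e-3, batch_num=64, minimum=256))
# -> 4941 for these arguments

try:
    import torch

    pred = torch.tensor([0.5, 1.2, 2.5])
    target = torch.tensor([0.0, 1.0, 3.0])
    # Element-wise Tweedie deviance; reduce explicitly if a scalar is wanted.
    print(tweedie_loss(pred, target, p=1.5).mean())
except ImportError:
    pass  # tweedie_loss itself raises ImportError when torch is absent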
ins_pricing/utils/paths.py
CHANGED
@@ -217,6 +217,7 @@ def load_dataset(
     data_format: str = "auto",
     dtype_map: Optional[Dict[str, Any]] = None,
     low_memory: bool = False,
+    chunksize: Optional[int] = None,
 ) -> pd.DataFrame:
     """Load a dataset from various formats.
 
@@ -225,6 +226,7 @@ def load_dataset(
         data_format: Format ('csv', 'parquet', 'feather', 'auto')
         dtype_map: Column type mapping
         low_memory: Whether to use low memory mode for CSV
+        chunksize: Optional chunk size for CSV streaming
 
     Returns:
         Loaded DataFrame
@@ -238,7 +240,13 @@ def load_dataset(
     elif fmt == "feather":
         df = pd.read_feather(path)
     elif fmt == "csv":
-        df = pd.read_csv(path, low_memory=low_memory, dtype=dtype_map or None)
+        if chunksize is not None:
+            chunks = []
+            for chunk in pd.read_csv(path, low_memory=low_memory, dtype=dtype_map or None, chunksize=chunksize):
+                chunks.append(chunk)
+            df = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()
+        else:
+            df = pd.read_csv(path, low_memory=low_memory, dtype=dtype_map or None)
     else:
         raise ValueError(f"Unsupported data_format: {data_format}")
 
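A usage sketch of the chunked CSV path (file path and dtype mapping are illustrative; the first positional argument is assumed to be the dataset path, as the function body suggests):

from ins_pricing.utils.paths import load_dataset

# With chunksize set, pandas streams the CSV in `chunksize`-row blocks that are
# concatenated afterwards, bounding parser memory at the cost of a final concat.
df = load_dataset(
    "data/policies.csv",            # illustrative path
    data_format="csv",
    dtype_map={"policy_id": "str"},
    chunksize=100_000,
)

# Omitting chunksize preserves the previous single-shot pd.read_csv behaviour.
df_small = load_dataset("data/policies.csv", data_format="csv")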