ins-pricing 0.2.7-py3-none-any.whl → 0.2.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/CHANGELOG.md +179 -0
- ins_pricing/RELEASE_NOTES_0.2.8.md +344 -0
- ins_pricing/modelling/core/bayesopt/utils.py +2 -1
- ins_pricing/modelling/explain/shap_utils.py +209 -6
- ins_pricing/pricing/calibration.py +125 -1
- ins_pricing/pricing/factors.py +110 -1
- ins_pricing/production/preprocess.py +166 -0
- ins_pricing/setup.py +1 -1
- ins_pricing/tests/governance/__init__.py +1 -0
- ins_pricing/tests/governance/test_audit.py +56 -0
- ins_pricing/tests/governance/test_registry.py +128 -0
- ins_pricing/tests/governance/test_release.py +74 -0
- ins_pricing/tests/pricing/__init__.py +1 -0
- ins_pricing/tests/pricing/test_calibration.py +72 -0
- ins_pricing/tests/pricing/test_exposure.py +64 -0
- ins_pricing/tests/pricing/test_factors.py +156 -0
- ins_pricing/tests/pricing/test_rate_table.py +40 -0
- ins_pricing/tests/production/__init__.py +1 -0
- ins_pricing/tests/production/test_monitoring.py +350 -0
- ins_pricing/tests/production/test_predict.py +233 -0
- ins_pricing/tests/production/test_preprocess.py +339 -0
- ins_pricing/tests/production/test_scoring.py +311 -0
- ins_pricing/utils/profiling.py +377 -0
- ins_pricing/utils/validation.py +427 -0
- ins_pricing-0.2.9.dist-info/METADATA +149 -0
- {ins_pricing-0.2.7.dist-info → ins_pricing-0.2.9.dist-info}/RECORD +28 -12
- ins_pricing/CHANGELOG_20260114.md +0 -275
- ins_pricing/CODE_REVIEW_IMPROVEMENTS.md +0 -715
- ins_pricing-0.2.7.dist-info/METADATA +0 -101
- {ins_pricing-0.2.7.dist-info → ins_pricing-0.2.9.dist-info}/WHEEL +0 -0
- {ins_pricing-0.2.7.dist-info → ins_pricing-0.2.9.dist-info}/top_level.txt +0 -0
ins_pricing/production/preprocess.py
CHANGED

```diff
@@ -1,3 +1,33 @@
+"""Production preprocessing utilities for applying training-time transformations.
+
+This module provides functions for loading and applying preprocessing artifacts
+that were saved during model training. It ensures that production data undergoes
+the same transformations as training data.
+
+Typical workflow:
+    1. Load preprocessing artifacts from training
+    2. Prepare raw features (type conversion, missing value handling)
+    3. Apply full preprocessing pipeline (one-hot encoding, scaling)
+
+Example:
+    >>> from ins_pricing.production.preprocess import load_preprocess_artifacts, apply_preprocess_artifacts
+    >>>
+    >>> # Load artifacts saved during training
+    >>> artifacts = load_preprocess_artifacts("models/my_model/preprocess_artifacts.json")
+    >>>
+    >>> # Apply to new production data
+    >>> import pandas as pd
+    >>> raw_data = pd.read_csv("new_policies.csv")
+    >>> preprocessed = apply_preprocess_artifacts(raw_data, artifacts)
+    >>>
+    >>> # Now ready for model prediction
+    >>> predictions = model.predict(preprocessed)
+
+Note:
+    Preprocessing artifacts must match the exact configuration used during training
+    to ensure consistency between training and production predictions.
+"""
+
 from __future__ import annotations
 
 import json
@@ -8,6 +38,39 @@ import pandas as pd
 
 
 def load_preprocess_artifacts(path: str | Path) -> Dict[str, Any]:
+    """Load preprocessing artifacts from a JSON file.
+
+    Preprocessing artifacts contain all information needed to transform
+    raw production data the same way as training data, including:
+    - Feature names and types
+    - Categorical feature categories
+    - Numeric feature scaling parameters (mean, scale)
+    - One-hot encoding configuration
+
+    Args:
+        path: Path to the preprocessing artifacts JSON file
+
+    Returns:
+        Dictionary containing preprocessing configuration and parameters:
+        - factor_nmes: List of feature column names
+        - cate_list: List of categorical feature names
+        - num_features: List of numeric feature names
+        - cat_categories: Dict mapping categorical features to their categories
+        - numeric_scalers: Dict with scaling parameters (mean, scale) per feature
+        - var_nmes: List of final column names after preprocessing
+        - drop_first: Whether first category was dropped in one-hot encoding
+
+    Raises:
+        ValueError: If the artifact file is not a valid JSON dictionary
+        FileNotFoundError: If the artifact file does not exist
+
+    Example:
+        >>> artifacts = load_preprocess_artifacts("models/xgb_model/preprocess.json")
+        >>> print(artifacts.keys())
+        dict_keys(['factor_nmes', 'cate_list', 'num_features', ...])
+        >>> print(artifacts['factor_nmes'])
+        ['age', 'gender', 'region', 'vehicle_age']
+    """
     artifact_path = Path(path)
     payload = json.loads(artifact_path.read_text(encoding="utf-8", errors="replace"))
     if not isinstance(payload, dict):
@@ -16,6 +79,52 @@ def load_preprocess_artifacts(path: str | Path) -> Dict[str, Any]:
 
 
 def prepare_raw_features(df: pd.DataFrame, artifacts: Dict[str, Any]) -> pd.DataFrame:
+    """Prepare raw features for preprocessing by handling types and missing values.
+
+    This function performs initial data preparation:
+    1. Ensures all required features exist (adds missing columns with NA)
+    2. Converts numeric features to numeric type (coercing errors to 0)
+    3. Converts categorical features to proper categorical type
+    4. Applies category constraints from training data
+
+    Args:
+        df: Raw input DataFrame with policy/claim data
+        artifacts: Preprocessing artifacts from load_preprocess_artifacts()
+            Must contain: factor_nmes, cate_list, num_features, cat_categories
+
+    Returns:
+        DataFrame with:
+        - Only feature columns (factor_nmes)
+        - Numeric features as float64
+        - Categorical features as object or Categorical
+        - Missing columns filled with NA
+        - Invalid numeric values filled with 0
+
+    Example:
+        >>> raw_df = pd.DataFrame({
+        ...     'age': ['25', '30', 'invalid'],
+        ...     'gender': ['M', 'F', 'X'],
+        ...     'missing_feature': [1, 2, 3]
+        ... })
+        >>> artifacts = {
+        ...     'factor_nmes': ['age', 'gender', 'region'],
+        ...     'num_features': ['age'],
+        ...     'cate_list': ['gender', 'region'],
+        ...     'cat_categories': {'gender': ['M', 'F'], 'region': ['North', 'South']}
+        ... }
+        >>> prepared = prepare_raw_features(raw_df, artifacts)
+        >>> print(prepared.dtypes)
+        age        float64
+        gender    category
+        region      object
+        >>> print(prepared['age'].tolist())
+        [25.0, 30.0, 0.0]  # 'invalid' coerced to 0
+
+    Note:
+        - Missing numeric values are filled with 0 (not NaN)
+        - Unknown categories are kept as-is (handled later in one-hot encoding)
+        - Extra columns in input df are dropped
+    """
     factor_nmes = list(artifacts.get("factor_nmes") or [])
     cate_list = list(artifacts.get("cate_list") or [])
     num_features = set(artifacts.get("num_features") or [])
@@ -42,6 +151,63 @@ def prepare_raw_features(df: pd.DataFrame, artifacts: Dict[str, Any]) -> pd.DataFrame:
 
 
 def apply_preprocess_artifacts(df: pd.DataFrame, artifacts: Dict[str, Any]) -> pd.DataFrame:
+    """Apply complete preprocessing pipeline to production data.
+
+    This is the main preprocessing function that applies the full transformation
+    pipeline used during training:
+    1. Prepare raw features (via prepare_raw_features)
+    2. One-hot encode categorical features
+    3. Standardize numeric features using training statistics
+    4. Align columns to match training data exactly
+
+    The output is ready for model prediction and guaranteed to have the same
+    column structure as the training data.
+
+    Args:
+        df: Raw input DataFrame with policy/claim data
+        artifacts: Complete preprocessing artifacts dictionary containing:
+            - factor_nmes: Feature names
+            - cate_list: Categorical feature names
+            - num_features: Numeric feature names
+            - cat_categories: Categorical feature categories
+            - numeric_scalers: Dict with 'mean' and 'scale' for each numeric feature
+            - var_nmes: Final column names after preprocessing
+            - drop_first: Whether to drop first category in one-hot encoding
+
+    Returns:
+        Preprocessed DataFrame ready for model prediction with:
+        - One-hot encoded categorical features
+        - Standardized numeric features: (value - mean) / scale
+        - Exact column structure matching training data
+        - Missing columns filled with 0
+        - dtype: int8 for one-hot columns, float64 for numeric
+
+    Raises:
+        KeyError: If artifacts are missing required keys
+
+    Example:
+        >>> # Complete preprocessing pipeline
+        >>> artifacts = load_preprocess_artifacts("models/xgb/preprocess.json")
+        >>> raw_data = pd.DataFrame({
+        ...     'age': [25, 30, 35],
+        ...     'gender': ['M', 'F', 'M'],
+        ...     'region': ['North', 'South', 'East']
+        ... })
+        >>> processed = apply_preprocess_artifacts(raw_data, artifacts)
+        >>> print(processed.shape)
+        (3, 50)  # More columns after one-hot encoding
+        >>> print(processed.columns[:5])
+        Index(['age', 'gender_F', 'gender_M', 'region_East', 'region_North'], dtype='object')
+        >>> # Age is now standardized
+        >>> print(processed['age'].tolist())
+        [-0.52, 0.0, 0.52]  # Standardized values
+
+    Note:
+        - Categorical features not seen during training will be ignored (dropped in one-hot)
+        - Numeric features are standardized using training mean and std
+        - Output column order matches training data exactly
+        - Use this function for production scoring to ensure consistency
+    """
     cate_list = list(artifacts.get("cate_list") or [])
     num_features = list(artifacts.get("num_features") or [])
     var_nmes = list(artifacts.get("var_nmes") or [])
```
ins_pricing/setup.py
CHANGED

ins_pricing/tests/governance/__init__.py
ADDED

```diff
@@ -0,0 +1 @@
+"""Tests for the governance module."""
```
ins_pricing/tests/governance/test_audit.py
ADDED

```diff
@@ -0,0 +1,56 @@
+"""Tests for audit logging module."""
+
+import pytest
+from datetime import datetime
+from pathlib import Path
+
+from ins_pricing.exceptions import GovernanceError
+
+
+class TestAuditLogging:
+    """Test audit logging functionality."""
+
+    def test_log_model_action(self, tmp_path):
+        """Test logging a model action."""
+        from ins_pricing.governance.audit import AuditLogger
+
+        logger = AuditLogger(audit_dir=tmp_path)
+        logger.log(
+            action="model_registered",
+            model_name="test_model",
+            user="test_user",
+            details={"version": "1.0.0"}
+        )
+
+        logs = logger.get_logs(model_name="test_model")
+        assert len(logs) > 0
+        assert logs[0]["action"] == "model_registered"
+
+    def test_get_audit_trail(self, tmp_path):
+        """Test retrieving complete audit trail."""
+        from ins_pricing.governance.audit import AuditLogger
+
+        logger = AuditLogger(audit_dir=tmp_path)
+
+        # Log multiple actions
+        logger.log("registered", "model_a", "user1")
+        logger.log("trained", "model_a", "user1")
+        logger.log("deployed", "model_a", "user2")
+
+        trail = logger.get_audit_trail("model_a")
+
+        assert len(trail) == 3
+        assert trail[-1]["action"] == "deployed"
+
+    def test_filter_logs_by_date(self, tmp_path):
+        """Test filtering audit logs by date range."""
+        from ins_pricing.governance.audit import AuditLogger
+
+        logger = AuditLogger(audit_dir=tmp_path)
+        logger.log("action1", "model", "user")
+
+        # Filter by today
+        today = datetime.now().date()
+        logs = logger.get_logs(start_date=today, end_date=today)
+
+        assert len(logs) > 0
```
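Only the tests ship in this hunk; `ins_pricing/governance/audit.py` itself is unchanged in the diff. A minimal JSONL-backed `AuditLogger` sketch that would satisfy the interface exercised above, with the storage format and file name assumed; only the method signatures come from the tests.

```python
# Sketch only: the package's real AuditLogger is not shown in this diff.
import json
from datetime import datetime
from pathlib import Path

class AuditLogger:
    def __init__(self, audit_dir):
        # One append-only JSONL file per audit directory (assumed layout).
        self.path = Path(audit_dir) / "audit.jsonl"

    def log(self, action, model_name, user, details=None):
        entry = {
            "timestamp": datetime.now().isoformat(),
            "action": action,
            "model_name": model_name,
            "user": user,
            "details": details or {},
        }
        with self.path.open("a", encoding="utf-8") as fh:
            fh.write(json.dumps(entry) + "\n")

    def get_logs(self, model_name=None, start_date=None, end_date=None):
        logs = []
        if not self.path.exists():
            return logs
        for line in self.path.read_text(encoding="utf-8").splitlines():
            entry = json.loads(line)
            day = datetime.fromisoformat(entry["timestamp"]).date()
            if model_name and entry["model_name"] != model_name:
                continue
            if start_date and day < start_date:
                continue
            if end_date and day > end_date:
                continue
            logs.append(entry)
        return logs

    def get_audit_trail(self, model_name):
        # Append-only storage keeps chronological order for free.
        return self.get_logs(model_name=model_name)
```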
ins_pricing/tests/governance/test_registry.py
ADDED

```diff
@@ -0,0 +1,128 @@
+"""Tests for model registry module."""
+
+import json
+from pathlib import Path
+import pytest
+from datetime import datetime
+
+from ins_pricing.exceptions import GovernanceError
+
+
+@pytest.fixture
+def sample_model_metadata():
+    """Sample model metadata."""
+    return {
+        "model_name": "test_model_v1",
+        "version": "1.0.0",
+        "created_at": datetime.now().isoformat(),
+        "model_type": "xgboost",
+        "metrics": {"mse": 100.5, "r2": 0.85},
+        "features": ["age", "premium", "region"],
+        "author": "test_user"
+    }
+
+
+class TestModelRegistry:
+    """Test model registry functionality."""
+
+    def test_register_new_model(self, tmp_path, sample_model_metadata):
+        """Test registering a new model."""
+        from ins_pricing.governance.registry import ModelRegistry
+
+        registry = ModelRegistry(registry_path=tmp_path / "registry.json")
+        registry.register(sample_model_metadata)
+
+        assert registry.exists(sample_model_metadata["model_name"])
+
+    def test_duplicate_registration_error(self, tmp_path, sample_model_metadata):
+        """Test error on duplicate model registration."""
+        from ins_pricing.governance.registry import ModelRegistry
+
+        registry = ModelRegistry(registry_path=tmp_path / "registry.json")
+        registry.register(sample_model_metadata)
+
+        with pytest.raises(GovernanceError):
+            registry.register(sample_model_metadata)  # Duplicate
+
+    def test_get_model_metadata(self, tmp_path, sample_model_metadata):
+        """Test retrieving model metadata."""
+        from ins_pricing.governance.registry import ModelRegistry
+
+        registry = ModelRegistry(registry_path=tmp_path / "registry.json")
+        registry.register(sample_model_metadata)
+
+        metadata = registry.get(sample_model_metadata["model_name"])
+
+        assert metadata["version"] == "1.0.0"
+        assert "metrics" in metadata
+
+    def test_list_all_models(self, tmp_path):
+        """Test listing all registered models."""
+        from ins_pricing.governance.registry import ModelRegistry
+
+        registry = ModelRegistry(registry_path=tmp_path / "registry.json")
+        registry.register({"model_name": "model_a", "version": "1.0.0"})
+        registry.register({"model_name": "model_b", "version": "2.0.0"})
+
+        models = registry.list_all()
+
+        assert len(models) == 2
+        assert any(m["model_name"] == "model_a" for m in models)
+
+    def test_update_model_metadata(self, tmp_path, sample_model_metadata):
+        """Test updating model metadata."""
+        from ins_pricing.governance.registry import ModelRegistry
+
+        registry = ModelRegistry(registry_path=tmp_path / "registry.json")
+        registry.register(sample_model_metadata)
+
+        # Update metrics
+        registry.update(
+            sample_model_metadata["model_name"],
+            {"metrics": {"mse": 95.0, "r2": 0.87}}
+        )
+
+        updated = registry.get(sample_model_metadata["model_name"])
+        assert updated["metrics"]["mse"] == 95.0
+
+    def test_delete_model(self, tmp_path, sample_model_metadata):
+        """Test deleting a model from registry."""
+        from ins_pricing.governance.registry import ModelRegistry
+
+        registry = ModelRegistry(registry_path=tmp_path / "registry.json")
+        registry.register(sample_model_metadata)
+
+        registry.delete(sample_model_metadata["model_name"])
+
+        assert not registry.exists(sample_model_metadata["model_name"])
+
+
+class TestModelVersioning:
+    """Test model versioning functionality."""
+
+    def test_register_multiple_versions(self, tmp_path):
+        """Test registering multiple versions of same model."""
+        from ins_pricing.governance.registry import ModelRegistry
+
+        registry = ModelRegistry(registry_path=tmp_path / "registry.json")
+
+        registry.register({"model_name": "my_model", "version": "1.0.0"})
+        registry.register({"model_name": "my_model", "version": "1.1.0"})
+
+        versions = registry.get_versions("my_model")
+
+        assert len(versions) == 2
+        assert "1.0.0" in versions
+        assert "1.1.0" in versions
+
+    def test_get_latest_version(self, tmp_path):
+        """Test getting the latest version of a model."""
+        from ins_pricing.governance.registry import ModelRegistry
+
+        registry = ModelRegistry(registry_path=tmp_path / "registry.json")
+        registry.register({"model_name": "my_model", "version": "1.0.0"})
+        registry.register({"model_name": "my_model", "version": "2.0.0"})
+
+        latest = registry.get_latest("my_model")
+
+        assert latest["version"] == "2.0.0"
```
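As with the audit tests, the registry implementation is not shown in this diff. One plausible shape that passes the behaviours above, assuming uniqueness on the (model_name, version) pair and numeric version comparison for `get_latest`:

```python
# Sketch only: ins_pricing/governance/registry.py is not reproduced here.
import json
from pathlib import Path

from ins_pricing.exceptions import GovernanceError  # imported exactly as the tests do

class ModelRegistry:
    def __init__(self, registry_path):
        self.path = Path(registry_path)
        self._models = json.loads(self.path.read_text()) if self.path.exists() else []

    def _save(self):
        self.path.write_text(json.dumps(self._models, indent=2))

    def register(self, metadata):
        key = (metadata["model_name"], metadata.get("version"))
        if any((m["model_name"], m.get("version")) == key for m in self._models):
            raise GovernanceError(f"{key[0]} v{key[1]} is already registered")
        self._models.append(dict(metadata))
        self._save()

    def exists(self, model_name):
        return any(m["model_name"] == model_name for m in self._models)

    def get(self, model_name):
        return self.get_latest(model_name)

    def get_versions(self, model_name):
        return [m.get("version") for m in self._models if m["model_name"] == model_name]

    def get_latest(self, model_name):
        candidates = [m for m in self._models if m["model_name"] == model_name]
        # Compare versions numerically so "2.0.0" beats "1.1.0".
        return max(candidates, key=lambda m: tuple(int(p) for p in m["version"].split(".")))

    def list_all(self):
        return list(self._models)

    def update(self, model_name, patch):
        self.get_latest(model_name).update(patch)
        self._save()

    def delete(self, model_name):
        self._models = [m for m in self._models if m["model_name"] != model_name]
        self._save()
```

Note the duplicate check keys on name plus version, which is what lets `TestModelVersioning` register two versions of `my_model` while `test_duplicate_registration_error` still fails fast.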
ins_pricing/tests/governance/test_release.py
ADDED

```diff
@@ -0,0 +1,74 @@
+"""Tests for model release module."""
+
+import pytest
+from pathlib import Path
+from datetime import datetime
+
+from ins_pricing.exceptions import GovernanceError
+
+
+class TestModelRelease:
+    """Test model release workflow."""
+
+    def test_create_release(self, tmp_path):
+        """Test creating a new model release."""
+        from ins_pricing.governance.release import ReleaseManager
+
+        manager = ReleaseManager(release_dir=tmp_path)
+        release_id = manager.create_release(
+            model_name="test_model",
+            version="1.0.0",
+            artifacts=["model.pkl", "config.json"]
+        )
+
+        assert release_id is not None
+        assert manager.release_exists(release_id)
+
+    def test_get_release_info(self, tmp_path):
+        """Test getting release information."""
+        from ins_pricing.governance.release import ReleaseManager
+
+        manager = ReleaseManager(release_dir=tmp_path)
+        release_id = manager.create_release(
+            model_name="test_model",
+            version="1.0.0"
+        )
+
+        info = manager.get_release_info(release_id)
+
+        assert info["model_name"] == "test_model"
+        assert info["version"] == "1.0.0"
+
+    def test_promote_release(self, tmp_path):
+        """Test promoting a release to production."""
+        from ins_pricing.governance.release import ReleaseManager
+
+        manager = ReleaseManager(release_dir=tmp_path)
+        release_id = manager.create_release(
+            model_name="test_model",
+            version="1.0.0"
+        )
+
+        manager.promote_to_production(release_id)
+
+        info = manager.get_release_info(release_id)
+        assert info["status"] == "production"
+
+    def test_rollback_release(self, tmp_path):
+        """Test rolling back a release."""
+        from ins_pricing.governance.release import ReleaseManager
+
+        manager = ReleaseManager(release_dir=tmp_path)
+
+        # Create and promote two releases
+        release1 = manager.create_release("test_model", "1.0.0")
+        manager.promote_to_production(release1)
+
+        release2 = manager.create_release("test_model", "2.0.0")
+        manager.promote_to_production(release2)
+
+        # Rollback to version 1.0.0
+        manager.rollback_to(release1)
+
+        current = manager.get_production_release("test_model")
+        assert current["version"] == "1.0.0"
```
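The release workflow under test (create, promote, rollback) suggests a small state machine over per-release records. A file-backed sketch consistent with these tests; the release-id format and the "superseded" status are invented for illustration:

```python
# Sketch only: ins_pricing/governance/release.py is not reproduced in this diff.
import json
import uuid
from datetime import datetime
from pathlib import Path

class ReleaseManager:
    def __init__(self, release_dir):
        self.dir = Path(release_dir)
        self.dir.mkdir(parents=True, exist_ok=True)

    def _path(self, release_id):
        return self.dir / f"{release_id}.json"

    def create_release(self, model_name, version, artifacts=None):
        release_id = uuid.uuid4().hex
        record = {
            "release_id": release_id,
            "model_name": model_name,
            "version": version,
            "artifacts": artifacts or [],
            "status": "draft",
            "created_at": datetime.now().isoformat(),
        }
        self._path(release_id).write_text(json.dumps(record))
        return release_id

    def release_exists(self, release_id):
        return self._path(release_id).exists()

    def get_release_info(self, release_id):
        return json.loads(self._path(release_id).read_text())

    def promote_to_production(self, release_id):
        record = self.get_release_info(release_id)
        # Demote any other production release of the same model first.
        for other in self.dir.glob("*.json"):
            rec = json.loads(other.read_text())
            if rec["model_name"] == record["model_name"] and rec["status"] == "production":
                rec["status"] = "superseded"
                other.write_text(json.dumps(rec))
        record["status"] = "production"
        self._path(release_id).write_text(json.dumps(record))

    def rollback_to(self, release_id):
        # Rolling back is just re-promoting an earlier release.
        self.promote_to_production(release_id)

    def get_production_release(self, model_name):
        for path in self.dir.glob("*.json"):
            rec = json.loads(path.read_text())
            if rec["model_name"] == model_name and rec["status"] == "production":
                return rec
        return None
```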
ins_pricing/tests/pricing/__init__.py
ADDED

```diff
@@ -0,0 +1 @@
+"""Tests for the pricing module."""
```
ins_pricing/tests/pricing/test_calibration.py
ADDED

```diff
@@ -0,0 +1,72 @@
+"""Tests for pricing calibration module."""
+
+import numpy as np
+import pandas as pd
+import pytest
+
+
+@pytest.fixture
+def sample_model_predictions():
+    """Sample model predictions and actuals."""
+    np.random.seed(42)
+    return pd.DataFrame({
+        "actual_loss": np.random.exponential(500, 1000),
+        "predicted_loss": np.random.exponential(480, 1000),
+        "exposure": np.ones(1000),
+        "premium": np.random.uniform(200, 1000, 1000)
+    })
+
+
+class TestGlobalCalibration:
+    """Test global calibration methods."""
+
+    def test_multiplicative_calibration(self, sample_model_predictions):
+        """Test multiplicative calibration factor."""
+        from ins_pricing.pricing.calibration import calibrate_multiplicative
+
+        calibration_factor = calibrate_multiplicative(
+            actual=sample_model_predictions["actual_loss"],
+            predicted=sample_model_predictions["predicted_loss"],
+            weights=sample_model_predictions["exposure"]
+        )
+
+        assert isinstance(calibration_factor, (int, float, np.number))
+        assert calibration_factor > 0
+
+    def test_additive_calibration(self, sample_model_predictions):
+        """Test additive calibration adjustment."""
+        from ins_pricing.pricing.calibration import calibrate_additive
+
+        calibration_adjustment = calibrate_additive(
+            actual=sample_model_predictions["actual_loss"],
+            predicted=sample_model_predictions["predicted_loss"],
+            weights=sample_model_predictions["exposure"]
+        )
+
+        assert isinstance(calibration_adjustment, (int, float, np.number))
+
+
+class TestSegmentCalibration:
+    """Test segment-specific calibration."""
+
+    def test_calibrate_by_segment(self):
+        """Test calibration within segments."""
+        from ins_pricing.pricing.calibration import calibrate_by_segment
+
+        df = pd.DataFrame({
+            "segment": ["A", "B", "A", "B", "A"] * 200,
+            "actual": np.random.exponential(500, 1000),
+            "predicted": np.random.exponential(480, 1000),
+            "exposure": np.ones(1000)
+        })
+
+        calibrated = calibrate_by_segment(
+            df,
+            actual_col="actual",
+            pred_col="predicted",
+            segment_col="segment",
+            weight_col="exposure"
+        )
+
+        assert "calibration_factor" in calibrated.columns
+        assert len(calibrated["segment"].unique()) == 2
```
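The bodies of the calibration functions live in `ins_pricing/pricing/calibration.py` (+125 lines above) and are not shown here. The conventional definitions consistent with these tests are a multiplicative factor that matches weighted totals and an additive shift equal to the weighted mean residual; a sketch under that assumption:

```python
# Sketch of the conventional definitions, not the package's verified bodies.
import numpy as np

def calibrate_multiplicative(actual, predicted, weights):
    # Factor f such that f * sum(w * predicted) == sum(w * actual).
    return float(np.sum(weights * actual) / np.sum(weights * predicted))

def calibrate_additive(actual, predicted, weights):
    # Shift equal to the weighted mean residual: sum(w * (y - y_hat)) / sum(w).
    return float(np.sum(weights * (actual - predicted)) / np.sum(weights))
```

With unit exposure weights, as in the fixture, the multiplicative factor reduces to total actual losses over total predicted losses, which is why the test only needs to assert positivity.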
ins_pricing/tests/pricing/test_exposure.py
ADDED

```diff
@@ -0,0 +1,64 @@
+"""Tests for pricing exposure calculation module."""
+
+import numpy as np
+import pandas as pd
+import pytest
+from datetime import datetime, timedelta
+
+
+@pytest.fixture
+def sample_policy_dates():
+    """Sample policy with start and end dates."""
+    start_date = datetime(2023, 1, 1)
+    return pd.DataFrame({
+        "policy_id": range(100),
+        "start_date": [start_date + timedelta(days=i) for i in range(100)],
+        "end_date": [start_date + timedelta(days=i+365) for i in range(100)],
+        "premium": np.random.uniform(200, 1000, 100)
+    })
+
+
+class TestExposureCalculation:
+    """Test exposure calculation functions."""
+
+    def test_calculate_policy_exposure_years(self, sample_policy_dates):
+        """Test calculating exposure in years."""
+        from ins_pricing.pricing.exposure import compute_exposure
+
+        df = compute_exposure(
+            sample_policy_dates,
+            start_col="start_date",
+            end_col="end_date",
+            time_unit="years"
+        )
+
+        assert "exposure" in df.columns
+        assert all(df["exposure"] > 0)
+        assert all(df["exposure"] <= 1.1)  # Roughly 1 year
+
+    def test_calculate_policy_exposure_days(self, sample_policy_dates):
+        """Test calculating exposure in days."""
+        from ins_pricing.pricing.exposure import compute_exposure
+
+        df = compute_exposure(
+            sample_policy_dates,
+            start_col="start_date",
+            end_col="end_date",
+            time_unit="days"
+        )
+
+        assert all(df["exposure"] >= 365)
+        assert all(df["exposure"] <= 366)
+
+    def test_partial_period_exposure(self):
+        """Test exposure for partial periods."""
+        from ins_pricing.pricing.exposure import compute_exposure
+
+        df = pd.DataFrame({
+            "start_date": [datetime(2023, 1, 1)],
+            "end_date": [datetime(2023, 6, 30)]  # 6 months
+        })
+
+        result = compute_exposure(df, "start_date", "end_date", time_unit="years")
+
+        assert 0.48 < result["exposure"].iloc[0] < 0.52  # Roughly 0.5 years
```