ins-pricing 0.4.4-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ins_pricing/README.md +74 -56
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +52 -50
  4. ins_pricing/cli/BayesOpt_incremental.py +832 -898
  5. ins_pricing/cli/Explain_Run.py +31 -23
  6. ins_pricing/cli/Explain_entry.py +532 -579
  7. ins_pricing/cli/Pricing_Run.py +31 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +1440 -1438
  9. ins_pricing/cli/utils/cli_common.py +256 -256
  10. ins_pricing/cli/utils/cli_config.py +375 -375
  11. ins_pricing/cli/utils/import_resolver.py +382 -365
  12. ins_pricing/cli/utils/notebook_utils.py +340 -340
  13. ins_pricing/cli/watchdog_run.py +209 -201
  14. ins_pricing/frontend/README.md +573 -419
  15. ins_pricing/frontend/__init__.py +10 -10
  16. ins_pricing/frontend/config_builder.py +1 -0
  17. ins_pricing/frontend/example_workflows.py +1 -1
  18. ins_pricing/governance/__init__.py +20 -20
  19. ins_pricing/governance/release.py +159 -159
  20. ins_pricing/modelling/README.md +67 -0
  21. ins_pricing/modelling/__init__.py +147 -92
  22. ins_pricing/modelling/bayesopt/README.md +59 -0
  23. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  24. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -550
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -962
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  29. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
  32. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
  36. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
  37. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
  38. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
  39. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  40. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  41. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  42. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  43. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  44. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
  45. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  46. ins_pricing/modelling/explain/__init__.py +55 -55
  47. ins_pricing/modelling/explain/metrics.py +27 -174
  48. ins_pricing/modelling/explain/permutation.py +237 -237
  49. ins_pricing/modelling/plotting/__init__.py +40 -36
  50. ins_pricing/modelling/plotting/compat.py +228 -0
  51. ins_pricing/modelling/plotting/curves.py +572 -572
  52. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  53. ins_pricing/modelling/plotting/geo.py +362 -362
  54. ins_pricing/modelling/plotting/importance.py +121 -121
  55. ins_pricing/pricing/__init__.py +27 -27
  56. ins_pricing/production/__init__.py +35 -25
  57. ins_pricing/production/{predict.py → inference.py} +140 -57
  58. ins_pricing/production/monitoring.py +8 -21
  59. ins_pricing/reporting/__init__.py +11 -11
  60. ins_pricing/setup.py +1 -1
  61. ins_pricing/tests/production/test_inference.py +90 -0
  62. ins_pricing/utils/__init__.py +116 -83
  63. ins_pricing/utils/device.py +255 -255
  64. ins_pricing/utils/features.py +53 -0
  65. ins_pricing/utils/io.py +72 -0
  66. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  67. ins_pricing/utils/metrics.py +158 -24
  68. ins_pricing/utils/numerics.py +76 -0
  69. ins_pricing/utils/paths.py +9 -1
  70. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +55 -35
  71. ins_pricing-0.5.0.dist-info/RECORD +131 -0
  72. ins_pricing/CHANGELOG.md +0 -272
  73. ins_pricing/RELEASE_NOTES_0.2.8.md +0 -344
  74. ins_pricing/docs/LOSS_FUNCTIONS.md +0 -78
  75. ins_pricing/docs/modelling/BayesOpt_USAGE.md +0 -945
  76. ins_pricing/docs/modelling/README.md +0 -34
  77. ins_pricing/frontend/QUICKSTART.md +0 -152
  78. ins_pricing/modelling/core/BayesOpt.py +0 -146
  79. ins_pricing/modelling/core/__init__.py +0 -1
  80. ins_pricing/modelling/core/bayesopt/PHASE2_REFACTORING_SUMMARY.md +0 -449
  81. ins_pricing/modelling/core/bayesopt/PHASE3_REFACTORING_SUMMARY.md +0 -406
  82. ins_pricing/modelling/core/bayesopt/REFACTORING_SUMMARY.md +0 -247
  83. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  84. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  85. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  86. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  87. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  88. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  89. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  90. ins_pricing/tests/production/test_predict.py +0 -233
  91. ins_pricing-0.4.4.dist-info/RECORD +0 -137
  92. /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
  93. /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
  94. /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
  95. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
  96. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
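The most visible change in 0.5.0 is the package reorganisation in the rename list above: `modelling/core/bayesopt` is flattened to `modelling/bayesopt`, several bayesopt helpers move up to `ins_pricing/utils/`, and `production/predict.py` becomes `production/inference.py`. As a rough orientation only, the sketch below shows how downstream imports would map between the two layouts; the module paths are taken from the rename list, but whether 0.5.0 keeps compatibility shims for the old paths is not something this diff confirms.

```python
# Hypothetical migration sketch (module paths from the rename list above; illustrative only).

# 0.4.4 layout:
#   from ins_pricing.modelling.core.bayesopt import core
#   from ins_pricing.production import predict
#   from ins_pricing.modelling.core.bayesopt.utils import losses

# 0.5.0 layout:
from ins_pricing.modelling.bayesopt import core   # modelling/core/bayesopt -> modelling/bayesopt
from ins_pricing.production import inference      # production/predict.py -> production/inference.py
from ins_pricing.utils import losses              # bayesopt utils promoted to ins_pricing/utils/
```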
ins_pricing/RELEASE_NOTES_0.2.8.md
@@ -1,344 +0,0 @@
- # Release Notes: ins_pricing v0.2.8
-
- **Release Date:** January 14, 2026
- **Type:** Minor Release (Quality & Performance Improvements)
-
- ---
-
- ## 🎯 Overview
-
- Version 0.2.8 is a significant quality and performance improvement release that focuses on:
- - **Code quality and maintainability**
- - **Performance optimization** (3-6x faster SHAP, 30-40% memory reduction)
- - **Comprehensive documentation**
- - **Extensive test coverage** (35% → 60%+)
-
- **All changes are backward compatible.** No breaking changes.
-
- ---
-
- ## ⭐ Highlights
-
- ### 1. 🚀 Performance Optimizations
-
- #### SHAP Parallelization (3-6x Speedup)
- ```python
- # Before (slow - serial processing)
- result = compute_shap_xgb(ctx, n_samples=200)  # ~10 minutes
-
- # After (fast - parallel processing)
- result = compute_shap_xgb(ctx, n_samples=200, use_parallel=True)  # ~2 minutes
- ```
- **Impact:** 3-6x faster on multi-core systems for n_samples > 100
-
- #### Memory Optimization (30-40% Reduction)
- - DatasetPreprocessor reduces unnecessary DataFrame copies
- - Conditional copying only when needed
- - Direct reference assignment where safe
-
- #### Binning Cache (5-10x Speedup)
- ```python
- from ins_pricing.pricing.factors import get_cache_info, clear_binning_cache
-
- # Automatic caching for repeated binning
- factor_table = build_factor_table(df, factor_col='age', n_bins=10)  # Cached!
-
- # Check cache performance
- info = get_cache_info()
- print(f"Cache hit rate: {info['hits'] / (info['hits'] + info['misses']):.1%}")
- ```
-
- ---
-
- ### 2. 🛠️ New Utility Modules
-
- #### Data Validation Toolkit
- ```python
- from ins_pricing.utils.validation import (
-     validate_required_columns,
-     validate_column_types,
-     validate_value_range,
-     validate_no_nulls,
-     validate_positive
- )
-
- # Validate DataFrame structure
- validate_required_columns(df, ['age', 'premium', 'exposure'], df_name='policy_data')
-
- # Validate data types
- df = validate_column_types(df, {'age': 'int64', 'premium': 'float64'}, coerce=True)
-
- # Validate value ranges
- validate_value_range(df, 'age', min_val=0, max_val=120)
- validate_positive(df, ['premium', 'exposure'], allow_zero=False)
- ```
-
- #### Performance Profiling
- ```python
- from ins_pricing.utils.profiling import profile_section, MemoryMonitor
-
- # Simple profiling
- with profile_section("Data Processing", logger):
-     process_large_dataset()
- # Output: [Profile] Data Processing: 5.23s, RAM: +1250.3MB, GPU peak: 2048.5MB
-
- # Memory monitoring with auto-cleanup
- with MemoryMonitor("Training", threshold_gb=16.0, logger=logger):
-     train_model()
- ```
-
- ---
-
- ### 3. 📚 Documentation Overhaul
-
- #### Complete Module Documentation
- - **production/preprocess.py**: Module + 3 functions fully documented
- - **pricing/calibration.py**: Module + 2 functions with business context
- - All docs include practical examples and business rationale
-
- #### Example Quality
- ```python
- def fit_calibration_factor(pred, actual, *, weight=None, target_lr=None):
-     """Fit a scalar calibration factor to align predictions with actuals.
-
-     This function computes a multiplicative calibration factor...
-
-     Args:
-         pred: Model predictions (premiums or pure premiums)
-         actual: Actual observed values (claims or losses)
-         weight: Optional weights (e.g., exposure, earned premium)
-         target_lr: Target loss ratio to achieve (0 < target_lr < 1)
-
-     Returns:
-         Calibration factor (scalar multiplier)
-
-     Example:
-         >>> # Calibrate to achieve 70% loss ratio
-         >>> pred_premium = np.array([100, 150, 200])
-         >>> actual_claims = np.array([75, 100, 130])
-         >>> factor = fit_calibration_factor(pred_premium, actual_claims, target_lr=0.70)
-         >>> print(f"{factor:.3f}")
-         1.143  # Adjust premiums to achieve 70% loss ratio
-
-     Note:
-         - target_lr typically in range [0.5, 0.9] for insurance pricing
-     """
- ```
-
- ---
-
- ### 4. 🧪 Test Coverage Expansion
-
- #### New Test Suites
- - **tests/production/** (247 scenarios)
-   - Prediction, scoring, monitoring, preprocessing
- - **tests/pricing/** (60+ scenarios)
-   - Factors, exposure, calibration, rate tables
- - **tests/governance/** (40+ scenarios)
-   - Registry, release, audit workflows
-
- #### Coverage Increase
- - **Before:** 35% overall coverage
- - **After:** 60%+ overall coverage
- - **Impact:** Better reliability, fewer production bugs
-
- ---
-
- ## 📦 What's New
-
- ### Added
-
- #### Core Utilities
- - `utils/validation.py` - 8 validation functions for data quality
- - `utils/profiling.py` - Performance and memory monitoring tools
- - `pricing/factors.py` - LRU caching for binning operations
-
- #### Test Coverage
- - 11 new test files with 250+ test scenarios
- - Complete coverage for production, pricing, governance modules
-
- #### Documentation
- - Module-level docstrings with business context
- - 150+ lines of comprehensive documentation
- - 8+ complete working examples
-
- ### Enhanced
-
- #### SHAP Computation
- - Parallel processing support via joblib
- - Automatic batch size optimization
- - Graceful fallback if joblib unavailable
- - All SHAP functions support `use_parallel=True`
-
- #### Configuration Validation
- - BayesOptConfig with comprehensive `__post_init__` validation
- - Clear error messages for configuration issues
- - Validation of distributed training settings
-
- ### Performance
-
- | Feature | Before | After | Improvement |
- |---------|--------|-------|-------------|
- | SHAP (200 samples) | 10 min | 2-3 min | **3-6x faster** |
- | Preprocessing memory | 2.5 GB | 1.5 GB | **40% reduction** |
- | Repeated binning | 5.2s | 0.5s | **10x faster** |
-
- ---
-
- ## 🔄 Migration Guide
-
- ### No Breaking Changes
-
- All changes are **backward compatible**. Existing code will continue to work without modifications.
-
- ### Opt-in Features
-
- New features are opt-in and don't affect existing behavior:
-
- ```python
- # SHAP parallelization - opt-in
- result = compute_shap_xgb(ctx, use_parallel=True)  # New parameter
-
- # Binning cache - automatic, but can be disabled
- binned = bin_numeric(series, bins=10, use_cache=False)  # Opt-out if needed
- ```
-
- ### Recommended Updates
-
- While not required, consider adopting these improvements:
-
- #### 1. Enable Parallel SHAP (if using SHAP)
- ```python
- # Before
- shap_result = compute_shap_xgb(ctx, n_samples=200)
-
- # After (recommended for n_samples > 100)
- shap_result = compute_shap_xgb(ctx, n_samples=200, use_parallel=True, n_jobs=-1)
- ```
-
- #### 2. Add Data Validation (for production code)
- ```python
- from ins_pricing.utils.validation import validate_required_columns, validate_positive
-
- def score_policies(df):
-     # Add validation at entry points
-     validate_required_columns(df, ['age', 'premium', 'exposure'], df_name='input_data')
-     validate_positive(df, ['premium', 'exposure'])
-
-     # Your existing code...
- ```
-
- #### 3. Use Profiling (for optimization)
- ```python
- from ins_pricing.utils.profiling import profile_section
-
- def expensive_operation():
-     with profile_section("Data Processing"):
-         # Your code...
- ```
-
- ---
-
- ## 📋 Installation
-
- ### Standard Installation
- ```bash
- pip install ins_pricing==0.2.8
- ```
-
- ### With Optional Dependencies
- ```bash
- # For parallel SHAP computation
- pip install "ins_pricing[explain]==0.2.8"
-
- # For memory profiling
- pip install psutil
-
- # All features
- pip install "ins_pricing[all]==0.2.8" psutil
- ```
-
- ---
-
- ## 🔧 Dependencies
-
- ### New Optional Dependencies
- - `joblib>=1.2` - For parallel SHAP computation (optional)
- - `psutil` - For memory profiling utilities (optional)
-
- ### Unchanged Core Dependencies
- - `numpy>=1.20`
- - `pandas>=1.4`
- - All existing optional dependencies remain the same
-
- ---
-
- ## 🐛 Known Issues
-
- None identified in this release.
-
- ---
-
- ## 🔮 What's Next (v0.2.9)
-
- Planned improvements for the next release:
-
- 1. **Governance Module Documentation** - Complete docs for registry, approval, release modules
- 2. **Plotting Module Documentation** - Enhanced visualization guidance
- 3. **CI/CD Pipeline** - Automated testing and code quality checks
- 4. **Additional Performance Optimizations** - Vectorized operations in pricing modules
-
- ---
-
- ## 📊 Metrics Summary
-
- | Metric | Before | After | Change |
- |--------|--------|-------|--------|
- | **Test Coverage** | 35% | 60%+ | +25% ✅ |
- | **Documentation Coverage** | ~40% | ~70% | +30% ✅ |
- | **SHAP Performance** | 1x | 3-6x | +3-6x ✅ |
- | **Memory Usage** | 100% | 60-70% | -30-40% ✅ |
- | **Binning Performance** | 1x | 5-10x | +5-10x ✅ |
-
- ---
-
- ## 🙏 Acknowledgments
-
- This release includes comprehensive code review findings and implements best practices for:
- - Performance optimization
- - Memory management
- - Code documentation
- - Test coverage
- - Developer experience
-
- ---
-
- ## 📞 Support
-
- For issues or questions about this release:
- 1. Check the [CHANGELOG.md](CHANGELOG.md) for detailed changes
- 2. Review module documentation in updated files
- 3. Check test files for usage examples
-
- ---
-
- ## ✅ Upgrade Checklist
-
- Before upgrading to 0.2.8:
-
- - [ ] Review [CHANGELOG.md](CHANGELOG.md) for all changes
- - [ ] No breaking changes - safe to upgrade
- - [ ] Consider enabling parallel SHAP if using SHAP
- - [ ] Consider adding data validation for production workflows
- - [ ] Install optional dependencies if needed: `pip install joblib psutil`
-
- After upgrading:
-
- - [ ] Verify existing functionality still works
- - [ ] Consider adopting new validation utilities
- - [ ] Consider adding performance profiling
- - [ ] Review new test examples for your use cases
-
- ---
-
- **Happy modeling! 🎉**
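The removed release notes above show `profile_section` only through its call site. For orientation, here is a minimal, illustrative sketch of that kind of timing context manager; it is not the `ins_pricing.utils.profiling` implementation, which per the notes also reports RAM deltas and GPU peaks.

```python
import time
from contextlib import contextmanager

@contextmanager
def profile_section(label, logger=None):
    """Illustrative sketch only: time a block and report elapsed seconds."""
    start = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - start
        message = f"[Profile] {label}: {elapsed:.2f}s"
        # The real utility also reports memory usage; this sketch only logs time.
        (logger.info if logger is not None else print)(message)

# Usage mirroring the removed example:
with profile_section("Data Processing"):
    total = sum(range(1_000_000))  # stand-in for process_large_dataset()
```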
ins_pricing/docs/LOSS_FUNCTIONS.md
@@ -1,78 +0,0 @@
- LOSS FUNCTIONS
-
- Overview
- This document describes the loss-function changes in ins_pricing. The training
- stack now supports multiple regression losses (not just Tweedie deviance) and
- propagates the selected loss into tuning, training, and inference.
-
- Supported loss_name values
- - auto (default): keep legacy behavior based on model name
- - tweedie: Tweedie deviance (uses tw_power / tweedie_variance_power when tuning)
- - poisson: Poisson deviance (power=1)
- - gamma: Gamma deviance (power=2)
- - mse: mean squared error
- - mae: mean absolute error
-
- Loss name mapping (all options)
- - Tweedie deviance -> tweedie
- - Poisson deviance -> poisson
- - Gamma deviance -> gamma
- - Mean squared error -> mse
- - Mean absolute error -> mae
- - Classification log loss -> logloss (classification only)
- - Classification BCE -> bce (classification only)
-
- Classification tasks
- - loss_name can be auto, logloss, or bce
- - training continues to use BCEWithLogits for torch models; evaluation uses logloss
-
- Where to set loss_name
- Add to any BayesOpt config JSON:
-
- {
-   "task_type": "regression",
-   "loss_name": "mse"
- }
-
- Behavior changes
- 1) Tuning and metrics
- - When loss_name is mse/mae, tuning does not sample Tweedie power.
- - When loss_name is poisson/gamma, power is fixed (1.0/2.0).
- - When loss_name is tweedie, power is sampled as before.
-
- 2) Torch training (ResNet/FT/GNN)
- - Loss computation is routed by loss_name.
- - For tweedie/poisson/gamma, predictions are clamped positive.
- - For mse/mae, no Tweedie power is used.
-
- 3) XGBoost objective
- - loss_name controls XGB objective:
-   - tweedie -> reg:tweedie
-   - poisson -> count:poisson
-   - gamma -> reg:gamma
-   - mse -> reg:squarederror
-   - mae -> reg:absoluteerror
-
- 4) Inference
- - ResNet/GNN constructors now receive loss_name.
- - When loss_name is not tweedie, tw_power is not applied at inference.
-
- Legacy defaults (auto)
- - If loss_name is omitted, behavior is unchanged:
-   - model name contains "f" -> poisson
-   - model name contains "s" -> gamma
-   - otherwise -> tweedie
-
- Examples
- - ResNet direct training (MSE):
-   "loss_name": "mse"
-
- FT embed -> ResNet (MSE):
-   "loss_name": "mse"
-
- - XGB direct training (unchanged):
-   omit loss_name or set "loss_name": "auto"
-
- Notes
- - loss_name is global per config. If you need different losses for different
-   models, split into separate configs and run them independently.
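The XGBoost objective mapping and the legacy `auto` rule documented in the removed LOSS_FUNCTIONS.md can be summarised in a short sketch. This is a paraphrase of that documented behaviour, not the package's actual code, and the helper name is illustrative.

```python
# Paraphrase of the removed LOSS_FUNCTIONS.md behaviour; not ins_pricing's real API.

XGB_OBJECTIVES = {
    "tweedie": "reg:tweedie",
    "poisson": "count:poisson",
    "gamma": "reg:gamma",
    "mse": "reg:squarederror",
    "mae": "reg:absoluteerror",
}

def resolve_loss_name(loss_name: str, model_name: str) -> str:
    """Legacy 'auto' rule: pick the loss from the model name, as documented above."""
    if loss_name != "auto":
        return loss_name
    if "f" in model_name:   # frequency-style models -> Poisson deviance
        return "poisson"
    if "s" in model_name:   # severity-style models -> Gamma deviance
        return "gamma"
    return "tweedie"

# Example: a severity model with loss_name omitted falls back to gamma.
print(XGB_OBJECTIVES[resolve_loss_name("auto", "xgb_s")])  # reg:gamma
```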