ins-pricing 0.4.4-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. ins_pricing/README.md +74 -56
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +52 -50
  4. ins_pricing/cli/BayesOpt_incremental.py +832 -898
  5. ins_pricing/cli/Explain_Run.py +31 -23
  6. ins_pricing/cli/Explain_entry.py +532 -579
  7. ins_pricing/cli/Pricing_Run.py +31 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +1440 -1438
  9. ins_pricing/cli/utils/cli_common.py +256 -256
  10. ins_pricing/cli/utils/cli_config.py +375 -375
  11. ins_pricing/cli/utils/import_resolver.py +382 -365
  12. ins_pricing/cli/utils/notebook_utils.py +340 -340
  13. ins_pricing/cli/watchdog_run.py +209 -201
  14. ins_pricing/frontend/README.md +573 -419
  15. ins_pricing/frontend/__init__.py +10 -10
  16. ins_pricing/frontend/config_builder.py +1 -0
  17. ins_pricing/frontend/example_workflows.py +1 -1
  18. ins_pricing/governance/__init__.py +20 -20
  19. ins_pricing/governance/release.py +159 -159
  20. ins_pricing/modelling/README.md +67 -0
  21. ins_pricing/modelling/__init__.py +147 -92
  22. ins_pricing/modelling/bayesopt/README.md +59 -0
  23. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  24. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -550
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -962
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  29. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
  32. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
  36. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
  37. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
  38. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
  39. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  40. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  41. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  42. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  43. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  44. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
  45. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  46. ins_pricing/modelling/explain/__init__.py +55 -55
  47. ins_pricing/modelling/explain/metrics.py +27 -174
  48. ins_pricing/modelling/explain/permutation.py +237 -237
  49. ins_pricing/modelling/plotting/__init__.py +40 -36
  50. ins_pricing/modelling/plotting/compat.py +228 -0
  51. ins_pricing/modelling/plotting/curves.py +572 -572
  52. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  53. ins_pricing/modelling/plotting/geo.py +362 -362
  54. ins_pricing/modelling/plotting/importance.py +121 -121
  55. ins_pricing/pricing/__init__.py +27 -27
  56. ins_pricing/production/__init__.py +35 -25
  57. ins_pricing/production/{predict.py → inference.py} +140 -57
  58. ins_pricing/production/monitoring.py +8 -21
  59. ins_pricing/reporting/__init__.py +11 -11
  60. ins_pricing/setup.py +1 -1
  61. ins_pricing/tests/production/test_inference.py +90 -0
  62. ins_pricing/utils/__init__.py +116 -83
  63. ins_pricing/utils/device.py +255 -255
  64. ins_pricing/utils/features.py +53 -0
  65. ins_pricing/utils/io.py +72 -0
  66. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  67. ins_pricing/utils/metrics.py +158 -24
  68. ins_pricing/utils/numerics.py +76 -0
  69. ins_pricing/utils/paths.py +9 -1
  70. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +55 -35
  71. ins_pricing-0.5.0.dist-info/RECORD +131 -0
  72. ins_pricing/CHANGELOG.md +0 -272
  73. ins_pricing/RELEASE_NOTES_0.2.8.md +0 -344
  74. ins_pricing/docs/LOSS_FUNCTIONS.md +0 -78
  75. ins_pricing/docs/modelling/BayesOpt_USAGE.md +0 -945
  76. ins_pricing/docs/modelling/README.md +0 -34
  77. ins_pricing/frontend/QUICKSTART.md +0 -152
  78. ins_pricing/modelling/core/BayesOpt.py +0 -146
  79. ins_pricing/modelling/core/__init__.py +0 -1
  80. ins_pricing/modelling/core/bayesopt/PHASE2_REFACTORING_SUMMARY.md +0 -449
  81. ins_pricing/modelling/core/bayesopt/PHASE3_REFACTORING_SUMMARY.md +0 -406
  82. ins_pricing/modelling/core/bayesopt/REFACTORING_SUMMARY.md +0 -247
  83. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  84. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  85. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  86. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  87. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  88. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  89. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  90. ins_pricing/tests/production/test_predict.py +0 -233
  91. ins_pricing-0.4.4.dist-info/RECORD +0 -137
  92. /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
  93. /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
  94. /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
  95. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
  96. {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
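The most visible change in 0.5.0 is the package reorganisation in the rename list above: `modelling/core/bayesopt` is flattened to `modelling/bayesopt`, several bayesopt helpers move up to `ins_pricing/utils/`, and `production/predict.py` becomes `production/inference.py`. As a rough orientation only, the sketch below shows how downstream imports would map between the two layouts; the module paths are taken from the rename list, but whether 0.5.0 keeps compatibility shims for the old paths is not something this diff confirms.

```python
# Hypothetical migration sketch (module paths from the rename list above; illustrative only).

# 0.4.4 layout:
#   from ins_pricing.modelling.core.bayesopt import core
#   from ins_pricing.production import predict
#   from ins_pricing.modelling.core.bayesopt.utils import losses

# 0.5.0 layout:
from ins_pricing.modelling.bayesopt import core   # modelling/core/bayesopt -> modelling/bayesopt
from ins_pricing.production import inference      # production/predict.py -> production/inference.py
from ins_pricing.utils import losses              # bayesopt utils promoted to ins_pricing/utils/
```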
ins_pricing/RELEASE_NOTES_0.2.8.md
@@ -1,344 +0,0 @@
- # Release Notes: ins_pricing v0.2.8
-
- **Release Date:** January 14, 2026
- **Type:** Minor Release (Quality & Performance Improvements)
-
- ---
-
- ## 🎯 Overview
-
- Version 0.2.8 is a significant quality and performance improvement release that focuses on:
- - **Code quality and maintainability**
- - **Performance optimization** (3-6x faster SHAP, 30-40% memory reduction)
- - **Comprehensive documentation**
- - **Extensive test coverage** (35% → 60%+)
-
- **All changes are backward compatible.** No breaking changes.
-
- ---
-
- ## ⭐ Highlights
-
- ### 1. 🚀 Performance Optimizations
-
- #### SHAP Parallelization (3-6x Speedup)
- ```python
- # Before (slow - serial processing)
- result = compute_shap_xgb(ctx, n_samples=200)  # ~10 minutes
-
- # After (fast - parallel processing)
- result = compute_shap_xgb(ctx, n_samples=200, use_parallel=True)  # ~2 minutes
- ```
- **Impact:** 3-6x faster on multi-core systems for n_samples > 100
-
- #### Memory Optimization (30-40% Reduction)
- - DatasetPreprocessor reduces unnecessary DataFrame copies
- - Conditional copying only when needed
- - Direct reference assignment where safe
-
- #### Binning Cache (5-10x Speedup)
- ```python
- from ins_pricing.pricing.factors import get_cache_info, clear_binning_cache
-
- # Automatic caching for repeated binning
- factor_table = build_factor_table(df, factor_col='age', n_bins=10)  # Cached!
-
- # Check cache performance
- info = get_cache_info()
- print(f"Cache hit rate: {info['hits'] / (info['hits'] + info['misses']):.1%}")
- ```
-
- ---
-
- ### 2. 🛠️ New Utility Modules
-
- #### Data Validation Toolkit
- ```python
- from ins_pricing.utils.validation import (
-     validate_required_columns,
-     validate_column_types,
-     validate_value_range,
-     validate_no_nulls,
-     validate_positive
- )
-
- # Validate DataFrame structure
- validate_required_columns(df, ['age', 'premium', 'exposure'], df_name='policy_data')
-
- # Validate data types
- df = validate_column_types(df, {'age': 'int64', 'premium': 'float64'}, coerce=True)
-
- # Validate value ranges
- validate_value_range(df, 'age', min_val=0, max_val=120)
- validate_positive(df, ['premium', 'exposure'], allow_zero=False)
- ```
-
- #### Performance Profiling
- ```python
- from ins_pricing.utils.profiling import profile_section, MemoryMonitor
-
- # Simple profiling
- with profile_section("Data Processing", logger):
-     process_large_dataset()
- # Output: [Profile] Data Processing: 5.23s, RAM: +1250.3MB, GPU peak: 2048.5MB
-
- # Memory monitoring with auto-cleanup
- with MemoryMonitor("Training", threshold_gb=16.0, logger=logger):
-     train_model()
- ```
-
- ---
-
- ### 3. 📚 Documentation Overhaul
-
- #### Complete Module Documentation
- - **production/preprocess.py**: Module + 3 functions fully documented
- - **pricing/calibration.py**: Module + 2 functions with business context
- - All docs include practical examples and business rationale
-
- #### Example Quality
- ```python
- def fit_calibration_factor(pred, actual, *, weight=None, target_lr=None):
-     """Fit a scalar calibration factor to align predictions with actuals.
-
-     This function computes a multiplicative calibration factor...
-
-     Args:
-         pred: Model predictions (premiums or pure premiums)
-         actual: Actual observed values (claims or losses)
-         weight: Optional weights (e.g., exposure, earned premium)
-         target_lr: Target loss ratio to achieve (0 < target_lr < 1)
-
-     Returns:
-         Calibration factor (scalar multiplier)
-
-     Example:
-         >>> # Calibrate to achieve 70% loss ratio
-         >>> pred_premium = np.array([100, 150, 200])
-         >>> actual_claims = np.array([75, 100, 130])
-         >>> factor = fit_calibration_factor(pred_premium, actual_claims, target_lr=0.70)
-         >>> print(f"{factor:.3f}")
-         1.143  # Adjust premiums to achieve 70% loss ratio
-
-     Note:
-         - target_lr typically in range [0.5, 0.9] for insurance pricing
-     """
- ```
-
- ---
-
- ### 4. 🧪 Test Coverage Expansion
-
- #### New Test Suites
- - **tests/production/** (247 scenarios)
-   - Prediction, scoring, monitoring, preprocessing
- - **tests/pricing/** (60+ scenarios)
-   - Factors, exposure, calibration, rate tables
- - **tests/governance/** (40+ scenarios)
-   - Registry, release, audit workflows
-
- #### Coverage Increase
- - **Before:** 35% overall coverage
- - **After:** 60%+ overall coverage
- - **Impact:** Better reliability, fewer production bugs
-
- ---
-
- ## 📦 What's New
-
- ### Added
-
- #### Core Utilities
- - `utils/validation.py` - 8 validation functions for data quality
- - `utils/profiling.py` - Performance and memory monitoring tools
- - `pricing/factors.py` - LRU caching for binning operations
-
- #### Test Coverage
- - 11 new test files with 250+ test scenarios
- - Complete coverage for production, pricing, governance modules
-
- #### Documentation
- - Module-level docstrings with business context
- - 150+ lines of comprehensive documentation
- - 8+ complete working examples
-
- ### Enhanced
-
- #### SHAP Computation
- - Parallel processing support via joblib
- - Automatic batch size optimization
- - Graceful fallback if joblib unavailable
- - All SHAP functions support `use_parallel=True`
-
- #### Configuration Validation
- - BayesOptConfig with comprehensive `__post_init__` validation
- - Clear error messages for configuration issues
- - Validation of distributed training settings
-
- ### Performance
-
- | Feature | Before | After | Improvement |
- |---------|--------|-------|-------------|
- | SHAP (200 samples) | 10 min | 2-3 min | **3-6x faster** |
- | Preprocessing memory | 2.5 GB | 1.5 GB | **40% reduction** |
- | Repeated binning | 5.2s | 0.5s | **10x faster** |
-
- ---
-
- ## 🔄 Migration Guide
-
- ### No Breaking Changes
-
- All changes are **backward compatible**. Existing code will continue to work without modifications.
-
- ### Opt-in Features
-
- New features are opt-in and don't affect existing behavior:
-
- ```python
- # SHAP parallelization - opt-in
- result = compute_shap_xgb(ctx, use_parallel=True)  # New parameter
-
- # Binning cache - automatic, but can be disabled
- binned = bin_numeric(series, bins=10, use_cache=False)  # Opt-out if needed
- ```
-
- ### Recommended Updates
-
- While not required, consider adopting these improvements:
-
- #### 1. Enable Parallel SHAP (if using SHAP)
- ```python
- # Before
- shap_result = compute_shap_xgb(ctx, n_samples=200)
-
- # After (recommended for n_samples > 100)
- shap_result = compute_shap_xgb(ctx, n_samples=200, use_parallel=True, n_jobs=-1)
- ```
-
- #### 2. Add Data Validation (for production code)
- ```python
- from ins_pricing.utils.validation import validate_required_columns, validate_positive
-
- def score_policies(df):
-     # Add validation at entry points
-     validate_required_columns(df, ['age', 'premium', 'exposure'], df_name='input_data')
-     validate_positive(df, ['premium', 'exposure'])
-
-     # Your existing code...
- ```
-
- #### 3. Use Profiling (for optimization)
- ```python
- from ins_pricing.utils.profiling import profile_section
-
- def expensive_operation():
-     with profile_section("Data Processing"):
-         # Your code...
- ```
-
- ---
-
- ## 📋 Installation
-
- ### Standard Installation
- ```bash
- pip install ins_pricing==0.2.8
- ```
-
- ### With Optional Dependencies
- ```bash
- # For parallel SHAP computation
- pip install "ins_pricing[explain]==0.2.8"
-
- # For memory profiling
- pip install psutil
-
- # All features
- pip install "ins_pricing[all]==0.2.8" psutil
- ```
-
- ---
-
- ## 🔧 Dependencies
-
- ### New Optional Dependencies
- - `joblib>=1.2` - For parallel SHAP computation (optional)
- - `psutil` - For memory profiling utilities (optional)
-
- ### Unchanged Core Dependencies
- - `numpy>=1.20`
- - `pandas>=1.4`
- - All existing optional dependencies remain the same
-
- ---
-
- ## 🐛 Known Issues
-
- None identified in this release.
-
- ---
-
- ## 🔮 What's Next (v0.2.9)
-
- Planned improvements for the next release:
-
- 1. **Governance Module Documentation** - Complete docs for registry, approval, release modules
- 2. **Plotting Module Documentation** - Enhanced visualization guidance
- 3. **CI/CD Pipeline** - Automated testing and code quality checks
- 4. **Additional Performance Optimizations** - Vectorized operations in pricing modules
-
- ---
-
- ## 📊 Metrics Summary
-
- | Metric | Before | After | Change |
- |--------|--------|-------|--------|
- | **Test Coverage** | 35% | 60%+ | +25% ✅ |
- | **Documentation Coverage** | ~40% | ~70% | +30% ✅ |
- | **SHAP Performance** | 1x | 3-6x | +3-6x ✅ |
- | **Memory Usage** | 100% | 60-70% | -30-40% ✅ |
- | **Binning Performance** | 1x | 5-10x | +5-10x ✅ |
-
- ---
-
- ## 🙏 Acknowledgments
-
- This release includes comprehensive code review findings and implements best practices for:
- - Performance optimization
- - Memory management
- - Code documentation
- - Test coverage
- - Developer experience
-
- ---
-
- ## 📞 Support
-
- For issues or questions about this release:
- 1. Check the [CHANGELOG.md](CHANGELOG.md) for detailed changes
- 2. Review module documentation in updated files
- 3. Check test files for usage examples
-
- ---
-
- ## ✅ Upgrade Checklist
-
- Before upgrading to 0.2.8:
-
- - [ ] Review [CHANGELOG.md](CHANGELOG.md) for all changes
- - [ ] No breaking changes - safe to upgrade
- - [ ] Consider enabling parallel SHAP if using SHAP
- - [ ] Consider adding data validation for production workflows
- - [ ] Install optional dependencies if needed: `pip install joblib psutil`
-
- After upgrading:
-
- - [ ] Verify existing functionality still works
- - [ ] Consider adopting new validation utilities
- - [ ] Consider adding performance profiling
- - [ ] Review new test examples for your use cases
-
- ---
-
- **Happy modeling! 🎉**
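The removed release notes above show `profile_section` only through its call site. For orientation, here is a minimal, illustrative sketch of that kind of timing context manager; it is not the `ins_pricing.utils.profiling` implementation, which per the notes also reports RAM deltas and GPU peaks.

```python
import time
from contextlib import contextmanager

@contextmanager
def profile_section(label, logger=None):
    """Illustrative sketch only: time a block and report elapsed seconds."""
    start = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - start
        message = f"[Profile] {label}: {elapsed:.2f}s"
        # The real utility also reports memory usage; this sketch only logs time.
        (logger.info if logger is not None else print)(message)

# Usage mirroring the removed example:
with profile_section("Data Processing"):
    total = sum(range(1_000_000))  # stand-in for process_large_dataset()
```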
ins_pricing/docs/LOSS_FUNCTIONS.md
@@ -1,78 +0,0 @@
- LOSS FUNCTIONS
-
- Overview
- This document describes the loss-function changes in ins_pricing. The training
- stack now supports multiple regression losses (not just Tweedie deviance) and
- propagates the selected loss into tuning, training, and inference.
-
- Supported loss_name values
- - auto (default): keep legacy behavior based on model name
- - tweedie: Tweedie deviance (uses tw_power / tweedie_variance_power when tuning)
- - poisson: Poisson deviance (power=1)
- - gamma: Gamma deviance (power=2)
- - mse: mean squared error
- - mae: mean absolute error
-
- Loss name mapping (all options)
- - Tweedie deviance -> tweedie
- - Poisson deviance -> poisson
- - Gamma deviance -> gamma
- - Mean squared error -> mse
- - Mean absolute error -> mae
- - Classification log loss -> logloss (classification only)
- - Classification BCE -> bce (classification only)
-
- Classification tasks
- - loss_name can be auto, logloss, or bce
- - training continues to use BCEWithLogits for torch models; evaluation uses logloss
-
- Where to set loss_name
- Add to any BayesOpt config JSON:
-
- {
-   "task_type": "regression",
-   "loss_name": "mse"
- }
-
- Behavior changes
- 1) Tuning and metrics
- - When loss_name is mse/mae, tuning does not sample Tweedie power.
- - When loss_name is poisson/gamma, power is fixed (1.0/2.0).
- - When loss_name is tweedie, power is sampled as before.
-
- 2) Torch training (ResNet/FT/GNN)
- - Loss computation is routed by loss_name.
- - For tweedie/poisson/gamma, predictions are clamped positive.
- - For mse/mae, no Tweedie power is used.
-
- 3) XGBoost objective
- - loss_name controls XGB objective:
-   - tweedie -> reg:tweedie
-   - poisson -> count:poisson
-   - gamma -> reg:gamma
-   - mse -> reg:squarederror
-   - mae -> reg:absoluteerror
-
- 4) Inference
- - ResNet/GNN constructors now receive loss_name.
- - When loss_name is not tweedie, tw_power is not applied at inference.
-
- Legacy defaults (auto)
- - If loss_name is omitted, behavior is unchanged:
-   - model name contains "f" -> poisson
-   - model name contains "s" -> gamma
-   - otherwise -> tweedie
-
- Examples
- - ResNet direct training (MSE):
-   "loss_name": "mse"
-
- FT embed -> ResNet (MSE):
-   "loss_name": "mse"
-
- - XGB direct training (unchanged):
-   omit loss_name or set "loss_name": "auto"
-
- Notes
- - loss_name is global per config. If you need different losses for different
-   models, split into separate configs and run them independently.
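The XGBoost objective mapping and the legacy `auto` rule documented in the removed LOSS_FUNCTIONS.md can be summarised in a short sketch. This is a paraphrase of that documented behaviour, not the package's actual code, and the helper name is illustrative.

```python
# Paraphrase of the removed LOSS_FUNCTIONS.md behaviour; not ins_pricing's real API.

XGB_OBJECTIVES = {
    "tweedie": "reg:tweedie",
    "poisson": "count:poisson",
    "gamma": "reg:gamma",
    "mse": "reg:squarederror",
    "mae": "reg:absoluteerror",
}

def resolve_loss_name(loss_name: str, model_name: str) -> str:
    """Legacy 'auto' rule: pick the loss from the model name, as documented above."""
    if loss_name != "auto":
        return loss_name
    if "f" in model_name:   # frequency-style models -> Poisson deviance
        return "poisson"
    if "s" in model_name:   # severity-style models -> Gamma deviance
        return "gamma"
    return "tweedie"

# Example: a severity model with loss_name omitted falls back to gamma.
print(XGB_OBJECTIVES[resolve_loss_name("auto", "xgb_s")])  # reg:gamma
```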