ins-pricing 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +74 -56
- ins_pricing/__init__.py +142 -90
- ins_pricing/cli/BayesOpt_entry.py +52 -50
- ins_pricing/cli/BayesOpt_incremental.py +832 -898
- ins_pricing/cli/Explain_Run.py +31 -23
- ins_pricing/cli/Explain_entry.py +532 -579
- ins_pricing/cli/Pricing_Run.py +31 -23
- ins_pricing/cli/bayesopt_entry_runner.py +1440 -1438
- ins_pricing/cli/utils/cli_common.py +256 -256
- ins_pricing/cli/utils/cli_config.py +375 -375
- ins_pricing/cli/utils/import_resolver.py +382 -365
- ins_pricing/cli/utils/notebook_utils.py +340 -340
- ins_pricing/cli/watchdog_run.py +209 -201
- ins_pricing/frontend/README.md +573 -419
- ins_pricing/frontend/__init__.py +10 -10
- ins_pricing/frontend/config_builder.py +1 -0
- ins_pricing/frontend/example_workflows.py +1 -1
- ins_pricing/governance/__init__.py +20 -20
- ins_pricing/governance/release.py +159 -159
- ins_pricing/modelling/README.md +67 -0
- ins_pricing/modelling/__init__.py +147 -92
- ins_pricing/modelling/bayesopt/README.md +59 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -550
- ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -962
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
- ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
- ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
- ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
- ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
- ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
- ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
- ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
- ins_pricing/modelling/explain/__init__.py +55 -55
- ins_pricing/modelling/explain/metrics.py +27 -174
- ins_pricing/modelling/explain/permutation.py +237 -237
- ins_pricing/modelling/plotting/__init__.py +40 -36
- ins_pricing/modelling/plotting/compat.py +228 -0
- ins_pricing/modelling/plotting/curves.py +572 -572
- ins_pricing/modelling/plotting/diagnostics.py +163 -163
- ins_pricing/modelling/plotting/geo.py +362 -362
- ins_pricing/modelling/plotting/importance.py +121 -121
- ins_pricing/pricing/__init__.py +27 -27
- ins_pricing/production/__init__.py +35 -25
- ins_pricing/production/{predict.py → inference.py} +140 -57
- ins_pricing/production/monitoring.py +8 -21
- ins_pricing/reporting/__init__.py +11 -11
- ins_pricing/setup.py +1 -1
- ins_pricing/tests/production/test_inference.py +90 -0
- ins_pricing/utils/__init__.py +116 -83
- ins_pricing/utils/device.py +255 -255
- ins_pricing/utils/features.py +53 -0
- ins_pricing/utils/io.py +72 -0
- ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
- ins_pricing/utils/metrics.py +158 -24
- ins_pricing/utils/numerics.py +76 -0
- ins_pricing/utils/paths.py +9 -1
- {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +55 -35
- ins_pricing-0.5.0.dist-info/RECORD +131 -0
- ins_pricing/CHANGELOG.md +0 -272
- ins_pricing/RELEASE_NOTES_0.2.8.md +0 -344
- ins_pricing/docs/LOSS_FUNCTIONS.md +0 -78
- ins_pricing/docs/modelling/BayesOpt_USAGE.md +0 -945
- ins_pricing/docs/modelling/README.md +0 -34
- ins_pricing/frontend/QUICKSTART.md +0 -152
- ins_pricing/modelling/core/BayesOpt.py +0 -146
- ins_pricing/modelling/core/__init__.py +0 -1
- ins_pricing/modelling/core/bayesopt/PHASE2_REFACTORING_SUMMARY.md +0 -449
- ins_pricing/modelling/core/bayesopt/PHASE3_REFACTORING_SUMMARY.md +0 -406
- ins_pricing/modelling/core/bayesopt/REFACTORING_SUMMARY.md +0 -247
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
- ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
- ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
- ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
- ins_pricing/modelling/core/bayesopt/utils.py +0 -105
- ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
- ins_pricing/tests/production/test_predict.py +0 -233
- ins_pricing-0.4.4.dist-info/RECORD +0 -137
- /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
- /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
- /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
- {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
- {ins_pricing-0.4.4.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
@@ -1,344 +0,0 @@
# Release Notes: ins_pricing v0.2.8

**Release Date:** January 14, 2026
**Type:** Minor Release (Quality & Performance Improvements)

---

## 🎯 Overview

Version 0.2.8 is a significant quality and performance release that focuses on:

- **Code quality and maintainability**
- **Performance optimization** (3-6x faster SHAP, 30-40% memory reduction)
- **Comprehensive documentation**
- **Extensive test coverage** (35% → 60%+)

**All changes are backward compatible.** No breaking changes.

---

## ⭐ Highlights

### 1. 🚀 Performance Optimizations

#### SHAP Parallelization (3-6x Speedup)

```python
# Before (slow - serial processing)
result = compute_shap_xgb(ctx, n_samples=200)  # ~10 minutes

# After (fast - parallel processing)
result = compute_shap_xgb(ctx, n_samples=200, use_parallel=True)  # ~2 minutes
```

**Impact:** 3-6x faster on multi-core systems for n_samples > 100

#### Memory Optimization (30-40% Reduction)

- DatasetPreprocessor reduces unnecessary DataFrame copies
- Conditional copying only when needed (see the sketch below)
- Direct reference assignment where safe
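
The copying policy is essentially copy-on-write. A minimal illustrative sketch of the idea (the `_maybe_copy` helper below is hypothetical, not the actual DatasetPreprocessor API):

```python
import pandas as pd

def _maybe_copy(df: pd.DataFrame, will_mutate: bool) -> pd.DataFrame:
    """Hypothetical helper: copy only when the caller will mutate."""
    return df.copy() if will_mutate else df

raw_df = pd.DataFrame({"age": [25, 40, 130], "premium": [500.0, 650.0, 900.0]})

# Read-only access shares the underlying data (no copy, no extra memory)
features = _maybe_copy(raw_df, will_mutate=False)

# Mutating transformations get a private copy so the input stays untouched
train_df = _maybe_copy(raw_df, will_mutate=True)
train_df["age"] = train_df["age"].clip(0, 120)
```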

#### Binning Cache (5-10x Speedup)

```python
from ins_pricing.pricing.factors import (
    build_factor_table,
    get_cache_info,
    clear_binning_cache,
)

# Automatic caching for repeated binning
factor_table = build_factor_table(df, factor_col='age', n_bins=10)  # Cached!

# Check cache performance
info = get_cache_info()
print(f"Cache hit rate: {info['hits'] / (info['hits'] + info['misses']):.1%}")
```

---

### 2. 🛠️ New Utility Modules

#### Data Validation Toolkit

```python
from ins_pricing.utils.validation import (
    validate_required_columns,
    validate_column_types,
    validate_value_range,
    validate_no_nulls,
    validate_positive,
)

# Validate DataFrame structure
validate_required_columns(df, ['age', 'premium', 'exposure'], df_name='policy_data')

# Validate data types
df = validate_column_types(df, {'age': 'int64', 'premium': 'float64'}, coerce=True)

# Validate value ranges
validate_value_range(df, 'age', min_val=0, max_val=120)
validate_positive(df, ['premium', 'exposure'], allow_zero=False)
```

#### Performance Profiling

```python
import logging

from ins_pricing.utils.profiling import profile_section, MemoryMonitor

logger = logging.getLogger(__name__)

# Simple profiling
with profile_section("Data Processing", logger):
    process_large_dataset()
# Output: [Profile] Data Processing: 5.23s, RAM: +1250.3MB, GPU peak: 2048.5MB

# Memory monitoring with auto-cleanup
with MemoryMonitor("Training", threshold_gb=16.0, logger=logger):
    train_model()
```

---

### 3. 📚 Documentation Overhaul

#### Complete Module Documentation

- **production/preprocess.py**: Module + 3 functions fully documented
- **pricing/calibration.py**: Module + 2 functions with business context
- All docs include practical examples and business rationale

#### Example Quality

```python
def fit_calibration_factor(pred, actual, *, weight=None, target_lr=None):
    """Fit a scalar calibration factor to align predictions with actuals.

    This function computes a multiplicative calibration factor...

    Args:
        pred: Model predictions (premiums or pure premiums)
        actual: Actual observed values (claims or losses)
        weight: Optional weights (e.g., exposure, earned premium)
        target_lr: Target loss ratio to achieve (0 < target_lr < 1)

    Returns:
        Calibration factor (scalar multiplier)

    Example:
        >>> # Calibrate to achieve 70% loss ratio
        >>> pred_premium = np.array([100, 150, 200])
        >>> actual_claims = np.array([75, 100, 130])
        >>> factor = fit_calibration_factor(pred_premium, actual_claims, target_lr=0.70)
        >>> print(f"{factor:.3f}")
        1.143  # Adjust premiums to achieve 70% loss ratio

    Note:
        - target_lr typically in range [0.5, 0.9] for insurance pricing
    """
```

---

### 4. 🧪 Test Coverage Expansion

#### New Test Suites

- **tests/production/** (247 scenarios)
  - Prediction, scoring, monitoring, preprocessing
- **tests/pricing/** (60+ scenarios)
  - Factors, exposure, calibration, rate tables
- **tests/governance/** (40+ scenarios)
  - Registry, release, audit workflows

#### Coverage Increase

- **Before:** 35% overall coverage
- **After:** 60%+ overall coverage
- **Impact:** Better reliability, fewer production bugs

---

## 📦 What's New

### Added

#### Core Utilities

- `utils/validation.py` - 8 validation functions for data quality
- `utils/profiling.py` - Performance and memory monitoring tools
- `pricing/factors.py` - LRU caching for binning operations

#### Test Coverage

- 11 new test files with 250+ test scenarios
- Complete coverage for production, pricing, governance modules

#### Documentation

- Module-level docstrings with business context
- 150+ lines of comprehensive documentation
- 8+ complete working examples

### Enhanced

#### SHAP Computation

- Parallel processing support via joblib
- Automatic batch size optimization
- Graceful fallback if joblib is unavailable (see the sketch below)
- All SHAP functions support `use_parallel=True`
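
The fallback follows the usual optional-dependency guard. A minimal sketch of the pattern (the `map_batches` helper is illustrative, not the package's internal API):

```python
from typing import Callable, Sequence

try:
    from joblib import Parallel, delayed
    _HAS_JOBLIB = True
except ImportError:
    _HAS_JOBLIB = False

def map_batches(fn: Callable, batches: Sequence, use_parallel: bool = False, n_jobs: int = -1) -> list:
    """Apply fn to each batch, in parallel when joblib is available."""
    if use_parallel and _HAS_JOBLIB:
        return Parallel(n_jobs=n_jobs)(delayed(fn)(b) for b in batches)
    # Graceful serial fallback when joblib is missing or parallelism is off
    return [fn(b) for b in batches]
```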

#### Configuration Validation

- BayesOptConfig with comprehensive `__post_init__` validation (sketched below)
- Clear error messages for configuration issues
- Validation of distributed training settings
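
The hook follows the standard dataclass `__post_init__` pattern. A hedged sketch with illustrative field names (the real BayesOptConfig schema differs):

```python
from dataclasses import dataclass

@dataclass
class BayesOptConfig:
    n_trials: int = 50
    task_type: str = "regression"
    world_size: int = 1  # distributed training workers

    def __post_init__(self) -> None:
        # Fail fast with clear messages instead of erroring mid-run
        if self.n_trials <= 0:
            raise ValueError(f"n_trials must be positive, got {self.n_trials}")
        if self.task_type not in ("regression", "classification"):
            raise ValueError(f"unknown task_type: {self.task_type!r}")
        if self.world_size < 1:
            raise ValueError("world_size must be >= 1 for distributed training")
```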

### Performance

| Feature | Before | After | Improvement |
|---------|--------|-------|-------------|
| SHAP (200 samples) | 10 min | 2-3 min | **3-6x faster** |
| Preprocessing memory | 2.5 GB | 1.5 GB | **40% reduction** |
| Repeated binning | 5.2s | 0.5s | **10x faster** |

---

## 🔄 Migration Guide

### No Breaking Changes

All changes are **backward compatible**. Existing code will continue to work without modifications.

### Opt-in Features

New features are opt-in and don't affect existing behavior:

```python
# SHAP parallelization - opt-in
result = compute_shap_xgb(ctx, use_parallel=True)  # New parameter

# Binning cache - automatic, but can be disabled
binned = bin_numeric(series, bins=10, use_cache=False)  # Opt-out if needed
```

### Recommended Updates

While not required, consider adopting these improvements:

#### 1. Enable Parallel SHAP (if using SHAP)

```python
# Before
shap_result = compute_shap_xgb(ctx, n_samples=200)

# After (recommended for n_samples > 100)
shap_result = compute_shap_xgb(ctx, n_samples=200, use_parallel=True, n_jobs=-1)
```

#### 2. Add Data Validation (for production code)

```python
from ins_pricing.utils.validation import validate_required_columns, validate_positive

def score_policies(df):
    # Add validation at entry points
    validate_required_columns(df, ['age', 'premium', 'exposure'], df_name='input_data')
    validate_positive(df, ['premium', 'exposure'])

    # Your existing code...
```

#### 3. Use Profiling (for optimization)

```python
from ins_pricing.utils.profiling import profile_section

def expensive_operation():
    with profile_section("Data Processing"):
        ...  # Your code
```

---

## 📋 Installation

### Standard Installation

```bash
pip install ins_pricing==0.2.8
```

### With Optional Dependencies

```bash
# For parallel SHAP computation
pip install "ins_pricing[explain]==0.2.8"

# For memory profiling
pip install psutil

# All features
pip install "ins_pricing[all]==0.2.8" psutil
```

---

## 🔧 Dependencies

### New Optional Dependencies

- `joblib>=1.2` - For parallel SHAP computation (optional)
- `psutil` - For memory profiling utilities (optional)

### Unchanged Core Dependencies

- `numpy>=1.20`
- `pandas>=1.4`
- All existing optional dependencies remain the same

---

## 🐛 Known Issues

None identified in this release.

---

## 🔮 What's Next (v0.2.9)

Planned improvements for the next release:

1. **Governance Module Documentation** - Complete docs for registry, approval, release modules
2. **Plotting Module Documentation** - Enhanced visualization guidance
3. **CI/CD Pipeline** - Automated testing and code quality checks
4. **Additional Performance Optimizations** - Vectorized operations in pricing modules

---

## 📊 Metrics Summary

| Metric | Before | After | Change |
|--------|--------|-------|--------|
| **Test Coverage** | 35% | 60%+ | +25 pts ✅ |
| **Documentation Coverage** | ~40% | ~70% | +30 pts ✅ |
| **SHAP Performance** | 1x | 3-6x | 3-6x faster ✅ |
| **Memory Usage** | 100% | 60-70% | 30-40% lower ✅ |
| **Binning Performance** | 1x | 5-10x | 5-10x faster ✅ |

---

## 🙏 Acknowledgments

This release includes comprehensive code review findings and implements best practices for:

- Performance optimization
- Memory management
- Code documentation
- Test coverage
- Developer experience

---

## 📞 Support

For issues or questions about this release:

1. Check the [CHANGELOG.md](CHANGELOG.md) for detailed changes
2. Review module documentation in updated files
3. Check test files for usage examples

---

## ✅ Upgrade Checklist

Before upgrading to 0.2.8:

- [ ] Review [CHANGELOG.md](CHANGELOG.md) for all changes
- [ ] No breaking changes - safe to upgrade
- [ ] Consider enabling parallel SHAP if using SHAP
- [ ] Consider adding data validation for production workflows
- [ ] Install optional dependencies if needed: `pip install joblib psutil`

After upgrading:

- [ ] Verify existing functionality still works
- [ ] Consider adopting new validation utilities
- [ ] Consider adding performance profiling
- [ ] Review new test examples for your use cases

---

**Happy modeling! 🎉**
@@ -1,78 +0,0 @@
LOSS FUNCTIONS

Overview
This document describes the loss-function changes in ins_pricing. The training
stack now supports multiple regression losses (not just Tweedie deviance) and
propagates the selected loss into tuning, training, and inference.

Supported loss_name values
- auto (default): keep legacy behavior based on model name
- tweedie: Tweedie deviance (uses tw_power / tweedie_variance_power when tuning)
- poisson: Poisson deviance (power=1)
- gamma: Gamma deviance (power=2)
- mse: mean squared error
- mae: mean absolute error

Loss name mapping (all options)
- Tweedie deviance -> tweedie
- Poisson deviance -> poisson
- Gamma deviance -> gamma
- Mean squared error -> mse
- Mean absolute error -> mae
- Classification log loss -> logloss (classification only)
- Classification BCE -> bce (classification only)

Classification tasks
- loss_name can be auto, logloss, or bce
- training continues to use BCEWithLogits for torch models; evaluation uses logloss

Where to set loss_name
Add to any BayesOpt config JSON:

    {
      "task_type": "regression",
      "loss_name": "mse"
    }

Behavior changes
1) Tuning and metrics
- When loss_name is mse/mae, tuning does not sample Tweedie power.
- When loss_name is poisson/gamma, power is fixed (1.0/2.0).
- When loss_name is tweedie, power is sampled as before (sketched below).
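
For illustration, an Optuna-style search space would gate the power parameter
on loss_name (a sketch of the idea, not the package's actual tuning code):

    def suggest_power(trial, loss_name: str):
        # Only Tweedie samples the variance power; Poisson/Gamma pin it.
        if loss_name == "tweedie":
            return trial.suggest_float("tw_power", 1.0, 2.0)
        if loss_name == "poisson":
            return 1.0
        if loss_name == "gamma":
            return 2.0
        return None  # mse/mae: no power parameter in the search space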

2) Torch training (ResNet/FT/GNN)
- Loss computation is routed by loss_name (sketched below).
- For tweedie/poisson/gamma, predictions are clamped positive.
- For mse/mae, no Tweedie power is used.
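
A minimal sketch of the routing (illustrative only; the deviance formulas drop
target-only constants, and the real trainer mixin differs):

    import torch

    def compute_loss(pred, target, loss_name: str, tw_power: float = 1.5):
        if loss_name in ("tweedie", "poisson", "gamma"):
            pred = pred.clamp_min(1e-6)  # clamp predictions positive
        if loss_name == "mse":
            return torch.nn.functional.mse_loss(pred, target)
        if loss_name == "mae":
            return torch.nn.functional.l1_loss(pred, target)
        if loss_name == "poisson":
            return torch.mean(pred - target * torch.log(pred))
        if loss_name == "gamma":
            return torch.mean(torch.log(pred) + target / pred)
        p = tw_power  # Tweedie deviance, valid for 1 < p < 2
        return torch.mean(-target * pred.pow(1 - p) / (1 - p) + pred.pow(2 - p) / (2 - p))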

3) XGBoost objective
- loss_name controls the XGB objective (mapping sketched below):
  - tweedie -> reg:tweedie
  - poisson -> count:poisson
  - gamma -> reg:gamma
  - mse -> reg:squarederror
  - mae -> reg:absoluteerror
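
In code the mapping is a plain lookup; a sketch (variable values are examples
only):

    loss_name, tw_power = "tweedie", 1.5  # example inputs

    XGB_OBJECTIVES = {
        "tweedie": "reg:tweedie",
        "poisson": "count:poisson",
        "gamma": "reg:gamma",
        "mse": "reg:squarederror",
        "mae": "reg:absoluteerror",  # needs a recent xgboost release
    }

    params = {"objective": XGB_OBJECTIVES[loss_name]}
    if loss_name == "tweedie":
        params["tweedie_variance_power"] = tw_power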

4) Inference
- ResNet/GNN constructors now receive loss_name.
- When loss_name is not tweedie, tw_power is not applied at inference.

Legacy defaults (auto)
- If loss_name is omitted, behavior is unchanged:
  - model name contains "f" -> poisson
  - model name contains "s" -> gamma
  - otherwise -> tweedie
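
Equivalently, as a sketch (illustrative code; checks follow the order above):

    def resolve_auto_loss(model_name: str) -> str:
        if "f" in model_name:
            return "poisson"
        if "s" in model_name:
            return "gamma"
        return "tweedie"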

Examples
- ResNet direct training (MSE):
    "loss_name": "mse"

- FT embed -> ResNet (MSE):
    "loss_name": "mse"

- XGB direct training (unchanged):
    omit loss_name or set "loss_name": "auto"

Notes
- loss_name is global per config. If you need different losses for different
  models, split into separate configs and run them independently.