ins-pricing 0.2.7-py3-none-any.whl → 0.2.9-py3-none-any.whl

This diff shows the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
Files changed (31)
  1. ins_pricing/CHANGELOG.md +179 -0
  2. ins_pricing/RELEASE_NOTES_0.2.8.md +344 -0
  3. ins_pricing/modelling/core/bayesopt/utils.py +2 -1
  4. ins_pricing/modelling/explain/shap_utils.py +209 -6
  5. ins_pricing/pricing/calibration.py +125 -1
  6. ins_pricing/pricing/factors.py +110 -1
  7. ins_pricing/production/preprocess.py +166 -0
  8. ins_pricing/setup.py +1 -1
  9. ins_pricing/tests/governance/__init__.py +1 -0
  10. ins_pricing/tests/governance/test_audit.py +56 -0
  11. ins_pricing/tests/governance/test_registry.py +128 -0
  12. ins_pricing/tests/governance/test_release.py +74 -0
  13. ins_pricing/tests/pricing/__init__.py +1 -0
  14. ins_pricing/tests/pricing/test_calibration.py +72 -0
  15. ins_pricing/tests/pricing/test_exposure.py +64 -0
  16. ins_pricing/tests/pricing/test_factors.py +156 -0
  17. ins_pricing/tests/pricing/test_rate_table.py +40 -0
  18. ins_pricing/tests/production/__init__.py +1 -0
  19. ins_pricing/tests/production/test_monitoring.py +350 -0
  20. ins_pricing/tests/production/test_predict.py +233 -0
  21. ins_pricing/tests/production/test_preprocess.py +339 -0
  22. ins_pricing/tests/production/test_scoring.py +311 -0
  23. ins_pricing/utils/profiling.py +377 -0
  24. ins_pricing/utils/validation.py +427 -0
  25. ins_pricing-0.2.9.dist-info/METADATA +149 -0
  26. {ins_pricing-0.2.7.dist-info → ins_pricing-0.2.9.dist-info}/RECORD +28 -12
  27. ins_pricing/CHANGELOG_20260114.md +0 -275
  28. ins_pricing/CODE_REVIEW_IMPROVEMENTS.md +0 -715
  29. ins_pricing-0.2.7.dist-info/METADATA +0 -101
  30. {ins_pricing-0.2.7.dist-info → ins_pricing-0.2.9.dist-info}/WHEEL +0 -0
  31. {ins_pricing-0.2.7.dist-info → ins_pricing-0.2.9.dist-info}/top_level.txt +0 -0
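
The bulk of the additions are the new test suites under ins_pricing/tests/; two of them are reproduced in full below. They rely on custom pytest markers (integration, performance). The pytest configuration that registers those markers is not part of this diff, so the following conftest.py sketch is a hypothetical illustration of how such markers might be registered and how the slow suites could be deselected:

    # conftest.py -- hypothetical sketch; the package's actual pytest
    # configuration is not included in this diff.
    def pytest_configure(config):
        # Register the custom markers used by the new test modules, so a
        # plain unit-test run can deselect the slow suites with:
        #   pytest ins_pricing/tests -m "not integration and not performance"
        config.addinivalue_line("markers", "integration: end-to-end tests that need real model artifacts")
        config.addinivalue_line("markers", "performance: timing-sensitive tests on large datasets")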
ins_pricing/tests/production/test_predict.py
@@ -0,0 +1,233 @@
+ """Tests for production prediction module."""
+
+ import json
+ from pathlib import Path
+ from unittest.mock import Mock, patch, MagicMock
+
+ import numpy as np
+ import pandas as pd
+ import pytest
+
+ from ins_pricing.exceptions import ConfigurationError, ModelLoadError, PredictionError
+
+
+ # Skip these tests if the production.predict module (which may have heavy dependencies) is unavailable
+ pytest.importorskip("ins_pricing.production.predict", reason="predict module not available")
+
+
+ @pytest.fixture
+ def sample_config():
+     """Sample configuration for predictor."""
+     return {
+         "model_name": "test_model",
+         "task_type": "regression",
+         "base_dir": "/tmp/models",
+         "feature_names": ["age", "premium", "region"],
+         "model_type": "xgboost"
+     }
+
+
+ @pytest.fixture
+ def sample_data():
+     """Sample input data for predictions."""
+     return pd.DataFrame({
+         "age": [25, 30, 35, 40],
+         "premium": [100.0, 150.0, 200.0, 250.0],
+         "region": ["A", "B", "A", "C"]
+     })
+
+
+ class TestConfigValidation:
+     """Test configuration validation for predictors."""
+
+     def test_missing_config_file(self, tmp_path):
+         """Test error when config file doesn't exist."""
+         from ins_pricing.production.predict import load_predictor_from_config
+
+         config_path = tmp_path / "nonexistent.json"
+
+         with pytest.raises((FileNotFoundError, ModelLoadError)):
+             load_predictor_from_config(config_path)
+
+     def test_invalid_json_config(self, tmp_path):
+         """Test error when config file contains invalid JSON."""
+         from ins_pricing.production.predict import load_predictor_from_config
+
+         config_path = tmp_path / "invalid.json"
+         config_path.write_text("{ invalid json }")
+
+         with pytest.raises((ConfigurationError, json.JSONDecodeError)):
+             load_predictor_from_config(config_path)
+
+     def test_missing_required_fields(self, tmp_path):
+         """Test error when required config fields are missing."""
+         from ins_pricing.production.predict import load_predictor_from_config
+
+         config_path = tmp_path / "incomplete.json"
+         config_path.write_text(json.dumps({"model_name": "test"}))
+
+         with pytest.raises(ConfigurationError):
+             load_predictor_from_config(config_path)
+
+
+ class TestPredictorLoading:
+     """Test predictor loading functionality."""
+
+     @patch('ins_pricing.production.predict.load_model')
+     def test_load_valid_predictor(self, mock_load_model, tmp_path, sample_config):
+         """Test loading a valid predictor."""
+         from ins_pricing.production.predict import load_predictor_from_config
+
+         # Setup
+         config_path = tmp_path / "config.json"
+         config_path.write_text(json.dumps(sample_config))
+         mock_load_model.return_value = Mock()
+
+         # Execute
+         predictor = load_predictor_from_config(config_path)
+
+         # Verify
+         assert predictor is not None
+         assert predictor['config']['model_name'] == "test_model"
+
+     def test_load_missing_model_file(self, tmp_path, sample_config):
+         """Test error when model file is missing."""
+         from ins_pricing.production.predict import load_predictor_from_config
+
+         config_path = tmp_path / "config.json"
+         config_path.write_text(json.dumps(sample_config))
+
+         with pytest.raises(ModelLoadError):
+             load_predictor_from_config(config_path)
+
+
+ class TestPrediction:
+     """Test prediction functionality."""
+
+     @patch('ins_pricing.production.predict.load_model')
+     def test_predict_on_valid_data(self, mock_load_model, sample_data):
+         """Test prediction on valid input data."""
+         from ins_pricing.production.predict import predict
+
+         # Setup mock model
+         mock_model = Mock()
+         mock_model.predict.return_value = np.array([100, 150, 200, 250])
+         mock_load_model.return_value = mock_model
+
+         predictor = {
+             'model': mock_model,
+             'config': {'feature_names': ["age", "premium", "region"]}
+         }
+
+         # Execute
+         predictions = predict(predictor, sample_data)
+
+         # Verify
+         assert len(predictions) == len(sample_data)
+         assert all(isinstance(p, (int, float, np.number)) for p in predictions)
+
+     def test_predict_missing_features(self, sample_data):
+         """Test error when input data is missing required features."""
+         from ins_pricing.production.predict import predict
+         from ins_pricing.utils.validation import validate_required_columns
+
+         predictor = {
+             'model': Mock(),
+             'config': {'feature_names': ["age", "premium", "region", "missing_col"]}
+         }
+
+         # Should raise validation error for missing column
+         with pytest.raises(PredictionError):
+             validate_required_columns(
+                 sample_data,
+                 predictor['config']['feature_names'],
+                 df_name="input_data"
+             )
+
+     def test_predict_empty_dataframe(self):
+         """Test prediction on empty DataFrame."""
+         from ins_pricing.production.predict import predict
+         from ins_pricing.utils.validation import validate_dataframe_not_empty
+
+         empty_df = pd.DataFrame()
+         predictor = {'model': Mock(), 'config': {}}
+
+         with pytest.raises(PredictionError):
+             validate_dataframe_not_empty(empty_df, df_name="input_data")
+
+
+ class TestBatchScoring:
+     """Test batch scoring functionality."""
+
+     @patch('ins_pricing.production.predict.load_predictor_from_config')
+     @patch('ins_pricing.production.predict.predict')
+     def test_batch_score_success(self, mock_predict, mock_load, sample_data, tmp_path):
+         """Test successful batch scoring."""
+         from ins_pricing.production.predict import batch_score
+
+         # Setup
+         mock_load.return_value = {'model': Mock(), 'config': {}}
+         mock_predict.return_value = np.array([100, 150, 200, 250])
+
+         output_path = tmp_path / "predictions.csv"
+
+         # Execute
+         batch_score(
+             config_path=tmp_path / "config.json",
+             input_data=sample_data,
+             output_path=output_path
+         )
+
+         # Verify
+         assert output_path.exists()
+         results = pd.read_csv(output_path)
+         assert "predictions" in results.columns
+
+     def test_batch_score_large_data(self, tmp_path):
+         """Test batch scoring with large dataset."""
+         from ins_pricing.production.predict import batch_score
+
+         # Create large dataset
+         large_data = pd.DataFrame({
+             "age": np.random.randint(20, 70, size=10000),
+             "premium": np.random.uniform(100, 500, size=10000),
+             "region": np.random.choice(["A", "B", "C"], size=10000)
+         })
+
+         with patch('ins_pricing.production.predict.load_predictor_from_config') as mock_load:
+             with patch('ins_pricing.production.predict.predict') as mock_predict:
+                 mock_load.return_value = {'model': Mock(), 'config': {}}
+                 mock_predict.return_value = np.random.uniform(50, 300, size=10000)
+
+                 output_path = tmp_path / "large_predictions.csv"
+                 batch_score(
+                     config_path=tmp_path / "config.json",
+                     input_data=large_data,
+                     output_path=output_path
+                 )
+
+                 assert output_path.exists()
+
+
+ class TestModelVersioning:
+     """Test model versioning functionality."""
+
+     def test_version_compatibility_check(self):
+         """Test version compatibility checking."""
+         # Test that predictor checks model version compatibility
+         pass  # Implement based on actual version checking logic
+
+     def test_load_different_model_versions(self):
+         """Test loading different versions of the same model."""
+         pass  # Implement based on actual versioning system
+
+
+ @pytest.mark.integration
+ class TestPredictionIntegration:
+     """Integration tests for prediction pipeline."""
+
+     @pytest.mark.skipif(not Path("test_models").exists(), reason="Test models not available")
+     def test_end_to_end_prediction(self):
+         """Test complete prediction pipeline from config to output."""
+         # This would require actual model artifacts
+         pass
ins_pricing/tests/production/test_preprocess.py
@@ -0,0 +1,339 @@
+ """Tests for production preprocessing module."""
+
+ import numpy as np
+ import pandas as pd
+ import pytest
+ from unittest.mock import Mock, patch
+
+ from ins_pricing.exceptions import PreprocessingError, DataValidationError
+
+
+ @pytest.fixture
+ def sample_raw_data():
+     """Sample raw input data."""
+     return pd.DataFrame({
+         "age": [25, 30, 35, 40, 45],
+         "gender": ["M", "F", "M", "F", "M"],
+         "region": ["North", "South", "East", "West", "North"],
+         "premium": [100.0, 150.0, 200.0, 250.0, 300.0],
+         "coverage": ["Basic", "Premium", "Basic", "Premium", "Premium"]
+     })
+
+
+ class TestFeatureEngineering:
+     """Test feature engineering transformations."""
+
+     def test_create_age_bands(self, sample_raw_data):
+         """Test age banding transformation."""
+         from ins_pricing.production.preprocess import create_age_bands
+
+         df = create_age_bands(sample_raw_data, 'age', bins=[0, 30, 40, 100])
+
+         assert 'age_band' in df.columns
+         assert df['age_band'].dtype == 'object' or pd.api.types.is_categorical_dtype(df['age_band'])
+
+     def test_encode_categorical(self, sample_raw_data):
+         """Test categorical encoding."""
+         from ins_pricing.production.preprocess import encode_categorical
+
+         df = encode_categorical(
+             sample_raw_data,
+             columns=['gender', 'region'],
+             method='onehot'
+         )
+
+         # Check that encoded columns exist
+         assert any('gender_' in col for col in df.columns)
+         assert any('region_' in col for col in df.columns)
+
+     def test_scale_numeric_features(self, sample_raw_data):
+         """Test numeric feature scaling."""
+         from ins_pricing.production.preprocess import scale_features
+
+         df = scale_features(
+             sample_raw_data,
+             columns=['premium'],
+             method='standard'
+         )
+
+         # Check that scaled values have mean ~0 and std ~1
+         assert abs(df['premium'].mean()) < 0.1
+         assert abs(df['premium'].std() - 1.0) < 0.1
+
+     def test_create_interaction_features(self, sample_raw_data):
+         """Test interaction feature creation."""
+         from ins_pricing.production.preprocess import create_interactions
+
+         df = create_interactions(
+             sample_raw_data,
+             feature_pairs=[('age', 'premium')]
+         )
+
+         assert 'age_x_premium' in df.columns
+
+     def test_polynomial_features(self, sample_raw_data):
+         """Test polynomial feature generation."""
+         from ins_pricing.production.preprocess import create_polynomial_features
+
+         df = create_polynomial_features(
+             sample_raw_data,
+             columns=['age'],
+             degree=2
+         )
+
+         assert 'age_squared' in df.columns
+
+
+ class TestDataCleaning:
+     """Test data cleaning operations."""
+
+     def test_handle_missing_values(self):
+         """Test missing value handling."""
+         from ins_pricing.production.preprocess import handle_missing
+
+         df = pd.DataFrame({
+             "col1": [1, 2, np.nan, 4],
+             "col2": [np.nan, 2, 3, 4],
+             "col3": ["A", "B", None, "D"]
+         })
+
+         cleaned = handle_missing(df, strategy='mean', columns=['col1', 'col2'])
+
+         assert cleaned['col1'].isna().sum() == 0
+         assert cleaned['col2'].isna().sum() == 0
+
+     def test_remove_outliers(self, sample_raw_data):
+         """Test outlier removal."""
+         from ins_pricing.production.preprocess import remove_outliers
+
+         # Add outlier
+         data_with_outlier = sample_raw_data.copy()
+         data_with_outlier.loc[0, 'premium'] = 10000  # Extreme value
+
+         cleaned = remove_outliers(data_with_outlier, column='premium', method='iqr')
+
+         assert len(cleaned) < len(data_with_outlier)
+
+     def test_deduplicate(self):
+         """Test duplicate removal."""
+         from ins_pricing.production.preprocess import deduplicate
+
+         df = pd.DataFrame({
+             "id": [1, 2, 2, 3],
+             "value": [10, 20, 20, 30]
+         })
+
+         deduped = deduplicate(df)
+
+         assert len(deduped) == 3
+
+     def test_fix_data_types(self):
+         """Test data type corrections."""
+         from ins_pricing.production.preprocess import fix_data_types
+
+         df = pd.DataFrame({
+             "age": ["25", "30", "35"],  # String instead of int
+             "premium": [100, 200, 300]
+         })
+
+         fixed = fix_data_types(df, type_spec={'age': 'int64'})
+
+         assert fixed['age'].dtype == np.int64
+
+
+ class TestFeatureSelection:
+     """Test feature selection operations."""
+
+     def test_select_by_importance(self):
+         """Test feature selection by importance."""
+         from ins_pricing.production.preprocess import select_features_by_importance
+
+         X = pd.DataFrame(np.random.rand(100, 10))
+         y = pd.Series(np.random.rand(100))
+
+         selected = select_features_by_importance(X, y, n_features=5)
+
+         assert selected.shape[1] == 5
+
+     def test_remove_low_variance(self):
+         """Test removal of low variance features."""
+         from ins_pricing.production.preprocess import remove_low_variance
+
+         df = pd.DataFrame({
+             "constant": [1, 1, 1, 1],  # Zero variance
+             "low_var": [1, 1, 1, 2],  # Low variance
+             "high_var": [1, 5, 10, 20]  # High variance
+         })
+
+         filtered = remove_low_variance(df, threshold=0.01)
+
+         assert 'constant' not in filtered.columns
+         assert 'high_var' in filtered.columns
+
+     def test_remove_correlated_features(self):
+         """Test removal of highly correlated features."""
+         from ins_pricing.production.preprocess import remove_correlated
+
+         # Create correlated features
+         df = pd.DataFrame({
+             "feature_1": np.random.rand(100),
+             "feature_2": np.random.rand(100)
+         })
+         df['feature_3'] = df['feature_1'] * 1.1  # Highly correlated
+
+         filtered = remove_correlated(df, threshold=0.95)
+
+         assert filtered.shape[1] < 3
+
+
+ class TestPipelineValidation:
+     """Test preprocessing pipeline validation."""
+
+     def test_validate_input_schema(self, sample_raw_data):
+         """Test input schema validation."""
+         from ins_pricing.production.preprocess import validate_input_schema
+
+         expected_schema = {
+             "age": "int64",
+             "gender": "object",
+             "premium": "float64"
+         }
+
+         # Should not raise
+         validate_input_schema(sample_raw_data, expected_schema)
+
+     def test_validate_input_schema_failure(self):
+         """Test input schema validation catches errors."""
+         from ins_pricing.production.preprocess import validate_input_schema
+         from ins_pricing.utils.validation import validate_column_types
+
+         df = pd.DataFrame({
+             "age": ["not_a_number", "25"],  # Wrong type
+             "premium": [100.0, 200.0]
+         })
+
+         expected_schema = {"age": "int64", "premium": "float64"}
+
+         with pytest.raises(DataValidationError):
+             validate_column_types(df, expected_schema, coerce=False)
+
+     def test_validate_feature_range(self, sample_raw_data):
+         """Test feature value range validation."""
+         from ins_pricing.utils.validation import validate_value_range
+
+         # Age should be positive
+         validate_value_range(sample_raw_data, 'age', min_val=0, max_val=120)
+
+         # Premium should be positive
+         validate_value_range(sample_raw_data, 'premium', min_val=0)
+
+
+ class TestPreprocessorState:
+     """Test preprocessor state management."""
+
+     def test_save_preprocessor_state(self, sample_raw_data, tmp_path):
+         """Test saving preprocessor state."""
+         from ins_pricing.production.preprocess import Preprocessor
+
+         preprocessor = Preprocessor()
+         preprocessor.fit(sample_raw_data)
+
+         state_path = tmp_path / "preprocessor_state.pkl"
+         preprocessor.save(state_path)
+
+         assert state_path.exists()
+
+     def test_load_preprocessor_state(self, tmp_path):
+         """Test loading preprocessor state."""
+         from ins_pricing.production.preprocess import Preprocessor
+
+         # Create and save
+         preprocessor = Preprocessor()
+         state_path = tmp_path / "preprocessor_state.pkl"
+         preprocessor.save(state_path)
+
+         # Load
+         loaded = Preprocessor.load(state_path)
+
+         assert loaded is not None
+
+     def test_preprocessor_consistency(self, sample_raw_data, tmp_path):
+         """Test that loaded preprocessor produces same results."""
+         from ins_pricing.production.preprocess import Preprocessor
+
+         # Fit and transform
+         preprocessor = Preprocessor()
+         preprocessor.fit(sample_raw_data)
+         result1 = preprocessor.transform(sample_raw_data)
+
+         # Save, load, transform
+         state_path = tmp_path / "preprocessor_state.pkl"
+         preprocessor.save(state_path)
+         loaded = Preprocessor.load(state_path)
+         result2 = loaded.transform(sample_raw_data)
+
+         # Results should be identical
+         pd.testing.assert_frame_equal(result1, result2)
+
+
+ class TestTransformationPipeline:
+     """Test complete transformation pipeline."""
+
+     def test_full_pipeline(self, sample_raw_data):
+         """Test complete preprocessing pipeline."""
+         from ins_pricing.production.preprocess import PreprocessingPipeline
+
+         pipeline = PreprocessingPipeline([
+             ('handle_missing', {'strategy': 'mean'}),
+             ('encode_categorical', {'columns': ['gender', 'region']}),
+             ('scale_features', {'columns': ['premium'], 'method': 'standard'})
+         ])
+
+         transformed = pipeline.fit_transform(sample_raw_data)
+
+         assert transformed is not None
+         assert len(transformed) == len(sample_raw_data)
+
+     def test_pipeline_error_handling(self):
+         """Test pipeline handles errors gracefully."""
+         from ins_pricing.production.preprocess import PreprocessingPipeline
+
+         df = pd.DataFrame({
+             "col1": [1, 2, 3],
+             "col2": ["A", "B", "C"]
+         })
+
+         # Try to scale a categorical column (should fail)
+         pipeline = PreprocessingPipeline([
+             ('scale_features', {'columns': ['col2'], 'method': 'standard'})
+         ])
+
+         with pytest.raises(PreprocessingError):
+             pipeline.fit_transform(df)
+
+
+ @pytest.mark.performance
+ class TestPreprocessingPerformance:
+     """Test preprocessing performance."""
+
+     def test_large_dataset_preprocessing(self):
+         """Test preprocessing on large dataset."""
+         from ins_pricing.production.preprocess import Preprocessor
+
+         n = 100_000
+         large_df = pd.DataFrame({
+             "age": np.random.randint(18, 80, n),
+             "premium": np.random.uniform(100, 1000, n),
+             "region": np.random.choice(['A', 'B', 'C', 'D'], n)
+         })
+
+         preprocessor = Preprocessor()
+
+         import time
+         start = time.time()
+         preprocessor.fit(large_df)
+         transformed = preprocessor.transform(large_df)
+         elapsed = time.time() - start
+
+         assert len(transformed) == n
+         assert elapsed < 5.0  # Should complete in under 5 seconds