ins-pricing 0.2.7-py3-none-any.whl → 0.2.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/CHANGELOG.md +179 -0
- ins_pricing/RELEASE_NOTES_0.2.8.md +344 -0
- ins_pricing/modelling/explain/shap_utils.py +209 -6
- ins_pricing/pricing/calibration.py +125 -1
- ins_pricing/pricing/factors.py +110 -1
- ins_pricing/production/preprocess.py +166 -0
- ins_pricing/setup.py +1 -1
- ins_pricing/tests/governance/__init__.py +1 -0
- ins_pricing/tests/governance/test_audit.py +56 -0
- ins_pricing/tests/governance/test_registry.py +128 -0
- ins_pricing/tests/governance/test_release.py +74 -0
- ins_pricing/tests/pricing/__init__.py +1 -0
- ins_pricing/tests/pricing/test_calibration.py +72 -0
- ins_pricing/tests/pricing/test_exposure.py +64 -0
- ins_pricing/tests/pricing/test_factors.py +156 -0
- ins_pricing/tests/pricing/test_rate_table.py +40 -0
- ins_pricing/tests/production/__init__.py +1 -0
- ins_pricing/tests/production/test_monitoring.py +350 -0
- ins_pricing/tests/production/test_predict.py +233 -0
- ins_pricing/tests/production/test_preprocess.py +339 -0
- ins_pricing/tests/production/test_scoring.py +311 -0
- ins_pricing/utils/profiling.py +377 -0
- ins_pricing/utils/validation.py +427 -0
- {ins_pricing-0.2.7.dist-info → ins_pricing-0.2.8.dist-info}/METADATA +1 -51
- {ins_pricing-0.2.7.dist-info → ins_pricing-0.2.8.dist-info}/RECORD +27 -11
- ins_pricing/CHANGELOG_20260114.md +0 -275
- ins_pricing/CODE_REVIEW_IMPROVEMENTS.md +0 -715
- {ins_pricing-0.2.7.dist-info → ins_pricing-0.2.8.dist-info}/WHEEL +0 -0
- {ins_pricing-0.2.7.dist-info → ins_pricing-0.2.8.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/ins_pricing/utils/validation.py
@@ -0,0 +1,427 @@
+"""Data validation utilities for ins_pricing.
+
+This module provides reusable validation functions to ensure data quality
+and provide clear error messages when validation fails.
+
+Example:
+    >>> import pandas as pd
+    >>> from ins_pricing.utils.validation import validate_required_columns
+    >>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
+    >>> validate_required_columns(df, ['a', 'b'], df_name='my_data')
+    >>> # Raises DataValidationError if columns missing
+"""
+
+from __future__ import annotations
+
+from typing import Dict, List, Optional, Union
+
+import pandas as pd
+
+from ins_pricing.exceptions import DataValidationError
+
+
+def validate_required_columns(
+    df: pd.DataFrame,
+    required: List[str],
+    *,
+    df_name: str = "DataFrame"
+) -> None:
+    """Validate that DataFrame contains all required columns.
+
+    Args:
+        df: DataFrame to validate
+        required: List of required column names
+        df_name: Name of DataFrame for error messages (default: "DataFrame")
+
+    Raises:
+        DataValidationError: If any required columns are missing
+
+    Example:
+        >>> df = pd.DataFrame({'age': [25, 30], 'premium': [100, 200]})
+        >>> validate_required_columns(df, ['age', 'premium'], df_name='policy_data')
+        >>> validate_required_columns(df, ['age', 'claim'], df_name='policy_data')
+        Traceback (most recent call last):
+        ...
+        DataValidationError: policy_data missing required columns: ['claim']...
+    """
+    missing = [col for col in required if col not in df.columns]
+    if missing:
+        available_preview = list(df.columns)[:50]
+        raise DataValidationError(
+            f"{df_name} missing required columns: {missing}. "
+            f"Available columns (first 50): {available_preview}"
+        )
+
+
+def validate_column_types(
+    df: pd.DataFrame,
+    type_spec: Dict[str, Union[type, str]],
+    *,
+    coerce: bool = False,
+    df_name: str = "DataFrame"
+) -> pd.DataFrame:
+    """Validate and optionally coerce column data types.
+
+    Args:
+        df: DataFrame to validate
+        type_spec: Dictionary mapping column names to expected types.
+            Types can be Python types (int, float, str) or pandas
+            dtype strings ('int64', 'float64', 'object', 'category')
+        coerce: If True, attempt to convert columns to expected types.
+            If False, raise error on type mismatch (default: False)
+        df_name: Name of DataFrame for error messages
+
+    Returns:
+        DataFrame with validated (and possibly coerced) types
+
+    Raises:
+        DataValidationError: If column types don't match and coerce=False
+
+    Example:
+        >>> df = pd.DataFrame({'age': ['25', '30'], 'premium': [100.0, 200.0]})
+        >>> df = validate_column_types(
+        ...     df,
+        ...     {'age': 'int64', 'premium': 'float64'},
+        ...     coerce=True
+        ... )
+        >>> df['age'].dtype
+        dtype('int64')
+    """
+    df = df.copy() if coerce else df
+
+    for col, expected_type in type_spec.items():
+        if col not in df.columns:
+            continue
+
+        current_dtype = df[col].dtype
+
+        # Convert type spec to pandas dtype
+        if isinstance(expected_type, type):
+            if expected_type == int:
+                expected_dtype = 'int64'
+            elif expected_type == float:
+                expected_dtype = 'float64'
+            elif expected_type == str:
+                expected_dtype = 'object'
+            else:
+                expected_dtype = str(expected_type)
+        else:
+            expected_dtype = expected_type
+
+        # Check if types match
+        type_matches = (
+            str(current_dtype) == expected_dtype or
+            current_dtype.name == expected_dtype
+        )
+
+        if not type_matches:
+            if coerce:
+                try:
+                    df[col] = df[col].astype(expected_dtype)
+                except (ValueError, TypeError) as e:
+                    raise DataValidationError(
+                        f"{df_name}: Cannot convert column '{col}' from "
+                        f"{current_dtype} to {expected_dtype}: {e}"
+                    )
+            else:
+                raise DataValidationError(
+                    f"{df_name}: Column '{col}' has type {current_dtype}, "
+                    f"expected {expected_dtype}"
+                )
+
+    return df
+
+
+def validate_value_range(
+    df: pd.DataFrame,
+    col: str,
+    *,
+    min_val: Optional[float] = None,
+    max_val: Optional[float] = None,
+    df_name: str = "DataFrame"
+) -> None:
+    """Validate that numeric column values are within expected range.
+
+    Args:
+        df: DataFrame to validate
+        col: Column name to check
+        min_val: Minimum allowed value (inclusive), None for no minimum
+        max_val: Maximum allowed value (inclusive), None for no maximum
+        df_name: Name of DataFrame for error messages
+
+    Raises:
+        DataValidationError: If values are outside the specified range
+
+    Example:
+        >>> df = pd.DataFrame({'age': [25, 30, 150]})
+        >>> validate_value_range(df, 'age', min_val=0, max_val=120)
+        Traceback (most recent call last):
+        ...
+        DataValidationError: ...values outside range [0, 120]...
+    """
+    if col not in df.columns:
+        raise DataValidationError(
+            f"{df_name}: Column '{col}' not found for range validation"
+        )
+
+    if not pd.api.types.is_numeric_dtype(df[col]):
+        raise DataValidationError(
+            f"{df_name}: Column '{col}' is not numeric, cannot validate range"
+        )
+
+    violations = []
+
+    if min_val is not None:
+        below_min = df[col] < min_val
+        if below_min.any():
+            count = below_min.sum()
+            min_found = df.loc[below_min, col].min()
+            violations.append(
+                f"{count} values below minimum {min_val} (min found: {min_found})"
+            )
+
+    if max_val is not None:
+        above_max = df[col] > max_val
+        if above_max.any():
+            count = above_max.sum()
+            max_found = df.loc[above_max, col].max()
+            violations.append(
+                f"{count} values above maximum {max_val} (max found: {max_found})"
+            )
+
+    if violations:
+        range_str = f"[{min_val}, {max_val}]"
+        raise DataValidationError(
+            f"{df_name}: Column '{col}' has values outside range {range_str}: "
+            f"{'; '.join(violations)}"
+        )
+
+
+def validate_no_nulls(
+    df: pd.DataFrame,
+    columns: List[str],
+    *,
+    df_name: str = "DataFrame"
+) -> None:
+    """Validate that specified columns contain no null values.
+
+    Args:
+        df: DataFrame to validate
+        columns: List of column names to check for nulls
+        df_name: Name of DataFrame for error messages
+
+    Raises:
+        DataValidationError: If any specified columns contain null values
+
+    Example:
+        >>> df = pd.DataFrame({'age': [25, None, 30], 'premium': [100, 200, 300]})
+        >>> validate_no_nulls(df, ['age', 'premium'])
+        Traceback (most recent call last):
+        ...
+        DataValidationError: ...contains null values: age (1 nulls)...
+    """
+    null_info = []
+
+    for col in columns:
+        if col not in df.columns:
+            raise DataValidationError(
+                f"{df_name}: Column '{col}' not found for null validation"
+            )
+
+        null_count = df[col].isna().sum()
+        if null_count > 0:
+            null_info.append(f"{col} ({null_count} nulls)")
+
+    if null_info:
+        raise DataValidationError(
+            f"{df_name} contains null values: {', '.join(null_info)}"
+        )
+
+
+def validate_categorical_values(
+    df: pd.DataFrame,
+    col: str,
+    allowed_values: List[str],
+    *,
+    df_name: str = "DataFrame"
+) -> None:
+    """Validate that categorical column contains only allowed values.
+
+    Args:
+        df: DataFrame to validate
+        col: Column name to check
+        allowed_values: List of allowed values
+        df_name: Name of DataFrame for error messages
+
+    Raises:
+        DataValidationError: If column contains values not in allowed_values
+
+    Example:
+        >>> df = pd.DataFrame({'gender': ['M', 'F', 'X']})
+        >>> validate_categorical_values(df, 'gender', ['M', 'F'])
+        Traceback (most recent call last):
+        ...
+        DataValidationError: ...contains invalid values: ['X']...
+    """
+    if col not in df.columns:
+        raise DataValidationError(
+            f"{df_name}: Column '{col}' not found for categorical validation"
+        )
+
+    unique_values = df[col].dropna().unique()
+    invalid_values = [v for v in unique_values if v not in allowed_values]
+
+    if invalid_values:
+        raise DataValidationError(
+            f"{df_name}: Column '{col}' contains invalid values: {invalid_values}. "
+            f"Allowed values: {allowed_values}"
+        )
+
+
+def validate_positive(
+    df: pd.DataFrame,
+    columns: List[str],
+    *,
+    allow_zero: bool = False,
+    df_name: str = "DataFrame"
+) -> None:
+    """Validate that numeric columns contain only positive values.
+
+    Args:
+        df: DataFrame to validate
+        columns: List of column names to check
+        allow_zero: If True, allow zero values (default: False)
+        df_name: Name of DataFrame for error messages
+
+    Raises:
+        DataValidationError: If columns contain non-positive values
+
+    Example:
+        >>> df = pd.DataFrame({'premium': [100, -50, 200], 'exposure': [1, 0, 2]})
+        >>> validate_positive(df, ['premium', 'exposure'])
+        Traceback (most recent call last):
+        ...
+        DataValidationError: ...contains non-positive values...
+    """
+    violations = []
+
+    for col in columns:
+        if col not in df.columns:
+            raise DataValidationError(
+                f"{df_name}: Column '{col}' not found for positivity validation"
+            )
+
+        if not pd.api.types.is_numeric_dtype(df[col]):
+            raise DataValidationError(
+                f"{df_name}: Column '{col}' is not numeric"
+            )
+
+        if allow_zero:
+            invalid = df[col] < 0
+            msg = "negative"
+        else:
+            invalid = df[col] <= 0
+            msg = "non-positive"
+
+        if invalid.any():
+            count = invalid.sum()
+            min_val = df.loc[invalid, col].min()
+            violations.append(f"{col} ({count} {msg} values, min: {min_val})")
+
+    if violations:
+        raise DataValidationError(
+            f"{df_name} contains {msg} values: {', '.join(violations)}"
+        )
+
+
+def validate_dataframe_not_empty(
+    df: pd.DataFrame,
+    *,
+    df_name: str = "DataFrame"
+) -> None:
+    """Validate that DataFrame is not empty.
+
+    Args:
+        df: DataFrame to validate
+        df_name: Name of DataFrame for error messages
+
+    Raises:
+        DataValidationError: If DataFrame is empty
+
+    Example:
+        >>> df = pd.DataFrame()
+        >>> validate_dataframe_not_empty(df, df_name='train_data')
+        Traceback (most recent call last):
+        ...
+        DataValidationError: train_data is empty (0 rows)
+    """
+    if len(df) == 0:
+        raise DataValidationError(f"{df_name} is empty (0 rows)")
+
+
+def validate_date_range(
+    df: pd.DataFrame,
+    col: str,
+    *,
+    min_date: Optional[pd.Timestamp] = None,
+    max_date: Optional[pd.Timestamp] = None,
+    df_name: str = "DataFrame"
+) -> None:
+    """Validate that date column values are within expected range.
+
+    Args:
+        df: DataFrame to validate
+        col: Column name to check (should be datetime type)
+        min_date: Minimum allowed date, None for no minimum
+        max_date: Maximum allowed date, None for no maximum
+        df_name: Name of DataFrame for error messages
+
+    Raises:
+        DataValidationError: If dates are outside the specified range
+
+    Example:
+        >>> df = pd.DataFrame({'policy_date': pd.to_datetime(['2020-01-01', '2025-01-01'])})
+        >>> validate_date_range(
+        ...     df, 'policy_date',
+        ...     min_date=pd.Timestamp('2020-01-01'),
+        ...     max_date=pd.Timestamp('2023-12-31')
+        ... )
+        Traceback (most recent call last):
+        ...
+        DataValidationError: ...dates outside range...
+    """
+    if col not in df.columns:
+        raise DataValidationError(
+            f"{df_name}: Column '{col}' not found for date validation"
+        )
+
+    if not pd.api.types.is_datetime64_any_dtype(df[col]):
+        raise DataValidationError(
+            f"{df_name}: Column '{col}' is not datetime type"
+        )
+
+    violations = []
+
+    if min_date is not None:
+        before_min = df[col] < min_date
+        if before_min.any():
+            count = before_min.sum()
+            earliest = df.loc[before_min, col].min()
+            violations.append(
+                f"{count} dates before {min_date} (earliest: {earliest})"
+            )
+
+    if max_date is not None:
+        after_max = df[col] > max_date
+        if after_max.any():
+            count = after_max.sum()
+            latest = df.loc[after_max, col].max()
+            violations.append(
+                f"{count} dates after {max_date} (latest: {latest})"
+            )
+
+    if violations:
+        raise DataValidationError(
+            f"{df_name}: Column '{col}' has dates outside range: "
+            f"{'; '.join(violations)}"
+        )
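The whole of `validation.py` is new in 0.2.8, so there is no old side to compare against. As a quick orientation, here is a minimal sketch of how the helpers above compose into a single pre-training guard. Only the imported functions and their signatures come from the diff itself; `check_policy_frame`, its column names, and its bounds are hypothetical, and the snippet assumes `ins_pricing` 0.2.8 is installed.

```python
# Hypothetical guard built from the 0.2.8 validation helpers shown above.
import pandas as pd

from ins_pricing.exceptions import DataValidationError
from ins_pricing.utils.validation import (
    validate_categorical_values,
    validate_column_types,
    validate_dataframe_not_empty,
    validate_no_nulls,
    validate_positive,
    validate_required_columns,
    validate_value_range,
)


def check_policy_frame(df: pd.DataFrame) -> pd.DataFrame:
    """Fail fast with DataValidationError before any model code runs."""
    validate_dataframe_not_empty(df, df_name="policy_data")
    validate_required_columns(df, ["age", "gender", "premium"], df_name="policy_data")
    # coerce=True copies the frame and casts, raising if a cast is impossible
    df = validate_column_types(
        df, {"age": "int64", "premium": "float64"}, coerce=True, df_name="policy_data"
    )
    validate_no_nulls(df, ["age", "premium"], df_name="policy_data")
    validate_value_range(df, "age", min_val=0, max_val=120, df_name="policy_data")
    validate_positive(df, ["premium"], df_name="policy_data")
    validate_categorical_values(df, "gender", ["M", "F"], df_name="policy_data")
    return df


if __name__ == "__main__":
    good = pd.DataFrame(
        {"age": ["25", "30"], "gender": ["M", "F"], "premium": [100.0, 200.0]}
    )
    checked = check_policy_frame(good)   # passes; 'age' coerced to int64
    bad = good.assign(age=["25", "150"])
    try:
        check_policy_frame(bad)
    except DataValidationError as exc:
        print(f"rejected: {exc}")        # age outside range [0, 120]
```

Every helper routes failures through the same `DataValidationError`, so a single `except` at the pipeline boundary is enough to catch any of them.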
--- a/ins_pricing-0.2.7.dist-info/METADATA
+++ b/ins_pricing-0.2.8.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ins_pricing
-Version: 0.2.7
+Version: 0.2.8
 Summary: Reusable modelling, pricing, governance, and reporting utilities.
 Author: meishi125478
 License: Proprietary
@@ -49,53 +49,3 @@ Requires-Dist: shap>=0.41; extra == "all"
 Requires-Dist: contextily>=1.3; extra == "all"
 Requires-Dist: pynndescent>=0.5; extra == "all"
 Requires-Dist: torch-geometric>=2.3; extra == "all"
-
-# Ins-Pricing Overview
-
-This repository contains risk modeling and optimization notebooks, scripts, and a reusable training framework. The main module is `ins_pricing/modelling/bayesopt`.
-
-## Top-level structure
-
-- `Auto Info/`: vehicle info crawling, preprocessing, and embedding experiments
-- `GLM and LGB/`: GLM/LightGBM modeling experiments
-- `OpenAI/`: OpenAI notebook prototypes
-- `Python Code/`: runnable scripts and utilities
-- `others/`: temporary or miscellaneous notebooks
-- `ins_pricing/`: reusable training framework and CLI tools (BayesOpt subpackage)
-- `user_packages legacy/`: historical snapshot
-
-Note: `ins_pricing/modelling/examples/` is kept in the repo only and is not shipped in the PyPI package.
-
-## Quickstart
-
-Run the following commands from the repo root:
-
-```bash
-python -m venv .venv
-source .venv/bin/activate  # Windows: .\\.venv\\Scripts\\activate
-pip install pandas scikit-learn lightgbm seaborn matplotlib
-```
-
-Start notebooks:
-
-```bash
-jupyter lab
-```
-
-## BayesOpt entry points
-
-- CLI batch training: `python ins_pricing/modelling/cli/BayesOpt_entry.py --config-json <path>`
-- Incremental training: `python ins_pricing/modelling/cli/BayesOpt_incremental.py --config-json <path>`
-- Python API: `from ins_pricing.modelling import BayesOptModel`
-
-## Tests
-
-```bash
-pytest -q
-```
-
-## Data and outputs
-
-- Put shared data under `data/` (create it if needed).
-- Training outputs are written to `plot/`, `Results/`, and `model/` by default.
-- Keep secrets and large files outside the repo and use environment variables or `.env`.
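Beyond the version bump, the only METADATA change is that 0.2.8 stops shipping the repository README as the package's long description. A quick, standard-library-only way to confirm which side of this diff is installed; nothing here is ins_pricing API:

```python
# Inspect the installed wheel's metadata via the stdlib.
from importlib.metadata import metadata, version

print(version("ins_pricing"))    # expect "0.2.8" after upgrading
meta = metadata("ins_pricing")
print(meta["Summary"])           # "Reusable modelling, pricing, ..." (unchanged)
# In 0.2.7 the message body carried the repo README; in 0.2.8 it should be empty.
print(meta.get_payload() or "<no long description>")
```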
--- a/ins_pricing-0.2.7.dist-info/RECORD
+++ b/ins_pricing-0.2.8.dist-info/RECORD
@@ -1,9 +1,9 @@
-ins_pricing/
-ins_pricing/CODE_REVIEW_IMPROVEMENTS.md,sha256=eGQ1qtl8l84VvxvAnvzcV9Kk_FVPthAg1poq92k6vZU,17852
+ins_pricing/CHANGELOG.md,sha256=Z423KtXu_4_cL0T2C947K8F5hHjulRvoc3lRjH5nCVw,7618
 ins_pricing/README.md,sha256=pMOO1cU06oBfvm5d8gvAsQsJr9bfb6AKdpXlrx0AAxw,2727
+ins_pricing/RELEASE_NOTES_0.2.8.md,sha256=KIJzk1jbZbZPKjwnkPSDHO_2Ipv3SP3CzCNDdf07jI0,9331
 ins_pricing/__init__.py,sha256=46j1wCdLVrgrofeBwKl-3NXTxzjbTv-w3KjW-dyKGiY,2622
 ins_pricing/exceptions.py,sha256=5fZavPV4zNJ7wPC75L215KkHXX9pRrfDAYZOdSKJMGo,4778
-ins_pricing/setup.py,sha256=
+ins_pricing/setup.py,sha256=cAHC3M7QAMgZYzxO9tTkkk-Wc9eWPSUUBj3G1RKHURM,1702
 ins_pricing/cli/BayesOpt_entry.py,sha256=X3AiNQQh5ARcjVMM2vOKWPYPDIId40n_RPZA76pTGl4,558
 ins_pricing/cli/BayesOpt_incremental.py,sha256=_Klr5vvNoq_TbgwrH_T3f0a6cHmA9iVJMViiji6ahJY,35927
 ins_pricing/cli/Explain_Run.py,sha256=gEPQjqHiXyXlCTKjUzwSvbAn5_h74ABgb_sEGs-YHVE,664
@@ -51,7 +51,7 @@ ins_pricing/modelling/explain/__init__.py,sha256=CPoGzGu8TTO3FOXjxoXC13VkuIDCf3Y
 ins_pricing/modelling/explain/gradients.py,sha256=9TqCws_p49nFxVMcjVxe4KCZ7frezeL0uV_LCdoM5yo,11088
 ins_pricing/modelling/explain/metrics.py,sha256=K_xOY7ZrHWhbJ79RNB7eXN3VXeTe8vq68ZLH2BlZufA,5389
 ins_pricing/modelling/explain/permutation.py,sha256=TeVlhz7FWyPsaZwEP3BSLa0BZgdOq74coZqnadkGaPw,9333
-ins_pricing/modelling/explain/shap_utils.py,sha256=
+ins_pricing/modelling/explain/shap_utils.py,sha256=70zRIHPPdoECFOFQeBTRxLZF-6sjaGJBNMIRS4_kmVI,10462
 ins_pricing/modelling/plotting/__init__.py,sha256=BBQKcE7IYUYObFrjpSnfNS6rmzc80Lae7oEqxKz-vEk,1058
 ins_pricing/modelling/plotting/common.py,sha256=_kFq7JMA0LnKIp4bqAFvr-24VaHjj9pegDMm1qP9_7Y,1439
 ins_pricing/modelling/plotting/curves.py,sha256=fEnoeHAVvLPI5HXadNLUoWygmHtNWCngynLdZK868NQ,18398
@@ -60,22 +60,26 @@ ins_pricing/modelling/plotting/geo.py,sha256=sRJTYOcAphNFM-oww4qbw9MoZneBCJtur96
 ins_pricing/modelling/plotting/importance.py,sha256=xs3l9uW_rCrakoA__fanIph6DK2jN_DugsKASAzteJU,3666
 ins_pricing/pricing/README.md,sha256=PEcyw5oDkqJHOqnNdzBdbbpZwG4SOlnhMwY-owwQ0GI,1064
 ins_pricing/pricing/__init__.py,sha256=XFplK3zkxPyNQZJd1Gn6_VvpwHLedMqxAd_Vn9tqsTE,881
-ins_pricing/pricing/calibration.py,sha256=
+ins_pricing/pricing/calibration.py,sha256=cx9fbDoOnNEMGPH6Js1EDMfVwy9J7zf_90yuNwD7W6I,6196
 ins_pricing/pricing/data_quality.py,sha256=8FecBE60cABsTZE7HETuoKCEOXIrirGAFgg5wQCZrmU,4043
 ins_pricing/pricing/exposure.py,sha256=rw8kKZ_1QdeGTCm13ck9NXrRBPt6TunxRw2s_qkHYkg,2575
-ins_pricing/pricing/factors.py,sha256=
+ins_pricing/pricing/factors.py,sha256=1cqDqdXRLb9-yt-x60f0lPWdgAAOvk7slnawVIKcWDo,6573
 ins_pricing/pricing/monitoring.py,sha256=GnfF2g1C9KzDks9ELBykfAd0zzVBUvjuTWoffa7aGbI,613
 ins_pricing/pricing/rate_table.py,sha256=llDW95i7gR6cCtGFwcGqgpgFvOOPCURaJWmuQw1oce8,2473
 ins_pricing/production/__init__.py,sha256=plUjyiwxrzHDDgXKezyGp9UHOg7Mav4f0ryXYtNmbfs,885
 ins_pricing/production/drift.py,sha256=q_oE_h2NbVETTBkh9QUu8Y68ERuFFcrfKpOb3zBcvsA,383
 ins_pricing/production/monitoring.py,sha256=A6Hyc5WSKhFkDZOIrqmFteuDee75CdcwdTq644vrk-U,4836
 ins_pricing/production/predict.py,sha256=mJog-RGLHxIJxx4oh0D1gbhJwBZPQs1z1P6YTDfWtfg,21791
-ins_pricing/production/preprocess.py,sha256=
+ins_pricing/production/preprocess.py,sha256=cl20X0rVcKNCjVJswB8SdHffMgox6Qga4Ac29L6pW5g,9404
 ins_pricing/production/scoring.py,sha256=yFmMmbYb7w_RC4uZOCMnAjLMRcjXQWIuT1nsfu-bwuc,1379
 ins_pricing/reporting/README.md,sha256=kTVdB6pNewwh1HlCHrI2SzWTgprtQoQprLRQ2qLdgNA,486
 ins_pricing/reporting/__init__.py,sha256=Se5Cdomv9_RwkIDizkw1yx4iCMcjhjTHb4pZK6K895c,242
 ins_pricing/reporting/report_builder.py,sha256=53ZFqGUx2isAoigT5IDwvXkek67zN7-6IgKeGpJhO7c,2241
 ins_pricing/reporting/scheduler.py,sha256=9koG_1cmWvLqrS66uzMJuAlYI2VTkynV19ssB2TtcKU,1336
+ins_pricing/tests/governance/__init__.py,sha256=5Nxg4_dIxY_J58_x2QOXrrRgw6L51Md0Wnt5Up-chqg,39
+ins_pricing/tests/governance/test_audit.py,sha256=ubybXSTVILPN4VxQ2fMnG6oPNv4LjJJE3EsQ53NYdLU,1702
+ins_pricing/tests/governance/test_registry.py,sha256=TvkNMLHViNuopjjho6oETwZ9d6MNaNM1xbL6URPDKSk,4602
+ins_pricing/tests/governance/test_release.py,sha256=Cdo6prZ0xlioAP2AYHodzgASEIa6ZCLjbXW9Me2RGKk,2347
 ins_pricing/tests/modelling/conftest.py,sha256=0KUXnkTgIGEIsf0J4uzIx5Kq4JkDyFo81Mv0qvIzW9k,180
 ins_pricing/tests/modelling/test_cross_val_generic.py,sha256=iLZOFmdyrycB15lFWoQphkFlEjzZTozQXTLVOHLw2Qg,1721
 ins_pricing/tests/modelling/test_distributed_utils.py,sha256=9cStpDw7jPdQwmm0Po-G2tB04uzSR1CoOUZMLuB61yI,466
@@ -85,12 +89,24 @@ ins_pricing/tests/modelling/test_graph_cache.py,sha256=QEI5cLLtQ9_zwRR50KqUf8qxo
 ins_pricing/tests/modelling/test_plotting.py,sha256=4gJax72l40fQrjyJQLOgUmaT6xn6zXpujEaFNeHVwGw,1911
 ins_pricing/tests/modelling/test_plotting_library.py,sha256=SB5RjKTaPydK848V0xpqEaJtEWhRv6ZfnHmnnzjaPh4,4079
 ins_pricing/tests/modelling/test_preprocessor.py,sha256=FqbKltV803Pd-ZY1xBc4XF1T-INDuUliaVcMIDPmBxI,1438
+ins_pricing/tests/pricing/__init__.py,sha256=SVfgUYBlCmc4wjYLMRX5xPFgQZxTS3aHBOA_Cx1aJg4,36
+ins_pricing/tests/pricing/test_calibration.py,sha256=hLZuSWOH4t9WKcQ-2srvYp4P5ldr1Yh1dhl7s61vMp8,2420
+ins_pricing/tests/pricing/test_exposure.py,sha256=CrpSncVce-PGt2XzjOX6qV0SA22vKPUv1u8RlKQjt_g,2054
+ins_pricing/tests/pricing/test_factors.py,sha256=NTE7lz1RWChhoRt2K5003DoNRqG_Gu4X1Aauy2NexOg,5093
+ins_pricing/tests/pricing/test_rate_table.py,sha256=ICHfAQsC9TaxXbQVKM5AvBaJXRTVY723Vaz1XOWNMW8,1250
+ins_pricing/tests/production/__init__.py,sha256=WFWlvBVdjg-E-nKaiJ8VTKNELYufJusufpij1p1xwso,39
+ins_pricing/tests/production/test_monitoring.py,sha256=jettbaVLH4a3efLWeiQ6FFukGEw7mmz6_AeYhYX0caQ,11409
+ins_pricing/tests/production/test_predict.py,sha256=PNamNzb-KNbLtT8xbBkwWOcmJC1gXvxj2nnc7dR5fyc,8193
+ins_pricing/tests/production/test_preprocess.py,sha256=tsHYANwJjNlaSo8O4qiwqBvMOMtwmtZymRFm6UrODrE,11084
+ins_pricing/tests/production/test_scoring.py,sha256=fKz2tJomodrRt333apCrjtyJCwg9RHRbWm0lvcU6xm0,9848
 ins_pricing/utils/__init__.py,sha256=ovtolxOvlYp_1SOxZ35OPBdn7JB2O4idzRSQgIlzCvc,2339
 ins_pricing/utils/device.py,sha256=fePvqSaOkzHMBbrHCXAOCKRwdcR8YtiGI5K8Q3ljXJc,7543
 ins_pricing/utils/logging.py,sha256=_AKB4ErmvygwGLtu7Ai7ESemj6Hh8FTgh4cs8j_gVW4,2258
 ins_pricing/utils/metrics.py,sha256=zhKYgXgki8RDscjP_GO2lEgzrtMIZCqOX_aLpQzdw6k,8390
 ins_pricing/utils/paths.py,sha256=o_tBiclFvBci4cYg9WANwKPxrMcglEdOjDP-EZgGjdQ,8749
-ins_pricing
-ins_pricing
-ins_pricing-0.2.
-ins_pricing-0.2.
+ins_pricing/utils/profiling.py,sha256=kmbykHLcYywlZxAf_aVU8HXID3zOvUcBoO5Q58AijhA,11132
+ins_pricing/utils/validation.py,sha256=4Tw9VUJPk0N-WO3YUqZP-xXRl1Xpubkm0vi3WzzZrv4,13348
+ins_pricing-0.2.8.dist-info/METADATA,sha256=hYiDc7_gTSXgLXpw2YkLtRlPRN7oaASEuxbODxK6NZI,2157
+ins_pricing-0.2.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ins_pricing-0.2.8.dist-info/top_level.txt,sha256=haZuNQpHKNBEPZx3NjLnHp8pV3I_J9QG8-HyJn00FA0,12
+ins_pricing-0.2.8.dist-info/RECORD,,
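RECORD rows have the form `path,sha256=<digest>,<size>`, where the digest is the urlsafe base64 encoding of the raw SHA-256 with the `=` padding stripped; that is the standard wheel RECORD convention, not anything specific to ins_pricing. A small sketch that re-derives the entry for the new `validation.py` from an installed copy, with the expected values copied from the RECORD diff above:

```python
# Recompute a RECORD-style hash and size for an installed module.
import base64
import hashlib
from importlib.util import find_spec
from pathlib import Path

spec = find_spec("ins_pricing.utils.validation")   # requires ins_pricing 0.2.8
data = Path(spec.origin).read_bytes()

# urlsafe base64 of the raw digest, padding stripped, as wheels record it
digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
print(digest == "4Tw9VUJPk0N-WO3YUqZP-xXRl1Xpubkm0vi3WzzZrv4")  # True for 0.2.8
print(len(data) == 13348)                                       # size column
```

The same recipe applies to any row above, which makes it easy to spot-check that an installed 0.2.8 wheel matches this published diff.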