ins-pricing 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/cli/BayesOpt_entry.py +32 -0
- ins_pricing/cli/utils/import_resolver.py +29 -3
- ins_pricing/cli/utils/notebook_utils.py +3 -2
- ins_pricing/docs/modelling/BayesOpt_USAGE.md +3 -3
- ins_pricing/modelling/core/bayesopt/__init__.py +4 -0
- ins_pricing/modelling/core/bayesopt/config_preprocess.py +12 -0
- ins_pricing/modelling/core/bayesopt/core.py +21 -8
- ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +38 -12
- ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +16 -6
- ins_pricing/modelling/core/bayesopt/models/model_gnn.py +16 -6
- ins_pricing/modelling/core/bayesopt/models/model_resn.py +16 -7
- ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +2 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +25 -8
- ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +14 -11
- ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +29 -10
- ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +28 -12
- ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +13 -14
- ins_pricing/modelling/core/bayesopt/utils/losses.py +129 -0
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +18 -3
- ins_pricing/modelling/core/bayesopt/utils/torch_trainer_mixin.py +24 -3
- ins_pricing/production/predict.py +38 -9
- ins_pricing/setup.py +1 -1
- ins_pricing/utils/metrics.py +27 -3
- ins_pricing/utils/torch_compat.py +40 -0
- {ins_pricing-0.3.2.dist-info → ins_pricing-0.3.4.dist-info}/METADATA +162 -162
- {ins_pricing-0.3.2.dist-info → ins_pricing-0.3.4.dist-info}/RECORD +28 -27
- {ins_pricing-0.3.2.dist-info → ins_pricing-0.3.4.dist-info}/WHEEL +0 -0
- {ins_pricing-0.3.2.dist-info → ins_pricing-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -23,6 +23,11 @@ from .preprocess import (
|
|
|
23
23
|
from .scoring import batch_score
|
|
24
24
|
from ..modelling.core.bayesopt.models.model_gnn import GraphNeuralNetSklearn
|
|
25
25
|
from ..modelling.core.bayesopt.models.model_resn import ResNetSklearn
|
|
26
|
+
from ..modelling.core.bayesopt.utils.losses import (
|
|
27
|
+
infer_loss_name_from_model_name,
|
|
28
|
+
normalize_loss_name,
|
|
29
|
+
resolve_tweedie_power,
|
|
30
|
+
)
|
|
26
31
|
from ins_pricing.utils import DeviceManager, get_logger
|
|
27
32
|
from ins_pricing.utils.torch_compat import torch_load
|
|
28
33
|
|
|
@@ -50,6 +55,15 @@ def _default_tweedie_power(model_name: str, task_type: str) -> Optional[float]:
|
|
|
50
55
|
return 1.5
|
|
51
56
|
|
|
52
57
|
|
|
58
|
+
def _resolve_loss_name(cfg: Dict[str, Any], model_name: str, task_type: str) -> str:
|
|
59
|
+
normalized = normalize_loss_name(cfg.get("loss_name"), task_type)
|
|
60
|
+
if task_type == "classification":
|
|
61
|
+
return "logloss" if normalized == "auto" else normalized
|
|
62
|
+
if normalized == "auto":
|
|
63
|
+
return infer_loss_name_from_model_name(model_name)
|
|
64
|
+
return normalized
|
|
65
|
+
|
|
66
|
+
|
|
53
67
|
def _resolve_value(
|
|
54
68
|
value: Any,
|
|
55
69
|
*,
|
|
@@ -182,11 +196,14 @@ def _build_resn_model(
|
|
|
182
196
|
task_type: str,
|
|
183
197
|
epochs: int,
|
|
184
198
|
resn_weight_decay: float,
|
|
199
|
+
loss_name: str,
|
|
185
200
|
params: Dict[str, Any],
|
|
186
201
|
) -> ResNetSklearn:
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
power = float(power)
|
|
202
|
+
if loss_name == "tweedie":
|
|
203
|
+
power = params.get("tw_power", _default_tweedie_power(model_name, task_type))
|
|
204
|
+
power = float(power) if power is not None else None
|
|
205
|
+
else:
|
|
206
|
+
power = resolve_tweedie_power(loss_name, default=1.5)
|
|
190
207
|
weight_decay = float(params.get("weight_decay", resn_weight_decay))
|
|
191
208
|
return ResNetSklearn(
|
|
192
209
|
model_nme=model_name,
|
|
@@ -205,6 +222,7 @@ def _build_resn_model(
|
|
|
205
222
|
weight_decay=weight_decay,
|
|
206
223
|
use_data_parallel=False,
|
|
207
224
|
use_ddp=False,
|
|
225
|
+
loss_name=loss_name,
|
|
208
226
|
)
|
|
209
227
|
|
|
210
228
|
|
|
@@ -215,9 +233,15 @@ def _build_gnn_model(
|
|
|
215
233
|
task_type: str,
|
|
216
234
|
epochs: int,
|
|
217
235
|
cfg: Dict[str, Any],
|
|
236
|
+
loss_name: str,
|
|
218
237
|
params: Dict[str, Any],
|
|
219
238
|
) -> GraphNeuralNetSklearn:
|
|
220
239
|
base_tw = _default_tweedie_power(model_name, task_type)
|
|
240
|
+
if loss_name == "tweedie":
|
|
241
|
+
tw_power = params.get("tw_power", base_tw)
|
|
242
|
+
tw_power = float(tw_power) if tw_power is not None else None
|
|
243
|
+
else:
|
|
244
|
+
tw_power = resolve_tweedie_power(loss_name, default=1.5)
|
|
221
245
|
return GraphNeuralNetSklearn(
|
|
222
246
|
model_nme=f"{model_name}_gnn",
|
|
223
247
|
input_dim=input_dim,
|
|
@@ -229,7 +253,7 @@ def _build_gnn_model(
|
|
|
229
253
|
epochs=int(params.get("epochs", epochs)),
|
|
230
254
|
patience=int(params.get("patience", 5)),
|
|
231
255
|
task_type=task_type,
|
|
232
|
-
tweedie_power=
|
|
256
|
+
tweedie_power=tw_power,
|
|
233
257
|
weight_decay=float(params.get("weight_decay", 0.0)),
|
|
234
258
|
use_data_parallel=False,
|
|
235
259
|
use_ddp=False,
|
|
@@ -239,6 +263,7 @@ def _build_gnn_model(
|
|
|
239
263
|
max_gpu_knn_nodes=cfg.get("gnn_max_gpu_knn_nodes"),
|
|
240
264
|
knn_gpu_mem_ratio=cfg.get("gnn_knn_gpu_mem_ratio", 0.9),
|
|
241
265
|
knn_gpu_mem_overhead=cfg.get("gnn_knn_gpu_mem_overhead", 2.0),
|
|
266
|
+
loss_name=loss_name,
|
|
242
267
|
)
|
|
243
268
|
|
|
244
269
|
|
|
@@ -273,6 +298,9 @@ def load_saved_model(
|
|
|
273
298
|
from ..modelling.core.bayesopt.models.model_ft_components import FTTransformerCore
|
|
274
299
|
|
|
275
300
|
# Reconstruct model from config
|
|
301
|
+
resolved_loss = model_config.get("loss_name")
|
|
302
|
+
if not resolved_loss:
|
|
303
|
+
resolved_loss = _resolve_loss_name(cfg, model_name, task_type)
|
|
276
304
|
model = FTTransformerSklearn(
|
|
277
305
|
model_nme=model_config.get("model_nme", ""),
|
|
278
306
|
num_cols=model_config.get("num_cols", []),
|
|
@@ -282,6 +310,7 @@ def load_saved_model(
|
|
|
282
310
|
n_layers=model_config.get("n_layers", 4),
|
|
283
311
|
dropout=model_config.get("dropout", 0.1),
|
|
284
312
|
task_type=model_config.get("task_type", "regression"),
|
|
313
|
+
loss_name=resolved_loss,
|
|
285
314
|
tweedie_power=model_config.get("tw_power", 1.5),
|
|
286
315
|
num_numeric_tokens=model_config.get("num_numeric_tokens"),
|
|
287
316
|
use_data_parallel=False,
|
|
@@ -337,12 +366,14 @@ def load_saved_model(
|
|
|
337
366
|
params = load_best_params(output_dir, model_name, model_key)
|
|
338
367
|
if params is None:
|
|
339
368
|
raise RuntimeError("Best params not found for resn")
|
|
369
|
+
loss_name = _resolve_loss_name(cfg, model_name, task_type)
|
|
340
370
|
model = _build_resn_model(
|
|
341
371
|
model_name=model_name,
|
|
342
372
|
input_dim=input_dim,
|
|
343
373
|
task_type=task_type,
|
|
344
374
|
epochs=int(cfg.get("epochs", 50)),
|
|
345
375
|
resn_weight_decay=float(cfg.get("resn_weight_decay", 1e-4)),
|
|
376
|
+
loss_name=loss_name,
|
|
346
377
|
params=params,
|
|
347
378
|
)
|
|
348
379
|
model.resnet.load_state_dict(state_dict)
|
|
@@ -357,12 +388,14 @@ def load_saved_model(
|
|
|
357
388
|
raise ValueError(f"Invalid GNN checkpoint: {model_path}")
|
|
358
389
|
params = payload.get("best_params") or {}
|
|
359
390
|
state_dict = payload.get("state_dict")
|
|
391
|
+
loss_name = _resolve_loss_name(cfg, model_name, task_type)
|
|
360
392
|
model = _build_gnn_model(
|
|
361
393
|
model_name=model_name,
|
|
362
394
|
input_dim=input_dim,
|
|
363
395
|
task_type=task_type,
|
|
364
396
|
epochs=int(cfg.get("epochs", 50)),
|
|
365
397
|
cfg=cfg,
|
|
398
|
+
loss_name=loss_name,
|
|
366
399
|
params=params,
|
|
367
400
|
)
|
|
368
401
|
model.set_params(dict(params))
|
|
@@ -628,8 +661,4 @@ def predict_from_config(
|
|
|
628
661
|
if output_path:
|
|
629
662
|
output_path = Path(output_path)
|
|
630
663
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
631
|
-
if output_path.suffix.lower
|
|
632
|
-
result.to_parquet(output_path, index=False)
|
|
633
|
-
else:
|
|
634
|
-
result.to_csv(output_path, index=False)
|
|
635
|
-
return result
|
|
664
|
+
if output_path.suffix.lower
|
ins_pricing/setup.py
CHANGED
ins_pricing/utils/metrics.py
CHANGED
|
@@ -22,9 +22,16 @@ import numpy as np
|
|
|
22
22
|
import pandas as pd
|
|
23
23
|
|
|
24
24
|
try:
|
|
25
|
-
from sklearn.metrics import
|
|
25
|
+
from sklearn.metrics import (
|
|
26
|
+
log_loss,
|
|
27
|
+
mean_absolute_error,
|
|
28
|
+
mean_squared_error,
|
|
29
|
+
mean_tweedie_deviance,
|
|
30
|
+
)
|
|
26
31
|
except ImportError:
|
|
27
32
|
log_loss = None
|
|
33
|
+
mean_absolute_error = None
|
|
34
|
+
mean_squared_error = None
|
|
28
35
|
mean_tweedie_deviance = None
|
|
29
36
|
|
|
30
37
|
|
|
@@ -198,6 +205,7 @@ class MetricFactory:
|
|
|
198
205
|
self,
|
|
199
206
|
task_type: str = "regression",
|
|
200
207
|
tweedie_power: float = 1.5,
|
|
208
|
+
loss_name: str = "tweedie",
|
|
201
209
|
clip_min: float = 1e-8,
|
|
202
210
|
clip_max: float = 1 - 1e-8,
|
|
203
211
|
):
|
|
@@ -206,11 +214,13 @@ class MetricFactory:
|
|
|
206
214
|
Args:
|
|
207
215
|
task_type: Either 'regression' or 'classification'
|
|
208
216
|
tweedie_power: Power parameter for Tweedie deviance (1.0-2.0)
|
|
217
|
+
loss_name: Regression loss name ('tweedie', 'poisson', 'gamma', 'mse', 'mae')
|
|
209
218
|
clip_min: Minimum value for clipping predictions
|
|
210
219
|
clip_max: Maximum value for clipping predictions (for classification)
|
|
211
220
|
"""
|
|
212
221
|
self.task_type = task_type
|
|
213
222
|
self.tweedie_power = tweedie_power
|
|
223
|
+
self.loss_name = loss_name
|
|
214
224
|
self.clip_min = clip_min
|
|
215
225
|
self.clip_max = clip_max
|
|
216
226
|
|
|
@@ -240,14 +250,28 @@ class MetricFactory:
|
|
|
240
250
|
y_pred_clipped = np.clip(y_pred, self.clip_min, self.clip_max)
|
|
241
251
|
return float(log_loss(y_true, y_pred_clipped, sample_weight=sample_weight))
|
|
242
252
|
|
|
243
|
-
|
|
253
|
+
loss_name = str(self.loss_name or "tweedie").strip().lower()
|
|
254
|
+
if loss_name in {"mse", "mae"}:
|
|
255
|
+
if mean_squared_error is None or mean_absolute_error is None:
|
|
256
|
+
raise ImportError("sklearn is required for metric computation")
|
|
257
|
+
if loss_name == "mse":
|
|
258
|
+
return float(mean_squared_error(
|
|
259
|
+
y_true, y_pred, sample_weight=sample_weight))
|
|
260
|
+
return float(mean_absolute_error(
|
|
261
|
+
y_true, y_pred, sample_weight=sample_weight))
|
|
262
|
+
|
|
244
263
|
y_pred_safe = np.maximum(y_pred, self.clip_min)
|
|
264
|
+
power = self.tweedie_power
|
|
265
|
+
if loss_name == "poisson":
|
|
266
|
+
power = 1.0
|
|
267
|
+
elif loss_name == "gamma":
|
|
268
|
+
power = 2.0
|
|
245
269
|
return float(
|
|
246
270
|
mean_tweedie_deviance(
|
|
247
271
|
y_true,
|
|
248
272
|
y_pred_safe,
|
|
249
273
|
sample_weight=sample_weight,
|
|
250
|
-
power=
|
|
274
|
+
power=power,
|
|
251
275
|
)
|
|
252
276
|
)
|
|
253
277
|
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import inspect
|
|
6
|
+
import os
|
|
6
7
|
from typing import Any, Optional
|
|
7
8
|
|
|
8
9
|
try:
|
|
@@ -14,6 +15,7 @@ except ImportError: # pragma: no cover - handled by callers
|
|
|
14
15
|
torch = None
|
|
15
16
|
|
|
16
17
|
_SUPPORTS_WEIGHTS_ONLY: Optional[bool] = None
|
|
18
|
+
_DYNAMO_PATCHED = False
|
|
17
19
|
|
|
18
20
|
|
|
19
21
|
def _supports_weights_only() -> bool:
|
|
@@ -43,3 +45,41 @@ def torch_load(
|
|
|
43
45
|
if weights_only is not None and _supports_weights_only():
|
|
44
46
|
return torch.load(path, *args, weights_only=weights_only, **kwargs)
|
|
45
47
|
return torch.load(path, *args, **kwargs)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _env_truthy(key: str) -> bool:
|
|
51
|
+
value = os.environ.get(key)
|
|
52
|
+
if value is None:
|
|
53
|
+
return False
|
|
54
|
+
return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def disable_torch_dynamo_if_requested() -> None:
|
|
58
|
+
"""Disable torch._dynamo wrappers when compile is explicitly disabled."""
|
|
59
|
+
global _DYNAMO_PATCHED
|
|
60
|
+
if _DYNAMO_PATCHED or not TORCH_AVAILABLE:
|
|
61
|
+
return
|
|
62
|
+
|
|
63
|
+
if not any(
|
|
64
|
+
_env_truthy(k)
|
|
65
|
+
for k in (
|
|
66
|
+
"TORCHDYNAMO_DISABLE",
|
|
67
|
+
"TORCH_DISABLE_DYNAMO",
|
|
68
|
+
"TORCH_COMPILE_DISABLE",
|
|
69
|
+
"TORCHINDUCTOR_DISABLE",
|
|
70
|
+
)
|
|
71
|
+
):
|
|
72
|
+
return
|
|
73
|
+
|
|
74
|
+
try:
|
|
75
|
+
import torch.optim.optimizer as optim_mod
|
|
76
|
+
except Exception:
|
|
77
|
+
return
|
|
78
|
+
|
|
79
|
+
for name in ("state_dict", "load_state_dict", "zero_grad", "add_param_group"):
|
|
80
|
+
fn = getattr(optim_mod.Optimizer, name, None)
|
|
81
|
+
wrapped = getattr(fn, "__wrapped__", None)
|
|
82
|
+
if wrapped is not None:
|
|
83
|
+
setattr(optim_mod.Optimizer, name, wrapped)
|
|
84
|
+
|
|
85
|
+
_DYNAMO_PATCHED = True
|
|
@@ -1,162 +1,162 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: ins_pricing
|
|
3
|
-
Version: 0.3.2
|
|
4
|
-
Summary: Reusable modelling, pricing, governance, and reporting utilities.
|
|
5
|
-
Author: meishi125478
|
|
6
|
-
License: Proprietary
|
|
7
|
-
Keywords: pricing,insurance,bayesopt,ml
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
11
|
-
Classifier: License :: Other/Proprietary License
|
|
12
|
-
Classifier: Operating System :: OS Independent
|
|
13
|
-
Classifier: Intended Audience :: Developers
|
|
14
|
-
Requires-Python: >=3.9
|
|
15
|
-
Description-Content-Type: text/markdown
|
|
16
|
-
Requires-Dist: numpy>=1.20
|
|
17
|
-
Requires-Dist: pandas>=1.4
|
|
18
|
-
Provides-Extra: bayesopt
|
|
19
|
-
Requires-Dist: torch>=1.13; extra == "bayesopt"
|
|
20
|
-
Requires-Dist: optuna>=3.0; extra == "bayesopt"
|
|
21
|
-
Requires-Dist: xgboost>=1.6; extra == "bayesopt"
|
|
22
|
-
Requires-Dist: scikit-learn>=1.1; extra == "bayesopt"
|
|
23
|
-
Requires-Dist: statsmodels>=0.13; extra == "bayesopt"
|
|
24
|
-
Requires-Dist: joblib>=1.2; extra == "bayesopt"
|
|
25
|
-
Requires-Dist: matplotlib>=3.5; extra == "bayesopt"
|
|
26
|
-
Provides-Extra: plotting
|
|
27
|
-
Requires-Dist: matplotlib>=3.5; extra == "plotting"
|
|
28
|
-
Requires-Dist: scikit-learn>=1.1; extra == "plotting"
|
|
29
|
-
Provides-Extra: explain
|
|
30
|
-
Requires-Dist: torch>=1.13; extra == "explain"
|
|
31
|
-
Requires-Dist: shap>=0.41; extra == "explain"
|
|
32
|
-
Requires-Dist: scikit-learn>=1.1; extra == "explain"
|
|
33
|
-
Provides-Extra: geo
|
|
34
|
-
Requires-Dist: contextily>=1.3; extra == "geo"
|
|
35
|
-
Requires-Dist: matplotlib>=3.5; extra == "geo"
|
|
36
|
-
Provides-Extra: gnn
|
|
37
|
-
Requires-Dist: torch>=1.13; extra == "gnn"
|
|
38
|
-
Requires-Dist: pynndescent>=0.5; extra == "gnn"
|
|
39
|
-
Requires-Dist: torch-geometric>=2.3; extra == "gnn"
|
|
40
|
-
Provides-Extra: all
|
|
41
|
-
Requires-Dist: torch>=1.13; extra == "all"
|
|
42
|
-
Requires-Dist: optuna>=3.0; extra == "all"
|
|
43
|
-
Requires-Dist: xgboost>=1.6; extra == "all"
|
|
44
|
-
Requires-Dist: scikit-learn>=1.1; extra == "all"
|
|
45
|
-
Requires-Dist: statsmodels>=0.13; extra == "all"
|
|
46
|
-
Requires-Dist: joblib>=1.2; extra == "all"
|
|
47
|
-
Requires-Dist: matplotlib>=3.5; extra == "all"
|
|
48
|
-
Requires-Dist: shap>=0.41; extra == "all"
|
|
49
|
-
Requires-Dist: contextily>=1.3; extra == "all"
|
|
50
|
-
Requires-Dist: pynndescent>=0.5; extra == "all"
|
|
51
|
-
Requires-Dist: torch-geometric>=2.3; extra == "all"
|
|
52
|
-
|
|
53
|
-
# Insurance-Pricing
|
|
54
|
-
|
|
55
|
-
A reusable toolkit for insurance modeling, pricing, governance, and reporting.
|
|
56
|
-
|
|
57
|
-
## Overview
|
|
58
|
-
|
|
59
|
-
Insurance-Pricing (ins_pricing) is an enterprise-grade Python library designed for machine learning model training, pricing calculations, and model governance workflows in the insurance industry.
|
|
60
|
-
|
|
61
|
-
### Core Modules
|
|
62
|
-
|
|
63
|
-
| Module | Description |
|
|
64
|
-
|--------|-------------|
|
|
65
|
-
| **modelling** | ML model training (GLM, XGBoost, ResNet, FT-Transformer, GNN) and model interpretability (SHAP, permutation importance) |
|
|
66
|
-
| **pricing** | Factor table construction, numeric binning, premium calibration, exposure calculation, PSI monitoring |
|
|
67
|
-
| **production** | Model prediction, batch scoring, data drift detection, production metrics monitoring |
|
|
68
|
-
| **governance** | Model registry, version management, approval workflows, audit logging |
|
|
69
|
-
| **reporting** | Report generation (Markdown format), report scheduling |
|
|
70
|
-
| **utils** | Data validation, performance profiling, device management, logging configuration |
|
|
71
|
-
|
|
72
|
-
### Quick Start
|
|
73
|
-
|
|
74
|
-
```python
|
|
75
|
-
# Model training with Bayesian optimization
|
|
76
|
-
from ins_pricing import bayesopt as ropt
|
|
77
|
-
|
|
78
|
-
model = ropt.BayesOptModel(
|
|
79
|
-
train_data, test_data,
|
|
80
|
-
model_name='my_model',
|
|
81
|
-
resp_nme='target',
|
|
82
|
-
weight_nme='weight',
|
|
83
|
-
factor_nmes=feature_list,
|
|
84
|
-
cate_list=categorical_features,
|
|
85
|
-
)
|
|
86
|
-
model.bayesopt_xgb(max_evals=100) # Train XGBoost
|
|
87
|
-
model.bayesopt_resnet(max_evals=50) # Train ResNet
|
|
88
|
-
model.bayesopt_ft(max_evals=50) # Train FT-Transformer
|
|
89
|
-
|
|
90
|
-
# Pricing: build factor table
|
|
91
|
-
from ins_pricing.pricing import build_factor_table
|
|
92
|
-
factors = build_factor_table(
|
|
93
|
-
df,
|
|
94
|
-
factor_col='age_band',
|
|
95
|
-
loss_col='claim_amount',
|
|
96
|
-
exposure_col='exposure',
|
|
97
|
-
)
|
|
98
|
-
|
|
99
|
-
# Production: batch scoring
|
|
100
|
-
from ins_pricing.production import batch_score
|
|
101
|
-
scores = batch_score(model.trainers['xgb'].predict, df)
|
|
102
|
-
|
|
103
|
-
# Model governance
|
|
104
|
-
from ins_pricing.governance import ModelRegistry
|
|
105
|
-
registry = ModelRegistry('models.json')
|
|
106
|
-
registry.register(model_name, version, metrics=metrics)
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
### Project Structure
|
|
110
|
-
|
|
111
|
-
```
|
|
112
|
-
ins_pricing/
|
|
113
|
-
├── cli/ # Command-line entry points
|
|
114
|
-
├── modelling/
|
|
115
|
-
│ ├── core/bayesopt/ # ML model training core
|
|
116
|
-
│ ├── explain/ # Model interpretability
|
|
117
|
-
│ └── plotting/ # Model visualization
|
|
118
|
-
├── pricing/ # Insurance pricing module
|
|
119
|
-
├── production/ # Production deployment module
|
|
120
|
-
├── governance/ # Model governance
|
|
121
|
-
├── reporting/ # Report generation
|
|
122
|
-
├── utils/ # Utilities
|
|
123
|
-
└── tests/ # Test suite
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
### Installation
|
|
127
|
-
|
|
128
|
-
```bash
|
|
129
|
-
# Basic installation
|
|
130
|
-
pip install ins_pricing
|
|
131
|
-
|
|
132
|
-
# Full installation (all optional dependencies)
|
|
133
|
-
pip install ins_pricing[all]
|
|
134
|
-
|
|
135
|
-
# Install specific extras
|
|
136
|
-
pip install ins_pricing[bayesopt] # Model training
|
|
137
|
-
pip install ins_pricing[explain] # Model explanation
|
|
138
|
-
pip install ins_pricing[plotting] # Visualization
|
|
139
|
-
pip install ins_pricing[gnn] # Graph neural networks
|
|
140
|
-
```
|
|
141
|
-
|
|
142
|
-
#### Multi-platform & GPU installation notes
|
|
143
|
-
|
|
144
|
-
- **PyTorch (CPU/GPU/MPS)**: Install the correct PyTorch build for your platform/GPU first (CUDA on
|
|
145
|
-
Linux/Windows, ROCm on supported AMD platforms, or MPS on Apple Silicon). Then install the
|
|
146
|
-
optional extras you need (e.g., `bayesopt`, `explain`, or `gnn`). This avoids pip pulling a
|
|
147
|
-
mismatched wheel.
|
|
148
|
-
- **Torch Geometric (GNN)**: `torch-geometric` often requires platform-specific wheels (e.g.,
|
|
149
|
-
`torch-scatter`, `torch-sparse`). Follow the official PyG installation instructions for your
|
|
150
|
-
CUDA/ROCm/CPU environment, then install `ins_pricing[gnn]`.
|
|
151
|
-
- **Multi-GPU**: Training code will use CUDA when available and can enable multi-GPU via
|
|
152
|
-
`torch.distributed`/`DataParallel` where supported. On Windows, CUDA DDP is not supported and will
|
|
153
|
-
fall back to single-GPU or DataParallel where possible.
|
|
154
|
-
|
|
155
|
-
### Requirements
|
|
156
|
-
|
|
157
|
-
- Python >= 3.9
|
|
158
|
-
- Core dependencies: numpy >= 1.20, pandas >= 1.4
|
|
159
|
-
|
|
160
|
-
### License
|
|
161
|
-
|
|
162
|
-
Proprietary
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ins_pricing
|
|
3
|
+
Version: 0.3.4
|
|
4
|
+
Summary: Reusable modelling, pricing, governance, and reporting utilities.
|
|
5
|
+
Author: meishi125478
|
|
6
|
+
License: Proprietary
|
|
7
|
+
Keywords: pricing,insurance,bayesopt,ml
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
11
|
+
Classifier: License :: Other/Proprietary License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Requires-Python: >=3.9
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: numpy>=1.20
|
|
17
|
+
Requires-Dist: pandas>=1.4
|
|
18
|
+
Provides-Extra: bayesopt
|
|
19
|
+
Requires-Dist: torch>=1.13; extra == "bayesopt"
|
|
20
|
+
Requires-Dist: optuna>=3.0; extra == "bayesopt"
|
|
21
|
+
Requires-Dist: xgboost>=1.6; extra == "bayesopt"
|
|
22
|
+
Requires-Dist: scikit-learn>=1.1; extra == "bayesopt"
|
|
23
|
+
Requires-Dist: statsmodels>=0.13; extra == "bayesopt"
|
|
24
|
+
Requires-Dist: joblib>=1.2; extra == "bayesopt"
|
|
25
|
+
Requires-Dist: matplotlib>=3.5; extra == "bayesopt"
|
|
26
|
+
Provides-Extra: plotting
|
|
27
|
+
Requires-Dist: matplotlib>=3.5; extra == "plotting"
|
|
28
|
+
Requires-Dist: scikit-learn>=1.1; extra == "plotting"
|
|
29
|
+
Provides-Extra: explain
|
|
30
|
+
Requires-Dist: torch>=1.13; extra == "explain"
|
|
31
|
+
Requires-Dist: shap>=0.41; extra == "explain"
|
|
32
|
+
Requires-Dist: scikit-learn>=1.1; extra == "explain"
|
|
33
|
+
Provides-Extra: geo
|
|
34
|
+
Requires-Dist: contextily>=1.3; extra == "geo"
|
|
35
|
+
Requires-Dist: matplotlib>=3.5; extra == "geo"
|
|
36
|
+
Provides-Extra: gnn
|
|
37
|
+
Requires-Dist: torch>=1.13; extra == "gnn"
|
|
38
|
+
Requires-Dist: pynndescent>=0.5; extra == "gnn"
|
|
39
|
+
Requires-Dist: torch-geometric>=2.3; extra == "gnn"
|
|
40
|
+
Provides-Extra: all
|
|
41
|
+
Requires-Dist: torch>=1.13; extra == "all"
|
|
42
|
+
Requires-Dist: optuna>=3.0; extra == "all"
|
|
43
|
+
Requires-Dist: xgboost>=1.6; extra == "all"
|
|
44
|
+
Requires-Dist: scikit-learn>=1.1; extra == "all"
|
|
45
|
+
Requires-Dist: statsmodels>=0.13; extra == "all"
|
|
46
|
+
Requires-Dist: joblib>=1.2; extra == "all"
|
|
47
|
+
Requires-Dist: matplotlib>=3.5; extra == "all"
|
|
48
|
+
Requires-Dist: shap>=0.41; extra == "all"
|
|
49
|
+
Requires-Dist: contextily>=1.3; extra == "all"
|
|
50
|
+
Requires-Dist: pynndescent>=0.5; extra == "all"
|
|
51
|
+
Requires-Dist: torch-geometric>=2.3; extra == "all"
|
|
52
|
+
|
|
53
|
+
# Insurance-Pricing
|
|
54
|
+
|
|
55
|
+
A reusable toolkit for insurance modeling, pricing, governance, and reporting.
|
|
56
|
+
|
|
57
|
+
## Overview
|
|
58
|
+
|
|
59
|
+
Insurance-Pricing (ins_pricing) is an enterprise-grade Python library designed for machine learning model training, pricing calculations, and model governance workflows in the insurance industry.
|
|
60
|
+
|
|
61
|
+
### Core Modules
|
|
62
|
+
|
|
63
|
+
| Module | Description |
|
|
64
|
+
|--------|-------------|
|
|
65
|
+
| **modelling** | ML model training (GLM, XGBoost, ResNet, FT-Transformer, GNN) and model interpretability (SHAP, permutation importance) |
|
|
66
|
+
| **pricing** | Factor table construction, numeric binning, premium calibration, exposure calculation, PSI monitoring |
|
|
67
|
+
| **production** | Model prediction, batch scoring, data drift detection, production metrics monitoring |
|
|
68
|
+
| **governance** | Model registry, version management, approval workflows, audit logging |
|
|
69
|
+
| **reporting** | Report generation (Markdown format), report scheduling |
|
|
70
|
+
| **utils** | Data validation, performance profiling, device management, logging configuration |
|
|
71
|
+
|
|
72
|
+
### Quick Start
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
# Model training with Bayesian optimization
|
|
76
|
+
from ins_pricing import bayesopt as ropt
|
|
77
|
+
|
|
78
|
+
model = ropt.BayesOptModel(
|
|
79
|
+
train_data, test_data,
|
|
80
|
+
model_name='my_model',
|
|
81
|
+
resp_nme='target',
|
|
82
|
+
weight_nme='weight',
|
|
83
|
+
factor_nmes=feature_list,
|
|
84
|
+
cate_list=categorical_features,
|
|
85
|
+
)
|
|
86
|
+
model.bayesopt_xgb(max_evals=100) # Train XGBoost
|
|
87
|
+
model.bayesopt_resnet(max_evals=50) # Train ResNet
|
|
88
|
+
model.bayesopt_ft(max_evals=50) # Train FT-Transformer
|
|
89
|
+
|
|
90
|
+
# Pricing: build factor table
|
|
91
|
+
from ins_pricing.pricing import build_factor_table
|
|
92
|
+
factors = build_factor_table(
|
|
93
|
+
df,
|
|
94
|
+
factor_col='age_band',
|
|
95
|
+
loss_col='claim_amount',
|
|
96
|
+
exposure_col='exposure',
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
# Production: batch scoring
|
|
100
|
+
from ins_pricing.production import batch_score
|
|
101
|
+
scores = batch_score(model.trainers['xgb'].predict, df)
|
|
102
|
+
|
|
103
|
+
# Model governance
|
|
104
|
+
from ins_pricing.governance import ModelRegistry
|
|
105
|
+
registry = ModelRegistry('models.json')
|
|
106
|
+
registry.register(model_name, version, metrics=metrics)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Project Structure
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
ins_pricing/
|
|
113
|
+
├── cli/ # Command-line entry points
|
|
114
|
+
├── modelling/
|
|
115
|
+
│ ├── core/bayesopt/ # ML model training core
|
|
116
|
+
│ ├── explain/ # Model interpretability
|
|
117
|
+
│ └── plotting/ # Model visualization
|
|
118
|
+
├── pricing/ # Insurance pricing module
|
|
119
|
+
├── production/ # Production deployment module
|
|
120
|
+
├── governance/ # Model governance
|
|
121
|
+
├── reporting/ # Report generation
|
|
122
|
+
├── utils/ # Utilities
|
|
123
|
+
└── tests/ # Test suite
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Installation
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# Basic installation
|
|
130
|
+
pip install ins_pricing
|
|
131
|
+
|
|
132
|
+
# Full installation (all optional dependencies)
|
|
133
|
+
pip install ins_pricing[all]
|
|
134
|
+
|
|
135
|
+
# Install specific extras
|
|
136
|
+
pip install ins_pricing[bayesopt] # Model training
|
|
137
|
+
pip install ins_pricing[explain] # Model explanation
|
|
138
|
+
pip install ins_pricing[plotting] # Visualization
|
|
139
|
+
pip install ins_pricing[gnn] # Graph neural networks
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
#### Multi-platform & GPU installation notes
|
|
143
|
+
|
|
144
|
+
- **PyTorch (CPU/GPU/MPS)**: Install the correct PyTorch build for your platform/GPU first (CUDA on
|
|
145
|
+
Linux/Windows, ROCm on supported AMD platforms, or MPS on Apple Silicon). Then install the
|
|
146
|
+
optional extras you need (e.g., `bayesopt`, `explain`, or `gnn`). This avoids pip pulling a
|
|
147
|
+
mismatched wheel.
|
|
148
|
+
- **Torch Geometric (GNN)**: `torch-geometric` often requires platform-specific wheels (e.g.,
|
|
149
|
+
`torch-scatter`, `torch-sparse`). Follow the official PyG installation instructions for your
|
|
150
|
+
CUDA/ROCm/CPU environment, then install `ins_pricing[gnn]`.
|
|
151
|
+
- **Multi-GPU**: Training code will use CUDA when available and can enable multi-GPU via
|
|
152
|
+
`torch.distributed`/`DataParallel` where supported. On Windows, CUDA DDP is not supported and will
|
|
153
|
+
fall back to single-GPU or DataParallel where possible.
|
|
154
|
+
|
|
155
|
+
### Requirements
|
|
156
|
+
|
|
157
|
+
- Python >= 3.9
|
|
158
|
+
- Core dependencies: numpy >= 1.20, pandas >= 1.4
|
|
159
|
+
|
|
160
|
+
### License
|
|
161
|
+
|
|
162
|
+
Proprietary
|