ins-pricing 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +60 -0
- ins_pricing/__init__.py +102 -0
- ins_pricing/governance/README.md +18 -0
- ins_pricing/governance/__init__.py +20 -0
- ins_pricing/governance/approval.py +93 -0
- ins_pricing/governance/audit.py +37 -0
- ins_pricing/governance/registry.py +99 -0
- ins_pricing/governance/release.py +159 -0
- ins_pricing/modelling/BayesOpt.py +146 -0
- ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
- ins_pricing/modelling/BayesOpt_entry.py +575 -0
- ins_pricing/modelling/BayesOpt_incremental.py +731 -0
- ins_pricing/modelling/Explain_Run.py +36 -0
- ins_pricing/modelling/Explain_entry.py +539 -0
- ins_pricing/modelling/Pricing_Run.py +36 -0
- ins_pricing/modelling/README.md +33 -0
- ins_pricing/modelling/__init__.py +44 -0
- ins_pricing/modelling/bayesopt/__init__.py +98 -0
- ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
- ins_pricing/modelling/bayesopt/core.py +1476 -0
- ins_pricing/modelling/bayesopt/models.py +2196 -0
- ins_pricing/modelling/bayesopt/trainers.py +2446 -0
- ins_pricing/modelling/bayesopt/utils.py +1021 -0
- ins_pricing/modelling/cli_common.py +136 -0
- ins_pricing/modelling/explain/__init__.py +55 -0
- ins_pricing/modelling/explain/gradients.py +334 -0
- ins_pricing/modelling/explain/metrics.py +176 -0
- ins_pricing/modelling/explain/permutation.py +155 -0
- ins_pricing/modelling/explain/shap_utils.py +146 -0
- ins_pricing/modelling/notebook_utils.py +284 -0
- ins_pricing/modelling/plotting/__init__.py +45 -0
- ins_pricing/modelling/plotting/common.py +63 -0
- ins_pricing/modelling/plotting/curves.py +572 -0
- ins_pricing/modelling/plotting/diagnostics.py +139 -0
- ins_pricing/modelling/plotting/geo.py +362 -0
- ins_pricing/modelling/plotting/importance.py +121 -0
- ins_pricing/modelling/run_logging.py +133 -0
- ins_pricing/modelling/tests/conftest.py +8 -0
- ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing/modelling/tests/test_explain.py +56 -0
- ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing/modelling/tests/test_plotting.py +63 -0
- ins_pricing/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing/modelling/watchdog_run.py +211 -0
- ins_pricing/pricing/README.md +44 -0
- ins_pricing/pricing/__init__.py +27 -0
- ins_pricing/pricing/calibration.py +39 -0
- ins_pricing/pricing/data_quality.py +117 -0
- ins_pricing/pricing/exposure.py +85 -0
- ins_pricing/pricing/factors.py +91 -0
- ins_pricing/pricing/monitoring.py +99 -0
- ins_pricing/pricing/rate_table.py +78 -0
- ins_pricing/production/__init__.py +21 -0
- ins_pricing/production/drift.py +30 -0
- ins_pricing/production/monitoring.py +143 -0
- ins_pricing/production/scoring.py +40 -0
- ins_pricing/reporting/README.md +20 -0
- ins_pricing/reporting/__init__.py +11 -0
- ins_pricing/reporting/report_builder.py +72 -0
- ins_pricing/reporting/scheduler.py +45 -0
- ins_pricing/setup.py +41 -0
- ins_pricing v2/__init__.py +23 -0
- ins_pricing v2/governance/__init__.py +20 -0
- ins_pricing v2/governance/approval.py +93 -0
- ins_pricing v2/governance/audit.py +37 -0
- ins_pricing v2/governance/registry.py +99 -0
- ins_pricing v2/governance/release.py +159 -0
- ins_pricing v2/modelling/Explain_Run.py +36 -0
- ins_pricing v2/modelling/Pricing_Run.py +36 -0
- ins_pricing v2/modelling/__init__.py +151 -0
- ins_pricing v2/modelling/cli_common.py +141 -0
- ins_pricing v2/modelling/config.py +249 -0
- ins_pricing v2/modelling/config_preprocess.py +254 -0
- ins_pricing v2/modelling/core.py +741 -0
- ins_pricing v2/modelling/data_container.py +42 -0
- ins_pricing v2/modelling/explain/__init__.py +55 -0
- ins_pricing v2/modelling/explain/gradients.py +334 -0
- ins_pricing v2/modelling/explain/metrics.py +176 -0
- ins_pricing v2/modelling/explain/permutation.py +155 -0
- ins_pricing v2/modelling/explain/shap_utils.py +146 -0
- ins_pricing v2/modelling/features.py +215 -0
- ins_pricing v2/modelling/model_manager.py +148 -0
- ins_pricing v2/modelling/model_plotting.py +463 -0
- ins_pricing v2/modelling/models.py +2203 -0
- ins_pricing v2/modelling/notebook_utils.py +294 -0
- ins_pricing v2/modelling/plotting/__init__.py +45 -0
- ins_pricing v2/modelling/plotting/common.py +63 -0
- ins_pricing v2/modelling/plotting/curves.py +572 -0
- ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
- ins_pricing v2/modelling/plotting/geo.py +362 -0
- ins_pricing v2/modelling/plotting/importance.py +121 -0
- ins_pricing v2/modelling/run_logging.py +133 -0
- ins_pricing v2/modelling/tests/conftest.py +8 -0
- ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing v2/modelling/tests/test_explain.py +56 -0
- ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing v2/modelling/tests/test_plotting.py +63 -0
- ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing v2/modelling/trainers.py +2447 -0
- ins_pricing v2/modelling/utils.py +1020 -0
- ins_pricing v2/modelling/watchdog_run.py +211 -0
- ins_pricing v2/pricing/__init__.py +27 -0
- ins_pricing v2/pricing/calibration.py +39 -0
- ins_pricing v2/pricing/data_quality.py +117 -0
- ins_pricing v2/pricing/exposure.py +85 -0
- ins_pricing v2/pricing/factors.py +91 -0
- ins_pricing v2/pricing/monitoring.py +99 -0
- ins_pricing v2/pricing/rate_table.py +78 -0
- ins_pricing v2/production/__init__.py +21 -0
- ins_pricing v2/production/drift.py +30 -0
- ins_pricing v2/production/monitoring.py +143 -0
- ins_pricing v2/production/scoring.py +40 -0
- ins_pricing v2/reporting/__init__.py +11 -0
- ins_pricing v2/reporting/report_builder.py +72 -0
- ins_pricing v2/reporting/scheduler.py +45 -0
- ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
- ins_pricing v2/scripts/Explain_entry.py +545 -0
- ins_pricing v2/scripts/__init__.py +1 -0
- ins_pricing v2/scripts/train.py +568 -0
- ins_pricing v2/setup.py +55 -0
- ins_pricing v2/smoke_test.py +28 -0
- ins_pricing-0.1.6.dist-info/METADATA +78 -0
- ins_pricing-0.1.6.dist-info/RECORD +169 -0
- ins_pricing-0.1.6.dist-info/WHEEL +5 -0
- ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
- user_packages/__init__.py +105 -0
- user_packages legacy/BayesOpt.py +5659 -0
- user_packages legacy/BayesOpt_entry.py +513 -0
- user_packages legacy/BayesOpt_incremental.py +685 -0
- user_packages legacy/Pricing_Run.py +36 -0
- user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
- user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
- user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
- user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
- user_packages legacy/Try/BayesOpt legacy.py +3280 -0
- user_packages legacy/Try/BayesOpt.py +838 -0
- user_packages legacy/Try/BayesOptAll.py +1569 -0
- user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
- user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
- user_packages legacy/Try/BayesOptSearch.py +830 -0
- user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
- user_packages legacy/Try/BayesOptV1.py +1911 -0
- user_packages legacy/Try/BayesOptV10.py +2973 -0
- user_packages legacy/Try/BayesOptV11.py +3001 -0
- user_packages legacy/Try/BayesOptV12.py +3001 -0
- user_packages legacy/Try/BayesOptV2.py +2065 -0
- user_packages legacy/Try/BayesOptV3.py +2209 -0
- user_packages legacy/Try/BayesOptV4.py +2342 -0
- user_packages legacy/Try/BayesOptV5.py +2372 -0
- user_packages legacy/Try/BayesOptV6.py +2759 -0
- user_packages legacy/Try/BayesOptV7.py +2832 -0
- user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
- user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
- user_packages legacy/Try/BayesOptV9.py +2927 -0
- user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
- user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
- user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
- user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
- user_packages legacy/Try/xgbbayesopt.py +523 -0
- user_packages legacy/__init__.py +19 -0
- user_packages legacy/cli_common.py +124 -0
- user_packages legacy/notebook_utils.py +228 -0
- user_packages legacy/watchdog_run.py +202 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_permutation_importance_simple():
    """Permutation importance must rank the truly predictive feature first."""
    from ins_pricing.explain import permutation_importance

    gen = np.random.default_rng(0)
    features = pd.DataFrame(
        {name: gen.normal(size=200) for name in ("x1", "x2")}
    )
    target = 3.0 * features["x1"].to_numpy() + gen.normal(scale=0.1, size=200)

    def predict_fn(df):
        # The "model" depends on x1 only, so shuffling x2 should not hurt.
        return 3.0 * df["x1"].to_numpy()

    ranking = permutation_importance(
        predict_fn,
        features,
        target,
        metric="rmse",
        n_repeats=3,
        max_rows=None,
        random_state=0,
    )

    assert ranking.loc[0, "feature"] == "x1"
    assert ranking["importance_mean"].iloc[0] > ranking["importance_mean"].iloc[1]
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_integrated_gradients_linear():
    """For a bias-free linear model, IG attributions equal input * weight."""
    torch = pytest.importorskip("torch")
    from ins_pricing.explain import integrated_gradients_torch

    torch.manual_seed(0)
    weights = [1.0, 2.0, -1.0]
    model = torch.nn.Linear(3, 1, bias=False)
    with torch.no_grad():
        model.weight[:] = torch.tensor([weights])

    inputs = torch.tensor(
        [[1.0, 2.0, 3.0], [0.5, -1.0, 0.0]],
        dtype=torch.float32,
    )

    def forward(x):
        return model(x).squeeze(1)

    attrs = integrated_gradients_torch(forward, inputs, baseline="zeros", steps=10)
    expected = inputs.numpy() * np.array(weights)

    assert attrs.shape == inputs.shape
    np.testing.assert_allclose(attrs, expected, rtol=1e-2, atol=1e-2)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import types
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
pytest.importorskip("torch")
|
|
7
|
+
pytest.importorskip("optuna")
|
|
8
|
+
pytest.importorskip("xgboost")
|
|
9
|
+
pytest.importorskip("statsmodels")
|
|
10
|
+
|
|
11
|
+
from ins_pricing.bayesopt.trainers import FTTrainer
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DummyCtx:
    """Minimal stand-in for the trainer context consumed by FTTrainer."""

    def __init__(self, train_df: pd.DataFrame, test_df: pd.DataFrame):
        self.task_type = "regression"
        self.config = types.SimpleNamespace(use_ft_ddp=False, geo_feature_nmes=["geo"])
        self.train_data = train_df
        self.test_data = test_df
        self._build_calls = []  # records (train, test) snapshots per build call

    def _snapshot(self):
        # Deep copies so later mutation of the context cannot alter records.
        return self.train_data.copy(deep=True), self.test_data.copy(deep=True)

    def _build_geo_tokens(self, _params=None):
        self._build_calls.append(self._snapshot())
        train_copy, test_copy = self._snapshot()
        return train_copy, test_copy, ["geo_token"], None
+
|
|
28
|
+
|
|
29
|
+
def test_geo_token_split_uses_fold_and_restores_context():
    """Fold-level geo-token builds must see fold frames yet leave the context intact."""
    train = pd.DataFrame({"geo": ["a", "b", "c", "d"], "x": [1, 2, 3, 4]})
    test = pd.DataFrame({"geo": ["e"], "x": [5]})
    ctx = DummyCtx(train, test)
    trainer = FTTrainer(ctx)

    fold_train = train.iloc[[0, 1]]
    fold_val = train.iloc[[2, 3]]
    before = (ctx.train_data, ctx.test_data)

    result = trainer._build_geo_tokens_for_split(fold_train, fold_val, geo_params={"k": 1})

    # The context must be restored by identity, not merely by value.
    assert ctx.train_data is before[0]
    assert ctx.test_data is before[1]
    assert result is not None

    seen_train, seen_val = ctx._build_calls[0]
    assert seen_train.equals(train.loc[fold_train.index])
    assert seen_val.equals(train.loc[fold_val.index])
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
pytest.importorskip("torch")
|
|
6
|
+
pytest.importorskip("sklearn")
|
|
7
|
+
|
|
8
|
+
from ins_pricing.bayesopt.models import GraphNeuralNetSklearn
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_graph_cache_meta_invalidation(tmp_path):
    """A cached adjacency is reused for identical data and rejected after edits."""
    frame = pd.DataFrame({"a": [0.1, 0.2, 0.3], "b": [1.0, 2.0, 3.0]})
    cache_file = tmp_path / "gnn_cache.pt"

    model = GraphNeuralNetSklearn(
        model_nme="demo",
        input_dim=2,
        k_neighbors=1,
        epochs=1,
        use_approx_knn=False,
        graph_cache_path=str(cache_file),
    )

    # First build populates the on-disk cache.
    assert model._build_graph_from_df(frame) is not None
    assert cache_file.exists()

    # Identical data hits the cache.
    assert model._load_cached_adj(frame) is not None

    # Any change to the underlying values must invalidate the cache.
    mutated = frame.copy()
    mutated.iloc[0, 0] += 1.0
    assert model._load_cached_adj(mutated) is None
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
pytest.importorskip("torch")
|
|
8
|
+
pytest.importorskip("xgboost")
|
|
9
|
+
pytest.importorskip("optuna")
|
|
10
|
+
pytest.importorskip("statsmodels")
|
|
11
|
+
pytest.importorskip("shap")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_plotting_outputs(tmp_path, monkeypatch):
    """Smoke test: lift and double-lift plots are written under plot/."""
    monkeypatch.setenv("MPLBACKEND", "Agg")
    config_dir = tmp_path / ".mplconfig"
    cache_root = tmp_path / ".cache"
    (cache_root / "fontconfig").mkdir(parents=True, exist_ok=True)
    config_dir.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("MPLCONFIGDIR", str(config_dir))
    monkeypatch.setenv("XDG_CACHE_HOME", str(cache_root))

    from ins_pricing.BayesOpt import BayesOptModel
    import matplotlib.pyplot as plt

    # Headless runs must never block on an interactive window.
    monkeypatch.setattr(plt, "show", lambda *args, **kwargs: None)

    rng = np.random.default_rng(0)
    train = pd.DataFrame(
        {
            "x1": rng.normal(size=30),
            "y": rng.normal(size=30),
            "w": rng.uniform(0.5, 1.5, size=30),
        }
    )
    test = pd.DataFrame({"x1": rng.normal(size=20)})

    model = BayesOptModel(
        train,
        test,
        model_nme="demo",
        resp_nme="y",
        weight_nme="w",
        factor_nmes=["x1"],
        task_type="regression",
        use_gpu=False,
        output_dir=str(tmp_path),
    )

    # Fake per-model predictions plus their weighted counterparts.
    for frame in (model.train_data, model.test_data):
        preds = {tag: rng.normal(size=len(frame)) for tag in ("xgb", "resn")}
        for tag, values in preds.items():
            frame[f"pred_{tag}"] = values
        for tag in ("xgb", "resn"):
            frame[f"w_pred_{tag}"] = frame[f"pred_{tag}"] * frame[model.weight_nme]

    model.plot_lift("Xgboost", "pred_xgb", n_bins=5)
    model.plot_dlift(["xgb", "resn"], n_bins=5)

    for artefact in ("01_demo_Xgboost_lift.png", "02_demo_dlift_xgb_vs_resn.png"):
        assert (tmp_path / "plot" / artefact).exists()
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
pytest.importorskip("matplotlib")
|
|
6
|
+
pytest.importorskip("sklearn")
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _configure_matplotlib(tmp_path, monkeypatch):
|
|
10
|
+
monkeypatch.setenv("MPLBACKEND", "Agg")
|
|
11
|
+
mpl_cfg = tmp_path / ".mplconfig"
|
|
12
|
+
cache_dir = tmp_path / ".cache"
|
|
13
|
+
(cache_dir / "fontconfig").mkdir(parents=True, exist_ok=True)
|
|
14
|
+
mpl_cfg.mkdir(parents=True, exist_ok=True)
|
|
15
|
+
monkeypatch.setenv("MPLCONFIGDIR", str(mpl_cfg))
|
|
16
|
+
monkeypatch.setenv("XDG_CACHE_HOME", str(cache_dir))
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_plotting_library_outputs(tmp_path, monkeypatch):
    """Every plotting helper should save a PNG to its requested path."""
    _configure_matplotlib(tmp_path, monkeypatch)

    from ins_pricing.plotting import curves, diagnostics, geo, importance

    rng = np.random.default_rng(42)
    n = 80
    score_a = rng.normal(loc=0.2, scale=1.0, size=n)
    score_b = rng.normal(loc=0.1, scale=1.0, size=n)
    observed = np.abs(rng.normal(loc=1.0, scale=0.5, size=n))
    exposure = rng.uniform(0.5, 2.0, size=n)
    weighted_actual = observed * exposure

    curves.plot_lift_curve(
        score_a,
        weighted_actual,
        exposure,
        n_bins=8,
        save_path=str(tmp_path / "lift.png"),
    )
    curves.plot_double_lift_curve(
        score_a,
        score_b,
        weighted_actual,
        exposure,
        n_bins=8,
        save_path=str(tmp_path / "dlift.png"),
    )

    labels = rng.integers(0, 2, size=n)
    curves.plot_roc_curves(
        labels,
        {"m1": score_a, "m2": score_b},
        save_path=str(tmp_path / "roc.png"),
    )

    importance.plot_feature_importance(
        {"x1": 0.3, "x2": 0.1, "x3": 0.05},
        save_path=str(tmp_path / "importance.png"),
    )

    diagnostics.plot_loss_curve(
        history={"train": [1.0, 0.7, 0.5], "val": [1.2, 0.8, 0.6]},
        save_path=str(tmp_path / "loss.png"),
    )
    oneway_frame = pd.DataFrame(
        {
            "x1": rng.normal(size=n),
            "w_act": weighted_actual,
            "w": exposure,
        }
    )
    diagnostics.plot_oneway(
        oneway_frame,
        feature="x1",
        weight_col="w",
        target_col="w_act",
        n_bins=6,
        save_path=str(tmp_path / "oneway.png"),
    )

    df_geo = pd.DataFrame(
        {
            "lon": rng.uniform(100, 120, size=n),
            "lat": rng.uniform(20, 40, size=n),
            "loss": observed,
        }
    )
    geo.plot_geo_heatmap(
        df_geo,
        x_col="lon",
        y_col="lat",
        value_col="loss",
        bins=10,
        save_path=str(tmp_path / "geo_heat.png"),
    )
    geo.plot_geo_contour(
        df_geo,
        x_col="lon",
        y_col="lat",
        value_col="loss",
        max_points=40,
        levels=6,
        save_path=str(tmp_path / "geo_contour.png"),
    )

    for name in (
        "lift.png",
        "dlift.png",
        "roc.png",
        "importance.png",
        "loss.png",
        "oneway.png",
        "geo_heat.png",
        "geo_contour.png",
    ):
        assert (tmp_path / name).exists()
|
+
|
|
112
|
+
|
|
113
|
+
def test_geo_plotting_on_map_optional(tmp_path, monkeypatch):
    """Basemap variants (requiring contextily) still save PNGs with basemap=None."""
    _configure_matplotlib(tmp_path, monkeypatch)
    pytest.importorskip("contextily")

    from ins_pricing.plotting import geo

    rng = np.random.default_rng(7)
    n = 60
    frame = pd.DataFrame(
        {
            "lon": rng.uniform(105, 115, size=n),
            "lat": rng.uniform(25, 35, size=n),
            "loss": np.abs(rng.normal(loc=1.0, scale=0.4, size=n)),
        }
    )

    shared = dict(lon_col="lon", lat_col="lat", value_col="loss", basemap=None)
    geo.plot_geo_heatmap_on_map(
        frame,
        bins=12,
        save_path=str(tmp_path / "geo_heat_map.png"),
        **shared,
    )
    geo.plot_geo_contour_on_map(
        frame,
        max_points=30,
        levels=5,
        save_path=str(tmp_path / "geo_contour_map.png"),
        **shared,
    )

    for name in ("geo_heat_map.png", "geo_contour_map.png"):
        assert (tmp_path / name).exists()
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from ins_pricing.bayesopt.config_preprocess import BayesOptConfig, DatasetPreprocessor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _build_config(binary_resp: bool = False) -> BayesOptConfig:
    """Create a minimal regression config; optionally wire a binary response."""
    kwargs = dict(
        model_nme="demo",
        resp_nme="y",
        weight_nme="w",
        factor_nmes=["x1"],
        task_type="regression",
        binary_resp_nme="y_bin" if binary_resp else None,
    )
    return BayesOptConfig(**kwargs)
+
|
|
18
|
+
|
|
19
|
+
def test_preprocessor_fills_missing_test_labels():
    """Missing response/weight columns in the test frame are synthesised safely."""
    train = pd.DataFrame(
        {
            "x1": [1.0, 2.0, 3.0],
            "y": [10.0, 20.0, 30.0],
            "w": [1.0, 2.0, 3.0],
            "y_bin": [0, 1, 0],
        }
    )
    test = pd.DataFrame({"x1": [4.0, 5.0]})

    result = DatasetPreprocessor(train, test, _build_config(binary_resp=True)).run()

    # Weighted-actual columns exist only where actuals exist (train side).
    for col in ("w_act", "w_binary_act"):
        assert col in result.train_data.columns
        assert col not in result.test_data.columns
    # Test weights default to 1.0; test labels stay missing.
    assert result.test_data["w"].eq(1.0).all()
    for col in ("y", "y_bin"):
        assert result.test_data[col].isna().all()
+
|
|
41
|
+
|
|
42
|
+
def test_preprocessor_missing_train_columns_raises():
    """Training data lacking the response/weight columns must fail loudly."""
    with pytest.raises(KeyError):
        DatasetPreprocessor(
            pd.DataFrame({"x1": [1.0]}),
            pd.DataFrame({"x1": [2.0]}),
            _build_config(binary_resp=False),
        ).run()
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import os
|
|
5
|
+
import subprocess
|
|
6
|
+
import sys
|
|
7
|
+
import threading
|
|
8
|
+
import time
|
|
9
|
+
from typing import List, Optional
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
from .run_logging import configure_run_logging # type: ignore
|
|
13
|
+
except Exception: # pragma: no cover
|
|
14
|
+
try:
|
|
15
|
+
from run_logging import configure_run_logging # type: ignore
|
|
16
|
+
except Exception: # pragma: no cover
|
|
17
|
+
configure_run_logging = None # type: ignore
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _split_argv(argv: List[str]) -> tuple[List[str], List[str]]:
|
|
21
|
+
if "--" not in argv:
|
|
22
|
+
raise ValueError("Missing '--' separator before the command to run.")
|
|
23
|
+
idx = argv.index("--")
|
|
24
|
+
return argv[:idx], argv[idx + 1 :]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _kill_process_tree(pid: int) -> None:
    """Forcefully terminate *pid* and its descendants; never raises."""
    # Guard against invalid/uninitialised pids (e.g. Popen failed to start).
    if pid <= 0:
        return
    if os.name == "nt":
        # Windows: taskkill /T kills the whole tree, /F forces termination;
        # output and failures are discarded (check=False).
        subprocess.run(
            ["taskkill", "/PID", str(pid), "/T", "/F"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
        return
    try:
        # POSIX: children are started with start_new_session=True (see
        # run_with_watchdog), so the pid doubles as the process-group id.
        # SIGTERM (15) first for a graceful stop, SIGKILL (9) after a
        # short grace period.
        os.killpg(pid, 15)
        time.sleep(2)
        os.killpg(pid, 9)
    except Exception:
        # Group signalling can fail if the group is already gone or pid is
        # not a group leader; fall back to killing just the one process.
        try:
            os.kill(pid, 9)
        except Exception:
            pass
+
|
|
48
|
+
|
|
49
|
+
def _reader_thread(
|
|
50
|
+
proc: subprocess.Popen, last_output_ts: dict, prefix: str = ""
|
|
51
|
+
) -> None:
|
|
52
|
+
assert proc.stdout is not None
|
|
53
|
+
for line in proc.stdout:
|
|
54
|
+
last_output_ts["ts"] = time.time()
|
|
55
|
+
if prefix:
|
|
56
|
+
sys.stdout.write(prefix)
|
|
57
|
+
sys.stdout.write(line)
|
|
58
|
+
sys.stdout.flush()
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _parse_args(before_cmd: List[str], cmd: List[str]) -> argparse.Namespace:
|
|
62
|
+
parser = argparse.ArgumentParser(
|
|
63
|
+
description=(
|
|
64
|
+
"Run a command under a simple watchdog: if there is no stdout/stderr "
|
|
65
|
+
"output for N seconds, kill the whole process tree and restart. "
|
|
66
|
+
"Designed to pair with optuna_storage so BayesOpt can resume."
|
|
67
|
+
)
|
|
68
|
+
)
|
|
69
|
+
parser.add_argument(
|
|
70
|
+
"--idle-seconds",
|
|
71
|
+
type=int,
|
|
72
|
+
default=7200,
|
|
73
|
+
help="Restart if there is no output for this many seconds (default: 7200).",
|
|
74
|
+
)
|
|
75
|
+
parser.add_argument(
|
|
76
|
+
"--max-restarts",
|
|
77
|
+
type=int,
|
|
78
|
+
default=50,
|
|
79
|
+
help="Maximum restart attempts (default: 50).",
|
|
80
|
+
)
|
|
81
|
+
parser.add_argument(
|
|
82
|
+
"--restart-delay-seconds",
|
|
83
|
+
type=int,
|
|
84
|
+
default=10,
|
|
85
|
+
help="Delay between restarts (default: 10).",
|
|
86
|
+
)
|
|
87
|
+
parser.add_argument(
|
|
88
|
+
"--stop-on-nonzero-exit",
|
|
89
|
+
action="store_true",
|
|
90
|
+
help="If the command exits non-zero, stop instead of restarting.",
|
|
91
|
+
)
|
|
92
|
+
args = parser.parse_args(before_cmd)
|
|
93
|
+
if not cmd:
|
|
94
|
+
parser.error("Empty command after '--'.")
|
|
95
|
+
return args
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def run_with_watchdog(
    cmd: List[str],
    idle_seconds: int,
    max_restarts: int,
    restart_delay_seconds: int,
    stop_on_nonzero_exit: bool,
) -> int:
    """Run *cmd*, restarting it whenever it stays silent for too long.

    Args:
        cmd: Command and arguments to execute.
        idle_seconds: Kill and restart the child if it produces no output
            for this many seconds (clamped to >= 1).
        max_restarts: Maximum number of restarts before giving up.
        restart_delay_seconds: Pause between restarts (clamped to >= 0).
        stop_on_nonzero_exit: Stop (instead of restarting) when the child
            exits non-zero on its own; idle kills still trigger a restart.

    Returns:
        The child's final exit code (0 on success).
    """
    idle_seconds = max(1, int(idle_seconds))
    max_restarts = max(0, int(max_restarts))
    restart_delay_seconds = max(0, int(restart_delay_seconds))

    attempt = 0
    while True:
        attempt += 1
        print(
            f"[watchdog] start attempt={attempt} idle_seconds={idle_seconds} cmd={cmd}",
            flush=True,
        )

        # Start the child in its own process group / session so that the
        # whole tree can be killed on idle timeouts.
        creationflags = 0
        start_new_session = False
        if os.name == "nt":
            creationflags = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0)
        else:
            start_new_session = True

        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,  # line-buffered so the reader sees output promptly
            creationflags=creationflags,
            start_new_session=start_new_session,
        )

        last_output_ts: dict = {"ts": time.time()}
        reader = threading.Thread(
            target=_reader_thread,
            args=(proc, last_output_ts),
            kwargs={"prefix": ""},
            daemon=True,
        )
        reader.start()

        killed_for_idle = False
        exit_code: Optional[int] = None
        while True:
            try:
                # wait(timeout) returns the instant the child exits, unlike
                # the previous poll()+sleep(5) loop which could lag by 5s.
                exit_code = proc.wait(timeout=5)
                break
            except subprocess.TimeoutExpired:
                pass
            idle_for = time.time() - float(last_output_ts["ts"])
            if idle_for > idle_seconds:
                killed_for_idle = True
                print(
                    f"[watchdog] idle>{idle_seconds}s (idle_for={int(idle_for)}s), killing pid={proc.pid}",
                    flush=True,
                )
                _kill_process_tree(proc.pid)
                break

        # Reap the child (it may have just been killed above).
        try:
            proc.wait(timeout=30)
        except Exception:
            _kill_process_tree(proc.pid)

        if exit_code is None:
            # Re-poll after the kill/reap. The previous `proc.poll() or 1`
            # silently mapped a legitimate 0 exit (a child that finished
            # just as the idle kill fired) to failure code 1.
            exit_code = proc.poll()
            if exit_code is None:
                exit_code = 1

        if exit_code == 0:
            print("[watchdog] finished with exit_code=0", flush=True)
            return 0

        if stop_on_nonzero_exit and not killed_for_idle:
            print(
                f"[watchdog] command exited non-zero (exit_code={exit_code}); stop.",
                flush=True,
            )
            return int(exit_code)

        if attempt > max_restarts + 1:
            print(
                f"[watchdog] exceeded max_restarts={max_restarts}; last exit_code={exit_code}",
                flush=True,
            )
            return int(exit_code)

        print(
            f"[watchdog] restart in {restart_delay_seconds}s (exit_code={exit_code}, killed_for_idle={killed_for_idle})",
            flush=True,
        )
        if restart_delay_seconds:
            time.sleep(restart_delay_seconds)
+
|
|
194
|
+
|
|
195
|
+
def main(argv: Optional[List[str]] = None) -> int:
    """CLI entry point: parse options, then supervise the wrapped command."""
    if configure_run_logging:
        configure_run_logging(prefix="watchdog")
    raw_args = list(sys.argv[1:]) if argv is None else list(argv)
    watchdog_args, wrapped_cmd = _split_argv(raw_args)
    options = _parse_args(watchdog_args, wrapped_cmd)
    return run_with_watchdog(
        cmd=wrapped_cmd,
        idle_seconds=options.idle_seconds,
        max_restarts=options.max_restarts,
        restart_delay_seconds=options.restart_delay_seconds,
        stop_on_nonzero_exit=bool(options.stop_on_nonzero_exit),
    )
+
|
|
209
|
+
|
|
210
|
+
if __name__ == "__main__":  # pragma: no cover - CLI entry point
    sys.exit(main())
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# pricing
|
|
2
|
+
|
|
3
|
+
Lightweight pricing loop utilities: data quality checks, exposure/targets,
|
|
4
|
+
factor tables, rate tables, calibration, and monitoring (PSI).
|
|
5
|
+
|
|
6
|
+
Quick start:
|
|
7
|
+
|
|
8
|
+
```python
|
|
9
|
+
from ins_pricing.pricing import (
|
|
10
|
+
compute_exposure,
|
|
11
|
+
build_frequency_severity,
|
|
12
|
+
build_factor_table,
|
|
13
|
+
compute_base_rate,
|
|
14
|
+
rate_premium,
|
|
15
|
+
fit_calibration_factor,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
df["exposure"] = compute_exposure(df, "start_date", "end_date")
|
|
19
|
+
df = build_frequency_severity(
|
|
20
|
+
df,
|
|
21
|
+
exposure_col="exposure",
|
|
22
|
+
claim_count_col="claim_cnt",
|
|
23
|
+
claim_amount_col="claim_amt",
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
base_rate = compute_base_rate(df, loss_col="claim_amt", exposure_col="exposure")
|
|
27
|
+
vehicle_table = build_factor_table(
|
|
28
|
+
df,
|
|
29
|
+
factor_col="vehicle_type",
|
|
30
|
+
loss_col="claim_amt",
|
|
31
|
+
exposure_col="exposure",
|
|
32
|
+
base_rate=base_rate,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
premium = rate_premium(
|
|
36
|
+
df,
|
|
37
|
+
exposure_col="exposure",
|
|
38
|
+
base_rate=base_rate,
|
|
39
|
+
factor_tables={"vehicle_type": vehicle_table},
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
factor = fit_calibration_factor(premium, df["claim_amt"].to_numpy(), target_lr=0.65)
|
|
43
|
+
premium_calibrated = premium * factor
|
|
44
|
+
```
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Public API of the pricing sub-package.

Re-exports the user-facing helpers from the calibration, data-quality,
exposure, factor, monitoring, and rate-table modules so callers can use
``from ins_pricing.pricing import ...`` directly.
"""

from __future__ import annotations

from .calibration import apply_calibration, fit_calibration_factor
from .data_quality import detect_leakage, profile_columns, validate_schema
from .exposure import aggregate_policy_level, build_frequency_severity, compute_exposure
from .factors import bin_numeric, build_factor_table
from .monitoring import population_stability_index, psi_report
from .rate_table import RateTable, apply_factor_tables, compute_base_rate, rate_premium

__all__ = [
    # calibration
    "apply_calibration",
    "fit_calibration_factor",
    # data quality
    "detect_leakage",
    "profile_columns",
    "validate_schema",
    # exposure / frequency-severity
    "aggregate_policy_level",
    "build_frequency_severity",
    "compute_exposure",
    # factor tables
    "bin_numeric",
    "build_factor_table",
    # monitoring (PSI)
    "population_stability_index",
    "psi_report",
    # rating
    "RateTable",
    "apply_factor_tables",
    "compute_base_rate",
    "rate_premium",
]
|