ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
@@ -1,56 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- import pytest
4
-
5
-
6
- def test_permutation_importance_simple():
7
- from ins_pricing.explain import permutation_importance
8
-
9
- rng = np.random.default_rng(0)
10
- X = pd.DataFrame(
11
- {
12
- "x1": rng.normal(size=200),
13
- "x2": rng.normal(size=200),
14
- }
15
- )
16
- y = 3.0 * X["x1"].to_numpy() + rng.normal(scale=0.1, size=200)
17
-
18
- def predict_fn(df):
19
- return 3.0 * df["x1"].to_numpy()
20
-
21
- imp = permutation_importance(
22
- predict_fn,
23
- X,
24
- y,
25
- metric="rmse",
26
- n_repeats=3,
27
- max_rows=None,
28
- random_state=0,
29
- )
30
-
31
- assert imp.loc[0, "feature"] == "x1"
32
- assert imp["importance_mean"].iloc[0] > imp["importance_mean"].iloc[1]
33
-
34
-
35
- def test_integrated_gradients_linear():
36
- torch = pytest.importorskip("torch")
37
- from ins_pricing.explain import integrated_gradients_torch
38
-
39
- torch.manual_seed(0)
40
- model = torch.nn.Linear(3, 1, bias=False)
41
- with torch.no_grad():
42
- model.weight[:] = torch.tensor([[1.0, 2.0, -1.0]])
43
-
44
- X = torch.tensor(
45
- [[1.0, 2.0, 3.0], [0.5, -1.0, 0.0]],
46
- dtype=torch.float32,
47
- )
48
-
49
- def forward(x):
50
- return model(x).squeeze(1)
51
-
52
- attrs = integrated_gradients_torch(forward, X, baseline="zeros", steps=10)
53
- expected = X.numpy() * np.array([1.0, 2.0, -1.0])
54
-
55
- assert attrs.shape == X.shape
56
- np.testing.assert_allclose(attrs, expected, rtol=1e-2, atol=1e-2)
@@ -1,49 +0,0 @@
1
- import types
2
-
3
- import pandas as pd
4
- import pytest
5
-
6
- pytest.importorskip("torch")
7
- pytest.importorskip("optuna")
8
- pytest.importorskip("xgboost")
9
- pytest.importorskip("statsmodels")
10
-
11
- from ins_pricing.bayesopt.trainers import FTTrainer
12
-
13
-
14
- class DummyCtx:
15
- def __init__(self, train_df: pd.DataFrame, test_df: pd.DataFrame):
16
- self.task_type = "regression"
17
- self.config = types.SimpleNamespace(use_ft_ddp=False, geo_feature_nmes=["geo"])
18
- self.train_data = train_df
19
- self.test_data = test_df
20
- self._build_calls = []
21
-
22
- def _build_geo_tokens(self, _params=None):
23
- self._build_calls.append(
24
- (self.train_data.copy(deep=True), self.test_data.copy(deep=True))
25
- )
26
- return self.train_data.copy(deep=True), self.test_data.copy(deep=True), ["geo_token"], None
27
-
28
-
29
- def test_geo_token_split_uses_fold_and_restores_context():
30
- train = pd.DataFrame({"geo": ["a", "b", "c", "d"], "x": [1, 2, 3, 4]})
31
- test = pd.DataFrame({"geo": ["e"], "x": [5]})
32
- ctx = DummyCtx(train, test)
33
- trainer = FTTrainer(ctx)
34
-
35
- X_train = train.iloc[[0, 1]]
36
- X_val = train.iloc[[2, 3]]
37
-
38
- orig_train = ctx.train_data
39
- orig_test = ctx.test_data
40
-
41
- result = trainer._build_geo_tokens_for_split(X_train, X_val, geo_params={"k": 1})
42
-
43
- assert ctx.train_data is orig_train
44
- assert ctx.test_data is orig_test
45
- assert result is not None
46
-
47
- train_snapshot, test_snapshot = ctx._build_calls[0]
48
- assert train_snapshot.equals(train.loc[X_train.index])
49
- assert test_snapshot.equals(train.loc[X_val.index])
@@ -1,33 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- import pytest
4
-
5
- pytest.importorskip("torch")
6
- pytest.importorskip("sklearn")
7
-
8
- from ins_pricing.bayesopt.models import GraphNeuralNetSklearn
9
-
10
-
11
- def test_graph_cache_meta_invalidation(tmp_path):
12
- X = pd.DataFrame({"a": [0.1, 0.2, 0.3], "b": [1.0, 2.0, 3.0]})
13
- cache_path = tmp_path / "gnn_cache.pt"
14
-
15
- model = GraphNeuralNetSklearn(
16
- model_nme="demo",
17
- input_dim=2,
18
- k_neighbors=1,
19
- epochs=1,
20
- use_approx_knn=False,
21
- graph_cache_path=str(cache_path),
22
- )
23
-
24
- adj = model._build_graph_from_df(X)
25
- assert adj is not None
26
- assert cache_path.exists()
27
-
28
- cached = model._load_cached_adj(X)
29
- assert cached is not None
30
-
31
- X_changed = X.copy()
32
- X_changed.iloc[0, 0] += 1.0
33
- assert model._load_cached_adj(X_changed) is None
@@ -1,150 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- import pytest
4
-
5
- pytest.importorskip("matplotlib")
6
- pytest.importorskip("sklearn")
7
-
8
-
9
- def _configure_matplotlib(tmp_path, monkeypatch):
10
- monkeypatch.setenv("MPLBACKEND", "Agg")
11
- mpl_cfg = tmp_path / ".mplconfig"
12
- cache_dir = tmp_path / ".cache"
13
- (cache_dir / "fontconfig").mkdir(parents=True, exist_ok=True)
14
- mpl_cfg.mkdir(parents=True, exist_ok=True)
15
- monkeypatch.setenv("MPLCONFIGDIR", str(mpl_cfg))
16
- monkeypatch.setenv("XDG_CACHE_HOME", str(cache_dir))
17
-
18
-
19
- def test_plotting_library_outputs(tmp_path, monkeypatch):
20
- _configure_matplotlib(tmp_path, monkeypatch)
21
-
22
- from ins_pricing.plotting import curves, diagnostics, geo, importance
23
-
24
- rng = np.random.default_rng(42)
25
- n = 80
26
- pred1 = rng.normal(loc=0.2, scale=1.0, size=n)
27
- pred2 = rng.normal(loc=0.1, scale=1.0, size=n)
28
- actual = np.abs(rng.normal(loc=1.0, scale=0.5, size=n))
29
- weight = rng.uniform(0.5, 2.0, size=n)
30
-
31
- curves.plot_lift_curve(
32
- pred1,
33
- actual * weight,
34
- weight,
35
- n_bins=8,
36
- save_path=str(tmp_path / "lift.png"),
37
- )
38
- curves.plot_double_lift_curve(
39
- pred1,
40
- pred2,
41
- actual * weight,
42
- weight,
43
- n_bins=8,
44
- save_path=str(tmp_path / "dlift.png"),
45
- )
46
-
47
- y_true = rng.integers(0, 2, size=n)
48
- curves.plot_roc_curves(
49
- y_true,
50
- {"m1": pred1, "m2": pred2},
51
- save_path=str(tmp_path / "roc.png"),
52
- )
53
-
54
- importance.plot_feature_importance(
55
- {"x1": 0.3, "x2": 0.1, "x3": 0.05},
56
- save_path=str(tmp_path / "importance.png"),
57
- )
58
-
59
- diagnostics.plot_loss_curve(
60
- history={"train": [1.0, 0.7, 0.5], "val": [1.2, 0.8, 0.6]},
61
- save_path=str(tmp_path / "loss.png"),
62
- )
63
- diagnostics.plot_oneway(
64
- pd.DataFrame(
65
- {
66
- "x1": rng.normal(size=n),
67
- "w_act": actual * weight,
68
- "w": weight,
69
- }
70
- ),
71
- feature="x1",
72
- weight_col="w",
73
- target_col="w_act",
74
- n_bins=6,
75
- save_path=str(tmp_path / "oneway.png"),
76
- )
77
-
78
- df_geo = pd.DataFrame(
79
- {
80
- "lon": rng.uniform(100, 120, size=n),
81
- "lat": rng.uniform(20, 40, size=n),
82
- "loss": actual,
83
- }
84
- )
85
- geo.plot_geo_heatmap(
86
- df_geo,
87
- x_col="lon",
88
- y_col="lat",
89
- value_col="loss",
90
- bins=10,
91
- save_path=str(tmp_path / "geo_heat.png"),
92
- )
93
- geo.plot_geo_contour(
94
- df_geo,
95
- x_col="lon",
96
- y_col="lat",
97
- value_col="loss",
98
- max_points=40,
99
- levels=6,
100
- save_path=str(tmp_path / "geo_contour.png"),
101
- )
102
-
103
- assert (tmp_path / "lift.png").exists()
104
- assert (tmp_path / "dlift.png").exists()
105
- assert (tmp_path / "roc.png").exists()
106
- assert (tmp_path / "importance.png").exists()
107
- assert (tmp_path / "loss.png").exists()
108
- assert (tmp_path / "oneway.png").exists()
109
- assert (tmp_path / "geo_heat.png").exists()
110
- assert (tmp_path / "geo_contour.png").exists()
111
-
112
-
113
- def test_geo_plotting_on_map_optional(tmp_path, monkeypatch):
114
- _configure_matplotlib(tmp_path, monkeypatch)
115
- pytest.importorskip("contextily")
116
-
117
- from ins_pricing.plotting import geo
118
-
119
- rng = np.random.default_rng(7)
120
- n = 60
121
- df_geo = pd.DataFrame(
122
- {
123
- "lon": rng.uniform(105, 115, size=n),
124
- "lat": rng.uniform(25, 35, size=n),
125
- "loss": np.abs(rng.normal(loc=1.0, scale=0.4, size=n)),
126
- }
127
- )
128
-
129
- geo.plot_geo_heatmap_on_map(
130
- df_geo,
131
- lon_col="lon",
132
- lat_col="lat",
133
- value_col="loss",
134
- bins=12,
135
- basemap=None,
136
- save_path=str(tmp_path / "geo_heat_map.png"),
137
- )
138
- geo.plot_geo_contour_on_map(
139
- df_geo,
140
- lon_col="lon",
141
- lat_col="lat",
142
- value_col="loss",
143
- max_points=30,
144
- levels=5,
145
- basemap=None,
146
- save_path=str(tmp_path / "geo_contour_map.png"),
147
- )
148
-
149
- assert (tmp_path / "geo_heat_map.png").exists()
150
- assert (tmp_path / "geo_contour_map.png").exists()
@@ -1,48 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- import pytest
4
-
5
- from ins_pricing.bayesopt.config_preprocess import BayesOptConfig, DatasetPreprocessor
6
-
7
-
8
- def _build_config(binary_resp: bool = False) -> BayesOptConfig:
9
- return BayesOptConfig(
10
- model_nme="demo",
11
- resp_nme="y",
12
- weight_nme="w",
13
- factor_nmes=["x1"],
14
- task_type="regression",
15
- binary_resp_nme="y_bin" if binary_resp else None,
16
- )
17
-
18
-
19
- def test_preprocessor_fills_missing_test_labels():
20
- train = pd.DataFrame(
21
- {
22
- "x1": [1.0, 2.0, 3.0],
23
- "y": [10.0, 20.0, 30.0],
24
- "w": [1.0, 2.0, 3.0],
25
- "y_bin": [0, 1, 0],
26
- }
27
- )
28
- test = pd.DataFrame({"x1": [4.0, 5.0]})
29
-
30
- cfg = _build_config(binary_resp=True)
31
- result = DatasetPreprocessor(train, test, cfg).run()
32
-
33
- assert "w_act" in result.train_data.columns
34
- assert "w_act" not in result.test_data.columns
35
- assert "w_binary_act" in result.train_data.columns
36
- assert "w_binary_act" not in result.test_data.columns
37
- assert result.test_data["w"].eq(1.0).all()
38
- assert result.test_data["y"].isna().all()
39
- assert result.test_data["y_bin"].isna().all()
40
-
41
-
42
- def test_preprocessor_missing_train_columns_raises():
43
- train = pd.DataFrame({"x1": [1.0]})
44
- test = pd.DataFrame({"x1": [2.0]})
45
-
46
- cfg = _build_config(binary_resp=False)
47
- with pytest.raises(KeyError):
48
- DatasetPreprocessor(train, test, cfg).run()