ins-pricing 0.1.11-py3-none-any.whl → 0.2.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- ins_pricing/README.md +9 -6
- ins_pricing/__init__.py +3 -11
- ins_pricing/cli/BayesOpt_entry.py +24 -0
- ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
- ins_pricing/cli/Explain_Run.py +25 -0
- ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
- ins_pricing/cli/Pricing_Run.py +25 -0
- ins_pricing/cli/__init__.py +1 -0
- ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
- ins_pricing/cli/utils/__init__.py +1 -0
- ins_pricing/cli/utils/cli_common.py +320 -0
- ins_pricing/cli/utils/cli_config.py +375 -0
- ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
- {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
- ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
- ins_pricing/docs/modelling/README.md +34 -0
- ins_pricing/modelling/__init__.py +57 -6
- ins_pricing/modelling/core/__init__.py +1 -0
- ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
- ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
- ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
- ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
- ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
- ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
- ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
- ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
- ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
- ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
- ins_pricing/modelling/core/evaluation.py +115 -0
- ins_pricing/production/__init__.py +4 -0
- ins_pricing/production/preprocess.py +71 -0
- ins_pricing/setup.py +10 -5
- {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
- ins_pricing-0.2.0.dist-info/RECORD +125 -0
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
- ins_pricing/modelling/BayesOpt_entry.py +0 -633
- ins_pricing/modelling/Explain_Run.py +0 -36
- ins_pricing/modelling/Pricing_Run.py +0 -36
- ins_pricing/modelling/README.md +0 -33
- ins_pricing/modelling/bayesopt/models.py +0 -2196
- ins_pricing/modelling/bayesopt/trainers.py +0 -2446
- ins_pricing/modelling/cli_common.py +0 -136
- ins_pricing/modelling/tests/test_plotting.py +0 -63
- ins_pricing/modelling/watchdog_run.py +0 -211
- ins_pricing-0.1.11.dist-info/RECORD +0 -169
- ins_pricing_gemini/__init__.py +0 -23
- ins_pricing_gemini/governance/__init__.py +0 -20
- ins_pricing_gemini/governance/approval.py +0 -93
- ins_pricing_gemini/governance/audit.py +0 -37
- ins_pricing_gemini/governance/registry.py +0 -99
- ins_pricing_gemini/governance/release.py +0 -159
- ins_pricing_gemini/modelling/Explain_Run.py +0 -36
- ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
- ins_pricing_gemini/modelling/__init__.py +0 -151
- ins_pricing_gemini/modelling/cli_common.py +0 -141
- ins_pricing_gemini/modelling/config.py +0 -249
- ins_pricing_gemini/modelling/config_preprocess.py +0 -254
- ins_pricing_gemini/modelling/core.py +0 -741
- ins_pricing_gemini/modelling/data_container.py +0 -42
- ins_pricing_gemini/modelling/explain/__init__.py +0 -55
- ins_pricing_gemini/modelling/explain/gradients.py +0 -334
- ins_pricing_gemini/modelling/explain/metrics.py +0 -176
- ins_pricing_gemini/modelling/explain/permutation.py +0 -155
- ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
- ins_pricing_gemini/modelling/features.py +0 -215
- ins_pricing_gemini/modelling/model_manager.py +0 -148
- ins_pricing_gemini/modelling/model_plotting.py +0 -463
- ins_pricing_gemini/modelling/models.py +0 -2203
- ins_pricing_gemini/modelling/notebook_utils.py +0 -294
- ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
- ins_pricing_gemini/modelling/plotting/common.py +0 -63
- ins_pricing_gemini/modelling/plotting/curves.py +0 -572
- ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
- ins_pricing_gemini/modelling/plotting/geo.py +0 -362
- ins_pricing_gemini/modelling/plotting/importance.py +0 -121
- ins_pricing_gemini/modelling/run_logging.py +0 -133
- ins_pricing_gemini/modelling/tests/conftest.py +0 -8
- ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
- ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
- ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
- ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
- ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
- ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
- ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
- ins_pricing_gemini/modelling/trainers.py +0 -2447
- ins_pricing_gemini/modelling/utils.py +0 -1020
- ins_pricing_gemini/pricing/__init__.py +0 -27
- ins_pricing_gemini/pricing/calibration.py +0 -39
- ins_pricing_gemini/pricing/data_quality.py +0 -117
- ins_pricing_gemini/pricing/exposure.py +0 -85
- ins_pricing_gemini/pricing/factors.py +0 -91
- ins_pricing_gemini/pricing/monitoring.py +0 -99
- ins_pricing_gemini/pricing/rate_table.py +0 -78
- ins_pricing_gemini/production/__init__.py +0 -21
- ins_pricing_gemini/production/drift.py +0 -30
- ins_pricing_gemini/production/monitoring.py +0 -143
- ins_pricing_gemini/production/scoring.py +0 -40
- ins_pricing_gemini/reporting/__init__.py +0 -11
- ins_pricing_gemini/reporting/report_builder.py +0 -72
- ins_pricing_gemini/reporting/scheduler.py +0 -45
- ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
- ins_pricing_gemini/scripts/Explain_entry.py +0 -545
- ins_pricing_gemini/scripts/__init__.py +0 -1
- ins_pricing_gemini/scripts/train.py +0 -568
- ins_pricing_gemini/setup.py +0 -55
- ins_pricing_gemini/smoke_test.py +0 -28
- /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
- /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
- /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
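
The file moves above amount to a repackaging: command-line entry points now live under `ins_pricing/cli`, the Bayesian-optimization core is split into `ins_pricing/modelling/core/bayesopt` (with separate `models/` and `trainers/` subpackages), a new `ins_pricing/production` package is added, and the parallel `ins_pricing_gemini` tree is removed entirely. A minimal smoke check of the new layout is sketched below; the module paths come from the file list above, but which names each package re-exports is not visible in this diff.

```python
# Hypothetical smoke check of the 0.2.0 layout (not part of the package).
# Module paths are taken from the file list above; some modules may also
# require optional heavy dependencies (torch, xgboost, optuna) to import.
import importlib

new_layout_modules = [
    "ins_pricing.cli.bayesopt_entry_runner",                     # new CLI runner
    "ins_pricing.cli.utils.cli_config",                          # CLI config helpers
    "ins_pricing.modelling.core.bayesopt.core",                  # relocated core
    "ins_pricing.modelling.core.bayesopt.trainers.trainer_xgb",  # split-out trainer
    "ins_pricing.production.preprocess",                         # new production module
]

for name in new_layout_modules:
    importlib.import_module(name)  # raises ImportError if the installed layout differs
print("0.2.0 layout imports resolved")
```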
ins_pricing_gemini/modelling/tests/test_explain.py (deleted)

```diff
@@ -1,56 +0,0 @@
-import numpy as np
-import pandas as pd
-import pytest
-
-
-def test_permutation_importance_simple():
-    from ins_pricing.explain import permutation_importance
-
-    rng = np.random.default_rng(0)
-    X = pd.DataFrame(
-        {
-            "x1": rng.normal(size=200),
-            "x2": rng.normal(size=200),
-        }
-    )
-    y = 3.0 * X["x1"].to_numpy() + rng.normal(scale=0.1, size=200)
-
-    def predict_fn(df):
-        return 3.0 * df["x1"].to_numpy()
-
-    imp = permutation_importance(
-        predict_fn,
-        X,
-        y,
-        metric="rmse",
-        n_repeats=3,
-        max_rows=None,
-        random_state=0,
-    )
-
-    assert imp.loc[0, "feature"] == "x1"
-    assert imp["importance_mean"].iloc[0] > imp["importance_mean"].iloc[1]
-
-
-def test_integrated_gradients_linear():
-    torch = pytest.importorskip("torch")
-    from ins_pricing.explain import integrated_gradients_torch
-
-    torch.manual_seed(0)
-    model = torch.nn.Linear(3, 1, bias=False)
-    with torch.no_grad():
-        model.weight[:] = torch.tensor([[1.0, 2.0, -1.0]])
-
-    X = torch.tensor(
-        [[1.0, 2.0, 3.0], [0.5, -1.0, 0.0]],
-        dtype=torch.float32,
-    )
-
-    def forward(x):
-        return model(x).squeeze(1)
-
-    attrs = integrated_gradients_torch(forward, X, baseline="zeros", steps=10)
-    expected = X.numpy() * np.array([1.0, 2.0, -1.0])
-
-    assert attrs.shape == X.shape
-    np.testing.assert_allclose(attrs, expected, rtol=1e-2, atol=1e-2)
```
ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py (deleted)

```diff
@@ -1,49 +0,0 @@
-import types
-
-import pandas as pd
-import pytest
-
-pytest.importorskip("torch")
-pytest.importorskip("optuna")
-pytest.importorskip("xgboost")
-pytest.importorskip("statsmodels")
-
-from ins_pricing.bayesopt.trainers import FTTrainer
-
-
-class DummyCtx:
-    def __init__(self, train_df: pd.DataFrame, test_df: pd.DataFrame):
-        self.task_type = "regression"
-        self.config = types.SimpleNamespace(use_ft_ddp=False, geo_feature_nmes=["geo"])
-        self.train_data = train_df
-        self.test_data = test_df
-        self._build_calls = []
-
-    def _build_geo_tokens(self, _params=None):
-        self._build_calls.append(
-            (self.train_data.copy(deep=True), self.test_data.copy(deep=True))
-        )
-        return self.train_data.copy(deep=True), self.test_data.copy(deep=True), ["geo_token"], None
-
-
-def test_geo_token_split_uses_fold_and_restores_context():
-    train = pd.DataFrame({"geo": ["a", "b", "c", "d"], "x": [1, 2, 3, 4]})
-    test = pd.DataFrame({"geo": ["e"], "x": [5]})
-    ctx = DummyCtx(train, test)
-    trainer = FTTrainer(ctx)
-
-    X_train = train.iloc[[0, 1]]
-    X_val = train.iloc[[2, 3]]
-
-    orig_train = ctx.train_data
-    orig_test = ctx.test_data
-
-    result = trainer._build_geo_tokens_for_split(X_train, X_val, geo_params={"k": 1})
-
-    assert ctx.train_data is orig_train
-    assert ctx.test_data is orig_test
-    assert result is not None
-
-    train_snapshot, test_snapshot = ctx._build_calls[0]
-    assert train_snapshot.equals(train.loc[X_train.index])
-    assert test_snapshot.equals(train.loc[X_val.index])
```
ins_pricing_gemini/modelling/tests/test_graph_cache.py (deleted)

```diff
@@ -1,33 +0,0 @@
-import numpy as np
-import pandas as pd
-import pytest
-
-pytest.importorskip("torch")
-pytest.importorskip("sklearn")
-
-from ins_pricing.bayesopt.models import GraphNeuralNetSklearn
-
-
-def test_graph_cache_meta_invalidation(tmp_path):
-    X = pd.DataFrame({"a": [0.1, 0.2, 0.3], "b": [1.0, 2.0, 3.0]})
-    cache_path = tmp_path / "gnn_cache.pt"
-
-    model = GraphNeuralNetSklearn(
-        model_nme="demo",
-        input_dim=2,
-        k_neighbors=1,
-        epochs=1,
-        use_approx_knn=False,
-        graph_cache_path=str(cache_path),
-    )
-
-    adj = model._build_graph_from_df(X)
-    assert adj is not None
-    assert cache_path.exists()
-
-    cached = model._load_cached_adj(X)
-    assert cached is not None
-
-    X_changed = X.copy()
-    X_changed.iloc[0, 0] += 1.0
-    assert model._load_cached_adj(X_changed) is None
```
ins_pricing_gemini/modelling/tests/test_plotting_library.py (deleted)

```diff
@@ -1,150 +0,0 @@
-import numpy as np
-import pandas as pd
-import pytest
-
-pytest.importorskip("matplotlib")
-pytest.importorskip("sklearn")
-
-
-def _configure_matplotlib(tmp_path, monkeypatch):
-    monkeypatch.setenv("MPLBACKEND", "Agg")
-    mpl_cfg = tmp_path / ".mplconfig"
-    cache_dir = tmp_path / ".cache"
-    (cache_dir / "fontconfig").mkdir(parents=True, exist_ok=True)
-    mpl_cfg.mkdir(parents=True, exist_ok=True)
-    monkeypatch.setenv("MPLCONFIGDIR", str(mpl_cfg))
-    monkeypatch.setenv("XDG_CACHE_HOME", str(cache_dir))
-
-
-def test_plotting_library_outputs(tmp_path, monkeypatch):
-    _configure_matplotlib(tmp_path, monkeypatch)
-
-    from ins_pricing.plotting import curves, diagnostics, geo, importance
-
-    rng = np.random.default_rng(42)
-    n = 80
-    pred1 = rng.normal(loc=0.2, scale=1.0, size=n)
-    pred2 = rng.normal(loc=0.1, scale=1.0, size=n)
-    actual = np.abs(rng.normal(loc=1.0, scale=0.5, size=n))
-    weight = rng.uniform(0.5, 2.0, size=n)
-
-    curves.plot_lift_curve(
-        pred1,
-        actual * weight,
-        weight,
-        n_bins=8,
-        save_path=str(tmp_path / "lift.png"),
-    )
-    curves.plot_double_lift_curve(
-        pred1,
-        pred2,
-        actual * weight,
-        weight,
-        n_bins=8,
-        save_path=str(tmp_path / "dlift.png"),
-    )
-
-    y_true = rng.integers(0, 2, size=n)
-    curves.plot_roc_curves(
-        y_true,
-        {"m1": pred1, "m2": pred2},
-        save_path=str(tmp_path / "roc.png"),
-    )
-
-    importance.plot_feature_importance(
-        {"x1": 0.3, "x2": 0.1, "x3": 0.05},
-        save_path=str(tmp_path / "importance.png"),
-    )
-
-    diagnostics.plot_loss_curve(
-        history={"train": [1.0, 0.7, 0.5], "val": [1.2, 0.8, 0.6]},
-        save_path=str(tmp_path / "loss.png"),
-    )
-    diagnostics.plot_oneway(
-        pd.DataFrame(
-            {
-                "x1": rng.normal(size=n),
-                "w_act": actual * weight,
-                "w": weight,
-            }
-        ),
-        feature="x1",
-        weight_col="w",
-        target_col="w_act",
-        n_bins=6,
-        save_path=str(tmp_path / "oneway.png"),
-    )
-
-    df_geo = pd.DataFrame(
-        {
-            "lon": rng.uniform(100, 120, size=n),
-            "lat": rng.uniform(20, 40, size=n),
-            "loss": actual,
-        }
-    )
-    geo.plot_geo_heatmap(
-        df_geo,
-        x_col="lon",
-        y_col="lat",
-        value_col="loss",
-        bins=10,
-        save_path=str(tmp_path / "geo_heat.png"),
-    )
-    geo.plot_geo_contour(
-        df_geo,
-        x_col="lon",
-        y_col="lat",
-        value_col="loss",
-        max_points=40,
-        levels=6,
-        save_path=str(tmp_path / "geo_contour.png"),
-    )
-
-    assert (tmp_path / "lift.png").exists()
-    assert (tmp_path / "dlift.png").exists()
-    assert (tmp_path / "roc.png").exists()
-    assert (tmp_path / "importance.png").exists()
-    assert (tmp_path / "loss.png").exists()
-    assert (tmp_path / "oneway.png").exists()
-    assert (tmp_path / "geo_heat.png").exists()
-    assert (tmp_path / "geo_contour.png").exists()
-
-
-def test_geo_plotting_on_map_optional(tmp_path, monkeypatch):
-    _configure_matplotlib(tmp_path, monkeypatch)
-    pytest.importorskip("contextily")
-
-    from ins_pricing.plotting import geo
-
-    rng = np.random.default_rng(7)
-    n = 60
-    df_geo = pd.DataFrame(
-        {
-            "lon": rng.uniform(105, 115, size=n),
-            "lat": rng.uniform(25, 35, size=n),
-            "loss": np.abs(rng.normal(loc=1.0, scale=0.4, size=n)),
-        }
-    )
-
-    geo.plot_geo_heatmap_on_map(
-        df_geo,
-        lon_col="lon",
-        lat_col="lat",
-        value_col="loss",
-        bins=12,
-        basemap=None,
-        save_path=str(tmp_path / "geo_heat_map.png"),
-    )
-    geo.plot_geo_contour_on_map(
-        df_geo,
-        lon_col="lon",
-        lat_col="lat",
-        value_col="loss",
-        max_points=30,
-        levels=5,
-        basemap=None,
-        save_path=str(tmp_path / "geo_contour_map.png"),
-    )
-
-    assert (tmp_path / "geo_heat_map.png").exists()
-    assert (tmp_path / "geo_contour_map.png").exists()
```
ins_pricing_gemini/modelling/tests/test_preprocessor.py (deleted)

```diff
@@ -1,48 +0,0 @@
-import numpy as np
-import pandas as pd
-import pytest
-
-from ins_pricing.bayesopt.config_preprocess import BayesOptConfig, DatasetPreprocessor
-
-
-def _build_config(binary_resp: bool = False) -> BayesOptConfig:
-    return BayesOptConfig(
-        model_nme="demo",
-        resp_nme="y",
-        weight_nme="w",
-        factor_nmes=["x1"],
-        task_type="regression",
-        binary_resp_nme="y_bin" if binary_resp else None,
-    )
-
-
-def test_preprocessor_fills_missing_test_labels():
-    train = pd.DataFrame(
-        {
-            "x1": [1.0, 2.0, 3.0],
-            "y": [10.0, 20.0, 30.0],
-            "w": [1.0, 2.0, 3.0],
-            "y_bin": [0, 1, 0],
-        }
-    )
-    test = pd.DataFrame({"x1": [4.0, 5.0]})
-
-    cfg = _build_config(binary_resp=True)
-    result = DatasetPreprocessor(train, test, cfg).run()
-
-    assert "w_act" in result.train_data.columns
-    assert "w_act" not in result.test_data.columns
-    assert "w_binary_act" in result.train_data.columns
-    assert "w_binary_act" not in result.test_data.columns
-    assert result.test_data["w"].eq(1.0).all()
-    assert result.test_data["y"].isna().all()
-    assert result.test_data["y_bin"].isna().all()
-
-
-def test_preprocessor_missing_train_columns_raises():
-    train = pd.DataFrame({"x1": [1.0]})
-    test = pd.DataFrame({"x1": [2.0]})
-
-    cfg = _build_config(binary_resp=False)
-    with pytest.raises(KeyError):
-        DatasetPreprocessor(train, test, cfg).run()
```