ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +9 -6
- ins_pricing/__init__.py +3 -11
- ins_pricing/cli/BayesOpt_entry.py +24 -0
- ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
- ins_pricing/cli/Explain_Run.py +25 -0
- ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
- ins_pricing/cli/Pricing_Run.py +25 -0
- ins_pricing/cli/__init__.py +1 -0
- ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
- ins_pricing/cli/utils/__init__.py +1 -0
- ins_pricing/cli/utils/cli_common.py +320 -0
- ins_pricing/cli/utils/cli_config.py +375 -0
- ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
- {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
- ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
- ins_pricing/docs/modelling/README.md +34 -0
- ins_pricing/modelling/__init__.py +57 -6
- ins_pricing/modelling/core/__init__.py +1 -0
- ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
- ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
- ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
- ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
- ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
- ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
- ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
- ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
- ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
- ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
- ins_pricing/modelling/core/evaluation.py +115 -0
- ins_pricing/production/__init__.py +4 -0
- ins_pricing/production/preprocess.py +71 -0
- ins_pricing/setup.py +10 -5
- {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
- ins_pricing-0.2.0.dist-info/RECORD +125 -0
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
- ins_pricing/modelling/BayesOpt_entry.py +0 -633
- ins_pricing/modelling/Explain_Run.py +0 -36
- ins_pricing/modelling/Pricing_Run.py +0 -36
- ins_pricing/modelling/README.md +0 -33
- ins_pricing/modelling/bayesopt/models.py +0 -2196
- ins_pricing/modelling/bayesopt/trainers.py +0 -2446
- ins_pricing/modelling/cli_common.py +0 -136
- ins_pricing/modelling/tests/test_plotting.py +0 -63
- ins_pricing/modelling/watchdog_run.py +0 -211
- ins_pricing-0.1.11.dist-info/RECORD +0 -169
- ins_pricing_gemini/__init__.py +0 -23
- ins_pricing_gemini/governance/__init__.py +0 -20
- ins_pricing_gemini/governance/approval.py +0 -93
- ins_pricing_gemini/governance/audit.py +0 -37
- ins_pricing_gemini/governance/registry.py +0 -99
- ins_pricing_gemini/governance/release.py +0 -159
- ins_pricing_gemini/modelling/Explain_Run.py +0 -36
- ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
- ins_pricing_gemini/modelling/__init__.py +0 -151
- ins_pricing_gemini/modelling/cli_common.py +0 -141
- ins_pricing_gemini/modelling/config.py +0 -249
- ins_pricing_gemini/modelling/config_preprocess.py +0 -254
- ins_pricing_gemini/modelling/core.py +0 -741
- ins_pricing_gemini/modelling/data_container.py +0 -42
- ins_pricing_gemini/modelling/explain/__init__.py +0 -55
- ins_pricing_gemini/modelling/explain/gradients.py +0 -334
- ins_pricing_gemini/modelling/explain/metrics.py +0 -176
- ins_pricing_gemini/modelling/explain/permutation.py +0 -155
- ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
- ins_pricing_gemini/modelling/features.py +0 -215
- ins_pricing_gemini/modelling/model_manager.py +0 -148
- ins_pricing_gemini/modelling/model_plotting.py +0 -463
- ins_pricing_gemini/modelling/models.py +0 -2203
- ins_pricing_gemini/modelling/notebook_utils.py +0 -294
- ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
- ins_pricing_gemini/modelling/plotting/common.py +0 -63
- ins_pricing_gemini/modelling/plotting/curves.py +0 -572
- ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
- ins_pricing_gemini/modelling/plotting/geo.py +0 -362
- ins_pricing_gemini/modelling/plotting/importance.py +0 -121
- ins_pricing_gemini/modelling/run_logging.py +0 -133
- ins_pricing_gemini/modelling/tests/conftest.py +0 -8
- ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
- ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
- ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
- ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
- ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
- ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
- ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
- ins_pricing_gemini/modelling/trainers.py +0 -2447
- ins_pricing_gemini/modelling/utils.py +0 -1020
- ins_pricing_gemini/pricing/__init__.py +0 -27
- ins_pricing_gemini/pricing/calibration.py +0 -39
- ins_pricing_gemini/pricing/data_quality.py +0 -117
- ins_pricing_gemini/pricing/exposure.py +0 -85
- ins_pricing_gemini/pricing/factors.py +0 -91
- ins_pricing_gemini/pricing/monitoring.py +0 -99
- ins_pricing_gemini/pricing/rate_table.py +0 -78
- ins_pricing_gemini/production/__init__.py +0 -21
- ins_pricing_gemini/production/drift.py +0 -30
- ins_pricing_gemini/production/monitoring.py +0 -143
- ins_pricing_gemini/production/scoring.py +0 -40
- ins_pricing_gemini/reporting/__init__.py +0 -11
- ins_pricing_gemini/reporting/report_builder.py +0 -72
- ins_pricing_gemini/reporting/scheduler.py +0 -45
- ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
- ins_pricing_gemini/scripts/Explain_entry.py +0 -545
- ins_pricing_gemini/scripts/__init__.py +0 -1
- ins_pricing_gemini/scripts/train.py +0 -568
- ins_pricing_gemini/setup.py +0 -55
- ins_pricing_gemini/smoke_test.py +0 -28
- /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
- /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
- /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
|
-
from dataclasses import dataclass
|
|
5
|
+
from dataclasses import dataclass, asdict
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import Any, Dict, List, Optional
|
|
@@ -117,6 +117,36 @@ class BayesOptConfig:
|
|
|
117
117
|
final_ensemble: bool = False
|
|
118
118
|
final_ensemble_k: int = 3
|
|
119
119
|
final_refit: bool = True
|
|
120
|
+
cv_strategy: str = "random"
|
|
121
|
+
cv_splits: Optional[int] = None
|
|
122
|
+
cv_group_col: Optional[str] = None
|
|
123
|
+
cv_time_col: Optional[str] = None
|
|
124
|
+
cv_time_ascending: bool = True
|
|
125
|
+
ft_oof_folds: Optional[int] = None
|
|
126
|
+
ft_oof_strategy: Optional[str] = None
|
|
127
|
+
ft_oof_shuffle: bool = True
|
|
128
|
+
save_preprocess: bool = False
|
|
129
|
+
preprocess_artifact_path: Optional[str] = None
|
|
130
|
+
plot_path_style: str = "nested"
|
|
131
|
+
bo_sample_limit: Optional[int] = None
|
|
132
|
+
cache_predictions: bool = False
|
|
133
|
+
prediction_cache_dir: Optional[str] = None
|
|
134
|
+
prediction_cache_format: str = "parquet"
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@dataclass
|
|
138
|
+
class PreprocessArtifacts:
|
|
139
|
+
factor_nmes: List[str]
|
|
140
|
+
cate_list: List[str]
|
|
141
|
+
num_features: List[str]
|
|
142
|
+
var_nmes: List[str]
|
|
143
|
+
cat_categories: Dict[str, List[Any]]
|
|
144
|
+
dummy_columns: List[str]
|
|
145
|
+
numeric_scalers: Dict[str, Dict[str, float]]
|
|
146
|
+
weight_nme: str
|
|
147
|
+
resp_nme: str
|
|
148
|
+
binary_resp_nme: Optional[str] = None
|
|
149
|
+
drop_first: bool = True
|
|
120
150
|
|
|
121
151
|
|
|
122
152
|
class OutputManager:
|
|
@@ -191,6 +221,7 @@ class DatasetPreprocessor:
|
|
|
191
221
|
self.test_oht_scl_data: Optional[pd.DataFrame] = None
|
|
192
222
|
self.var_nmes: List[str] = []
|
|
193
223
|
self.cat_categories_for_shap: Dict[str, List[Any]] = {}
|
|
224
|
+
self.numeric_scalers: Dict[str, Dict[str, float]] = {}
|
|
194
225
|
|
|
195
226
|
def run(self) -> "DatasetPreprocessor":
|
|
196
227
|
"""Run preprocessing: categorical encoding, target clipping, numeric scaling."""
|
|
@@ -291,6 +322,13 @@ class DatasetPreprocessor:
|
|
|
291
322
|
train_oht_scaled[num_chr].values.reshape(-1, 1))
|
|
292
323
|
test_oht_scaled[num_chr] = scaler.transform(
|
|
293
324
|
test_oht_scaled[num_chr].values.reshape(-1, 1))
|
|
325
|
+
scale_val = float(getattr(scaler, "scale_", [1.0])[0])
|
|
326
|
+
if scale_val == 0.0:
|
|
327
|
+
scale_val = 1.0
|
|
328
|
+
self.numeric_scalers[num_chr] = {
|
|
329
|
+
"mean": float(getattr(scaler, "mean_", [0.0])[0]),
|
|
330
|
+
"scale": scale_val,
|
|
331
|
+
}
|
|
294
332
|
# Fill missing dummy columns when reindexing to align train/test columns.
|
|
295
333
|
test_oht_scaled = test_oht_scaled.reindex(
|
|
296
334
|
columns=train_oht_scaled.columns, fill_value=0)
|
|
@@ -301,3 +339,28 @@ class DatasetPreprocessor:
|
|
|
301
339
|
col for col in train_oht_scaled.columns if col not in excluded
|
|
302
340
|
]
|
|
303
341
|
return self
|
|
342
|
+
|
|
343
|
+
def export_artifacts(self) -> PreprocessArtifacts:
|
|
344
|
+
dummy_columns: List[str] = []
|
|
345
|
+
if self.train_oht_data is not None:
|
|
346
|
+
dummy_columns = list(self.train_oht_data.columns)
|
|
347
|
+
return PreprocessArtifacts(
|
|
348
|
+
factor_nmes=list(self.config.factor_nmes),
|
|
349
|
+
cate_list=list(self.config.cate_list or []),
|
|
350
|
+
num_features=list(self.num_features),
|
|
351
|
+
var_nmes=list(self.var_nmes),
|
|
352
|
+
cat_categories=dict(self.cat_categories_for_shap),
|
|
353
|
+
dummy_columns=dummy_columns,
|
|
354
|
+
numeric_scalers=dict(self.numeric_scalers),
|
|
355
|
+
weight_nme=str(self.config.weight_nme),
|
|
356
|
+
resp_nme=str(self.config.resp_nme),
|
|
357
|
+
binary_resp_nme=self.config.binary_resp_nme,
|
|
358
|
+
drop_first=True,
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
def save_artifacts(self, path: str | Path) -> str:
|
|
362
|
+
payload = self.export_artifacts()
|
|
363
|
+
target = Path(path)
|
|
364
|
+
target.parent.mkdir(parents=True, exist_ok=True)
|
|
365
|
+
target.write_text(json.dumps(asdict(payload), ensure_ascii=True, indent=2), encoding="utf-8")
|
|
366
|
+
return str(target)
|