ins-pricing 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/PKG-INFO +1 -1
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/core.py +1 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +57 -7
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +3 -1
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +8 -3
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/production/__init__.py +12 -0
- ins_pricing-0.2.2/ins_pricing/production/predict.py +518 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/setup.py +1 -1
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing.egg-info/PKG-INFO +1 -1
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing.egg-info/SOURCES.txt +1 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/pyproject.toml +1 -1
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/MANIFEST.in +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/README.md +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/README.md +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/BayesOpt_entry.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/BayesOpt_incremental.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/Explain_Run.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/Explain_entry.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/Pricing_Run.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/bayesopt_entry_runner.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/utils/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/utils/cli_common.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/utils/cli_config.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/utils/notebook_utils.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/utils/run_logging.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/cli/watchdog_run.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/docs/modelling/BayesOpt_USAGE.md +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/docs/modelling/README.md +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/README.md +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/approval.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/audit.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/registry.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/governance/release.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/BayesOpt.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/config_preprocess.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/model_gnn.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/model_resn.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/utils.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/evaluation.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/explain/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/explain/gradients.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/explain/metrics.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/explain/permutation.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/explain/shap_utils.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/common.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/curves.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/diagnostics.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/geo.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/plotting/importance.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/README.md +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/calibration.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/data_quality.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/exposure.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/factors.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/monitoring.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/pricing/rate_table.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/production/drift.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/production/monitoring.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/production/preprocess.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/production/scoring.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/reporting/README.md +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/reporting/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/reporting/report_builder.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/reporting/scheduler.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/conftest.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_cross_val_generic.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_distributed_utils.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_explain.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_geo_tokens_split.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_graph_cache.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_plotting.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_plotting_library.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_preprocessor.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing.egg-info/dependency_links.txt +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing.egg-info/requires.txt +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing.egg-info/top_level.txt +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/setup.cfg +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/BayesOpt.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/BayesOpt_entry.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/BayesOpt_incremental.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Pricing_Run.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt Legacy251213.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt Legacy251215.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt lagecy251201.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt lagecy251218.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt legacy.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptAll.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptAllPlatform.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptCPUGPU.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptSearch.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptSearchOrigin.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV1.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV10.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV11.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV12.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV2.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV3.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV4.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV5.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV6.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV7.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV8Codex.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV8Gemini.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOptV9.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/BayesOpt_entry legacy.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/ModelBayesOptSearch.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/ResNetBayesOptSearch.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/XgbBayesOptSearch.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/Try/xgbbayesopt.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/__init__.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/cli_common.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/notebook_utils.py +0 -0
- {ins_pricing-0.2.0 → ins_pricing-0.2.2}/user_packages legacy/watchdog_run.py +0 -0
|
@@ -258,6 +258,7 @@ class BayesOptModel(BayesOptPlottingMixin, BayesOptExplainMixin):
|
|
|
258
258
|
self.var_nmes = preprocessor.var_nmes
|
|
259
259
|
self.num_features = preprocessor.num_features
|
|
260
260
|
self.cat_categories_for_shap = preprocessor.cat_categories_for_shap
|
|
261
|
+
self.numeric_scalers = preprocessor.numeric_scalers
|
|
261
262
|
if getattr(self.config, "save_preprocess", False):
|
|
262
263
|
artifact_path = getattr(self.config, "preprocess_artifact_path", None)
|
|
263
264
|
if artifact_path:
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py
RENAMED
|
@@ -55,6 +55,24 @@ class TrainerBase:
|
|
|
55
55
|
self.enable_distributed_optuna: bool = False
|
|
56
56
|
self._distributed_forced_params: Optional[Dict[str, Any]] = None
|
|
57
57
|
|
|
58
|
+
def _export_preprocess_artifacts(self) -> Dict[str, Any]:
|
|
59
|
+
dummy_columns: List[str] = []
|
|
60
|
+
if getattr(self.ctx, "train_oht_data", None) is not None:
|
|
61
|
+
dummy_columns = list(self.ctx.train_oht_data.columns)
|
|
62
|
+
return {
|
|
63
|
+
"factor_nmes": list(getattr(self.ctx, "factor_nmes", []) or []),
|
|
64
|
+
"cate_list": list(getattr(self.ctx, "cate_list", []) or []),
|
|
65
|
+
"num_features": list(getattr(self.ctx, "num_features", []) or []),
|
|
66
|
+
"var_nmes": list(getattr(self.ctx, "var_nmes", []) or []),
|
|
67
|
+
"cat_categories": dict(getattr(self.ctx, "cat_categories_for_shap", {}) or {}),
|
|
68
|
+
"dummy_columns": dummy_columns,
|
|
69
|
+
"numeric_scalers": dict(getattr(self.ctx, "numeric_scalers", {}) or {}),
|
|
70
|
+
"weight_nme": str(getattr(self.ctx, "weight_nme", "")),
|
|
71
|
+
"resp_nme": str(getattr(self.ctx, "resp_nme", "")),
|
|
72
|
+
"binary_resp_nme": getattr(self.ctx, "binary_resp_nme", None),
|
|
73
|
+
"drop_first": True,
|
|
74
|
+
}
|
|
75
|
+
|
|
58
76
|
def _dist_barrier(self, reason: str) -> None:
|
|
59
77
|
"""DDP barrier wrapper used by distributed Optuna.
|
|
60
78
|
|
|
@@ -268,14 +286,38 @@ class TrainerBase:
|
|
|
268
286
|
|
|
269
287
|
path = self.output.model_path(self._get_model_filename())
|
|
270
288
|
if self.label in ['Xgboost', 'GLM']:
|
|
271
|
-
|
|
289
|
+
payload = {
|
|
290
|
+
"model": self.model,
|
|
291
|
+
"preprocess_artifacts": self._export_preprocess_artifacts(),
|
|
292
|
+
}
|
|
293
|
+
joblib.dump(payload, path)
|
|
272
294
|
else:
|
|
273
295
|
# PyTorch models can save state_dict or the full object.
|
|
274
296
|
# Legacy behavior: ResNetTrainer saves state_dict; FTTrainer saves full object.
|
|
297
|
+
payload = {
|
|
298
|
+
"preprocess_artifacts": self._export_preprocess_artifacts(),
|
|
299
|
+
}
|
|
275
300
|
if hasattr(self.model, 'resnet'): # ResNetSklearn model
|
|
276
|
-
|
|
301
|
+
# Unwrap and move to CPU
|
|
302
|
+
resnet = self.model.resnet
|
|
303
|
+
if hasattr(resnet, "module"):
|
|
304
|
+
resnet = resnet.module
|
|
305
|
+
resnet = resnet.to("cpu")
|
|
306
|
+
|
|
307
|
+
payload["state_dict"] = resnet.state_dict()
|
|
308
|
+
payload["best_params"] = dict(self.best_params or {})
|
|
277
309
|
else: # FTTransformerSklearn or other PyTorch model
|
|
278
|
-
|
|
310
|
+
model_to_save = self.model
|
|
311
|
+
if hasattr(model_to_save, "ft"):
|
|
312
|
+
ft = model_to_save.ft
|
|
313
|
+
if hasattr(ft, "module"):
|
|
314
|
+
ft = ft.module
|
|
315
|
+
model_to_save.ft = ft.to("cpu")
|
|
316
|
+
if hasattr(model_to_save, "device"):
|
|
317
|
+
model_to_save.device = torch.device("cpu")
|
|
318
|
+
|
|
319
|
+
payload["model"] = model_to_save
|
|
320
|
+
torch.save(payload, path)
|
|
279
321
|
|
|
280
322
|
def load(self) -> None:
|
|
281
323
|
path = self.output.model_path(self._get_model_filename())
|
|
@@ -284,7 +326,11 @@ class TrainerBase:
|
|
|
284
326
|
return
|
|
285
327
|
|
|
286
328
|
if self.label in ['Xgboost', 'GLM']:
|
|
287
|
-
|
|
329
|
+
loaded = joblib.load(path)
|
|
330
|
+
if isinstance(loaded, dict) and "model" in loaded:
|
|
331
|
+
self.model = loaded.get("model")
|
|
332
|
+
else:
|
|
333
|
+
self.model = loaded
|
|
288
334
|
else:
|
|
289
335
|
# PyTorch loading depends on the model structure.
|
|
290
336
|
if self.label == 'ResNet' or self.label == 'ResNetClassifier':
|
|
@@ -293,8 +339,13 @@ class TrainerBase:
|
|
|
293
339
|
else:
|
|
294
340
|
# FT-Transformer serializes the whole object; load then move to device.
|
|
295
341
|
loaded = torch.load(path, map_location='cpu')
|
|
296
|
-
|
|
297
|
-
|
|
342
|
+
if isinstance(loaded, dict) and "model" in loaded:
|
|
343
|
+
loaded_model = loaded.get("model")
|
|
344
|
+
else:
|
|
345
|
+
loaded_model = loaded
|
|
346
|
+
if loaded_model is not None:
|
|
347
|
+
self._move_to_device(loaded_model)
|
|
348
|
+
self.model = loaded_model
|
|
298
349
|
|
|
299
350
|
def _move_to_device(self, model_obj):
|
|
300
351
|
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
@@ -1017,4 +1068,3 @@ class TrainerBase:
|
|
|
1017
1068
|
predict_kwargs_test=predict_kwargs_test,
|
|
1018
1069
|
predict_fn=predict_fn)
|
|
1019
1070
|
|
|
1020
|
-
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py
RENAMED
|
@@ -282,10 +282,13 @@ class GNNTrainer(TrainerBase):
|
|
|
282
282
|
return
|
|
283
283
|
path = self.output.model_path(self._get_model_filename())
|
|
284
284
|
base_gnn = getattr(self.model, "_unwrap_gnn", lambda: None)()
|
|
285
|
+
if base_gnn is not None:
|
|
286
|
+
base_gnn = base_gnn.to("cpu")
|
|
285
287
|
state = None if base_gnn is None else base_gnn.state_dict()
|
|
286
288
|
payload = {
|
|
287
289
|
"best_params": self.best_params,
|
|
288
290
|
"state_dict": state,
|
|
291
|
+
"preprocess_artifacts": self._export_preprocess_artifacts(),
|
|
289
292
|
}
|
|
290
293
|
torch.save(payload, path)
|
|
291
294
|
|
|
@@ -309,4 +312,3 @@ class GNNTrainer(TrainerBase):
|
|
|
309
312
|
self.best_params = dict(params) if isinstance(params, dict) else None
|
|
310
313
|
self.ctx.gnn_best = self.model
|
|
311
314
|
|
|
312
|
-
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py
RENAMED
|
@@ -248,8 +248,14 @@ class ResNetTrainer(TrainerBase):
|
|
|
248
248
|
# Load ResNet weights to the current device to match context.
|
|
249
249
|
path = self.output.model_path(self._get_model_filename())
|
|
250
250
|
if os.path.exists(path):
|
|
251
|
-
|
|
252
|
-
|
|
251
|
+
payload = torch.load(path, map_location='cpu')
|
|
252
|
+
if isinstance(payload, dict) and "state_dict" in payload:
|
|
253
|
+
state_dict = payload.get("state_dict")
|
|
254
|
+
params = payload.get("best_params") or self.best_params
|
|
255
|
+
else:
|
|
256
|
+
state_dict = payload
|
|
257
|
+
params = self.best_params
|
|
258
|
+
resn_loaded = self._build_model(params)
|
|
253
259
|
resn_loaded.resnet.load_state_dict(state_dict)
|
|
254
260
|
|
|
255
261
|
self._move_to_device(resn_loaded)
|
|
@@ -258,4 +264,3 @@ class ResNetTrainer(TrainerBase):
|
|
|
258
264
|
else:
|
|
259
265
|
print(f"[ResNetTrainer.load] Model file not found: {path}")
|
|
260
266
|
|
|
261
|
-
|
|
@@ -10,6 +10,13 @@ from .monitoring import (
|
|
|
10
10
|
)
|
|
11
11
|
from .scoring import batch_score
|
|
12
12
|
from .preprocess import apply_preprocess_artifacts, load_preprocess_artifacts, prepare_raw_features
|
|
13
|
+
from .predict import (
|
|
14
|
+
SavedModelPredictor,
|
|
15
|
+
load_best_params,
|
|
16
|
+
load_predictor_from_config,
|
|
17
|
+
load_saved_model,
|
|
18
|
+
predict_from_config,
|
|
19
|
+
)
|
|
13
20
|
|
|
14
21
|
__all__ = [
|
|
15
22
|
"psi_report",
|
|
@@ -22,4 +29,9 @@ __all__ = [
|
|
|
22
29
|
"apply_preprocess_artifacts",
|
|
23
30
|
"load_preprocess_artifacts",
|
|
24
31
|
"prepare_raw_features",
|
|
32
|
+
"SavedModelPredictor",
|
|
33
|
+
"load_best_params",
|
|
34
|
+
"load_predictor_from_config",
|
|
35
|
+
"load_saved_model",
|
|
36
|
+
"predict_from_config",
|
|
25
37
|
]
|
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, Iterable, List, Optional, Sequence
|
|
6
|
+
|
|
7
|
+
import joblib
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
import torch
|
|
11
|
+
try: # statsmodels is optional when GLM inference is not used
|
|
12
|
+
import statsmodels.api as sm
|
|
13
|
+
_SM_IMPORT_ERROR: Optional[BaseException] = None
|
|
14
|
+
except Exception as exc: # pragma: no cover - optional dependency
|
|
15
|
+
sm = None # type: ignore[assignment]
|
|
16
|
+
_SM_IMPORT_ERROR = exc
|
|
17
|
+
|
|
18
|
+
from .preprocess import (
|
|
19
|
+
apply_preprocess_artifacts,
|
|
20
|
+
load_preprocess_artifacts,
|
|
21
|
+
prepare_raw_features,
|
|
22
|
+
)
|
|
23
|
+
from .scoring import batch_score
|
|
24
|
+
from ..modelling.core.bayesopt.models.model_gnn import GraphNeuralNetSklearn
|
|
25
|
+
from ..modelling.core.bayesopt.models.model_resn import ResNetSklearn
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Short model key -> label embedded in the saved model file name.
MODEL_PREFIX = dict(
    xgb="Xgboost",
    glm="GLM",
    resn="ResNet",
    ft="FTTransformer",
    gnn="GNN",
)

# Model keys whose inference features must come from saved preprocess
# artifacts (see _prepare_features).
OHT_MODELS = {"resn", "gnn", "glm"}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _default_tweedie_power(model_name: str, task_type: str) -> Optional[float]:
|
|
40
|
+
if task_type == "classification":
|
|
41
|
+
return None
|
|
42
|
+
if "f" in model_name:
|
|
43
|
+
return 1.0
|
|
44
|
+
if "s" in model_name:
|
|
45
|
+
return 2.0
|
|
46
|
+
return 1.5
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _resolve_value(
|
|
50
|
+
value: Any,
|
|
51
|
+
*,
|
|
52
|
+
model_name: str,
|
|
53
|
+
base_dir: Path,
|
|
54
|
+
) -> Optional[Path]:
|
|
55
|
+
if value is None:
|
|
56
|
+
return None
|
|
57
|
+
if isinstance(value, dict):
|
|
58
|
+
value = value.get(model_name)
|
|
59
|
+
if value is None:
|
|
60
|
+
return None
|
|
61
|
+
path_str = str(value)
|
|
62
|
+
try:
|
|
63
|
+
path_str = path_str.format(model_name=model_name)
|
|
64
|
+
except Exception:
|
|
65
|
+
pass
|
|
66
|
+
candidate = Path(path_str)
|
|
67
|
+
if candidate.is_absolute():
|
|
68
|
+
return candidate
|
|
69
|
+
return (base_dir / candidate).resolve()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _load_json(path: Path) -> Dict[str, Any]:
|
|
73
|
+
return json.loads(path.read_text(encoding="utf-8"))
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _infer_format(path: Path) -> str:
|
|
77
|
+
suffix = path.suffix.lower()
|
|
78
|
+
if suffix in {".parquet", ".pq"}:
|
|
79
|
+
return "parquet"
|
|
80
|
+
if suffix in {".feather", ".ft"}:
|
|
81
|
+
return "feather"
|
|
82
|
+
return "csv"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _load_dataset(path: Path) -> pd.DataFrame:
    """Load a tabular file with the pandas reader matching its suffix.

    Parquet and Feather files use their dedicated readers; everything else
    is read as CSV with ``low_memory=False``.
    """
    binary_readers = {
        "parquet": pd.read_parquet,
        "feather": pd.read_feather,
    }
    reader = binary_readers.get(_infer_format(path))
    if reader is not None:
        return reader(path)
    return pd.read_csv(path, low_memory=False)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _model_file_path(output_dir: Path, model_name: str, model_key: str) -> Path:
    """Build the expected path of a saved model under ``output_dir/model``.

    The file is named ``01_<model_name>_<prefix>.<ext>``, where the prefix
    comes from ``MODEL_PREFIX`` and the extension is ``pkl`` for ``xgb`` /
    ``glm`` and ``pth`` for every other key.

    Raises:
        ValueError: If ``model_key`` has no entry in ``MODEL_PREFIX``.
    """
    label = MODEL_PREFIX.get(model_key)
    if label is None:
        raise ValueError(f"Unsupported model key: {model_key}")
    if model_key in {"xgb", "glm"}:
        extension = "pkl"
    else:
        extension = "pth"
    filename = f"01_{model_name}_{label}.{extension}"
    return output_dir / "model" / filename
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _load_preprocess_from_model_file(
    output_dir: Path,
    model_name: str,
    model_key: str,
) -> Optional[Dict[str, Any]]:
    """Read the ``preprocess_artifacts`` entry embedded in a saved model file.

    Returns:
        The stored artifacts, or ``None`` when the model file does not
        exist, the loaded payload is not a dict, or the dict has no
        ``preprocess_artifacts`` key.
    """
    checkpoint_path = _model_file_path(output_dir, model_name, model_key)
    if not checkpoint_path.exists():
        return None

    # NOTE(review): both loaders deserialize pickled data, so this should
    # only be pointed at trusted model files.
    payload = (
        joblib.load(checkpoint_path)
        if model_key in {"xgb", "glm"}
        else torch.load(checkpoint_path, map_location="cpu")
    )
    if not isinstance(payload, dict):
        return None
    return payload.get("preprocess_artifacts")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _move_to_device(model_obj: Any) -> None:
|
|
120
|
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
121
|
+
if hasattr(model_obj, "device"):
|
|
122
|
+
model_obj.device = device
|
|
123
|
+
if hasattr(model_obj, "to"):
|
|
124
|
+
model_obj.to(device)
|
|
125
|
+
for attr in ("ft", "resnet", "gnn"):
|
|
126
|
+
if hasattr(model_obj, attr):
|
|
127
|
+
getattr(model_obj, attr).to(device)
|
|
128
|
+
if hasattr(model_obj, "eval"):
|
|
129
|
+
model_obj.eval()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def load_best_params(
    output_dir: str | Path,
    model_name: str,
    model_key: str,
) -> Optional[Dict[str, Any]]:
    """Look up tuned hyper-parameters saved under ``<output_dir>/Results``.

    Two sources are tried in order:

    1. The last file (sorted by name) matching
       ``Results/versions/*_<model_key>_best.json``; its ``best_params``
       entry is returned when non-empty.
    2. The first row of ``Results/<model_name>_bestparams_<label>.csv``.

    Returns:
        The parameter dict, or ``None`` when neither source yields one.
    """
    results_dir = Path(output_dir) / "Results"

    snapshot_dir = results_dir / "versions"
    if snapshot_dir.exists():
        snapshots = sorted(snapshot_dir.glob(f"*_{model_key}_best.json"))
        if snapshots:
            params = _load_json(snapshots[-1]).get("best_params")
            if params:
                return params

    # CSV files use a lower-case label; unknown keys are used verbatim.
    csv_labels = {
        "xgb": "xgboost",
        "resn": "resnet",
        "ft": "fttransformer",
        "glm": "glm",
        "gnn": "gnn",
    }
    csv_label = csv_labels.get(model_key, model_key)
    csv_path = results_dir / f"{model_name}_bestparams_{csv_label}.csv"
    if not csv_path.exists():
        return None
    frame = pd.read_csv(csv_path)
    return None if frame.empty else frame.iloc[0].to_dict()
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _build_resn_model(
    *,
    model_name: str,
    input_dim: int,
    task_type: str,
    epochs: int,
    resn_weight_decay: float,
    params: Dict[str, Any],
) -> ResNetSklearn:
    """Instantiate a ``ResNetSklearn`` from tuned parameters plus defaults.

    Values missing from ``params`` fall back to the literals below;
    ``tw_power`` falls back to ``_default_tweedie_power`` and
    ``weight_decay`` to ``resn_weight_decay``. Layer norm is always enabled
    and both data-parallel modes are always disabled.
    """
    tweedie_power = params.get(
        "tw_power", _default_tweedie_power(model_name, task_type)
    )
    if tweedie_power is not None:
        tweedie_power = float(tweedie_power)
    decay = float(params.get("weight_decay", resn_weight_decay))

    init_kwargs: Dict[str, Any] = {
        "model_nme": model_name,
        "input_dim": input_dim,
        "hidden_dim": int(params.get("hidden_dim", 64)),
        "block_num": int(params.get("block_num", 2)),
        "task_type": task_type,
        "epochs": int(epochs),
        "tweedie_power": tweedie_power,
        "learning_rate": float(params.get("learning_rate", 0.01)),
        "patience": int(params.get("patience", 10)),
        "use_layernorm": True,
        "dropout": float(params.get("dropout", 0.1)),
        "residual_scale": float(params.get("residual_scale", 0.1)),
        "stochastic_depth": float(params.get("stochastic_depth", 0.0)),
        "weight_decay": decay,
        "use_data_parallel": False,
        "use_ddp": False,
    }
    return ResNetSklearn(**init_kwargs)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _build_gnn_model(
    *,
    model_name: str,
    input_dim: int,
    task_type: str,
    epochs: int,
    cfg: Dict[str, Any],
    params: Dict[str, Any],
) -> GraphNeuralNetSklearn:
    """Instantiate a ``GraphNeuralNetSklearn`` from params and config.

    Hyper-parameters come from ``params`` with literal fallbacks; the
    graph / kNN options are read from ``cfg`` under ``gnn_*`` keys. The
    model is named ``<model_name>_gnn`` and both data-parallel modes are
    disabled.
    """
    # For classification the default power is None, so 1.5 is used instead.
    fallback_power = _default_tweedie_power(model_name, task_type)

    init_kwargs: Dict[str, Any] = {
        "model_nme": f"{model_name}_gnn",
        "input_dim": input_dim,
        "hidden_dim": int(params.get("hidden_dim", 64)),
        "num_layers": int(params.get("num_layers", 2)),
        "k_neighbors": int(params.get("k_neighbors", 10)),
        "dropout": float(params.get("dropout", 0.1)),
        "learning_rate": float(params.get("learning_rate", 1e-3)),
        "epochs": int(params.get("epochs", epochs)),
        "patience": int(params.get("patience", 5)),
        "task_type": task_type,
        "tweedie_power": float(params.get("tw_power", fallback_power or 1.5)),
        "weight_decay": float(params.get("weight_decay", 0.0)),
        "use_data_parallel": False,
        "use_ddp": False,
        "use_approx_knn": bool(cfg.get("gnn_use_approx_knn", True)),
        "approx_knn_threshold": int(cfg.get("gnn_approx_knn_threshold", 50000)),
        "graph_cache_path": cfg.get("gnn_graph_cache"),
        "max_gpu_knn_nodes": cfg.get("gnn_max_gpu_knn_nodes"),
        "knn_gpu_mem_ratio": cfg.get("gnn_knn_gpu_mem_ratio", 0.9),
        "knn_gpu_mem_overhead": cfg.get("gnn_knn_gpu_mem_overhead", 2.0),
    }
    return GraphNeuralNetSklearn(**init_kwargs)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def load_saved_model(
    *,
    output_dir: str | Path,
    model_name: str,
    model_key: str,
    task_type: str,
    input_dim: Optional[int],
    cfg: Dict[str, Any],
) -> Any:
    """Load a trained model from ``<output_dir>/model`` ready for inference.

    Both the wrapped payload format (a dict carrying ``model`` or
    ``state_dict``) and the bare object / bare state-dict format are
    accepted.

    Args:
        output_dir: Root output directory of the training run.
        model_name: Model name used in the saved file name.
        model_key: One of ``xgb``, ``glm``, ``ft``, ``resn``, ``gnn``.
        task_type: Passed to the ResNet / GNN builders.
        input_dim: Feature dimension; required for ``resn`` and ``gnn``.
        cfg: Config dict supplying ``epochs``, ``resn_weight_decay`` and
            the ``gnn_*`` options.

    Raises:
        FileNotFoundError: If the model file does not exist.
        ValueError: For an unsupported key, a missing ``input_dim``, or a
            GNN checkpoint that is not a dict.
        RuntimeError: If no best params can be found for ``resn``.
    """
    checkpoint_path = _model_file_path(Path(output_dir), model_name, model_key)
    if not checkpoint_path.exists():
        raise FileNotFoundError(f"Model file not found: {checkpoint_path}")

    # joblib-pickled estimators.
    if model_key in {"xgb", "glm"}:
        loaded = joblib.load(checkpoint_path)
        if isinstance(loaded, dict) and "model" in loaded:
            return loaded.get("model")
        return loaded

    # The FT payload holds the whole model object.
    if model_key == "ft":
        loaded = torch.load(checkpoint_path, map_location="cpu")
        wrapped = isinstance(loaded, dict) and "model" in loaded
        ft_model = loaded.get("model") if wrapped else loaded
        _move_to_device(ft_model)
        return ft_model

    # ResNet is rebuilt from params, then its weights are restored.
    if model_key == "resn":
        if input_dim is None:
            raise ValueError("input_dim is required for ResNet loading")
        loaded = torch.load(checkpoint_path, map_location="cpu")
        wrapped = isinstance(loaded, dict) and "state_dict" in loaded
        weights = loaded.get("state_dict") if wrapped else loaded
        tuned = loaded.get("best_params") if wrapped else None
        if not tuned:
            # No params in the checkpoint: fall back to the Results folder.
            tuned = load_best_params(output_dir, model_name, model_key)
        if tuned is None:
            raise RuntimeError("Best params not found for resn")
        resn_model = _build_resn_model(
            model_name=model_name,
            input_dim=input_dim,
            task_type=task_type,
            epochs=int(cfg.get("epochs", 50)),
            resn_weight_decay=float(cfg.get("resn_weight_decay", 1e-4)),
            params=tuned,
        )
        resn_model.resnet.load_state_dict(weights)
        _move_to_device(resn_model)
        return resn_model

    # The GNN checkpoint must be a dict with params and weights.
    if model_key == "gnn":
        if input_dim is None:
            raise ValueError("input_dim is required for GNN loading")
        loaded = torch.load(checkpoint_path, map_location="cpu")
        if not isinstance(loaded, dict):
            raise ValueError(f"Invalid GNN checkpoint: {checkpoint_path}")
        tuned = loaded.get("best_params") or {}
        weights = loaded.get("state_dict")
        gnn_model = _build_gnn_model(
            model_name=model_name,
            input_dim=input_dim,
            task_type=task_type,
            epochs=int(cfg.get("epochs", 50)),
            cfg=cfg,
            params=tuned,
        )
        gnn_model.set_params(dict(tuned))
        inner_gnn = getattr(gnn_model, "_unwrap_gnn", lambda: None)()
        # Weights load non-strictly, and only when both the inner network
        # and a stored state dict are available.
        if inner_gnn is not None and weights is not None:
            inner_gnn.load_state_dict(weights, strict=False)
        _move_to_device(gnn_model)
        return gnn_model

    raise ValueError(f"Unsupported model key: {model_key}")
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _build_artifacts_from_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
|
|
311
|
+
factor_nmes = list(cfg.get("feature_list") or [])
|
|
312
|
+
cate_list = list(cfg.get("categorical_features") or [])
|
|
313
|
+
num_features = [c for c in factor_nmes if c not in cate_list]
|
|
314
|
+
return {
|
|
315
|
+
"factor_nmes": factor_nmes,
|
|
316
|
+
"cate_list": cate_list,
|
|
317
|
+
"num_features": num_features,
|
|
318
|
+
"cat_categories": {},
|
|
319
|
+
"var_nmes": [],
|
|
320
|
+
"numeric_scalers": {},
|
|
321
|
+
"drop_first": True,
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _prepare_features(
    df: pd.DataFrame,
    *,
    model_key: str,
    cfg: Dict[str, Any],
    artifacts: Optional[Dict[str, Any]],
) -> pd.DataFrame:
    """Turn an input frame into the feature frame expected by ``model_key``.

    Model keys listed in ``OHT_MODELS`` are routed through
    ``apply_preprocess_artifacts`` and therefore need saved artifacts; every
    other key goes through ``prepare_raw_features``, falling back to artifacts
    derived from ``cfg`` when none were supplied.

    Args:
        df: Input data to score.
        model_key: Short model identifier (e.g. ``"xgb"``, ``"glm"``).
        cfg: Parsed configuration mapping.
        artifacts: Saved preprocess artifacts, or ``None`` if unavailable.

    Returns:
        The prepared feature frame.

    Raises:
        RuntimeError: If ``model_key`` is in ``OHT_MODELS`` and ``artifacts``
            is ``None``.
    """
    if model_key in OHT_MODELS:
        if artifacts is None:
            raise RuntimeError(
                f"Preprocess artifacts are required for {model_key} inference. "
                "Enable save_preprocess during training or provide preprocess_artifact_path."
            )
        return apply_preprocess_artifacts(df, artifacts)

    if artifacts is None:
        # No saved artifacts: derive the feature lists from the config instead.
        artifacts = _build_artifacts_from_config(cfg)
    return prepare_raw_features(df, artifacts)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _predict_with_model(
|
|
346
|
+
*,
|
|
347
|
+
model: Any,
|
|
348
|
+
model_key: str,
|
|
349
|
+
task_type: str,
|
|
350
|
+
features: pd.DataFrame,
|
|
351
|
+
) -> np.ndarray:
|
|
352
|
+
if model_key == "xgb":
|
|
353
|
+
if task_type == "classification" and hasattr(model, "predict_proba"):
|
|
354
|
+
return model.predict_proba(features)[:, 1]
|
|
355
|
+
return model.predict(features)
|
|
356
|
+
|
|
357
|
+
if model_key == "glm":
|
|
358
|
+
if sm is None:
|
|
359
|
+
raise RuntimeError(
|
|
360
|
+
f"statsmodels is required for GLM inference ({_SM_IMPORT_ERROR})."
|
|
361
|
+
)
|
|
362
|
+
design = sm.add_constant(features, has_constant="add")
|
|
363
|
+
return model.predict(design)
|
|
364
|
+
|
|
365
|
+
return model.predict(features)
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
class SavedModelPredictor:
    """Load one saved model and score data frames with it.

    The constructor validates the FT-specific restrictions, derives the input
    dimension from the preprocess artifacts where applicable, and loads the
    model via ``load_saved_model``. ``predict`` then prepares features and
    delegates to ``_predict_with_model``.
    """

    def __init__(
        self,
        *,
        model_key: str,
        model_name: str,
        task_type: str,
        cfg: Dict[str, Any],
        output_dir: Path,
        artifacts: Optional[Dict[str, Any]],
    ) -> None:
        """Store the settings and load the saved model.

        Args:
            model_key: Short model identifier (e.g. ``"xgb"``, ``"ft"``).
            model_name: Name used to locate the saved model files.
            task_type: Task name forwarded to loading and prediction.
            cfg: Parsed configuration mapping.
            output_dir: Directory under which saved models are looked up.
            artifacts: Saved preprocess artifacts, or ``None``.

        Raises:
            ValueError: If ``model_key`` is ``"ft"`` and either ``ft_role`` in
                ``cfg`` is not ``"model"`` or ``geo_feature_nmes`` is set.
        """
        self.model_key = model_key
        self.model_name = model_name
        self.task_type = task_type
        self.cfg = cfg
        self.output_dir = output_dir
        self.artifacts = artifacts

        if model_key == "ft" and str(cfg.get("ft_role", "model")) != "model":
            raise ValueError("FT predictions require ft_role == 'model'.")
        if model_key == "ft" and cfg.get("geo_feature_nmes"):
            raise ValueError("FT inference with geo tokens is not supported in this helper.")

        input_dim = None
        if model_key in OHT_MODELS and artifacts is not None:
            # The input width is the number of recorded variable names;
            # an empty list leaves it undetermined.
            var_nmes = list(artifacts.get("var_nmes") or [])
            input_dim = len(var_nmes) if var_nmes else None

        self.model = load_saved_model(
            output_dir=output_dir,
            model_name=model_name,
            model_key=model_key,
            task_type=task_type,
            input_dim=input_dim,
            cfg=cfg,
        )

    def predict(self, df: pd.DataFrame) -> np.ndarray:
        """Prepare features from ``df`` and return the model's predictions."""
        features = _prepare_features(
            df,
            model_key=self.model_key,
            cfg=self.cfg,
            artifacts=self.artifacts,
        )
        return _predict_with_model(
            model=self.model,
            model_key=self.model_key,
            task_type=self.task_type,
            features=features,
        )
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def load_predictor_from_config(
    config_path: str | Path,
    model_key: str,
    *,
    model_name: Optional[str] = None,
    output_dir: Optional[str | Path] = None,
    preprocess_artifact_path: Optional[str | Path] = None,
) -> SavedModelPredictor:
    """Create a ``SavedModelPredictor`` from a JSON config file.

    Args:
        config_path: Path to the config file; relative config values are
            resolved against its parent directory.
        model_key: Short model identifier (e.g. ``"xgb"``).
        model_name: Saved-model name. When omitted it is built as
            ``"{model_list[0]}_{model_categories[0]}"``, which requires both
            config lists to hold exactly one entry.
        output_dir: Overrides the config's ``output_dir`` when given.
        preprocess_artifact_path: Overrides the config's
            ``preprocess_artifact_path`` when given.

    Returns:
        A predictor with the model loaded.

    Raises:
        ValueError: If ``model_name`` cannot be inferred, or no output
            directory can be determined.
    """
    config_path = Path(config_path).resolve()
    cfg = _load_json(config_path)
    base_dir = config_path.parent

    if model_name is None:
        model_list = list(cfg.get("model_list") or [])
        model_categories = list(cfg.get("model_categories") or [])
        if len(model_list) != 1 or len(model_categories) != 1:
            raise ValueError("Provide model_name when config has multiple models.")
        model_name = f"{model_list[0]}_{model_categories[0]}"

    resolved_output = (
        Path(output_dir).resolve()
        if output_dir is not None
        else _resolve_value(cfg.get("output_dir"), model_name=model_name, base_dir=base_dir)
    )
    if resolved_output is None:
        raise ValueError("output_dir is required to locate saved models.")

    # Artifact path precedence: explicit argument, then config entry, then the
    # conventional "<output>/Results/<model_name>_preprocess.json" file.
    resolved_artifact = None
    if preprocess_artifact_path is not None:
        resolved_artifact = Path(preprocess_artifact_path).resolve()
    else:
        resolved_artifact = _resolve_value(
            cfg.get("preprocess_artifact_path"),
            model_name=model_name,
            base_dir=base_dir,
        )

    if resolved_artifact is None:
        candidate = resolved_output / "Results" / f"{model_name}_preprocess.json"
        if candidate.exists():
            resolved_artifact = candidate

    artifacts = None
    if resolved_artifact is not None and resolved_artifact.exists():
        artifacts = load_preprocess_artifacts(resolved_artifact)
    # NOTE(review): the fallback below is placed at function level, i.e. it
    # runs whenever no artifacts were loaded from a file — confirm against the
    # published module.
    if artifacts is None:
        artifacts = _load_preprocess_from_model_file(
            resolved_output, model_name, model_key
        )

    predictor = SavedModelPredictor(
        model_key=model_key,
        model_name=model_name,
        task_type=str(cfg.get("task_type", "regression")),
        cfg=cfg,
        output_dir=resolved_output,
        artifacts=artifacts,
    )
    return predictor
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def predict_from_config(
    config_path: str | Path,
    input_path: str | Path,
    *,
    model_keys: Sequence[str],
    model_name: Optional[str] = None,
    output_path: Optional[str | Path] = None,
    output_col_prefix: str = "pred_",
    batch_size: int = 10000,
) -> pd.DataFrame:
    """Score a dataset with one or more saved models described by a config.

    For every key in ``model_keys`` a predictor is loaded with
    ``load_predictor_from_config`` and its predictions are added to a copy of
    the input data as column ``f"{output_col_prefix}{key}"``.

    Args:
        config_path: Path to the JSON config file.
        input_path: Path of the dataset to score.
        model_keys: Model identifiers to score with. A single string is
            accepted and treated as one key.
        model_name: Saved-model name forwarded to the predictor loader.
        output_path: Optional destination. ``.parquet`` / ``.pq`` suffixes
            are written as Parquet, anything else as CSV; parent directories
            are created as needed.
        output_col_prefix: Prefix for the prediction column names.
        batch_size: Batch size forwarded to ``batch_score``.

    Returns:
        A copy of the input data with one prediction column per model key.
    """
    input_path = Path(input_path).resolve()
    data = _load_dataset(input_path)

    # A bare string is itself a sequence of characters; without this guard
    # "xgb" would be scored as the three keys "x", "g" and "b".
    keys = [model_keys] if isinstance(model_keys, str) else list(model_keys)

    result = data.copy()
    for key in keys:
        predictor = load_predictor_from_config(
            config_path,
            key,
            model_name=model_name,
        )
        output_col = f"{output_col_prefix}{key}"
        scored = batch_score(
            predictor.predict,
            data,
            output_col=output_col,
            batch_size=batch_size,
            keep_input=False,
        )
        # Assign by position so the scored frame's index cannot misalign rows.
        result[output_col] = scored[output_col].values

    if output_path:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        if output_path.suffix.lower() in {".parquet", ".pq"}:
            result.to_parquet(output_path, index=False)
        else:
            result.to_csv(output_path, index=False)
    return result
|
|
@@ -74,6 +74,7 @@ ins_pricing/pricing/rate_table.py
|
|
|
74
74
|
ins_pricing/production/__init__.py
|
|
75
75
|
ins_pricing/production/drift.py
|
|
76
76
|
ins_pricing/production/monitoring.py
|
|
77
|
+
ins_pricing/production/predict.py
|
|
77
78
|
ins_pricing/production/preprocess.py
|
|
78
79
|
ins_pricing/production/scoring.py
|
|
79
80
|
ins_pricing/reporting/README.md
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/config_preprocess.py
RENAMED
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/model_explain_mixin.py
RENAMED
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py
RENAMED
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/model_gnn.py
RENAMED
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/models/model_resn.py
RENAMED
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/__init__.py
RENAMED
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py
RENAMED
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py
RENAMED
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_cross_val_generic.py
RENAMED
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_distributed_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_geo_tokens_split.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ins_pricing-0.2.0 → ins_pricing-0.2.2}/ins_pricing/tests/modelling/test_plotting_library.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|