ins-pricing 0.4.5__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +48 -22
- ins_pricing/__init__.py +142 -90
- ins_pricing/cli/BayesOpt_entry.py +52 -50
- ins_pricing/cli/BayesOpt_incremental.py +39 -105
- ins_pricing/cli/Explain_Run.py +31 -23
- ins_pricing/cli/Explain_entry.py +532 -579
- ins_pricing/cli/Pricing_Run.py +31 -23
- ins_pricing/cli/bayesopt_entry_runner.py +11 -9
- ins_pricing/cli/utils/cli_common.py +256 -256
- ins_pricing/cli/utils/cli_config.py +375 -375
- ins_pricing/cli/utils/import_resolver.py +382 -365
- ins_pricing/cli/utils/notebook_utils.py +340 -340
- ins_pricing/cli/watchdog_run.py +209 -201
- ins_pricing/frontend/__init__.py +10 -10
- ins_pricing/frontend/example_workflows.py +1 -1
- ins_pricing/governance/__init__.py +20 -20
- ins_pricing/governance/release.py +159 -159
- ins_pricing/modelling/__init__.py +147 -92
- ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +2 -2
- ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -562
- ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -964
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
- ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
- ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
- ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
- ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
- ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
- ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
- ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
- ins_pricing/modelling/explain/__init__.py +55 -55
- ins_pricing/modelling/explain/metrics.py +27 -174
- ins_pricing/modelling/explain/permutation.py +237 -237
- ins_pricing/modelling/plotting/__init__.py +40 -36
- ins_pricing/modelling/plotting/compat.py +228 -0
- ins_pricing/modelling/plotting/curves.py +572 -572
- ins_pricing/modelling/plotting/diagnostics.py +163 -163
- ins_pricing/modelling/plotting/geo.py +362 -362
- ins_pricing/modelling/plotting/importance.py +121 -121
- ins_pricing/pricing/__init__.py +27 -27
- ins_pricing/production/__init__.py +35 -25
- ins_pricing/production/{predict.py → inference.py} +140 -57
- ins_pricing/production/monitoring.py +8 -21
- ins_pricing/reporting/__init__.py +11 -11
- ins_pricing/setup.py +1 -1
- ins_pricing/tests/production/test_inference.py +90 -0
- ins_pricing/utils/__init__.py +116 -83
- ins_pricing/utils/device.py +255 -255
- ins_pricing/utils/features.py +53 -0
- ins_pricing/utils/io.py +72 -0
- ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
- ins_pricing/utils/metrics.py +158 -24
- ins_pricing/utils/numerics.py +76 -0
- ins_pricing/utils/paths.py +9 -1
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +182 -182
- ins_pricing-0.5.0.dist-info/RECORD +131 -0
- ins_pricing/modelling/core/BayesOpt.py +0 -146
- ins_pricing/modelling/core/__init__.py +0 -1
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
- ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
- ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
- ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
- ins_pricing/modelling/core/bayesopt/utils.py +0 -105
- ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
- ins_pricing/tests/production/test_predict.py +0 -233
- ins_pricing-0.4.5.dist-info/RECORD +0 -130
- /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
- /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
- /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
4
5
|
from pathlib import Path
|
|
5
|
-
from typing import Any, Dict, Iterable, List, Optional, Sequence, TYPE_CHECKING
|
|
6
|
+
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, TYPE_CHECKING
|
|
6
7
|
|
|
7
8
|
import joblib
|
|
8
9
|
import numpy as np
|
|
@@ -14,25 +15,25 @@ except Exception as exc: # pragma: no cover - optional dependency
|
|
|
14
15
|
sm = None # type: ignore[assignment]
|
|
15
16
|
_SM_IMPORT_ERROR = exc
|
|
16
17
|
|
|
17
|
-
from .preprocess import (
|
|
18
|
+
from ins_pricing.production.preprocess import (
|
|
18
19
|
apply_preprocess_artifacts,
|
|
19
20
|
load_preprocess_artifacts,
|
|
20
21
|
prepare_raw_features,
|
|
21
22
|
)
|
|
22
|
-
from .scoring import batch_score
|
|
23
|
-
from
|
|
23
|
+
from ins_pricing.production.scoring import batch_score
|
|
24
|
+
from ins_pricing.utils.losses import (
|
|
24
25
|
infer_loss_name_from_model_name,
|
|
25
26
|
normalize_loss_name,
|
|
26
27
|
resolve_tweedie_power,
|
|
27
28
|
)
|
|
28
|
-
from ins_pricing.utils
|
|
29
|
+
from ins_pricing.utils import get_logger, load_dataset
|
|
29
30
|
|
|
30
|
-
_logger = get_logger("ins_pricing.production.
|
|
31
|
+
_logger = get_logger("ins_pricing.production.inference")
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
if TYPE_CHECKING:
|
|
34
|
-
from
|
|
35
|
-
from
|
|
35
|
+
from ins_pricing.modelling.bayesopt.models.model_gnn import GraphNeuralNetSklearn
|
|
36
|
+
from ins_pricing.modelling.bayesopt.models.model_resn import ResNetSklearn
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
def _torch_load(*args, **kwargs):
|
|
@@ -43,6 +44,14 @@ def _get_device_manager():
|
|
|
43
44
|
from ins_pricing.utils.device import DeviceManager
|
|
44
45
|
return DeviceManager
|
|
45
46
|
|
|
47
|
+
|
|
48
|
+
def _normalize_device(device: Optional[Any]) -> Optional[Any]:
|
|
49
|
+
if device is None:
|
|
50
|
+
return None
|
|
51
|
+
if isinstance(device, str) and device.strip().lower() in {"auto", "best"}:
|
|
52
|
+
return None
|
|
53
|
+
return device
|
|
54
|
+
|
|
46
55
|
MODEL_PREFIX = {
|
|
47
56
|
"xgb": "Xgboost",
|
|
48
57
|
"glm": "GLM",
|
|
@@ -54,6 +63,54 @@ MODEL_PREFIX = {
|
|
|
54
63
|
OHT_MODELS = {"resn", "gnn", "glm"}
|
|
55
64
|
|
|
56
65
|
|
|
66
|
+
class Predictor:
    """Base interface for production inference predictors.

    Subclasses override :meth:`predict`; the implementation here only raises.
    """

    def predict(self, df: pd.DataFrame) -> np.ndarray:  # pragma: no cover - protocol-like
        """Return predictions for ``df``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass(frozen=True)
class ModelSpec:
    """Immutable bundle of the inputs handed to a model loader."""

    # Key the registry uses to select a loader.
    model_key: str
    model_name: str
    task_type: str
    cfg: Dict[str, Any]
    output_dir: Path
    artifacts: Optional[Dict[str, Any]]
    # Optional device override; defaults to None.
    device: Optional[Any] = None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
ModelLoader = Callable[[ModelSpec], Predictor]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class PredictorRegistry:
    """Registry for mapping model keys to predictor loaders.

    The special key ``"*"`` designates a default loader, used when no loader
    is registered for the requested model key.
    """

    def __init__(self) -> None:
        self._loaders: Dict[str, ModelLoader] = {}
        self._default_loader: Optional[ModelLoader] = None

    def register(self, model_key: str, loader: ModelLoader, *, overwrite: bool = False) -> None:
        """Register ``loader`` for ``model_key`` (``"*"`` sets the default loader).

        Args:
            model_key: Key to register under, or ``"*"`` for the default loader.
            loader: Callable that takes a spec and returns a predictor.
            overwrite: Allow replacing an existing registration.

        Raises:
            TypeError: If ``loader`` is not callable.
            ValueError: If a loader is already registered for the key and
                ``overwrite`` is false.
        """
        # Reject non-callables here so the mistake surfaces at registration
        # time instead of as an opaque failure inside ``load``.
        if not callable(loader):
            raise TypeError(
                f"loader for model_key={model_key!r} must be callable, "
                f"got {type(loader).__name__}."
            )
        if model_key == "*":
            if self._default_loader is not None and not overwrite:
                raise ValueError("Default loader already registered.")
            self._default_loader = loader
            return
        if model_key in self._loaders and not overwrite:
            raise ValueError(f"Loader already registered for model_key={model_key!r}.")
        self._loaders[model_key] = loader

    def load(self, spec: ModelSpec) -> Predictor:
        """Return the predictor built by the loader matching ``spec.model_key``.

        Falls back to the default loader when the key has no registration.

        Raises:
            KeyError: If neither a key-specific nor a default loader exists.
        """
        loader = self._loaders.get(spec.model_key)
        # Compare against None explicitly: a callable object may be falsy
        # (e.g. it defines ``__len__``) and must not be swapped for the default.
        if loader is None:
            loader = self._default_loader
        if loader is None:
            raise KeyError(f"No loader registered for model_key={spec.model_key!r}.")
        return loader(spec)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
_DEFAULT_REGISTRY = PredictorRegistry()
|
|
112
|
+
|
|
113
|
+
|
|
57
114
|
def _default_tweedie_power(model_name: str, task_type: str) -> Optional[float]:
|
|
58
115
|
if task_type == "classification":
|
|
59
116
|
return None
|
|
@@ -100,39 +157,6 @@ def _load_json(path: Path) -> Dict[str, Any]:
|
|
|
100
157
|
return json.loads(path.read_text(encoding="utf-8"))
|
|
101
158
|
|
|
102
159
|
|
|
103
|
-
def _infer_format(path: Path) -> str:
|
|
104
|
-
suffix = path.suffix.lower()
|
|
105
|
-
if suffix in {".parquet", ".pq"}:
|
|
106
|
-
return "parquet"
|
|
107
|
-
if suffix in {".feather", ".ft"}:
|
|
108
|
-
return "feather"
|
|
109
|
-
return "csv"
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def _load_dataset(path: Path, chunksize: Optional[int] = None) -> pd.DataFrame:
|
|
113
|
-
"""Load dataset with optional chunked reading for large CSV files.
|
|
114
|
-
|
|
115
|
-
Args:
|
|
116
|
-
path: Path to the dataset file
|
|
117
|
-
chunksize: If specified for CSV files, reads in chunks and concatenates.
|
|
118
|
-
Useful for large files that may not fit in memory at once.
|
|
119
|
-
|
|
120
|
-
Returns:
|
|
121
|
-
DataFrame containing the full dataset
|
|
122
|
-
"""
|
|
123
|
-
fmt = _infer_format(path)
|
|
124
|
-
if fmt == "parquet":
|
|
125
|
-
return pd.read_parquet(path)
|
|
126
|
-
if fmt == "feather":
|
|
127
|
-
return pd.read_feather(path)
|
|
128
|
-
|
|
129
|
-
# For CSV, support chunked reading for large files
|
|
130
|
-
if chunksize is not None:
|
|
131
|
-
chunks = []
|
|
132
|
-
for chunk in pd.read_csv(path, low_memory=False, chunksize=chunksize):
|
|
133
|
-
chunks.append(chunk)
|
|
134
|
-
return pd.concat(chunks, ignore_index=True)
|
|
135
|
-
return pd.read_csv(path, low_memory=False)
|
|
136
160
|
|
|
137
161
|
|
|
138
162
|
def _model_file_path(output_dir: Path, model_name: str, model_key: str) -> Path:
|
|
@@ -160,10 +184,10 @@ def _load_preprocess_from_model_file(
|
|
|
160
184
|
return None
|
|
161
185
|
|
|
162
186
|
|
|
163
|
-
def _move_to_device(model_obj: Any) -> None:
|
|
187
|
+
def _move_to_device(model_obj: Any, device: Optional[Any] = None) -> None:
|
|
164
188
|
"""Move model to best available device using shared DeviceManager."""
|
|
165
189
|
DeviceManager = _get_device_manager()
|
|
166
|
-
DeviceManager.move_to_device(model_obj)
|
|
190
|
+
DeviceManager.move_to_device(model_obj, device=device)
|
|
167
191
|
if hasattr(model_obj, "eval"):
|
|
168
192
|
model_obj.eval()
|
|
169
193
|
|
|
@@ -209,7 +233,7 @@ def _build_resn_model(
|
|
|
209
233
|
loss_name: str,
|
|
210
234
|
params: Dict[str, Any],
|
|
211
235
|
) -> ResNetSklearn:
|
|
212
|
-
from
|
|
236
|
+
from ins_pricing.modelling.bayesopt.models.model_resn import ResNetSklearn
|
|
213
237
|
if loss_name == "tweedie":
|
|
214
238
|
power = params.get(
|
|
215
239
|
"tw_power", _default_tweedie_power(model_name, task_type))
|
|
@@ -248,7 +272,7 @@ def _build_gnn_model(
|
|
|
248
272
|
loss_name: str,
|
|
249
273
|
params: Dict[str, Any],
|
|
250
274
|
) -> GraphNeuralNetSklearn:
|
|
251
|
-
from
|
|
275
|
+
from ins_pricing.modelling.bayesopt.models.model_gnn import GraphNeuralNetSklearn
|
|
252
276
|
base_tw = _default_tweedie_power(model_name, task_type)
|
|
253
277
|
if loss_name == "tweedie":
|
|
254
278
|
tw_power = params.get("tw_power", base_tw)
|
|
@@ -288,6 +312,7 @@ def load_saved_model(
|
|
|
288
312
|
task_type: str,
|
|
289
313
|
input_dim: Optional[int],
|
|
290
314
|
cfg: Dict[str, Any],
|
|
315
|
+
device: Optional[Any] = None,
|
|
291
316
|
) -> Any:
|
|
292
317
|
model_path = _model_file_path(Path(output_dir), model_name, model_key)
|
|
293
318
|
if not model_path.exists():
|
|
@@ -308,8 +333,8 @@ def load_saved_model(
|
|
|
308
333
|
state_dict = payload.get("state_dict")
|
|
309
334
|
model_config = payload.get("model_config", {})
|
|
310
335
|
|
|
311
|
-
from
|
|
312
|
-
from
|
|
336
|
+
from ins_pricing.modelling.bayesopt.models import FTTransformerSklearn
|
|
337
|
+
from ins_pricing.modelling.bayesopt.models.model_ft_components import FTTransformerCore
|
|
313
338
|
|
|
314
339
|
# Reconstruct model from config
|
|
315
340
|
resolved_loss = model_config.get("loss_name")
|
|
@@ -359,15 +384,15 @@ def load_saved_model(
|
|
|
359
384
|
model.ft = core
|
|
360
385
|
model.ft.load_state_dict(state_dict)
|
|
361
386
|
|
|
362
|
-
_move_to_device(model)
|
|
387
|
+
_move_to_device(model, device=device)
|
|
363
388
|
return model
|
|
364
389
|
elif "model" in payload:
|
|
365
390
|
# Legacy format: full model object
|
|
366
391
|
model = payload.get("model")
|
|
367
|
-
_move_to_device(model)
|
|
392
|
+
_move_to_device(model, device=device)
|
|
368
393
|
return model
|
|
369
394
|
# Very old format: direct model object
|
|
370
|
-
_move_to_device(payload)
|
|
395
|
+
_move_to_device(payload, device=device)
|
|
371
396
|
return payload
|
|
372
397
|
|
|
373
398
|
if model_key == "resn":
|
|
@@ -395,7 +420,7 @@ def load_saved_model(
|
|
|
395
420
|
params=params,
|
|
396
421
|
)
|
|
397
422
|
model.resnet.load_state_dict(state_dict)
|
|
398
|
-
_move_to_device(model)
|
|
423
|
+
_move_to_device(model, device=device)
|
|
399
424
|
return model
|
|
400
425
|
|
|
401
426
|
if model_key == "gnn":
|
|
@@ -420,7 +445,7 @@ def load_saved_model(
|
|
|
420
445
|
base_gnn = getattr(model, "_unwrap_gnn", lambda: None)()
|
|
421
446
|
if base_gnn is not None and state_dict is not None:
|
|
422
447
|
base_gnn.load_state_dict(state_dict, strict=False)
|
|
423
|
-
_move_to_device(model)
|
|
448
|
+
_move_to_device(model, device=device)
|
|
424
449
|
return model
|
|
425
450
|
|
|
426
451
|
raise ValueError(f"Unsupported model key: {model_key}")
|
|
@@ -484,7 +509,7 @@ def _predict_with_model(
|
|
|
484
509
|
return model.predict(features)
|
|
485
510
|
|
|
486
511
|
|
|
487
|
-
class SavedModelPredictor:
|
|
512
|
+
class SavedModelPredictor(Predictor):
|
|
488
513
|
def __init__(
|
|
489
514
|
self,
|
|
490
515
|
*,
|
|
@@ -494,6 +519,7 @@ class SavedModelPredictor:
|
|
|
494
519
|
cfg: Dict[str, Any],
|
|
495
520
|
output_dir: Path,
|
|
496
521
|
artifacts: Optional[Dict[str, Any]],
|
|
522
|
+
device: Optional[Any] = None,
|
|
497
523
|
) -> None:
|
|
498
524
|
self.model_key = model_key
|
|
499
525
|
self.model_name = model_name
|
|
@@ -501,6 +527,7 @@ class SavedModelPredictor:
|
|
|
501
527
|
self.cfg = cfg
|
|
502
528
|
self.output_dir = output_dir
|
|
503
529
|
self.artifacts = artifacts
|
|
530
|
+
self.device = _normalize_device(device)
|
|
504
531
|
|
|
505
532
|
if model_key == "ft" and str(cfg.get("ft_role", "model")) != "model":
|
|
506
533
|
raise ValueError("FT predictions require ft_role == 'model'.")
|
|
@@ -520,6 +547,7 @@ class SavedModelPredictor:
|
|
|
520
547
|
task_type=task_type,
|
|
521
548
|
input_dim=input_dim,
|
|
522
549
|
cfg=cfg,
|
|
550
|
+
device=self.device,
|
|
523
551
|
)
|
|
524
552
|
|
|
525
553
|
def predict(self, df: pd.DataFrame) -> np.ndarray:
|
|
@@ -537,6 +565,35 @@ class SavedModelPredictor:
|
|
|
537
565
|
)
|
|
538
566
|
|
|
539
567
|
|
|
568
|
+
def _default_loader(spec: ModelSpec) -> SavedModelPredictor:
    """Build a ``SavedModelPredictor`` from the fields of ``spec``."""
    predictor_kwargs = {
        "model_key": spec.model_key,
        "model_name": spec.model_name,
        "task_type": spec.task_type,
        "cfg": spec.cfg,
        "output_dir": spec.output_dir,
        "artifacts": spec.artifacts,
        "device": spec.device,
    }
    return SavedModelPredictor(**predictor_kwargs)
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
_DEFAULT_REGISTRY.register("*", _default_loader)
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def register_model_loader(
    model_key: str,
    loader: ModelLoader,
    *,
    overwrite: bool = False,
    registry: Optional[PredictorRegistry] = None,
) -> None:
    """Register ``loader`` for ``model_key`` on a predictor registry.

    Args:
        model_key: Key to register the loader under.
        loader: Callable that takes a spec and returns a predictor.
        overwrite: Forwarded to the registry's ``register`` method.
        registry: Registry to register on; the module-level default registry
            is used only when this is ``None``.
    """
    # Test for None explicitly so a registry object that happens to be falsy
    # (e.g. one that defines ``__len__``) is not replaced by the global default.
    target = _DEFAULT_REGISTRY if registry is None else registry
    target.register(model_key, loader, overwrite=overwrite)
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def load_predictor(spec: ModelSpec, *, registry: Optional[PredictorRegistry] = None) -> Predictor:
    """Load the predictor for ``spec`` from a predictor registry.

    Args:
        spec: Model specification passed to the registry's ``load`` method.
        registry: Registry to load from; the module-level default registry is
            used only when this is ``None``.

    Returns:
        Whatever the selected registry's ``load`` returns for ``spec``.
    """
    # Test for None explicitly so a registry object that happens to be falsy
    # (e.g. one that defines ``__len__``) is not replaced by the global default.
    source = _DEFAULT_REGISTRY if registry is None else registry
    return source.load(spec)
|
|
595
|
+
|
|
596
|
+
|
|
540
597
|
def load_predictor_from_config(
|
|
541
598
|
config_path: str | Path,
|
|
542
599
|
model_key: str,
|
|
@@ -544,7 +601,9 @@ def load_predictor_from_config(
|
|
|
544
601
|
model_name: Optional[str] = None,
|
|
545
602
|
output_dir: Optional[str | Path] = None,
|
|
546
603
|
preprocess_artifact_path: Optional[str | Path] = None,
|
|
547
|
-
|
|
604
|
+
device: Optional[Any] = None,
|
|
605
|
+
registry: Optional[PredictorRegistry] = None,
|
|
606
|
+
) -> Predictor:
|
|
548
607
|
config_path = Path(config_path).resolve()
|
|
549
608
|
cfg = _load_json(config_path)
|
|
550
609
|
base_dir = config_path.parent
|
|
@@ -589,15 +648,17 @@ def load_predictor_from_config(
|
|
|
589
648
|
resolved_output, model_name, model_key
|
|
590
649
|
)
|
|
591
650
|
|
|
592
|
-
|
|
651
|
+
device = _normalize_device(device)
|
|
652
|
+
spec = ModelSpec(
|
|
593
653
|
model_key=model_key,
|
|
594
654
|
model_name=model_name,
|
|
595
655
|
task_type=str(cfg.get("task_type", "regression")),
|
|
596
656
|
cfg=cfg,
|
|
597
657
|
output_dir=resolved_output,
|
|
598
658
|
artifacts=artifacts,
|
|
659
|
+
device=device,
|
|
599
660
|
)
|
|
600
|
-
return
|
|
661
|
+
return load_predictor(spec, registry=registry)
|
|
601
662
|
|
|
602
663
|
|
|
603
664
|
def predict_from_config(
|
|
@@ -612,6 +673,8 @@ def predict_from_config(
|
|
|
612
673
|
chunksize: Optional[int] = None,
|
|
613
674
|
parallel_load: bool = False,
|
|
614
675
|
n_jobs: int = -1,
|
|
676
|
+
device: Optional[Any] = None,
|
|
677
|
+
registry: Optional[PredictorRegistry] = None,
|
|
615
678
|
) -> pd.DataFrame:
|
|
616
679
|
"""Predict from multiple models with optional parallel loading.
|
|
617
680
|
|
|
@@ -626,12 +689,14 @@ def predict_from_config(
|
|
|
626
689
|
chunksize: Optional chunk size for CSV reading
|
|
627
690
|
parallel_load: If True, load models in parallel (faster for multiple models)
|
|
628
691
|
n_jobs: Number of parallel jobs for model loading (-1 = all cores)
|
|
692
|
+
device: Optional torch device or string override (e.g., "cuda", "mps", "cpu")
|
|
693
|
+
registry: Optional predictor registry override
|
|
629
694
|
|
|
630
695
|
Returns:
|
|
631
696
|
DataFrame with predictions from all models
|
|
632
697
|
"""
|
|
633
698
|
input_path = Path(input_path).resolve()
|
|
634
|
-
data =
|
|
699
|
+
data = load_dataset(input_path, data_format="auto", low_memory=False, chunksize=chunksize)
|
|
635
700
|
|
|
636
701
|
result = data.copy()
|
|
637
702
|
|
|
@@ -644,6 +709,8 @@ def predict_from_config(
|
|
|
644
709
|
config_path,
|
|
645
710
|
key,
|
|
646
711
|
model_name=model_name,
|
|
712
|
+
device=device,
|
|
713
|
+
registry=registry,
|
|
647
714
|
)
|
|
648
715
|
output_col = f"{output_col_prefix}{key}"
|
|
649
716
|
scored = batch_score(
|
|
@@ -668,6 +735,8 @@ def predict_from_config(
|
|
|
668
735
|
config_path,
|
|
669
736
|
key,
|
|
670
737
|
model_name=model_name,
|
|
738
|
+
device=device,
|
|
739
|
+
registry=registry,
|
|
671
740
|
)
|
|
672
741
|
output_col = f"{output_col_prefix}{key}"
|
|
673
742
|
scored = batch_score(
|
|
@@ -691,3 +760,17 @@ def predict_from_config(
|
|
|
691
760
|
result.to_csv(output_path, index=False)
|
|
692
761
|
|
|
693
762
|
return result
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
__all__ = [
|
|
766
|
+
"Predictor",
|
|
767
|
+
"ModelSpec",
|
|
768
|
+
"PredictorRegistry",
|
|
769
|
+
"register_model_loader",
|
|
770
|
+
"load_predictor",
|
|
771
|
+
"SavedModelPredictor",
|
|
772
|
+
"load_best_params",
|
|
773
|
+
"load_saved_model",
|
|
774
|
+
"load_predictor_from_config",
|
|
775
|
+
"predict_from_config",
|
|
776
|
+
]
|
|
@@ -5,6 +5,8 @@ from typing import Dict, Iterable, Optional
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
|
+
from ins_pricing.utils.metrics import mae, mape, r2_score, rmse
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
def _safe_div(numer: float, denom: float, default: float = 0.0) -> float:
|
|
10
12
|
if denom == 0:
|
|
@@ -18,27 +20,12 @@ def regression_metrics(
|
|
|
18
20
|
*,
|
|
19
21
|
weight: Optional[np.ndarray] = None,
|
|
20
22
|
) -> Dict[str, float]:
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
err = y_true - y_pred
|
|
28
|
-
if weight is None:
|
|
29
|
-
mse = float(np.mean(err ** 2))
|
|
30
|
-
mae = float(np.mean(np.abs(err)))
|
|
31
|
-
else:
|
|
32
|
-
w_sum = float(np.sum(weight))
|
|
33
|
-
mse = float(np.sum(weight * (err ** 2)) / max(w_sum, 1.0))
|
|
34
|
-
mae = float(np.sum(weight * np.abs(err)) / max(w_sum, 1.0))
|
|
35
|
-
rmse = float(np.sqrt(mse))
|
|
36
|
-
denom = float(np.mean(y_true)) if np.mean(y_true) != 0 else 1.0
|
|
37
|
-
mape = float(np.mean(np.abs(err) / np.clip(np.abs(y_true), 1e-9, None)))
|
|
38
|
-
ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
|
|
39
|
-
ss_res = float(np.sum(err ** 2))
|
|
40
|
-
r2 = 1.0 - _safe_div(ss_res, ss_tot, default=0.0)
|
|
41
|
-
return {"rmse": rmse, "mae": mae, "mape": mape, "r2": r2}
|
|
23
|
+
return {
|
|
24
|
+
"rmse": rmse(y_true, y_pred, sample_weight=weight),
|
|
25
|
+
"mae": mae(y_true, y_pred, sample_weight=weight),
|
|
26
|
+
"mape": mape(y_true, y_pred, sample_weight=weight),
|
|
27
|
+
"r2": r2_score(y_true, y_pred, sample_weight=weight),
|
|
28
|
+
}
|
|
42
29
|
|
|
43
30
|
|
|
44
31
|
def loss_ratio(
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from .report_builder import ReportPayload, build_report, write_report
|
|
4
|
-
from .scheduler import schedule_daily
|
|
5
|
-
|
|
6
|
-
__all__ = [
|
|
7
|
-
"ReportPayload",
|
|
8
|
-
"build_report",
|
|
9
|
-
"write_report",
|
|
10
|
-
"schedule_daily",
|
|
11
|
-
]
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from ins_pricing.reporting.report_builder import ReportPayload, build_report, write_report
|
|
4
|
+
from ins_pricing.reporting.scheduler import schedule_daily
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"ReportPayload",
|
|
8
|
+
"build_report",
|
|
9
|
+
"write_report",
|
|
10
|
+
"schedule_daily",
|
|
11
|
+
]
|
ins_pricing/setup.py
CHANGED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Tests for production inference module (lightweight API checks)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pytest
|
|
10
|
+
|
|
11
|
+
pytest.importorskip("ins_pricing.production.inference", reason="inference module not available")
|
|
12
|
+
|
|
13
|
+
from ins_pricing.production.inference import (
|
|
14
|
+
ModelSpec,
|
|
15
|
+
Predictor,
|
|
16
|
+
PredictorRegistry,
|
|
17
|
+
load_predictor,
|
|
18
|
+
register_model_loader,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class _DummyPredictor(Predictor):
    """Predictor stub that returns ``value`` once per input row."""

    value: float = 1.0

    def predict(self, df):  # type: ignore[override]
        n_rows = len(df)
        return np.full(n_rows, self.value)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_registry_loads_custom_predictor():
    """A loader registered on a custom registry is used and receives the spec."""
    seen = {}

    def _capture_loader(spec: ModelSpec) -> Predictor:
        seen["spec"] = spec
        return _DummyPredictor(value=3.0)

    custom_registry = PredictorRegistry()
    register_model_loader("xgb", _capture_loader, registry=custom_registry)

    spec = ModelSpec(
        model_key="xgb",
        model_name="demo",
        task_type="regression",
        cfg={},
        output_dir=Path("."),
        artifacts=None,
    )
    loaded = load_predictor(spec, registry=custom_registry)

    assert isinstance(loaded, _DummyPredictor)
    assert seen["spec"] is spec
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_registry_missing_key_raises():
    """Loading from a registry with no matching and no default loader raises KeyError."""
    empty_registry = PredictorRegistry()
    spec_fields = {
        "model_key": "glm",
        "model_name": "demo",
        "task_type": "regression",
        "cfg": {},
        "output_dir": Path("."),
        "artifacts": None,
    }
    spec = ModelSpec(**spec_fields)

    with pytest.raises(KeyError):
        load_predictor(spec, registry=empty_registry)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_register_overwrite_controls():
    """Re-registering a key fails unless ``overwrite=True``, which replaces the loader."""

    def _loader_returning(value: float):
        # Build a loader whose predictor reports ``value``.
        def _loader(spec: ModelSpec) -> Predictor:
            return _DummyPredictor(value=value)

        return _loader

    first_loader = _loader_returning(1.0)
    second_loader = _loader_returning(2.0)

    registry = PredictorRegistry()
    register_model_loader("ft", first_loader, registry=registry)

    # A second registration without overwrite must be rejected.
    with pytest.raises(ValueError):
        register_model_loader("ft", second_loader, registry=registry, overwrite=False)

    register_model_loader("ft", second_loader, registry=registry, overwrite=True)

    spec = ModelSpec(
        model_key="ft",
        model_name="demo",
        task_type="regression",
        cfg={},
        output_dir=Path("."),
        artifacts=None,
    )
    loaded = load_predictor(spec, registry=registry)

    assert isinstance(loaded, _DummyPredictor)
    assert loaded.value == 2.0
|