ins_pricing-0.1.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +60 -0
- ins_pricing/__init__.py +102 -0
- ins_pricing/governance/README.md +18 -0
- ins_pricing/governance/__init__.py +20 -0
- ins_pricing/governance/approval.py +93 -0
- ins_pricing/governance/audit.py +37 -0
- ins_pricing/governance/registry.py +99 -0
- ins_pricing/governance/release.py +159 -0
- ins_pricing/modelling/BayesOpt.py +146 -0
- ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
- ins_pricing/modelling/BayesOpt_entry.py +575 -0
- ins_pricing/modelling/BayesOpt_incremental.py +731 -0
- ins_pricing/modelling/Explain_Run.py +36 -0
- ins_pricing/modelling/Explain_entry.py +539 -0
- ins_pricing/modelling/Pricing_Run.py +36 -0
- ins_pricing/modelling/README.md +33 -0
- ins_pricing/modelling/__init__.py +44 -0
- ins_pricing/modelling/bayesopt/__init__.py +98 -0
- ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
- ins_pricing/modelling/bayesopt/core.py +1476 -0
- ins_pricing/modelling/bayesopt/models.py +2196 -0
- ins_pricing/modelling/bayesopt/trainers.py +2446 -0
- ins_pricing/modelling/bayesopt/utils.py +1021 -0
- ins_pricing/modelling/cli_common.py +136 -0
- ins_pricing/modelling/explain/__init__.py +55 -0
- ins_pricing/modelling/explain/gradients.py +334 -0
- ins_pricing/modelling/explain/metrics.py +176 -0
- ins_pricing/modelling/explain/permutation.py +155 -0
- ins_pricing/modelling/explain/shap_utils.py +146 -0
- ins_pricing/modelling/notebook_utils.py +284 -0
- ins_pricing/modelling/plotting/__init__.py +45 -0
- ins_pricing/modelling/plotting/common.py +63 -0
- ins_pricing/modelling/plotting/curves.py +572 -0
- ins_pricing/modelling/plotting/diagnostics.py +139 -0
- ins_pricing/modelling/plotting/geo.py +362 -0
- ins_pricing/modelling/plotting/importance.py +121 -0
- ins_pricing/modelling/run_logging.py +133 -0
- ins_pricing/modelling/tests/conftest.py +8 -0
- ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing/modelling/tests/test_explain.py +56 -0
- ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing/modelling/tests/test_plotting.py +63 -0
- ins_pricing/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing/modelling/watchdog_run.py +211 -0
- ins_pricing/pricing/README.md +44 -0
- ins_pricing/pricing/__init__.py +27 -0
- ins_pricing/pricing/calibration.py +39 -0
- ins_pricing/pricing/data_quality.py +117 -0
- ins_pricing/pricing/exposure.py +85 -0
- ins_pricing/pricing/factors.py +91 -0
- ins_pricing/pricing/monitoring.py +99 -0
- ins_pricing/pricing/rate_table.py +78 -0
- ins_pricing/production/__init__.py +21 -0
- ins_pricing/production/drift.py +30 -0
- ins_pricing/production/monitoring.py +143 -0
- ins_pricing/production/scoring.py +40 -0
- ins_pricing/reporting/README.md +20 -0
- ins_pricing/reporting/__init__.py +11 -0
- ins_pricing/reporting/report_builder.py +72 -0
- ins_pricing/reporting/scheduler.py +45 -0
- ins_pricing/setup.py +41 -0
- ins_pricing v2/__init__.py +23 -0
- ins_pricing v2/governance/__init__.py +20 -0
- ins_pricing v2/governance/approval.py +93 -0
- ins_pricing v2/governance/audit.py +37 -0
- ins_pricing v2/governance/registry.py +99 -0
- ins_pricing v2/governance/release.py +159 -0
- ins_pricing v2/modelling/Explain_Run.py +36 -0
- ins_pricing v2/modelling/Pricing_Run.py +36 -0
- ins_pricing v2/modelling/__init__.py +151 -0
- ins_pricing v2/modelling/cli_common.py +141 -0
- ins_pricing v2/modelling/config.py +249 -0
- ins_pricing v2/modelling/config_preprocess.py +254 -0
- ins_pricing v2/modelling/core.py +741 -0
- ins_pricing v2/modelling/data_container.py +42 -0
- ins_pricing v2/modelling/explain/__init__.py +55 -0
- ins_pricing v2/modelling/explain/gradients.py +334 -0
- ins_pricing v2/modelling/explain/metrics.py +176 -0
- ins_pricing v2/modelling/explain/permutation.py +155 -0
- ins_pricing v2/modelling/explain/shap_utils.py +146 -0
- ins_pricing v2/modelling/features.py +215 -0
- ins_pricing v2/modelling/model_manager.py +148 -0
- ins_pricing v2/modelling/model_plotting.py +463 -0
- ins_pricing v2/modelling/models.py +2203 -0
- ins_pricing v2/modelling/notebook_utils.py +294 -0
- ins_pricing v2/modelling/plotting/__init__.py +45 -0
- ins_pricing v2/modelling/plotting/common.py +63 -0
- ins_pricing v2/modelling/plotting/curves.py +572 -0
- ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
- ins_pricing v2/modelling/plotting/geo.py +362 -0
- ins_pricing v2/modelling/plotting/importance.py +121 -0
- ins_pricing v2/modelling/run_logging.py +133 -0
- ins_pricing v2/modelling/tests/conftest.py +8 -0
- ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing v2/modelling/tests/test_explain.py +56 -0
- ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing v2/modelling/tests/test_plotting.py +63 -0
- ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing v2/modelling/trainers.py +2447 -0
- ins_pricing v2/modelling/utils.py +1020 -0
- ins_pricing v2/modelling/watchdog_run.py +211 -0
- ins_pricing v2/pricing/__init__.py +27 -0
- ins_pricing v2/pricing/calibration.py +39 -0
- ins_pricing v2/pricing/data_quality.py +117 -0
- ins_pricing v2/pricing/exposure.py +85 -0
- ins_pricing v2/pricing/factors.py +91 -0
- ins_pricing v2/pricing/monitoring.py +99 -0
- ins_pricing v2/pricing/rate_table.py +78 -0
- ins_pricing v2/production/__init__.py +21 -0
- ins_pricing v2/production/drift.py +30 -0
- ins_pricing v2/production/monitoring.py +143 -0
- ins_pricing v2/production/scoring.py +40 -0
- ins_pricing v2/reporting/__init__.py +11 -0
- ins_pricing v2/reporting/report_builder.py +72 -0
- ins_pricing v2/reporting/scheduler.py +45 -0
- ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
- ins_pricing v2/scripts/Explain_entry.py +545 -0
- ins_pricing v2/scripts/__init__.py +1 -0
- ins_pricing v2/scripts/train.py +568 -0
- ins_pricing v2/setup.py +55 -0
- ins_pricing v2/smoke_test.py +28 -0
- ins_pricing-0.1.6.dist-info/METADATA +78 -0
- ins_pricing-0.1.6.dist-info/RECORD +169 -0
- ins_pricing-0.1.6.dist-info/WHEEL +5 -0
- ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
- user_packages/__init__.py +105 -0
- user_packages legacy/BayesOpt.py +5659 -0
- user_packages legacy/BayesOpt_entry.py +513 -0
- user_packages legacy/BayesOpt_incremental.py +685 -0
- user_packages legacy/Pricing_Run.py +36 -0
- user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
- user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
- user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
- user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
- user_packages legacy/Try/BayesOpt legacy.py +3280 -0
- user_packages legacy/Try/BayesOpt.py +838 -0
- user_packages legacy/Try/BayesOptAll.py +1569 -0
- user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
- user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
- user_packages legacy/Try/BayesOptSearch.py +830 -0
- user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
- user_packages legacy/Try/BayesOptV1.py +1911 -0
- user_packages legacy/Try/BayesOptV10.py +2973 -0
- user_packages legacy/Try/BayesOptV11.py +3001 -0
- user_packages legacy/Try/BayesOptV12.py +3001 -0
- user_packages legacy/Try/BayesOptV2.py +2065 -0
- user_packages legacy/Try/BayesOptV3.py +2209 -0
- user_packages legacy/Try/BayesOptV4.py +2342 -0
- user_packages legacy/Try/BayesOptV5.py +2372 -0
- user_packages legacy/Try/BayesOptV6.py +2759 -0
- user_packages legacy/Try/BayesOptV7.py +2832 -0
- user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
- user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
- user_packages legacy/Try/BayesOptV9.py +2927 -0
- user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
- user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
- user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
- user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
- user_packages legacy/Try/xgbbayesopt.py +523 -0
- user_packages legacy/__init__.py +19 -0
- user_packages legacy/cli_common.py +124 -0
- user_packages legacy/notebook_utils.py +228 -0
- user_packages legacy/watchdog_run.py +202 -0
@@ -0,0 +1,159 @@
from __future__ import annotations

import json
from dataclasses import asdict, dataclass, field
from datetime import datetime
from pathlib import Path
from typing import List, Optional

from .audit import AuditLogger
from .registry import ModelRegistry


@dataclass
class ModelRef:
    name: str
    version: str
    activated_at: str
    actor: Optional[str] = None
    note: Optional[str] = None


@dataclass
class DeploymentState:
    env: str
    active: Optional[ModelRef] = None
    history: List[ModelRef] = field(default_factory=list)
    updated_at: Optional[str] = None


class ReleaseManager:
    """Environment release manager with rollback support."""

    def __init__(
        self,
        state_dir: str | Path,
        *,
        registry: Optional[ModelRegistry] = None,
        audit_logger: Optional[AuditLogger] = None,
    ):
        self.state_dir = Path(state_dir)
        self.state_dir.mkdir(parents=True, exist_ok=True)
        self.registry = registry
        self.audit_logger = audit_logger

    def _state_path(self, env: str) -> Path:
        return self.state_dir / f"{env}.json"

    def _load(self, env: str) -> DeploymentState:
        path = self._state_path(env)
        if not path.exists():
            return DeploymentState(env=env)
        with path.open("r", encoding="utf-8") as fh:
            payload = json.load(fh)
        active = payload.get("active")
        history = payload.get("history", [])
        return DeploymentState(
            env=payload.get("env", env),
            active=ModelRef(**active) if active else None,
            history=[ModelRef(**item) for item in history],
            updated_at=payload.get("updated_at"),
        )

    def _save(self, state: DeploymentState) -> None:
        payload = {
            "env": state.env,
            "active": asdict(state.active) if state.active else None,
            "history": [asdict(item) for item in state.history],
            "updated_at": state.updated_at,
        }
        path = self._state_path(state.env)
        with path.open("w", encoding="utf-8") as fh:
            json.dump(payload, fh, indent=2, ensure_ascii=True)

    def get_active(self, env: str) -> Optional[ModelRef]:
        state = self._load(env)
        return state.active

    def list_history(self, env: str) -> List[ModelRef]:
        return self._load(env).history

    def deploy(
        self,
        env: str,
        name: str,
        version: str,
        *,
        actor: Optional[str] = None,
        note: Optional[str] = None,
        update_registry_status: bool = True,
        registry_status: str = "production",
    ) -> DeploymentState:
        state = self._load(env)
        if state.active and state.active.name == name and state.active.version == version:
            return state

        if state.active is not None:
            state.history.append(state.active)

        now = datetime.utcnow().isoformat()
        state.active = ModelRef(
            name=name,
            version=version,
            activated_at=now,
            actor=actor,
            note=note,
        )
        state.updated_at = now
        self._save(state)

        if self.registry and update_registry_status:
            self.registry.promote(name, version, new_status=registry_status)

        if self.audit_logger:
            self.audit_logger.log(
                "deploy",
                actor or "unknown",
                metadata={"env": env, "name": name, "version": version},
                note=note,
            )

        return state

    def rollback(
        self,
        env: str,
        *,
        actor: Optional[str] = None,
        note: Optional[str] = None,
        update_registry_status: bool = False,
        registry_status: str = "production",
    ) -> DeploymentState:
        state = self._load(env)
        if not state.history:
            raise ValueError("No history available to rollback.")

        previous = state.history.pop()
        now = datetime.utcnow().isoformat()
        state.active = ModelRef(
            name=previous.name,
            version=previous.version,
            activated_at=now,
            actor=actor or previous.actor,
            note=note or previous.note,
        )
        state.updated_at = now
        self._save(state)

        if self.registry and update_registry_status:
            self.registry.promote(previous.name, previous.version, new_status=registry_status)

        if self.audit_logger:
            self.audit_logger.log(
                "rollback",
                actor or "unknown",
                metadata={"env": env, "name": previous.name, "version": previous.version},
                note=note,
            )

        return state
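For context, a minimal usage sketch of the ReleaseManager added above. The state directory, environment name, and model name/version are placeholders, and the import path assumes the `ins_pricing.governance` layout shown in the file list:

```python
# Illustrative sketch only; the directory and model identifiers are invented for the example.
from ins_pricing.governance.release import ReleaseManager

manager = ReleaseManager("deploy_state")   # per-environment JSON state files live in deploy_state/
state = manager.deploy("prod", "freq_model", "1.2.0", actor="alice", note="Q3 refresh")
print(state.active.version)                # -> "1.2.0"

# Deploy a newer version, then roll back to the previous active reference.
manager.deploy("prod", "freq_model", "1.3.0", actor="alice")
state = manager.rollback("prod", actor="bob", note="metrics regression")
print(state.active.version)                # -> "1.2.0" again; 1.3.0 is dropped from the state
                                           #    (no registry or audit logger configured here)
```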
@@ -0,0 +1,36 @@
from __future__ import annotations

import argparse
from pathlib import Path
from typing import Optional

try:
    from .notebook_utils import run_from_config  # type: ignore
except Exception:  # pragma: no cover
    from notebook_utils import run_from_config  # type: ignore


def run(config_json: str | Path) -> None:
    """Run explain by config.json (runner.mode=explain)."""
    run_from_config(config_json)


def _build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Explain_Run: run explain by config.json (runner.mode=explain)."
    )
    parser.add_argument(
        "--config-json",
        required=True,
        help="Path to config.json (relative paths are resolved from ins_pricing/modelling/ when possible).",
    )
    return parser


def main(argv: Optional[list[str]] = None) -> None:
    args = _build_parser().parse_args(argv)
    run(args.config_json)


if __name__ == "__main__":
    main()
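This entry script is a thin argparse wrapper around run_from_config. A minimal sketch of invoking it programmatically; the config path is a placeholder and the import assumes the module is reachable as ins_pricing.modelling.Explain_Run:

```python
# Illustrative sketch only; the config file below does not ship with the package.
from ins_pricing.modelling.Explain_Run import main, run

main(["--config-json", "configs/explain_config.json"])   # argparse entry point
run("configs/explain_config.json")                        # direct call, same effect
```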
@@ -0,0 +1,36 @@
from __future__ import annotations

import argparse
from pathlib import Path
from typing import Optional

try:
    from .notebook_utils import run_from_config  # type: ignore
except Exception:  # pragma: no cover
    from notebook_utils import run_from_config  # type: ignore


def run(config_json: str | Path) -> None:
    """Unified entry point: run entry/incremental/watchdog/DDP based on config.json runner."""
    run_from_config(config_json)


def _build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Pricing_Run: run BayesOpt by config.json (entry/incremental/watchdog/DDP)."
    )
    parser.add_argument(
        "--config-json",
        required=True,
        help="Path to config.json (relative paths are resolved from ins_pricing/modelling/ when possible).",
    )
    return parser


def main(argv: Optional[list[str]] = None) -> None:
    args = _build_parser().parse_args(argv)
    run(args.config_json)


if __name__ == "__main__":
    main()
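Pricing_Run mirrors Explain_Run but lets the config's runner block select entry, incremental, watchdog, or DDP execution. A similarly hedged sketch with a placeholder config path:

```python
# Illustrative sketch only; run_from_config reads the runner settings
# (entry/incremental/watchdog/DDP) from config.json, so the Python side stays this small.
from ins_pricing.modelling.Pricing_Run import main

main(["--config-json", "configs/train_config.json"])   # placeholder path
```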
@@ -0,0 +1,151 @@
"""Modelling subpackage for ins_pricing."""

from __future__ import annotations
import types
import sys
from importlib import import_module
from pathlib import Path

# Exports
from .config import BayesOptConfig
from .config_preprocess import (
    DatasetPreprocessor,
    OutputManager,
    VersionManager,
)
from .core import BayesOptModel
from .models import (
    FeatureTokenizer,
    FTTransformerCore,
    FTTransformerSklearn,
    GraphNeuralNetSklearn,
    MaskedTabularDataset,
    ResBlock,
    ResNetSequential,
    ResNetSklearn,
    ScaledTransformerEncoderLayer,
    SimpleGraphLayer,
    SimpleGNN,
    TabularDataset,
)
from .trainers import (
    FTTrainer,
    GLMTrainer,
    GNNTrainer,
    ResNetTrainer,
    TrainerBase,
    XGBTrainer,
    _xgb_cuda_available,
)
from .utils import (
    EPS,
    DistributedUtils,
    IOUtils,
    PlotUtils,
    TorchTrainerMixin,
    TrainingUtils,
    compute_batch_size,
    csv_to_dict,
    ensure_parent_dir,
    free_cuda,
    infer_factor_and_cate_list,
    plot_dlift_list,
    plot_lift_list,
    set_global_seed,
    split_data,
    tweedie_loss,
)
try:
    import torch
except ImportError:
    torch = None

# Lazy submodules
_LAZY_SUBMODULES = {
    "plotting": "ins_pricing.modelling.plotting",
    "explain": "ins_pricing.modelling.explain",
    "cli_common": "ins_pricing.modelling.cli_common",
    "notebook_utils": "ins_pricing.modelling.notebook_utils",
}

_PACKAGE_PATHS = {
    "plotting": Path(__file__).resolve().parent / "plotting",
    "explain": Path(__file__).resolve().parent / "explain",
}

__all__ = [
    "BayesOptConfig",
    "DatasetPreprocessor",
    "OutputManager",
    "VersionManager",
    "BayesOptModel",
    "FeatureTokenizer",
    "FTTransformerCore",
    "FTTransformerSklearn",
    "GraphNeuralNetSklearn",
    "MaskedTabularDataset",
    "ResBlock",
    "ResNetSequential",
    "ResNetSklearn",
    "ScaledTransformerEncoderLayer",
    "SimpleGraphLayer",
    "SimpleGNN",
    "TabularDataset",
    "FTTrainer",
    "GLMTrainer",
    "GNNTrainer",
    "ResNetTrainer",
    "TrainerBase",
    "XGBTrainer",
    "_xgb_cuda_available",
    "EPS",
    "DistributedUtils",
    "IOUtils",
    "PlotUtils",
    "TorchTrainerMixin",
    "TrainingUtils",
    "compute_batch_size",
    "csv_to_dict",
    "ensure_parent_dir",
    "free_cuda",
    "infer_factor_and_cate_list",
    "plot_dlift_list",
    "plot_lift_list",
    "set_global_seed",
    "split_data",
    "tweedie_loss",
    "torch",
] + sorted(list(_LAZY_SUBMODULES.keys()))

def _lazy_module(name: str, target: str, package_path: Path | None = None) -> types.ModuleType:
    proxy = types.ModuleType(name)
    if package_path is not None:
        proxy.__path__ = [str(package_path)]

    def _load():
        module = import_module(target)
        sys.modules[name] = module
        return module

    def __getattr__(attr: str):
        module = _load()
        return getattr(module, attr)

    def __dir__() -> list[str]:
        module = _load()
        return sorted(set(dir(module)))

    proxy.__getattr__ = __getattr__  # type: ignore[attr-defined]
    proxy.__dir__ = __dir__  # type: ignore[attr-defined]
    return proxy

def _install_proxy(alias: str, target: str) -> None:
    module_name = f"{__name__}.{alias}"
    if module_name in sys.modules:
        return
    proxy = _lazy_module(module_name, target, _PACKAGE_PATHS.get(alias))
    sys.modules[module_name] = proxy
    globals()[alias] = proxy

for _alias, _target in _LAZY_SUBMODULES.items():
    _install_proxy(_alias, _target)
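The tail of this __init__ installs lazy proxies so that the heavier submodules import only on first use. A short sketch of the observable behavior, assuming the tree is installed so that this file loads as ins_pricing.modelling:

```python
# Illustrative sketch only: attribute or dir() access on a proxy triggers the real import.
import sys
import ins_pricing.modelling as modelling

proxy = modelling.plotting        # lightweight ModuleType proxy installed at import time
names = dir(proxy)                # __dir__ -> _load() -> import_module(...) runs here
real = sys.modules["ins_pricing.modelling.plotting"]   # _load() registered the real module
# The package attribute still points at the proxy; sys.modules now holds the loaded module.
```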
@@ -0,0 +1,141 @@
from __future__ import annotations

import json
import os
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
try:
    from .config import BayesOptConfig
except ImportError:
    from ins_pricing.modelling.config import BayesOptConfig



PLOT_MODEL_LABELS: Dict[str, Tuple[str, str]] = {
    "glm": ("GLM", "pred_glm"),
    "xgb": ("Xgboost", "pred_xgb"),
    "resn": ("ResNet", "pred_resn"),
    "ft": ("FTTransformer", "pred_ft"),
    "gnn": ("GNN", "pred_gnn"),
}

PYTORCH_TRAINERS = {"resn", "ft", "gnn"}


def dedupe_preserve_order(items: Iterable[str]) -> List[str]:
    seen = set()
    unique: List[str] = []
    for item in items:
        if item not in seen:
            unique.append(item)
            seen.add(item)
    return unique


def build_model_names(prefixes: Sequence[str], suffixes: Sequence[str]) -> List[str]:
    names: List[str] = []
    for suffix in suffixes:
        names.extend(f"{prefix}_{suffix}" for prefix in prefixes)
    return names


def parse_model_pairs(raw_pairs: List) -> List[Tuple[str, str]]:
    pairs: List[Tuple[str, str]] = []
    for pair in raw_pairs:
        if isinstance(pair, (list, tuple)) and len(pair) == 2:
            pairs.append((str(pair[0]), str(pair[1])))
        elif isinstance(pair, str):
            parts = [p.strip() for p in pair.split(",") if p.strip()]
            if len(parts) == 2:
                pairs.append((parts[0], parts[1]))
    return pairs


def resolve_path(value: Optional[str], base_dir: Path) -> Optional[Path]:
    if value is None:
        return None
    if not isinstance(value, str) or not value.strip():
        return None
    p = Path(value)
    if p.is_absolute():
        return p
    return (base_dir / p).resolve()


def resolve_config_path(raw: str, script_dir: Path) -> Path:
    candidate = Path(raw)
    if candidate.exists():
        return candidate.resolve()
    candidate2 = (script_dir / raw)
    if candidate2.exists():
        return candidate2.resolve()
    raise FileNotFoundError(
        f"Config file not found: {raw}. Tried: {Path(raw).resolve()} and {candidate2.resolve()}"
    )


def load_config_json(path: Path, required_keys: Sequence[str]) -> BayesOptConfig:
    cfg_dict = json.loads(path.read_text(encoding="utf-8"))
    missing = [key for key in required_keys if key not in cfg_dict]
    if missing:
        raise ValueError(f"Missing required keys in {path}: {missing}")
    return BayesOptConfig.from_legacy_dict(cfg_dict)


def set_env(env_overrides: Dict[str, Any]) -> None:
    """Apply environment variables from config.json.

    Notes (DDP/Optuna hang debugging):
    - You can add these keys into config.json's `env` to debug distributed hangs:
      - `TORCH_DISTRIBUTED_DEBUG=DETAIL`
      - `NCCL_DEBUG=INFO`
      - `BAYESOPT_DDP_BARRIER_DEBUG=1`
      - `BAYESOPT_DDP_BARRIER_TIMEOUT=300`
      - `BAYESOPT_CUDA_SYNC=1` (optional; can slow down)
      - `BAYESOPT_CUDA_IPC_COLLECT=1` (optional; can slow down)
    - This function uses `os.environ.setdefault`, so a value already set in the
      shell will take precedence over config.json.
    """
    for key, value in (env_overrides or {}).items():
        os.environ.setdefault(str(key), str(value))


def _looks_like_url(value: str) -> bool:
    value = str(value)
    return "://" in value


def normalize_config_paths(cfg: Dict[str, Any], config_path: Path) -> Dict[str, Any]:
    """Resolve relative paths against the config.json directory.

    Fields handled:
    - data_dir / output_dir / optuna_storage / gnn_graph_cache
    - best_params_files (dict: model_key -> path)
    """
    base_dir = config_path.parent
    out = dict(cfg)

    for key in ("data_dir", "output_dir", "gnn_graph_cache"):
        if key in out and isinstance(out.get(key), str):
            resolved = resolve_path(out.get(key), base_dir)
            if resolved is not None:
                out[key] = str(resolved)

    storage = out.get("optuna_storage")
    if isinstance(storage, str) and storage.strip():
        if not _looks_like_url(storage):
            resolved = resolve_path(storage, base_dir)
            if resolved is not None:
                out["optuna_storage"] = str(resolved)

    best_files = out.get("best_params_files")
    if isinstance(best_files, dict):
        resolved_map: Dict[str, str] = {}
        for mk, path_str in best_files.items():
            if not isinstance(path_str, str):
                continue
            resolved = resolve_path(path_str, base_dir)
            resolved_map[str(mk)] = str(resolved) if resolved is not None else str(path_str)
        out["best_params_files"] = resolved_map

    return out
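To make the path handling concrete, a small sketch of normalize_config_paths and set_env as defined above. The config keys are the ones the function handles; the values and the import path (ins_pricing.modelling.cli_common) are assumptions for the example:

```python
# Illustrative sketch only: shows which fields get resolved and which are left alone.
from pathlib import Path
from ins_pricing.modelling.cli_common import normalize_config_paths, set_env

cfg = {
    "data_dir": "data",                        # relative -> resolved against the config dir
    "output_dir": "outputs",
    "optuna_storage": "sqlite:///optuna.db",   # contains "://" -> treated as a URL, untouched
    "best_params_files": {"xgb": "best/xgb.json"},
}
resolved = normalize_config_paths(cfg, Path("/project/configs/config.json"))
# resolved["data_dir"] -> "/project/configs/data" (POSIX layout); optuna_storage unchanged

set_env({"NCCL_DEBUG": "INFO"})                # setdefault: values already set in the shell win
```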