ins-pricing 0.4.5__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +48 -22
- ins_pricing/__init__.py +142 -90
- ins_pricing/cli/BayesOpt_entry.py +58 -46
- ins_pricing/cli/BayesOpt_incremental.py +77 -110
- ins_pricing/cli/Explain_Run.py +42 -23
- ins_pricing/cli/Explain_entry.py +551 -577
- ins_pricing/cli/Pricing_Run.py +42 -23
- ins_pricing/cli/bayesopt_entry_runner.py +51 -16
- ins_pricing/cli/utils/bootstrap.py +23 -0
- ins_pricing/cli/utils/cli_common.py +256 -256
- ins_pricing/cli/utils/cli_config.py +379 -360
- ins_pricing/cli/utils/import_resolver.py +375 -358
- ins_pricing/cli/utils/notebook_utils.py +256 -242
- ins_pricing/cli/watchdog_run.py +216 -198
- ins_pricing/frontend/__init__.py +10 -10
- ins_pricing/frontend/app.py +132 -61
- ins_pricing/frontend/config_builder.py +33 -0
- ins_pricing/frontend/example_config.json +11 -0
- ins_pricing/frontend/example_workflows.py +1 -1
- ins_pricing/frontend/runner.py +340 -388
- ins_pricing/governance/__init__.py +20 -20
- ins_pricing/governance/release.py +159 -159
- ins_pricing/modelling/README.md +1 -1
- ins_pricing/modelling/__init__.py +147 -92
- ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +31 -13
- ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +12 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +589 -552
- ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +987 -958
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +488 -548
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +349 -342
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +921 -913
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +794 -785
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +454 -446
- ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1294 -1282
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +64 -56
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +203 -198
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +333 -325
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +279 -267
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +515 -313
- ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
- ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +193 -186
- ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
- ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
- ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +636 -623
- ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
- ins_pricing/modelling/explain/__init__.py +55 -55
- ins_pricing/modelling/explain/metrics.py +27 -174
- ins_pricing/modelling/explain/permutation.py +237 -237
- ins_pricing/modelling/plotting/__init__.py +40 -36
- ins_pricing/modelling/plotting/compat.py +228 -0
- ins_pricing/modelling/plotting/curves.py +572 -572
- ins_pricing/modelling/plotting/diagnostics.py +163 -163
- ins_pricing/modelling/plotting/geo.py +362 -362
- ins_pricing/modelling/plotting/importance.py +121 -121
- ins_pricing/pricing/__init__.py +27 -27
- ins_pricing/pricing/factors.py +67 -56
- ins_pricing/production/__init__.py +35 -25
- ins_pricing/production/{predict.py → inference.py} +140 -57
- ins_pricing/production/monitoring.py +8 -21
- ins_pricing/reporting/__init__.py +11 -11
- ins_pricing/setup.py +1 -1
- ins_pricing/tests/production/test_inference.py +90 -0
- ins_pricing/utils/__init__.py +112 -78
- ins_pricing/utils/device.py +258 -237
- ins_pricing/utils/features.py +53 -0
- ins_pricing/utils/io.py +72 -0
- ins_pricing/utils/logging.py +34 -1
- ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
- ins_pricing/utils/metrics.py +158 -24
- ins_pricing/utils/numerics.py +76 -0
- ins_pricing/utils/paths.py +9 -1
- ins_pricing/utils/profiling.py +8 -4
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/METADATA +1 -1
- ins_pricing-0.5.1.dist-info/RECORD +132 -0
- ins_pricing/modelling/core/BayesOpt.py +0 -146
- ins_pricing/modelling/core/__init__.py +0 -1
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
- ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
- ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
- ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
- ins_pricing/modelling/core/bayesopt/utils.py +0 -105
- ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
- ins_pricing/tests/production/test_predict.py +0 -233
- ins_pricing-0.4.5.dist-info/RECORD +0 -130
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/WHEEL +0 -0
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/top_level.txt +0 -0
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from .approval import ApprovalAction, ApprovalRequest, ApprovalStore
|
|
4
|
-
from .audit import AuditEvent, AuditLogger
|
|
5
|
-
from .registry import ModelArtifact, ModelRegistry, ModelVersion
|
|
6
|
-
from .release import DeploymentState, ModelRef, ReleaseManager
|
|
7
|
-
|
|
8
|
-
__all__ = [
|
|
9
|
-
"ApprovalAction",
|
|
10
|
-
"ApprovalRequest",
|
|
11
|
-
"ApprovalStore",
|
|
12
|
-
"AuditEvent",
|
|
13
|
-
"AuditLogger",
|
|
14
|
-
"ModelArtifact",
|
|
15
|
-
"ModelRegistry",
|
|
16
|
-
"ModelVersion",
|
|
17
|
-
"DeploymentState",
|
|
18
|
-
"ModelRef",
|
|
19
|
-
"ReleaseManager",
|
|
20
|
-
]
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from ins_pricing.governance.approval import ApprovalAction, ApprovalRequest, ApprovalStore
|
|
4
|
+
from ins_pricing.governance.audit import AuditEvent, AuditLogger
|
|
5
|
+
from ins_pricing.governance.registry import ModelArtifact, ModelRegistry, ModelVersion
|
|
6
|
+
from ins_pricing.governance.release import DeploymentState, ModelRef, ReleaseManager
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"ApprovalAction",
|
|
10
|
+
"ApprovalRequest",
|
|
11
|
+
"ApprovalStore",
|
|
12
|
+
"AuditEvent",
|
|
13
|
+
"AuditLogger",
|
|
14
|
+
"ModelArtifact",
|
|
15
|
+
"ModelRegistry",
|
|
16
|
+
"ModelVersion",
|
|
17
|
+
"DeploymentState",
|
|
18
|
+
"ModelRef",
|
|
19
|
+
"ReleaseManager",
|
|
20
|
+
]
|
|
@@ -1,159 +1,159 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import json
|
|
4
|
-
from dataclasses import asdict, dataclass, field
|
|
5
|
-
from datetime import datetime
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from typing import List, Optional
|
|
8
|
-
|
|
9
|
-
from .audit import AuditLogger
|
|
10
|
-
from .registry import ModelRegistry
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class ModelRef:
|
|
15
|
-
name: str
|
|
16
|
-
version: str
|
|
17
|
-
activated_at: str
|
|
18
|
-
actor: Optional[str] = None
|
|
19
|
-
note: Optional[str] = None
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
@dataclass
|
|
23
|
-
class DeploymentState:
|
|
24
|
-
env: str
|
|
25
|
-
active: Optional[ModelRef] = None
|
|
26
|
-
history: List[ModelRef] = field(default_factory=list)
|
|
27
|
-
updated_at: Optional[str] = None
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class ReleaseManager:
|
|
31
|
-
"""Environment release manager with rollback support."""
|
|
32
|
-
|
|
33
|
-
def __init__(
|
|
34
|
-
self,
|
|
35
|
-
state_dir: str | Path,
|
|
36
|
-
*,
|
|
37
|
-
registry: Optional[ModelRegistry] = None,
|
|
38
|
-
audit_logger: Optional[AuditLogger] = None,
|
|
39
|
-
):
|
|
40
|
-
self.state_dir = Path(state_dir)
|
|
41
|
-
self.state_dir.mkdir(parents=True, exist_ok=True)
|
|
42
|
-
self.registry = registry
|
|
43
|
-
self.audit_logger = audit_logger
|
|
44
|
-
|
|
45
|
-
def _state_path(self, env: str) -> Path:
|
|
46
|
-
return self.state_dir / f"{env}.json"
|
|
47
|
-
|
|
48
|
-
def _load(self, env: str) -> DeploymentState:
|
|
49
|
-
path = self._state_path(env)
|
|
50
|
-
if not path.exists():
|
|
51
|
-
return DeploymentState(env=env)
|
|
52
|
-
with path.open("r", encoding="utf-8") as fh:
|
|
53
|
-
payload = json.load(fh)
|
|
54
|
-
active = payload.get("active")
|
|
55
|
-
history = payload.get("history", [])
|
|
56
|
-
return DeploymentState(
|
|
57
|
-
env=payload.get("env", env),
|
|
58
|
-
active=ModelRef(**active) if active else None,
|
|
59
|
-
history=[ModelRef(**item) for item in history],
|
|
60
|
-
updated_at=payload.get("updated_at"),
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
def _save(self, state: DeploymentState) -> None:
|
|
64
|
-
payload = {
|
|
65
|
-
"env": state.env,
|
|
66
|
-
"active": asdict(state.active) if state.active else None,
|
|
67
|
-
"history": [asdict(item) for item in state.history],
|
|
68
|
-
"updated_at": state.updated_at,
|
|
69
|
-
}
|
|
70
|
-
path = self._state_path(state.env)
|
|
71
|
-
with path.open("w", encoding="utf-8") as fh:
|
|
72
|
-
json.dump(payload, fh, indent=2, ensure_ascii=True)
|
|
73
|
-
|
|
74
|
-
def get_active(self, env: str) -> Optional[ModelRef]:
|
|
75
|
-
state = self._load(env)
|
|
76
|
-
return state.active
|
|
77
|
-
|
|
78
|
-
def list_history(self, env: str) -> List[ModelRef]:
|
|
79
|
-
return self._load(env).history
|
|
80
|
-
|
|
81
|
-
def deploy(
|
|
82
|
-
self,
|
|
83
|
-
env: str,
|
|
84
|
-
name: str,
|
|
85
|
-
version: str,
|
|
86
|
-
*,
|
|
87
|
-
actor: Optional[str] = None,
|
|
88
|
-
note: Optional[str] = None,
|
|
89
|
-
update_registry_status: bool = True,
|
|
90
|
-
registry_status: str = "production",
|
|
91
|
-
) -> DeploymentState:
|
|
92
|
-
state = self._load(env)
|
|
93
|
-
if state.active and state.active.name == name and state.active.version == version:
|
|
94
|
-
return state
|
|
95
|
-
|
|
96
|
-
if state.active is not None:
|
|
97
|
-
state.history.append(state.active)
|
|
98
|
-
|
|
99
|
-
now = datetime.utcnow().isoformat()
|
|
100
|
-
state.active = ModelRef(
|
|
101
|
-
name=name,
|
|
102
|
-
version=version,
|
|
103
|
-
activated_at=now,
|
|
104
|
-
actor=actor,
|
|
105
|
-
note=note,
|
|
106
|
-
)
|
|
107
|
-
state.updated_at = now
|
|
108
|
-
self._save(state)
|
|
109
|
-
|
|
110
|
-
if self.registry and update_registry_status:
|
|
111
|
-
self.registry.promote(name, version, new_status=registry_status)
|
|
112
|
-
|
|
113
|
-
if self.audit_logger:
|
|
114
|
-
self.audit_logger.log(
|
|
115
|
-
"deploy",
|
|
116
|
-
actor or "unknown",
|
|
117
|
-
metadata={"env": env, "name": name, "version": version},
|
|
118
|
-
note=note,
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
return state
|
|
122
|
-
|
|
123
|
-
def rollback(
|
|
124
|
-
self,
|
|
125
|
-
env: str,
|
|
126
|
-
*,
|
|
127
|
-
actor: Optional[str] = None,
|
|
128
|
-
note: Optional[str] = None,
|
|
129
|
-
update_registry_status: bool = False,
|
|
130
|
-
registry_status: str = "production",
|
|
131
|
-
) -> DeploymentState:
|
|
132
|
-
state = self._load(env)
|
|
133
|
-
if not state.history:
|
|
134
|
-
raise ValueError("No history available to rollback.")
|
|
135
|
-
|
|
136
|
-
previous = state.history.pop()
|
|
137
|
-
now = datetime.utcnow().isoformat()
|
|
138
|
-
state.active = ModelRef(
|
|
139
|
-
name=previous.name,
|
|
140
|
-
version=previous.version,
|
|
141
|
-
activated_at=now,
|
|
142
|
-
actor=actor or previous.actor,
|
|
143
|
-
note=note or previous.note,
|
|
144
|
-
)
|
|
145
|
-
state.updated_at = now
|
|
146
|
-
self._save(state)
|
|
147
|
-
|
|
148
|
-
if self.registry and update_registry_status:
|
|
149
|
-
self.registry.promote(previous.name, previous.version, new_status=registry_status)
|
|
150
|
-
|
|
151
|
-
if self.audit_logger:
|
|
152
|
-
self.audit_logger.log(
|
|
153
|
-
"rollback",
|
|
154
|
-
actor or "unknown",
|
|
155
|
-
metadata={"env": env, "name": previous.name, "version": previous.version},
|
|
156
|
-
note=note,
|
|
157
|
-
)
|
|
158
|
-
|
|
159
|
-
return state
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import asdict, dataclass, field
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from ins_pricing.governance.audit import AuditLogger
|
|
10
|
+
from ins_pricing.governance.registry import ModelRegistry
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
|
|
14
|
+
class ModelRef:
|
|
15
|
+
name: str
|
|
16
|
+
version: str
|
|
17
|
+
activated_at: str
|
|
18
|
+
actor: Optional[str] = None
|
|
19
|
+
note: Optional[str] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
|
|
23
|
+
class DeploymentState:
|
|
24
|
+
env: str
|
|
25
|
+
active: Optional[ModelRef] = None
|
|
26
|
+
history: List[ModelRef] = field(default_factory=list)
|
|
27
|
+
updated_at: Optional[str] = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ReleaseManager:
|
|
31
|
+
"""Environment release manager with rollback support."""
|
|
32
|
+
|
|
33
|
+
def __init__(
|
|
34
|
+
self,
|
|
35
|
+
state_dir: str | Path,
|
|
36
|
+
*,
|
|
37
|
+
registry: Optional[ModelRegistry] = None,
|
|
38
|
+
audit_logger: Optional[AuditLogger] = None,
|
|
39
|
+
):
|
|
40
|
+
self.state_dir = Path(state_dir)
|
|
41
|
+
self.state_dir.mkdir(parents=True, exist_ok=True)
|
|
42
|
+
self.registry = registry
|
|
43
|
+
self.audit_logger = audit_logger
|
|
44
|
+
|
|
45
|
+
def _state_path(self, env: str) -> Path:
|
|
46
|
+
return self.state_dir / f"{env}.json"
|
|
47
|
+
|
|
48
|
+
def _load(self, env: str) -> DeploymentState:
|
|
49
|
+
path = self._state_path(env)
|
|
50
|
+
if not path.exists():
|
|
51
|
+
return DeploymentState(env=env)
|
|
52
|
+
with path.open("r", encoding="utf-8") as fh:
|
|
53
|
+
payload = json.load(fh)
|
|
54
|
+
active = payload.get("active")
|
|
55
|
+
history = payload.get("history", [])
|
|
56
|
+
return DeploymentState(
|
|
57
|
+
env=payload.get("env", env),
|
|
58
|
+
active=ModelRef(**active) if active else None,
|
|
59
|
+
history=[ModelRef(**item) for item in history],
|
|
60
|
+
updated_at=payload.get("updated_at"),
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
def _save(self, state: DeploymentState) -> None:
|
|
64
|
+
payload = {
|
|
65
|
+
"env": state.env,
|
|
66
|
+
"active": asdict(state.active) if state.active else None,
|
|
67
|
+
"history": [asdict(item) for item in state.history],
|
|
68
|
+
"updated_at": state.updated_at,
|
|
69
|
+
}
|
|
70
|
+
path = self._state_path(state.env)
|
|
71
|
+
with path.open("w", encoding="utf-8") as fh:
|
|
72
|
+
json.dump(payload, fh, indent=2, ensure_ascii=True)
|
|
73
|
+
|
|
74
|
+
def get_active(self, env: str) -> Optional[ModelRef]:
|
|
75
|
+
state = self._load(env)
|
|
76
|
+
return state.active
|
|
77
|
+
|
|
78
|
+
def list_history(self, env: str) -> List[ModelRef]:
|
|
79
|
+
return self._load(env).history
|
|
80
|
+
|
|
81
|
+
def deploy(
|
|
82
|
+
self,
|
|
83
|
+
env: str,
|
|
84
|
+
name: str,
|
|
85
|
+
version: str,
|
|
86
|
+
*,
|
|
87
|
+
actor: Optional[str] = None,
|
|
88
|
+
note: Optional[str] = None,
|
|
89
|
+
update_registry_status: bool = True,
|
|
90
|
+
registry_status: str = "production",
|
|
91
|
+
) -> DeploymentState:
|
|
92
|
+
state = self._load(env)
|
|
93
|
+
if state.active and state.active.name == name and state.active.version == version:
|
|
94
|
+
return state
|
|
95
|
+
|
|
96
|
+
if state.active is not None:
|
|
97
|
+
state.history.append(state.active)
|
|
98
|
+
|
|
99
|
+
now = datetime.utcnow().isoformat()
|
|
100
|
+
state.active = ModelRef(
|
|
101
|
+
name=name,
|
|
102
|
+
version=version,
|
|
103
|
+
activated_at=now,
|
|
104
|
+
actor=actor,
|
|
105
|
+
note=note,
|
|
106
|
+
)
|
|
107
|
+
state.updated_at = now
|
|
108
|
+
self._save(state)
|
|
109
|
+
|
|
110
|
+
if self.registry and update_registry_status:
|
|
111
|
+
self.registry.promote(name, version, new_status=registry_status)
|
|
112
|
+
|
|
113
|
+
if self.audit_logger:
|
|
114
|
+
self.audit_logger.log(
|
|
115
|
+
"deploy",
|
|
116
|
+
actor or "unknown",
|
|
117
|
+
metadata={"env": env, "name": name, "version": version},
|
|
118
|
+
note=note,
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
return state
|
|
122
|
+
|
|
123
|
+
def rollback(
|
|
124
|
+
self,
|
|
125
|
+
env: str,
|
|
126
|
+
*,
|
|
127
|
+
actor: Optional[str] = None,
|
|
128
|
+
note: Optional[str] = None,
|
|
129
|
+
update_registry_status: bool = False,
|
|
130
|
+
registry_status: str = "production",
|
|
131
|
+
) -> DeploymentState:
|
|
132
|
+
state = self._load(env)
|
|
133
|
+
if not state.history:
|
|
134
|
+
raise ValueError("No history available to rollback.")
|
|
135
|
+
|
|
136
|
+
previous = state.history.pop()
|
|
137
|
+
now = datetime.utcnow().isoformat()
|
|
138
|
+
state.active = ModelRef(
|
|
139
|
+
name=previous.name,
|
|
140
|
+
version=previous.version,
|
|
141
|
+
activated_at=now,
|
|
142
|
+
actor=actor or previous.actor,
|
|
143
|
+
note=note or previous.note,
|
|
144
|
+
)
|
|
145
|
+
state.updated_at = now
|
|
146
|
+
self._save(state)
|
|
147
|
+
|
|
148
|
+
if self.registry and update_registry_status:
|
|
149
|
+
self.registry.promote(previous.name, previous.version, new_status=registry_status)
|
|
150
|
+
|
|
151
|
+
if self.audit_logger:
|
|
152
|
+
self.audit_logger.log(
|
|
153
|
+
"rollback",
|
|
154
|
+
actor or "unknown",
|
|
155
|
+
metadata={"env": env, "name": previous.name, "version": previous.version},
|
|
156
|
+
note=note,
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
return state
|
ins_pricing/modelling/README.md
CHANGED
|
@@ -4,7 +4,7 @@ This directory contains reusable training tooling and frameworks centered on Bay
|
|
|
4
4
|
|
|
5
5
|
## Key locations
|
|
6
6
|
|
|
7
|
-
- `
|
|
7
|
+
- `bayesopt/` - core training/tuning package
|
|
8
8
|
- `explain/` - explainability helpers
|
|
9
9
|
- `plotting/` - plotting utilities
|
|
10
10
|
- `ins_pricing/cli/` - CLI entry points
|
|
@@ -1,95 +1,150 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from importlib import import_module
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
import sys
|
|
6
|
-
import types
|
|
7
|
-
|
|
8
|
-
# Keep imports lazy to avoid hard dependencies when only using lightweight modules.
|
|
9
|
-
|
|
10
|
-
__all__ =
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from importlib import import_module
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import sys
|
|
6
|
+
import types
|
|
7
|
+
|
|
8
|
+
# Keep imports lazy to avoid hard dependencies when only using lightweight modules.
|
|
9
|
+
|
|
10
|
+
__all__ = sorted(
|
|
11
|
+
{
|
|
12
|
+
"BayesOptConfig",
|
|
13
|
+
"BayesOptModel",
|
|
14
|
+
"bayesopt",
|
|
15
|
+
"plotting",
|
|
16
|
+
"explain",
|
|
17
|
+
"evaluation",
|
|
18
|
+
"cli",
|
|
19
|
+
}
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
_LAZY_ATTRS = {
|
|
23
|
+
"bayesopt": "ins_pricing.modelling.bayesopt",
|
|
24
|
+
"plotting": "ins_pricing.modelling.plotting",
|
|
25
|
+
"explain": "ins_pricing.modelling.explain",
|
|
26
|
+
"BayesOptConfig": "ins_pricing.modelling.bayesopt.core",
|
|
27
|
+
"BayesOptModel": "ins_pricing.modelling.bayesopt.core",
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
_BAYESOPT_EXPORTS = {
|
|
31
|
+
"BayesOptConfig",
|
|
32
|
+
"DatasetPreprocessor",
|
|
33
|
+
"OutputManager",
|
|
34
|
+
"VersionManager",
|
|
35
|
+
"BayesOptModel",
|
|
36
|
+
"FeatureTokenizer",
|
|
37
|
+
"FTTransformerCore",
|
|
38
|
+
"FTTransformerSklearn",
|
|
39
|
+
"GraphNeuralNetSklearn",
|
|
40
|
+
"MaskedTabularDataset",
|
|
41
|
+
"ResBlock",
|
|
42
|
+
"ResNetSequential",
|
|
43
|
+
"ResNetSklearn",
|
|
44
|
+
"ScaledTransformerEncoderLayer",
|
|
45
|
+
"SimpleGraphLayer",
|
|
46
|
+
"SimpleGNN",
|
|
47
|
+
"TabularDataset",
|
|
48
|
+
"FTTrainer",
|
|
49
|
+
"GLMTrainer",
|
|
50
|
+
"GNNTrainer",
|
|
51
|
+
"ResNetTrainer",
|
|
52
|
+
"TrainerBase",
|
|
53
|
+
"XGBTrainer",
|
|
54
|
+
"_xgb_cuda_available",
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
_LEGACY_EXPORTS = {
|
|
58
|
+
"IOUtils": "ins_pricing.utils",
|
|
59
|
+
"DeviceManager": "ins_pricing.utils",
|
|
60
|
+
"GPUMemoryManager": "ins_pricing.utils",
|
|
61
|
+
"MetricFactory": "ins_pricing.utils",
|
|
62
|
+
"EPS": "ins_pricing.utils",
|
|
63
|
+
"set_global_seed": "ins_pricing.utils",
|
|
64
|
+
"compute_batch_size": "ins_pricing.utils",
|
|
65
|
+
"tweedie_loss": "ins_pricing.utils",
|
|
66
|
+
"infer_factor_and_cate_list": "ins_pricing.utils",
|
|
67
|
+
"DistributedUtils": "ins_pricing.modelling.bayesopt.utils",
|
|
68
|
+
"TrainingUtils": "ins_pricing.modelling.bayesopt.utils",
|
|
69
|
+
"free_cuda": "ins_pricing.modelling.bayesopt.utils",
|
|
70
|
+
"TorchTrainerMixin": "ins_pricing.modelling.bayesopt.utils",
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
__all__ = sorted(set(__all__) | set(_BAYESOPT_EXPORTS) | set(_LEGACY_EXPORTS))
|
|
74
|
+
|
|
32
75
|
_LAZY_SUBMODULES = {
|
|
33
|
-
"bayesopt": "ins_pricing.modelling.
|
|
34
|
-
"
|
|
35
|
-
"evaluation": "ins_pricing.modelling.core.evaluation",
|
|
76
|
+
"bayesopt": "ins_pricing.modelling.bayesopt",
|
|
77
|
+
"evaluation": "ins_pricing.modelling.evaluation",
|
|
36
78
|
"cli": "ins_pricing.cli",
|
|
37
79
|
}
|
|
38
|
-
|
|
39
|
-
_PACKAGE_PATHS = {
|
|
40
|
-
"bayesopt": Path(__file__).resolve().parent / "
|
|
41
|
-
"cli": Path(__file__).resolve().parents[1] / "cli",
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def _lazy_module(name: str, target: str, package_path: Path | None = None) -> types.ModuleType:
|
|
46
|
-
proxy = types.ModuleType(name)
|
|
47
|
-
if package_path is not None:
|
|
48
|
-
proxy.__path__ = [str(package_path)]
|
|
49
|
-
|
|
50
|
-
def _load():
|
|
51
|
-
module = import_module(target)
|
|
52
|
-
sys.modules[name] = module
|
|
53
|
-
return module
|
|
54
|
-
|
|
55
|
-
def __getattr__(attr: str):
|
|
56
|
-
module = _load()
|
|
57
|
-
return getattr(module, attr)
|
|
58
|
-
|
|
59
|
-
def __dir__() -> list[str]:
|
|
60
|
-
module = _load()
|
|
61
|
-
return sorted(set(dir(module)))
|
|
62
|
-
|
|
63
|
-
proxy.__getattr__ = __getattr__ # type: ignore[attr-defined]
|
|
64
|
-
proxy.__dir__ = __dir__ # type: ignore[attr-defined]
|
|
65
|
-
return proxy
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def _install_proxy(alias: str, target: str) -> None:
|
|
69
|
-
module_name = f"{__name__}.{alias}"
|
|
70
|
-
if module_name in sys.modules:
|
|
71
|
-
return
|
|
72
|
-
proxy = _lazy_module(module_name, target, _PACKAGE_PATHS.get(alias))
|
|
73
|
-
sys.modules[module_name] = proxy
|
|
74
|
-
globals()[alias] = proxy
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
for _alias, _target in _LAZY_SUBMODULES.items():
|
|
78
|
-
_install_proxy(_alias, _target)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def __getattr__(name: str):
|
|
82
|
-
target = _LAZY_ATTRS.get(name)
|
|
83
|
-
if
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
80
|
+
|
|
81
|
+
_PACKAGE_PATHS = {
|
|
82
|
+
"bayesopt": Path(__file__).resolve().parent / "bayesopt",
|
|
83
|
+
"cli": Path(__file__).resolve().parents[1] / "cli",
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _lazy_module(name: str, target: str, package_path: Path | None = None) -> types.ModuleType:
|
|
88
|
+
proxy = types.ModuleType(name)
|
|
89
|
+
if package_path is not None:
|
|
90
|
+
proxy.__path__ = [str(package_path)]
|
|
91
|
+
|
|
92
|
+
def _load():
|
|
93
|
+
module = import_module(target)
|
|
94
|
+
sys.modules[name] = module
|
|
95
|
+
return module
|
|
96
|
+
|
|
97
|
+
def __getattr__(attr: str):
|
|
98
|
+
module = _load()
|
|
99
|
+
return getattr(module, attr)
|
|
100
|
+
|
|
101
|
+
def __dir__() -> list[str]:
|
|
102
|
+
module = _load()
|
|
103
|
+
return sorted(set(dir(module)))
|
|
104
|
+
|
|
105
|
+
proxy.__getattr__ = __getattr__ # type: ignore[attr-defined]
|
|
106
|
+
proxy.__dir__ = __dir__ # type: ignore[attr-defined]
|
|
107
|
+
return proxy
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _install_proxy(alias: str, target: str) -> None:
|
|
111
|
+
module_name = f"{__name__}.{alias}"
|
|
112
|
+
if module_name in sys.modules:
|
|
113
|
+
return
|
|
114
|
+
proxy = _lazy_module(module_name, target, _PACKAGE_PATHS.get(alias))
|
|
115
|
+
sys.modules[module_name] = proxy
|
|
116
|
+
globals()[alias] = proxy
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
for _alias, _target in _LAZY_SUBMODULES.items():
|
|
120
|
+
_install_proxy(_alias, _target)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def __getattr__(name: str):
|
|
124
|
+
target = _LAZY_ATTRS.get(name)
|
|
125
|
+
if target:
|
|
126
|
+
module = import_module(target)
|
|
127
|
+
if name in {"bayesopt", "plotting", "explain"}:
|
|
128
|
+
value = module
|
|
129
|
+
else:
|
|
130
|
+
value = getattr(module, name)
|
|
131
|
+
globals()[name] = value
|
|
132
|
+
return value
|
|
133
|
+
|
|
134
|
+
if name in _BAYESOPT_EXPORTS:
|
|
135
|
+
module = import_module("ins_pricing.modelling.bayesopt")
|
|
136
|
+
value = getattr(module, name)
|
|
137
|
+
globals()[name] = value
|
|
138
|
+
return value
|
|
139
|
+
|
|
140
|
+
legacy_module = _LEGACY_EXPORTS.get(name)
|
|
141
|
+
if legacy_module:
|
|
142
|
+
module = import_module(legacy_module)
|
|
143
|
+
value = getattr(module, name)
|
|
144
|
+
globals()[name] = value
|
|
145
|
+
return value
|
|
146
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def __dir__() -> list[str]:
|
|
150
|
+
return sorted(set(__all__) | set(_BAYESOPT_EXPORTS) | set(_LEGACY_EXPORTS) | set(globals().keys()))
|
|
@@ -6,7 +6,7 @@ It supports JSON-driven CLI runs and a Python API for notebooks/scripts.
|
|
|
6
6
|
## Recommended API (config-based)
|
|
7
7
|
|
|
8
8
|
```python
|
|
9
|
-
from ins_pricing.modelling.
|
|
9
|
+
from ins_pricing.modelling.bayesopt import BayesOptConfig
|
|
10
10
|
from ins_pricing.modelling import BayesOptModel
|
|
11
11
|
|
|
12
12
|
config = BayesOptConfig(
|
|
@@ -27,7 +27,7 @@ model.optimize_model("xgb", max_evals=50)
|
|
|
27
27
|
## Load config from file
|
|
28
28
|
|
|
29
29
|
```python
|
|
30
|
-
from ins_pricing.modelling.
|
|
30
|
+
from ins_pricing.modelling.bayesopt import BayesOptConfig
|
|
31
31
|
from ins_pricing.modelling import BayesOptModel
|
|
32
32
|
|
|
33
33
|
config = BayesOptConfig.from_file("config.json")
|
|
@@ -46,14 +46,32 @@ python ins_pricing/cli/BayesOpt_entry.py --config-json config_template.json
|
|
|
46
46
|
- `embedding`: FT trains with labels but exports embeddings (`pred_<prefix>_*`).
|
|
47
47
|
- `unsupervised_embedding`: FT trains without labels and exports embeddings.
|
|
48
48
|
|
|
49
|
-
## Output layout
|
|
50
|
-
|
|
51
|
-
`output_dir/` contains:
|
|
52
|
-
- `plot/` plots and diagnostics
|
|
53
|
-
- `Results/` metrics, params, and snapshots
|
|
54
|
-
- `model/` saved models
|
|
55
|
-
|
|
56
|
-
##
|
|
57
|
-
|
|
58
|
-
-
|
|
59
|
-
-
|
|
49
|
+
## Output layout
|
|
50
|
+
|
|
51
|
+
`output_dir/` contains:
|
|
52
|
+
- `plot/` plots and diagnostics
|
|
53
|
+
- `Results/` metrics, params, and snapshots
|
|
54
|
+
- `model/` saved models
|
|
55
|
+
|
|
56
|
+
## XGBoost GPU tips
|
|
57
|
+
|
|
58
|
+
- Use `xgb_gpu_id` to select a specific GPU on multi-GPU Linux systems.
|
|
59
|
+
- Per-fold GPU cleanup is disabled by default to avoid long idle gaps caused by CUDA sync.
|
|
60
|
+
- If you need to reclaim memory between folds, set `xgb_cleanup_per_fold=true`.
|
|
61
|
+
- If you still need a full device sync, set `xgb_cleanup_synchronize=true` (slower).
|
|
62
|
+
- `xgb_use_dmatrix=true` switches XGBoost to `xgb.train` + DMatrix/QuantileDMatrix for better throughput.
|
|
63
|
+
- External-memory DMatrix (file-backed) is disabled; pass in-memory arrays/dataframes.
|
|
64
|
+
|
|
65
|
+
## Torch model cleanup
|
|
66
|
+
|
|
67
|
+
To reduce CPU↔GPU thrash, fold-level cleanup for FT/ResNet/GNN is off by default.
|
|
68
|
+
Enable if you see memory pressure:
|
|
69
|
+
- `ft_cleanup_per_fold`, `ft_cleanup_synchronize`
|
|
70
|
+
- `resn_cleanup_per_fold`, `resn_cleanup_synchronize`
|
|
71
|
+
- `gnn_cleanup_per_fold`, `gnn_cleanup_synchronize`
|
|
72
|
+
- `optuna_cleanup_synchronize` controls whether trial-level cleanup syncs CUDA (default false)
|
|
73
|
+
|
|
74
|
+
## Notes
|
|
75
|
+
|
|
76
|
+
- Relative paths in config are resolved from the config file directory.
|
|
77
|
+
- For multi-GPU, use `torchrun` and set `runner.nproc_per_node` in config.
|