ins-pricing 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. ins_pricing/README.md +60 -0
  2. ins_pricing/__init__.py +102 -0
  3. ins_pricing/governance/README.md +18 -0
  4. ins_pricing/governance/__init__.py +20 -0
  5. ins_pricing/governance/approval.py +93 -0
  6. ins_pricing/governance/audit.py +37 -0
  7. ins_pricing/governance/registry.py +99 -0
  8. ins_pricing/governance/release.py +159 -0
  9. ins_pricing/modelling/BayesOpt.py +146 -0
  10. ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
  11. ins_pricing/modelling/BayesOpt_entry.py +575 -0
  12. ins_pricing/modelling/BayesOpt_incremental.py +731 -0
  13. ins_pricing/modelling/Explain_Run.py +36 -0
  14. ins_pricing/modelling/Explain_entry.py +539 -0
  15. ins_pricing/modelling/Pricing_Run.py +36 -0
  16. ins_pricing/modelling/README.md +33 -0
  17. ins_pricing/modelling/__init__.py +44 -0
  18. ins_pricing/modelling/bayesopt/__init__.py +98 -0
  19. ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
  20. ins_pricing/modelling/bayesopt/core.py +1476 -0
  21. ins_pricing/modelling/bayesopt/models.py +2196 -0
  22. ins_pricing/modelling/bayesopt/trainers.py +2446 -0
  23. ins_pricing/modelling/bayesopt/utils.py +1021 -0
  24. ins_pricing/modelling/cli_common.py +136 -0
  25. ins_pricing/modelling/explain/__init__.py +55 -0
  26. ins_pricing/modelling/explain/gradients.py +334 -0
  27. ins_pricing/modelling/explain/metrics.py +176 -0
  28. ins_pricing/modelling/explain/permutation.py +155 -0
  29. ins_pricing/modelling/explain/shap_utils.py +146 -0
  30. ins_pricing/modelling/notebook_utils.py +284 -0
  31. ins_pricing/modelling/plotting/__init__.py +45 -0
  32. ins_pricing/modelling/plotting/common.py +63 -0
  33. ins_pricing/modelling/plotting/curves.py +572 -0
  34. ins_pricing/modelling/plotting/diagnostics.py +139 -0
  35. ins_pricing/modelling/plotting/geo.py +362 -0
  36. ins_pricing/modelling/plotting/importance.py +121 -0
  37. ins_pricing/modelling/run_logging.py +133 -0
  38. ins_pricing/modelling/tests/conftest.py +8 -0
  39. ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
  40. ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
  41. ins_pricing/modelling/tests/test_explain.py +56 -0
  42. ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
  43. ins_pricing/modelling/tests/test_graph_cache.py +33 -0
  44. ins_pricing/modelling/tests/test_plotting.py +63 -0
  45. ins_pricing/modelling/tests/test_plotting_library.py +150 -0
  46. ins_pricing/modelling/tests/test_preprocessor.py +48 -0
  47. ins_pricing/modelling/watchdog_run.py +211 -0
  48. ins_pricing/pricing/README.md +44 -0
  49. ins_pricing/pricing/__init__.py +27 -0
  50. ins_pricing/pricing/calibration.py +39 -0
  51. ins_pricing/pricing/data_quality.py +117 -0
  52. ins_pricing/pricing/exposure.py +85 -0
  53. ins_pricing/pricing/factors.py +91 -0
  54. ins_pricing/pricing/monitoring.py +99 -0
  55. ins_pricing/pricing/rate_table.py +78 -0
  56. ins_pricing/production/__init__.py +21 -0
  57. ins_pricing/production/drift.py +30 -0
  58. ins_pricing/production/monitoring.py +143 -0
  59. ins_pricing/production/scoring.py +40 -0
  60. ins_pricing/reporting/README.md +20 -0
  61. ins_pricing/reporting/__init__.py +11 -0
  62. ins_pricing/reporting/report_builder.py +72 -0
  63. ins_pricing/reporting/scheduler.py +45 -0
  64. ins_pricing/setup.py +41 -0
  65. ins_pricing v2/__init__.py +23 -0
  66. ins_pricing v2/governance/__init__.py +20 -0
  67. ins_pricing v2/governance/approval.py +93 -0
  68. ins_pricing v2/governance/audit.py +37 -0
  69. ins_pricing v2/governance/registry.py +99 -0
  70. ins_pricing v2/governance/release.py +159 -0
  71. ins_pricing v2/modelling/Explain_Run.py +36 -0
  72. ins_pricing v2/modelling/Pricing_Run.py +36 -0
  73. ins_pricing v2/modelling/__init__.py +151 -0
  74. ins_pricing v2/modelling/cli_common.py +141 -0
  75. ins_pricing v2/modelling/config.py +249 -0
  76. ins_pricing v2/modelling/config_preprocess.py +254 -0
  77. ins_pricing v2/modelling/core.py +741 -0
  78. ins_pricing v2/modelling/data_container.py +42 -0
  79. ins_pricing v2/modelling/explain/__init__.py +55 -0
  80. ins_pricing v2/modelling/explain/gradients.py +334 -0
  81. ins_pricing v2/modelling/explain/metrics.py +176 -0
  82. ins_pricing v2/modelling/explain/permutation.py +155 -0
  83. ins_pricing v2/modelling/explain/shap_utils.py +146 -0
  84. ins_pricing v2/modelling/features.py +215 -0
  85. ins_pricing v2/modelling/model_manager.py +148 -0
  86. ins_pricing v2/modelling/model_plotting.py +463 -0
  87. ins_pricing v2/modelling/models.py +2203 -0
  88. ins_pricing v2/modelling/notebook_utils.py +294 -0
  89. ins_pricing v2/modelling/plotting/__init__.py +45 -0
  90. ins_pricing v2/modelling/plotting/common.py +63 -0
  91. ins_pricing v2/modelling/plotting/curves.py +572 -0
  92. ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
  93. ins_pricing v2/modelling/plotting/geo.py +362 -0
  94. ins_pricing v2/modelling/plotting/importance.py +121 -0
  95. ins_pricing v2/modelling/run_logging.py +133 -0
  96. ins_pricing v2/modelling/tests/conftest.py +8 -0
  97. ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
  98. ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
  99. ins_pricing v2/modelling/tests/test_explain.py +56 -0
  100. ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
  101. ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
  102. ins_pricing v2/modelling/tests/test_plotting.py +63 -0
  103. ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
  104. ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
  105. ins_pricing v2/modelling/trainers.py +2447 -0
  106. ins_pricing v2/modelling/utils.py +1020 -0
  107. ins_pricing v2/modelling/watchdog_run.py +211 -0
  108. ins_pricing v2/pricing/__init__.py +27 -0
  109. ins_pricing v2/pricing/calibration.py +39 -0
  110. ins_pricing v2/pricing/data_quality.py +117 -0
  111. ins_pricing v2/pricing/exposure.py +85 -0
  112. ins_pricing v2/pricing/factors.py +91 -0
  113. ins_pricing v2/pricing/monitoring.py +99 -0
  114. ins_pricing v2/pricing/rate_table.py +78 -0
  115. ins_pricing v2/production/__init__.py +21 -0
  116. ins_pricing v2/production/drift.py +30 -0
  117. ins_pricing v2/production/monitoring.py +143 -0
  118. ins_pricing v2/production/scoring.py +40 -0
  119. ins_pricing v2/reporting/__init__.py +11 -0
  120. ins_pricing v2/reporting/report_builder.py +72 -0
  121. ins_pricing v2/reporting/scheduler.py +45 -0
  122. ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
  123. ins_pricing v2/scripts/Explain_entry.py +545 -0
  124. ins_pricing v2/scripts/__init__.py +1 -0
  125. ins_pricing v2/scripts/train.py +568 -0
  126. ins_pricing v2/setup.py +55 -0
  127. ins_pricing v2/smoke_test.py +28 -0
  128. ins_pricing-0.1.6.dist-info/METADATA +78 -0
  129. ins_pricing-0.1.6.dist-info/RECORD +169 -0
  130. ins_pricing-0.1.6.dist-info/WHEEL +5 -0
  131. ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
  132. user_packages/__init__.py +105 -0
  133. user_packages legacy/BayesOpt.py +5659 -0
  134. user_packages legacy/BayesOpt_entry.py +513 -0
  135. user_packages legacy/BayesOpt_incremental.py +685 -0
  136. user_packages legacy/Pricing_Run.py +36 -0
  137. user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
  138. user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
  139. user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
  140. user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
  141. user_packages legacy/Try/BayesOpt legacy.py +3280 -0
  142. user_packages legacy/Try/BayesOpt.py +838 -0
  143. user_packages legacy/Try/BayesOptAll.py +1569 -0
  144. user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
  145. user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
  146. user_packages legacy/Try/BayesOptSearch.py +830 -0
  147. user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
  148. user_packages legacy/Try/BayesOptV1.py +1911 -0
  149. user_packages legacy/Try/BayesOptV10.py +2973 -0
  150. user_packages legacy/Try/BayesOptV11.py +3001 -0
  151. user_packages legacy/Try/BayesOptV12.py +3001 -0
  152. user_packages legacy/Try/BayesOptV2.py +2065 -0
  153. user_packages legacy/Try/BayesOptV3.py +2209 -0
  154. user_packages legacy/Try/BayesOptV4.py +2342 -0
  155. user_packages legacy/Try/BayesOptV5.py +2372 -0
  156. user_packages legacy/Try/BayesOptV6.py +2759 -0
  157. user_packages legacy/Try/BayesOptV7.py +2832 -0
  158. user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
  159. user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
  160. user_packages legacy/Try/BayesOptV9.py +2927 -0
  161. user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
  162. user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
  163. user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
  164. user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
  165. user_packages legacy/Try/xgbbayesopt.py +523 -0
  166. user_packages legacy/__init__.py +19 -0
  167. user_packages legacy/cli_common.py +124 -0
  168. user_packages legacy/notebook_utils.py +228 -0
  169. user_packages legacy/watchdog_run.py +202 -0
@@ -0,0 +1,159 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import asdict, dataclass, field
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import List, Optional
8
+
9
+ from .audit import AuditLogger
10
+ from .registry import ModelRegistry
11
+
12
+
13
@dataclass
class ModelRef:
    """Reference to one model version recorded in a deployment state.

    Instances are serialised to and from JSON through ``dataclasses.asdict``
    and ``ModelRef(**payload)``, so field names double as the on-disk keys.
    """

    # Model name.
    name: str
    # Model version identifier.
    version: str
    # Timestamp string recorded when this reference became active.
    activated_at: str
    # Who triggered the activation, when known.
    actor: Optional[str] = None
    # Free-form note attached to the activation.
    note: Optional[str] = None
20
+
21
+
22
@dataclass
class DeploymentState:
    """Deployment record for a single environment.

    Holds the currently active model reference, the stack of previously
    active references, and the timestamp of the last change.
    """

    # Environment name this state belongs to.
    env: str
    # Currently active model, or None when nothing has been deployed.
    active: Optional[ModelRef] = None
    # Previously active models, oldest first (a fresh list per instance).
    history: List[ModelRef] = field(default_factory=list)
    # Timestamp string of the most recent change, if any.
    updated_at: Optional[str] = None
28
+
29
+
30
class ReleaseManager:
    """Environment release manager with rollback support.

    The state of each environment is stored as ``<state_dir>/<env>.json``.
    An optional model registry is promoted on deploy/rollback and an optional
    audit logger records both actions.
    """

    def __init__(
        self,
        state_dir: str | Path,
        *,
        registry: Optional[ModelRegistry] = None,
        audit_logger: Optional[AuditLogger] = None,
    ):
        """Create the manager and make sure ``state_dir`` exists.

        Args:
            state_dir: Directory holding one JSON state file per environment.
            registry: Optional registry whose ``promote`` is called on changes.
            audit_logger: Optional logger whose ``log`` is called on changes.
        """
        self.state_dir = Path(state_dir)
        self.state_dir.mkdir(parents=True, exist_ok=True)
        self.registry = registry
        self.audit_logger = audit_logger

    @staticmethod
    def _utc_now_iso() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        ``datetime.utcnow()`` is deprecated since Python 3.12. The aware value
        is stripped of its tzinfo so the string keeps the exact format that
        existing state files already contain (no ``+00:00`` suffix).
        """
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _state_path(self, env: str) -> Path:
        """Return the JSON state file path for ``env``."""
        return self.state_dir / f"{env}.json"

    def _load(self, env: str) -> DeploymentState:
        """Read the state for ``env``; return an empty state if no file exists."""
        path = self._state_path(env)
        if not path.exists():
            return DeploymentState(env=env)
        with path.open("r", encoding="utf-8") as fh:
            payload = json.load(fh)
        active = payload.get("active")
        history = payload.get("history", [])
        return DeploymentState(
            env=payload.get("env", env),
            active=ModelRef(**active) if active else None,
            history=[ModelRef(**item) for item in history],
            updated_at=payload.get("updated_at"),
        )

    def _save(self, state: DeploymentState) -> None:
        """Persist ``state`` without ever leaving a half-written state file.

        The payload is written to a sibling ``*.tmp`` file and then moved over
        the real file, so a failed or interrupted save keeps the previous
        state readable instead of truncating it.
        """
        payload = {
            "env": state.env,
            "active": asdict(state.active) if state.active else None,
            "history": [asdict(item) for item in state.history],
            "updated_at": state.updated_at,
        }
        path = self._state_path(state.env)
        tmp_path = path.with_name(path.name + ".tmp")
        try:
            with tmp_path.open("w", encoding="utf-8") as fh:
                json.dump(payload, fh, indent=2, ensure_ascii=True)
            tmp_path.replace(path)
        except BaseException:
            # Best-effort cleanup of the partial temp file; the original
            # error is what the caller needs to see.
            try:
                tmp_path.unlink()
            except OSError:
                pass
            raise

    def get_active(self, env: str) -> Optional[ModelRef]:
        """Return the active model reference for ``env``, or None."""
        state = self._load(env)
        return state.active

    def list_history(self, env: str) -> List[ModelRef]:
        """Return the previously active model references for ``env``."""
        return self._load(env).history

    def deploy(
        self,
        env: str,
        name: str,
        version: str,
        *,
        actor: Optional[str] = None,
        note: Optional[str] = None,
        update_registry_status: bool = True,
        registry_status: str = "production",
    ) -> DeploymentState:
        """Make ``name``/``version`` the active model of ``env``.

        If that exact model is already active the stored state is returned
        unchanged: nothing is saved, promoted or logged. Otherwise the current
        active reference (if any) is appended to the history, the new state is
        saved, the registry is promoted when configured and requested, and the
        action is written to the audit log when a logger is configured.

        Args:
            env: Target environment name.
            name: Model name.
            version: Model version.
            actor: Who performs the deployment; logged as "unknown" if empty.
            note: Optional note stored on the reference and in the audit log.
            update_registry_status: Whether to call ``registry.promote``.
            registry_status: Status passed to ``registry.promote``.

        Returns:
            The resulting deployment state.
        """
        state = self._load(env)
        if state.active and state.active.name == name and state.active.version == version:
            return state

        if state.active is not None:
            state.history.append(state.active)

        now = self._utc_now_iso()
        state.active = ModelRef(
            name=name,
            version=version,
            activated_at=now,
            actor=actor,
            note=note,
        )
        state.updated_at = now
        self._save(state)

        if self.registry and update_registry_status:
            self.registry.promote(name, version, new_status=registry_status)

        if self.audit_logger:
            self.audit_logger.log(
                "deploy",
                actor or "unknown",
                metadata={"env": env, "name": name, "version": version},
                note=note,
            )

        return state

    def rollback(
        self,
        env: str,
        *,
        actor: Optional[str] = None,
        note: Optional[str] = None,
        update_registry_status: bool = False,
        registry_status: str = "production",
    ) -> DeploymentState:
        """Re-activate the most recent entry of the history of ``env``.

        The entry is removed from the history and becomes the active
        reference with a fresh activation timestamp. The reference that was
        active before the rollback is replaced and is not added to the history.

        Args:
            env: Target environment name.
            actor: Who performs the rollback; falls back to the actor stored
                on the restored reference, and is logged as "unknown" if empty.
            note: Optional note; falls back to the restored reference's note.
            update_registry_status: Whether to call ``registry.promote``.
            registry_status: Status passed to ``registry.promote``.

        Returns:
            The resulting deployment state.

        Raises:
            ValueError: If the environment has no history to roll back to.
        """
        state = self._load(env)
        if not state.history:
            raise ValueError("No history available to rollback.")

        previous = state.history.pop()
        now = self._utc_now_iso()
        state.active = ModelRef(
            name=previous.name,
            version=previous.version,
            activated_at=now,
            actor=actor or previous.actor,
            note=note or previous.note,
        )
        state.updated_at = now
        self._save(state)

        if self.registry and update_registry_status:
            self.registry.promote(previous.name, previous.version, new_status=registry_status)

        if self.audit_logger:
            self.audit_logger.log(
                "rollback",
                actor or "unknown",
                metadata={"env": env, "name": previous.name, "version": previous.version},
                note=note,
            )

        return state
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ try:
8
+ from .notebook_utils import run_from_config # type: ignore
9
+ except Exception: # pragma: no cover
10
+ from notebook_utils import run_from_config # type: ignore
11
+
12
+
13
def run(config_json: str | Path) -> None:
    """Run the explain workflow described by a config.json file.

    The call is forwarded unchanged to ``run_from_config``; the config is
    expected to use ``runner.mode=explain``.

    Args:
        config_json: Path to the config.json file.
    """
    run_from_config(config_json)
16
+
17
+
18
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the explain runner.

    Returns:
        A parser exposing a single required ``--config-json`` option.
    """
    description = "Explain_Run: run explain by config.json (runner.mode=explain)."
    config_help = (
        "Path to config.json (relative paths are resolved from ins_pricing/modelling/ when possible)."
    )
    cli = argparse.ArgumentParser(description=description)
    cli.add_argument("--config-json", required=True, help=config_help)
    return cli
28
+
29
+
30
def main(argv: Optional[list[str]] = None) -> None:
    """Command-line entry point: parse arguments, then run the explain job.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
    """
    parser = _build_parser()
    namespace = parser.parse_args(argv)
    run(namespace.config_json)
33
+
34
+
35
+ if __name__ == "__main__":
36
+ main()
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ try:
8
+ from .notebook_utils import run_from_config # type: ignore
9
+ except Exception: # pragma: no cover
10
+ from notebook_utils import run_from_config # type: ignore
11
+
12
+
13
def run(config_json: str | Path) -> None:
    """Unified entry point driven by a config.json file.

    The call is forwarded unchanged to ``run_from_config``, which selects
    entry/incremental/watchdog/DDP according to the config's runner section.

    Args:
        config_json: Path to the config.json file.
    """
    run_from_config(config_json)
16
+
17
+
18
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the pricing runner.

    Returns:
        A parser exposing a single required ``--config-json`` option.
    """
    description = "Pricing_Run: run BayesOpt by config.json (entry/incremental/watchdog/DDP)."
    config_help = (
        "Path to config.json (relative paths are resolved from ins_pricing/modelling/ when possible)."
    )
    cli = argparse.ArgumentParser(description=description)
    cli.add_argument("--config-json", required=True, help=config_help)
    return cli
28
+
29
+
30
def main(argv: Optional[list[str]] = None) -> None:
    """Command-line entry point: parse arguments, then run the pricing job.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.
    """
    parser = _build_parser()
    namespace = parser.parse_args(argv)
    run(namespace.config_json)
33
+
34
+
35
+ if __name__ == "__main__":
36
+ main()
@@ -0,0 +1,151 @@
1
+ """Modelling subpackage for ins_pricing."""
2
+
3
+ from __future__ import annotations
4
+ import types
5
+ import sys
6
+ from importlib import import_module
7
+ from pathlib import Path
8
+
9
+ # Exports
10
+ from .config import BayesOptConfig
11
+ from .config_preprocess import (
12
+ DatasetPreprocessor,
13
+ OutputManager,
14
+ VersionManager,
15
+ )
16
+ from .core import BayesOptModel
17
+ from .models import (
18
+ FeatureTokenizer,
19
+ FTTransformerCore,
20
+ FTTransformerSklearn,
21
+ GraphNeuralNetSklearn,
22
+ MaskedTabularDataset,
23
+ ResBlock,
24
+ ResNetSequential,
25
+ ResNetSklearn,
26
+ ScaledTransformerEncoderLayer,
27
+ SimpleGraphLayer,
28
+ SimpleGNN,
29
+ TabularDataset,
30
+ )
31
+ from .trainers import (
32
+ FTTrainer,
33
+ GLMTrainer,
34
+ GNNTrainer,
35
+ ResNetTrainer,
36
+ TrainerBase,
37
+ XGBTrainer,
38
+ _xgb_cuda_available,
39
+ )
40
+ from .utils import (
41
+ EPS,
42
+ DistributedUtils,
43
+ IOUtils,
44
+ PlotUtils,
45
+ TorchTrainerMixin,
46
+ TrainingUtils,
47
+ compute_batch_size,
48
+ csv_to_dict,
49
+ ensure_parent_dir,
50
+ free_cuda,
51
+ infer_factor_and_cate_list,
52
+ plot_dlift_list,
53
+ plot_lift_list,
54
+ set_global_seed,
55
+ split_data,
56
+ tweedie_loss,
57
+ )
58
+ try:
59
+ import torch
60
+ except ImportError:
61
+ torch = None
62
+
63
+ # Lazy submodules
64
+ _LAZY_SUBMODULES = {
65
+ "plotting": "ins_pricing.modelling.plotting",
66
+ "explain": "ins_pricing.modelling.explain",
67
+ "cli_common": "ins_pricing.modelling.cli_common",
68
+ "notebook_utils": "ins_pricing.modelling.notebook_utils",
69
+ }
70
+
71
+ _PACKAGE_PATHS = {
72
+ "plotting": Path(__file__).resolve().parent / "plotting",
73
+ "explain": Path(__file__).resolve().parent / "explain",
74
+ }
75
+
76
+ __all__ = [
77
+ "BayesOptConfig",
78
+ "DatasetPreprocessor",
79
+ "OutputManager",
80
+ "VersionManager",
81
+ "BayesOptModel",
82
+ "FeatureTokenizer",
83
+ "FTTransformerCore",
84
+ "FTTransformerSklearn",
85
+ "GraphNeuralNetSklearn",
86
+ "MaskedTabularDataset",
87
+ "ResBlock",
88
+ "ResNetSequential",
89
+ "ResNetSklearn",
90
+ "ScaledTransformerEncoderLayer",
91
+ "SimpleGraphLayer",
92
+ "SimpleGNN",
93
+ "TabularDataset",
94
+ "FTTrainer",
95
+ "GLMTrainer",
96
+ "GNNTrainer",
97
+ "ResNetTrainer",
98
+ "TrainerBase",
99
+ "XGBTrainer",
100
+ "_xgb_cuda_available",
101
+ "EPS",
102
+ "DistributedUtils",
103
+ "IOUtils",
104
+ "PlotUtils",
105
+ "TorchTrainerMixin",
106
+ "TrainingUtils",
107
+ "compute_batch_size",
108
+ "csv_to_dict",
109
+ "ensure_parent_dir",
110
+ "free_cuda",
111
+ "infer_factor_and_cate_list",
112
+ "plot_dlift_list",
113
+ "plot_lift_list",
114
+ "set_global_seed",
115
+ "split_data",
116
+ "tweedie_loss",
117
+ "torch",
118
+ ] + sorted(list(_LAZY_SUBMODULES.keys()))
119
+
120
def _lazy_module(name: str, target: str, package_path: Path | None = None) -> types.ModuleType:
    """Build a placeholder module that imports ``target`` on first use.

    The placeholder carries module-level ``__getattr__`` and ``__dir__`` hooks.
    The first attribute lookup or ``dir()`` call imports ``target``, stores
    the real module in ``sys.modules[name]`` and forwards the request to it.

    Args:
        name: Dotted name given to the placeholder module.
        target: Dotted name of the module to import on demand.
        package_path: When given, set as the placeholder's ``__path__`` so
            submodule imports below ``name`` can be located.

    Returns:
        The placeholder module. The caller is responsible for registering it.
    """
    proxy = types.ModuleType(name)
    if package_path is not None:
        proxy.__path__ = [str(package_path)]

    def _load() -> types.ModuleType:
        if sys.modules.get(target) is proxy:
            # The placeholder is registered under the target's own name, so
            # import_module() would hand the placeholder straight back and
            # every attribute lookup would recurse forever. Unregister it so
            # a real import happens; put it back if that import fails.
            del sys.modules[target]
            try:
                module = import_module(target)
            except BaseException:
                sys.modules[target] = proxy
                raise
        else:
            module = import_module(target)
        sys.modules[name] = module
        return module

    def __getattr__(attr: str):
        module = _load()
        return getattr(module, attr)

    def __dir__() -> list[str]:
        module = _load()
        return sorted(set(dir(module)))

    proxy.__getattr__ = __getattr__  # type: ignore[attr-defined]
    proxy.__dir__ = __dir__  # type: ignore[attr-defined]
    return proxy
141
+
142
def _install_proxy(alias: str, target: str) -> None:
    """Register a lazy placeholder for ``alias`` under this package.

    Nothing happens when ``<this package>.<alias>`` is already present in
    ``sys.modules``. Otherwise a placeholder created by ``_lazy_module`` is
    stored both in ``sys.modules`` and in this module's globals.

    Args:
        alias: Attribute name exposed on this package.
        target: Dotted name of the module to import on first use.
    """
    qualified = f"{__name__}.{alias}"
    if qualified not in sys.modules:
        lazy = _lazy_module(qualified, target, _PACKAGE_PATHS.get(alias))
        sys.modules[qualified] = lazy
        globals()[alias] = lazy
149
+
150
+ for _alias, _target in _LAZY_SUBMODULES.items():
151
+ _install_proxy(_alias, _target)
@@ -0,0 +1,141 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
7
+ try:
8
+ from .config import BayesOptConfig
9
+ except ImportError:
10
+ from ins_pricing.modelling.config import BayesOptConfig
11
+
12
+
13
+
14
+ PLOT_MODEL_LABELS: Dict[str, Tuple[str, str]] = {
15
+ "glm": ("GLM", "pred_glm"),
16
+ "xgb": ("Xgboost", "pred_xgb"),
17
+ "resn": ("ResNet", "pred_resn"),
18
+ "ft": ("FTTransformer", "pred_ft"),
19
+ "gnn": ("GNN", "pred_gnn"),
20
+ }
21
+
22
+ PYTORCH_TRAINERS = {"resn", "ft", "gnn"}
23
+
24
+
25
def dedupe_preserve_order(items: Iterable[str]) -> List[str]:
    """Return ``items`` without duplicates, keeping first-occurrence order."""
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(items))
33
+
34
+
35
def build_model_names(prefixes: Sequence[str], suffixes: Sequence[str]) -> List[str]:
    """Combine every prefix with every suffix as ``"<prefix>_<suffix>"``.

    Names are grouped by suffix: all prefixes for the first suffix come
    first, then all prefixes for the second suffix, and so on.
    """
    return [f"{prefix}_{suffix}" for suffix in suffixes for prefix in prefixes]
40
+
41
+
42
def parse_model_pairs(raw_pairs: List) -> List[Tuple[str, str]]:
    """Normalise a mixed list of pair specifications into string tuples.

    Accepted entries are two-element lists/tuples (both elements are passed
    through ``str``) and comma-separated strings that contain exactly two
    non-empty parts after stripping whitespace. Every other entry is ignored.
    """
    parsed: List[Tuple[str, str]] = []
    for entry in raw_pairs:
        if isinstance(entry, (list, tuple)) and len(entry) == 2:
            parsed.append((str(entry[0]), str(entry[1])))
            continue
        if not isinstance(entry, str):
            continue
        tokens = [tok for tok in (piece.strip() for piece in entry.split(",")) if tok]
        if len(tokens) == 2:
            parsed.append((tokens[0], tokens[1]))
    return parsed
52
+
53
+
54
def resolve_path(value: Optional[str], base_dir: Path) -> Optional[Path]:
    """Turn a path string into a ``Path``, anchoring relative ones at ``base_dir``.

    Returns:
        ``None`` when ``value`` is not a string or is blank; the path as given
        when it is absolute; otherwise ``base_dir / value`` resolved.
    """
    if not isinstance(value, str) or not value.strip():
        return None
    candidate = Path(value)
    return candidate if candidate.is_absolute() else (base_dir / candidate).resolve()
63
+
64
+
65
def resolve_config_path(raw: str, script_dir: Path) -> Path:
    """Locate a config file, trying ``raw`` as given and then under ``script_dir``.

    Returns:
        The resolved path of the first candidate that exists.

    Raises:
        FileNotFoundError: If neither candidate exists.
    """
    direct = Path(raw)
    fallback = script_dir / raw
    for option in (direct, fallback):
        if option.exists():
            return option.resolve()
    raise FileNotFoundError(
        f"Config file not found: {raw}. Tried: {direct.resolve()} and {fallback.resolve()}"
    )
75
+
76
+
77
def load_config_json(path: Path, required_keys: Sequence[str]) -> BayesOptConfig:
    """Read a JSON config file and convert it to a ``BayesOptConfig``.

    Args:
        path: Location of the UTF-8 encoded JSON file.
        required_keys: Top-level keys that must be present in the file.

    Returns:
        The result of ``BayesOptConfig.from_legacy_dict`` on the parsed JSON.

    Raises:
        ValueError: If any of ``required_keys`` is missing.
    """
    raw_text = path.read_text(encoding="utf-8")
    cfg_dict = json.loads(raw_text)
    missing = []
    for key in required_keys:
        if key not in cfg_dict:
            missing.append(key)
    if missing:
        raise ValueError(f"Missing required keys in {path}: {missing}")
    return BayesOptConfig.from_legacy_dict(cfg_dict)
83
+
84
+
85
def set_env(env_overrides: Dict[str, Any]) -> None:
    """Export the ``env`` section of config.json as environment variables.

    Keys and values are converted with ``str``. Variables are written with
    ``os.environ.setdefault``, so anything already set in the shell wins over
    the value from config.json. A ``None`` or empty mapping is a no-op.

    Debugging DDP/Optuna hangs: the following keys can be added to the
    ``env`` section of config.json:
      - ``TORCH_DISTRIBUTED_DEBUG=DETAIL``
      - ``NCCL_DEBUG=INFO``
      - ``BAYESOPT_DDP_BARRIER_DEBUG=1``
      - ``BAYESOPT_DDP_BARRIER_TIMEOUT=300``
      - ``BAYESOPT_CUDA_SYNC=1`` (optional; can slow down)
      - ``BAYESOPT_CUDA_IPC_COLLECT=1`` (optional; can slow down)
    """
    if not env_overrides:
        return
    for name, raw_value in env_overrides.items():
        os.environ.setdefault(str(name), str(raw_value))
101
+
102
+
103
def _looks_like_url(value: str) -> bool:
    """Report whether ``str(value)`` contains a ``://`` scheme separator."""
    text = str(value)
    return text.find("://") != -1
106
+
107
+
108
def normalize_config_paths(cfg: Dict[str, Any], config_path: Path) -> Dict[str, Any]:
    """Return a copy of ``cfg`` with relative paths anchored at the config's folder.

    Fields handled:
      - ``data_dir`` / ``output_dir`` / ``gnn_graph_cache``: string values only.
      - ``optuna_storage``: non-blank strings that do not look like a URL.
      - ``best_params_files``: a dict of model key -> path; keys are converted
        to ``str`` and entries whose value is not a string are left out.

    Args:
        cfg: Parsed configuration; the input mapping itself is not modified.
        config_path: Location of the config.json the values came from.

    Returns:
        A shallow copy of ``cfg`` with the fields above rewritten.
    """
    anchor = config_path.parent
    normalized = dict(cfg)

    for field_name in ("data_dir", "output_dir", "gnn_graph_cache"):
        raw = normalized.get(field_name)
        if not isinstance(raw, str):
            continue
        located = resolve_path(raw, anchor)
        if located is not None:
            normalized[field_name] = str(located)

    storage = normalized.get("optuna_storage")
    if isinstance(storage, str) and storage.strip() and not _looks_like_url(storage):
        located = resolve_path(storage, anchor)
        if located is not None:
            normalized["optuna_storage"] = str(located)

    best_files = normalized.get("best_params_files")
    if isinstance(best_files, dict):
        remapped: Dict[str, str] = {}
        for model_key, raw_path in best_files.items():
            if isinstance(raw_path, str):
                located = resolve_path(raw_path, anchor)
                # Keep the raw text when the path cannot be resolved.
                remapped[str(model_key)] = str(raw_path) if located is None else str(located)
        normalized["best_params_files"] = remapped

    return normalized