ins-pricing 0.4.5__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. ins_pricing/README.md +48 -22
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +52 -50
  4. ins_pricing/cli/BayesOpt_incremental.py +39 -105
  5. ins_pricing/cli/Explain_Run.py +31 -23
  6. ins_pricing/cli/Explain_entry.py +532 -579
  7. ins_pricing/cli/Pricing_Run.py +31 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +11 -9
  9. ins_pricing/cli/utils/cli_common.py +256 -256
  10. ins_pricing/cli/utils/cli_config.py +375 -375
  11. ins_pricing/cli/utils/import_resolver.py +382 -365
  12. ins_pricing/cli/utils/notebook_utils.py +340 -340
  13. ins_pricing/cli/watchdog_run.py +209 -201
  14. ins_pricing/frontend/__init__.py +10 -10
  15. ins_pricing/frontend/example_workflows.py +1 -1
  16. ins_pricing/governance/__init__.py +20 -20
  17. ins_pricing/governance/release.py +159 -159
  18. ins_pricing/modelling/__init__.py +147 -92
  19. ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +2 -2
  20. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  21. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -562
  22. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -964
  23. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  24. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
  29. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
  32. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
  36. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  37. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  38. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  39. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  40. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  41. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
  42. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  43. ins_pricing/modelling/explain/__init__.py +55 -55
  44. ins_pricing/modelling/explain/metrics.py +27 -174
  45. ins_pricing/modelling/explain/permutation.py +237 -237
  46. ins_pricing/modelling/plotting/__init__.py +40 -36
  47. ins_pricing/modelling/plotting/compat.py +228 -0
  48. ins_pricing/modelling/plotting/curves.py +572 -572
  49. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  50. ins_pricing/modelling/plotting/geo.py +362 -362
  51. ins_pricing/modelling/plotting/importance.py +121 -121
  52. ins_pricing/pricing/__init__.py +27 -27
  53. ins_pricing/production/__init__.py +35 -25
  54. ins_pricing/production/{predict.py → inference.py} +140 -57
  55. ins_pricing/production/monitoring.py +8 -21
  56. ins_pricing/reporting/__init__.py +11 -11
  57. ins_pricing/setup.py +1 -1
  58. ins_pricing/tests/production/test_inference.py +90 -0
  59. ins_pricing/utils/__init__.py +116 -83
  60. ins_pricing/utils/device.py +255 -255
  61. ins_pricing/utils/features.py +53 -0
  62. ins_pricing/utils/io.py +72 -0
  63. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  64. ins_pricing/utils/metrics.py +158 -24
  65. ins_pricing/utils/numerics.py +76 -0
  66. ins_pricing/utils/paths.py +9 -1
  67. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +182 -182
  68. ins_pricing-0.5.0.dist-info/RECORD +131 -0
  69. ins_pricing/modelling/core/BayesOpt.py +0 -146
  70. ins_pricing/modelling/core/__init__.py +0 -1
  71. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  72. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  73. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  74. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  75. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  76. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  77. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  78. ins_pricing/tests/production/test_predict.py +0 -233
  79. ins_pricing-0.4.5.dist-info/RECORD +0 -130
  80. /ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
  81. /ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
  82. /ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
  83. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
  84. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
@@ -1,340 +1,340 @@
1
- from __future__ import annotations
2
-
3
- import argparse
4
- import json
5
- import subprocess
6
- import sys
7
- from pathlib import Path
8
- from typing import Iterable, List, Optional, Sequence, cast
9
-
10
- try:
11
- from .cli_config import add_config_json_arg, set_env # type: ignore
12
- except Exception: # pragma: no cover
13
- from cli_config import add_config_json_arg, set_env # type: ignore
14
-
15
-
16
- def _find_ins_pricing_dir(cwd: Optional[Path] = None) -> Path:
17
- cwd = (cwd or Path().resolve()).resolve()
18
- pkg_root = Path(__file__).resolve().parents[2]
19
- candidates = [pkg_root, cwd / "ins_pricing", cwd, cwd.parent / "ins_pricing"]
20
- for cand in candidates:
21
- cli_entry = cand / "cli" / "BayesOpt_entry.py"
22
- cli_watchdog = cand / "cli" / "watchdog_run.py"
23
- if cli_entry.exists() and cli_watchdog.exists():
24
- return cand
25
- raise FileNotFoundError(
26
- "Cannot locate ins_pricing directory (expected cli/BayesOpt_entry.py and "
27
- "cli/watchdog_run.py). "
28
- f"cwd={cwd}"
29
- )
30
-
31
-
32
- def _stringify_cmd(cmd: Sequence[object]) -> List[str]:
33
- return [str(x) for x in cmd]
34
-
35
-
36
- def build_bayesopt_entry_cmd(
37
- config_json: str | Path,
38
- model_keys: Sequence[str],
39
- *,
40
- nproc_per_node: int = 1,
41
- standalone: bool = True,
42
- entry_script: str | Path = "cli/BayesOpt_entry.py",
43
- extra_args: Optional[Sequence[str]] = None,
44
- ) -> List[str]:
45
- """Build a command to run cli/BayesOpt_entry.py (optional torchrun/DDP)."""
46
- pkg_dir = _find_ins_pricing_dir()
47
- entry_script_path = Path(entry_script)
48
- if entry_script_path.is_absolute():
49
- entry_path = entry_script_path.resolve()
50
- else:
51
- candidate = pkg_dir / entry_script_path
52
- legacy = pkg_dir / "modelling" / entry_script_path
53
- entry_path = (
54
- candidate.resolve()
55
- if candidate.exists()
56
- else legacy.resolve()
57
- if legacy.exists()
58
- else candidate.resolve()
59
- )
60
- config_path = Path(config_json)
61
- if not config_path.is_absolute():
62
- config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
63
-
64
- cmd: List[object]
65
- if int(nproc_per_node) > 1:
66
- cmd = [
67
- sys.executable,
68
- "-m",
69
- "torch.distributed.run",
70
- *(["--standalone"] if standalone else []),
71
- f"--nproc_per_node={int(nproc_per_node)}",
72
- str(entry_path),
73
- ]
74
- else:
75
- cmd = [sys.executable, str(entry_path)]
76
-
77
- cmd += ["--config-json", str(config_path), "--model-keys", *list(model_keys)]
78
- if extra_args:
79
- cmd += list(extra_args)
80
- return _stringify_cmd(cmd)
81
-
82
-
83
- def build_incremental_cmd(
84
- config_json: str | Path,
85
- *,
86
- entry_script: str | Path = "cli/BayesOpt_incremental.py",
87
- extra_args: Optional[Sequence[str]] = None,
88
- ) -> List[str]:
89
- """Build a command to run cli/BayesOpt_incremental.py."""
90
- pkg_dir = _find_ins_pricing_dir()
91
- entry_script_path = Path(entry_script)
92
- if entry_script_path.is_absolute():
93
- entry_path = entry_script_path.resolve()
94
- else:
95
- candidate = pkg_dir / entry_script_path
96
- legacy = pkg_dir / "modelling" / entry_script_path
97
- entry_path = (
98
- candidate.resolve()
99
- if candidate.exists()
100
- else legacy.resolve()
101
- if legacy.exists()
102
- else candidate.resolve()
103
- )
104
- config_path = Path(config_json)
105
- if not config_path.is_absolute():
106
- config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
107
-
108
- cmd: List[object] = [sys.executable, str(entry_path), "--config-json", str(config_path)]
109
- if extra_args:
110
- cmd += list(extra_args)
111
- return _stringify_cmd(cmd)
112
-
113
-
114
- def build_explain_cmd(
115
- config_json: str | Path,
116
- *,
117
- entry_script: str | Path = "cli/Explain_entry.py",
118
- extra_args: Optional[Sequence[str]] = None,
119
- ) -> List[str]:
120
- """Build a command to run cli/Explain_entry.py."""
121
- pkg_dir = _find_ins_pricing_dir()
122
- entry_script_path = Path(entry_script)
123
- if entry_script_path.is_absolute():
124
- entry_path = entry_script_path.resolve()
125
- else:
126
- candidate = pkg_dir / entry_script_path
127
- legacy = pkg_dir / "modelling" / entry_script_path
128
- entry_path = (
129
- candidate.resolve()
130
- if candidate.exists()
131
- else legacy.resolve()
132
- if legacy.exists()
133
- else candidate.resolve()
134
- )
135
- config_path = Path(config_json)
136
- if not config_path.is_absolute():
137
- config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
138
-
139
- cmd: List[object] = [sys.executable, str(entry_path), "--config-json", str(config_path)]
140
- if extra_args:
141
- cmd += list(extra_args)
142
- return _stringify_cmd(cmd)
143
-
144
-
145
- def wrap_with_watchdog(
146
- cmd: Sequence[str],
147
- *,
148
- idle_seconds: int = 7200,
149
- max_restarts: int = 50,
150
- restart_delay_seconds: int = 10,
151
- stop_on_nonzero_exit: bool = True,
152
- watchdog_script: str | Path = "cli/watchdog_run.py",
153
- ) -> List[str]:
154
- """Wrap a command with watchdog: restart when idle_seconds elapses with no output."""
155
- pkg_dir = _find_ins_pricing_dir()
156
- watchdog_script_path = Path(watchdog_script)
157
- if watchdog_script_path.is_absolute():
158
- watchdog_path = watchdog_script_path.resolve()
159
- else:
160
- candidate = pkg_dir / watchdog_script_path
161
- legacy = pkg_dir / "modelling" / watchdog_script_path
162
- watchdog_path = (
163
- candidate.resolve()
164
- if candidate.exists()
165
- else legacy.resolve()
166
- if legacy.exists()
167
- else candidate.resolve()
168
- )
169
- wd_cmd: List[object] = [
170
- sys.executable,
171
- str(watchdog_path),
172
- "--idle-seconds",
173
- str(int(idle_seconds)),
174
- "--max-restarts",
175
- str(int(max_restarts)),
176
- "--restart-delay-seconds",
177
- str(int(restart_delay_seconds)),
178
- ]
179
- if stop_on_nonzero_exit:
180
- wd_cmd.append("--stop-on-nonzero-exit")
181
- wd_cmd.append("--")
182
- wd_cmd.extend(list(cmd))
183
- return _stringify_cmd(wd_cmd)
184
-
185
-
186
- def run(cmd: Sequence[str], *, check: bool = True) -> subprocess.CompletedProcess:
187
- """Run an external command from a notebook (blocking)."""
188
- return subprocess.run(list(cmd), check=check)
189
-
190
-
191
- def _build_config_parser(description: str) -> argparse.ArgumentParser:
192
- parser = argparse.ArgumentParser(description=description)
193
- add_config_json_arg(
194
- parser,
195
- help_text="Path to config.json (relative paths are resolved from ins_pricing/ when possible).",
196
- )
197
- return parser
198
-
199
-
200
- def run_from_config_cli(
201
- description: str,
202
- argv: Optional[Sequence[str]] = None,
203
- ) -> subprocess.CompletedProcess:
204
- parser = _build_config_parser(description)
205
- args = parser.parse_args(argv)
206
- return run_from_config(args.config_json)
207
-
208
-
209
- def run_bayesopt_entry(
210
- *,
211
- config_json: str | Path,
212
- model_keys: Sequence[str],
213
- max_evals: int = 50,
214
- plot_curves: bool = True,
215
- ft_role: Optional[str] = None,
216
- nproc_per_node: int = 1,
217
- use_watchdog: bool = False,
218
- idle_seconds: int = 7200,
219
- max_restarts: int = 50,
220
- restart_delay_seconds: int = 10,
221
- extra_args: Optional[Sequence[str]] = None,
222
- ) -> subprocess.CompletedProcess:
223
- """Convenience wrapper: build and run BayesOpt_entry (optional torchrun + watchdog)."""
224
- args: List[str] = ["--max-evals", str(int(max_evals))]
225
- if plot_curves:
226
- args.append("--plot-curves")
227
- if ft_role:
228
- args += ["--ft-role", str(ft_role)]
229
- if extra_args:
230
- args += list(extra_args)
231
-
232
- cmd = build_bayesopt_entry_cmd(
233
- config_json=config_json,
234
- model_keys=model_keys,
235
- nproc_per_node=nproc_per_node,
236
- extra_args=args,
237
- )
238
- if use_watchdog:
239
- cmd = wrap_with_watchdog(
240
- cmd,
241
- idle_seconds=idle_seconds,
242
- max_restarts=max_restarts,
243
- restart_delay_seconds=restart_delay_seconds,
244
- )
245
- return run(cmd, check=True)
246
-
247
-
248
- def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
249
- """Notebook entry point: switch execution modes by editing config.json.
250
-
251
- Convention: config.json may include a `runner` section for notebook control:
252
- - runner.mode: "entry" (default), "incremental", or "explain"
253
- - runner.nproc_per_node: >1 enables torchrun/DDP (entry only)
254
- - runner.model_keys: list of models to run (entry only)
255
- - runner.max_evals / runner.plot_curves / runner.ft_role (entry only; override config fields)
256
- - runner.use_watchdog / runner.idle_seconds / runner.max_restarts / runner.restart_delay_seconds
257
- - runner.incremental_args: List[str] (incremental only; extra args for cli/BayesOpt_incremental.py)
258
- """
259
- pkg_dir = _find_ins_pricing_dir()
260
- config_path = Path(config_json)
261
- if not config_path.is_absolute():
262
- config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
263
- raw = json.loads(config_path.read_text(encoding="utf-8", errors="replace"))
264
- set_env(raw.get("env", {}))
265
- runner = cast(dict, raw.get("runner") or {})
266
-
267
- mode = str(runner.get("mode") or "entry").strip().lower()
268
- use_watchdog = bool(runner.get("use_watchdog", False))
269
- idle_seconds = int(runner.get("idle_seconds", 7200))
270
- max_restarts = int(runner.get("max_restarts", 50))
271
- restart_delay_seconds = int(runner.get("restart_delay_seconds", 10))
272
-
273
- if mode == "incremental":
274
- inc_args = runner.get("incremental_args") or []
275
- if not isinstance(inc_args, list):
276
- raise ValueError("config.runner.incremental_args must be a list of strings.")
277
- cmd = build_incremental_cmd(config_path, extra_args=[str(x) for x in inc_args])
278
- if use_watchdog:
279
- cmd = wrap_with_watchdog(
280
- cmd,
281
- idle_seconds=idle_seconds,
282
- max_restarts=max_restarts,
283
- restart_delay_seconds=restart_delay_seconds,
284
- )
285
- return run(cmd, check=True)
286
-
287
- if mode == "explain":
288
- exp_args = runner.get("explain_args") or []
289
- if not isinstance(exp_args, list):
290
- raise ValueError("config.runner.explain_args must be a list of strings.")
291
- cmd = build_explain_cmd(config_path, extra_args=[str(x) for x in exp_args])
292
- if use_watchdog:
293
- cmd = wrap_with_watchdog(
294
- cmd,
295
- idle_seconds=idle_seconds,
296
- max_restarts=max_restarts,
297
- restart_delay_seconds=restart_delay_seconds,
298
- )
299
- return run(cmd, check=True)
300
-
301
- if mode != "entry":
302
- raise ValueError(
303
- f"Unsupported runner.mode={mode!r}, expected 'entry', 'incremental', or 'explain'."
304
- )
305
-
306
- model_keys = runner.get("model_keys")
307
- if not model_keys:
308
- model_keys = raw.get("model_keys")
309
- if not model_keys:
310
- model_keys = ["ft"]
311
- if not isinstance(model_keys, list):
312
- raise ValueError("runner.model_keys must be a list of strings.")
313
-
314
- nproc_per_node = int(runner.get("nproc_per_node", 1))
315
- max_evals = int(runner.get("max_evals", raw.get("max_evals", 50)))
316
- plot_curves = bool(runner.get("plot_curves", raw.get("plot_curves", True)))
317
- ft_role = runner.get("ft_role", None)
318
- if ft_role is None:
319
- ft_role = raw.get("ft_role")
320
-
321
- cmd = build_bayesopt_entry_cmd(
322
- config_path,
323
- model_keys=[str(x) for x in model_keys],
324
- nproc_per_node=nproc_per_node,
325
- extra_args=[
326
- "--max-evals",
327
- str(max_evals),
328
- *(["--plot-curves"] if plot_curves else []),
329
- *(["--ft-role", str(ft_role)] if ft_role else []),
330
- ],
331
- )
332
-
333
- if use_watchdog:
334
- cmd = wrap_with_watchdog(
335
- cmd,
336
- idle_seconds=idle_seconds,
337
- max_restarts=max_restarts,
338
- restart_delay_seconds=restart_delay_seconds,
339
- )
340
- return run(cmd, check=True)
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import subprocess
6
+ import sys
7
+ from pathlib import Path
8
+ from typing import Iterable, List, Optional, Sequence, cast
9
+
10
+ try:
11
+ from ins_pricing.cli.utils.cli_config import add_config_json_arg, set_env # type: ignore
12
+ except Exception: # pragma: no cover
13
+ from cli_config import add_config_json_arg, set_env # type: ignore
14
+
15
+
16
+ def _find_ins_pricing_dir(cwd: Optional[Path] = None) -> Path:
17
+ cwd = (cwd or Path().resolve()).resolve()
18
+ pkg_root = Path(__file__).resolve().parents[2]
19
+ candidates = [pkg_root, cwd / "ins_pricing", cwd, cwd.parent / "ins_pricing"]
20
+ for cand in candidates:
21
+ cli_entry = cand / "cli" / "BayesOpt_entry.py"
22
+ cli_watchdog = cand / "cli" / "watchdog_run.py"
23
+ if cli_entry.exists() and cli_watchdog.exists():
24
+ return cand
25
+ raise FileNotFoundError(
26
+ "Cannot locate ins_pricing directory (expected cli/BayesOpt_entry.py and "
27
+ "cli/watchdog_run.py). "
28
+ f"cwd={cwd}"
29
+ )
30
+
31
+
32
+ def _stringify_cmd(cmd: Sequence[object]) -> List[str]:
33
+ return [str(x) for x in cmd]
34
+
35
+
36
+ def build_bayesopt_entry_cmd(
37
+ config_json: str | Path,
38
+ model_keys: Sequence[str],
39
+ *,
40
+ nproc_per_node: int = 1,
41
+ standalone: bool = True,
42
+ entry_script: str | Path = "cli/BayesOpt_entry.py",
43
+ extra_args: Optional[Sequence[str]] = None,
44
+ ) -> List[str]:
45
+ """Build a command to run cli/BayesOpt_entry.py (optional torchrun/DDP)."""
46
+ pkg_dir = _find_ins_pricing_dir()
47
+ entry_script_path = Path(entry_script)
48
+ if entry_script_path.is_absolute():
49
+ entry_path = entry_script_path.resolve()
50
+ else:
51
+ candidate = pkg_dir / entry_script_path
52
+ legacy = pkg_dir / "modelling" / entry_script_path
53
+ entry_path = (
54
+ candidate.resolve()
55
+ if candidate.exists()
56
+ else legacy.resolve()
57
+ if legacy.exists()
58
+ else candidate.resolve()
59
+ )
60
+ config_path = Path(config_json)
61
+ if not config_path.is_absolute():
62
+ config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
63
+
64
+ cmd: List[object]
65
+ if int(nproc_per_node) > 1:
66
+ cmd = [
67
+ sys.executable,
68
+ "-m",
69
+ "torch.distributed.run",
70
+ *(["--standalone"] if standalone else []),
71
+ f"--nproc_per_node={int(nproc_per_node)}",
72
+ str(entry_path),
73
+ ]
74
+ else:
75
+ cmd = [sys.executable, str(entry_path)]
76
+
77
+ cmd += ["--config-json", str(config_path), "--model-keys", *list(model_keys)]
78
+ if extra_args:
79
+ cmd += list(extra_args)
80
+ return _stringify_cmd(cmd)
81
+
82
+
83
+ def build_incremental_cmd(
84
+ config_json: str | Path,
85
+ *,
86
+ entry_script: str | Path = "cli/BayesOpt_incremental.py",
87
+ extra_args: Optional[Sequence[str]] = None,
88
+ ) -> List[str]:
89
+ """Build a command to run cli/BayesOpt_incremental.py."""
90
+ pkg_dir = _find_ins_pricing_dir()
91
+ entry_script_path = Path(entry_script)
92
+ if entry_script_path.is_absolute():
93
+ entry_path = entry_script_path.resolve()
94
+ else:
95
+ candidate = pkg_dir / entry_script_path
96
+ legacy = pkg_dir / "modelling" / entry_script_path
97
+ entry_path = (
98
+ candidate.resolve()
99
+ if candidate.exists()
100
+ else legacy.resolve()
101
+ if legacy.exists()
102
+ else candidate.resolve()
103
+ )
104
+ config_path = Path(config_json)
105
+ if not config_path.is_absolute():
106
+ config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
107
+
108
+ cmd: List[object] = [sys.executable, str(entry_path), "--config-json", str(config_path)]
109
+ if extra_args:
110
+ cmd += list(extra_args)
111
+ return _stringify_cmd(cmd)
112
+
113
+
114
+ def build_explain_cmd(
115
+ config_json: str | Path,
116
+ *,
117
+ entry_script: str | Path = "cli/Explain_entry.py",
118
+ extra_args: Optional[Sequence[str]] = None,
119
+ ) -> List[str]:
120
+ """Build a command to run cli/Explain_entry.py."""
121
+ pkg_dir = _find_ins_pricing_dir()
122
+ entry_script_path = Path(entry_script)
123
+ if entry_script_path.is_absolute():
124
+ entry_path = entry_script_path.resolve()
125
+ else:
126
+ candidate = pkg_dir / entry_script_path
127
+ legacy = pkg_dir / "modelling" / entry_script_path
128
+ entry_path = (
129
+ candidate.resolve()
130
+ if candidate.exists()
131
+ else legacy.resolve()
132
+ if legacy.exists()
133
+ else candidate.resolve()
134
+ )
135
+ config_path = Path(config_json)
136
+ if not config_path.is_absolute():
137
+ config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
138
+
139
+ cmd: List[object] = [sys.executable, str(entry_path), "--config-json", str(config_path)]
140
+ if extra_args:
141
+ cmd += list(extra_args)
142
+ return _stringify_cmd(cmd)
143
+
144
+
145
+ def wrap_with_watchdog(
146
+ cmd: Sequence[str],
147
+ *,
148
+ idle_seconds: int = 7200,
149
+ max_restarts: int = 50,
150
+ restart_delay_seconds: int = 10,
151
+ stop_on_nonzero_exit: bool = True,
152
+ watchdog_script: str | Path = "cli/watchdog_run.py",
153
+ ) -> List[str]:
154
+ """Wrap a command with watchdog: restart when idle_seconds elapses with no output."""
155
+ pkg_dir = _find_ins_pricing_dir()
156
+ watchdog_script_path = Path(watchdog_script)
157
+ if watchdog_script_path.is_absolute():
158
+ watchdog_path = watchdog_script_path.resolve()
159
+ else:
160
+ candidate = pkg_dir / watchdog_script_path
161
+ legacy = pkg_dir / "modelling" / watchdog_script_path
162
+ watchdog_path = (
163
+ candidate.resolve()
164
+ if candidate.exists()
165
+ else legacy.resolve()
166
+ if legacy.exists()
167
+ else candidate.resolve()
168
+ )
169
+ wd_cmd: List[object] = [
170
+ sys.executable,
171
+ str(watchdog_path),
172
+ "--idle-seconds",
173
+ str(int(idle_seconds)),
174
+ "--max-restarts",
175
+ str(int(max_restarts)),
176
+ "--restart-delay-seconds",
177
+ str(int(restart_delay_seconds)),
178
+ ]
179
+ if stop_on_nonzero_exit:
180
+ wd_cmd.append("--stop-on-nonzero-exit")
181
+ wd_cmd.append("--")
182
+ wd_cmd.extend(list(cmd))
183
+ return _stringify_cmd(wd_cmd)
184
+
185
+
186
+ def run(cmd: Sequence[str], *, check: bool = True) -> subprocess.CompletedProcess:
187
+ """Run an external command from a notebook (blocking)."""
188
+ return subprocess.run(list(cmd), check=check)
189
+
190
+
191
+ def _build_config_parser(description: str) -> argparse.ArgumentParser:
192
+ parser = argparse.ArgumentParser(description=description)
193
+ add_config_json_arg(
194
+ parser,
195
+ help_text="Path to config.json (relative paths are resolved from ins_pricing/ when possible).",
196
+ )
197
+ return parser
198
+
199
+
200
+ def run_from_config_cli(
201
+ description: str,
202
+ argv: Optional[Sequence[str]] = None,
203
+ ) -> subprocess.CompletedProcess:
204
+ parser = _build_config_parser(description)
205
+ args = parser.parse_args(argv)
206
+ return run_from_config(args.config_json)
207
+
208
+
209
+ def run_bayesopt_entry(
210
+ *,
211
+ config_json: str | Path,
212
+ model_keys: Sequence[str],
213
+ max_evals: int = 50,
214
+ plot_curves: bool = True,
215
+ ft_role: Optional[str] = None,
216
+ nproc_per_node: int = 1,
217
+ use_watchdog: bool = False,
218
+ idle_seconds: int = 7200,
219
+ max_restarts: int = 50,
220
+ restart_delay_seconds: int = 10,
221
+ extra_args: Optional[Sequence[str]] = None,
222
+ ) -> subprocess.CompletedProcess:
223
+ """Convenience wrapper: build and run BayesOpt_entry (optional torchrun + watchdog)."""
224
+ args: List[str] = ["--max-evals", str(int(max_evals))]
225
+ if plot_curves:
226
+ args.append("--plot-curves")
227
+ if ft_role:
228
+ args += ["--ft-role", str(ft_role)]
229
+ if extra_args:
230
+ args += list(extra_args)
231
+
232
+ cmd = build_bayesopt_entry_cmd(
233
+ config_json=config_json,
234
+ model_keys=model_keys,
235
+ nproc_per_node=nproc_per_node,
236
+ extra_args=args,
237
+ )
238
+ if use_watchdog:
239
+ cmd = wrap_with_watchdog(
240
+ cmd,
241
+ idle_seconds=idle_seconds,
242
+ max_restarts=max_restarts,
243
+ restart_delay_seconds=restart_delay_seconds,
244
+ )
245
+ return run(cmd, check=True)
246
+
247
+
248
+ def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
249
+ """Notebook entry point: switch execution modes by editing config.json.
250
+
251
+ Convention: config.json may include a `runner` section for notebook control:
252
+ - runner.mode: "entry" (default), "incremental", or "explain"
253
+ - runner.nproc_per_node: >1 enables torchrun/DDP (entry only)
254
+ - runner.model_keys: list of models to run (entry only)
255
+ - runner.max_evals / runner.plot_curves / runner.ft_role (entry only; override config fields)
256
+ - runner.use_watchdog / runner.idle_seconds / runner.max_restarts / runner.restart_delay_seconds
257
+ - runner.incremental_args: List[str] (incremental only; extra args for cli/BayesOpt_incremental.py)
258
+ """
259
+ pkg_dir = _find_ins_pricing_dir()
260
+ config_path = Path(config_json)
261
+ if not config_path.is_absolute():
262
+ config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
263
+ raw = json.loads(config_path.read_text(encoding="utf-8", errors="replace"))
264
+ set_env(raw.get("env", {}))
265
+ runner = cast(dict, raw.get("runner") or {})
266
+
267
+ mode = str(runner.get("mode") or "entry").strip().lower()
268
+ use_watchdog = bool(runner.get("use_watchdog", False))
269
+ idle_seconds = int(runner.get("idle_seconds", 7200))
270
+ max_restarts = int(runner.get("max_restarts", 50))
271
+ restart_delay_seconds = int(runner.get("restart_delay_seconds", 10))
272
+
273
+ if mode == "incremental":
274
+ inc_args = runner.get("incremental_args") or []
275
+ if not isinstance(inc_args, list):
276
+ raise ValueError("config.runner.incremental_args must be a list of strings.")
277
+ cmd = build_incremental_cmd(config_path, extra_args=[str(x) for x in inc_args])
278
+ if use_watchdog:
279
+ cmd = wrap_with_watchdog(
280
+ cmd,
281
+ idle_seconds=idle_seconds,
282
+ max_restarts=max_restarts,
283
+ restart_delay_seconds=restart_delay_seconds,
284
+ )
285
+ return run(cmd, check=True)
286
+
287
+ if mode == "explain":
288
+ exp_args = runner.get("explain_args") or []
289
+ if not isinstance(exp_args, list):
290
+ raise ValueError("config.runner.explain_args must be a list of strings.")
291
+ cmd = build_explain_cmd(config_path, extra_args=[str(x) for x in exp_args])
292
+ if use_watchdog:
293
+ cmd = wrap_with_watchdog(
294
+ cmd,
295
+ idle_seconds=idle_seconds,
296
+ max_restarts=max_restarts,
297
+ restart_delay_seconds=restart_delay_seconds,
298
+ )
299
+ return run(cmd, check=True)
300
+
301
+ if mode != "entry":
302
+ raise ValueError(
303
+ f"Unsupported runner.mode={mode!r}, expected 'entry', 'incremental', or 'explain'."
304
+ )
305
+
306
+ model_keys = runner.get("model_keys")
307
+ if not model_keys:
308
+ model_keys = raw.get("model_keys")
309
+ if not model_keys:
310
+ model_keys = ["ft"]
311
+ if not isinstance(model_keys, list):
312
+ raise ValueError("runner.model_keys must be a list of strings.")
313
+
314
+ nproc_per_node = int(runner.get("nproc_per_node", 1))
315
+ max_evals = int(runner.get("max_evals", raw.get("max_evals", 50)))
316
+ plot_curves = bool(runner.get("plot_curves", raw.get("plot_curves", True)))
317
+ ft_role = runner.get("ft_role", None)
318
+ if ft_role is None:
319
+ ft_role = raw.get("ft_role")
320
+
321
+ cmd = build_bayesopt_entry_cmd(
322
+ config_path,
323
+ model_keys=[str(x) for x in model_keys],
324
+ nproc_per_node=nproc_per_node,
325
+ extra_args=[
326
+ "--max-evals",
327
+ str(max_evals),
328
+ *(["--plot-curves"] if plot_curves else []),
329
+ *(["--ft-role", str(ft_role)] if ft_role else []),
330
+ ],
331
+ )
332
+
333
+ if use_watchdog:
334
+ cmd = wrap_with_watchdog(
335
+ cmd,
336
+ idle_seconds=idle_seconds,
337
+ max_restarts=max_restarts,
338
+ restart_delay_seconds=restart_delay_seconds,
339
+ )
340
+ return run(cmd, check=True)