ins-pricing 0.5.0__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/cli/BayesOpt_entry.py +15 -5
- ins_pricing/cli/BayesOpt_incremental.py +43 -10
- ins_pricing/cli/Explain_Run.py +16 -5
- ins_pricing/cli/Explain_entry.py +29 -8
- ins_pricing/cli/Pricing_Run.py +16 -5
- ins_pricing/cli/bayesopt_entry_runner.py +45 -12
- ins_pricing/cli/utils/bootstrap.py +23 -0
- ins_pricing/cli/utils/cli_config.py +34 -15
- ins_pricing/cli/utils/import_resolver.py +14 -14
- ins_pricing/cli/utils/notebook_utils.py +120 -106
- ins_pricing/cli/watchdog_run.py +15 -5
- ins_pricing/frontend/app.py +132 -61
- ins_pricing/frontend/config_builder.py +33 -0
- ins_pricing/frontend/example_config.json +11 -0
- ins_pricing/frontend/runner.py +340 -388
- ins_pricing/modelling/README.md +1 -1
- ins_pricing/modelling/__init__.py +10 -10
- ins_pricing/modelling/bayesopt/README.md +29 -11
- ins_pricing/modelling/bayesopt/config_components.py +12 -0
- ins_pricing/modelling/bayesopt/config_preprocess.py +50 -13
- ins_pricing/modelling/bayesopt/core.py +47 -19
- ins_pricing/modelling/bayesopt/model_plotting_mixin.py +20 -14
- ins_pricing/modelling/bayesopt/models/model_ft_components.py +349 -342
- ins_pricing/modelling/bayesopt/models/model_ft_trainer.py +11 -5
- ins_pricing/modelling/bayesopt/models/model_gnn.py +20 -14
- ins_pricing/modelling/bayesopt/models/model_resn.py +9 -3
- ins_pricing/modelling/bayesopt/trainers/trainer_base.py +62 -50
- ins_pricing/modelling/bayesopt/trainers/trainer_ft.py +61 -53
- ins_pricing/modelling/bayesopt/trainers/trainer_glm.py +9 -3
- ins_pricing/modelling/bayesopt/trainers/trainer_gnn.py +40 -32
- ins_pricing/modelling/bayesopt/trainers/trainer_resn.py +36 -24
- ins_pricing/modelling/bayesopt/trainers/trainer_xgb.py +240 -37
- ins_pricing/modelling/bayesopt/utils/distributed_utils.py +193 -186
- ins_pricing/modelling/bayesopt/utils/torch_trainer_mixin.py +23 -10
- ins_pricing/pricing/factors.py +67 -56
- ins_pricing/setup.py +1 -1
- ins_pricing/utils/__init__.py +7 -6
- ins_pricing/utils/device.py +45 -24
- ins_pricing/utils/logging.py +34 -1
- ins_pricing/utils/profiling.py +8 -4
- {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.3.dist-info}/METADATA +182 -182
- {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.3.dist-info}/RECORD +44 -43
- {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.3.dist-info}/WHEEL +0 -0
- {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.3.dist-info}/top_level.txt +0 -0
|
@@ -142,8 +142,8 @@ def build_explain_cmd(
|
|
|
142
142
|
return _stringify_cmd(cmd)
|
|
143
143
|
|
|
144
144
|
|
|
145
|
-
def wrap_with_watchdog(
|
|
146
|
-
cmd: Sequence[str],
|
|
145
|
+
def wrap_with_watchdog(
|
|
146
|
+
cmd: Sequence[str],
|
|
147
147
|
*,
|
|
148
148
|
idle_seconds: int = 7200,
|
|
149
149
|
max_restarts: int = 50,
|
|
@@ -179,28 +179,122 @@ def wrap_with_watchdog(
|
|
|
179
179
|
if stop_on_nonzero_exit:
|
|
180
180
|
wd_cmd.append("--stop-on-nonzero-exit")
|
|
181
181
|
wd_cmd.append("--")
|
|
182
|
-
wd_cmd.extend(list(cmd))
|
|
183
|
-
return _stringify_cmd(wd_cmd)
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
def run(cmd: Sequence[str], *, check: bool = True) -> subprocess.CompletedProcess:
|
|
187
|
-
"""Run an external command from a notebook (blocking)."""
|
|
188
|
-
return subprocess.run(list(cmd), check=check)
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
def _build_config_parser(description: str) -> argparse.ArgumentParser:
|
|
182
|
+
wd_cmd.extend(list(cmd))
|
|
183
|
+
return _stringify_cmd(wd_cmd)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def run(cmd: Sequence[str], *, check: bool = True) -> subprocess.CompletedProcess:
|
|
187
|
+
"""Run an external command from a notebook (blocking)."""
|
|
188
|
+
return subprocess.run(list(cmd), check=check)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _build_config_parser(description: str) -> argparse.ArgumentParser:
|
|
192
192
|
parser = argparse.ArgumentParser(description=description)
|
|
193
193
|
add_config_json_arg(
|
|
194
194
|
parser,
|
|
195
195
|
help_text="Path to config.json (relative paths are resolved from ins_pricing/ when possible).",
|
|
196
|
-
)
|
|
197
|
-
return parser
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
def
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
196
|
+
)
|
|
197
|
+
return parser
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def build_cmd_from_config(config_json: str | Path) -> tuple[List[str], str]:
|
|
201
|
+
"""Build a command list from config.json runner settings.
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
(cmd, mode) where mode is one of: entry, incremental, explain.
|
|
205
|
+
"""
|
|
206
|
+
pkg_dir = _find_ins_pricing_dir()
|
|
207
|
+
config_path = Path(config_json)
|
|
208
|
+
if not config_path.is_absolute():
|
|
209
|
+
config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
|
|
210
|
+
raw = json.loads(config_path.read_text(encoding="utf-8", errors="replace"))
|
|
211
|
+
set_env(raw.get("env", {}))
|
|
212
|
+
runner = cast(dict, raw.get("runner") or {})
|
|
213
|
+
|
|
214
|
+
mode = str(runner.get("mode") or "entry").strip().lower()
|
|
215
|
+
use_watchdog = bool(runner.get("use_watchdog", False))
|
|
216
|
+
if mode == "watchdog":
|
|
217
|
+
use_watchdog = True
|
|
218
|
+
mode = "entry"
|
|
219
|
+
|
|
220
|
+
idle_seconds = int(runner.get("idle_seconds", 7200))
|
|
221
|
+
max_restarts = int(runner.get("max_restarts", 50))
|
|
222
|
+
restart_delay_seconds = int(runner.get("restart_delay_seconds", 10))
|
|
223
|
+
|
|
224
|
+
if mode == "incremental":
|
|
225
|
+
inc_args = runner.get("incremental_args") or []
|
|
226
|
+
if not isinstance(inc_args, list):
|
|
227
|
+
raise ValueError("config.runner.incremental_args must be a list of strings.")
|
|
228
|
+
cmd = build_incremental_cmd(config_path, extra_args=[str(x) for x in inc_args])
|
|
229
|
+
if use_watchdog:
|
|
230
|
+
cmd = wrap_with_watchdog(
|
|
231
|
+
cmd,
|
|
232
|
+
idle_seconds=idle_seconds,
|
|
233
|
+
max_restarts=max_restarts,
|
|
234
|
+
restart_delay_seconds=restart_delay_seconds,
|
|
235
|
+
)
|
|
236
|
+
return cmd, "incremental"
|
|
237
|
+
|
|
238
|
+
if mode == "explain":
|
|
239
|
+
exp_args = runner.get("explain_args") or []
|
|
240
|
+
if not isinstance(exp_args, list):
|
|
241
|
+
raise ValueError("config.runner.explain_args must be a list of strings.")
|
|
242
|
+
cmd = build_explain_cmd(config_path, extra_args=[str(x) for x in exp_args])
|
|
243
|
+
if use_watchdog:
|
|
244
|
+
cmd = wrap_with_watchdog(
|
|
245
|
+
cmd,
|
|
246
|
+
idle_seconds=idle_seconds,
|
|
247
|
+
max_restarts=max_restarts,
|
|
248
|
+
restart_delay_seconds=restart_delay_seconds,
|
|
249
|
+
)
|
|
250
|
+
return cmd, "explain"
|
|
251
|
+
|
|
252
|
+
if mode != "entry":
|
|
253
|
+
raise ValueError(
|
|
254
|
+
f"Unsupported runner.mode={mode!r}, expected 'entry', 'incremental', or 'explain'."
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
model_keys = runner.get("model_keys")
|
|
258
|
+
if not model_keys:
|
|
259
|
+
model_keys = raw.get("model_keys")
|
|
260
|
+
if not model_keys:
|
|
261
|
+
model_keys = ["ft"]
|
|
262
|
+
if not isinstance(model_keys, list):
|
|
263
|
+
raise ValueError("runner.model_keys must be a list of strings.")
|
|
264
|
+
|
|
265
|
+
nproc_per_node = int(runner.get("nproc_per_node", 1))
|
|
266
|
+
max_evals = int(runner.get("max_evals", raw.get("max_evals", 50)))
|
|
267
|
+
plot_curves = bool(runner.get("plot_curves", raw.get("plot_curves", True)))
|
|
268
|
+
ft_role = runner.get("ft_role", None)
|
|
269
|
+
if ft_role is None:
|
|
270
|
+
ft_role = raw.get("ft_role")
|
|
271
|
+
|
|
272
|
+
cmd = build_bayesopt_entry_cmd(
|
|
273
|
+
config_path,
|
|
274
|
+
model_keys=[str(x) for x in model_keys],
|
|
275
|
+
nproc_per_node=nproc_per_node,
|
|
276
|
+
extra_args=[
|
|
277
|
+
"--max-evals",
|
|
278
|
+
str(max_evals),
|
|
279
|
+
*(["--plot-curves"] if plot_curves else []),
|
|
280
|
+
*(["--ft-role", str(ft_role)] if ft_role else []),
|
|
281
|
+
],
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
if use_watchdog:
|
|
285
|
+
cmd = wrap_with_watchdog(
|
|
286
|
+
cmd,
|
|
287
|
+
idle_seconds=idle_seconds,
|
|
288
|
+
max_restarts=max_restarts,
|
|
289
|
+
restart_delay_seconds=restart_delay_seconds,
|
|
290
|
+
)
|
|
291
|
+
return cmd, "entry"
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def run_from_config_cli(
|
|
295
|
+
description: str,
|
|
296
|
+
argv: Optional[Sequence[str]] = None,
|
|
297
|
+
) -> subprocess.CompletedProcess:
|
|
204
298
|
parser = _build_config_parser(description)
|
|
205
299
|
args = parser.parse_args(argv)
|
|
206
300
|
return run_from_config(args.config_json)
|
|
@@ -245,8 +339,8 @@ def run_bayesopt_entry(
|
|
|
245
339
|
return run(cmd, check=True)
|
|
246
340
|
|
|
247
341
|
|
|
248
|
-
def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
|
|
249
|
-
"""Notebook entry point: switch execution modes by editing config.json.
|
|
342
|
+
def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
|
|
343
|
+
"""Notebook entry point: switch execution modes by editing config.json.
|
|
250
344
|
|
|
251
345
|
Convention: config.json may include a `runner` section for notebook control:
|
|
252
346
|
- runner.mode: "entry" (default), "incremental", or "explain"
|
|
@@ -254,87 +348,7 @@ def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
|
|
|
254
348
|
- runner.model_keys: list of models to run (entry only)
|
|
255
349
|
- runner.max_evals / runner.plot_curves / runner.ft_role (entry only; override config fields)
|
|
256
350
|
- runner.use_watchdog / runner.idle_seconds / runner.max_restarts / runner.restart_delay_seconds
|
|
257
|
-
- runner.incremental_args: List[str] (incremental only; extra args for cli/BayesOpt_incremental.py)
|
|
258
|
-
"""
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
if not config_path.is_absolute():
|
|
262
|
-
config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
|
|
263
|
-
raw = json.loads(config_path.read_text(encoding="utf-8", errors="replace"))
|
|
264
|
-
set_env(raw.get("env", {}))
|
|
265
|
-
runner = cast(dict, raw.get("runner") or {})
|
|
266
|
-
|
|
267
|
-
mode = str(runner.get("mode") or "entry").strip().lower()
|
|
268
|
-
use_watchdog = bool(runner.get("use_watchdog", False))
|
|
269
|
-
idle_seconds = int(runner.get("idle_seconds", 7200))
|
|
270
|
-
max_restarts = int(runner.get("max_restarts", 50))
|
|
271
|
-
restart_delay_seconds = int(runner.get("restart_delay_seconds", 10))
|
|
272
|
-
|
|
273
|
-
if mode == "incremental":
|
|
274
|
-
inc_args = runner.get("incremental_args") or []
|
|
275
|
-
if not isinstance(inc_args, list):
|
|
276
|
-
raise ValueError("config.runner.incremental_args must be a list of strings.")
|
|
277
|
-
cmd = build_incremental_cmd(config_path, extra_args=[str(x) for x in inc_args])
|
|
278
|
-
if use_watchdog:
|
|
279
|
-
cmd = wrap_with_watchdog(
|
|
280
|
-
cmd,
|
|
281
|
-
idle_seconds=idle_seconds,
|
|
282
|
-
max_restarts=max_restarts,
|
|
283
|
-
restart_delay_seconds=restart_delay_seconds,
|
|
284
|
-
)
|
|
285
|
-
return run(cmd, check=True)
|
|
286
|
-
|
|
287
|
-
if mode == "explain":
|
|
288
|
-
exp_args = runner.get("explain_args") or []
|
|
289
|
-
if not isinstance(exp_args, list):
|
|
290
|
-
raise ValueError("config.runner.explain_args must be a list of strings.")
|
|
291
|
-
cmd = build_explain_cmd(config_path, extra_args=[str(x) for x in exp_args])
|
|
292
|
-
if use_watchdog:
|
|
293
|
-
cmd = wrap_with_watchdog(
|
|
294
|
-
cmd,
|
|
295
|
-
idle_seconds=idle_seconds,
|
|
296
|
-
max_restarts=max_restarts,
|
|
297
|
-
restart_delay_seconds=restart_delay_seconds,
|
|
298
|
-
)
|
|
299
|
-
return run(cmd, check=True)
|
|
300
|
-
|
|
301
|
-
if mode != "entry":
|
|
302
|
-
raise ValueError(
|
|
303
|
-
f"Unsupported runner.mode={mode!r}, expected 'entry', 'incremental', or 'explain'."
|
|
304
|
-
)
|
|
305
|
-
|
|
306
|
-
model_keys = runner.get("model_keys")
|
|
307
|
-
if not model_keys:
|
|
308
|
-
model_keys = raw.get("model_keys")
|
|
309
|
-
if not model_keys:
|
|
310
|
-
model_keys = ["ft"]
|
|
311
|
-
if not isinstance(model_keys, list):
|
|
312
|
-
raise ValueError("runner.model_keys must be a list of strings.")
|
|
313
|
-
|
|
314
|
-
nproc_per_node = int(runner.get("nproc_per_node", 1))
|
|
315
|
-
max_evals = int(runner.get("max_evals", raw.get("max_evals", 50)))
|
|
316
|
-
plot_curves = bool(runner.get("plot_curves", raw.get("plot_curves", True)))
|
|
317
|
-
ft_role = runner.get("ft_role", None)
|
|
318
|
-
if ft_role is None:
|
|
319
|
-
ft_role = raw.get("ft_role")
|
|
320
|
-
|
|
321
|
-
cmd = build_bayesopt_entry_cmd(
|
|
322
|
-
config_path,
|
|
323
|
-
model_keys=[str(x) for x in model_keys],
|
|
324
|
-
nproc_per_node=nproc_per_node,
|
|
325
|
-
extra_args=[
|
|
326
|
-
"--max-evals",
|
|
327
|
-
str(max_evals),
|
|
328
|
-
*(["--plot-curves"] if plot_curves else []),
|
|
329
|
-
*(["--ft-role", str(ft_role)] if ft_role else []),
|
|
330
|
-
],
|
|
331
|
-
)
|
|
332
|
-
|
|
333
|
-
if use_watchdog:
|
|
334
|
-
cmd = wrap_with_watchdog(
|
|
335
|
-
cmd,
|
|
336
|
-
idle_seconds=idle_seconds,
|
|
337
|
-
max_restarts=max_restarts,
|
|
338
|
-
restart_delay_seconds=restart_delay_seconds,
|
|
339
|
-
)
|
|
340
|
-
return run(cmd, check=True)
|
|
351
|
+
- runner.incremental_args: List[str] (incremental only; extra args for cli/BayesOpt_incremental.py)
|
|
352
|
+
"""
|
|
353
|
+
cmd, _mode = build_cmd_from_config(config_json)
|
|
354
|
+
return run(cmd, check=True)
|
ins_pricing/cli/watchdog_run.py
CHANGED
|
@@ -10,11 +10,21 @@ import threading
|
|
|
10
10
|
import time
|
|
11
11
|
from typing import List, Optional
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
if
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
13
|
+
def _ensure_repo_root() -> None:
|
|
14
|
+
if __package__ not in {None, ""}:
|
|
15
|
+
return
|
|
16
|
+
if importlib.util.find_spec("ins_pricing") is not None:
|
|
17
|
+
return
|
|
18
|
+
bootstrap_path = Path(__file__).resolve().parents[1] / "utils" / "bootstrap.py"
|
|
19
|
+
spec = importlib.util.spec_from_file_location("ins_pricing.cli.utils.bootstrap", bootstrap_path)
|
|
20
|
+
if spec is None or spec.loader is None:
|
|
21
|
+
return
|
|
22
|
+
module = importlib.util.module_from_spec(spec)
|
|
23
|
+
spec.loader.exec_module(module)
|
|
24
|
+
module.ensure_repo_root()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
_ensure_repo_root()
|
|
18
28
|
|
|
19
29
|
try:
|
|
20
30
|
from ins_pricing.cli.utils.run_logging import configure_run_logging # type: ignore
|
ins_pricing/frontend/app.py
CHANGED
|
@@ -17,16 +17,31 @@ from ins_pricing.frontend.ft_workflow import FTWorkflowHelper
|
|
|
17
17
|
from ins_pricing.frontend.runner import TaskRunner
|
|
18
18
|
from ins_pricing.frontend.config_builder import ConfigBuilder
|
|
19
19
|
import json
|
|
20
|
+
import tempfile
|
|
20
21
|
import sys
|
|
21
22
|
import inspect
|
|
23
|
+
import importlib.util
|
|
22
24
|
from pathlib import Path
|
|
23
|
-
from typing import Optional, Dict, Any, Callable, Iterable, Tuple
|
|
25
|
+
from typing import Optional, Dict, Any, Callable, Iterable, Tuple, Generator
|
|
24
26
|
import threading
|
|
25
27
|
import queue
|
|
26
28
|
import time
|
|
27
29
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
+
def _ensure_repo_root() -> None:
|
|
31
|
+
if __package__ not in {None, ""}:
|
|
32
|
+
return
|
|
33
|
+
if importlib.util.find_spec("ins_pricing") is not None:
|
|
34
|
+
return
|
|
35
|
+
bootstrap_path = Path(__file__).resolve().parents[1] / "cli" / "utils" / "bootstrap.py"
|
|
36
|
+
spec = importlib.util.spec_from_file_location("ins_pricing.cli.utils.bootstrap", bootstrap_path)
|
|
37
|
+
if spec is None or spec.loader is None:
|
|
38
|
+
return
|
|
39
|
+
module = importlib.util.module_from_spec(spec)
|
|
40
|
+
spec.loader.exec_module(module)
|
|
41
|
+
module.ensure_repo_root()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
_ensure_repo_root()
|
|
30
45
|
|
|
31
46
|
os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
|
|
32
47
|
os.environ.setdefault("GRADIO_TELEMETRY_ENABLED", "False")
|
|
@@ -112,10 +127,21 @@ class PricingApp:
|
|
|
112
127
|
output_dir: str,
|
|
113
128
|
use_gpu: bool,
|
|
114
129
|
model_keys: str,
|
|
115
|
-
max_evals: int,
|
|
116
|
-
xgb_max_depth_max: int,
|
|
117
|
-
xgb_n_estimators_max: int,
|
|
118
|
-
|
|
130
|
+
max_evals: int,
|
|
131
|
+
xgb_max_depth_max: int,
|
|
132
|
+
xgb_n_estimators_max: int,
|
|
133
|
+
xgb_gpu_id: int,
|
|
134
|
+
xgb_cleanup_per_fold: bool,
|
|
135
|
+
xgb_cleanup_synchronize: bool,
|
|
136
|
+
xgb_use_dmatrix: bool,
|
|
137
|
+
ft_cleanup_per_fold: bool,
|
|
138
|
+
ft_cleanup_synchronize: bool,
|
|
139
|
+
resn_cleanup_per_fold: bool,
|
|
140
|
+
resn_cleanup_synchronize: bool,
|
|
141
|
+
gnn_cleanup_per_fold: bool,
|
|
142
|
+
gnn_cleanup_synchronize: bool,
|
|
143
|
+
optuna_cleanup_synchronize: bool,
|
|
144
|
+
) -> tuple[str, str]:
|
|
119
145
|
"""Build configuration from UI parameters."""
|
|
120
146
|
try:
|
|
121
147
|
# Parse comma-separated lists
|
|
@@ -148,10 +174,21 @@ class PricingApp:
|
|
|
148
174
|
output_dir=output_dir,
|
|
149
175
|
use_gpu=use_gpu,
|
|
150
176
|
model_keys=model_keys,
|
|
151
|
-
max_evals=max_evals,
|
|
152
|
-
xgb_max_depth_max=xgb_max_depth_max,
|
|
153
|
-
xgb_n_estimators_max=xgb_n_estimators_max,
|
|
154
|
-
|
|
177
|
+
max_evals=max_evals,
|
|
178
|
+
xgb_max_depth_max=xgb_max_depth_max,
|
|
179
|
+
xgb_n_estimators_max=xgb_n_estimators_max,
|
|
180
|
+
xgb_gpu_id=xgb_gpu_id,
|
|
181
|
+
xgb_cleanup_per_fold=xgb_cleanup_per_fold,
|
|
182
|
+
xgb_cleanup_synchronize=xgb_cleanup_synchronize,
|
|
183
|
+
xgb_use_dmatrix=xgb_use_dmatrix,
|
|
184
|
+
ft_cleanup_per_fold=ft_cleanup_per_fold,
|
|
185
|
+
ft_cleanup_synchronize=ft_cleanup_synchronize,
|
|
186
|
+
resn_cleanup_per_fold=resn_cleanup_per_fold,
|
|
187
|
+
resn_cleanup_synchronize=resn_cleanup_synchronize,
|
|
188
|
+
gnn_cleanup_per_fold=gnn_cleanup_per_fold,
|
|
189
|
+
gnn_cleanup_synchronize=gnn_cleanup_synchronize,
|
|
190
|
+
optuna_cleanup_synchronize=optuna_cleanup_synchronize,
|
|
191
|
+
)
|
|
155
192
|
|
|
156
193
|
is_valid, msg = self.config_builder.validate_config(config)
|
|
157
194
|
if not is_valid:
|
|
@@ -180,54 +217,60 @@ class PricingApp:
|
|
|
180
217
|
except Exception as e:
|
|
181
218
|
return f"Error saving config: {str(e)}"
|
|
182
219
|
|
|
183
|
-
def run_training(self, config_json: str) -> tuple[str, str]:
|
|
220
|
+
def run_training(self, config_json: str) -> Generator[tuple[str, str], None, None]:
|
|
184
221
|
"""
|
|
185
222
|
Run task (training, explain, plotting, etc.) with the current configuration.
|
|
186
223
|
|
|
187
224
|
The task type is automatically detected from config.runner.mode.
|
|
188
225
|
Supported modes: entry (training), explain, incremental, watchdog, etc.
|
|
189
226
|
"""
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
if config_json:
|
|
193
|
-
config = json.loads(config_json)
|
|
194
|
-
task_mode = config.get('runner', {}).get('mode', 'entry')
|
|
195
|
-
base_dir = self.current_config_dir or Path.cwd()
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
yield f"Task [{task_mode}] completed!", full_log
|
|
227
|
-
|
|
228
|
-
except Exception as e:
|
|
229
|
-
error_msg = f"Error during task execution: {str(e)}"
|
|
230
|
-
yield error_msg, error_msg
|
|
227
|
+
temp_config_path: Optional[Path] = None
|
|
228
|
+
try:
|
|
229
|
+
if config_json:
|
|
230
|
+
config = json.loads(config_json)
|
|
231
|
+
task_mode = config.get('runner', {}).get('mode', 'entry')
|
|
232
|
+
base_dir = self.current_config_dir or Path.cwd()
|
|
233
|
+
fd, temp_path = tempfile.mkstemp(prefix="temp_config_", suffix=".json", dir=base_dir)
|
|
234
|
+
temp_config_path = Path(temp_path)
|
|
235
|
+
with os.fdopen(fd, 'w', encoding='utf-8') as f:
|
|
236
|
+
json.dump(config, f, indent=2)
|
|
237
|
+
config_path = temp_config_path
|
|
238
|
+
elif self.current_config_path and self.current_config_path.exists():
|
|
239
|
+
config_path = self.current_config_path
|
|
240
|
+
config = json.loads(config_path.read_text(encoding="utf-8"))
|
|
241
|
+
task_mode = config.get('runner', {}).get('mode', 'entry')
|
|
242
|
+
elif self.current_config:
|
|
243
|
+
config = self.current_config
|
|
244
|
+
task_mode = config.get('runner', {}).get('mode', 'entry')
|
|
245
|
+
base_dir = Path.cwd()
|
|
246
|
+
fd, temp_path = tempfile.mkstemp(prefix="temp_config_", suffix=".json", dir=base_dir)
|
|
247
|
+
temp_config_path = Path(temp_path)
|
|
248
|
+
with os.fdopen(fd, 'w', encoding='utf-8') as f:
|
|
249
|
+
json.dump(config, f, indent=2)
|
|
250
|
+
config_path = temp_config_path
|
|
251
|
+
else:
|
|
252
|
+
yield "No configuration provided", ""
|
|
253
|
+
return
|
|
254
|
+
|
|
255
|
+
log_generator = self.runner.run_task(str(config_path))
|
|
256
|
+
|
|
257
|
+
# Collect logs
|
|
258
|
+
full_log = ""
|
|
259
|
+
for log_line in log_generator:
|
|
260
|
+
full_log += log_line + "\n"
|
|
261
|
+
yield f"Task [{task_mode}] in progress...", full_log
|
|
262
|
+
|
|
263
|
+
yield f"Task [{task_mode}] completed!", full_log
|
|
264
|
+
|
|
265
|
+
except Exception as e:
|
|
266
|
+
error_msg = f"Error during task execution: {str(e)}"
|
|
267
|
+
yield error_msg, error_msg
|
|
268
|
+
finally:
|
|
269
|
+
if temp_config_path is not None:
|
|
270
|
+
try:
|
|
271
|
+
temp_config_path.unlink(missing_ok=True)
|
|
272
|
+
except Exception:
|
|
273
|
+
pass
|
|
231
274
|
|
|
232
275
|
def prepare_ft_step1(self, config_json: str, use_ddp: bool, nproc: int) -> tuple[str, str]:
|
|
233
276
|
"""Prepare FT Step 1 configuration."""
|
|
@@ -551,11 +594,34 @@ def create_ui():
|
|
|
551
594
|
label="Max Evaluations", value=50, precision=0)
|
|
552
595
|
|
|
553
596
|
with gr.Column():
|
|
554
|
-
gr.Markdown("#### XGBoost Settings")
|
|
555
|
-
xgb_max_depth_max = gr.Number(
|
|
556
|
-
label="XGB Max Depth", value=25, precision=0)
|
|
557
|
-
xgb_n_estimators_max = gr.Number(
|
|
558
|
-
label="XGB Max Estimators", value=500, precision=0)
|
|
597
|
+
gr.Markdown("#### XGBoost Settings")
|
|
598
|
+
xgb_max_depth_max = gr.Number(
|
|
599
|
+
label="XGB Max Depth", value=25, precision=0)
|
|
600
|
+
xgb_n_estimators_max = gr.Number(
|
|
601
|
+
label="XGB Max Estimators", value=500, precision=0)
|
|
602
|
+
xgb_gpu_id = gr.Number(
|
|
603
|
+
label="XGB GPU ID", value=0, precision=0)
|
|
604
|
+
xgb_cleanup_per_fold = gr.Checkbox(
|
|
605
|
+
label="XGB Cleanup Per Fold", value=False)
|
|
606
|
+
xgb_cleanup_synchronize = gr.Checkbox(
|
|
607
|
+
label="XGB Cleanup Synchronize", value=False)
|
|
608
|
+
xgb_use_dmatrix = gr.Checkbox(
|
|
609
|
+
label="XGB Use DMatrix", value=True)
|
|
610
|
+
gr.Markdown("#### Fold Cleanup")
|
|
611
|
+
ft_cleanup_per_fold = gr.Checkbox(
|
|
612
|
+
label="FT Cleanup Per Fold", value=False)
|
|
613
|
+
ft_cleanup_synchronize = gr.Checkbox(
|
|
614
|
+
label="FT Cleanup Synchronize", value=False)
|
|
615
|
+
resn_cleanup_per_fold = gr.Checkbox(
|
|
616
|
+
label="ResNet Cleanup Per Fold", value=False)
|
|
617
|
+
resn_cleanup_synchronize = gr.Checkbox(
|
|
618
|
+
label="ResNet Cleanup Synchronize", value=False)
|
|
619
|
+
gnn_cleanup_per_fold = gr.Checkbox(
|
|
620
|
+
label="GNN Cleanup Per Fold", value=False)
|
|
621
|
+
gnn_cleanup_synchronize = gr.Checkbox(
|
|
622
|
+
label="GNN Cleanup Synchronize", value=False)
|
|
623
|
+
optuna_cleanup_synchronize = gr.Checkbox(
|
|
624
|
+
label="Optuna Cleanup Synchronize", value=False)
|
|
559
625
|
|
|
560
626
|
with gr.Row():
|
|
561
627
|
build_btn = gr.Button(
|
|
@@ -836,9 +902,14 @@ def create_ui():
|
|
|
836
902
|
data_dir, model_list, model_categories, target, weight,
|
|
837
903
|
feature_list, categorical_features, task_type, prop_test,
|
|
838
904
|
holdout_ratio, val_ratio, split_strategy, rand_seed, epochs,
|
|
839
|
-
output_dir, use_gpu, model_keys, max_evals,
|
|
840
|
-
xgb_max_depth_max, xgb_n_estimators_max
|
|
841
|
-
|
|
905
|
+
output_dir, use_gpu, model_keys, max_evals,
|
|
906
|
+
xgb_max_depth_max, xgb_n_estimators_max,
|
|
907
|
+
xgb_gpu_id, xgb_cleanup_per_fold, xgb_cleanup_synchronize,
|
|
908
|
+
xgb_use_dmatrix, ft_cleanup_per_fold, ft_cleanup_synchronize,
|
|
909
|
+
resn_cleanup_per_fold, resn_cleanup_synchronize,
|
|
910
|
+
gnn_cleanup_per_fold, gnn_cleanup_synchronize,
|
|
911
|
+
optuna_cleanup_synchronize
|
|
912
|
+
],
|
|
842
913
|
outputs=[build_status, config_json]
|
|
843
914
|
)
|
|
844
915
|
|
|
@@ -148,6 +148,17 @@ class ConfigBuilder:
|
|
|
148
148
|
max_evals: int = 50,
|
|
149
149
|
xgb_max_depth_max: int = 25,
|
|
150
150
|
xgb_n_estimators_max: int = 500,
|
|
151
|
+
xgb_gpu_id: Optional[int] = None,
|
|
152
|
+
xgb_cleanup_per_fold: bool = False,
|
|
153
|
+
xgb_cleanup_synchronize: bool = False,
|
|
154
|
+
xgb_use_dmatrix: bool = True,
|
|
155
|
+
ft_cleanup_per_fold: bool = False,
|
|
156
|
+
ft_cleanup_synchronize: bool = False,
|
|
157
|
+
resn_cleanup_per_fold: bool = False,
|
|
158
|
+
resn_cleanup_synchronize: bool = False,
|
|
159
|
+
gnn_cleanup_per_fold: bool = False,
|
|
160
|
+
gnn_cleanup_synchronize: bool = False,
|
|
161
|
+
optuna_cleanup_synchronize: bool = False,
|
|
151
162
|
nproc_per_node: int = 2,
|
|
152
163
|
) -> Dict[str, Any]:
|
|
153
164
|
"""
|
|
@@ -174,6 +185,17 @@ class ConfigBuilder:
|
|
|
174
185
|
max_evals: Maximum number of evaluations for optimization
|
|
175
186
|
xgb_max_depth_max: Maximum depth for XGBoost
|
|
176
187
|
xgb_n_estimators_max: Maximum estimators for XGBoost
|
|
188
|
+
xgb_gpu_id: XGBoost GPU device id (None = default)
|
|
189
|
+
xgb_cleanup_per_fold: Cleanup GPU memory per XGBoost fold
|
|
190
|
+
xgb_cleanup_synchronize: Synchronize CUDA during XGBoost cleanup
|
|
191
|
+
xgb_use_dmatrix: Use xgb.train with DMatrix/QuantileDMatrix
|
|
192
|
+
ft_cleanup_per_fold: Cleanup GPU memory per FT fold
|
|
193
|
+
ft_cleanup_synchronize: Synchronize CUDA during FT cleanup
|
|
194
|
+
resn_cleanup_per_fold: Cleanup GPU memory per ResNet fold
|
|
195
|
+
resn_cleanup_synchronize: Synchronize CUDA during ResNet cleanup
|
|
196
|
+
gnn_cleanup_per_fold: Cleanup GPU memory per GNN fold
|
|
197
|
+
gnn_cleanup_synchronize: Synchronize CUDA during GNN cleanup
|
|
198
|
+
optuna_cleanup_synchronize: Synchronize CUDA during Optuna cleanup
|
|
177
199
|
nproc_per_node: Number of processes per node
|
|
178
200
|
|
|
179
201
|
Returns:
|
|
@@ -204,6 +226,17 @@ class ConfigBuilder:
|
|
|
204
226
|
"use_gpu": use_gpu,
|
|
205
227
|
"xgb_max_depth_max": xgb_max_depth_max,
|
|
206
228
|
"xgb_n_estimators_max": xgb_n_estimators_max,
|
|
229
|
+
"xgb_gpu_id": xgb_gpu_id,
|
|
230
|
+
"xgb_cleanup_per_fold": xgb_cleanup_per_fold,
|
|
231
|
+
"xgb_cleanup_synchronize": xgb_cleanup_synchronize,
|
|
232
|
+
"xgb_use_dmatrix": xgb_use_dmatrix,
|
|
233
|
+
"ft_cleanup_per_fold": ft_cleanup_per_fold,
|
|
234
|
+
"ft_cleanup_synchronize": ft_cleanup_synchronize,
|
|
235
|
+
"resn_cleanup_per_fold": resn_cleanup_per_fold,
|
|
236
|
+
"resn_cleanup_synchronize": resn_cleanup_synchronize,
|
|
237
|
+
"gnn_cleanup_per_fold": gnn_cleanup_per_fold,
|
|
238
|
+
"gnn_cleanup_synchronize": gnn_cleanup_synchronize,
|
|
239
|
+
"optuna_cleanup_synchronize": optuna_cleanup_synchronize,
|
|
207
240
|
"optuna_storage": f"{output_dir}/optuna/bayesopt.sqlite3",
|
|
208
241
|
"stack_model_keys": model_keys,
|
|
209
242
|
})
|
|
@@ -26,6 +26,17 @@
|
|
|
26
26
|
"use_gpu": true,
|
|
27
27
|
"xgb_max_depth_max": 25,
|
|
28
28
|
"xgb_n_estimators_max": 500,
|
|
29
|
+
"xgb_gpu_id": 0,
|
|
30
|
+
"xgb_cleanup_per_fold": false,
|
|
31
|
+
"xgb_cleanup_synchronize": false,
|
|
32
|
+
"xgb_use_dmatrix": true,
|
|
33
|
+
"ft_cleanup_per_fold": false,
|
|
34
|
+
"ft_cleanup_synchronize": false,
|
|
35
|
+
"resn_cleanup_per_fold": false,
|
|
36
|
+
"resn_cleanup_synchronize": false,
|
|
37
|
+
"gnn_cleanup_per_fold": false,
|
|
38
|
+
"gnn_cleanup_synchronize": false,
|
|
39
|
+
"optuna_cleanup_synchronize": false,
|
|
29
40
|
"optuna_storage": "./Results/optuna/bayesopt.sqlite3",
|
|
30
41
|
"runner": {
|
|
31
42
|
"mode": "entry",
|