ins-pricing 0.5.0__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (44)
  1. ins_pricing/cli/BayesOpt_entry.py +15 -5
  2. ins_pricing/cli/BayesOpt_incremental.py +43 -10
  3. ins_pricing/cli/Explain_Run.py +16 -5
  4. ins_pricing/cli/Explain_entry.py +29 -8
  5. ins_pricing/cli/Pricing_Run.py +16 -5
  6. ins_pricing/cli/bayesopt_entry_runner.py +45 -12
  7. ins_pricing/cli/utils/bootstrap.py +23 -0
  8. ins_pricing/cli/utils/cli_config.py +34 -15
  9. ins_pricing/cli/utils/import_resolver.py +14 -14
  10. ins_pricing/cli/utils/notebook_utils.py +120 -106
  11. ins_pricing/cli/watchdog_run.py +15 -5
  12. ins_pricing/frontend/app.py +132 -61
  13. ins_pricing/frontend/config_builder.py +33 -0
  14. ins_pricing/frontend/example_config.json +11 -0
  15. ins_pricing/frontend/runner.py +340 -388
  16. ins_pricing/modelling/README.md +1 -1
  17. ins_pricing/modelling/__init__.py +10 -10
  18. ins_pricing/modelling/bayesopt/README.md +29 -11
  19. ins_pricing/modelling/bayesopt/config_components.py +12 -0
  20. ins_pricing/modelling/bayesopt/config_preprocess.py +50 -13
  21. ins_pricing/modelling/bayesopt/core.py +47 -19
  22. ins_pricing/modelling/bayesopt/model_plotting_mixin.py +20 -14
  23. ins_pricing/modelling/bayesopt/models/model_ft_components.py +349 -342
  24. ins_pricing/modelling/bayesopt/models/model_ft_trainer.py +11 -5
  25. ins_pricing/modelling/bayesopt/models/model_gnn.py +20 -14
  26. ins_pricing/modelling/bayesopt/models/model_resn.py +9 -3
  27. ins_pricing/modelling/bayesopt/trainers/trainer_base.py +62 -50
  28. ins_pricing/modelling/bayesopt/trainers/trainer_ft.py +61 -53
  29. ins_pricing/modelling/bayesopt/trainers/trainer_glm.py +9 -3
  30. ins_pricing/modelling/bayesopt/trainers/trainer_gnn.py +40 -32
  31. ins_pricing/modelling/bayesopt/trainers/trainer_resn.py +36 -24
  32. ins_pricing/modelling/bayesopt/trainers/trainer_xgb.py +240 -37
  33. ins_pricing/modelling/bayesopt/utils/distributed_utils.py +193 -186
  34. ins_pricing/modelling/bayesopt/utils/torch_trainer_mixin.py +23 -10
  35. ins_pricing/pricing/factors.py +67 -56
  36. ins_pricing/setup.py +1 -1
  37. ins_pricing/utils/__init__.py +7 -6
  38. ins_pricing/utils/device.py +45 -24
  39. ins_pricing/utils/logging.py +34 -1
  40. ins_pricing/utils/profiling.py +8 -4
  41. {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.3.dist-info}/METADATA +182 -182
  42. {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.3.dist-info}/RECORD +44 -43
  43. {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.3.dist-info}/WHEEL +0 -0
  44. {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.3.dist-info}/top_level.txt +0 -0
@@ -142,8 +142,8 @@ def build_explain_cmd(
     return _stringify_cmd(cmd)


-def wrap_with_watchdog(
-    cmd: Sequence[str],
+def wrap_with_watchdog(
+    cmd: Sequence[str],
     *,
     idle_seconds: int = 7200,
     max_restarts: int = 50,
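The wrap_with_watchdog helper above decorates an arbitrary command with idle/restart supervision. It appears to live in ins_pricing/cli/utils/notebook_utils.py (the cli/utils file with the large change in the list above); treating that import path and the wrapped command as assumptions, a minimal usage sketch:

```python
# Sketch only: import path and training command are assumptions, not confirmed by this diff.
from ins_pricing.cli.utils.notebook_utils import run, wrap_with_watchdog

train_cmd = ["python", "-m", "my_project.train"]  # hypothetical long-running job
wrapped = wrap_with_watchdog(
    train_cmd,
    idle_seconds=7200,            # restart if the job looks stalled for 2 hours
    max_restarts=50,
    restart_delay_seconds=10,
)
run(wrapped, check=True)          # blocking subprocess.run under the hood
```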
@@ -179,28 +179,122 @@ def wrap_with_watchdog(
     if stop_on_nonzero_exit:
         wd_cmd.append("--stop-on-nonzero-exit")
     wd_cmd.append("--")
-    wd_cmd.extend(list(cmd))
-    return _stringify_cmd(wd_cmd)
-
-
-def run(cmd: Sequence[str], *, check: bool = True) -> subprocess.CompletedProcess:
-    """Run an external command from a notebook (blocking)."""
-    return subprocess.run(list(cmd), check=check)
-
-
-def _build_config_parser(description: str) -> argparse.ArgumentParser:
+    wd_cmd.extend(list(cmd))
+    return _stringify_cmd(wd_cmd)
+
+
+def run(cmd: Sequence[str], *, check: bool = True) -> subprocess.CompletedProcess:
+    """Run an external command from a notebook (blocking)."""
+    return subprocess.run(list(cmd), check=check)
+
+
+def _build_config_parser(description: str) -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(description=description)
     add_config_json_arg(
         parser,
         help_text="Path to config.json (relative paths are resolved from ins_pricing/ when possible).",
-    )
-    return parser
-
-
-def run_from_config_cli(
-    description: str,
-    argv: Optional[Sequence[str]] = None,
-) -> subprocess.CompletedProcess:
+    )
+    return parser
+
+
+def build_cmd_from_config(config_json: str | Path) -> tuple[List[str], str]:
+    """Build a command list from config.json runner settings.
+
+    Returns:
+        (cmd, mode) where mode is one of: entry, incremental, explain.
+    """
+    pkg_dir = _find_ins_pricing_dir()
+    config_path = Path(config_json)
+    if not config_path.is_absolute():
+        config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
+    raw = json.loads(config_path.read_text(encoding="utf-8", errors="replace"))
+    set_env(raw.get("env", {}))
+    runner = cast(dict, raw.get("runner") or {})
+
+    mode = str(runner.get("mode") or "entry").strip().lower()
+    use_watchdog = bool(runner.get("use_watchdog", False))
+    if mode == "watchdog":
+        use_watchdog = True
+        mode = "entry"
+
+    idle_seconds = int(runner.get("idle_seconds", 7200))
+    max_restarts = int(runner.get("max_restarts", 50))
+    restart_delay_seconds = int(runner.get("restart_delay_seconds", 10))
+
+    if mode == "incremental":
+        inc_args = runner.get("incremental_args") or []
+        if not isinstance(inc_args, list):
+            raise ValueError("config.runner.incremental_args must be a list of strings.")
+        cmd = build_incremental_cmd(config_path, extra_args=[str(x) for x in inc_args])
+        if use_watchdog:
+            cmd = wrap_with_watchdog(
+                cmd,
+                idle_seconds=idle_seconds,
+                max_restarts=max_restarts,
+                restart_delay_seconds=restart_delay_seconds,
+            )
+        return cmd, "incremental"
+
+    if mode == "explain":
+        exp_args = runner.get("explain_args") or []
+        if not isinstance(exp_args, list):
+            raise ValueError("config.runner.explain_args must be a list of strings.")
+        cmd = build_explain_cmd(config_path, extra_args=[str(x) for x in exp_args])
+        if use_watchdog:
+            cmd = wrap_with_watchdog(
+                cmd,
+                idle_seconds=idle_seconds,
+                max_restarts=max_restarts,
+                restart_delay_seconds=restart_delay_seconds,
+            )
+        return cmd, "explain"
+
+    if mode != "entry":
+        raise ValueError(
+            f"Unsupported runner.mode={mode!r}, expected 'entry', 'incremental', or 'explain'."
+        )
+
+    model_keys = runner.get("model_keys")
+    if not model_keys:
+        model_keys = raw.get("model_keys")
+    if not model_keys:
+        model_keys = ["ft"]
+    if not isinstance(model_keys, list):
+        raise ValueError("runner.model_keys must be a list of strings.")
+
+    nproc_per_node = int(runner.get("nproc_per_node", 1))
+    max_evals = int(runner.get("max_evals", raw.get("max_evals", 50)))
+    plot_curves = bool(runner.get("plot_curves", raw.get("plot_curves", True)))
+    ft_role = runner.get("ft_role", None)
+    if ft_role is None:
+        ft_role = raw.get("ft_role")
+
+    cmd = build_bayesopt_entry_cmd(
+        config_path,
+        model_keys=[str(x) for x in model_keys],
+        nproc_per_node=nproc_per_node,
+        extra_args=[
+            "--max-evals",
+            str(max_evals),
+            *(["--plot-curves"] if plot_curves else []),
+            *(["--ft-role", str(ft_role)] if ft_role else []),
+        ],
+    )
+
+    if use_watchdog:
+        cmd = wrap_with_watchdog(
+            cmd,
+            idle_seconds=idle_seconds,
+            max_restarts=max_restarts,
+            restart_delay_seconds=restart_delay_seconds,
+        )
+    return cmd, "entry"
+
+
+def run_from_config_cli(
+    description: str,
+    argv: Optional[Sequence[str]] = None,
+) -> subprocess.CompletedProcess:
     parser = _build_config_parser(description)
     args = parser.parse_args(argv)
     return run_from_config(args.config_json)
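The new build_cmd_from_config added in this hunk turns a config.json `runner` section into a ready-to-run command without executing anything. A sketch of driving it from a notebook, assuming the same module path as above and a purely illustrative config:

```python
# Sketch: the module path and the file name "demo_config.json" are assumptions;
# the "runner" keys mirror what build_cmd_from_config reads in the code above.
import json
from pathlib import Path

from ins_pricing.cli.utils.notebook_utils import build_cmd_from_config

cfg = {
    "model_keys": ["ft", "xgb"],
    "max_evals": 30,
    "runner": {
        "mode": "entry",          # or "incremental" / "explain" / "watchdog"
        "nproc_per_node": 2,
        "use_watchdog": True,
        "idle_seconds": 3600,
    },
}
Path("demo_config.json").write_text(json.dumps(cfg, indent=2), encoding="utf-8")

cmd, mode = build_cmd_from_config("demo_config.json")
print(mode)   # -> "entry"
print(cmd)    # stringified argv, wrapped by the watchdog runner
```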
@@ -245,8 +339,8 @@ def run_bayesopt_entry(
     return run(cmd, check=True)


-def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
-    """Notebook entry point: switch execution modes by editing config.json.
+def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
+    """Notebook entry point: switch execution modes by editing config.json.

    Convention: config.json may include a `runner` section for notebook control:
    - runner.mode: "entry" (default), "incremental", or "explain"
@@ -254,87 +348,7 @@ def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
    - runner.model_keys: list of models to run (entry only)
    - runner.max_evals / runner.plot_curves / runner.ft_role (entry only; override config fields)
    - runner.use_watchdog / runner.idle_seconds / runner.max_restarts / runner.restart_delay_seconds
-    - runner.incremental_args: List[str] (incremental only; extra args for cli/BayesOpt_incremental.py)
-    """
-    pkg_dir = _find_ins_pricing_dir()
-    config_path = Path(config_json)
-    if not config_path.is_absolute():
-        config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
-    raw = json.loads(config_path.read_text(encoding="utf-8", errors="replace"))
-    set_env(raw.get("env", {}))
-    runner = cast(dict, raw.get("runner") or {})
-
-    mode = str(runner.get("mode") or "entry").strip().lower()
-    use_watchdog = bool(runner.get("use_watchdog", False))
-    idle_seconds = int(runner.get("idle_seconds", 7200))
-    max_restarts = int(runner.get("max_restarts", 50))
-    restart_delay_seconds = int(runner.get("restart_delay_seconds", 10))
-
-    if mode == "incremental":
-        inc_args = runner.get("incremental_args") or []
-        if not isinstance(inc_args, list):
-            raise ValueError("config.runner.incremental_args must be a list of strings.")
-        cmd = build_incremental_cmd(config_path, extra_args=[str(x) for x in inc_args])
-        if use_watchdog:
-            cmd = wrap_with_watchdog(
-                cmd,
-                idle_seconds=idle_seconds,
-                max_restarts=max_restarts,
-                restart_delay_seconds=restart_delay_seconds,
-            )
-        return run(cmd, check=True)
-
-    if mode == "explain":
-        exp_args = runner.get("explain_args") or []
-        if not isinstance(exp_args, list):
-            raise ValueError("config.runner.explain_args must be a list of strings.")
-        cmd = build_explain_cmd(config_path, extra_args=[str(x) for x in exp_args])
-        if use_watchdog:
-            cmd = wrap_with_watchdog(
-                cmd,
-                idle_seconds=idle_seconds,
-                max_restarts=max_restarts,
-                restart_delay_seconds=restart_delay_seconds,
-            )
-        return run(cmd, check=True)
-
-    if mode != "entry":
-        raise ValueError(
-            f"Unsupported runner.mode={mode!r}, expected 'entry', 'incremental', or 'explain'."
-        )
-
-    model_keys = runner.get("model_keys")
-    if not model_keys:
-        model_keys = raw.get("model_keys")
-    if not model_keys:
-        model_keys = ["ft"]
-    if not isinstance(model_keys, list):
-        raise ValueError("runner.model_keys must be a list of strings.")
-
-    nproc_per_node = int(runner.get("nproc_per_node", 1))
-    max_evals = int(runner.get("max_evals", raw.get("max_evals", 50)))
-    plot_curves = bool(runner.get("plot_curves", raw.get("plot_curves", True)))
-    ft_role = runner.get("ft_role", None)
-    if ft_role is None:
-        ft_role = raw.get("ft_role")
-
-    cmd = build_bayesopt_entry_cmd(
-        config_path,
-        model_keys=[str(x) for x in model_keys],
-        nproc_per_node=nproc_per_node,
-        extra_args=[
-            "--max-evals",
-            str(max_evals),
-            *(["--plot-curves"] if plot_curves else []),
-            *(["--ft-role", str(ft_role)] if ft_role else []),
-        ],
-    )
-
-    if use_watchdog:
-        cmd = wrap_with_watchdog(
-            cmd,
-            idle_seconds=idle_seconds,
-            max_restarts=max_restarts,
-            restart_delay_seconds=restart_delay_seconds,
-        )
-    return run(cmd, check=True)
+    - runner.incremental_args: List[str] (incremental only; extra args for cli/BayesOpt_incremental.py)
+    """
+    cmd, _mode = build_cmd_from_config(config_json)
+    return run(cmd, check=True)
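With run_from_config reduced to a thin wrapper over build_cmd_from_config, a notebook can preview the exact command before committing to a blocking run, through one shared code path. A short sketch under the same import-path assumption as above:

```python
# Assumed module path; "config.json" is a placeholder file.
from ins_pricing.cli.utils.notebook_utils import build_cmd_from_config, run_from_config

cmd, mode = build_cmd_from_config("config.json")   # inspect only, nothing runs
print(mode, cmd)

result = run_from_config("config.json")            # build + execute (blocking)
print(result.returncode)
```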
@@ -10,11 +10,21 @@ import threading
 import time
 from typing import List, Optional

-if __package__ in {None, ""}:
-    if importlib.util.find_spec("ins_pricing") is None:
-        repo_root = Path(__file__).resolve().parents[2]
-        if str(repo_root) not in sys.path:
-            sys.path.insert(0, str(repo_root))
+def _ensure_repo_root() -> None:
+    if __package__ not in {None, ""}:
+        return
+    if importlib.util.find_spec("ins_pricing") is not None:
+        return
+    bootstrap_path = Path(__file__).resolve().parents[1] / "utils" / "bootstrap.py"
+    spec = importlib.util.spec_from_file_location("ins_pricing.cli.utils.bootstrap", bootstrap_path)
+    if spec is None or spec.loader is None:
+        return
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    module.ensure_repo_root()
+
+
+_ensure_repo_root()

 try:
     from ins_pricing.cli.utils.run_logging import configure_run_logging  # type: ignore
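The inline sys.path patching is replaced by loading cli/utils/bootstrap.py (a new +23 -0 file whose body is not shown in this diff) and calling its ensure_repo_root(). Purely as an assumption about that helper, a minimal sketch of what such a bootstrap function typically does for the layout described above:

```python
# Hypothetical sketch of cli/utils/bootstrap.py; the real file is not in this diff.
import sys
from pathlib import Path


def ensure_repo_root() -> None:
    """Make the repository root importable when a CLI file is run directly."""
    # bootstrap.py sits at <repo>/ins_pricing/cli/utils/bootstrap.py,
    # so the repository root is three parents up (assumption).
    repo_root = Path(__file__).resolve().parents[3]
    if str(repo_root) not in sys.path:
        sys.path.insert(0, str(repo_root))
```

Centralising this in one module means the several CLI and frontend entry points (which previously each computed their own parents[...] offset) share a single definition of "repo root".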
@@ -17,16 +17,31 @@ from ins_pricing.frontend.ft_workflow import FTWorkflowHelper
 from ins_pricing.frontend.runner import TaskRunner
 from ins_pricing.frontend.config_builder import ConfigBuilder
 import json
+import tempfile
 import sys
 import inspect
+import importlib.util
 from pathlib import Path
-from typing import Optional, Dict, Any, Callable, Iterable, Tuple
+from typing import Optional, Dict, Any, Callable, Iterable, Tuple, Generator
 import threading
 import queue
 import time

-# Add parent directory to path to import ins_pricing modules
-sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+def _ensure_repo_root() -> None:
+    if __package__ not in {None, ""}:
+        return
+    if importlib.util.find_spec("ins_pricing") is not None:
+        return
+    bootstrap_path = Path(__file__).resolve().parents[1] / "cli" / "utils" / "bootstrap.py"
+    spec = importlib.util.spec_from_file_location("ins_pricing.cli.utils.bootstrap", bootstrap_path)
+    if spec is None or spec.loader is None:
+        return
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    module.ensure_repo_root()
+
+
+_ensure_repo_root()

 os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
 os.environ.setdefault("GRADIO_TELEMETRY_ENABLED", "False")
@@ -112,10 +127,21 @@ class PricingApp:
         output_dir: str,
         use_gpu: bool,
         model_keys: str,
-        max_evals: int,
-        xgb_max_depth_max: int,
-        xgb_n_estimators_max: int,
-    ) -> tuple[str, str]:
+        max_evals: int,
+        xgb_max_depth_max: int,
+        xgb_n_estimators_max: int,
+        xgb_gpu_id: int,
+        xgb_cleanup_per_fold: bool,
+        xgb_cleanup_synchronize: bool,
+        xgb_use_dmatrix: bool,
+        ft_cleanup_per_fold: bool,
+        ft_cleanup_synchronize: bool,
+        resn_cleanup_per_fold: bool,
+        resn_cleanup_synchronize: bool,
+        gnn_cleanup_per_fold: bool,
+        gnn_cleanup_synchronize: bool,
+        optuna_cleanup_synchronize: bool,
+    ) -> tuple[str, str]:
         """Build configuration from UI parameters."""
         try:
             # Parse comma-separated lists
@@ -148,10 +174,21 @@ class PricingApp:
                 output_dir=output_dir,
                 use_gpu=use_gpu,
                 model_keys=model_keys,
-                max_evals=max_evals,
-                xgb_max_depth_max=xgb_max_depth_max,
-                xgb_n_estimators_max=xgb_n_estimators_max,
-            )
+                max_evals=max_evals,
+                xgb_max_depth_max=xgb_max_depth_max,
+                xgb_n_estimators_max=xgb_n_estimators_max,
+                xgb_gpu_id=xgb_gpu_id,
+                xgb_cleanup_per_fold=xgb_cleanup_per_fold,
+                xgb_cleanup_synchronize=xgb_cleanup_synchronize,
+                xgb_use_dmatrix=xgb_use_dmatrix,
+                ft_cleanup_per_fold=ft_cleanup_per_fold,
+                ft_cleanup_synchronize=ft_cleanup_synchronize,
+                resn_cleanup_per_fold=resn_cleanup_per_fold,
+                resn_cleanup_synchronize=resn_cleanup_synchronize,
+                gnn_cleanup_per_fold=gnn_cleanup_per_fold,
+                gnn_cleanup_synchronize=gnn_cleanup_synchronize,
+                optuna_cleanup_synchronize=optuna_cleanup_synchronize,
+            )

             is_valid, msg = self.config_builder.validate_config(config)
             if not is_valid:
@@ -180,54 +217,60 @@
         except Exception as e:
             return f"Error saving config: {str(e)}"

-    def run_training(self, config_json: str) -> tuple[str, str]:
+    def run_training(self, config_json: str) -> Generator[tuple[str, str], None, None]:
         """
         Run task (training, explain, plotting, etc.) with the current configuration.

         The task type is automatically detected from config.runner.mode.
         Supported modes: entry (training), explain, incremental, watchdog, etc.
         """
-        try:
-            temp_config_path = None
-            if config_json:
-                config = json.loads(config_json)
-                task_mode = config.get('runner', {}).get('mode', 'entry')
-                base_dir = self.current_config_dir or Path.cwd()
-                temp_config_path = (base_dir / "temp_config.json").resolve()
-                with open(temp_config_path, 'w', encoding='utf-8') as f:
-                    json.dump(config, f, indent=2)
-                config_path = temp_config_path
-            elif self.current_config_path and self.current_config_path.exists():
-                config_path = self.current_config_path
-                config = json.loads(config_path.read_text(encoding="utf-8"))
-                task_mode = config.get('runner', {}).get('mode', 'entry')
-            elif self.current_config:
-                config = self.current_config
-                task_mode = config.get('runner', {}).get('mode', 'entry')
-                temp_config_path = (Path.cwd() / "temp_config.json").resolve()
-                with open(temp_config_path, 'w', encoding='utf-8') as f:
-                    json.dump(config, f, indent=2)
-                config_path = temp_config_path
-            else:
-                return "No configuration provided", ""
-
-            log_generator = self.runner.run_task(str(config_path))
-
-            # Collect logs
-            full_log = ""
-            for log_line in log_generator:
-                full_log += log_line + "\n"
-                yield f"Task [{task_mode}] in progress...", full_log
-
-            # Clean up
-            if temp_config_path and temp_config_path.exists():
-                temp_config_path.unlink()
-
-            yield f"Task [{task_mode}] completed!", full_log
-
-        except Exception as e:
-            error_msg = f"Error during task execution: {str(e)}"
-            yield error_msg, error_msg
+        temp_config_path: Optional[Path] = None
+        try:
+            if config_json:
+                config = json.loads(config_json)
+                task_mode = config.get('runner', {}).get('mode', 'entry')
+                base_dir = self.current_config_dir or Path.cwd()
+                fd, temp_path = tempfile.mkstemp(prefix="temp_config_", suffix=".json", dir=base_dir)
+                temp_config_path = Path(temp_path)
+                with os.fdopen(fd, 'w', encoding='utf-8') as f:
+                    json.dump(config, f, indent=2)
+                config_path = temp_config_path
+            elif self.current_config_path and self.current_config_path.exists():
+                config_path = self.current_config_path
+                config = json.loads(config_path.read_text(encoding="utf-8"))
+                task_mode = config.get('runner', {}).get('mode', 'entry')
+            elif self.current_config:
+                config = self.current_config
+                task_mode = config.get('runner', {}).get('mode', 'entry')
+                base_dir = Path.cwd()
+                fd, temp_path = tempfile.mkstemp(prefix="temp_config_", suffix=".json", dir=base_dir)
+                temp_config_path = Path(temp_path)
+                with os.fdopen(fd, 'w', encoding='utf-8') as f:
+                    json.dump(config, f, indent=2)
+                config_path = temp_config_path
+            else:
+                yield "No configuration provided", ""
+                return
+
+            log_generator = self.runner.run_task(str(config_path))
+
+            # Collect logs
+            full_log = ""
+            for log_line in log_generator:
+                full_log += log_line + "\n"
+                yield f"Task [{task_mode}] in progress...", full_log
+
+            yield f"Task [{task_mode}] completed!", full_log
+
+        except Exception as e:
+            error_msg = f"Error during task execution: {str(e)}"
+            yield error_msg, error_msg
+        finally:
+            if temp_config_path is not None:
+                try:
+                    temp_config_path.unlink(missing_ok=True)
+                except Exception:
+                    pass

     def prepare_ft_step1(self, config_json: str, use_ddp: bool, nproc: int) -> tuple[str, str]:
         """Prepare FT Step 1 configuration."""
@@ -551,11 +594,34 @@ def create_ui():
                    label="Max Evaluations", value=50, precision=0)

            with gr.Column():
-                gr.Markdown("#### XGBoost Settings")
-                xgb_max_depth_max = gr.Number(
-                    label="XGB Max Depth", value=25, precision=0)
-                xgb_n_estimators_max = gr.Number(
-                    label="XGB Max Estimators", value=500, precision=0)
+                gr.Markdown("#### XGBoost Settings")
+                xgb_max_depth_max = gr.Number(
+                    label="XGB Max Depth", value=25, precision=0)
+                xgb_n_estimators_max = gr.Number(
+                    label="XGB Max Estimators", value=500, precision=0)
+                xgb_gpu_id = gr.Number(
+                    label="XGB GPU ID", value=0, precision=0)
+                xgb_cleanup_per_fold = gr.Checkbox(
+                    label="XGB Cleanup Per Fold", value=False)
+                xgb_cleanup_synchronize = gr.Checkbox(
+                    label="XGB Cleanup Synchronize", value=False)
+                xgb_use_dmatrix = gr.Checkbox(
+                    label="XGB Use DMatrix", value=True)
+                gr.Markdown("#### Fold Cleanup")
+                ft_cleanup_per_fold = gr.Checkbox(
+                    label="FT Cleanup Per Fold", value=False)
+                ft_cleanup_synchronize = gr.Checkbox(
+                    label="FT Cleanup Synchronize", value=False)
+                resn_cleanup_per_fold = gr.Checkbox(
+                    label="ResNet Cleanup Per Fold", value=False)
+                resn_cleanup_synchronize = gr.Checkbox(
+                    label="ResNet Cleanup Synchronize", value=False)
+                gnn_cleanup_per_fold = gr.Checkbox(
+                    label="GNN Cleanup Per Fold", value=False)
+                gnn_cleanup_synchronize = gr.Checkbox(
+                    label="GNN Cleanup Synchronize", value=False)
+                optuna_cleanup_synchronize = gr.Checkbox(
+                    label="Optuna Cleanup Synchronize", value=False)

        with gr.Row():
            build_btn = gr.Button(
@@ -836,9 +902,14 @@ def create_ui():
                data_dir, model_list, model_categories, target, weight,
                feature_list, categorical_features, task_type, prop_test,
                holdout_ratio, val_ratio, split_strategy, rand_seed, epochs,
-                output_dir, use_gpu, model_keys, max_evals,
-                xgb_max_depth_max, xgb_n_estimators_max
-            ],
+                output_dir, use_gpu, model_keys, max_evals,
+                xgb_max_depth_max, xgb_n_estimators_max,
+                xgb_gpu_id, xgb_cleanup_per_fold, xgb_cleanup_synchronize,
+                xgb_use_dmatrix, ft_cleanup_per_fold, ft_cleanup_synchronize,
+                resn_cleanup_per_fold, resn_cleanup_synchronize,
+                gnn_cleanup_per_fold, gnn_cleanup_synchronize,
+                optuna_cleanup_synchronize
+            ],
            outputs=[build_status, config_json]
        )

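run_training's return annotation changed to Generator because Gradio streams each value yielded by a generator handler into the bound outputs, which is how the status and log textboxes update while the subprocess runs. A self-contained sketch of that mechanism (not the app's actual UI):

```python
# Minimal Gradio streaming demo; component names and the task are illustrative only.
import time

import gradio as gr


def long_task(n: int):
    log = ""
    for i in range(int(n)):
        time.sleep(0.5)
        log += f"step {i + 1}/{int(n)}\n"
        yield "in progress...", log      # each yield updates (status, logs)
    yield "completed!", log


with gr.Blocks() as demo:
    steps = gr.Number(label="Steps", value=5, precision=0)
    status = gr.Textbox(label="Status")
    logs = gr.Textbox(label="Logs", lines=10)
    gr.Button("Run").click(long_task, inputs=[steps], outputs=[status, logs])

if __name__ == "__main__":
    demo.queue().launch()                # queueing enables generator streaming
```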
@@ -148,6 +148,17 @@ class ConfigBuilder:
         max_evals: int = 50,
         xgb_max_depth_max: int = 25,
         xgb_n_estimators_max: int = 500,
+        xgb_gpu_id: Optional[int] = None,
+        xgb_cleanup_per_fold: bool = False,
+        xgb_cleanup_synchronize: bool = False,
+        xgb_use_dmatrix: bool = True,
+        ft_cleanup_per_fold: bool = False,
+        ft_cleanup_synchronize: bool = False,
+        resn_cleanup_per_fold: bool = False,
+        resn_cleanup_synchronize: bool = False,
+        gnn_cleanup_per_fold: bool = False,
+        gnn_cleanup_synchronize: bool = False,
+        optuna_cleanup_synchronize: bool = False,
         nproc_per_node: int = 2,
     ) -> Dict[str, Any]:
         """
@@ -174,6 +185,17 @@ class ConfigBuilder:
             max_evals: Maximum number of evaluations for optimization
             xgb_max_depth_max: Maximum depth for XGBoost
             xgb_n_estimators_max: Maximum estimators for XGBoost
+            xgb_gpu_id: XGBoost GPU device id (None = default)
+            xgb_cleanup_per_fold: Cleanup GPU memory per XGBoost fold
+            xgb_cleanup_synchronize: Synchronize CUDA during XGBoost cleanup
+            xgb_use_dmatrix: Use xgb.train with DMatrix/QuantileDMatrix
+            ft_cleanup_per_fold: Cleanup GPU memory per FT fold
+            ft_cleanup_synchronize: Synchronize CUDA during FT cleanup
+            resn_cleanup_per_fold: Cleanup GPU memory per ResNet fold
+            resn_cleanup_synchronize: Synchronize CUDA during ResNet cleanup
+            gnn_cleanup_per_fold: Cleanup GPU memory per GNN fold
+            gnn_cleanup_synchronize: Synchronize CUDA during GNN cleanup
+            optuna_cleanup_synchronize: Synchronize CUDA during Optuna cleanup
             nproc_per_node: Number of processes per node

         Returns:
@@ -204,6 +226,17 @@ class ConfigBuilder:
             "use_gpu": use_gpu,
             "xgb_max_depth_max": xgb_max_depth_max,
             "xgb_n_estimators_max": xgb_n_estimators_max,
+            "xgb_gpu_id": xgb_gpu_id,
+            "xgb_cleanup_per_fold": xgb_cleanup_per_fold,
+            "xgb_cleanup_synchronize": xgb_cleanup_synchronize,
+            "xgb_use_dmatrix": xgb_use_dmatrix,
+            "ft_cleanup_per_fold": ft_cleanup_per_fold,
+            "ft_cleanup_synchronize": ft_cleanup_synchronize,
+            "resn_cleanup_per_fold": resn_cleanup_per_fold,
+            "resn_cleanup_synchronize": resn_cleanup_synchronize,
+            "gnn_cleanup_per_fold": gnn_cleanup_per_fold,
+            "gnn_cleanup_synchronize": gnn_cleanup_synchronize,
+            "optuna_cleanup_synchronize": optuna_cleanup_synchronize,
             "optuna_storage": f"{output_dir}/optuna/bayesopt.sqlite3",
             "stack_model_keys": model_keys,
         })
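The new *_cleanup_per_fold and *_cleanup_synchronize keys are only recorded into the config here; how the trainers consume them is not part of this hunk. A hedged sketch of the kind of per-fold GPU cleanup these flags describe, with the helper name and call site as assumptions:

```python
# Hypothetical helper illustrating the flag semantics; not the package's actual API.
import gc

import torch


def cleanup_after_fold(cleanup_per_fold: bool, synchronize: bool) -> None:
    """Release per-fold GPU memory if the config flags ask for it."""
    if not cleanup_per_fold:
        return
    gc.collect()                          # drop Python references to fold objects
    if torch.cuda.is_available():
        if synchronize:
            torch.cuda.synchronize()      # wait for in-flight kernels first
        torch.cuda.empty_cache()          # return cached blocks to the driver
```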
@@ -26,6 +26,17 @@
   "use_gpu": true,
   "xgb_max_depth_max": 25,
   "xgb_n_estimators_max": 500,
+  "xgb_gpu_id": 0,
+  "xgb_cleanup_per_fold": false,
+  "xgb_cleanup_synchronize": false,
+  "xgb_use_dmatrix": true,
+  "ft_cleanup_per_fold": false,
+  "ft_cleanup_synchronize": false,
+  "resn_cleanup_per_fold": false,
+  "resn_cleanup_synchronize": false,
+  "gnn_cleanup_per_fold": false,
+  "gnn_cleanup_synchronize": false,
+  "optuna_cleanup_synchronize": false,
   "optuna_storage": "./Results/optuna/bayesopt.sqlite3",
   "runner": {
     "mode": "entry",