hud-python 0.4.28__py3-none-any.whl → 0.4.30__py3-none-any.whl

This diff compares the contents of publicly available package versions as published to one of the supported registries. It is provided for informational purposes only.

Potentially problematic release: this version of hud-python might be problematic.

Files changed (77)
  1. hud/__init__.py +2 -1
  2. hud/agents/base.py +81 -45
  3. hud/agents/claude.py +8 -4
  4. hud/agents/openai_chat_generic.py +66 -40
  5. hud/agents/tests/test_base.py +0 -4
  6. hud/agents/tests/test_openai.py +1 -1
  7. hud/cli/__init__.py +182 -52
  8. hud/cli/dev.py +8 -9
  9. hud/cli/eval.py +317 -119
  10. hud/cli/flows/__init__.py +0 -0
  11. hud/cli/flows/tasks.py +0 -0
  12. hud/cli/get.py +160 -0
  13. hud/cli/rl/__init__.py +567 -71
  14. hud/cli/rl/config.py +94 -0
  15. hud/cli/rl/display.py +133 -0
  16. hud/cli/rl/gpu.py +63 -0
  17. hud/cli/rl/gpu_utils.py +318 -0
  18. hud/cli/rl/presets.py +96 -0
  19. hud/cli/rl/remote_runner.py +347 -0
  20. hud/cli/rl/rl_api.py +150 -0
  21. hud/cli/rl/vllm.py +177 -0
  22. hud/cli/tests/test_analyze_metadata.py +0 -1
  23. hud/cli/utils/tasks.py +26 -0
  24. hud/clients/base.py +21 -23
  25. hud/clients/mcp_use.py +36 -44
  26. hud/clients/tests/test_mcp_use_retry.py +10 -10
  27. hud/datasets/__init__.py +4 -3
  28. hud/datasets/{execution/parallel.py → parallel.py} +1 -1
  29. hud/datasets/{execution/runner.py → runner.py} +1 -1
  30. hud/datasets/utils.py +1 -1
  31. hud/native/comparator.py +6 -6
  32. hud/native/tests/test_comparator.py +8 -8
  33. hud/native/tests/test_native_init.py +13 -11
  34. hud/otel/config.py +1 -1
  35. hud/otel/instrumentation.py +35 -0
  36. hud/rl/README.md +30 -0
  37. hud/rl/__init__.py +1 -0
  38. hud/rl/actor.py +174 -0
  39. hud/rl/buffer.py +371 -0
  40. hud/rl/chat_template.jinja +101 -0
  41. hud/rl/config.py +184 -0
  42. hud/rl/distributed.py +95 -0
  43. hud/rl/learner.py +589 -0
  44. hud/rl/tests/__init__.py +1 -0
  45. hud/rl/tests/test_learner.py +171 -0
  46. hud/rl/train.py +354 -0
  47. hud/rl/types.py +101 -0
  48. hud/rl/utils/start_vllm_server.sh +30 -0
  49. hud/rl/utils.py +524 -0
  50. hud/rl/vllm_adapter.py +125 -0
  51. hud/settings.py +6 -0
  52. hud/telemetry/__init__.py +2 -1
  53. hud/telemetry/job.py +46 -3
  54. hud/telemetry/tests/test_trace.py +3 -3
  55. hud/telemetry/trace.py +85 -13
  56. hud/tools/tests/test_computer.py +3 -3
  57. hud/tools/tests/test_computer_actions.py +1 -1
  58. hud/types.py +123 -2
  59. hud/utils/group_eval.py +223 -0
  60. hud/utils/hud_console.py +113 -13
  61. hud/utils/tasks.py +119 -0
  62. hud/utils/tests/test_version.py +1 -1
  63. hud/version.py +1 -1
  64. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/METADATA +20 -2
  65. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/RECORD +68 -48
  66. hud/cli/hf.py +0 -406
  67. hud/cli/rl/README.md +0 -243
  68. hud/cli/rl/init.py +0 -370
  69. hud/cli/rl/pod.py +0 -501
  70. hud/cli/rl/ssh.py +0 -322
  71. hud/cli/rl/train.py +0 -562
  72. hud/cli/rl/utils.py +0 -165
  73. hud/datasets/execution/__init__.py +0 -13
  74. hud/datasets/task.py +0 -116
  75. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/WHEEL +0 -0
  76. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/entry_points.txt +0 -0
  77. {hud_python-0.4.28.dist-info → hud_python-0.4.30.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/__init__.py CHANGED
@@ -1,91 +1,587 @@
-"""HUD RL - Commands for reinforcement learning with HUD environments."""
+"""RL training command for HUD CLI."""
 
 from __future__ import annotations
 
-from pathlib import Path  # noqa: TC003
+import asyncio
+import logging
+import os
+import subprocess
+import sys
+from pathlib import Path
 
 import typer
+from rich.console import Console
 
-from hud.utils.hud_console import HUDConsole
+from hud.cli.utils.tasks import find_tasks_file
+from hud.rl.config import validate_vl_model
+from hud.rl.train import train
 
-# Create the RL subcommand app
-rl_app = typer.Typer(
-    name="rl",
-    help="🤖 Reinforcement learning commands for HUD environments",
-    rich_markup_mode="rich",
-)
+# Then import HUD modules
+from hud.utils.hud_console import hud_console
+from hud.utils.tasks import load_tasks
 
-hud_console = HUDConsole()
+from .config import generate_config_interactive, load_config, save_config
+from .display import display_config_summary, display_gpu_info
 
+# Import local modules first
+from .gpu import detect_cuda_devices, validate_gpu_memory
+from .gpu_utils import adjust_config_for_ddp, calculate_optimal_gpu_allocation, health_check_gpus
+from .presets import get_training_presets
+from .vllm import start_vllm_server, wait_for_vllm_server
 
-@rl_app.callback(invoke_without_command=True)
-def rl_main(
-    ctx: typer.Context,
-    model: str = typer.Option("Qwen/Qwen2.5-3B-Instruct", "--model", "-m", help="Model to train"),
-    dataset: str | None = typer.Option(
+console = Console()
+
+
+def rl_command(
+    tasks_file: str | None = typer.Argument(
+        None,
+        help="Path to tasks file (JSON/JSONL) or HuggingFace dataset name",
+    ),
+    model: str | None = typer.Argument(
+        None,
+        help="Model to train (default: interactive selection)",
+    ),
+    config_file: Path | None = typer.Option(  # noqa: B008
+        None,
+        "--config",
+        "-c",
+        help="Path to existing configuration file",
+    ),
+    output_dir: str = typer.Option(
+        "/checkpoints",
+        "--output-dir",
+        "-o",
+        help="Output directory for checkpoints",
+    ),
+    restart: bool = typer.Option(
+        False,
+        "--restart",
+        help="Restart the vLLM server before training",
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        "-v",
+        help="Enable verbose output",
+    ),
+    # DDP options
+    no_ddp: bool = typer.Option(
+        False,
+        "--no-ddp",
+        help="Disable DDP even with multiple GPUs",
+    ),
+    ddp_gpus: str | None = typer.Option(
         None,
-        "--dataset",
-        "-d",
-        help="Dataset: JSON file path or HuggingFace name (auto-detects if not provided)",
+        "--ddp-gpus",
+        help="Specific GPUs for DDP (e.g., '0,1,2,3')",
+    ),
+    vllm_gpu: int | None = typer.Option(
+        None,
+        "--vllm-gpu",
+        help="Specific GPU for vLLM server",
+    ),
+    # Execution mode options
+    local: bool = typer.Option(
+        False,
+        "--local",
+        help="Run training locally instead of using remote API server",
+    ),
+    # Internal flag
+    skip_vllm_startup: bool = typer.Option(
+        False,
+        hidden=True,
+        help="Skip local vLLM server startup (for internal use)",
     ),
-    config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"),  # noqa: B008
-    gpus: str = typer.Option("2xA100", "--gpus", help="GPU configuration (e.g., 2xA100, 4xH100)"),
-    provider: str = typer.Option("prime", "--provider", help="Infrastructure provider"),
-    output_dir: Path = typer.Option("./checkpoints", "--output", "-o", help="Output directory"),  # noqa: B008
 ) -> None:
-    """🤖 Train RL models on HUD environments.
-
-    Runs training on remote GPU infrastructure with automatic setup.
-    The command will:
-    1. Check for required files (config, dataset)
-    2. Offer to generate missing files
-    3. Push environment to registry if needed
-    4. Start remote training on Prime Intellect
-
-    Dataset can be:
-    - A local JSON file with tasks (e.g., tasks.json)
-    - A HuggingFace dataset name (e.g., 'username/dataset-name')
-    - Auto-detected from current directory if not specified
-
-    Examples:
-        hud rl                       # Interactive mode, auto-detect tasks.json
-        hud rl --model gpt2          # Train with specific model
-        hud rl --dataset tasks.json  # Use local task file
-        hud rl --gpus 4xH100         # Use different GPU configuration
-        hud rl init my-env:latest    # Generate config for environment
-    """
-    # Only run main command if no subcommand was invoked
-    if ctx.invoked_subcommand is None:
-        from .train import train_command_wrapper
-
-        train_command_wrapper(
-            model=model,
-            dataset=dataset,
-            config=config,
-            gpus=gpus,
-            provider=provider,
-            output_dir=output_dir,
+    """Run GRPO reinforcement learning training on tasks."""
+    # Configure logging based on verbose flag BEFORE any output
+    if not verbose:
+        # Set environment variable for HUD components
+        os.environ["HUD_LOG_LEVEL"] = "WARNING"
+
+        # Configure logging levels
+        logging.basicConfig(level=logging.WARNING, force=True)
+
+        # Get root logger and set its level
+        root_logger = logging.getLogger()
+        root_logger.setLevel(logging.WARNING)
+
+        # Suppress INFO logs from various components
+        for logger_name in [
+            "httpx",
+            "hud.agents",
+            "hud.utils.design",
+            "hud",
+            "asyncio",
+            "transformers",
+        ]:
+            logging.getLogger(logger_name).setLevel(logging.WARNING)
+
+        # Also set HUD agent logger explicitly
+        logging.getLogger("hud.agents.base").setLevel(logging.WARNING)
+    else:
+        # In verbose mode, show everything
+        logging.basicConfig(level=logging.INFO)
+
+    hud_console.header("HUD RL Training")
+
+    # Determine execution mode
+    use_remote = not local
+
+    if not tasks_file:
+        tasks_file = find_tasks_file(tasks_file)
+        if not tasks_file:
+            hud_console.warning("No tasks file found in current directory")
+            hud_console.hint(
+                "Download a HF dataset using `hud get <dataset_name>` (e.g., `hud get hud-evals/2048-basic`)"  # noqa: E501
+            )
+            hud_console.hint("or create a tasks file manually.")
+            raise typer.Exit(1)
+
+    # Handle remote execution
+    if use_remote:
+        try:
+            from .remote_runner import run_remote_training
+
+            run_remote_training(
+                tasks_file=tasks_file, model=model, config_file=config_file, output_dir=output_dir
+            )
+            return
+        except Exception as e:
+            console.print(f"[red]❌ Remote training failed: {e!s}[/red]")
+            raise typer.Exit(1) from e
+
+    # Check Python version compatibility
+    python_version = sys.version_info
+    if python_version.major == 3 and python_version.minor >= 13:
+        console.print("[red]⚠️ Warning: Python 3.13+ detected![/red]")
+        console.print("[yellow]vLLM has compatibility issues with Python 3.13.[/yellow]")
+        console.print("[yellow]Recommended: Use Python 3.12 or 3.11[/yellow]")
+        console.print("\n[dim]To create a new environment with Python 3.12:[/dim]")
+        console.print("[dim]  1. Exit this shell: exit[/dim]")
+        console.print("[dim]  2. Remove current venv: sudo rm -rf .venv[/dim]")
+        console.print("[dim]  3. Create new venv: uv venv --python 3.12[/dim]")
+        console.print("[dim]  4. Install dependencies: uv pip install -e '.[rl]'[/dim]")
+
+        if not typer.confirm("\nDo you want to continue anyway?", default=False):
+            raise typer.Exit(1)
+
+    # Step 1: Validate CUDA devices
+    console.print("[yellow]Checking GPU availability...[/yellow]")
+    gpu_info = detect_cuda_devices()
+
+    if not gpu_info["available"]:
+        console.print(f"[red]❌ {gpu_info['error']}[/red]")
+        console.print("[yellow]RL training requires CUDA-capable GPUs[/yellow]")
+        raise typer.Exit(1)
+
+    display_gpu_info(gpu_info)
+
+    # Perform GPU health check
+    all_gpu_indices = [device["index"] for device in gpu_info["devices"]]
+    health_results = health_check_gpus(all_gpu_indices)
+
+    if not health_results["all_healthy"]:
+        console.print("\n[yellow]⚠️ Some GPUs failed health checks![/yellow]")
+        console.print(
+            f"[yellow]Unhealthy GPUs: {list(health_results['unhealthy_gpus'].keys())}[/yellow]"
+        )
+
+        if not health_results["healthy_gpus"]:
+            console.print("[red]❌ No healthy GPUs available for training![/red]")
+            raise typer.Exit(1)
+
+        console.print(
+            f"\n[cyan]You have {len(health_results['healthy_gpus'])} healthy GPUs available.[/cyan]"
         )
 
+        continue_training = typer.confirm("\nContinue with healthy GPUs only?", default=True)
+
+        if not continue_training:
+            healthy_str = ",".join(map(str, health_results["healthy_gpus"]))
+            console.print("\n[yellow]Exiting. Please resolve GPU issues and try again.[/yellow]")
+            console.print("\n[cyan]💡 Tip: To use only healthy GPUs, you can run:[/cyan]")
+            console.print(f"[white]hud rl {tasks_file} --ddp-gpus {healthy_str} --local[/white]\n")
+            raise typer.Exit(0)
+        else:
+            # Continue with healthy GPUs only
+            # Update gpu_info to only include healthy GPUs
+            gpu_info["devices"] = [
+                d for d in gpu_info["devices"] if d["index"] in health_results["healthy_gpus"]
+            ]
+            console.print(
+                f"\n[green]✅ Continuing with {len(gpu_info['devices'])} healthy GPUs[/green]"
+            )
+
+    # Get primary GPU memory for configuration
+    primary_gpu = gpu_info["devices"][0]
+    gpu_memory_gb = primary_gpu["memory_gb"]
+
+    # Validate GPU memory for 3B model
+    if not validate_gpu_memory(gpu_memory_gb, "3B"):
+        console.print(f"[red]❌ Insufficient GPU memory ({gpu_memory_gb:.1f} GB)[/red]")
+        console.print("[yellow]Qwen 2.5 VL 3B requires at least 12 GB of GPU memory[/yellow]")
+        raise typer.Exit(1)
+
+    # Step 2: Load tasks
+    if tasks_file:
+        console.print(f"\n[cyan]Loading tasks from: {tasks_file}[/cyan]")
+    else:
+        # Auto-detect tasks file
+        possible_files = ["tasks.json", "tasks.jsonl", "browser_2048_tasks.jsonl"]
+        for f in possible_files:
+            if Path(f).exists():
+                tasks_file = f
+                console.print(f"[green]Auto-detected tasks file: {f}[/green]")
+                break
+
+        if not tasks_file:
+            console.print("[red]❌ No tasks file specified or auto-detected[/red]")
+            console.print(
+                "[yellow]Please provide a tasks file or create one of: tasks.json, tasks.jsonl[/yellow]"  # noqa: E501
+            )
+            raise typer.Exit(1)
+
+    # Load the tasks
+    tasks = load_tasks(tasks_file)
+    console.print(f"[green]✅ Loaded {len(tasks)} tasks[/green]")
+
+    # Validate tasks
+    invalid_tasks = []
+    for i, task in enumerate(tasks):
+        if not hasattr(task, "prompt") or not task.prompt:
+            invalid_tasks.append(f"Task {i}: missing 'prompt' field")
+        if not hasattr(task, "mcp_config") or not task.mcp_config:
+            invalid_tasks.append(f"Task {i}: missing 'mcp_config' field")
+
+    if invalid_tasks:
+        console.print("[red]❌ Invalid tasks found:[/red]")
+        for error in invalid_tasks[:5]:  # Show first 5 errors
+            console.print(f"  - {error}")
+        if len(invalid_tasks) > 5:
+            console.print(f"  ... and {len(invalid_tasks) - 5} more")
+        raise typer.Exit(1)
+
+    # Step 3: Model selection (if not provided)
+    if model is None and not config_file:
+        model = hud_console.select(
+            "Select a model for RL training:",
+            choices=[
+                {
+                    "name": "Qwen 2.5 VL 3B (Recommended - Vision-Language)",
+                    "value": "Qwen/Qwen2.5-VL-3B-Instruct",
+                },
+                {"name": "Custom model", "value": "custom"},
+            ],
+            default=0,
+        )
 
-@rl_app.command()
-def init(
-    directory: str = typer.Argument(".", help="Environment directory or Docker image"),
-    output: Path = typer.Option(None, "--output", "-o", help="Output config file path"),  # noqa: B008
-    force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing config"),
-    build: bool = typer.Option(False, "--build", "-b", help="Build environment if no lock file"),
+        if model == "custom":
+            console.print("Enter the model name (HuggingFace ID):")
+            model = input().strip()
+
+    # Validate model is a VL model (whether provided via CLI or selected)
+    if model:
+        try:
+            validate_vl_model(model)
+        except ValueError as e:
+            console.print(f"\n[red]❌ {e}[/red]")
+            raise typer.Exit(1) from e
+    else:
+        raise typer.Exit(1)
+
+    # Step 4: Generate or load configuration
+    if config_file:
+        console.print(f"\n[cyan]Loading configuration from: {config_file}[/cyan]")
+        config = load_config(config_file)
+
+        # Validate model from config
+        if hasattr(config, "model") and hasattr(config.model, "base_model"):
+            try:
+                validate_vl_model(config.model.base_model)
+            except ValueError as e:
+                console.print(f"\n[red]❌ {e}[/red]")
+                raise typer.Exit(1) from e
+
+        # Estimate memory for display
+        from .presets import estimate_memory_usage
+
+        estimated_memory = estimate_memory_usage(
+            config.training.mini_batch_size,
+            config.actor.max_steps_per_episode,
+            config.actor.max_new_tokens,
+            config.model.max_pixels,
+        )
+    else:
+        console.print("\n[cyan]Generating training configuration...[/cyan]")
+        # Get number of GPUs for preset scaling
+        num_training_gpus = 1  # Default, will be adjusted later
+        if len(gpu_info["devices"]) > 2:
+            # If we have many GPUs, presets will show scaled values
+            num_training_gpus = len(gpu_info["devices"]) - 1  # Reserve 1 for vLLM
+            console.print(
+                f"[yellow]Note: Episodes will be scaled for {num_training_gpus} training GPUs[/yellow]\n"  # noqa: E501
+            )
+
+        presets = get_training_presets(gpu_memory_gb)
+        config, estimated_memory = generate_config_interactive(
+            model_name=model,
+            presets=presets,
+        )
+
+    # Step 5: Save temporary config and display summary
+    temp_config_path = Path(".rl_config_temp.json")
+    save_config(config, temp_config_path)
+    console.print(f"\n[cyan]📝 Configuration saved to: {temp_config_path.absolute()}[/cyan]")
+    console.print("[yellow]You can edit this file before starting training.[/yellow]")
+
+    # Display configuration summary
+    display_config_summary(config, len(tasks), gpu_info, estimated_memory)
+
+    # Step 6: Ask for confirmation (skip if config was provided)
+    if not config_file:
+        console.print("\n[bold yellow]Options:[/bold yellow]")
+        console.print("  • Type [green]'start'[/green] to begin training")
+        console.print("  • Type [cyan]'edit'[/cyan] to open config in your editor")
+        console.print("  • Type [red]'cancel'[/red] to abort")
+        console.print("\n[bold]Your choice:[/bold] ", end="")
+
+        while True:
+            choice = input().strip().lower()
+
+            if choice == "start":
+                # Reload config in case it was edited
+                config = load_config(temp_config_path)
+                break
+            elif choice == "edit":
+                # Default to nano if EDITOR is not set
+                editor = os.environ.get("EDITOR", "nano")
+
+                # Show nano instructions if using nano
+                if editor == "nano":
+                    console.print("\n[cyan]Opening config in nano editor...[/cyan]")
+                    console.print("[yellow]Tips:[/yellow]")
+                    console.print("  • Edit the configuration values as needed")
+                    console.print("  • Press [bold]Ctrl+O[/bold] then [bold]Enter[/bold] to save")
+                    console.print("  • Press [bold]Ctrl+X[/bold] to exit")
+                    console.print("  • Press [bold]Ctrl+C[/bold] to cancel without saving\n")
+                    input("Press Enter to continue...")
+
+                try:
+                    subprocess.run([editor, str(temp_config_path)], check=True)  # noqa: S603
+                    # Reload and display updated config
+                    config = load_config(temp_config_path)
+                    estimated_memory = estimate_memory_usage(
+                        config.training.mini_batch_size,
+                        config.actor.max_steps_per_episode,
+                        config.actor.max_new_tokens,
+                        config.model.max_pixels,
+                    )
+                    display_config_summary(config, len(tasks), gpu_info, estimated_memory)
+                    console.print(
+                        "\n[bold]Type 'start' to begin or 'cancel' to abort:[/bold] ", end=""
+                    )
+                except subprocess.CalledProcessError:
+                    console.print(
+                        "\n[yellow]Editor closed without saving or was cancelled.[/yellow]"
+                    )
+                    console.print("[bold]Your choice:[/bold] ", end="")
+                except Exception as e:
+                    console.print(f"\n[red]Failed to open editor: {e}[/red]")
+                    console.print(
+                        f"[yellow]Please edit {temp_config_path} manually and type 'start' when ready.[/yellow]"  # noqa: E501
+                    )
+                    console.print("[bold]Your choice:[/bold] ", end="")
+            elif choice == "cancel":
+                console.print("[red]Training cancelled[/red]")
+
+                # Ask if they want to save the config
+                if typer.confirm("Save this configuration for later?", default=True):
+                    config_path = Path("rl_config.json")
+                    save_config(config, config_path)
+
+                # Clean up temp file
+                try:
+                    temp_config_path.unlink()
+                except Exception as e:
+                    hud_console.warning(f"Failed to clean up temp config: {e}")
+
+                raise typer.Exit(0)
+            else:
+                console.print(
+                    "[red]Invalid choice. Type 'start', 'edit', or 'cancel':[/red] ", end=""
+                )
+    else:
+        # Config was provided, proceed directly
+        console.print("\n[dim]Using provided configuration file...[/dim]")
+        config = load_config(temp_config_path)
+
+    # Step 7: Determine if DDP should be used
+    num_gpus = len(gpu_info["devices"])
+    use_ddp = False
+    training_gpus = [0]  # Default single GPU
+    vllm_gpu_idx = 1 if num_gpus > 1 else 0
+
+    if num_gpus > 2 and not no_ddp:
+        console.print(f"\n[cyan]🚀 Detected {num_gpus} GPUs - checking DDP configuration...[/cyan]")
+
+        # Calculate optimal GPU allocation
+        gpu_allocation = calculate_optimal_gpu_allocation(gpu_info, config)
+
+        if gpu_allocation["use_ddp"]:
+            use_ddp = True
+            training_gpus = gpu_allocation["training_gpus"]
+            vllm_gpu_idx = gpu_allocation["vllm_gpu"]
+
+            console.print(
+                f"[green]✅ Will use DDP with {len(training_gpus)} GPUs for training[/green]"
+            )
+            console.print(f"[green]✅ GPU {vllm_gpu_idx} reserved for vLLM server[/green]")
+
+            # Show details
+            console.print("\n[cyan]Training Configuration:[/cyan]")
+            console.print(f"  • Groups to process: {gpu_allocation['num_groups']}")
+            console.print(f"  • Training GPUs: {training_gpus}")
+            console.print(f"  • Groups per GPU: {gpu_allocation.get('groups_per_gpu', 'N/A'):.1f}")
+
+            # Warn about efficiency
+            if gpu_allocation.get("parallel_efficiency", 1.0) < 0.8:
+                console.print(
+                    f"\n[yellow]⚠️ GPU efficiency: {gpu_allocation['parallel_efficiency'] * 100:.0f}%[/yellow]"  # noqa: E501
+                )
+                console.print(
+                    f"[yellow]Consider adjusting batch_size to {len(training_gpus) * config.training.group_size} for optimal performance[/yellow]"  # noqa: E501
+                )
+        else:
+            console.print(f"[cyan]{gpu_allocation.get('reason', 'Using single GPU')}[/cyan]")
+
+    # Allow manual override
+    if ddp_gpus is not None:
+        requested_gpus = [int(x) for x in ddp_gpus.split(",")]
+        console.print(f"[cyan]Manual GPU selection: {requested_gpus}[/cyan]")
+        # Validate requested GPUs are in the healthy set
+        available_indices = [d["index"] for d in gpu_info["devices"]]
+        invalid_gpus = [g for g in requested_gpus if g not in available_indices]
+        if invalid_gpus:
+            console.print(f"[red]❌ Invalid/unhealthy GPU(s) requested: {invalid_gpus}[/red]")
+            console.print(f"[yellow]Available healthy GPUs: {available_indices}[/yellow]")
+            raise typer.Exit(1)
+        training_gpus = requested_gpus
+        use_ddp = len(training_gpus) > 1
+
+    if vllm_gpu is not None:
+        vllm_gpu_idx = vllm_gpu
+        console.print(f"[cyan]Manual vLLM GPU: {vllm_gpu_idx}[/cyan]")
+        # Validate vLLM GPU is in the healthy set
+        available_indices = [d["index"] for d in gpu_info["devices"]]
+        if vllm_gpu_idx not in available_indices:
+            console.print(f"[red]❌ vLLM GPU {vllm_gpu_idx} is not available/healthy![/red]")
+            console.print(f"[yellow]Available healthy GPUs: {available_indices}[/yellow]")
+            raise typer.Exit(1)
+
+    # Ensure we have at least one training GPU
+    if not training_gpus:
+        console.print("[red]❌ No available GPUs for training![/red]")
+        raise typer.Exit(1)
+
+    # Always adjust batch_size based on number of training GPUs
+    config = adjust_config_for_ddp(config, len(training_gpus))
+
+    # Save updated config (for both DDP and single GPU)
+    save_config(config, temp_config_path)
+
+    # Step 8: Start vLLM server (unless we're using a remote one)
+    if not skip_vllm_startup:
+        console.print(f"\n[cyan]Setting up vLLM server on GPU {vllm_gpu_idx}...[/cyan]")
+
+        start_vllm_server(config.model.base_model, vllm_gpu_idx, restart=restart)
+
+        # Wait for server to be ready
+        server_ready = asyncio.run(wait_for_vllm_server())
+        if not server_ready:
+            console.print("[red]❌ Failed to start vLLM server[/red]")
+            raise typer.Exit(1)
+    else:
+        console.print("\n[cyan]Using remote vLLM server (skipping local startup)[/cyan]")
+
+    # Step 9: Run training (DDP or single GPU)
+    if use_ddp:
+        console.print(
+            f"\n[bold green]🎯 Starting DDP training on {len(training_gpus)} GPUs...[/bold green]\n"
+        )
+        launch_ddp_training(training_gpus, tasks_file, temp_config_path, verbose)
+        console.print("\n[green]✅ Training completed successfully![/green]")
+    else:
+        console.print("\n[bold green]🎯 Starting single-GPU training...[/bold green]\n")
+        try:
+            # Set verbose in config instead of passing as parameter
+            if verbose:
+                config.verbose = True
+
+            # Run the async training function
+            asyncio.run(train(config, tasks))
+            console.print("\n[green]✅ Training completed successfully![/green]")
+
+            # Clean up temp config file
+            try:
+                temp_config_path.unlink()
+            except Exception as e:
+                hud_console.warning(f"Failed to clean up temp config: {e}")
+
+        except KeyboardInterrupt:
+            console.print("\n[yellow]Training interrupted by user[/yellow]")
+            raise typer.Exit(1)  # noqa: B904
+        except Exception as e:
+            console.print(f"\n[red]❌ Training failed: {e}[/red]")
+            raise typer.Exit(1) from e
+
+
+def launch_ddp_training(
+    training_gpus: list[int], tasks_file: str, config_path: Path, verbose: bool
 ) -> None:
-    """🔧 Generate hud-vf-gym config from environment.
+    """Launch DDP training with torchrun."""
+    import subprocess
+    import sys
+
+    # Prepare environment
+    env = os.environ.copy()
+    env["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, training_gpus))
+
+    if not verbose:
+        env["HUD_LOG_LEVEL"] = "WARNING"
+
+    # Build command
+    cmd = [
+        sys.executable,
+        "-m",
+        "torch.distributed.run",
+        f"--nproc_per_node={len(training_gpus)}",
+        "--master_port=29500",
+        "-m",
+        "hud.rl.train",
+        "--config",
+        str(config_path),
+        "--tasks",
+        tasks_file,
+    ]
+
+    # Add verbose flag if enabled
+    if verbose:
+        cmd.append("--verbose")
 
-    Generates a YAML configuration file compatible with the hud-vf-gym adapter
-    from either a directory with hud.lock.yaml or a Docker image.
+    try:
+        # Run DDP training
+        subprocess.run(cmd, env=env, check=True)  # noqa: S603
+    except subprocess.CalledProcessError as e:
+        console.print(f"\n[red]❌ DDP training failed with exit code {e.returncode}[/red]")
+        raise typer.Exit(1) from e
+    finally:
+        # Cleanup temp config
+        try:
+            config_path.unlink()
+        except Exception as e:
+            hud_console.warning(f"Failed to clean up temp config: {e}")
 
-    Examples:
-        hud rl init                    # Use current directory
-        hud rl init environments/test  # Use specific directory
-        hud rl init my-env:latest      # Use Docker image directly
-        hud rl init . -o configs/2048.yaml --build
-    """
-    from .init import init_command_wrapper
 
-    init_command_wrapper(directory, output, force, build)
+# Export the command function
+__all__ = ["rl_command"]