hud-python 0.4.28__py3-none-any.whl → 0.4.29__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of hud-python has been flagged as possibly problematic.
- hud/__init__.py +2 -1
- hud/agents/base.py +73 -45
- hud/agents/claude.py +8 -4
- hud/agents/openai_chat_generic.py +65 -40
- hud/agents/tests/test_base.py +0 -4
- hud/agents/tests/test_openai.py +1 -1
- hud/cli/__init__.py +182 -52
- hud/cli/dev.py +8 -9
- hud/cli/eval.py +317 -119
- hud/cli/flows/__init__.py +0 -0
- hud/cli/flows/tasks.py +0 -0
- hud/cli/get.py +160 -0
- hud/cli/rl/__init__.py +563 -71
- hud/cli/rl/config.py +94 -0
- hud/cli/rl/display.py +133 -0
- hud/cli/rl/gpu.py +63 -0
- hud/cli/rl/gpu_utils.py +318 -0
- hud/cli/rl/presets.py +96 -0
- hud/cli/rl/remote_runner.py +348 -0
- hud/cli/rl/rl_api.py +150 -0
- hud/cli/rl/vllm.py +177 -0
- hud/cli/tests/test_analyze_metadata.py +0 -1
- hud/cli/utils/tasks.py +26 -0
- hud/clients/base.py +21 -23
- hud/clients/mcp_use.py +36 -44
- hud/clients/tests/test_mcp_use_retry.py +10 -10
- hud/datasets/__init__.py +4 -3
- hud/datasets/{execution/parallel.py → parallel.py} +1 -1
- hud/datasets/{execution/runner.py → runner.py} +1 -1
- hud/datasets/utils.py +1 -1
- hud/native/tests/test_native_init.py +1 -1
- hud/otel/config.py +1 -1
- hud/otel/instrumentation.py +35 -0
- hud/rl/README.md +31 -0
- hud/rl/__init__.py +1 -0
- hud/rl/actor.py +174 -0
- hud/rl/buffer.py +371 -0
- hud/rl/chat_template.jinja +101 -0
- hud/rl/config.py +184 -0
- hud/rl/distributed.py +95 -0
- hud/rl/learner.py +586 -0
- hud/rl/tests/__init__.py +1 -0
- hud/rl/tests/test_learner.py +171 -0
- hud/rl/train.py +354 -0
- hud/rl/types.py +101 -0
- hud/rl/utils/start_vllm_server.sh +30 -0
- hud/rl/utils.py +524 -0
- hud/rl/vllm_adapter.py +125 -0
- hud/settings.py +6 -0
- hud/telemetry/__init__.py +2 -1
- hud/telemetry/job.py +46 -3
- hud/telemetry/tests/test_trace.py +3 -3
- hud/telemetry/trace.py +85 -13
- hud/tools/tests/test_computer.py +3 -3
- hud/tools/tests/test_computer_actions.py +1 -1
- hud/types.py +123 -2
- hud/utils/group_eval.py +223 -0
- hud/utils/hud_console.py +113 -13
- hud/utils/tasks.py +119 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/METADATA +20 -2
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/RECORD +66 -46
- hud/cli/hf.py +0 -406
- hud/cli/rl/README.md +0 -243
- hud/cli/rl/init.py +0 -370
- hud/cli/rl/pod.py +0 -501
- hud/cli/rl/ssh.py +0 -322
- hud/cli/rl/train.py +0 -562
- hud/cli/rl/utils.py +0 -165
- hud/datasets/execution/__init__.py +0 -13
- hud/datasets/task.py +0 -116
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/WHEEL +0 -0
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/__init__.py
CHANGED
@@ -1,91 +1,583 @@
-"""
+"""RL training command for HUD CLI."""

 from __future__ import annotations

-
+import asyncio
+import logging
+import os
+import subprocess
+import sys
+from pathlib import Path

 import typer
+from rich.console import Console

-from hud.utils.
+from hud.cli.utils.tasks import find_tasks_file
+from hud.rl.config import validate_vl_model
+from hud.rl.train import train

-#
-
-
-    help="🤖 Reinforcement learning commands for HUD environments",
-    rich_markup_mode="rich",
-)
+# Then import HUD modules
+from hud.utils.hud_console import hud_console
+from hud.utils.tasks import load_tasks

-
+from .config import generate_config_interactive, load_config, save_config
+from .display import display_config_summary, display_gpu_info

+# Import local modules first
+from .gpu import detect_cuda_devices, validate_gpu_memory
+from .gpu_utils import adjust_config_for_ddp, calculate_optimal_gpu_allocation, health_check_gpus
+from .presets import get_training_presets
+from .vllm import start_vllm_server, wait_for_vllm_server

-
-
-
-
-
+console = Console()
+
+
+def rl_command(
+    tasks_file: str | None = typer.Argument(
+        None,
+        help="Path to tasks file (JSON/JSONL) or HuggingFace dataset name",
+    ),
+    model: str | None = typer.Argument(
+        None,
+        help="Model to train (default: interactive selection)",
+    ),
+    config_file: Path | None = typer.Option(  # noqa: B008
+        None,
+        "--config",
+        "-c",
+        help="Path to existing configuration file",
+    ),
+    output_dir: str = typer.Option(
+        "/checkpoints",
+        "--output-dir",
+        "-o",
+        help="Output directory for checkpoints",
+    ),
+    restart: bool = typer.Option(
+        False,
+        "--restart",
+        help="Restart the vLLM server before training",
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        "-v",
+        help="Enable verbose output",
+    ),
+    # DDP options
+    no_ddp: bool = typer.Option(
+        False,
+        "--no-ddp",
+        help="Disable DDP even with multiple GPUs",
+    ),
+    ddp_gpus: str | None = typer.Option(
         None,
-        "--
-        "
-
+        "--ddp-gpus",
+        help="Specific GPUs for DDP (e.g., '0,1,2,3')",
+    ),
+    vllm_gpu: int | None = typer.Option(
+        None,
+        "--vllm-gpu",
+        help="Specific GPU for vLLM server",
+    ),
+    # Execution mode options
+    local: bool = typer.Option(
+        False,
+        "--local",
+        help="Run training locally instead of using remote API server",
+    ),
+    # Internal flag
+    skip_vllm_startup: bool = typer.Option(
+        False,
+        hidden=True,
+        help="Skip local vLLM server startup (for internal use)",
     ),
-    config: Path | None = typer.Option(None, "--config", "-c", help="Config YAML path"),  # noqa: B008
-    gpus: str = typer.Option("2xA100", "--gpus", help="GPU configuration (e.g., 2xA100, 4xH100)"),
-    provider: str = typer.Option("prime", "--provider", help="Infrastructure provider"),
-    output_dir: Path = typer.Option("./checkpoints", "--output", "-o", help="Output directory"),  # noqa: B008
 ) -> None:
-    """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    """Run GRPO reinforcement learning training on tasks."""
+    # Configure logging based on verbose flag BEFORE any output
+    if not verbose:
+        # Set environment variable for HUD components
+        os.environ["HUD_LOG_LEVEL"] = "WARNING"
+
+        # Configure logging levels
+        logging.basicConfig(level=logging.WARNING, force=True)
+
+        # Get root logger and set its level
+        root_logger = logging.getLogger()
+        root_logger.setLevel(logging.WARNING)
+
+        # Suppress INFO logs from various components
+        for logger_name in [
+            "httpx",
+            "hud.agents",
+            "hud.utils.design",
+            "hud",
+            "asyncio",
+            "transformers",
+        ]:
+            logging.getLogger(logger_name).setLevel(logging.WARNING)
+
+        # Also set HUD agent logger explicitly
+        logging.getLogger("hud.agents.base").setLevel(logging.WARNING)
+    else:
+        # In verbose mode, show everything
+        logging.basicConfig(level=logging.INFO)
+
+    hud_console.header("HUD RL Training")
+
+    # Determine execution mode
+    use_remote = not local
+
+    if not tasks_file:
+        tasks_file = find_tasks_file(tasks_file)
+        if not tasks_file:
+            console.print("[red]❌ No tasks file found in current directory[/red]")
+            raise typer.Exit(1)
+
+    # Handle remote execution
+    if use_remote:
+        try:
+            from .remote_runner import run_remote_training
+
+            run_remote_training(
+                tasks_file=tasks_file, model=model, config_file=config_file, output_dir=output_dir
+            )
+            return
+        except Exception as e:
+            console.print(f"[red]❌ Remote training failed: {e!s}[/red]")
+            raise typer.Exit(1) from e
+
+    # Check Python version compatibility
+    python_version = sys.version_info
+    if python_version.major == 3 and python_version.minor >= 13:
+        console.print("[red]⚠️ Warning: Python 3.13+ detected![/red]")
+        console.print("[yellow]vLLM has compatibility issues with Python 3.13.[/yellow]")
+        console.print("[yellow]Recommended: Use Python 3.12 or 3.11[/yellow]")
+        console.print("\n[dim]To create a new environment with Python 3.12:[/dim]")
+        console.print("[dim] 1. Exit this shell: exit[/dim]")
+        console.print("[dim] 2. Remove current venv: sudo rm -rf .venv[/dim]")
+        console.print("[dim] 3. Create new venv: uv venv --python 3.12[/dim]")
+        console.print("[dim] 4. Install dependencies: uv pip install -e '.[rl]'[/dim]")
+
+        if not typer.confirm("\nDo you want to continue anyway?", default=False):
+            raise typer.Exit(1)
+
+    # Step 1: Validate CUDA devices
+    console.print("[yellow]Checking GPU availability...[/yellow]")
+    gpu_info = detect_cuda_devices()
+
+    if not gpu_info["available"]:
+        console.print(f"[red]❌ {gpu_info['error']}[/red]")
+        console.print("[yellow]RL training requires CUDA-capable GPUs[/yellow]")
+        raise typer.Exit(1)
+
+    display_gpu_info(gpu_info)
+
+    # Perform GPU health check
+    all_gpu_indices = [device["index"] for device in gpu_info["devices"]]
+    health_results = health_check_gpus(all_gpu_indices)
+
+    if not health_results["all_healthy"]:
+        console.print("\n[yellow]⚠️ Some GPUs failed health checks![/yellow]")
+        console.print(
+            f"[yellow]Unhealthy GPUs: {list(health_results['unhealthy_gpus'].keys())}[/yellow]"
+        )
+
+        if not health_results["healthy_gpus"]:
+            console.print("[red]❌ No healthy GPUs available for training![/red]")
+            raise typer.Exit(1)
+
+        console.print(
+            f"\n[cyan]You have {len(health_results['healthy_gpus'])} healthy GPUs available.[/cyan]"
         )

+        continue_training = typer.confirm("\nContinue with healthy GPUs only?", default=True)
+
+        if not continue_training:
+            healthy_str = ",".join(map(str, health_results["healthy_gpus"]))
+            console.print("\n[yellow]Exiting. Please resolve GPU issues and try again.[/yellow]")
+            console.print("\n[cyan]💡 Tip: To use only healthy GPUs, you can run:[/cyan]")
+            console.print(f"[white]hud rl {tasks_file} --ddp-gpus {healthy_str} --local[/white]\n")
+            raise typer.Exit(0)
+        else:
+            # Continue with healthy GPUs only
+            # Update gpu_info to only include healthy GPUs
+            gpu_info["devices"] = [
+                d for d in gpu_info["devices"] if d["index"] in health_results["healthy_gpus"]
+            ]
+            console.print(
+                f"\n[green]✅ Continuing with {len(gpu_info['devices'])} healthy GPUs[/green]"
+            )
+
+    # Get primary GPU memory for configuration
+    primary_gpu = gpu_info["devices"][0]
+    gpu_memory_gb = primary_gpu["memory_gb"]
+
+    # Validate GPU memory for 3B model
+    if not validate_gpu_memory(gpu_memory_gb, "3B"):
+        console.print(f"[red]❌ Insufficient GPU memory ({gpu_memory_gb:.1f} GB)[/red]")
+        console.print("[yellow]Qwen 2.5 VL 3B requires at least 12 GB of GPU memory[/yellow]")
+        raise typer.Exit(1)
+
+    # Step 2: Load tasks
+    if tasks_file:
+        console.print(f"\n[cyan]Loading tasks from: {tasks_file}[/cyan]")
+    else:
+        # Auto-detect tasks file
+        possible_files = ["tasks.json", "tasks.jsonl", "browser_2048_tasks.jsonl"]
+        for f in possible_files:
+            if Path(f).exists():
+                tasks_file = f
+                console.print(f"[green]Auto-detected tasks file: {f}[/green]")
+                break
+
+        if not tasks_file:
+            console.print("[red]❌ No tasks file specified or auto-detected[/red]")
+            console.print(
+                "[yellow]Please provide a tasks file or create one of: tasks.json, tasks.jsonl[/yellow]"  # noqa: E501
+            )
+            raise typer.Exit(1)
+
+    # Load the tasks
+    tasks = load_tasks(tasks_file)
+    console.print(f"[green]✅ Loaded {len(tasks)} tasks[/green]")
+
+    # Validate tasks
+    invalid_tasks = []
+    for i, task in enumerate(tasks):
+        if not hasattr(task, "prompt") or not task.prompt:
+            invalid_tasks.append(f"Task {i}: missing 'prompt' field")
+        if not hasattr(task, "mcp_config") or not task.mcp_config:
+            invalid_tasks.append(f"Task {i}: missing 'mcp_config' field")
+
+    if invalid_tasks:
+        console.print("[red]❌ Invalid tasks found:[/red]")
+        for error in invalid_tasks[:5]:  # Show first 5 errors
+            console.print(f" - {error}")
+        if len(invalid_tasks) > 5:
+            console.print(f" ... and {len(invalid_tasks) - 5} more")
+        raise typer.Exit(1)
+
+    # Step 3: Model selection (if not provided)
+    if model is None and not config_file:
+        model = hud_console.select(
+            "Select a model for RL training:",
+            choices=[
+                {
+                    "name": "Qwen 2.5 VL 3B (Recommended - Vision-Language)",
+                    "value": "Qwen/Qwen2.5-VL-3B-Instruct",
+                },
+                {"name": "Custom model", "value": "custom"},
+            ],
+            default=0,
+        )

-
-
-
-
-
-
+        if model == "custom":
+            console.print("Enter the model name (HuggingFace ID):")
+            model = input().strip()
+
+    # Validate model is a VL model (whether provided via CLI or selected)
+    if model:
+        try:
+            validate_vl_model(model)
+        except ValueError as e:
+            console.print(f"\n[red]❌ {e}[/red]")
+            raise typer.Exit(1) from e
+    else:
+        raise typer.Exit(1)
+
+    # Step 4: Generate or load configuration
+    if config_file:
+        console.print(f"\n[cyan]Loading configuration from: {config_file}[/cyan]")
+        config = load_config(config_file)
+
+        # Validate model from config
+        if hasattr(config, "model") and hasattr(config.model, "base_model"):
+            try:
+                validate_vl_model(config.model.base_model)
+            except ValueError as e:
+                console.print(f"\n[red]❌ {e}[/red]")
+                raise typer.Exit(1) from e
+
+        # Estimate memory for display
+        from .presets import estimate_memory_usage
+
+        estimated_memory = estimate_memory_usage(
+            config.training.mini_batch_size,
+            config.actor.max_steps_per_episode,
+            config.actor.max_new_tokens,
+            config.model.max_pixels,
+        )
+    else:
+        console.print("\n[cyan]Generating training configuration...[/cyan]")
+        # Get number of GPUs for preset scaling
+        num_training_gpus = 1  # Default, will be adjusted later
+        if len(gpu_info["devices"]) > 2:
+            # If we have many GPUs, presets will show scaled values
+            num_training_gpus = len(gpu_info["devices"]) - 1  # Reserve 1 for vLLM
+            console.print(
+                f"[yellow]Note: Episodes will be scaled for {num_training_gpus} training GPUs[/yellow]\n"  # noqa: E501
+            )
+
+        presets = get_training_presets(gpu_memory_gb)
+        config, estimated_memory = generate_config_interactive(
+            model_name=model,
+            presets=presets,
+        )
+
+    # Step 5: Save temporary config and display summary
+    temp_config_path = Path(".rl_config_temp.json")
+    save_config(config, temp_config_path)
+    console.print(f"\n[cyan]📝 Configuration saved to: {temp_config_path.absolute()}[/cyan]")
+    console.print("[yellow]You can edit this file before starting training.[/yellow]")
+
+    # Display configuration summary
+    display_config_summary(config, len(tasks), gpu_info, estimated_memory)
+
+    # Step 6: Ask for confirmation (skip if config was provided)
+    if not config_file:
+        console.print("\n[bold yellow]Options:[/bold yellow]")
+        console.print(" • Type [green]'start'[/green] to begin training")
+        console.print(" • Type [cyan]'edit'[/cyan] to open config in your editor")
+        console.print(" • Type [red]'cancel'[/red] to abort")
+        console.print("\n[bold]Your choice:[/bold] ", end="")
+
+        while True:
+            choice = input().strip().lower()
+
+            if choice == "start":
+                # Reload config in case it was edited
+                config = load_config(temp_config_path)
+                break
+            elif choice == "edit":
+                # Default to nano if EDITOR is not set
+                editor = os.environ.get("EDITOR", "nano")
+
+                # Show nano instructions if using nano
+                if editor == "nano":
+                    console.print("\n[cyan]Opening config in nano editor...[/cyan]")
+                    console.print("[yellow]Tips:[/yellow]")
+                    console.print(" • Edit the configuration values as needed")
+                    console.print(" • Press [bold]Ctrl+O[/bold] then [bold]Enter[/bold] to save")
+                    console.print(" • Press [bold]Ctrl+X[/bold] to exit")
+                    console.print(" • Press [bold]Ctrl+C[/bold] to cancel without saving\n")
+                    input("Press Enter to continue...")
+
+                try:
+                    subprocess.run([editor, str(temp_config_path)], check=True)  # noqa: S603
+                    # Reload and display updated config
+                    config = load_config(temp_config_path)
+                    estimated_memory = estimate_memory_usage(
+                        config.training.mini_batch_size,
+                        config.actor.max_steps_per_episode,
+                        config.actor.max_new_tokens,
+                        config.model.max_pixels,
+                    )
+                    display_config_summary(config, len(tasks), gpu_info, estimated_memory)
+                    console.print(
+                        "\n[bold]Type 'start' to begin or 'cancel' to abort:[/bold] ", end=""
+                    )
+                except subprocess.CalledProcessError:
+                    console.print(
+                        "\n[yellow]Editor closed without saving or was cancelled.[/yellow]"
+                    )
+                    console.print("[bold]Your choice:[/bold] ", end="")
+                except Exception as e:
+                    console.print(f"\n[red]Failed to open editor: {e}[/red]")
+                    console.print(
+                        f"[yellow]Please edit {temp_config_path} manually and type 'start' when ready.[/yellow]"  # noqa: E501
+                    )
+                    console.print("[bold]Your choice:[/bold] ", end="")
+            elif choice == "cancel":
+                console.print("[red]Training cancelled[/red]")
+
+                # Ask if they want to save the config
+                if typer.confirm("Save this configuration for later?", default=True):
+                    config_path = Path("rl_config.json")
+                    save_config(config, config_path)
+
+                # Clean up temp file
+                try:
+                    temp_config_path.unlink()
+                except Exception as e:
+                    hud_console.warning(f"Failed to clean up temp config: {e}")
+
+                raise typer.Exit(0)
+            else:
+                console.print(
+                    "[red]Invalid choice. Type 'start', 'edit', or 'cancel':[/red] ", end=""
+                )
+    else:
+        # Config was provided, proceed directly
+        console.print("\n[dim]Using provided configuration file...[/dim]")
+        config = load_config(temp_config_path)
+
+    # Step 7: Determine if DDP should be used
+    num_gpus = len(gpu_info["devices"])
+    use_ddp = False
+    training_gpus = [0]  # Default single GPU
+    vllm_gpu_idx = 1 if num_gpus > 1 else 0
+
+    if num_gpus > 2 and not no_ddp:
+        console.print(f"\n[cyan]🚀 Detected {num_gpus} GPUs - checking DDP configuration...[/cyan]")
+
+        # Calculate optimal GPU allocation
+        gpu_allocation = calculate_optimal_gpu_allocation(gpu_info, config)
+
+        if gpu_allocation["use_ddp"]:
+            use_ddp = True
+            training_gpus = gpu_allocation["training_gpus"]
+            vllm_gpu_idx = gpu_allocation["vllm_gpu"]
+
+            console.print(
+                f"[green]✅ Will use DDP with {len(training_gpus)} GPUs for training[/green]"
+            )
+            console.print(f"[green]✅ GPU {vllm_gpu_idx} reserved for vLLM server[/green]")
+
+            # Show details
+            console.print("\n[cyan]Training Configuration:[/cyan]")
+            console.print(f" • Groups to process: {gpu_allocation['num_groups']}")
+            console.print(f" • Training GPUs: {training_gpus}")
+            console.print(f" • Groups per GPU: {gpu_allocation.get('groups_per_gpu', 'N/A'):.1f}")
+
+            # Warn about efficiency
+            if gpu_allocation.get("parallel_efficiency", 1.0) < 0.8:
+                console.print(
+                    f"\n[yellow]⚠️ GPU efficiency: {gpu_allocation['parallel_efficiency'] * 100:.0f}%[/yellow]"  # noqa: E501
+                )
+                console.print(
+                    f"[yellow]Consider adjusting batch_size to {len(training_gpus) * config.training.group_size} for optimal performance[/yellow]"  # noqa: E501
+                )
+        else:
+            console.print(f"[cyan]{gpu_allocation.get('reason', 'Using single GPU')}[/cyan]")
+
+    # Allow manual override
+    if ddp_gpus is not None:
+        requested_gpus = [int(x) for x in ddp_gpus.split(",")]
+        console.print(f"[cyan]Manual GPU selection: {requested_gpus}[/cyan]")
+        # Validate requested GPUs are in the healthy set
+        available_indices = [d["index"] for d in gpu_info["devices"]]
+        invalid_gpus = [g for g in requested_gpus if g not in available_indices]
+        if invalid_gpus:
+            console.print(f"[red]❌ Invalid/unhealthy GPU(s) requested: {invalid_gpus}[/red]")
+            console.print(f"[yellow]Available healthy GPUs: {available_indices}[/yellow]")
+            raise typer.Exit(1)
+        training_gpus = requested_gpus
+        use_ddp = len(training_gpus) > 1
+
+    if vllm_gpu is not None:
+        vllm_gpu_idx = vllm_gpu
+        console.print(f"[cyan]Manual vLLM GPU: {vllm_gpu_idx}[/cyan]")
+        # Validate vLLM GPU is in the healthy set
+        available_indices = [d["index"] for d in gpu_info["devices"]]
+        if vllm_gpu_idx not in available_indices:
+            console.print(f"[red]❌ vLLM GPU {vllm_gpu_idx} is not available/healthy![/red]")
+            console.print(f"[yellow]Available healthy GPUs: {available_indices}[/yellow]")
+            raise typer.Exit(1)
+
+    # Ensure we have at least one training GPU
+    if not training_gpus:
+        console.print("[red]❌ No available GPUs for training![/red]")
+        raise typer.Exit(1)
+
+    # Always adjust batch_size based on number of training GPUs
+    config = adjust_config_for_ddp(config, len(training_gpus))
+
+    # Save updated config (for both DDP and single GPU)
+    save_config(config, temp_config_path)
+
+    # Step 8: Start vLLM server (unless we're using a remote one)
+    if not skip_vllm_startup:
+        console.print(f"\n[cyan]Setting up vLLM server on GPU {vllm_gpu_idx}...[/cyan]")
+
+        start_vllm_server(config.model.base_model, vllm_gpu_idx, restart=restart)
+
+        # Wait for server to be ready
+        server_ready = asyncio.run(wait_for_vllm_server())
+        if not server_ready:
+            console.print("[red]❌ Failed to start vLLM server[/red]")
+            raise typer.Exit(1)
+    else:
+        console.print("\n[cyan]Using remote vLLM server (skipping local startup)[/cyan]")
+
+    # Step 9: Run training (DDP or single GPU)
+    if use_ddp:
+        console.print(
+            f"\n[bold green]🎯 Starting DDP training on {len(training_gpus)} GPUs...[/bold green]\n"
+        )
+        launch_ddp_training(training_gpus, tasks_file, temp_config_path, verbose)
+        console.print("\n[green]✅ Training completed successfully![/green]")
+    else:
+        console.print("\n[bold green]🎯 Starting single-GPU training...[/bold green]\n")
+        try:
+            # Set verbose in config instead of passing as parameter
+            if verbose:
+                config.verbose = True
+
+            # Run the async training function
+            asyncio.run(train(config, tasks))
+            console.print("\n[green]✅ Training completed successfully![/green]")
+
+            # Clean up temp config file
+            try:
+                temp_config_path.unlink()
+            except Exception as e:
+                hud_console.warning(f"Failed to clean up temp config: {e}")
+
+        except KeyboardInterrupt:
+            console.print("\n[yellow]Training interrupted by user[/yellow]")
+            raise typer.Exit(1)  # noqa: B904
+        except Exception as e:
+            console.print(f"\n[red]❌ Training failed: {e}[/red]")
+            raise typer.Exit(1) from e
+
+
+def launch_ddp_training(
+    training_gpus: list[int], tasks_file: str, config_path: Path, verbose: bool
 ) -> None:
-    """
+    """Launch DDP training with torchrun."""
+    import subprocess
+    import sys
+
+    # Prepare environment
+    env = os.environ.copy()
+    env["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, training_gpus))
+
+    if not verbose:
+        env["HUD_LOG_LEVEL"] = "WARNING"
+
+    # Build command
+    cmd = [
+        sys.executable,
+        "-m",
+        "torch.distributed.run",
+        f"--nproc_per_node={len(training_gpus)}",
+        "--master_port=29500",
+        "-m",
+        "hud.rl.train",
+        "--config",
+        str(config_path),
+        "--tasks",
+        tasks_file,
+    ]
+
+    # Add verbose flag if enabled
+    if verbose:
+        cmd.append("--verbose")

-
-
+    try:
+        # Run DDP training
+        subprocess.run(cmd, env=env, check=True)  # noqa: S603
+    except subprocess.CalledProcessError as e:
+        console.print(f"\n[red]❌ DDP training failed with exit code {e.returncode}[/red]")
+        raise typer.Exit(1) from e
+    finally:
+        # Cleanup temp config
+        try:
+            config_path.unlink()
+        except Exception as e:
+            hud_console.warning(f"Failed to clean up temp config: {e}")

-    Examples:
-        hud rl init # Use current directory
-        hud rl init environments/test # Use specific directory
-        hud rl init my-env:latest # Use Docker image directly
-        hud rl init . -o configs/2048.yaml --build
-    """
-    from .init import init_command_wrapper

-
+# Export the command function
+__all__ = ["rl_command"]