hud-python 0.4.30__py3-none-any.whl → 0.4.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

@@ -0,0 +1,571 @@
1
+ """
2
+ Local runner for HUD RL training.
3
+
4
+ This module encapsulates the local training flow and imports heavy
5
+ dependencies (torch, transformers, etc.) only when actually running
6
+ locally. The CLI entrypoint should import this module lazily to avoid
7
+ pulling heavy deps during remote-only usage.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import os
14
+ import subprocess
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ from rich.console import Console
19
+
20
+ from hud.rl.config import validate_vl_model
21
+ from hud.utils.hud_console import hud_console
22
+ from hud.utils.tasks import load_tasks
23
+
24
+ console = Console()
25
+
26
+
27
def _cli_exit(code: int) -> BaseException:
    """Build the exception that terminates the current CLI command with *code*.

    Returns ``typer.Exit(code)`` when typer is importable and ``SystemExit(code)``
    otherwise. The caller is expected to ``raise`` the returned object.

    The exception is built here and raised at the call site (outside any
    ``except Exception`` block) on purpose: ``typer.Exit`` is click's ``Exit``,
    which derives from ``RuntimeError``, so raising it inside
    ``try: ... except Exception: return`` silently discards the exit code.
    """
    try:
        import typer
    except ImportError:
        return SystemExit(code)
    return typer.Exit(code)


def run_local_training(
    *,
    tasks_file: str,
    model: str | None,
    config_file: Path | None,
    output_dir: str,
    restart: bool,
    verbose: bool,
    no_ddp: bool,
    ddp_gpus: str | None,
    vllm_gpu: int | None,
    skip_vllm_startup: bool,
) -> None:
    """Run RL training locally on the current machine.

    Heavy modules are imported inside this function to avoid import-time side effects
    during remote-only runs.

    The flow is: warn about Python 3.13+, detect and health-check GPUs, load and
    validate tasks, pick/validate the model, build or load the training config,
    let the user confirm or edit it, decide the GPU allocation, start vLLM and
    finally run single-GPU or DDP training.

    Args:
        tasks_file: Path of the tasks file. When empty, ``tasks.json``,
            ``tasks.jsonl`` and ``browser_2048_tasks.jsonl`` are probed in the
            current directory.
        model: Model identifier. When ``None`` and no ``config_file`` is given
            the user is prompted to choose one.
        config_file: Existing configuration to load. When given, interactive
            config generation and the start/edit/cancel prompt are skipped.
        output_dir: Accepted for interface compatibility; not used by this function.
        restart: Forwarded to ``start_vllm_server``.
        verbose: Sets ``config.verbose`` for single-GPU training and is forwarded
            to ``launch_ddp_training`` for DDP runs.
        no_ddp: Disable the automatic DDP allocation step.
        ddp_gpus: Comma-separated GPU indices to train on (manual override).
        vllm_gpu: GPU index to run vLLM on (manual override).
        skip_vllm_startup: Do not start a local vLLM server.

    Raises:
        typer.Exit: On validation failures, cancellation, or training failure
            (``SystemExit`` if typer cannot be imported).
    """
    # Light-weight utilities
    from .config import generate_config_interactive, load_config, save_config
    from .display import display_config_summary, display_gpu_info
    from .gpu import detect_cuda_devices, validate_gpu_memory
    from .presets import estimate_memory_usage, get_training_presets

    # Python version compatibility warning for vLLM
    python_version = sys.version_info
    if python_version.major == 3 and python_version.minor >= 13:
        console.print("[red]⚠️ Warning: Python 3.13+ detected![/red]")
        console.print("[yellow]vLLM has compatibility issues with Python 3.13.[/yellow]")
        console.print("[yellow]Recommended: Use Python 3.12 or 3.11[/yellow]")
        console.print("\n[dim]To create a new environment with Python 3.12:[/dim]")
        console.print("[dim] 1. Exit this shell: exit[/dim]")
        console.print("[dim] 2. Remove current venv: sudo rm -rf .venv[/dim]")
        console.print("[dim] 3. Create new venv: uv venv --python 3.12[/dim]")
        console.print("[dim] 4. Install dependencies: uv pip install -e '.[rl]'[/dim]")

        try:
            import typer

            proceed = typer.confirm("\nDo you want to continue anyway?", default=False)
        except Exception as e:
            # Only genuine prompt failures end up here; a declined prompt is
            # handled below so it is not misreported as a failure.
            hud_console.warning(f"Failed to confirm: {e}")
            return
        if not proceed:
            raise _cli_exit(1)

    # Step 1: Validate CUDA devices
    console.print("[yellow]Checking GPU availability...[/yellow]")
    gpu_info = detect_cuda_devices()

    if not gpu_info["available"]:
        console.print(f"[red]❌ {gpu_info['error']}[/red]")
        console.print("[yellow]RL training requires CUDA-capable GPUs[/yellow]")
        raise _cli_exit(1)

    display_gpu_info(gpu_info)

    # Perform GPU health check (imports torch lazily)
    all_gpu_indices = [device["index"] for device in gpu_info["devices"]]
    from .gpu_utils import health_check_gpus  # heavy import (torch)

    health_results = health_check_gpus(all_gpu_indices)

    if not health_results["all_healthy"]:
        console.print("\n[yellow]⚠️ Some GPUs failed health checks![/yellow]")
        console.print(
            f"[yellow]Unhealthy GPUs: {list(health_results['unhealthy_gpus'].keys())}[/yellow]"
        )

        if not health_results["healthy_gpus"]:
            console.print("[red]❌ No healthy GPUs available for training![/red]")
            raise _cli_exit(1)

        console.print(
            f"\n[cyan]You have {len(health_results['healthy_gpus'])} healthy GPUs available.[/cyan]"
        )

        try:
            import typer

            continue_training = typer.confirm("\nContinue with healthy GPUs only?", default=True)
        except Exception:
            # Best effort: if the prompt cannot be shown, proceed with the healthy GPUs.
            continue_training = True

        if not continue_training:
            healthy_str = ",".join(map(str, health_results["healthy_gpus"]))
            console.print("\n[yellow]Exiting. Please resolve GPU issues and try again.[/yellow]")
            console.print("\n[cyan]💡 Tip: To use only healthy GPUs, you can run:[/cyan]")
            console.print(f"[white]hud rl {tasks_file} --ddp-gpus {healthy_str} --local[/white]\n")
            raise _cli_exit(0)

        # Continue with healthy GPUs only
        gpu_info["devices"] = [
            d for d in gpu_info["devices"] if d["index"] in health_results["healthy_gpus"]
        ]
        console.print(
            f"\n[green]✅ Continuing with {len(gpu_info['devices'])} healthy GPUs[/green]"
        )

    # Get primary GPU memory for configuration
    primary_gpu = gpu_info["devices"][0]
    gpu_memory_gb = primary_gpu["memory_gb"]

    # Validate GPU memory for 3B model
    if not validate_gpu_memory(gpu_memory_gb, "3B"):
        console.print(f"[red]❌ Insufficient GPU memory ({gpu_memory_gb:.1f} GB)[/red]")
        console.print("[yellow]Qwen 2.5 VL 3B requires at least 12 GB of GPU memory[/yellow]")
        raise _cli_exit(1)

    # Step 2: Load and validate tasks
    if tasks_file:
        console.print(f"\n[cyan]Loading tasks from: {tasks_file}[/cyan]")
    else:
        possible_files = ["tasks.json", "tasks.jsonl", "browser_2048_tasks.jsonl"]
        for f in possible_files:
            if Path(f).exists():
                tasks_file = f
                console.print(f"[green]Auto-detected tasks file: {f}[/green]")
                break

    if not tasks_file:
        console.print("[red]❌ No tasks file specified or auto-detected[/red]")
        console.print(
            "[yellow]Please provide a tasks file or create one of: tasks.json, tasks.jsonl[/yellow]"  # noqa: E501
        )
        raise _cli_exit(1)

    tasks = load_tasks(tasks_file)
    console.print(f"[green]✅ Loaded {len(tasks)} tasks[/green]")

    invalid_tasks: list[str] = []
    for i, task in enumerate(tasks):
        if not hasattr(task, "prompt") or not task.prompt:
            invalid_tasks.append(f"Task {i}: missing 'prompt' field")
        if not hasattr(task, "mcp_config") or not task.mcp_config:
            invalid_tasks.append(f"Task {i}: missing 'mcp_config' field")

    if invalid_tasks:
        console.print("[red]❌ Invalid tasks found:[/red]")
        # Show at most five problems to keep the output readable.
        for error in invalid_tasks[:5]:
            console.print(f" - {error}")
        if len(invalid_tasks) > 5:
            console.print(f" ... and {len(invalid_tasks) - 5} more")
        raise _cli_exit(1)

    # Step 3: Model selection (if not provided)
    if model is None and not config_file:
        model = hud_console.select(
            "Select a model for RL training:",
            choices=[
                {
                    "name": "Qwen 2.5 VL 3B (Recommended - Vision-Language)",
                    "value": "Qwen/Qwen2.5-VL-3B-Instruct",
                },
                {"name": "Custom model", "value": "custom"},
            ],
            default=0,
        )

        if model == "custom":
            console.print("Enter the model name (HuggingFace ID):")
            model = input().strip()

    # Validate model is a VL model (whether provided via CLI or selected)
    if model:
        try:
            validate_vl_model(model)
        except ValueError as e:
            console.print(f"\n[red]❌ {e}[/red]")
            raise _cli_exit(1) from e
    elif not config_file:
        # No model and no config to take one from: nothing to train.
        # (With a config file the model is validated from the config in Step 4.)
        console.print("[red]❌ No model specified[/red]")
        raise _cli_exit(1)

    # Step 4: Generate or load configuration
    if config_file:
        console.print(f"\n[cyan]Loading configuration from: {config_file}[/cyan]")
        config = load_config(config_file)

        # Validate model from config
        if hasattr(config, "model") and hasattr(config.model, "base_model"):
            try:
                validate_vl_model(config.model.base_model)
            except ValueError as e:
                console.print(f"\n[red]❌ {e}[/red]")
                raise _cli_exit(1) from e

        # Estimate memory for display
        estimated_memory = estimate_memory_usage(
            config.training.mini_batch_size,
            config.actor.max_steps_per_episode,
            config.actor.max_new_tokens,
            config.model.max_pixels,
        )
    else:
        console.print("\n[cyan]Generating training configuration...[/cyan]")
        # Get number of GPUs for preset scaling
        if len(gpu_info["devices"]) > 2:
            num_training_gpus = len(gpu_info["devices"]) - 1  # Reserve 1 for vLLM
            console.print(
                f"[yellow]Note: Episodes will be scaled for {num_training_gpus} training GPUs[/yellow]\n"  # noqa: E501
            )

        presets = get_training_presets(gpu_memory_gb)
        config, estimated_memory = generate_config_interactive(
            model_name=model,
            presets=presets,
        )

    # Step 5: Save temporary config and display summary
    temp_config_path = Path(".rl_config_temp.json")
    save_config(config, temp_config_path)
    console.print(f"\n[cyan]📝 Configuration saved to: {temp_config_path.absolute()}[/cyan]")
    console.print("[yellow]You can edit this file before starting training.[/yellow]")

    # Display configuration summary
    display_config_summary(config, len(tasks), gpu_info, estimated_memory)

    # Step 6: Ask for confirmation (skip if config was provided)
    if not config_file:
        console.print("\n[bold yellow]Options:[/bold yellow]")
        console.print(" • Type [green]'start'[/green] to begin training")
        console.print(" • Type [cyan]'edit'[/cyan] to open config in your editor")
        console.print(" • Type [red]'cancel'[/red] to abort")
        console.print("\n[bold]Your choice:[/bold] ", end="")

        while True:
            choice = input().strip().lower()

            if choice == "start":
                config = load_config(temp_config_path)  # Reload config in case it was edited
                break
            elif choice == "edit":
                editor = os.environ.get("EDITOR", "nano")

                if editor == "nano":
                    console.print("\n[cyan]Opening config in nano editor...[/cyan]")
                    console.print("[yellow]Tips:[/yellow]")
                    console.print(" • Edit the configuration values as needed")
                    console.print(" • Press [bold]Ctrl+O[/bold] then [bold]Enter[/bold] to save")
                    console.print(" • Press [bold]Ctrl+X[/bold] to exit")
                    console.print(" • Press [bold]Ctrl+C[/bold] to cancel without saving\n")
                    input("Press Enter to continue...")

                try:
                    subprocess.run([editor, str(temp_config_path)], check=True)  # noqa: S603
                    # Reload and display updated config
                    config = load_config(temp_config_path)
                    estimated_memory = estimate_memory_usage(
                        config.training.mini_batch_size,
                        config.actor.max_steps_per_episode,
                        config.actor.max_new_tokens,
                        config.model.max_pixels,
                    )
                    display_config_summary(config, len(tasks), gpu_info, estimated_memory)
                    console.print(
                        "\n[bold]Type 'start' to begin or 'cancel' to abort:[/bold] ", end=""
                    )
                except subprocess.CalledProcessError:
                    console.print(
                        "\n[yellow]Editor closed without saving or was cancelled.[/yellow]"
                    )
                    console.print("[bold]Your choice:[/bold] ", end="")
                except Exception as e:
                    console.print(f"\n[red]Failed to open editor: {e}[/red]")
                    console.print(
                        f"[yellow]Please edit {temp_config_path} manually and type 'start' when ready.[/yellow]"  # noqa: E501
                    )
                    console.print("[bold]Your choice:[/bold] ", end="")
            elif choice == "cancel":
                console.print("[red]Training cancelled[/red]")
                try:
                    import typer

                    if typer.confirm("Save this configuration for later?", default=True):
                        config_path = Path("rl_config.json")
                        save_config(config, config_path)
                except Exception as e:
                    hud_console.warning(f"Failed to save config: {e}")

                try:
                    temp_config_path.unlink()
                except Exception as e:
                    hud_console.warning(f"Failed to clean up temp config: {e}")

                raise _cli_exit(0)
            else:
                console.print(
                    "[red]Invalid choice. Type 'start', 'edit', or 'cancel':[/red] ", end=""
                )
    else:
        console.print("\n[dim]Using provided configuration file...[/dim]")
        config = load_config(temp_config_path)

    # Step 7: Determine if DDP should be used (imports heavy helpers lazily)
    available_indices = [d["index"] for d in gpu_info["devices"]]
    num_gpus = len(available_indices)
    use_ddp = False
    # Defaults are taken from the devices that survived the health check rather
    # than assuming indices 0 and 1, which may have been filtered out above.
    training_gpus = [available_indices[0]]  # Default single GPU
    vllm_gpu_idx = available_indices[1] if num_gpus > 1 else available_indices[0]

    if num_gpus > 2 and not no_ddp:
        console.print(f"\n[cyan]🚀 Detected {num_gpus} GPUs - checking DDP configuration...[/cyan]")

        from .gpu_utils import calculate_optimal_gpu_allocation  # heavy import (torch at module)

        gpu_allocation = calculate_optimal_gpu_allocation(gpu_info, config)

        if gpu_allocation["use_ddp"]:
            use_ddp = True
            training_gpus = gpu_allocation["training_gpus"]
            vllm_gpu_idx = gpu_allocation["vllm_gpu"]

            console.print(
                f"[green]✅ Will use DDP with {len(training_gpus)} GPUs for training[/green]"
            )
            console.print(f"[green]✅ GPU {vllm_gpu_idx} reserved for vLLM server[/green]")

            # A missing value must not be pushed through the float format spec:
            # formatting the 'N/A' placeholder with ':.1f' raises ValueError.
            groups_per_gpu = gpu_allocation.get("groups_per_gpu")
            groups_per_gpu_text = "N/A" if groups_per_gpu is None else f"{groups_per_gpu:.1f}"

            console.print("\n[cyan]Training Configuration:[/cyan]")
            console.print(f" • Groups to process: {gpu_allocation['num_groups']}")
            console.print(f" • Training GPUs: {training_gpus}")
            console.print(f" • Groups per GPU: {groups_per_gpu_text}")

            if gpu_allocation.get("parallel_efficiency", 1.0) < 0.8:
                console.print(
                    f"\n[yellow]⚠️ GPU efficiency: {gpu_allocation['parallel_efficiency'] * 100:.0f}%[/yellow]"  # noqa: E501
                )
                console.print(
                    f"[yellow]Consider adjusting batch_size to {len(training_gpus) * config.training.group_size} for optimal performance[/yellow]"  # noqa: E501
                )
        else:
            console.print(f"[cyan]{gpu_allocation.get('reason', 'Using single GPU')}[/cyan]")

    # Allow manual overrides
    if ddp_gpus is not None:
        requested_gpus = [int(x) for x in ddp_gpus.split(",")]
        console.print(f"[cyan]Manual GPU selection: {requested_gpus}[/cyan]")
        invalid_gpus = [g for g in requested_gpus if g not in available_indices]
        if invalid_gpus:
            console.print(f"[red]❌ Invalid/unhealthy GPU(s) requested: {invalid_gpus}[/red]")
            console.print(f"[yellow]Available healthy GPUs: {available_indices}[/yellow]")
            raise _cli_exit(1)
        training_gpus = requested_gpus
        use_ddp = len(training_gpus) > 1

    if vllm_gpu is not None:
        vllm_gpu_idx = vllm_gpu
        console.print(f"[cyan]Manual vLLM GPU: {vllm_gpu_idx}[/cyan]")
        if vllm_gpu_idx not in available_indices:
            console.print(f"[red]❌ vLLM GPU {vllm_gpu_idx} is not available/healthy![/red]")
            console.print(f"[yellow]Available healthy GPUs: {available_indices}[/yellow]")
            raise _cli_exit(1)

    # Ensure we have at least one training GPU
    if not training_gpus:
        console.print("[red]❌ No available GPUs for training![/red]")
        raise _cli_exit(1)

    # Always adjust batch_size based on number of training GPUs (lazy import)
    from .gpu_utils import adjust_config_for_ddp  # heavy import (torch at module)

    config = adjust_config_for_ddp(config, len(training_gpus))
    save_config(config, temp_config_path)

    # Step 8: Start vLLM server (unless we're using a remote one)
    if not skip_vllm_startup:
        console.print(f"\n[cyan]Setting up vLLM server on GPU {vllm_gpu_idx}...[/cyan]")

        from .vllm import start_vllm_server, wait_for_vllm_server

        start_vllm_server(config.model.base_model, vllm_gpu_idx, restart=restart)

        server_ready = asyncio.run(wait_for_vllm_server())
        if not server_ready:
            console.print("[red]❌ Failed to start vLLM server[/red]")
            raise _cli_exit(1)
    else:
        console.print("\n[cyan]Using remote vLLM server (skipping local startup)[/cyan]")

    # Step 9: Run training (DDP or single GPU)
    if use_ddp:
        console.print(
            f"\n[bold green]🎯 Starting DDP training on {len(training_gpus)} GPUs...[/bold green]\n"
        )
        launch_ddp_training(training_gpus, tasks_file, temp_config_path, verbose)
        console.print("\n[green]✅ Training completed successfully![/green]")
    else:
        console.print("\n[bold green]🎯 Starting single-GPU training...[/bold green]\n")
        try:
            # Set verbose in config instead of passing as parameter
            if verbose:
                config.verbose = True

            # Import and run the async training function lazily
            from hud.rl.train import train  # heavy import

            asyncio.run(train(config, tasks))
            console.print("\n[green]✅ Training completed successfully![/green]")

            try:
                temp_config_path.unlink()
            except Exception as e:
                hud_console.warning(f"Failed to clean up temp config: {e}")

        except KeyboardInterrupt:
            console.print("\n[yellow]Training interrupted by user[/yellow]")
            raise _cli_exit(1) from None
        except Exception as e:
            console.print(f"\n[red]❌ Training failed: {e}")
            raise _cli_exit(1) from e
521
+
522
+
523
def launch_ddp_training(
    training_gpus: list[int],
    tasks_file: str,
    config_path: Path,
    verbose: bool,
    master_port: int = 29500,
) -> None:
    """Launch DDP training with torchrun.

    Uses subprocess to run the training module, so heavy dependencies load in
    the spawned processes rather than the CLI import path.

    Args:
        training_gpus: GPU indices exposed to the workers through
            ``CUDA_VISIBLE_DEVICES``; one worker process is started per entry.
        tasks_file: Path passed to the training module as ``--tasks``.
        config_path: Path passed as ``--config``. The file is deleted when the
            run ends, whether it succeeded or failed.
        verbose: Pass ``--verbose`` to the training module; when false,
            ``HUD_LOG_LEVEL`` is set to ``WARNING`` for the workers instead.
        master_port: Value passed as ``--master_port`` to the launcher.

    Raises:
        typer.Exit: With code 1 when the launcher exits with a non-zero status
            (``SystemExit`` if typer cannot be imported).
    """
    env = os.environ.copy()
    env["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, training_gpus))

    if not verbose:
        env["HUD_LOG_LEVEL"] = "WARNING"

    cmd = [
        sys.executable,
        "-m",
        "torch.distributed.run",
        f"--nproc_per_node={len(training_gpus)}",
        f"--master_port={master_port}",
        "-m",
        "hud.rl.train",
        "--config",
        str(config_path),
        "--tasks",
        tasks_file,
    ]

    if verbose:
        cmd.append("--verbose")

    try:
        subprocess.run(cmd, env=env, check=True)  # noqa: S603
    except subprocess.CalledProcessError as e:
        console.print(f"\n[red]❌ DDP training failed with exit code {e.returncode}[/red]")
        # Only the import is guarded. typer.Exit derives from RuntimeError, so
        # raising it under ``except Exception`` would swallow the failure and
        # let the caller report success.
        try:
            import typer
        except ImportError:
            raise SystemExit(1) from e
        raise typer.Exit(1) from e
    finally:
        # Best-effort cleanup of the temporary config; never mask the real outcome.
        try:
            config_path.unlink()
        except Exception as e:
            hud_console.warning(f"Failed to clean up temp config: {e}")