hud-python 0.4.30__py3-none-any.whl → 0.4.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/cli/rl/__init__.py CHANGED
@@ -2,35 +2,21 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import asyncio
6
5
  import logging
7
6
  import os
8
- import subprocess
9
- import sys
10
- from pathlib import Path
7
+ from typing import TYPE_CHECKING
11
8
 
12
9
  import typer
13
10
  from rich.console import Console
14
11
 
15
12
  from hud.cli.utils.tasks import find_tasks_file
16
- from hud.rl.config import validate_vl_model
17
- from hud.rl.train import train
18
-
19
- # Then import HUD modules
20
13
  from hud.utils.hud_console import hud_console
21
- from hud.utils.tasks import load_tasks
22
-
23
- from .config import generate_config_interactive, load_config, save_config
24
- from .display import display_config_summary, display_gpu_info
25
-
26
- # Import local modules first
27
- from .gpu import detect_cuda_devices, validate_gpu_memory
28
- from .gpu_utils import adjust_config_for_ddp, calculate_optimal_gpu_allocation, health_check_gpus
29
- from .presets import get_training_presets
30
- from .vllm import start_vllm_server, wait_for_vllm_server
31
14
 
32
15
  console = Console()
33
16
 
17
+ if TYPE_CHECKING:
18
+ from pathlib import Path
19
+
34
20
 
35
21
  def rl_command(
36
22
  tasks_file: str | None = typer.Argument(
@@ -96,13 +82,8 @@ def rl_command(
96
82
  """Run GRPO reinforcement learning training on tasks."""
97
83
  # Configure logging based on verbose flag BEFORE any output
98
84
  if not verbose:
99
- # Set environment variable for HUD components
100
85
  os.environ["HUD_LOG_LEVEL"] = "WARNING"
101
-
102
- # Configure logging levels
103
86
  logging.basicConfig(level=logging.WARNING, force=True)
104
-
105
- # Get root logger and set its level
106
87
  root_logger = logging.getLogger()
107
88
  root_logger.setLevel(logging.WARNING)
108
89
 
@@ -116,11 +97,8 @@ def rl_command(
116
97
  "transformers",
117
98
  ]:
118
99
  logging.getLogger(logger_name).setLevel(logging.WARNING)
119
-
120
- # Also set HUD agent logger explicitly
121
100
  logging.getLogger("hud.agents.base").setLevel(logging.WARNING)
122
101
  else:
123
- # In verbose mode, show everything
124
102
  logging.basicConfig(level=logging.INFO)
125
103
 
126
104
  hud_console.header("HUD RL Training")
@@ -138,449 +116,49 @@ def rl_command(
138
116
  hud_console.hint("or create a tasks file manually.")
139
117
  raise typer.Exit(1)
140
118
 
141
- # Handle remote execution
119
+ # If user ran bare `hud rl`, guide them through remote task conversion flow
120
+ # before proceeding (remote only)
142
121
  if use_remote:
143
122
  try:
144
- from .remote_runner import run_remote_training
123
+ from hud.cli.flows.tasks import convert_tasks_to_remote
145
124
 
146
- run_remote_training(
147
- tasks_file=tasks_file, model=model, config_file=config_file, output_dir=output_dir
148
- )
149
- return
125
+ console.print("\n[cyan]Preparing remote training tasks...[/cyan]")
126
+ console.print("[cyan](build/push if needed)[/cyan]")
127
+ tasks_file = convert_tasks_to_remote(tasks_file)
128
+ except typer.Exit:
129
+ raise
150
130
  except Exception as e:
151
- console.print(f"[red] Remote training failed: {e!s}[/red]")
131
+ hud_console.warning(f"[red]Tasks file is not valid for remote training: {e!s}[/red]")
132
+ hud_console.hint("Either ensure the tasks file has remote urls")
133
+ hud_console.hint("Or rerun `hud rl` within an environment directory")
152
134
  raise typer.Exit(1) from e
153
135
 
154
- # Check Python version compatibility
155
- python_version = sys.version_info
156
- if python_version.major == 3 and python_version.minor >= 13:
157
- console.print("[red]⚠️ Warning: Python 3.13+ detected![/red]")
158
- console.print("[yellow]vLLM has compatibility issues with Python 3.13.[/yellow]")
159
- console.print("[yellow]Recommended: Use Python 3.12 or 3.11[/yellow]")
160
- console.print("\n[dim]To create a new environment with Python 3.12:[/dim]")
161
- console.print("[dim] 1. Exit this shell: exit[/dim]")
162
- console.print("[dim] 2. Remove current venv: sudo rm -rf .venv[/dim]")
163
- console.print("[dim] 3. Create new venv: uv venv --python 3.12[/dim]")
164
- console.print("[dim] 4. Install dependencies: uv pip install -e '.[rl]'[/dim]")
165
-
166
- if not typer.confirm("\nDo you want to continue anyway?", default=False):
167
- raise typer.Exit(1)
168
-
169
- # Step 1: Validate CUDA devices
170
- console.print("[yellow]Checking GPU availability...[/yellow]")
171
- gpu_info = detect_cuda_devices()
172
-
173
- if not gpu_info["available"]:
174
- console.print(f"[red]❌ {gpu_info['error']}[/red]")
175
- console.print("[yellow]RL training requires CUDA-capable GPUs[/yellow]")
176
- raise typer.Exit(1)
177
-
178
- display_gpu_info(gpu_info)
179
-
180
- # Perform GPU health check
181
- all_gpu_indices = [device["index"] for device in gpu_info["devices"]]
182
- health_results = health_check_gpus(all_gpu_indices)
183
-
184
- if not health_results["all_healthy"]:
185
- console.print("\n[yellow]⚠️ Some GPUs failed health checks![/yellow]")
186
- console.print(
187
- f"[yellow]Unhealthy GPUs: {list(health_results['unhealthy_gpus'].keys())}[/yellow]"
188
- )
189
-
190
- if not health_results["healthy_gpus"]:
191
- console.print("[red]❌ No healthy GPUs available for training![/red]")
192
- raise typer.Exit(1)
193
-
194
- console.print(
195
- f"\n[cyan]You have {len(health_results['healthy_gpus'])} healthy GPUs available.[/cyan]"
196
- )
197
-
198
- continue_training = typer.confirm("\nContinue with healthy GPUs only?", default=True)
199
-
200
- if not continue_training:
201
- healthy_str = ",".join(map(str, health_results["healthy_gpus"]))
202
- console.print("\n[yellow]Exiting. Please resolve GPU issues and try again.[/yellow]")
203
- console.print("\n[cyan]💡 Tip: To use only healthy GPUs, you can run:[/cyan]")
204
- console.print(f"[white]hud rl {tasks_file} --ddp-gpus {healthy_str} --local[/white]\n")
205
- raise typer.Exit(0)
206
- else:
207
- # Continue with healthy GPUs only
208
- # Update gpu_info to only include healthy GPUs
209
- gpu_info["devices"] = [
210
- d for d in gpu_info["devices"] if d["index"] in health_results["healthy_gpus"]
211
- ]
212
- console.print(
213
- f"\n[green]✅ Continuing with {len(gpu_info['devices'])} healthy GPUs[/green]"
214
- )
215
-
216
- # Get primary GPU memory for configuration
217
- primary_gpu = gpu_info["devices"][0]
218
- gpu_memory_gb = primary_gpu["memory_gb"]
219
-
220
- # Validate GPU memory for 3B model
221
- if not validate_gpu_memory(gpu_memory_gb, "3B"):
222
- console.print(f"[red]❌ Insufficient GPU memory ({gpu_memory_gb:.1f} GB)[/red]")
223
- console.print("[yellow]Qwen 2.5 VL 3B requires at least 12 GB of GPU memory[/yellow]")
224
- raise typer.Exit(1)
225
-
226
- # Step 2: Load tasks
227
- if tasks_file:
228
- console.print(f"\n[cyan]Loading tasks from: {tasks_file}[/cyan]")
229
- else:
230
- # Auto-detect tasks file
231
- possible_files = ["tasks.json", "tasks.jsonl", "browser_2048_tasks.jsonl"]
232
- for f in possible_files:
233
- if Path(f).exists():
234
- tasks_file = f
235
- console.print(f"[green]Auto-detected tasks file: {f}[/green]")
236
- break
237
-
238
- if not tasks_file:
239
- console.print("[red]❌ No tasks file specified or auto-detected[/red]")
240
- console.print(
241
- "[yellow]Please provide a tasks file or create one of: tasks.json, tasks.jsonl[/yellow]" # noqa: E501
242
- )
243
- raise typer.Exit(1)
244
-
245
- # Load the tasks
246
- tasks = load_tasks(tasks_file)
247
- console.print(f"[green]✅ Loaded {len(tasks)} tasks[/green]")
248
-
249
- # Validate tasks
250
- invalid_tasks = []
251
- for i, task in enumerate(tasks):
252
- if not hasattr(task, "prompt") or not task.prompt:
253
- invalid_tasks.append(f"Task {i}: missing 'prompt' field")
254
- if not hasattr(task, "mcp_config") or not task.mcp_config:
255
- invalid_tasks.append(f"Task {i}: missing 'mcp_config' field")
256
-
257
- if invalid_tasks:
258
- console.print("[red]❌ Invalid tasks found:[/red]")
259
- for error in invalid_tasks[:5]: # Show first 5 errors
260
- console.print(f" - {error}")
261
- if len(invalid_tasks) > 5:
262
- console.print(f" ... and {len(invalid_tasks) - 5} more")
263
- raise typer.Exit(1)
264
-
265
- # Step 3: Model selection (if not provided)
266
- if model is None and not config_file:
267
- model = hud_console.select(
268
- "Select a model for RL training:",
269
- choices=[
270
- {
271
- "name": "Qwen 2.5 VL 3B (Recommended - Vision-Language)",
272
- "value": "Qwen/Qwen2.5-VL-3B-Instruct",
273
- },
274
- {"name": "Custom model", "value": "custom"},
275
- ],
276
- default=0,
277
- )
278
-
279
- if model == "custom":
280
- console.print("Enter the model name (HuggingFace ID):")
281
- model = input().strip()
282
-
283
- # Validate model is a VL model (whether provided via CLI or selected)
284
- if model:
285
136
  try:
286
- validate_vl_model(model)
287
- except ValueError as e:
288
- console.print(f"\n[red]❌ {e}[/red]")
289
- raise typer.Exit(1) from e
290
- else:
291
- raise typer.Exit(1)
292
-
293
- # Step 4: Generate or load configuration
294
- if config_file:
295
- console.print(f"\n[cyan]Loading configuration from: {config_file}[/cyan]")
296
- config = load_config(config_file)
297
-
298
- # Validate model from config
299
- if hasattr(config, "model") and hasattr(config.model, "base_model"):
300
- try:
301
- validate_vl_model(config.model.base_model)
302
- except ValueError as e:
303
- console.print(f"\n[red]❌ {e}[/red]")
304
- raise typer.Exit(1) from e
305
-
306
- # Estimate memory for display
307
- from .presets import estimate_memory_usage
308
-
309
- estimated_memory = estimate_memory_usage(
310
- config.training.mini_batch_size,
311
- config.actor.max_steps_per_episode,
312
- config.actor.max_new_tokens,
313
- config.model.max_pixels,
314
- )
315
- else:
316
- console.print("\n[cyan]Generating training configuration...[/cyan]")
317
- # Get number of GPUs for preset scaling
318
- num_training_gpus = 1 # Default, will be adjusted later
319
- if len(gpu_info["devices"]) > 2:
320
- # If we have many GPUs, presets will show scaled values
321
- num_training_gpus = len(gpu_info["devices"]) - 1 # Reserve 1 for vLLM
322
- console.print(
323
- f"[yellow]Note: Episodes will be scaled for {num_training_gpus} training GPUs[/yellow]\n" # noqa: E501
324
- )
325
-
326
- presets = get_training_presets(gpu_memory_gb)
327
- config, estimated_memory = generate_config_interactive(
328
- model_name=model,
329
- presets=presets,
330
- )
331
-
332
- # Step 5: Save temporary config and display summary
333
- temp_config_path = Path(".rl_config_temp.json")
334
- save_config(config, temp_config_path)
335
- console.print(f"\n[cyan]📝 Configuration saved to: {temp_config_path.absolute()}[/cyan]")
336
- console.print("[yellow]You can edit this file before starting training.[/yellow]")
337
-
338
- # Display configuration summary
339
- display_config_summary(config, len(tasks), gpu_info, estimated_memory)
340
-
341
- # Step 6: Ask for confirmation (skip if config was provided)
342
- if not config_file:
343
- console.print("\n[bold yellow]Options:[/bold yellow]")
344
- console.print(" • Type [green]'start'[/green] to begin training")
345
- console.print(" • Type [cyan]'edit'[/cyan] to open config in your editor")
346
- console.print(" • Type [red]'cancel'[/red] to abort")
347
- console.print("\n[bold]Your choice:[/bold] ", end="")
348
-
349
- while True:
350
- choice = input().strip().lower()
351
-
352
- if choice == "start":
353
- # Reload config in case it was edited
354
- config = load_config(temp_config_path)
355
- break
356
- elif choice == "edit":
357
- # Default to nano if EDITOR is not set
358
- editor = os.environ.get("EDITOR", "nano")
359
-
360
- # Show nano instructions if using nano
361
- if editor == "nano":
362
- console.print("\n[cyan]Opening config in nano editor...[/cyan]")
363
- console.print("[yellow]Tips:[/yellow]")
364
- console.print(" • Edit the configuration values as needed")
365
- console.print(" • Press [bold]Ctrl+O[/bold] then [bold]Enter[/bold] to save")
366
- console.print(" • Press [bold]Ctrl+X[/bold] to exit")
367
- console.print(" • Press [bold]Ctrl+C[/bold] to cancel without saving\n")
368
- input("Press Enter to continue...")
369
-
370
- try:
371
- subprocess.run([editor, str(temp_config_path)], check=True) # noqa: S603
372
- # Reload and display updated config
373
- config = load_config(temp_config_path)
374
- estimated_memory = estimate_memory_usage(
375
- config.training.mini_batch_size,
376
- config.actor.max_steps_per_episode,
377
- config.actor.max_new_tokens,
378
- config.model.max_pixels,
379
- )
380
- display_config_summary(config, len(tasks), gpu_info, estimated_memory)
381
- console.print(
382
- "\n[bold]Type 'start' to begin or 'cancel' to abort:[/bold] ", end=""
383
- )
384
- except subprocess.CalledProcessError:
385
- console.print(
386
- "\n[yellow]Editor closed without saving or was cancelled.[/yellow]"
387
- )
388
- console.print("[bold]Your choice:[/bold] ", end="")
389
- except Exception as e:
390
- console.print(f"\n[red]Failed to open editor: {e}[/red]")
391
- console.print(
392
- f"[yellow]Please edit {temp_config_path} manually and type 'start' when ready.[/yellow]" # noqa: E501
393
- )
394
- console.print("[bold]Your choice:[/bold] ", end="")
395
- elif choice == "cancel":
396
- console.print("[red]Training cancelled[/red]")
397
-
398
- # Ask if they want to save the config
399
- if typer.confirm("Save this configuration for later?", default=True):
400
- config_path = Path("rl_config.json")
401
- save_config(config, config_path)
402
-
403
- # Clean up temp file
404
- try:
405
- temp_config_path.unlink()
406
- except Exception as e:
407
- hud_console.warning(f"Failed to clean up temp config: {e}")
408
-
409
- raise typer.Exit(0)
410
- else:
411
- console.print(
412
- "[red]Invalid choice. Type 'start', 'edit', or 'cancel':[/red] ", end=""
413
- )
414
- else:
415
- # Config was provided, proceed directly
416
- console.print("\n[dim]Using provided configuration file...[/dim]")
417
- config = load_config(temp_config_path)
418
-
419
- # Step 7: Determine if DDP should be used
420
- num_gpus = len(gpu_info["devices"])
421
- use_ddp = False
422
- training_gpus = [0] # Default single GPU
423
- vllm_gpu_idx = 1 if num_gpus > 1 else 0
424
-
425
- if num_gpus > 2 and not no_ddp:
426
- console.print(f"\n[cyan]🚀 Detected {num_gpus} GPUs - checking DDP configuration...[/cyan]")
427
-
428
- # Calculate optimal GPU allocation
429
- gpu_allocation = calculate_optimal_gpu_allocation(gpu_info, config)
430
-
431
- if gpu_allocation["use_ddp"]:
432
- use_ddp = True
433
- training_gpus = gpu_allocation["training_gpus"]
434
- vllm_gpu_idx = gpu_allocation["vllm_gpu"]
137
+ from .remote_runner import run_remote_training
435
138
 
436
- console.print(
437
- f"[green]✅ Will use DDP with {len(training_gpus)} GPUs for training[/green]"
139
+ run_remote_training(
140
+ tasks_file=tasks_file, model=model, config_file=config_file, output_dir=output_dir
438
141
  )
439
- console.print(f"[green]✅ GPU {vllm_gpu_idx} reserved for vLLM server[/green]")
440
-
441
- # Show details
442
- console.print("\n[cyan]Training Configuration:[/cyan]")
443
- console.print(f" • Groups to process: {gpu_allocation['num_groups']}")
444
- console.print(f" • Training GPUs: {training_gpus}")
445
- console.print(f" • Groups per GPU: {gpu_allocation.get('groups_per_gpu', 'N/A'):.1f}")
446
-
447
- # Warn about efficiency
448
- if gpu_allocation.get("parallel_efficiency", 1.0) < 0.8:
449
- console.print(
450
- f"\n[yellow]⚠️ GPU efficiency: {gpu_allocation['parallel_efficiency'] * 100:.0f}%[/yellow]" # noqa: E501
451
- )
452
- console.print(
453
- f"[yellow]Consider adjusting batch_size to {len(training_gpus) * config.training.group_size} for optimal performance[/yellow]" # noqa: E501
454
- )
455
- else:
456
- console.print(f"[cyan]{gpu_allocation.get('reason', 'Using single GPU')}[/cyan]")
457
-
458
- # Allow manual override
459
- if ddp_gpus is not None:
460
- requested_gpus = [int(x) for x in ddp_gpus.split(",")]
461
- console.print(f"[cyan]Manual GPU selection: {requested_gpus}[/cyan]")
462
- # Validate requested GPUs are in the healthy set
463
- available_indices = [d["index"] for d in gpu_info["devices"]]
464
- invalid_gpus = [g for g in requested_gpus if g not in available_indices]
465
- if invalid_gpus:
466
- console.print(f"[red]❌ Invalid/unhealthy GPU(s) requested: {invalid_gpus}[/red]")
467
- console.print(f"[yellow]Available healthy GPUs: {available_indices}[/yellow]")
468
- raise typer.Exit(1)
469
- training_gpus = requested_gpus
470
- use_ddp = len(training_gpus) > 1
471
-
472
- if vllm_gpu is not None:
473
- vllm_gpu_idx = vllm_gpu
474
- console.print(f"[cyan]Manual vLLM GPU: {vllm_gpu_idx}[/cyan]")
475
- # Validate vLLM GPU is in the healthy set
476
- available_indices = [d["index"] for d in gpu_info["devices"]]
477
- if vllm_gpu_idx not in available_indices:
478
- console.print(f"[red]❌ vLLM GPU {vllm_gpu_idx} is not available/healthy![/red]")
479
- console.print(f"[yellow]Available healthy GPUs: {available_indices}[/yellow]")
480
- raise typer.Exit(1)
481
-
482
- # Ensure we have at least one training GPU
483
- if not training_gpus:
484
- console.print("[red]❌ No available GPUs for training![/red]")
485
- raise typer.Exit(1)
486
-
487
- # Always adjust batch_size based on number of training GPUs
488
- config = adjust_config_for_ddp(config, len(training_gpus))
489
-
490
- # Save updated config (for both DDP and single GPU)
491
- save_config(config, temp_config_path)
492
-
493
- # Step 8: Start vLLM server (unless we're using a remote one)
494
- if not skip_vllm_startup:
495
- console.print(f"\n[cyan]Setting up vLLM server on GPU {vllm_gpu_idx}...[/cyan]")
496
-
497
- start_vllm_server(config.model.base_model, vllm_gpu_idx, restart=restart)
498
-
499
- # Wait for server to be ready
500
- server_ready = asyncio.run(wait_for_vllm_server())
501
- if not server_ready:
502
- console.print("[red]❌ Failed to start vLLM server[/red]")
503
- raise typer.Exit(1)
504
- else:
505
- console.print("\n[cyan]Using remote vLLM server (skipping local startup)[/cyan]")
506
-
507
- # Step 9: Run training (DDP or single GPU)
508
- if use_ddp:
509
- console.print(
510
- f"\n[bold green]🎯 Starting DDP training on {len(training_gpus)} GPUs...[/bold green]\n"
511
- )
512
- launch_ddp_training(training_gpus, tasks_file, temp_config_path, verbose)
513
- console.print("\n[green]✅ Training completed successfully![/green]")
514
- else:
515
- console.print("\n[bold green]🎯 Starting single-GPU training...[/bold green]\n")
516
- try:
517
- # Set verbose in config instead of passing as parameter
518
- if verbose:
519
- config.verbose = True
520
-
521
- # Run the async training function
522
- asyncio.run(train(config, tasks))
523
- console.print("\n[green]✅ Training completed successfully![/green]")
524
-
525
- # Clean up temp config file
526
- try:
527
- temp_config_path.unlink()
528
- except Exception as e:
529
- hud_console.warning(f"Failed to clean up temp config: {e}")
530
-
531
- except KeyboardInterrupt:
532
- console.print("\n[yellow]Training interrupted by user[/yellow]")
533
- raise typer.Exit(1) # noqa: B904
142
+ return
534
143
  except Exception as e:
535
- console.print(f"\n[red]❌ Training failed: {e}[/red]")
144
+ console.print(f"[red]❌ Remote training failed: {e!s}[/red]")
536
145
  raise typer.Exit(1) from e
537
146
 
538
-
539
- def launch_ddp_training(
540
- training_gpus: list[int], tasks_file: str, config_path: Path, verbose: bool
541
- ) -> None:
542
- """Launch DDP training with torchrun."""
543
- import subprocess
544
- import sys
545
-
546
- # Prepare environment
547
- env = os.environ.copy()
548
- env["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, training_gpus))
549
-
550
- if not verbose:
551
- env["HUD_LOG_LEVEL"] = "WARNING"
552
-
553
- # Build command
554
- cmd = [
555
- sys.executable,
556
- "-m",
557
- "torch.distributed.run",
558
- f"--nproc_per_node={len(training_gpus)}",
559
- "--master_port=29500",
560
- "-m",
561
- "hud.rl.train",
562
- "--config",
563
- str(config_path),
564
- "--tasks",
565
- tasks_file,
566
- ]
567
-
568
- # Add verbose flag if enabled
569
- if verbose:
570
- cmd.append("--verbose")
571
-
572
- try:
573
- # Run DDP training
574
- subprocess.run(cmd, env=env, check=True) # noqa: S603
575
- except subprocess.CalledProcessError as e:
576
- console.print(f"\n[red]❌ DDP training failed with exit code {e.returncode}[/red]")
577
- raise typer.Exit(1) from e
578
- finally:
579
- # Cleanup temp config
580
- try:
581
- config_path.unlink()
582
- except Exception as e:
583
- hud_console.warning(f"Failed to clean up temp config: {e}")
147
+ # Local execution flow delegated to local_runner (imports heavy deps lazily)
148
+ from .local_runner import run_local_training
149
+
150
+ run_local_training(
151
+ tasks_file=tasks_file,
152
+ model=model,
153
+ config_file=config_file,
154
+ output_dir=output_dir,
155
+ restart=restart,
156
+ verbose=verbose,
157
+ no_ddp=no_ddp,
158
+ ddp_gpus=ddp_gpus,
159
+ vllm_gpu=vllm_gpu,
160
+ skip_vllm_startup=skip_vllm_startup,
161
+ )
584
162
 
585
163
 
586
164
  # Export the command function