hud-python 0.4.28__py3-none-any.whl → 0.4.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (75)
  1. hud/__init__.py +2 -1
  2. hud/agents/base.py +73 -45
  3. hud/agents/claude.py +8 -4
  4. hud/agents/openai_chat_generic.py +65 -40
  5. hud/agents/tests/test_base.py +0 -4
  6. hud/agents/tests/test_openai.py +1 -1
  7. hud/cli/__init__.py +182 -52
  8. hud/cli/dev.py +8 -9
  9. hud/cli/eval.py +317 -119
  10. hud/cli/flows/__init__.py +0 -0
  11. hud/cli/flows/tasks.py +0 -0
  12. hud/cli/get.py +160 -0
  13. hud/cli/rl/__init__.py +563 -71
  14. hud/cli/rl/config.py +94 -0
  15. hud/cli/rl/display.py +133 -0
  16. hud/cli/rl/gpu.py +63 -0
  17. hud/cli/rl/gpu_utils.py +318 -0
  18. hud/cli/rl/presets.py +96 -0
  19. hud/cli/rl/remote_runner.py +348 -0
  20. hud/cli/rl/rl_api.py +150 -0
  21. hud/cli/rl/vllm.py +177 -0
  22. hud/cli/tests/test_analyze_metadata.py +0 -1
  23. hud/cli/utils/tasks.py +26 -0
  24. hud/clients/base.py +21 -23
  25. hud/clients/mcp_use.py +36 -44
  26. hud/clients/tests/test_mcp_use_retry.py +10 -10
  27. hud/datasets/__init__.py +4 -3
  28. hud/datasets/{execution/parallel.py → parallel.py} +1 -1
  29. hud/datasets/{execution/runner.py → runner.py} +1 -1
  30. hud/datasets/utils.py +1 -1
  31. hud/native/tests/test_native_init.py +1 -1
  32. hud/otel/config.py +1 -1
  33. hud/otel/instrumentation.py +35 -0
  34. hud/rl/README.md +31 -0
  35. hud/rl/__init__.py +1 -0
  36. hud/rl/actor.py +174 -0
  37. hud/rl/buffer.py +371 -0
  38. hud/rl/chat_template.jinja +101 -0
  39. hud/rl/config.py +184 -0
  40. hud/rl/distributed.py +95 -0
  41. hud/rl/learner.py +586 -0
  42. hud/rl/tests/__init__.py +1 -0
  43. hud/rl/tests/test_learner.py +171 -0
  44. hud/rl/train.py +354 -0
  45. hud/rl/types.py +101 -0
  46. hud/rl/utils/start_vllm_server.sh +30 -0
  47. hud/rl/utils.py +524 -0
  48. hud/rl/vllm_adapter.py +125 -0
  49. hud/settings.py +6 -0
  50. hud/telemetry/__init__.py +2 -1
  51. hud/telemetry/job.py +46 -3
  52. hud/telemetry/tests/test_trace.py +3 -3
  53. hud/telemetry/trace.py +85 -13
  54. hud/tools/tests/test_computer.py +3 -3
  55. hud/tools/tests/test_computer_actions.py +1 -1
  56. hud/types.py +123 -2
  57. hud/utils/group_eval.py +223 -0
  58. hud/utils/hud_console.py +113 -13
  59. hud/utils/tasks.py +119 -0
  60. hud/utils/tests/test_version.py +1 -1
  61. hud/version.py +1 -1
  62. {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/METADATA +20 -2
  63. {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/RECORD +66 -46
  64. hud/cli/hf.py +0 -406
  65. hud/cli/rl/README.md +0 -243
  66. hud/cli/rl/init.py +0 -370
  67. hud/cli/rl/pod.py +0 -501
  68. hud/cli/rl/ssh.py +0 -322
  69. hud/cli/rl/train.py +0 -562
  70. hud/cli/rl/utils.py +0 -165
  71. hud/datasets/execution/__init__.py +0 -13
  72. hud/datasets/task.py +0 -116
  73. {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/WHEEL +0 -0
  74. {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/entry_points.txt +0 -0
  75. {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/licenses/LICENSE +0 -0
hud/cli/hf.py DELETED
@@ -1,406 +0,0 @@
1
- """HuggingFace dataset conversion command for HUD tasks."""
2
-
3
- from __future__ import annotations
4
-
5
- import json
6
- from datetime import datetime
7
- from pathlib import Path
8
-
9
- import typer
10
-
11
- from hud.cli.rl.utils import get_mcp_config_from_lock, read_lock_file, write_lock_file
12
- from hud.utils.hud_console import HUDConsole
13
-
14
- hud_console = HUDConsole()
15
-
16
-
17
- def hf_command(
18
- tasks_file: Path | None = None,
19
- name: str | None = None,
20
- push: bool = True,
21
- private: bool = False,
22
- update_lock: bool = True,
23
- token: str | None = None,
24
- ) -> None:
25
- """📊 Convert tasks to HuggingFace dataset format.
26
-
27
- Automatically detects task files if not specified.
28
- Suggests dataset name based on environment if not provided.
29
- Converts a JSON file containing HUD tasks into a HuggingFace dataset
30
- and optionally pushes it to the Hub. Also updates hud.lock.yaml with
31
- the primary dataset reference.
32
-
33
- Examples:
34
- hud hf # Auto-detect tasks and suggest name
35
- hud hf tasks.json # Use specific file, suggest name
36
- hud hf --name my-org/my-tasks # Auto-detect tasks, use name
37
- hud hf tasks.json --name hud-evals/web-tasks --private
38
- hud hf tasks.json --name local-dataset --no-push
39
- """
40
- hud_console.header("HuggingFace Dataset Converter", icon="📊")
41
-
42
- # Auto-detect task file if not provided
43
- if tasks_file is None:
44
- hud_console.info("Looking for task files...")
45
-
46
- # Common task file patterns
47
- patterns = [
48
- "tasks.json",
49
- "task.json",
50
- "*_tasks.json",
51
- "eval*.json",
52
- "evaluation*.json",
53
- ]
54
-
55
- json_files = []
56
- for pattern in patterns:
57
- json_files.extend(Path(".").glob(pattern))
58
-
59
- # Remove duplicates and sort
60
- json_files = sorted(set(json_files))
61
-
62
- if not json_files:
63
- hud_console.error("No task files found in current directory")
64
- hud_console.info("Create a task JSON file (e.g., tasks.json) or specify the file path")
65
- raise typer.Exit(1)
66
- elif len(json_files) == 1:
67
- tasks_file = json_files[0]
68
- hud_console.info(f"Found task file: {tasks_file}")
69
- else:
70
- # Multiple files found, let user choose
71
- hud_console.info("Multiple task files found:")
72
- file_choice = hud_console.select(
73
- "Select a task file to convert:",
74
- choices=[str(f) for f in json_files],
75
- )
76
- tasks_file = Path(file_choice)
77
- hud_console.success(f"Selected: {tasks_file}")
78
-
79
- # Validate inputs
80
- if tasks_file and not tasks_file.exists():
81
- hud_console.error(f"Tasks file not found: {tasks_file}")
82
- raise typer.Exit(1)
83
-
84
- # Suggest dataset name if not provided
85
- if name is None:
86
- hud_console.info("Generating dataset name suggestion...")
87
-
88
- # Try to get HF username from environment or git config
89
- hf_username = None
90
- try:
91
- # Try HF token first
92
- from huggingface_hub import HfApi
93
-
94
- api = HfApi(token=token)
95
- user_info = api.whoami()
96
- hf_username = user_info.get("name", None)
97
- except Exception:
98
- # Try git config as fallback
99
- try:
100
- import subprocess
101
-
102
- result = subprocess.run(
103
- ["git", "config", "user.name"], # noqa: S607
104
- capture_output=True,
105
- text=True,
106
- )
107
- if result.returncode == 0 and result.stdout.strip():
108
- hf_username = result.stdout.strip().lower().replace(" ", "-")
109
- except Exception:
110
- hud_console.warning("Failed to get HF username from git config")
111
-
112
- # Get environment name from current directory or lock file
113
- env_name = Path.cwd().name
114
-
115
- # Try to get a better name from lock file
116
- lock_path = Path("hud.lock.yaml")
117
- if lock_path.exists():
118
- try:
119
- with open(lock_path) as f:
120
- import yaml
121
-
122
- lock_data = yaml.safe_load(f)
123
- if "image" in lock_data:
124
- # Extract name from image like "test:dev@sha256:..."
125
- image_name = lock_data["image"].split(":")[0].split("/")[-1]
126
- if image_name and image_name != "local":
127
- env_name = image_name
128
- except Exception as e:
129
- hud_console.warning(f"Failed to get HF username from lock file: {e}")
130
-
131
- # Generate suggestions
132
- suggestions = []
133
- if hf_username:
134
- suggestions.append(f"{hf_username}/{env_name}-tasks")
135
- suggestions.append(f"{hf_username}/{env_name}-dataset")
136
- suggestions.append(f"my-org/{env_name}-tasks")
137
- suggestions.append(f"hud-evals/{env_name}-tasks")
138
-
139
- # Let user choose or enter custom
140
- hud_console.info("Dataset name suggestions:")
141
- suggestions.append("Enter custom name...")
142
-
143
- choice = hud_console.select("Select or enter a dataset name:", choices=suggestions)
144
-
145
- if choice == "Enter custom name...":
146
- name = typer.prompt("Enter dataset name (e.g., 'my-org/my-dataset')")
147
- else:
148
- name = choice
149
-
150
- hud_console.success(f"Using dataset name: {name}")
151
-
152
- # Validate dataset name format
153
- if push and name and "/" not in name:
154
- hud_console.error("Dataset name must include organization (e.g., 'my-org/my-dataset')")
155
- hud_console.info("For local-only datasets, use --no-push")
156
- raise typer.Exit(1)
157
-
158
- # Load tasks
159
- hud_console.info(f"Loading tasks from: {tasks_file}")
160
- try:
161
- if tasks_file is None:
162
- raise ValueError("Tasks file is required")
163
- with open(tasks_file) as f:
164
- tasks_data = json.load(f)
165
- except json.JSONDecodeError as e:
166
- hud_console.error(f"Invalid JSON file: {e}")
167
- raise typer.Exit(1) from e
168
-
169
- # Handle both single task and list of tasks
170
- if isinstance(tasks_data, dict):
171
- tasks = [tasks_data]
172
- hud_console.info("Found 1 task")
173
- elif isinstance(tasks_data, list):
174
- tasks = tasks_data
175
- hud_console.info(f"Found {len(tasks)} tasks")
176
- else:
177
- hud_console.error("Tasks file must contain a JSON object or array")
178
- raise typer.Exit(1)
179
-
180
- # Validate task format
181
- valid_tasks = []
182
- for i, task in enumerate(tasks):
183
- if not isinstance(task, dict):
184
- hud_console.warning(f"Skipping task {i}: not a JSON object")
185
- continue
186
-
187
- # Required fields
188
- if "prompt" not in task:
189
- hud_console.warning(f"Skipping task {i}: missing 'prompt' field")
190
- continue
191
-
192
- if "evaluate_tool" not in task:
193
- hud_console.warning(f"Skipping task {i}: missing 'evaluate_tool' field")
194
- continue
195
-
196
- # Add default values
197
- if "id" not in task:
198
- task["id"] = f"task-{i:04d}"
199
-
200
- if "mcp_config" not in task:
201
- # Try to infer from hud.lock.yaml
202
- mcp_config = get_mcp_config_from_lock()
203
- if mcp_config:
204
- task["mcp_config"] = mcp_config
205
- else:
206
- hud_console.warning(f"Task {task['id']}: missing 'mcp_config' field")
207
- continue
208
-
209
- valid_tasks.append(task)
210
-
211
- if not valid_tasks:
212
- hud_console.error("No valid tasks found")
213
- raise typer.Exit(1)
214
-
215
- hud_console.success(f"Validated {len(valid_tasks)} tasks")
216
-
217
- # Check if dataset is suitable for training
218
- if len(valid_tasks) < 4:
219
- hud_console.warning(
220
- f"Dataset has only {len(valid_tasks)} task(s). RL training typically requires at least 4 tasks." # noqa: E501
221
- )
222
- use_for_training = hud_console.select(
223
- "Will this dataset be used for RL training?",
224
- ["Yes, duplicate tasks to reach 4", "No, keep as is"],
225
- )
226
-
227
- if use_for_training == "Yes, duplicate tasks to reach 4":
228
- # Duplicate tasks to reach minimum of 4
229
- original_count = len(valid_tasks)
230
- while len(valid_tasks) < 4:
231
- for task in valid_tasks[:original_count]:
232
- if len(valid_tasks) >= 4:
233
- break
234
- # Create a copy with modified ID
235
- duplicated_task = task.copy()
236
- duplicated_task["id"] = (
237
- f"{task['id']}_dup{len(valid_tasks) - original_count + 1}"
238
- )
239
- valid_tasks.append(duplicated_task)
240
-
241
- hud_console.info(f"Duplicated tasks: {original_count} → {len(valid_tasks)}")
242
-
243
- # Check if MCP configs should be converted to remote
244
- sample_mcp_config = valid_tasks[0].get("mcp_config", {})
245
- if isinstance(sample_mcp_config, str):
246
- sample_mcp_config = json.loads(sample_mcp_config)
247
-
248
- # Check config type by looking at all MCP server URLs
249
- config_type = "unknown"
250
- remote_image = None
251
-
252
- # Check all server configs (could be named anything, not just "hud")
253
- for server_config in sample_mcp_config.values():
254
- if isinstance(server_config, dict) and "url" in server_config:
255
- url = server_config.get("url", "")
256
- if "mcp.hud.so" in url:
257
- config_type = "remote"
258
- # Extract image from Mcp-Image header if present
259
- headers = server_config.get("headers", {})
260
- found_image = headers.get("Mcp-Image", "")
261
- if found_image:
262
- remote_image = found_image
263
- break
264
- else:
265
- # Any non-mcp.hud.so URL means local config
266
- config_type = "local"
267
-
268
- if config_type == "remote" and remote_image:
269
- hud_console.info(f"Tasks already use remote MCP configs with image: {remote_image}")
270
-
271
- if config_type == "local":
272
- convert_to_remote = hud_console.select(
273
- "Tasks use local MCP configs. Convert to remote configs for training?",
274
- ["Yes, convert to remote (requires public image)", "No, keep local configs"],
275
- )
276
-
277
- if convert_to_remote == "Yes, convert to remote (requires public image)":
278
- # Get the image name from lock file
279
- from hud.cli.rl.utils import get_image_from_lock
280
-
281
- image = get_image_from_lock()
282
-
283
- if not image:
284
- hud_console.error("No image found in hud.lock.yaml")
285
- hud_console.hint("Run 'hud build' first")
286
- raise typer.Exit(1)
287
-
288
- # Check if image contains registry prefix (indicates it's pushed)
289
- if "/" not in image or image.startswith("local/"):
290
- # Clean up image name for display (remove SHA if present)
291
- display_image = image.split("@")[0] if "@" in image else image
292
- hud_console.warning(f"Image '{display_image}' appears to be local only")
293
- push_image = hud_console.select(
294
- "Would you like to push the image to make it publicly available?",
295
- ["Yes, push image", "No, cancel"],
296
- )
297
-
298
- if push_image == "Yes, push image":
299
- hud_console.info("Running 'hud push' to publish image...")
300
- # Import here to avoid circular imports
301
- from hud.cli.push import push_command
302
-
303
- # Run push command (it's synchronous)
304
- push_command(directory=".", yes=True)
305
- hud_console.success("Image pushed successfully")
306
-
307
- # Re-read the image name as it may have changed
308
- image = get_image_from_lock()
309
- else:
310
- hud_console.info("Keeping local MCP configs")
311
- convert_to_remote = None
312
-
313
- if convert_to_remote and image:
314
- # Convert all task configs to remote
315
- hud_console.info(f"Converting MCP configs to use remote image: {image}")
316
-
317
- for task in valid_tasks:
318
- # Create remote MCP config
319
- remote_config = {
320
- "hud": {
321
- "url": "https://mcp.hud.so/v3/mcp",
322
- "headers": {
323
- "Authorization": "Bearer $HUD_API_KEY",
324
- "Mcp-Image": image,
325
- },
326
- }
327
- }
328
- task["mcp_config"] = remote_config
329
-
330
- hud_console.success("✓ Converted all tasks to use remote MCP configs")
331
-
332
- # Convert to HuggingFace format
333
- dataset_dict = {
334
- "id": [],
335
- "prompt": [],
336
- "mcp_config": [],
337
- "setup_tool": [],
338
- "evaluate_tool": [],
339
- "metadata": [],
340
- }
341
-
342
- for task in valid_tasks:
343
- dataset_dict["id"].append(task["id"])
344
- dataset_dict["prompt"].append(task["prompt"])
345
- dataset_dict["mcp_config"].append(json.dumps(task["mcp_config"]))
346
- dataset_dict["setup_tool"].append(json.dumps(task.get("setup_tool", {})))
347
- dataset_dict["evaluate_tool"].append(json.dumps(task["evaluate_tool"]))
348
- dataset_dict["metadata"].append(json.dumps(task.get("metadata", {})))
349
-
350
- # Push to HuggingFace Hub if requested
351
- if push:
352
- try:
353
- from datasets import Dataset
354
- except ImportError as e:
355
- hud_console.error("datasets library not installed")
356
- hud_console.info("Install with: pip install datasets")
357
- raise typer.Exit(1) from e
358
-
359
- hud_console.info(f"Creating HuggingFace dataset: {name}")
360
- dataset = Dataset.from_dict(dataset_dict)
361
-
362
- # Set up HF token
363
- if token:
364
- import os
365
-
366
- os.environ["HF_TOKEN"] = token
367
-
368
- hud_console.info(f"Pushing to Hub (private={private})...")
369
- try:
370
- if name is None:
371
- raise ValueError("Dataset name is required")
372
- dataset.push_to_hub(name, private=private)
373
- hud_console.success(f"Dataset published: https://huggingface.co/datasets/{name}")
374
- except Exception as e:
375
- hud_console.error(f"Failed to push to Hub: {e}")
376
- hud_console.hint("Make sure you're logged in: huggingface-cli login")
377
- raise typer.Exit(1) from e
378
- else:
379
- # Save locally
380
- if name is None:
381
- raise ValueError("Dataset name is required")
382
- output_file = Path(f"{name.replace('/', '_')}_dataset.json")
383
- with open(output_file, "w") as f:
384
- json.dump(dataset_dict, f, indent=2)
385
- hud_console.success(f"Dataset saved locally: {output_file}")
386
-
387
- # Update hud.lock.yaml if requested
388
- if update_lock:
389
- update_lock_file(name, len(valid_tasks))
390
-
391
-
392
- def update_lock_file(dataset_name: str, task_count: int) -> None:
393
- """Update hud.lock.yaml with primary dataset reference."""
394
- # Load existing lock file or create new
395
- lock_data = read_lock_file()
396
-
397
- # Update dataset info
398
- lock_data["primary_dataset"] = {
399
- "name": dataset_name,
400
- "task_count": task_count,
401
- "updated_at": datetime.now().isoformat(),
402
- }
403
-
404
- # Write back
405
- if write_lock_file(lock_data):
406
- hud_console.success(f"Updated hud.lock.yaml with dataset: {dataset_name}")
hud/cli/rl/README.md DELETED
@@ -1,243 +0,0 @@
1
- # HUD RL Commands
2
-
3
- This module provides reinforcement learning commands for training agents on HUD environments using the `hud-vf-gym` adapter and verifiers framework.
4
-
5
- ## Configuration
6
-
7
- API keys can be configured in two ways:
8
-
9
- 1. **Environment Variables**:
10
- ```bash
11
- export HUD_API_KEY="your-hud-api-key"
12
- export WANDB_API_KEY="your-wandb-api-key"
13
- export PRIME_API_KEY="your-prime-api-key"
14
- ```
15
-
16
- 2. **`.env` File** (recommended):
17
- Create a `.env` file in your project root:
18
- ```env
19
- HUD_API_KEY=your-hud-api-key
20
- WANDB_API_KEY=your-wandb-api-key
21
- PRIME_API_KEY=your-prime-api-key
22
- ```
23
-
24
- HUD automatically loads settings from the `.env` file if present.
25
-
26
- ## Quick Start
27
-
28
- ```bash
29
- # 1. Generate config from environment
30
- hud rl init my-env:latest
31
-
32
- # 2. Create dataset from tasks
33
- hud hf tasks.json --name my-org/my-tasks
34
-
35
- # 3. Start training (interactive mode)
36
- hud rl
37
- ```
38
-
39
- ## Commands
40
-
41
- ### `hud rl init`
42
-
43
- Generates a `hud-vf-gym` configuration file by analyzing a HUD environment:
44
-
45
- ```bash
46
- hud rl init hudpython/hud-text-2048:latest
47
- hud rl init my-env:latest -o configs/my-env.yaml
48
- hud rl init my-env:latest --force # Overwrite existing
49
- ```
50
-
51
- This command:
52
- - Analyzes the environment's available tools
53
- - Generates appropriate action mappings
54
- - Creates a system prompt with tool descriptions
55
- - Sets up default parser and rubric configurations
56
-
57
- ### `hud hf`
58
-
59
- Converts HUD tasks to HuggingFace dataset format:
60
-
61
- ```bash
62
- hud hf tasks.json --name my-org/my-dataset
63
- hud hf tasks.json --name my-org/private-dataset --private
64
- hud hf tasks.json --name local-dataset --no-push # Local only
65
- ```
66
-
67
- Features:
68
- - Validates task format
69
- - Auto-infers MCP config from `hud.lock.yaml`
70
- - Updates lock file with primary dataset reference
71
- - Supports both single task and task array formats
72
-
73
- ### `hud rl` (main command)
74
-
75
- Runs RL training with automatic setup:
76
-
77
- ```bash
78
- # Interactive mode - prompts for missing components
79
- hud rl
80
-
81
- # Specify options
82
- hud rl --model gpt-4o-mini --dataset my-org/my-tasks
83
- hud rl --config configs/2048.yaml --gpus 4xH100
84
- hud rl --gpus 4xH100 --provider prime
85
- ```
86
-
87
- The command will:
88
- 1. Check for required files (config, dataset)
89
- 2. Offer to generate missing components
90
- 3. Push environment to registry if needed
91
- 4. Start training (local or remote)
92
-
93
- ## Task Format
94
-
95
- Tasks should follow this JSON format:
96
-
97
- ```json
98
- {
99
- "id": "task-001",
100
- "prompt": "Complete the task description",
101
- "mcp_config": {
102
- "hud": {
103
- "url": "https://mcp.hud.so/v3/mcp",
104
- "headers": {
105
- "Authorization": "Bearer $HUD_API_KEY",
106
- "Mcp-Image": "your-org/your-env:latest"
107
- }
108
- }
109
- },
110
- "setup_tool": {
111
- "name": "setup",
112
- "arguments": {
113
- "name": "function_name",
114
- "param": "value"
115
- }
116
- },
117
- "evaluate_tool": {
118
- "name": "evaluate",
119
- "arguments": {
120
- "name": "evaluator_name",
121
- "expected": "value"
122
- }
123
- },
124
- "metadata": {
125
- "difficulty": "easy",
126
- "category": "task_type"
127
- }
128
- }
129
- ```
130
-
131
- ## Configuration Format
132
-
133
- The generated YAML configs follow the `hud-vf-gym` specification:
134
-
135
- ```yaml
136
- job:
137
- name: "RL Training - my-env"
138
- metadata:
139
- environment: "my-env:latest"
140
-
141
- system_prompt: |
142
- You are an AI agent interacting with my-env.
143
-
144
- Available tools:
145
- - tool_name(params): Description
146
- Usage: <tool>tool_name(...)</tool>
147
-
148
- parser:
149
- use_thinking: true
150
- xml_weight: 0.6
151
- action_weight: 0.4
152
-
153
- action_mappings:
154
- tool_name:
155
- _tool: "mcp_tool_name"
156
- _parser:
157
- positional: ["param1", "param2"]
158
- param1:
159
- from_arg: "param1"
160
-
161
- rubric:
162
- weights:
163
- task_completion: 0.8
164
- tool_execution: 0.1
165
- format_compliance: 0.1
166
- ```
167
-
168
- ## Lock File Integration
169
-
170
- The commands integrate with `hud.lock.yaml`:
171
-
172
- ```yaml
173
- image: "my-org/my-env:latest"
174
- primary_dataset:
175
- name: "my-org/my-tasks"
176
- task_count: 50
177
- updated_at: "2024-01-01T00:00:00"
178
- ```
179
-
180
- This allows:
181
- - Automatic dataset discovery for `hud rl`
182
- - MCP config inference for tasks
183
- - Environment image tracking
184
-
185
- ## Remote Training
186
-
187
- The `hud rl` command fully automates remote training on GPU instances:
188
-
189
- 1. **Automatic Pod Creation**: Provisions GPU instances via Prime Intellect API
190
- 2. **Environment Setup**: Installs all required dependencies automatically
191
- 3. **Training Execution**: Runs distributed training with vLLM inference server
192
- 4. **Live Monitoring**: Streams training logs with WANDB integration
193
-
194
- ### What Happens Automatically
195
-
196
- When you run `hud rl`, the system will:
197
-
198
- 1. **Create GPU Pod**:
199
- - Selects lowest-cost provider (typically datacrunch)
200
- - Allocates specified GPUs (e.g., 2xA100 for GRPO training)
201
- - Configures with PyTorch CUDA image
202
- - Polls until SSH is available (5-20 minutes)
203
-
204
- 2. **Transfer Files**:
205
- - Copies your config YAML to the pod
206
- - Creates a custom training script
207
-
208
- 3. **Install Dependencies**:
209
- - Installs `uv` package manager
210
- - Creates Python 3.12 virtual environment
211
- - Installs `hud-vf-gym` via Prime registry
212
- - Installs `verifiers[train]` for GRPO training
213
- - Installs `flash-attn` for efficient attention
214
-
215
- 4. **Setup Training**:
216
- - Exports WANDB_API_KEY and HUD_API_KEY
217
- - Starts vLLM inference server on GPU 0 via tmux
218
- - Runs GRPO training on GPU 1
219
- - Logs metrics to Weights & Biases
220
-
221
- ### Required API Keys
222
-
223
- Ensure these are set in your `.env` file or environment:
224
- - `HUD_API_KEY`: For HUD telemetry and MCP connections
225
- - `WANDB_API_KEY`: For training metrics and logging
226
- - `PRIME_API_KEY`: For pod provisioning
227
-
228
- ### SSH Key Configuration
229
-
230
- Before using Prime pods:
231
- 1. Generate SSH keys at: https://app.primeintellect.ai/dashboard/profile
232
- 2. Download and save as: `~/.ssh/prime_key.pem`
233
- 3. Set permissions: `chmod 400 ~/.ssh/prime_key.pem`
234
- 4. Configure Prime CLI: `prime config set-ssh-key-path ~/.ssh/prime_key.pem`
235
-
236
-
237
- ## Implementation Notes
238
-
239
- The RL commands are built on top of:
240
- - `hud-vf-gym`: Generic adapter for HUD environments
241
- - `verifiers`: RL training framework
242
- - HuggingFace datasets: Task storage and distribution
243
- - Prime Intellect infrastructure: GPU provisioning (planned)