hud-python 0.4.28__py3-none-any.whl → 0.4.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +2 -1
- hud/agents/base.py +73 -45
- hud/agents/claude.py +8 -4
- hud/agents/openai_chat_generic.py +65 -40
- hud/agents/tests/test_base.py +0 -4
- hud/agents/tests/test_openai.py +1 -1
- hud/cli/__init__.py +182 -52
- hud/cli/dev.py +8 -9
- hud/cli/eval.py +317 -119
- hud/cli/flows/__init__.py +0 -0
- hud/cli/flows/tasks.py +0 -0
- hud/cli/get.py +160 -0
- hud/cli/rl/__init__.py +563 -71
- hud/cli/rl/config.py +94 -0
- hud/cli/rl/display.py +133 -0
- hud/cli/rl/gpu.py +63 -0
- hud/cli/rl/gpu_utils.py +318 -0
- hud/cli/rl/presets.py +96 -0
- hud/cli/rl/remote_runner.py +348 -0
- hud/cli/rl/rl_api.py +150 -0
- hud/cli/rl/vllm.py +177 -0
- hud/cli/tests/test_analyze_metadata.py +0 -1
- hud/cli/utils/tasks.py +26 -0
- hud/clients/base.py +21 -23
- hud/clients/mcp_use.py +36 -44
- hud/clients/tests/test_mcp_use_retry.py +10 -10
- hud/datasets/__init__.py +4 -3
- hud/datasets/{execution/parallel.py → parallel.py} +1 -1
- hud/datasets/{execution/runner.py → runner.py} +1 -1
- hud/datasets/utils.py +1 -1
- hud/native/tests/test_native_init.py +1 -1
- hud/otel/config.py +1 -1
- hud/otel/instrumentation.py +35 -0
- hud/rl/README.md +31 -0
- hud/rl/__init__.py +1 -0
- hud/rl/actor.py +174 -0
- hud/rl/buffer.py +371 -0
- hud/rl/chat_template.jinja +101 -0
- hud/rl/config.py +184 -0
- hud/rl/distributed.py +95 -0
- hud/rl/learner.py +586 -0
- hud/rl/tests/__init__.py +1 -0
- hud/rl/tests/test_learner.py +171 -0
- hud/rl/train.py +354 -0
- hud/rl/types.py +101 -0
- hud/rl/utils/start_vllm_server.sh +30 -0
- hud/rl/utils.py +524 -0
- hud/rl/vllm_adapter.py +125 -0
- hud/settings.py +6 -0
- hud/telemetry/__init__.py +2 -1
- hud/telemetry/job.py +46 -3
- hud/telemetry/tests/test_trace.py +3 -3
- hud/telemetry/trace.py +85 -13
- hud/tools/tests/test_computer.py +3 -3
- hud/tools/tests/test_computer_actions.py +1 -1
- hud/types.py +123 -2
- hud/utils/group_eval.py +223 -0
- hud/utils/hud_console.py +113 -13
- hud/utils/tasks.py +119 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/METADATA +20 -2
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/RECORD +66 -46
- hud/cli/hf.py +0 -406
- hud/cli/rl/README.md +0 -243
- hud/cli/rl/init.py +0 -370
- hud/cli/rl/pod.py +0 -501
- hud/cli/rl/ssh.py +0 -322
- hud/cli/rl/train.py +0 -562
- hud/cli/rl/utils.py +0 -165
- hud/datasets/execution/__init__.py +0 -13
- hud/datasets/task.py +0 -116
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/WHEEL +0 -0
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/licenses/LICENSE +0 -0
hud/cli/hf.py
DELETED
|
@@ -1,406 +0,0 @@
|
|
|
1
|
-
"""HuggingFace dataset conversion command for HUD tasks."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import json
|
|
6
|
-
from datetime import datetime
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
|
|
9
|
-
import typer
|
|
10
|
-
|
|
11
|
-
from hud.cli.rl.utils import get_mcp_config_from_lock, read_lock_file, write_lock_file
|
|
12
|
-
from hud.utils.hud_console import HUDConsole
|
|
13
|
-
|
|
14
|
-
hud_console = HUDConsole()
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def hf_command(
|
|
18
|
-
tasks_file: Path | None = None,
|
|
19
|
-
name: str | None = None,
|
|
20
|
-
push: bool = True,
|
|
21
|
-
private: bool = False,
|
|
22
|
-
update_lock: bool = True,
|
|
23
|
-
token: str | None = None,
|
|
24
|
-
) -> None:
|
|
25
|
-
"""📊 Convert tasks to HuggingFace dataset format.
|
|
26
|
-
|
|
27
|
-
Automatically detects task files if not specified.
|
|
28
|
-
Suggests dataset name based on environment if not provided.
|
|
29
|
-
Converts a JSON file containing HUD tasks into a HuggingFace dataset
|
|
30
|
-
and optionally pushes it to the Hub. Also updates hud.lock.yaml with
|
|
31
|
-
the primary dataset reference.
|
|
32
|
-
|
|
33
|
-
Examples:
|
|
34
|
-
hud hf # Auto-detect tasks and suggest name
|
|
35
|
-
hud hf tasks.json # Use specific file, suggest name
|
|
36
|
-
hud hf --name my-org/my-tasks # Auto-detect tasks, use name
|
|
37
|
-
hud hf tasks.json --name hud-evals/web-tasks --private
|
|
38
|
-
hud hf tasks.json --name local-dataset --no-push
|
|
39
|
-
"""
|
|
40
|
-
hud_console.header("HuggingFace Dataset Converter", icon="📊")
|
|
41
|
-
|
|
42
|
-
# Auto-detect task file if not provided
|
|
43
|
-
if tasks_file is None:
|
|
44
|
-
hud_console.info("Looking for task files...")
|
|
45
|
-
|
|
46
|
-
# Common task file patterns
|
|
47
|
-
patterns = [
|
|
48
|
-
"tasks.json",
|
|
49
|
-
"task.json",
|
|
50
|
-
"*_tasks.json",
|
|
51
|
-
"eval*.json",
|
|
52
|
-
"evaluation*.json",
|
|
53
|
-
]
|
|
54
|
-
|
|
55
|
-
json_files = []
|
|
56
|
-
for pattern in patterns:
|
|
57
|
-
json_files.extend(Path(".").glob(pattern))
|
|
58
|
-
|
|
59
|
-
# Remove duplicates and sort
|
|
60
|
-
json_files = sorted(set(json_files))
|
|
61
|
-
|
|
62
|
-
if not json_files:
|
|
63
|
-
hud_console.error("No task files found in current directory")
|
|
64
|
-
hud_console.info("Create a task JSON file (e.g., tasks.json) or specify the file path")
|
|
65
|
-
raise typer.Exit(1)
|
|
66
|
-
elif len(json_files) == 1:
|
|
67
|
-
tasks_file = json_files[0]
|
|
68
|
-
hud_console.info(f"Found task file: {tasks_file}")
|
|
69
|
-
else:
|
|
70
|
-
# Multiple files found, let user choose
|
|
71
|
-
hud_console.info("Multiple task files found:")
|
|
72
|
-
file_choice = hud_console.select(
|
|
73
|
-
"Select a task file to convert:",
|
|
74
|
-
choices=[str(f) for f in json_files],
|
|
75
|
-
)
|
|
76
|
-
tasks_file = Path(file_choice)
|
|
77
|
-
hud_console.success(f"Selected: {tasks_file}")
|
|
78
|
-
|
|
79
|
-
# Validate inputs
|
|
80
|
-
if tasks_file and not tasks_file.exists():
|
|
81
|
-
hud_console.error(f"Tasks file not found: {tasks_file}")
|
|
82
|
-
raise typer.Exit(1)
|
|
83
|
-
|
|
84
|
-
# Suggest dataset name if not provided
|
|
85
|
-
if name is None:
|
|
86
|
-
hud_console.info("Generating dataset name suggestion...")
|
|
87
|
-
|
|
88
|
-
# Try to get HF username from environment or git config
|
|
89
|
-
hf_username = None
|
|
90
|
-
try:
|
|
91
|
-
# Try HF token first
|
|
92
|
-
from huggingface_hub import HfApi
|
|
93
|
-
|
|
94
|
-
api = HfApi(token=token)
|
|
95
|
-
user_info = api.whoami()
|
|
96
|
-
hf_username = user_info.get("name", None)
|
|
97
|
-
except Exception:
|
|
98
|
-
# Try git config as fallback
|
|
99
|
-
try:
|
|
100
|
-
import subprocess
|
|
101
|
-
|
|
102
|
-
result = subprocess.run(
|
|
103
|
-
["git", "config", "user.name"], # noqa: S607
|
|
104
|
-
capture_output=True,
|
|
105
|
-
text=True,
|
|
106
|
-
)
|
|
107
|
-
if result.returncode == 0 and result.stdout.strip():
|
|
108
|
-
hf_username = result.stdout.strip().lower().replace(" ", "-")
|
|
109
|
-
except Exception:
|
|
110
|
-
hud_console.warning("Failed to get HF username from git config")
|
|
111
|
-
|
|
112
|
-
# Get environment name from current directory or lock file
|
|
113
|
-
env_name = Path.cwd().name
|
|
114
|
-
|
|
115
|
-
# Try to get a better name from lock file
|
|
116
|
-
lock_path = Path("hud.lock.yaml")
|
|
117
|
-
if lock_path.exists():
|
|
118
|
-
try:
|
|
119
|
-
with open(lock_path) as f:
|
|
120
|
-
import yaml
|
|
121
|
-
|
|
122
|
-
lock_data = yaml.safe_load(f)
|
|
123
|
-
if "image" in lock_data:
|
|
124
|
-
# Extract name from image like "test:dev@sha256:..."
|
|
125
|
-
image_name = lock_data["image"].split(":")[0].split("/")[-1]
|
|
126
|
-
if image_name and image_name != "local":
|
|
127
|
-
env_name = image_name
|
|
128
|
-
except Exception as e:
|
|
129
|
-
hud_console.warning(f"Failed to get HF username from lock file: {e}")
|
|
130
|
-
|
|
131
|
-
# Generate suggestions
|
|
132
|
-
suggestions = []
|
|
133
|
-
if hf_username:
|
|
134
|
-
suggestions.append(f"{hf_username}/{env_name}-tasks")
|
|
135
|
-
suggestions.append(f"{hf_username}/{env_name}-dataset")
|
|
136
|
-
suggestions.append(f"my-org/{env_name}-tasks")
|
|
137
|
-
suggestions.append(f"hud-evals/{env_name}-tasks")
|
|
138
|
-
|
|
139
|
-
# Let user choose or enter custom
|
|
140
|
-
hud_console.info("Dataset name suggestions:")
|
|
141
|
-
suggestions.append("Enter custom name...")
|
|
142
|
-
|
|
143
|
-
choice = hud_console.select("Select or enter a dataset name:", choices=suggestions)
|
|
144
|
-
|
|
145
|
-
if choice == "Enter custom name...":
|
|
146
|
-
name = typer.prompt("Enter dataset name (e.g., 'my-org/my-dataset')")
|
|
147
|
-
else:
|
|
148
|
-
name = choice
|
|
149
|
-
|
|
150
|
-
hud_console.success(f"Using dataset name: {name}")
|
|
151
|
-
|
|
152
|
-
# Validate dataset name format
|
|
153
|
-
if push and name and "/" not in name:
|
|
154
|
-
hud_console.error("Dataset name must include organization (e.g., 'my-org/my-dataset')")
|
|
155
|
-
hud_console.info("For local-only datasets, use --no-push")
|
|
156
|
-
raise typer.Exit(1)
|
|
157
|
-
|
|
158
|
-
# Load tasks
|
|
159
|
-
hud_console.info(f"Loading tasks from: {tasks_file}")
|
|
160
|
-
try:
|
|
161
|
-
if tasks_file is None:
|
|
162
|
-
raise ValueError("Tasks file is required")
|
|
163
|
-
with open(tasks_file) as f:
|
|
164
|
-
tasks_data = json.load(f)
|
|
165
|
-
except json.JSONDecodeError as e:
|
|
166
|
-
hud_console.error(f"Invalid JSON file: {e}")
|
|
167
|
-
raise typer.Exit(1) from e
|
|
168
|
-
|
|
169
|
-
# Handle both single task and list of tasks
|
|
170
|
-
if isinstance(tasks_data, dict):
|
|
171
|
-
tasks = [tasks_data]
|
|
172
|
-
hud_console.info("Found 1 task")
|
|
173
|
-
elif isinstance(tasks_data, list):
|
|
174
|
-
tasks = tasks_data
|
|
175
|
-
hud_console.info(f"Found {len(tasks)} tasks")
|
|
176
|
-
else:
|
|
177
|
-
hud_console.error("Tasks file must contain a JSON object or array")
|
|
178
|
-
raise typer.Exit(1)
|
|
179
|
-
|
|
180
|
-
# Validate task format
|
|
181
|
-
valid_tasks = []
|
|
182
|
-
for i, task in enumerate(tasks):
|
|
183
|
-
if not isinstance(task, dict):
|
|
184
|
-
hud_console.warning(f"Skipping task {i}: not a JSON object")
|
|
185
|
-
continue
|
|
186
|
-
|
|
187
|
-
# Required fields
|
|
188
|
-
if "prompt" not in task:
|
|
189
|
-
hud_console.warning(f"Skipping task {i}: missing 'prompt' field")
|
|
190
|
-
continue
|
|
191
|
-
|
|
192
|
-
if "evaluate_tool" not in task:
|
|
193
|
-
hud_console.warning(f"Skipping task {i}: missing 'evaluate_tool' field")
|
|
194
|
-
continue
|
|
195
|
-
|
|
196
|
-
# Add default values
|
|
197
|
-
if "id" not in task:
|
|
198
|
-
task["id"] = f"task-{i:04d}"
|
|
199
|
-
|
|
200
|
-
if "mcp_config" not in task:
|
|
201
|
-
# Try to infer from hud.lock.yaml
|
|
202
|
-
mcp_config = get_mcp_config_from_lock()
|
|
203
|
-
if mcp_config:
|
|
204
|
-
task["mcp_config"] = mcp_config
|
|
205
|
-
else:
|
|
206
|
-
hud_console.warning(f"Task {task['id']}: missing 'mcp_config' field")
|
|
207
|
-
continue
|
|
208
|
-
|
|
209
|
-
valid_tasks.append(task)
|
|
210
|
-
|
|
211
|
-
if not valid_tasks:
|
|
212
|
-
hud_console.error("No valid tasks found")
|
|
213
|
-
raise typer.Exit(1)
|
|
214
|
-
|
|
215
|
-
hud_console.success(f"Validated {len(valid_tasks)} tasks")
|
|
216
|
-
|
|
217
|
-
# Check if dataset is suitable for training
|
|
218
|
-
if len(valid_tasks) < 4:
|
|
219
|
-
hud_console.warning(
|
|
220
|
-
f"Dataset has only {len(valid_tasks)} task(s). RL training typically requires at least 4 tasks." # noqa: E501
|
|
221
|
-
)
|
|
222
|
-
use_for_training = hud_console.select(
|
|
223
|
-
"Will this dataset be used for RL training?",
|
|
224
|
-
["Yes, duplicate tasks to reach 4", "No, keep as is"],
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
if use_for_training == "Yes, duplicate tasks to reach 4":
|
|
228
|
-
# Duplicate tasks to reach minimum of 4
|
|
229
|
-
original_count = len(valid_tasks)
|
|
230
|
-
while len(valid_tasks) < 4:
|
|
231
|
-
for task in valid_tasks[:original_count]:
|
|
232
|
-
if len(valid_tasks) >= 4:
|
|
233
|
-
break
|
|
234
|
-
# Create a copy with modified ID
|
|
235
|
-
duplicated_task = task.copy()
|
|
236
|
-
duplicated_task["id"] = (
|
|
237
|
-
f"{task['id']}_dup{len(valid_tasks) - original_count + 1}"
|
|
238
|
-
)
|
|
239
|
-
valid_tasks.append(duplicated_task)
|
|
240
|
-
|
|
241
|
-
hud_console.info(f"Duplicated tasks: {original_count} → {len(valid_tasks)}")
|
|
242
|
-
|
|
243
|
-
# Check if MCP configs should be converted to remote
|
|
244
|
-
sample_mcp_config = valid_tasks[0].get("mcp_config", {})
|
|
245
|
-
if isinstance(sample_mcp_config, str):
|
|
246
|
-
sample_mcp_config = json.loads(sample_mcp_config)
|
|
247
|
-
|
|
248
|
-
# Check config type by looking at all MCP server URLs
|
|
249
|
-
config_type = "unknown"
|
|
250
|
-
remote_image = None
|
|
251
|
-
|
|
252
|
-
# Check all server configs (could be named anything, not just "hud")
|
|
253
|
-
for server_config in sample_mcp_config.values():
|
|
254
|
-
if isinstance(server_config, dict) and "url" in server_config:
|
|
255
|
-
url = server_config.get("url", "")
|
|
256
|
-
if "mcp.hud.so" in url:
|
|
257
|
-
config_type = "remote"
|
|
258
|
-
# Extract image from Mcp-Image header if present
|
|
259
|
-
headers = server_config.get("headers", {})
|
|
260
|
-
found_image = headers.get("Mcp-Image", "")
|
|
261
|
-
if found_image:
|
|
262
|
-
remote_image = found_image
|
|
263
|
-
break
|
|
264
|
-
else:
|
|
265
|
-
# Any non-mcp.hud.so URL means local config
|
|
266
|
-
config_type = "local"
|
|
267
|
-
|
|
268
|
-
if config_type == "remote" and remote_image:
|
|
269
|
-
hud_console.info(f"Tasks already use remote MCP configs with image: {remote_image}")
|
|
270
|
-
|
|
271
|
-
if config_type == "local":
|
|
272
|
-
convert_to_remote = hud_console.select(
|
|
273
|
-
"Tasks use local MCP configs. Convert to remote configs for training?",
|
|
274
|
-
["Yes, convert to remote (requires public image)", "No, keep local configs"],
|
|
275
|
-
)
|
|
276
|
-
|
|
277
|
-
if convert_to_remote == "Yes, convert to remote (requires public image)":
|
|
278
|
-
# Get the image name from lock file
|
|
279
|
-
from hud.cli.rl.utils import get_image_from_lock
|
|
280
|
-
|
|
281
|
-
image = get_image_from_lock()
|
|
282
|
-
|
|
283
|
-
if not image:
|
|
284
|
-
hud_console.error("No image found in hud.lock.yaml")
|
|
285
|
-
hud_console.hint("Run 'hud build' first")
|
|
286
|
-
raise typer.Exit(1)
|
|
287
|
-
|
|
288
|
-
# Check if image contains registry prefix (indicates it's pushed)
|
|
289
|
-
if "/" not in image or image.startswith("local/"):
|
|
290
|
-
# Clean up image name for display (remove SHA if present)
|
|
291
|
-
display_image = image.split("@")[0] if "@" in image else image
|
|
292
|
-
hud_console.warning(f"Image '{display_image}' appears to be local only")
|
|
293
|
-
push_image = hud_console.select(
|
|
294
|
-
"Would you like to push the image to make it publicly available?",
|
|
295
|
-
["Yes, push image", "No, cancel"],
|
|
296
|
-
)
|
|
297
|
-
|
|
298
|
-
if push_image == "Yes, push image":
|
|
299
|
-
hud_console.info("Running 'hud push' to publish image...")
|
|
300
|
-
# Import here to avoid circular imports
|
|
301
|
-
from hud.cli.push import push_command
|
|
302
|
-
|
|
303
|
-
# Run push command (it's synchronous)
|
|
304
|
-
push_command(directory=".", yes=True)
|
|
305
|
-
hud_console.success("Image pushed successfully")
|
|
306
|
-
|
|
307
|
-
# Re-read the image name as it may have changed
|
|
308
|
-
image = get_image_from_lock()
|
|
309
|
-
else:
|
|
310
|
-
hud_console.info("Keeping local MCP configs")
|
|
311
|
-
convert_to_remote = None
|
|
312
|
-
|
|
313
|
-
if convert_to_remote and image:
|
|
314
|
-
# Convert all task configs to remote
|
|
315
|
-
hud_console.info(f"Converting MCP configs to use remote image: {image}")
|
|
316
|
-
|
|
317
|
-
for task in valid_tasks:
|
|
318
|
-
# Create remote MCP config
|
|
319
|
-
remote_config = {
|
|
320
|
-
"hud": {
|
|
321
|
-
"url": "https://mcp.hud.so/v3/mcp",
|
|
322
|
-
"headers": {
|
|
323
|
-
"Authorization": "Bearer $HUD_API_KEY",
|
|
324
|
-
"Mcp-Image": image,
|
|
325
|
-
},
|
|
326
|
-
}
|
|
327
|
-
}
|
|
328
|
-
task["mcp_config"] = remote_config
|
|
329
|
-
|
|
330
|
-
hud_console.success("✓ Converted all tasks to use remote MCP configs")
|
|
331
|
-
|
|
332
|
-
# Convert to HuggingFace format
|
|
333
|
-
dataset_dict = {
|
|
334
|
-
"id": [],
|
|
335
|
-
"prompt": [],
|
|
336
|
-
"mcp_config": [],
|
|
337
|
-
"setup_tool": [],
|
|
338
|
-
"evaluate_tool": [],
|
|
339
|
-
"metadata": [],
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
for task in valid_tasks:
|
|
343
|
-
dataset_dict["id"].append(task["id"])
|
|
344
|
-
dataset_dict["prompt"].append(task["prompt"])
|
|
345
|
-
dataset_dict["mcp_config"].append(json.dumps(task["mcp_config"]))
|
|
346
|
-
dataset_dict["setup_tool"].append(json.dumps(task.get("setup_tool", {})))
|
|
347
|
-
dataset_dict["evaluate_tool"].append(json.dumps(task["evaluate_tool"]))
|
|
348
|
-
dataset_dict["metadata"].append(json.dumps(task.get("metadata", {})))
|
|
349
|
-
|
|
350
|
-
# Push to HuggingFace Hub if requested
|
|
351
|
-
if push:
|
|
352
|
-
try:
|
|
353
|
-
from datasets import Dataset
|
|
354
|
-
except ImportError as e:
|
|
355
|
-
hud_console.error("datasets library not installed")
|
|
356
|
-
hud_console.info("Install with: pip install datasets")
|
|
357
|
-
raise typer.Exit(1) from e
|
|
358
|
-
|
|
359
|
-
hud_console.info(f"Creating HuggingFace dataset: {name}")
|
|
360
|
-
dataset = Dataset.from_dict(dataset_dict)
|
|
361
|
-
|
|
362
|
-
# Set up HF token
|
|
363
|
-
if token:
|
|
364
|
-
import os
|
|
365
|
-
|
|
366
|
-
os.environ["HF_TOKEN"] = token
|
|
367
|
-
|
|
368
|
-
hud_console.info(f"Pushing to Hub (private={private})...")
|
|
369
|
-
try:
|
|
370
|
-
if name is None:
|
|
371
|
-
raise ValueError("Dataset name is required")
|
|
372
|
-
dataset.push_to_hub(name, private=private)
|
|
373
|
-
hud_console.success(f"Dataset published: https://huggingface.co/datasets/{name}")
|
|
374
|
-
except Exception as e:
|
|
375
|
-
hud_console.error(f"Failed to push to Hub: {e}")
|
|
376
|
-
hud_console.hint("Make sure you're logged in: huggingface-cli login")
|
|
377
|
-
raise typer.Exit(1) from e
|
|
378
|
-
else:
|
|
379
|
-
# Save locally
|
|
380
|
-
if name is None:
|
|
381
|
-
raise ValueError("Dataset name is required")
|
|
382
|
-
output_file = Path(f"{name.replace('/', '_')}_dataset.json")
|
|
383
|
-
with open(output_file, "w") as f:
|
|
384
|
-
json.dump(dataset_dict, f, indent=2)
|
|
385
|
-
hud_console.success(f"Dataset saved locally: {output_file}")
|
|
386
|
-
|
|
387
|
-
# Update hud.lock.yaml if requested
|
|
388
|
-
if update_lock:
|
|
389
|
-
update_lock_file(name, len(valid_tasks))
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
def update_lock_file(dataset_name: str, task_count: int) -> None:
|
|
393
|
-
"""Update hud.lock.yaml with primary dataset reference."""
|
|
394
|
-
# Load existing lock file or create new
|
|
395
|
-
lock_data = read_lock_file()
|
|
396
|
-
|
|
397
|
-
# Update dataset info
|
|
398
|
-
lock_data["primary_dataset"] = {
|
|
399
|
-
"name": dataset_name,
|
|
400
|
-
"task_count": task_count,
|
|
401
|
-
"updated_at": datetime.now().isoformat(),
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
# Write back
|
|
405
|
-
if write_lock_file(lock_data):
|
|
406
|
-
hud_console.success(f"Updated hud.lock.yaml with dataset: {dataset_name}")
|
hud/cli/rl/README.md
DELETED
|
@@ -1,243 +0,0 @@
|
|
|
1
|
-
# HUD RL Commands
|
|
2
|
-
|
|
3
|
-
This module provides reinforcement learning commands for training agents on HUD environments using the `hud-vf-gym` adapter and verifiers framework.
|
|
4
|
-
|
|
5
|
-
## Configuration
|
|
6
|
-
|
|
7
|
-
API keys can be configured in two ways:
|
|
8
|
-
|
|
9
|
-
1. **Environment Variables**:
|
|
10
|
-
```bash
|
|
11
|
-
export HUD_API_KEY="your-hud-api-key"
|
|
12
|
-
export WANDB_API_KEY="your-wandb-api-key"
|
|
13
|
-
export PRIME_API_KEY="your-prime-api-key"
|
|
14
|
-
```
|
|
15
|
-
|
|
16
|
-
2. **`.env` File** (recommended):
|
|
17
|
-
Create a `.env` file in your project root:
|
|
18
|
-
```env
|
|
19
|
-
HUD_API_KEY=your-hud-api-key
|
|
20
|
-
WANDB_API_KEY=your-wandb-api-key
|
|
21
|
-
PRIME_API_KEY=your-prime-api-key
|
|
22
|
-
```
|
|
23
|
-
|
|
24
|
-
HUD automatically loads settings from the `.env` file if present.
|
|
25
|
-
|
|
26
|
-
## Quick Start
|
|
27
|
-
|
|
28
|
-
```bash
|
|
29
|
-
# 1. Generate config from environment
|
|
30
|
-
hud rl init my-env:latest
|
|
31
|
-
|
|
32
|
-
# 2. Create dataset from tasks
|
|
33
|
-
hud hf tasks.json --name my-org/my-tasks
|
|
34
|
-
|
|
35
|
-
# 3. Start training (interactive mode)
|
|
36
|
-
hud rl
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
## Commands
|
|
40
|
-
|
|
41
|
-
### `hud rl init`
|
|
42
|
-
|
|
43
|
-
Generates a `hud-vf-gym` configuration file by analyzing a HUD environment:
|
|
44
|
-
|
|
45
|
-
```bash
|
|
46
|
-
hud rl init hudpython/hud-text-2048:latest
|
|
47
|
-
hud rl init my-env:latest -o configs/my-env.yaml
|
|
48
|
-
hud rl init my-env:latest --force # Overwrite existing
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
This command:
|
|
52
|
-
- Analyzes the environment's available tools
|
|
53
|
-
- Generates appropriate action mappings
|
|
54
|
-
- Creates a system prompt with tool descriptions
|
|
55
|
-
- Sets up default parser and rubric configurations
|
|
56
|
-
|
|
57
|
-
### `hud hf`
|
|
58
|
-
|
|
59
|
-
Converts HUD tasks to HuggingFace dataset format:
|
|
60
|
-
|
|
61
|
-
```bash
|
|
62
|
-
hud hf tasks.json --name my-org/my-dataset
|
|
63
|
-
hud hf tasks.json --name my-org/private-dataset --private
|
|
64
|
-
hud hf tasks.json --name local-dataset --no-push # Local only
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
Features:
|
|
68
|
-
- Validates task format
|
|
69
|
-
- Auto-infers MCP config from `hud.lock.yaml`
|
|
70
|
-
- Updates lock file with primary dataset reference
|
|
71
|
-
- Supports both single task and task array formats
|
|
72
|
-
|
|
73
|
-
### `hud rl` (main command)
|
|
74
|
-
|
|
75
|
-
Runs RL training with automatic setup:
|
|
76
|
-
|
|
77
|
-
```bash
|
|
78
|
-
# Interactive mode - prompts for missing components
|
|
79
|
-
hud rl
|
|
80
|
-
|
|
81
|
-
# Specify options
|
|
82
|
-
hud rl --model gpt-4o-mini --dataset my-org/my-tasks
|
|
83
|
-
hud rl --config configs/2048.yaml --gpus 4xH100
|
|
84
|
-
hud rl --gpus 4xH100 --provider prime
|
|
85
|
-
```
|
|
86
|
-
|
|
87
|
-
The command will:
|
|
88
|
-
1. Check for required files (config, dataset)
|
|
89
|
-
2. Offer to generate missing components
|
|
90
|
-
3. Push environment to registry if needed
|
|
91
|
-
4. Start training (local or remote)
|
|
92
|
-
|
|
93
|
-
## Task Format
|
|
94
|
-
|
|
95
|
-
Tasks should follow this JSON format:
|
|
96
|
-
|
|
97
|
-
```json
|
|
98
|
-
{
|
|
99
|
-
"id": "task-001",
|
|
100
|
-
"prompt": "Complete the task description",
|
|
101
|
-
"mcp_config": {
|
|
102
|
-
"hud": {
|
|
103
|
-
"url": "https://mcp.hud.so/v3/mcp",
|
|
104
|
-
"headers": {
|
|
105
|
-
"Authorization": "Bearer $HUD_API_KEY",
|
|
106
|
-
"Mcp-Image": "your-org/your-env:latest"
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
},
|
|
110
|
-
"setup_tool": {
|
|
111
|
-
"name": "setup",
|
|
112
|
-
"arguments": {
|
|
113
|
-
"name": "function_name",
|
|
114
|
-
"param": "value"
|
|
115
|
-
}
|
|
116
|
-
},
|
|
117
|
-
"evaluate_tool": {
|
|
118
|
-
"name": "evaluate",
|
|
119
|
-
"arguments": {
|
|
120
|
-
"name": "evaluator_name",
|
|
121
|
-
"expected": "value"
|
|
122
|
-
}
|
|
123
|
-
},
|
|
124
|
-
"metadata": {
|
|
125
|
-
"difficulty": "easy",
|
|
126
|
-
"category": "task_type"
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
## Configuration Format
|
|
132
|
-
|
|
133
|
-
The generated YAML configs follow the `hud-vf-gym` specification:
|
|
134
|
-
|
|
135
|
-
```yaml
|
|
136
|
-
job:
|
|
137
|
-
name: "RL Training - my-env"
|
|
138
|
-
metadata:
|
|
139
|
-
environment: "my-env:latest"
|
|
140
|
-
|
|
141
|
-
system_prompt: |
|
|
142
|
-
You are an AI agent interacting with my-env.
|
|
143
|
-
|
|
144
|
-
Available tools:
|
|
145
|
-
- tool_name(params): Description
|
|
146
|
-
Usage: <tool>tool_name(...)</tool>
|
|
147
|
-
|
|
148
|
-
parser:
|
|
149
|
-
use_thinking: true
|
|
150
|
-
xml_weight: 0.6
|
|
151
|
-
action_weight: 0.4
|
|
152
|
-
|
|
153
|
-
action_mappings:
|
|
154
|
-
tool_name:
|
|
155
|
-
_tool: "mcp_tool_name"
|
|
156
|
-
_parser:
|
|
157
|
-
positional: ["param1", "param2"]
|
|
158
|
-
param1:
|
|
159
|
-
from_arg: "param1"
|
|
160
|
-
|
|
161
|
-
rubric:
|
|
162
|
-
weights:
|
|
163
|
-
task_completion: 0.8
|
|
164
|
-
tool_execution: 0.1
|
|
165
|
-
format_compliance: 0.1
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
## Lock File Integration
|
|
169
|
-
|
|
170
|
-
The commands integrate with `hud.lock.yaml`:
|
|
171
|
-
|
|
172
|
-
```yaml
|
|
173
|
-
image: "my-org/my-env:latest"
|
|
174
|
-
primary_dataset:
|
|
175
|
-
name: "my-org/my-tasks"
|
|
176
|
-
task_count: 50
|
|
177
|
-
updated_at: "2024-01-01T00:00:00"
|
|
178
|
-
```
|
|
179
|
-
|
|
180
|
-
This allows:
|
|
181
|
-
- Automatic dataset discovery for `hud rl`
|
|
182
|
-
- MCP config inference for tasks
|
|
183
|
-
- Environment image tracking
|
|
184
|
-
|
|
185
|
-
## Remote Training
|
|
186
|
-
|
|
187
|
-
The `hud rl` command fully automates remote training on GPU instances:
|
|
188
|
-
|
|
189
|
-
1. **Automatic Pod Creation**: Provisions GPU instances via Prime Intellect API
|
|
190
|
-
2. **Environment Setup**: Installs all required dependencies automatically
|
|
191
|
-
3. **Training Execution**: Runs distributed training with vLLM inference server
|
|
192
|
-
4. **Live Monitoring**: Streams training logs with WANDB integration
|
|
193
|
-
|
|
194
|
-
### What Happens Automatically
|
|
195
|
-
|
|
196
|
-
When you run `hud rl`, the system will:
|
|
197
|
-
|
|
198
|
-
1. **Create GPU Pod**:
|
|
199
|
-
- Selects lowest-cost provider (typically datacrunch)
|
|
200
|
-
- Allocates specified GPUs (e.g., 2xA100 for GRPO training)
|
|
201
|
-
- Configures with PyTorch CUDA image
|
|
202
|
-
- Polls until SSH is available (5-20 minutes)
|
|
203
|
-
|
|
204
|
-
2. **Transfer Files**:
|
|
205
|
-
- Copies your config YAML to the pod
|
|
206
|
-
- Creates a custom training script
|
|
207
|
-
|
|
208
|
-
3. **Install Dependencies**:
|
|
209
|
-
- Installs `uv` package manager
|
|
210
|
-
- Creates Python 3.12 virtual environment
|
|
211
|
-
- Installs `hud-vf-gym` via Prime registry
|
|
212
|
-
- Installs `verifiers[train]` for GRPO training
|
|
213
|
-
- Installs `flash-attn` for efficient attention
|
|
214
|
-
|
|
215
|
-
4. **Setup Training**:
|
|
216
|
-
- Exports WANDB_API_KEY and HUD_API_KEY
|
|
217
|
-
- Starts vLLM inference server on GPU 0 via tmux
|
|
218
|
-
- Runs GRPO training on GPU 1
|
|
219
|
-
- Logs metrics to Weights & Biases
|
|
220
|
-
|
|
221
|
-
### Required API Keys
|
|
222
|
-
|
|
223
|
-
Ensure these are set in your `.env` file or environment:
|
|
224
|
-
- `HUD_API_KEY`: For HUD telemetry and MCP connections
|
|
225
|
-
- `WANDB_API_KEY`: For training metrics and logging
|
|
226
|
-
- `PRIME_API_KEY`: For pod provisioning
|
|
227
|
-
|
|
228
|
-
### SSH Key Configuration
|
|
229
|
-
|
|
230
|
-
Before using Prime pods:
|
|
231
|
-
1. Generate SSH keys at: https://app.primeintellect.ai/dashboard/profile
|
|
232
|
-
2. Download and save as: `~/.ssh/prime_key.pem`
|
|
233
|
-
3. Set permissions: `chmod 400 ~/.ssh/prime_key.pem`
|
|
234
|
-
4. Configure Prime CLI: `prime config set-ssh-key-path ~/.ssh/prime_key.pem`
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
## Implementation Notes
|
|
238
|
-
|
|
239
|
-
The RL commands are built on top of:
|
|
240
|
-
- `hud-vf-gym`: Generic adapter for HUD environments
|
|
241
|
-
- `verifiers`: RL training framework
|
|
242
|
-
- HuggingFace datasets: Task storage and distribution
|
|
243
|
-
- Prime Intellect infrastructure: GPU provisioning (planned)
|