hud-python 0.4.27__py3-none-any.whl → 0.4.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +2 -1
- hud/agents/base.py +73 -45
- hud/agents/claude.py +8 -4
- hud/agents/openai_chat_generic.py +65 -40
- hud/agents/tests/test_base.py +0 -4
- hud/agents/tests/test_openai.py +1 -1
- hud/cli/__init__.py +182 -52
- hud/cli/dev.py +8 -9
- hud/cli/eval.py +317 -119
- hud/cli/flows/__init__.py +0 -0
- hud/cli/flows/tasks.py +0 -0
- hud/cli/get.py +160 -0
- hud/cli/rl/__init__.py +563 -71
- hud/cli/rl/config.py +94 -0
- hud/cli/rl/display.py +133 -0
- hud/cli/rl/gpu.py +63 -0
- hud/cli/rl/gpu_utils.py +318 -0
- hud/cli/rl/presets.py +96 -0
- hud/cli/rl/remote_runner.py +348 -0
- hud/cli/rl/rl_api.py +150 -0
- hud/cli/rl/vllm.py +177 -0
- hud/cli/tests/test_analyze_metadata.py +0 -1
- hud/cli/utils/tasks.py +26 -0
- hud/clients/base.py +21 -23
- hud/clients/mcp_use.py +36 -44
- hud/clients/tests/test_mcp_use_retry.py +10 -10
- hud/datasets/__init__.py +4 -3
- hud/datasets/{execution/parallel.py → parallel.py} +1 -1
- hud/datasets/{execution/runner.py → runner.py} +1 -1
- hud/datasets/utils.py +1 -1
- hud/native/tests/test_native_init.py +1 -1
- hud/otel/config.py +1 -1
- hud/otel/instrumentation.py +35 -0
- hud/rl/README.md +31 -0
- hud/rl/__init__.py +1 -0
- hud/rl/actor.py +174 -0
- hud/rl/buffer.py +371 -0
- hud/rl/chat_template.jinja +101 -0
- hud/rl/config.py +184 -0
- hud/rl/distributed.py +95 -0
- hud/rl/learner.py +586 -0
- hud/rl/tests/__init__.py +1 -0
- hud/rl/tests/test_learner.py +171 -0
- hud/rl/train.py +354 -0
- hud/rl/types.py +101 -0
- hud/rl/utils/start_vllm_server.sh +30 -0
- hud/rl/utils.py +524 -0
- hud/rl/vllm_adapter.py +125 -0
- hud/settings.py +6 -0
- hud/telemetry/__init__.py +2 -1
- hud/telemetry/job.py +46 -3
- hud/telemetry/tests/test_trace.py +3 -3
- hud/telemetry/trace.py +85 -13
- hud/tools/computer/hud.py +4 -4
- hud/tools/tests/test_computer.py +3 -3
- hud/tools/tests/test_computer_actions.py +1 -1
- hud/types.py +123 -2
- hud/utils/group_eval.py +223 -0
- hud/utils/hud_console.py +113 -13
- hud/utils/tasks.py +119 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/METADATA +20 -2
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/RECORD +67 -47
- hud/cli/hf.py +0 -406
- hud/cli/rl/README.md +0 -243
- hud/cli/rl/init.py +0 -370
- hud/cli/rl/pod.py +0 -501
- hud/cli/rl/ssh.py +0 -322
- hud/cli/rl/train.py +0 -562
- hud/cli/rl/utils.py +0 -165
- hud/datasets/execution/__init__.py +0 -13
- hud/datasets/task.py +0 -116
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/WHEEL +0 -0
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.27.dist-info → hud_python-0.4.29.dist-info}/licenses/LICENSE +0 -0
hud/cli/rl/init.py
DELETED
|
@@ -1,370 +0,0 @@
|
|
|
1
|
-
"""Initialize RL configuration from environment analysis."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import asyncio
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
from typing import Any
|
|
8
|
-
|
|
9
|
-
import typer
|
|
10
|
-
import yaml
|
|
11
|
-
|
|
12
|
-
from hud.clients import MCPClient
|
|
13
|
-
from hud.utils.hud_console import HUDConsole
|
|
14
|
-
|
|
15
|
-
# Module-level console shared by every function in this file for user-facing output.
hud_console = HUDConsole()
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def init_command_wrapper(directory: str, output: Path | None, force: bool, build: bool) -> None:
    """Resolve interactive choices up front, then run the async config generator.

    All prompting happens here, before the async context is entered.

    Args:
        directory: Path to an environment directory, or a Docker image name.
        output: Destination for the generated config; ``None`` selects the default.
        force: Forwarded unchanged to ``init_command``.
        build: If the directory has no ``hud.lock.yaml``, build without prompting.

    Raises:
        typer.Exit: When the user picks "Cancel" at the prompt.
    """
    hud_console.header("RL Config Generator", icon="🔧")

    def _build_environment() -> None:
        # The build module is imported only on the paths that actually build.
        hud_console.info("Building environment...")
        from hud.cli.build import build_command

        build_command(str(directory), None, False, False, {})

    env_path = Path(directory)
    # Only a real directory that lacks a lock file needs any resolution work.
    lock_missing = (
        env_path.exists() and env_path.is_dir() and not (env_path / "hud.lock.yaml").exists()
    )

    if lock_missing and build:
        # Auto-build was requested; the lock file should exist afterwards.
        _build_environment()
    elif lock_missing:
        # Try the image recorded in pyproject.toml (or auto-generated) first.
        from hud.cli.utils.environment import get_image_name, image_exists

        cached_image, origin = get_image_name(directory)

        if origin != "cache" or not image_exists(cached_image):
            hud_console.warning(f"No hud.lock.yaml found in {directory}")
            choice = hud_console.select(
                "No lock file found. Would you like to:",
                ["Build the environment", "Use Docker image directly", "Cancel"],
            )

            if choice == "Build the environment":
                _build_environment()
            elif choice == "Use Docker image directly":
                # From here on the argument is treated as an image, not a directory.
                directory = typer.prompt("Enter Docker image name")
            else:
                raise typer.Exit(1)

    # Building (if any) already happened above, so the async step never builds.
    asyncio.run(init_command(directory, output, force, False))
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
async def init_command(directory: str, output: Path | None, force: bool, build: bool) -> None:
    """Generate hud-vf-gym config from environment.

    Args:
        directory: Path to an environment directory, or a Docker image name.
        output: Destination for the generated config; ``None`` selects the default.
        force: Forwarded to the generator that writes the config file.
        build: Not used in this function; building is handled by the wrapper.

    Raises:
        typer.Exit: If the lock file is unreadable or incomplete, or if neither
            a lock file nor a cached image is available.
    """
    env_path = Path(directory)

    if not (env_path.exists() and env_path.is_dir()):
        # Not a directory on disk: treat the argument as a Docker image name.
        await analyze_and_generate(directory, output, force)
        return

    lock_path = env_path / "hud.lock.yaml"

    if not lock_path.exists():
        # No lock file: fall back to a cached image, if one is recorded.
        from hud.cli.utils.environment import get_image_name, image_exists

        image, source = get_image_name(directory)

        if source != "cache" or not image_exists(image):
            # The wrapper is expected to have resolved this case already.
            hud_console.error("No valid image or lock file found")
            raise typer.Exit(1)

        hud_console.info(f"Using cached image: {image}")
        await analyze_and_generate(image, output, force)
        return

    hud_console.info(f"Found lock file: {lock_path}")
    lock_data = read_lock_file_path(lock_path)

    if not lock_data:
        hud_console.error("Failed to read lock file")
        raise typer.Exit(1)

    # The lock file supplies both the image reference and the tool list.
    image = lock_data.get("image", "")
    tools = lock_data.get("tools", [])

    if not image:
        hud_console.error("No image found in lock file")
        hud_console.hint("Run 'hud build' to create a proper lock file")
        raise typer.Exit(1)

    if not tools:
        hud_console.error("No tools found in lock file")
        hud_console.hint("Lock file may be outdated. Run 'hud build' to regenerate")
        raise typer.Exit(1)

    await generate_from_lock(image, tools, output, force)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
def read_lock_file_path(lock_path: Path) -> dict[str, Any]:
    """Read lock file from specific path.

    Args:
        lock_path: Location of the YAML lock file.

    Returns:
        The parsed top-level mapping. An empty dict is returned when the file
        is empty, cannot be read or parsed, or does not hold a mapping at the
        top level, so callers can always use ``.get`` on the result.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(lock_path, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception as e:
        # Best-effort read: report the problem and let the caller decide.
        hud_console.error(f"Failed to read lock file: {e}")
        return {}

    if not data:
        return {}

    if not isinstance(data, dict):
        # A list or scalar at the top level is valid YAML but not a lock file.
        hud_console.error(
            f"Failed to read lock file: expected a mapping, got {type(data).__name__}"
        )
        return {}

    return data
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
async def generate_from_lock(
    image: str, tools: list[dict], output: Path | None, force: bool
) -> None:
    """Generate config from lock file data.

    Args:
        image: Docker image reference taken from the lock file.
        tools: Tool entries from the lock file; each needs a ``name`` and may
            carry ``description`` and ``inputSchema``.
        output: Where to write the config; ``None`` means
            ``configs/{image_name}.yaml``.
        force: Overwrite ``output`` if it already exists.

    Raises:
        typer.Exit: If ``output`` exists and ``force`` is false.
    """
    if output is None:
        # Default to configs/{image_name}.yaml. Drop any "@sha256:..." digest
        # first; otherwise a digest-only reference such as "org/env@sha256:abc"
        # would produce "env@sha256.yaml".
        image_name = image.split("@")[0].split("/")[-1].split(":")[0]
        output = Path("configs") / f"{image_name}.yaml"

    if output.exists() and not force:
        hud_console.error(f"Config file already exists: {output}")
        hud_console.info("Use --force to overwrite")
        raise typer.Exit(1)

    # Create output directory if needed
    output.parent.mkdir(parents=True, exist_ok=True)

    # Normalise lock file tools to the full tool format. The lock file may use
    # the newer format (with inputSchema) or the older one (without it).
    full_tools = []
    for tool in tools:
        input_schema = tool.get("inputSchema")
        if input_schema is None:
            # Old format, or an explicit null: substitute an empty object schema.
            input_schema = {"type": "object", "properties": {}, "required": []}
        full_tools.append(
            {
                "name": tool["name"],
                # A null description in the YAML must not leak through as None.
                "description": tool.get("description") or "",
                "inputSchema": input_schema,
            }
        )

    config = await generate_config(image, full_tools)

    with open(output, "w", encoding="utf-8") as f:  # noqa: ASYNC230
        yaml.dump(config, f, default_flow_style=False, sort_keys=False)

    hud_console.success(f"Generated config: {output}")

    # Show summary
    hud_console.section_title("📋 Generated Configuration")
    hud_console.info("Source: hud.lock.yaml")
    hud_console.info(f"Image: {image}")
    hud_console.info(f"System prompt: {len(config['system_prompt'])} characters")
    hud_console.info(f"Action mappings: {len(config['action_mappings'])} tools")
    hud_console.info("")
    hud_console.info("Next steps:")
    hud_console.command_example("hud hf tasks.json --name my-tasks", "Create dataset")
    hud_console.command_example(f"hud rl --config {output}", "Start training")
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
async def analyze_and_generate(image: str, output: Path | None, force: bool) -> None:
    """Analyze Docker image and generate config.

    Starts the image through ``docker run --rm -i`` as an MCP server, asks the
    client for its tool list, and writes the generated config to ``output``.

    Args:
        image: Docker image reference to analyze.
        output: Where to write the config; ``None`` means
            ``configs/{image_name}.yaml``.
        force: Overwrite ``output`` if it already exists.

    Raises:
        typer.Exit: If ``output`` exists and ``force`` is false, or if
            analysis or config generation fails.
    """
    if output is None:
        # Default to configs/{image_name}.yaml. Drop any "@sha256:..." digest
        # first; otherwise a digest-only reference such as "org/env@sha256:abc"
        # would produce "env@sha256.yaml".
        image_name = image.split("@")[0].split("/")[-1].split(":")[0]
        output = Path("configs") / f"{image_name}.yaml"

    if output.exists() and not force:
        hud_console.error(f"Config file already exists: {output}")
        hud_console.info("Use --force to overwrite")
        raise typer.Exit(1)

    # Create output directory if needed
    output.parent.mkdir(parents=True, exist_ok=True)

    hud_console.info(f"Analyzing environment: {image}")

    try:
        # Create MCP config for Docker
        mcp_config = {"local": {"command": "docker", "args": ["run", "--rm", "-i", image]}}

        # Initialize client and analyze
        client = MCPClient(mcp_config=mcp_config, auto_trace=False)
        await client.initialize()

        try:
            analysis = await client.analyze_environment()
            # Tolerate a missing or null "tools" entry in the analysis result.
            tools = analysis.get("tools") or []

            config = await generate_config(image, tools)

            with open(output, "w", encoding="utf-8") as f:  # noqa: ASYNC230
                yaml.dump(config, f, default_flow_style=False, sort_keys=False)

            hud_console.success(f"Generated config: {output}")

            # Show summary
            hud_console.section_title("📋 Generated Configuration")
            hud_console.info(f"System prompt: {len(config['system_prompt'])} characters")
            hud_console.info(f"Action mappings: {len(config['action_mappings'])} tools")
            hud_console.info("")
            hud_console.info("Next steps:")
            hud_console.command_example("hud hf tasks.json --name my-tasks", "Create dataset")
            hud_console.command_example(f"hud rl --config {output}", "Start training")

        finally:
            # Always shut the client down once it has been initialized.
            await client.shutdown()

    except Exception as e:
        hud_console.error(f"Failed to analyze environment: {e}")
        hud_console.hint("Make sure the Docker image exists and contains a valid MCP server")
        raise typer.Exit(1) from e
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
async def generate_config(image: str, tools: list[dict[str, Any]]) -> dict[str, Any]:
    """Generate hud-vf-gym configuration from tool analysis.

    Args:
        image: Docker image reference. Any ``@digest`` suffix is dropped for
            the display name, and the last path component without its tag is
            used as the environment name.
        tools: Tool descriptors. Each needs a ``name``; ``description`` and
            ``inputSchema`` (with ``properties`` / ``required``) are optional
            and may be ``None``.

    Returns:
        The config mapping, with keys in this order: header comment, ``job``,
        ``system_prompt``, ``parser``, ``action_mappings``, ``rubric``,
        ``defaults``.
    """
    # Clean up image name for display
    display_name = image.split("@")[0]  # Remove SHA hash
    env_name = display_name.split("/")[-1].split(":")[0]  # Extract just the env name

    # setup/evaluate are not offered to the agent as actions.
    interaction_tools = [t for t in tools if t["name"] not in ("setup", "evaluate")]

    # --- System prompt -----------------------------------------------------
    tool_descriptions = []
    for tool in interaction_tools:
        # A missing or null schema, or one without properties, gives no
        # parameter information to show.
        schema = tool.get("inputSchema") or {}
        params = schema.get("properties") or {}

        if params:
            required = schema.get("required") or []
            param_parts = []
            for name, spec in params.items():
                optional_mark = "" if name in required else "?"
                param_parts.append(f"{name}{optional_mark}: {spec.get('type', 'any')}")
            param_str = ", ".join(param_parts)
        else:
            param_str = "..."

        # `or` also covers an explicit None, which would otherwise be
        # rendered as the literal text "None".
        desc = tool.get("description") or "No description"

        tool_descriptions.append(
            f"- {tool['name']}({param_str}): {desc}\n Usage: <tool>{tool['name']}(...)</tool>"
        )

    # Add note if any tools are missing schema info
    if any(t.get("inputSchema") is None for t in interaction_tools):
        tool_descriptions.append(
            "\nNote: Some tools are missing parameter information. Update manually if needed."
        )

    tools_block = "\n".join(tool_descriptions)
    system_prompt = (
        "You are an AI agent in a HUD environment.\n"
        "\n"
        "You have access to the following tools:\n"
        "\n"
        f"{tools_block}\n"
        "\n"
        "Always use the exact XML format shown above for tool calls.\n"
        "Think step by step about what you need to do."
    )

    # --- Action mappings ---------------------------------------------------
    action_mappings: dict[str, Any] = {}

    for tool in interaction_tools:
        schema = tool.get("inputSchema")

        if schema is not None:
            # We have schema info (even if it declares no parameters).
            mapping = {
                "_tool": tool["name"],
                # Use required params as positional
                "_parser": {"positional": list(schema.get("required") or [])},
            }
            # Simple 1:1 mapping by default
            for param_name in schema.get("properties") or {}:
                mapping[param_name] = {"from_arg": param_name}
        else:
            # No schema information at all
            mapping = {
                "_tool": tool["name"],
                "_parser": {"positional": []},  # No positional args without schema
                "# TODO": "Update with actual parameters",
            }

        action_mappings[tool["name"]] = mapping

    # Add special "done" action if not present
    if "done" not in action_mappings:
        action_mappings["done"] = {
            "_tool": None,  # Special marker for task completion
            "_parser": {"positional": []},
        }

    # Build full config
    config = {
        "# Generated by hud rl init": f"for {env_name}",
        "job": {
            "name": f"RL Training - {env_name}",
            "metadata": {
                "environment": display_name,
                "full_image": image,
                "generated_by": "hud rl init",
            },
        },
        "system_prompt": system_prompt,
        "parser": {"use_thinking": True, "xml_weight": 0.6, "action_weight": 0.4},
        "action_mappings": action_mappings,
        "rubric": {
            "weights": {"task_completion": 0.8, "tool_execution": 0.1, "format_compliance": 0.1}
        },
        "defaults": {"max_turns": 100},
    }

    return config
|