hud-python 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/claude.py +9 -1
- hud/agents/openai.py +9 -1
- hud/agents/tests/test_claude.py +32 -7
- hud/agents/tests/test_openai.py +29 -6
- hud/cli/__init__.py +209 -75
- hud/cli/build.py +10 -5
- hud/cli/dev.py +20 -39
- hud/cli/eval.py +4 -3
- hud/cli/flows/tasks.py +1 -0
- hud/cli/init.py +222 -629
- hud/cli/pull.py +6 -0
- hud/cli/push.py +2 -1
- hud/cli/rl/remote_runner.py +3 -1
- hud/cli/tests/test_build.py +3 -27
- hud/cli/tests/test_mcp_server.py +1 -12
- hud/cli/utils/config.py +85 -0
- hud/cli/utils/docker.py +21 -39
- hud/cli/utils/environment.py +4 -3
- hud/cli/utils/interactive.py +2 -1
- hud/cli/utils/local_runner.py +204 -0
- hud/cli/utils/metadata.py +3 -1
- hud/cli/utils/package_runner.py +292 -0
- hud/cli/utils/remote_runner.py +4 -1
- hud/clients/mcp_use.py +30 -7
- hud/datasets/parallel.py +3 -1
- hud/datasets/runner.py +5 -2
- hud/otel/context.py +38 -4
- hud/rl/buffer.py +3 -0
- hud/rl/tests/test_learner.py +1 -1
- hud/server/server.py +157 -1
- hud/settings.py +38 -0
- hud/shared/hints.py +1 -1
- hud/utils/tests/test_version.py +1 -1
- hud/utils/tool_shorthand.py +7 -4
- hud/version.py +1 -1
- {hud_python-0.4.34.dist-info → hud_python-0.4.36.dist-info}/METADATA +30 -12
- {hud_python-0.4.34.dist-info → hud_python-0.4.36.dist-info}/RECORD +40 -37
- {hud_python-0.4.34.dist-info → hud_python-0.4.36.dist-info}/WHEEL +0 -0
- {hud_python-0.4.34.dist-info → hud_python-0.4.36.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.34.dist-info → hud_python-0.4.36.dist-info}/licenses/LICENSE +0 -0
hud/agents/claude.py
CHANGED
|
@@ -6,7 +6,7 @@ import copy
|
|
|
6
6
|
import logging
|
|
7
7
|
from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
8
8
|
|
|
9
|
-
from anthropic import AsyncAnthropic, BadRequestError
|
|
9
|
+
from anthropic import Anthropic, AsyncAnthropic, BadRequestError
|
|
10
10
|
from anthropic.types.beta import BetaContentBlockParam, BetaImageBlockParam, BetaTextBlockParam
|
|
11
11
|
|
|
12
12
|
import hud
|
|
@@ -54,6 +54,7 @@ class ClaudeAgent(MCPAgent):
|
|
|
54
54
|
model: str = "claude-sonnet-4-20250514",
|
|
55
55
|
max_tokens: int = 4096,
|
|
56
56
|
use_computer_beta: bool = True,
|
|
57
|
+
validate_api_key: bool = True,
|
|
57
58
|
**kwargs: Any,
|
|
58
59
|
) -> None:
|
|
59
60
|
"""
|
|
@@ -75,6 +76,13 @@ class ClaudeAgent(MCPAgent):
|
|
|
75
76
|
raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
|
|
76
77
|
model_client = AsyncAnthropic(api_key=api_key)
|
|
77
78
|
|
|
79
|
+
# validate api key if requested
|
|
80
|
+
if validate_api_key:
|
|
81
|
+
try:
|
|
82
|
+
Anthropic(api_key=model_client.api_key).models.list()
|
|
83
|
+
except Exception as e:
|
|
84
|
+
raise ValueError(f"Anthropic API key is invalid: {e}") from e
|
|
85
|
+
|
|
78
86
|
self.anthropic_client = model_client
|
|
79
87
|
self.model = model
|
|
80
88
|
self.max_tokens = max_tokens
|
hud/agents/openai.py
CHANGED
|
@@ -6,7 +6,7 @@ import logging
|
|
|
6
6
|
from typing import Any, ClassVar, Literal
|
|
7
7
|
|
|
8
8
|
import mcp.types as types
|
|
9
|
-
from openai import AsyncOpenAI
|
|
9
|
+
from openai import AsyncOpenAI, OpenAI
|
|
10
10
|
from openai.types.responses import (
|
|
11
11
|
ResponseComputerToolCall,
|
|
12
12
|
ResponseInputMessageContentListParam,
|
|
@@ -45,6 +45,7 @@ class OperatorAgent(MCPAgent):
|
|
|
45
45
|
model_client: AsyncOpenAI | None = None,
|
|
46
46
|
model: str = "computer-use-preview",
|
|
47
47
|
environment: Literal["windows", "mac", "linux", "browser"] = "linux",
|
|
48
|
+
validate_api_key: bool = True,
|
|
48
49
|
**kwargs: Any,
|
|
49
50
|
) -> None:
|
|
50
51
|
"""
|
|
@@ -76,6 +77,13 @@ class OperatorAgent(MCPAgent):
|
|
|
76
77
|
self.pending_call_id: str | None = None
|
|
77
78
|
self.pending_safety_checks: list[Any] = []
|
|
78
79
|
|
|
80
|
+
# validate api key if requested
|
|
81
|
+
if validate_api_key:
|
|
82
|
+
try:
|
|
83
|
+
OpenAI(api_key=self.openai_client.api_key).models.list()
|
|
84
|
+
except Exception as e:
|
|
85
|
+
raise ValueError(f"OpenAI API key is invalid: {e}") from e
|
|
86
|
+
|
|
79
87
|
self.model_name = "openai-" + self.model
|
|
80
88
|
|
|
81
89
|
# Append OpenAI-specific instructions to the base system prompt
|
hud/agents/tests/test_claude.py
CHANGED
|
@@ -86,6 +86,7 @@ class TestClaudeAgent:
|
|
|
86
86
|
model_client=mock_model_client,
|
|
87
87
|
model="claude-3-opus-20240229",
|
|
88
88
|
max_tokens=1000,
|
|
89
|
+
validate_api_key=False, # Skip validation in tests
|
|
89
90
|
)
|
|
90
91
|
|
|
91
92
|
assert agent.model_name == "claude-3-opus-20240229"
|
|
@@ -93,10 +94,14 @@ class TestClaudeAgent:
|
|
|
93
94
|
assert agent.anthropic_client == mock_model_client
|
|
94
95
|
|
|
95
96
|
@pytest.mark.asyncio
|
|
96
|
-
async def test_init_without_model_client(self, mock_mcp_client):
|
|
97
|
+
async def test_init_without_model_client(self, mock_mcp_client, mock_anthropic):
|
|
97
98
|
"""Test agent initialization without model client."""
|
|
98
99
|
with patch("hud.settings.settings.anthropic_api_key", "test_key"):
|
|
99
|
-
agent = ClaudeAgent(
|
|
100
|
+
agent = ClaudeAgent(
|
|
101
|
+
mcp_client=mock_mcp_client,
|
|
102
|
+
model="claude-3-opus-20240229",
|
|
103
|
+
validate_api_key=False, # Skip validation in tests
|
|
104
|
+
)
|
|
100
105
|
|
|
101
106
|
assert agent.model_name == "claude-3-opus-20240229"
|
|
102
107
|
assert agent.anthropic_client is not None
|
|
@@ -105,7 +110,11 @@ class TestClaudeAgent:
|
|
|
105
110
|
async def test_format_blocks(self, mock_mcp_client):
|
|
106
111
|
"""Test formatting content blocks into Claude messages."""
|
|
107
112
|
mock_model_client = MagicMock()
|
|
108
|
-
agent = ClaudeAgent(
|
|
113
|
+
agent = ClaudeAgent(
|
|
114
|
+
mcp_client=mock_mcp_client,
|
|
115
|
+
model_client=mock_model_client,
|
|
116
|
+
validate_api_key=False, # Skip validation in tests
|
|
117
|
+
)
|
|
109
118
|
|
|
110
119
|
# Test with text only
|
|
111
120
|
text_blocks: list[types.ContentBlock] = [
|
|
@@ -141,7 +150,11 @@ class TestClaudeAgent:
|
|
|
141
150
|
async def test_format_tool_results_method(self, mock_mcp_client):
|
|
142
151
|
"""Test the agent's format_tool_results method."""
|
|
143
152
|
mock_model_client = MagicMock()
|
|
144
|
-
agent = ClaudeAgent(
|
|
153
|
+
agent = ClaudeAgent(
|
|
154
|
+
mcp_client=mock_mcp_client,
|
|
155
|
+
model_client=mock_model_client,
|
|
156
|
+
validate_api_key=False, # Skip validation in tests
|
|
157
|
+
)
|
|
145
158
|
|
|
146
159
|
tool_calls = [
|
|
147
160
|
MCPToolCall(name="test_tool", arguments={}, id="id1"),
|
|
@@ -171,7 +184,11 @@ class TestClaudeAgent:
|
|
|
171
184
|
"""Test getting model response from Claude API."""
|
|
172
185
|
# Disable telemetry for this test to avoid backend configuration issues
|
|
173
186
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
174
|
-
agent = ClaudeAgent(
|
|
187
|
+
agent = ClaudeAgent(
|
|
188
|
+
mcp_client=mock_mcp_client,
|
|
189
|
+
model_client=mock_anthropic,
|
|
190
|
+
validate_api_key=False, # Skip validation in tests
|
|
191
|
+
)
|
|
175
192
|
|
|
176
193
|
# Mock the API response
|
|
177
194
|
mock_response = MagicMock()
|
|
@@ -215,7 +232,11 @@ class TestClaudeAgent:
|
|
|
215
232
|
"""Test getting text-only response."""
|
|
216
233
|
# Disable telemetry for this test to avoid backend configuration issues
|
|
217
234
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
218
|
-
agent = ClaudeAgent(
|
|
235
|
+
agent = ClaudeAgent(
|
|
236
|
+
mcp_client=mock_mcp_client,
|
|
237
|
+
model_client=mock_anthropic,
|
|
238
|
+
validate_api_key=False, # Skip validation in tests
|
|
239
|
+
)
|
|
219
240
|
|
|
220
241
|
mock_response = MagicMock()
|
|
221
242
|
# Create text block
|
|
@@ -242,7 +263,11 @@ class TestClaudeAgent:
|
|
|
242
263
|
"""Test handling API errors."""
|
|
243
264
|
# Disable telemetry for this test to avoid backend configuration issues
|
|
244
265
|
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
245
|
-
agent = ClaudeAgent(
|
|
266
|
+
agent = ClaudeAgent(
|
|
267
|
+
mcp_client=mock_mcp_client,
|
|
268
|
+
model_client=mock_anthropic,
|
|
269
|
+
validate_api_key=False, # Skip validation in tests
|
|
270
|
+
)
|
|
246
271
|
|
|
247
272
|
# Mock API error
|
|
248
273
|
mock_anthropic.beta.messages.create = AsyncMock(
|
hud/agents/tests/test_openai.py
CHANGED
|
@@ -44,7 +44,10 @@ class TestOperatorAgent:
|
|
|
44
44
|
"""Test agent initialization."""
|
|
45
45
|
mock_model_client = MagicMock()
|
|
46
46
|
agent = OperatorAgent(
|
|
47
|
-
mcp_client=mock_mcp_client,
|
|
47
|
+
mcp_client=mock_mcp_client,
|
|
48
|
+
model_client=mock_model_client,
|
|
49
|
+
model="gpt-4",
|
|
50
|
+
validate_api_key=False, # Skip validation in tests
|
|
48
51
|
)
|
|
49
52
|
|
|
50
53
|
assert agent.model_name == "openai-gpt-4"
|
|
@@ -55,7 +58,11 @@ class TestOperatorAgent:
|
|
|
55
58
|
async def test_format_blocks(self, mock_mcp_client):
|
|
56
59
|
"""Test formatting content blocks."""
|
|
57
60
|
mock_model_client = MagicMock()
|
|
58
|
-
agent = OperatorAgent(
|
|
61
|
+
agent = OperatorAgent(
|
|
62
|
+
mcp_client=mock_mcp_client,
|
|
63
|
+
model_client=mock_model_client,
|
|
64
|
+
validate_api_key=False, # Skip validation in tests
|
|
65
|
+
)
|
|
59
66
|
|
|
60
67
|
# Test with text blocks
|
|
61
68
|
blocks: list[types.ContentBlock] = [
|
|
@@ -85,7 +92,11 @@ class TestOperatorAgent:
|
|
|
85
92
|
@pytest.mark.asyncio
|
|
86
93
|
async def test_format_tool_results(self, mock_mcp_client, mock_openai):
|
|
87
94
|
"""Test formatting tool results."""
|
|
88
|
-
agent = OperatorAgent(
|
|
95
|
+
agent = OperatorAgent(
|
|
96
|
+
mcp_client=mock_mcp_client,
|
|
97
|
+
model_client=mock_openai,
|
|
98
|
+
validate_api_key=False, # Skip validation in tests
|
|
99
|
+
)
|
|
89
100
|
|
|
90
101
|
tool_calls = [
|
|
91
102
|
MCPToolCall(name="test_tool", arguments={}, id="call_123"), # type: ignore
|
|
@@ -111,7 +122,11 @@ class TestOperatorAgent:
|
|
|
111
122
|
@pytest.mark.asyncio
|
|
112
123
|
async def test_format_tool_results_with_error(self, mock_mcp_client, mock_openai):
|
|
113
124
|
"""Test formatting tool results with errors."""
|
|
114
|
-
agent = OperatorAgent(
|
|
125
|
+
agent = OperatorAgent(
|
|
126
|
+
mcp_client=mock_mcp_client,
|
|
127
|
+
model_client=mock_openai,
|
|
128
|
+
validate_api_key=False, # Skip validation in tests
|
|
129
|
+
)
|
|
115
130
|
|
|
116
131
|
tool_calls = [
|
|
117
132
|
MCPToolCall(name="failing_tool", arguments={}, id="call_error"), # type: ignore
|
|
@@ -131,7 +146,11 @@ class TestOperatorAgent:
|
|
|
131
146
|
@pytest.mark.asyncio
|
|
132
147
|
async def test_get_model_response(self, mock_mcp_client, mock_openai):
|
|
133
148
|
"""Test getting model response from OpenAI API."""
|
|
134
|
-
agent = OperatorAgent(
|
|
149
|
+
agent = OperatorAgent(
|
|
150
|
+
mcp_client=mock_mcp_client,
|
|
151
|
+
model_client=mock_openai,
|
|
152
|
+
validate_api_key=False, # Skip validation in tests
|
|
153
|
+
)
|
|
135
154
|
|
|
136
155
|
# Set up available tools so agent doesn't return "No computer use tools available"
|
|
137
156
|
agent._available_tools = [
|
|
@@ -162,7 +181,11 @@ class TestOperatorAgent:
|
|
|
162
181
|
@pytest.mark.asyncio
|
|
163
182
|
async def test_handle_empty_response(self, mock_mcp_client, mock_openai):
|
|
164
183
|
"""Test handling empty response from API."""
|
|
165
|
-
agent = OperatorAgent(
|
|
184
|
+
agent = OperatorAgent(
|
|
185
|
+
mcp_client=mock_mcp_client,
|
|
186
|
+
model_client=mock_openai,
|
|
187
|
+
validate_api_key=False, # Skip validation in tests
|
|
188
|
+
)
|
|
166
189
|
|
|
167
190
|
# Set up available tools
|
|
168
191
|
agent._available_tools = [
|
hud/cli/__init__.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
|
+
import contextlib
|
|
6
7
|
import json
|
|
7
8
|
import sys
|
|
8
9
|
from pathlib import Path
|
|
@@ -28,6 +29,7 @@ from .init import create_environment
|
|
|
28
29
|
from .pull import pull_command
|
|
29
30
|
from .push import push_command
|
|
30
31
|
from .remove import remove_command
|
|
32
|
+
from .utils.config import set_env_values
|
|
31
33
|
from .utils.cursor import get_cursor_config_path, list_cursor_servers, parse_cursor_config
|
|
32
34
|
from .utils.logging import CaptureLogger
|
|
33
35
|
|
|
@@ -116,7 +118,9 @@ def analyze(
|
|
|
116
118
|
image, *docker_args = params
|
|
117
119
|
if live or docker_args: # If docker args provided, assume live mode
|
|
118
120
|
# Build Docker command from image and args
|
|
119
|
-
|
|
121
|
+
from .utils.docker import build_run_command
|
|
122
|
+
|
|
123
|
+
docker_cmd = build_run_command(image, docker_args)
|
|
120
124
|
asyncio.run(analyze_environment(docker_cmd, output_format, verbose))
|
|
121
125
|
else:
|
|
122
126
|
# Fast mode - analyze from metadata
|
|
@@ -239,11 +243,15 @@ def debug(
|
|
|
239
243
|
raise typer.Exit(1)
|
|
240
244
|
|
|
241
245
|
# Build Docker command
|
|
242
|
-
|
|
246
|
+
from .utils.docker import build_run_command
|
|
247
|
+
|
|
248
|
+
command = build_run_command(image_name, docker_args)
|
|
243
249
|
else:
|
|
244
250
|
# Assume it's an image name
|
|
245
251
|
image = first_param
|
|
246
|
-
|
|
252
|
+
from .utils.docker import build_run_command
|
|
253
|
+
|
|
254
|
+
command = build_run_command(image, docker_args)
|
|
247
255
|
else:
|
|
248
256
|
console.print(
|
|
249
257
|
"[red]Error: Must specify a directory, Docker image, --config, or --cursor[/red]"
|
|
@@ -370,12 +378,10 @@ def dev(
|
|
|
370
378
|
False, "--interactive", help="Launch interactive testing mode (HTTP mode only)"
|
|
371
379
|
),
|
|
372
380
|
) -> None:
|
|
373
|
-
"""🔥 Development mode
|
|
374
|
-
|
|
375
|
-
Runs your MCP environment in Docker with automatic restart on file changes.
|
|
381
|
+
"""🔥 Development mode - interactive MCP environment.
|
|
376
382
|
|
|
377
|
-
|
|
378
|
-
|
|
383
|
+
Runs your MCP environment in Docker with mounted source for development.
|
|
384
|
+
The container's CMD determines reload behavior.
|
|
379
385
|
|
|
380
386
|
Examples:
|
|
381
387
|
hud dev # Auto-detect in current directory
|
|
@@ -388,13 +394,12 @@ def dev(
|
|
|
388
394
|
hud dev . --inspector # Launch MCP Inspector (HTTP mode only)
|
|
389
395
|
hud dev . --interactive # Launch interactive testing mode (HTTP mode only)
|
|
390
396
|
hud dev . --no-logs # Disable Docker log streaming
|
|
391
|
-
hud dev . --full-reload # Restart entire container on file changes (instead of just server)
|
|
392
397
|
|
|
393
398
|
# With Docker arguments (after all options):
|
|
394
399
|
hud dev . -e BROWSER_PROVIDER=anchorbrowser -e ANCHOR_API_KEY=xxx
|
|
395
400
|
hud dev . -e API_KEY=secret -v /tmp/data:/data --network host
|
|
396
401
|
hud dev . --build -e DEBUG=true --memory 2g
|
|
397
|
-
"""
|
|
402
|
+
"""
|
|
398
403
|
# Parse directory and Docker arguments
|
|
399
404
|
if params:
|
|
400
405
|
directory = params[0]
|
|
@@ -424,7 +429,7 @@ def dev(
|
|
|
424
429
|
def run(
|
|
425
430
|
params: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
|
|
426
431
|
None,
|
|
427
|
-
help="Docker image followed by optional arguments
|
|
432
|
+
help="Python file/module/package or Docker image followed by optional arguments",
|
|
428
433
|
),
|
|
429
434
|
local: bool = typer.Option(
|
|
430
435
|
False,
|
|
@@ -474,32 +479,152 @@ def run(
|
|
|
474
479
|
"--interactive",
|
|
475
480
|
help="Launch interactive testing mode (HTTP transport only)",
|
|
476
481
|
),
|
|
482
|
+
reload: bool = typer.Option(
|
|
483
|
+
False,
|
|
484
|
+
"--reload",
|
|
485
|
+
help="Enable auto-reload on file changes (local Python files only)",
|
|
486
|
+
),
|
|
487
|
+
watch: list[str] = typer.Option( # noqa: B008
|
|
488
|
+
None,
|
|
489
|
+
"--watch",
|
|
490
|
+
help="Directories to watch for changes (can be used multiple times). Defaults to current directory.", # noqa: E501
|
|
491
|
+
),
|
|
492
|
+
cmd: str | None = typer.Option(
|
|
493
|
+
None,
|
|
494
|
+
"--cmd",
|
|
495
|
+
help="Command to run as MCP server (e.g., 'python -m controller')",
|
|
496
|
+
),
|
|
477
497
|
) -> None:
|
|
478
|
-
"""🚀 Run MCP server
|
|
498
|
+
"""🚀 Run MCP server.
|
|
479
499
|
|
|
480
|
-
|
|
500
|
+
Modes:
|
|
501
|
+
- Python (decorator-based): pass a dotted module path. Example: hud run controller
|
|
502
|
+
The module is imported, decorators register implicitly, and the server runs.
|
|
503
|
+
Use --reload to watch the module/package directory.
|
|
481
504
|
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
hud run my-server:v1 -e API_KEY=xxx -h Run-Id:abc123
|
|
485
|
-
hud run my-server:v1 --transport http --port 9000
|
|
505
|
+
- Command: use --cmd to run any command as an MCP server. Example: hud run --cmd "python -m controller"
|
|
506
|
+
Works with Docker, binaries, or any executable. Supports --reload.
|
|
486
507
|
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
Interactive Testing (local only):
|
|
493
|
-
hud run --local --interactive --transport http hud-text-2048:latest
|
|
494
|
-
hud run --local --interactive --transport http --port 9000 my-server:v1
|
|
495
|
-
"""
|
|
496
|
-
if not params:
|
|
497
|
-
typer.echo("❌ Docker image is required")
|
|
508
|
+
- Docker image: pass a Docker image name (optionally with --local to run locally).
|
|
509
|
+
""" # noqa: E501
|
|
510
|
+
if not params and not cmd:
|
|
511
|
+
typer.echo("❌ Dotted module path, Docker image, or --cmd is required")
|
|
498
512
|
raise typer.Exit(1)
|
|
499
513
|
|
|
500
|
-
#
|
|
501
|
-
|
|
502
|
-
|
|
514
|
+
# Handle --cmd mode
|
|
515
|
+
if cmd:
|
|
516
|
+
import asyncio
|
|
517
|
+
|
|
518
|
+
from .utils.package_runner import run_package_as_mcp
|
|
519
|
+
|
|
520
|
+
asyncio.run(
|
|
521
|
+
run_package_as_mcp(
|
|
522
|
+
cmd, # Pass command string
|
|
523
|
+
transport=transport,
|
|
524
|
+
port=port,
|
|
525
|
+
verbose=verbose,
|
|
526
|
+
reload=reload,
|
|
527
|
+
watch_paths=watch if watch else None,
|
|
528
|
+
)
|
|
529
|
+
)
|
|
530
|
+
return
|
|
531
|
+
|
|
532
|
+
first_param = params[0]
|
|
533
|
+
extra_args = params[1:] if len(params) > 1 else []
|
|
534
|
+
|
|
535
|
+
# Guard: strip accidental nested 'run' token from positional args,
|
|
536
|
+
# which can happen with nested invocations or reload wrappers.
|
|
537
|
+
if first_param == "run" and extra_args:
|
|
538
|
+
first_param, extra_args = extra_args[0], extra_args[1:]
|
|
539
|
+
|
|
540
|
+
# Try to interpret first_param as module[:attr] or file[:attr]
|
|
541
|
+
target = first_param
|
|
542
|
+
server_attr = "mcp"
|
|
543
|
+
if ":" in target:
|
|
544
|
+
target, server_attr = target.split(":", 1)
|
|
545
|
+
|
|
546
|
+
# Only allow dotted import paths or python files for Python mode
|
|
547
|
+
import importlib.util as _importlib_util
|
|
548
|
+
|
|
549
|
+
# Ensure current working directory is importable for local packages like 'controller'
|
|
550
|
+
try:
|
|
551
|
+
import sys as _sys
|
|
552
|
+
from pathlib import Path as _Path
|
|
553
|
+
|
|
554
|
+
cwd_str = str(_Path.cwd())
|
|
555
|
+
if cwd_str not in _sys.path:
|
|
556
|
+
_sys.path.insert(0, cwd_str)
|
|
557
|
+
except Exception: # noqa: S110
|
|
558
|
+
pass
|
|
559
|
+
try:
|
|
560
|
+
# If given a file path, detect and import via file spec
|
|
561
|
+
from pathlib import Path as _Path
|
|
562
|
+
|
|
563
|
+
if target.endswith(".py") and _Path(target).exists():
|
|
564
|
+
spec = _importlib_util.spec_from_file_location("_hud_module", target)
|
|
565
|
+
else:
|
|
566
|
+
spec = _importlib_util.find_spec(target)
|
|
567
|
+
except Exception:
|
|
568
|
+
spec = None
|
|
569
|
+
|
|
570
|
+
# Fallback: treat a local package directory (e.g. 'controller') as a module target
|
|
571
|
+
from pathlib import Path as _Path
|
|
572
|
+
|
|
573
|
+
pkg_dir = _Path(target)
|
|
574
|
+
is_pkg_dir = pkg_dir.is_dir() and (pkg_dir / "__init__.py").exists()
|
|
575
|
+
|
|
576
|
+
is_python_target = (spec is not None) or is_pkg_dir
|
|
577
|
+
|
|
578
|
+
if is_python_target and not (local or remote):
|
|
579
|
+
# Python file/package mode - use implicit MCP server
|
|
580
|
+
import asyncio
|
|
581
|
+
|
|
582
|
+
from .utils.package_runner import run_package_as_mcp, run_with_reload
|
|
583
|
+
|
|
584
|
+
if reload:
|
|
585
|
+
# Run with watchfiles reload
|
|
586
|
+
# Use user-provided watch paths or compute from module
|
|
587
|
+
if watch:
|
|
588
|
+
watch_paths = watch
|
|
589
|
+
else:
|
|
590
|
+
# Compute a watch path that works for dotted modules as well
|
|
591
|
+
watch_paths = [target]
|
|
592
|
+
if spec is not None:
|
|
593
|
+
origin = getattr(spec, "origin", None)
|
|
594
|
+
sublocs = getattr(spec, "submodule_search_locations", None)
|
|
595
|
+
if origin:
|
|
596
|
+
p = _Path(origin)
|
|
597
|
+
# If package __init__.py, watch the package directory
|
|
598
|
+
watch_paths = [str(p.parent if p.name == "__init__.py" else p)]
|
|
599
|
+
elif sublocs:
|
|
600
|
+
with contextlib.suppress(Exception):
|
|
601
|
+
watch_paths = [next(iter(sublocs))]
|
|
602
|
+
|
|
603
|
+
# Always run as subprocess when using reload to enable proper file watching
|
|
604
|
+
# This ensures the parent process can watch files while the child runs the server
|
|
605
|
+
run_with_reload(
|
|
606
|
+
None, # This forces subprocess mode for both stdio and http
|
|
607
|
+
watch_paths,
|
|
608
|
+
verbose=verbose,
|
|
609
|
+
)
|
|
610
|
+
else:
|
|
611
|
+
# Run normally (but still pass reload=False for consistency)
|
|
612
|
+
asyncio.run(
|
|
613
|
+
run_package_as_mcp(
|
|
614
|
+
target,
|
|
615
|
+
transport=transport,
|
|
616
|
+
port=port,
|
|
617
|
+
verbose=verbose,
|
|
618
|
+
server_attr=server_attr,
|
|
619
|
+
reload=False, # Explicitly pass reload state
|
|
620
|
+
watch_paths=None,
|
|
621
|
+
)
|
|
622
|
+
)
|
|
623
|
+
return
|
|
624
|
+
|
|
625
|
+
# Docker image mode
|
|
626
|
+
image = first_param
|
|
627
|
+
docker_args = extra_args
|
|
503
628
|
|
|
504
629
|
# Handle conflicting flags
|
|
505
630
|
if local and remote:
|
|
@@ -741,6 +866,12 @@ def remove(
|
|
|
741
866
|
@app.command()
|
|
742
867
|
def init(
|
|
743
868
|
name: str = typer.Argument(None, help="Environment name (default: current directory name)"),
|
|
869
|
+
preset: str | None = typer.Option(
|
|
870
|
+
None,
|
|
871
|
+
"--preset",
|
|
872
|
+
"-p",
|
|
873
|
+
help="Preset to use: blank, deep-research, browser. If omitted, you'll choose interactively.", # noqa: E501
|
|
874
|
+
),
|
|
744
875
|
directory: str = typer.Option(".", "--dir", "-d", help="Target directory"),
|
|
745
876
|
force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
|
|
746
877
|
) -> None:
|
|
@@ -757,7 +888,7 @@ def init(
|
|
|
757
888
|
hud init my-env # Create in ./my-env/
|
|
758
889
|
hud init my-env --dir /tmp # Create in /tmp/my-env/
|
|
759
890
|
"""
|
|
760
|
-
create_environment(name, directory, force)
|
|
891
|
+
create_environment(name, directory, force, preset)
|
|
761
892
|
|
|
762
893
|
|
|
763
894
|
@app.command()
|
|
@@ -774,7 +905,7 @@ def eval(
|
|
|
774
905
|
source: str | None = typer.Argument(
|
|
775
906
|
None,
|
|
776
907
|
help=(
|
|
777
|
-
"HuggingFace dataset
|
|
908
|
+
"HuggingFace dataset (e.g. 'hud-evals/SheetBench-50') or task JSON file. "
|
|
778
909
|
"If not provided, looks for task.json in current directory."
|
|
779
910
|
),
|
|
780
911
|
),
|
|
@@ -846,54 +977,21 @@ def eval(
|
|
|
846
977
|
|
|
847
978
|
hud_console = HUDConsole()
|
|
848
979
|
|
|
849
|
-
# If no source provided,
|
|
980
|
+
# If no source provided, reuse RL helper to find a tasks file interactively
|
|
850
981
|
if source is None:
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
"
|
|
855
|
-
"
|
|
856
|
-
|
|
857
|
-
"*Eval*.json",
|
|
858
|
-
"*TASK*.json",
|
|
859
|
-
"*EVAL*.json",
|
|
860
|
-
]
|
|
861
|
-
|
|
862
|
-
# First check current directory
|
|
863
|
-
for pattern in patterns:
|
|
864
|
-
json_files.extend(Path(".").glob(pattern))
|
|
865
|
-
|
|
866
|
-
# If no files found, search recursively (but limit depth to avoid deep searches)
|
|
867
|
-
if not json_files:
|
|
868
|
-
for pattern in patterns:
|
|
869
|
-
# Search up to 2 levels deep
|
|
870
|
-
json_files.extend(Path(".").glob(f"*/{pattern}"))
|
|
871
|
-
json_files.extend(Path(".").glob(f"*/*/{pattern}"))
|
|
872
|
-
|
|
873
|
-
# Remove duplicates and sort
|
|
874
|
-
json_files = sorted(set(json_files))
|
|
875
|
-
|
|
876
|
-
if not json_files:
|
|
982
|
+
try:
|
|
983
|
+
from hud.cli.utils.tasks import find_tasks_file
|
|
984
|
+
|
|
985
|
+
source = find_tasks_file(None, msg="Select a tasks file to run")
|
|
986
|
+
hud_console.success(f"Selected: {source}")
|
|
987
|
+
except Exception as e:
|
|
877
988
|
hud_console.error(
|
|
878
989
|
"No source provided and no task/eval JSON files found in current directory"
|
|
879
990
|
)
|
|
880
991
|
hud_console.info(
|
|
881
|
-
"Usage: hud eval <source> or create a task JSON file "
|
|
882
|
-
"(e.g., task.json, eval_config.json)"
|
|
883
|
-
)
|
|
884
|
-
raise typer.Exit(1)
|
|
885
|
-
elif len(json_files) == 1:
|
|
886
|
-
source = str(json_files[0])
|
|
887
|
-
hud_console.info(f"Found task file: {source}")
|
|
888
|
-
else:
|
|
889
|
-
# Multiple files found, let user choose
|
|
890
|
-
hud_console.info("Multiple task files found:")
|
|
891
|
-
file_choice = hud_console.select(
|
|
892
|
-
"Select a task file to run:",
|
|
893
|
-
choices=[str(f) for f in json_files],
|
|
992
|
+
"Usage: hud eval <source> or create a task JSON file (e.g., task.json, tasks.jsonl)"
|
|
894
993
|
)
|
|
895
|
-
|
|
896
|
-
hud_console.success(f"Selected: {source}")
|
|
994
|
+
raise typer.Exit(1) from e
|
|
897
995
|
|
|
898
996
|
# Import eval_command lazily to avoid importing agent dependencies
|
|
899
997
|
try:
|
|
@@ -1085,6 +1183,42 @@ def rl(
|
|
|
1085
1183
|
)
|
|
1086
1184
|
|
|
1087
1185
|
|
|
1186
|
+
@app.command()
|
|
1187
|
+
def set(
|
|
1188
|
+
assignments: list[str] = typer.Argument( # type: ignore[arg-type] # noqa: B008
|
|
1189
|
+
..., help="One or more KEY=VALUE pairs to persist in ~/.hud/.env"
|
|
1190
|
+
),
|
|
1191
|
+
) -> None:
|
|
1192
|
+
"""Persist API keys or other variables for HUD to use by default.
|
|
1193
|
+
|
|
1194
|
+
Examples:
|
|
1195
|
+
hud set ANTHROPIC_API_KEY=sk-... OPENAI_API_KEY=sk-...
|
|
1196
|
+
|
|
1197
|
+
Values are stored in ~/.hud/.env and are loaded by hud.settings with
|
|
1198
|
+
the lowest precedence (overridden by process env and project .env).
|
|
1199
|
+
"""
|
|
1200
|
+
from hud.utils.hud_console import HUDConsole
|
|
1201
|
+
|
|
1202
|
+
hud_console = HUDConsole()
|
|
1203
|
+
|
|
1204
|
+
updates: dict[str, str] = {}
|
|
1205
|
+
for item in assignments:
|
|
1206
|
+
if "=" not in item:
|
|
1207
|
+
hud_console.error(f"Invalid assignment (expected KEY=VALUE): {item}")
|
|
1208
|
+
raise typer.Exit(1)
|
|
1209
|
+
key, value = item.split("=", 1)
|
|
1210
|
+
key = key.strip()
|
|
1211
|
+
value = value.strip()
|
|
1212
|
+
if not key:
|
|
1213
|
+
hud_console.error(f"Invalid key in assignment: {item}")
|
|
1214
|
+
raise typer.Exit(1)
|
|
1215
|
+
updates[key] = value
|
|
1216
|
+
|
|
1217
|
+
path = set_env_values(updates)
|
|
1218
|
+
hud_console.success("Saved credentials to user config")
|
|
1219
|
+
hud_console.info(f"Location: {path}")
|
|
1220
|
+
|
|
1221
|
+
|
|
1088
1222
|
def main() -> None:
|
|
1089
1223
|
"""Main entry point for the CLI."""
|
|
1090
1224
|
# Handle --version flag before Typer parses args
|
hud/cli/build.py
CHANGED
|
@@ -236,10 +236,13 @@ def build_docker_image(
|
|
|
236
236
|
hud_console.error(f"No Dockerfile found in {directory}")
|
|
237
237
|
return False
|
|
238
238
|
|
|
239
|
+
# Default platform to match RL pipeline unless explicitly overridden
|
|
240
|
+
effective_platform = platform if platform is not None else "linux/amd64"
|
|
241
|
+
|
|
239
242
|
# Build command
|
|
240
243
|
cmd = ["docker", "build"]
|
|
241
|
-
if
|
|
242
|
-
cmd.extend(["--platform",
|
|
244
|
+
if effective_platform:
|
|
245
|
+
cmd.extend(["--platform", effective_platform])
|
|
243
246
|
cmd.extend(["-t", tag])
|
|
244
247
|
if no_cache:
|
|
245
248
|
cmd.append("--no-cache")
|
|
@@ -437,8 +440,10 @@ def build_environment(
|
|
|
437
440
|
version_tag = f"{base_name}:{new_version}"
|
|
438
441
|
|
|
439
442
|
label_cmd = ["docker", "build"]
|
|
440
|
-
|
|
441
|
-
|
|
443
|
+
# Use same defaulting for the second build step
|
|
444
|
+
label_platform = platform if platform is not None else "linux/amd64"
|
|
445
|
+
if label_platform:
|
|
446
|
+
label_cmd.extend(["--platform", label_platform])
|
|
442
447
|
label_cmd.extend(
|
|
443
448
|
[
|
|
444
449
|
"--label",
|
|
@@ -489,7 +494,7 @@ def build_environment(
|
|
|
489
494
|
hud_console.warning("Could not retrieve image ID for lock file")
|
|
490
495
|
|
|
491
496
|
# Remove temp image after we're done
|
|
492
|
-
subprocess.run(["docker", "rmi", temp_tag], capture_output=True) # noqa: S603, S607
|
|
497
|
+
subprocess.run(["docker", "rmi", "-f", temp_tag], capture_output=True) # noqa: S603, S607
|
|
493
498
|
|
|
494
499
|
# Add to local registry
|
|
495
500
|
if image_id:
|