hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +1 -1
- hud/agents/__init__.py +65 -6
- hud/agents/base.py +33 -15
- hud/agents/claude.py +60 -31
- hud/agents/gateway.py +42 -0
- hud/agents/gemini.py +15 -26
- hud/agents/gemini_cua.py +6 -17
- hud/agents/misc/response_agent.py +7 -0
- hud/agents/openai.py +16 -29
- hud/agents/openai_chat.py +3 -19
- hud/agents/operator.py +5 -17
- hud/agents/resolver.py +70 -0
- hud/agents/tests/test_claude.py +2 -4
- hud/agents/tests/test_openai.py +2 -1
- hud/agents/tests/test_resolver.py +192 -0
- hud/agents/types.py +148 -0
- hud/cli/__init__.py +34 -3
- hud/cli/build.py +37 -5
- hud/cli/dev.py +11 -2
- hud/cli/eval.py +51 -39
- hud/cli/flows/init.py +1 -1
- hud/cli/pull.py +1 -1
- hud/cli/push.py +9 -2
- hud/cli/tests/test_build.py +2 -2
- hud/cli/tests/test_push.py +1 -1
- hud/cli/utils/metadata.py +1 -1
- hud/cli/utils/tests/test_metadata.py +1 -1
- hud/clients/mcp_use.py +6 -1
- hud/datasets/loader.py +17 -18
- hud/datasets/runner.py +16 -10
- hud/datasets/tests/test_loader.py +15 -15
- hud/environment/__init__.py +5 -3
- hud/environment/connection.py +58 -6
- hud/environment/connectors/mcp_config.py +29 -1
- hud/environment/environment.py +218 -77
- hud/environment/router.py +175 -24
- hud/environment/scenarios.py +313 -186
- hud/environment/tests/test_connectors.py +10 -23
- hud/environment/tests/test_environment.py +432 -0
- hud/environment/tests/test_local_connectors.py +81 -40
- hud/environment/tests/test_scenarios.py +820 -14
- hud/eval/context.py +63 -10
- hud/eval/instrument.py +4 -2
- hud/eval/manager.py +79 -12
- hud/eval/task.py +36 -4
- hud/eval/tests/test_eval.py +1 -1
- hud/eval/tests/test_task.py +147 -1
- hud/eval/types.py +2 -0
- hud/eval/utils.py +14 -3
- hud/patches/mcp_patches.py +178 -21
- hud/telemetry/instrument.py +8 -1
- hud/telemetry/tests/test_eval_telemetry.py +8 -8
- hud/tools/__init__.py +2 -0
- hud/tools/agent.py +223 -0
- hud/tools/computer/__init__.py +34 -5
- hud/tools/shell.py +3 -3
- hud/tools/tests/test_agent_tool.py +355 -0
- hud/types.py +62 -34
- hud/utils/hud_console.py +30 -17
- hud/utils/strict_schema.py +1 -1
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/METADATA +2 -2
- {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/RECORD +67 -61
- {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/WHEEL +0 -0
- {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
hud/agents/operator.py
CHANGED
|
@@ -17,14 +17,14 @@ from openai.types.responses.response_input_param import (
|
|
|
17
17
|
FunctionCallOutput,
|
|
18
18
|
)
|
|
19
19
|
from openai.types.shared_params.reasoning import Reasoning
|
|
20
|
-
from pydantic import ConfigDict
|
|
21
20
|
|
|
22
21
|
from hud.tools.computer.settings import computer_settings
|
|
23
22
|
from hud.types import BaseAgentConfig, MCPToolCall, MCPToolResult
|
|
24
23
|
from hud.utils.types import with_signature
|
|
25
24
|
|
|
26
|
-
from .base import
|
|
27
|
-
from .openai import OpenAIAgent
|
|
25
|
+
from .base import MCPAgent
|
|
26
|
+
from .openai import OpenAIAgent
|
|
27
|
+
from .types import OperatorConfig, OperatorCreateParams
|
|
28
28
|
|
|
29
29
|
if TYPE_CHECKING:
|
|
30
30
|
from openai.types.responses.response_computer_tool_call import PendingSafetyCheck
|
|
@@ -50,20 +50,6 @@ what they asked.
|
|
|
50
50
|
""".strip()
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
class OperatorConfig(OpenAIConfig):
|
|
54
|
-
"""Configuration model for `OperatorAgent`."""
|
|
55
|
-
|
|
56
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
57
|
-
|
|
58
|
-
model_name: str = "Operator"
|
|
59
|
-
model: str = "computer-use-preview"
|
|
60
|
-
environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class OperatorCreateParams(BaseCreateParams, OperatorConfig):
|
|
64
|
-
pass
|
|
65
|
-
|
|
66
|
-
|
|
67
53
|
class OperatorAgent(OpenAIAgent):
|
|
68
54
|
"""
|
|
69
55
|
Backwards-compatible Operator agent built on top of OpenAIAgent.
|
|
@@ -129,6 +115,8 @@ class OperatorAgent(OpenAIAgent):
|
|
|
129
115
|
display_height=self._operator_display_height,
|
|
130
116
|
environment=self._operator_environment,
|
|
131
117
|
)
|
|
118
|
+
if tool.name == "computer" or tool.name.endswith("_computer"):
|
|
119
|
+
return None
|
|
132
120
|
return super()._to_openai_tool(tool)
|
|
133
121
|
|
|
134
122
|
def _extract_tool_call(self, item: Any) -> MCPToolCall | None:
|
hud/agents/resolver.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Model resolution - maps model strings to agent classes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from hud.agents.base import MCPAgent
|
|
9
|
+
|
|
10
|
+
__all__ = ["resolve_cls"]
|
|
11
|
+
|
|
12
|
+
_models_cache: list[dict[str, Any]] | None = None
|
|
13
|
+
|
|
14
|
+
# Provider name → AgentType value (only anthropic differs)
|
|
15
|
+
_PROVIDER_TO_AGENT = {"anthropic": "claude"}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _fetch_gateway_models() -> list[dict[str, Any]]:
|
|
19
|
+
"""Fetch available models from HUD gateway (cached)."""
|
|
20
|
+
global _models_cache
|
|
21
|
+
if _models_cache is not None:
|
|
22
|
+
return _models_cache
|
|
23
|
+
|
|
24
|
+
import httpx
|
|
25
|
+
|
|
26
|
+
from hud.settings import settings
|
|
27
|
+
|
|
28
|
+
if not settings.api_key:
|
|
29
|
+
return []
|
|
30
|
+
|
|
31
|
+
try:
|
|
32
|
+
resp = httpx.get(
|
|
33
|
+
f"{settings.hud_gateway_url}/models",
|
|
34
|
+
headers={"Authorization": f"Bearer {settings.api_key}"},
|
|
35
|
+
timeout=10.0,
|
|
36
|
+
)
|
|
37
|
+
resp.raise_for_status()
|
|
38
|
+
data = resp.json()
|
|
39
|
+
_models_cache = data.get("data", data) if isinstance(data, dict) else data
|
|
40
|
+
return _models_cache or []
|
|
41
|
+
except Exception:
|
|
42
|
+
return []
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def resolve_cls(model: str) -> tuple[type[MCPAgent], dict[str, Any] | None]:
|
|
46
|
+
"""Resolve model string to (agent_class, gateway_info).
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
(agent_class, None) for known AgentTypes
|
|
50
|
+
(agent_class, gateway_model_info) for gateway models
|
|
51
|
+
"""
|
|
52
|
+
from hud.types import AgentType
|
|
53
|
+
|
|
54
|
+
# Known AgentType → no gateway info
|
|
55
|
+
try:
|
|
56
|
+
return AgentType(model).cls, None
|
|
57
|
+
except ValueError:
|
|
58
|
+
pass
|
|
59
|
+
|
|
60
|
+
# Gateway lookup
|
|
61
|
+
for m in _fetch_gateway_models():
|
|
62
|
+
if model in (m.get("id"), m.get("name"), m.get("model")):
|
|
63
|
+
provider = (m.get("provider") or "openai_compatible").lower()
|
|
64
|
+
agent_str = _PROVIDER_TO_AGENT.get(provider, provider)
|
|
65
|
+
try:
|
|
66
|
+
return AgentType(agent_str).cls, m
|
|
67
|
+
except ValueError:
|
|
68
|
+
return AgentType.OPENAI_COMPATIBLE.cls, m
|
|
69
|
+
|
|
70
|
+
raise ValueError(f"Model '{model}' not found")
|
hud/agents/tests/test_claude.py
CHANGED
|
@@ -22,7 +22,7 @@ from hud.types import MCPToolCall, MCPToolResult
|
|
|
22
22
|
if TYPE_CHECKING:
|
|
23
23
|
from collections.abc import Generator
|
|
24
24
|
|
|
25
|
-
from anthropic.types.beta import
|
|
25
|
+
from anthropic.types.beta import BetaMessageParam
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class MockEvalContext(EvalContext):
|
|
@@ -123,9 +123,7 @@ class TestClaudeHelperFunctions:
|
|
|
123
123
|
def test_tool_use_content_block(self) -> None:
|
|
124
124
|
"""Test tool result content block creation."""
|
|
125
125
|
tool_use_id = "tool_123"
|
|
126
|
-
content
|
|
127
|
-
text_to_content_block("Result text")
|
|
128
|
-
]
|
|
126
|
+
content = [text_to_content_block("Result text")]
|
|
129
127
|
|
|
130
128
|
result = tool_use_content_block(tool_use_id, content)
|
|
131
129
|
|
hud/agents/tests/test_openai.py
CHANGED
|
@@ -128,8 +128,9 @@ class TestOpenAIAgent:
|
|
|
128
128
|
async def test_init_without_client_no_api_key(self) -> None:
|
|
129
129
|
"""Test agent initialization fails without API key."""
|
|
130
130
|
with patch("hud.agents.openai.settings") as mock_settings:
|
|
131
|
+
mock_settings.api_key = None
|
|
131
132
|
mock_settings.openai_api_key = None
|
|
132
|
-
with pytest.raises(ValueError, match="
|
|
133
|
+
with pytest.raises(ValueError, match="No API key found"):
|
|
133
134
|
OpenAIAgent.create()
|
|
134
135
|
|
|
135
136
|
@pytest.mark.asyncio
|
hud/agents/tests/test_resolver.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Tests for model resolution and create_agent."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from unittest.mock import MagicMock, patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from hud.agents import create_agent
|
|
10
|
+
from hud.agents.resolver import resolve_cls
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TestResolveCls:
|
|
14
|
+
"""Tests for resolve_cls function."""
|
|
15
|
+
|
|
16
|
+
def test_resolves_known_agent_type(self) -> None:
|
|
17
|
+
"""Known AgentType strings resolve to their class."""
|
|
18
|
+
from hud.agents.claude import ClaudeAgent
|
|
19
|
+
|
|
20
|
+
cls, gateway_info = resolve_cls("claude")
|
|
21
|
+
assert cls == ClaudeAgent
|
|
22
|
+
assert gateway_info is None
|
|
23
|
+
|
|
24
|
+
def test_resolves_openai(self) -> None:
|
|
25
|
+
"""Resolves 'openai' to OpenAIAgent."""
|
|
26
|
+
from hud.agents import OpenAIAgent
|
|
27
|
+
|
|
28
|
+
cls, _gateway_info = resolve_cls("openai")
|
|
29
|
+
assert cls == OpenAIAgent
|
|
30
|
+
|
|
31
|
+
def test_resolves_gemini(self) -> None:
|
|
32
|
+
"""Resolves 'gemini' to GeminiAgent."""
|
|
33
|
+
from hud.agents.gemini import GeminiAgent
|
|
34
|
+
|
|
35
|
+
cls, _gateway_info = resolve_cls("gemini")
|
|
36
|
+
assert cls == GeminiAgent
|
|
37
|
+
|
|
38
|
+
def test_unknown_model_without_gateway_raises(self) -> None:
|
|
39
|
+
"""Unknown model with no gateway models raises ValueError."""
|
|
40
|
+
with (
|
|
41
|
+
patch("hud.agents.resolver._fetch_gateway_models", return_value=[]),
|
|
42
|
+
pytest.raises(ValueError, match="not found"),
|
|
43
|
+
):
|
|
44
|
+
resolve_cls("unknown-model-xyz")
|
|
45
|
+
|
|
46
|
+
def test_resolves_gateway_model(self) -> None:
|
|
47
|
+
"""Resolves model found in gateway."""
|
|
48
|
+
from hud.agents import OpenAIAgent
|
|
49
|
+
|
|
50
|
+
mock_models = [
|
|
51
|
+
{"id": "gpt-4o", "model": "gpt-4o", "provider": "openai"},
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
with patch("hud.agents.resolver._fetch_gateway_models", return_value=mock_models):
|
|
55
|
+
cls, info = resolve_cls("gpt-4o")
|
|
56
|
+
assert cls == OpenAIAgent
|
|
57
|
+
assert info is not None
|
|
58
|
+
assert info["id"] == "gpt-4o"
|
|
59
|
+
|
|
60
|
+
def test_resolves_anthropic_provider_to_claude(self) -> None:
|
|
61
|
+
"""Provider 'anthropic' maps to ClaudeAgent."""
|
|
62
|
+
from hud.agents.claude import ClaudeAgent
|
|
63
|
+
|
|
64
|
+
mock_models = [
|
|
65
|
+
{"id": "claude-sonnet", "model": "claude-3-sonnet", "provider": "anthropic"},
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
with patch("hud.agents.resolver._fetch_gateway_models", return_value=mock_models):
|
|
69
|
+
cls, _info = resolve_cls("claude-sonnet")
|
|
70
|
+
assert cls == ClaudeAgent
|
|
71
|
+
|
|
72
|
+
def test_resolves_unknown_provider_to_openai_compatible(self) -> None:
|
|
73
|
+
"""Unknown provider maps to OpenAIChatAgent."""
|
|
74
|
+
from hud.agents.openai_chat import OpenAIChatAgent
|
|
75
|
+
|
|
76
|
+
mock_models = [
|
|
77
|
+
{"id": "custom-model", "model": "custom", "provider": "custom-provider"},
|
|
78
|
+
]
|
|
79
|
+
|
|
80
|
+
with patch("hud.agents.resolver._fetch_gateway_models", return_value=mock_models):
|
|
81
|
+
cls, _info = resolve_cls("custom-model")
|
|
82
|
+
assert cls == OpenAIChatAgent
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class TestCreateAgent:
|
|
86
|
+
"""Tests for create_agent function - gateway-only."""
|
|
87
|
+
|
|
88
|
+
def test_creates_with_gateway_client(self) -> None:
|
|
89
|
+
"""create_agent always uses gateway routing."""
|
|
90
|
+
from hud.agents import OpenAIAgent
|
|
91
|
+
|
|
92
|
+
mock_models = [
|
|
93
|
+
{"id": "gpt-4o", "model": "gpt-4o", "provider": "openai"},
|
|
94
|
+
]
|
|
95
|
+
|
|
96
|
+
with (
|
|
97
|
+
patch("hud.agents.resolver._fetch_gateway_models", return_value=mock_models),
|
|
98
|
+
patch.object(OpenAIAgent, "create") as mock_create,
|
|
99
|
+
patch("hud.agents.gateway.build_gateway_client") as mock_build_client,
|
|
100
|
+
):
|
|
101
|
+
mock_client = MagicMock()
|
|
102
|
+
mock_build_client.return_value = mock_client
|
|
103
|
+
mock_agent = MagicMock()
|
|
104
|
+
mock_create.return_value = mock_agent
|
|
105
|
+
|
|
106
|
+
agent = create_agent("gpt-4o")
|
|
107
|
+
|
|
108
|
+
# Should have set model and model_client
|
|
109
|
+
call_kwargs = mock_create.call_args.kwargs
|
|
110
|
+
assert call_kwargs["model"] == "gpt-4o"
|
|
111
|
+
assert "model_client" in call_kwargs
|
|
112
|
+
assert agent == mock_agent
|
|
113
|
+
|
|
114
|
+
def test_passes_kwargs_to_create(self) -> None:
|
|
115
|
+
"""Extra kwargs are passed to agent.create()."""
|
|
116
|
+
from hud.agents import OpenAIAgent
|
|
117
|
+
|
|
118
|
+
mock_models = [
|
|
119
|
+
{"id": "gpt-4o", "model": "gpt-4o", "provider": "openai"},
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
with (
|
|
123
|
+
patch("hud.agents.resolver._fetch_gateway_models", return_value=mock_models),
|
|
124
|
+
patch.object(OpenAIAgent, "create") as mock_create,
|
|
125
|
+
patch("hud.agents.gateway.build_gateway_client"),
|
|
126
|
+
):
|
|
127
|
+
mock_create.return_value = MagicMock()
|
|
128
|
+
|
|
129
|
+
create_agent("gpt-4o", temperature=0.5, max_tokens=1000)
|
|
130
|
+
|
|
131
|
+
call_kwargs = mock_create.call_args.kwargs
|
|
132
|
+
assert call_kwargs["temperature"] == 0.5
|
|
133
|
+
assert call_kwargs["max_tokens"] == 1000
|
|
134
|
+
|
|
135
|
+
def test_known_agent_type_also_uses_gateway(self) -> None:
|
|
136
|
+
"""Even 'claude' string uses gateway (it's a gateway shortcut)."""
|
|
137
|
+
from hud.agents.claude import ClaudeAgent
|
|
138
|
+
|
|
139
|
+
with (
|
|
140
|
+
patch.object(ClaudeAgent, "create") as mock_create,
|
|
141
|
+
patch("hud.agents.gateway.build_gateway_client") as mock_build_client,
|
|
142
|
+
):
|
|
143
|
+
mock_client = MagicMock()
|
|
144
|
+
mock_build_client.return_value = mock_client
|
|
145
|
+
mock_create.return_value = MagicMock()
|
|
146
|
+
|
|
147
|
+
create_agent("claude")
|
|
148
|
+
|
|
149
|
+
# Should still build gateway client
|
|
150
|
+
mock_build_client.assert_called_once()
|
|
151
|
+
call_kwargs = mock_create.call_args.kwargs
|
|
152
|
+
assert "model_client" in call_kwargs
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class TestBuildGatewayClient:
|
|
156
|
+
"""Tests for build_gateway_client function."""
|
|
157
|
+
|
|
158
|
+
def test_builds_anthropic_client(self) -> None:
|
|
159
|
+
"""Builds AsyncAnthropic for anthropic provider."""
|
|
160
|
+
from hud.agents.gateway import build_gateway_client
|
|
161
|
+
|
|
162
|
+
with patch("hud.settings.settings") as mock_settings:
|
|
163
|
+
mock_settings.api_key = "test-key"
|
|
164
|
+
mock_settings.hud_gateway_url = "https://gateway.hud.ai"
|
|
165
|
+
|
|
166
|
+
with patch("anthropic.AsyncAnthropic") as mock_client_cls:
|
|
167
|
+
build_gateway_client("anthropic")
|
|
168
|
+
mock_client_cls.assert_called_once()
|
|
169
|
+
|
|
170
|
+
def test_builds_openai_client_for_openai(self) -> None:
|
|
171
|
+
"""Builds AsyncOpenAI for openai provider."""
|
|
172
|
+
from hud.agents.gateway import build_gateway_client
|
|
173
|
+
|
|
174
|
+
with patch("hud.settings.settings") as mock_settings:
|
|
175
|
+
mock_settings.api_key = "test-key"
|
|
176
|
+
mock_settings.hud_gateway_url = "https://gateway.hud.ai"
|
|
177
|
+
|
|
178
|
+
with patch("openai.AsyncOpenAI") as mock_client_cls:
|
|
179
|
+
build_gateway_client("openai")
|
|
180
|
+
mock_client_cls.assert_called_once()
|
|
181
|
+
|
|
182
|
+
def test_builds_openai_client_for_unknown(self) -> None:
|
|
183
|
+
"""Builds AsyncOpenAI for unknown providers (openai-compatible)."""
|
|
184
|
+
from hud.agents.gateway import build_gateway_client
|
|
185
|
+
|
|
186
|
+
with patch("hud.settings.settings") as mock_settings:
|
|
187
|
+
mock_settings.api_key = "test-key"
|
|
188
|
+
mock_settings.hud_gateway_url = "https://gateway.hud.ai"
|
|
189
|
+
|
|
190
|
+
with patch("openai.AsyncOpenAI") as mock_client_cls:
|
|
191
|
+
build_gateway_client("together")
|
|
192
|
+
mock_client_cls.assert_called_once()
|
hud/agents/types.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Agent configuration types.
|
|
2
|
+
|
|
3
|
+
Config classes are defined here separately from agent implementations
|
|
4
|
+
to allow importing them without requiring SDK dependencies (anthropic, google-genai).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
from pydantic import AliasChoices, BaseModel, ConfigDict, Field
|
|
12
|
+
|
|
13
|
+
from hud.types import BaseAgentConfig
|
|
14
|
+
|
|
15
|
+
# Alias to accept both 'model' and 'checkpoint_name' (backwards compat)
|
|
16
|
+
_model_alias = AliasChoices("model", "checkpoint_name")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BaseCreateParams(BaseModel):
|
|
20
|
+
"""Runtime parameters for agent creation."""
|
|
21
|
+
|
|
22
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
23
|
+
|
|
24
|
+
ctx: Any = None # EvalContext or Environment
|
|
25
|
+
auto_respond: bool = False
|
|
26
|
+
verbose: bool = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# -----------------------------------------------------------------------------
|
|
30
|
+
# Claude
|
|
31
|
+
# -----------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ClaudeConfig(BaseAgentConfig):
|
|
35
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
36
|
+
|
|
37
|
+
model_name: str = "Claude"
|
|
38
|
+
model: str = Field(default="claude-sonnet-4-5", validation_alias=_model_alias)
|
|
39
|
+
model_client: Any = None # AsyncAnthropic | AsyncAnthropicBedrock
|
|
40
|
+
max_tokens: int = 16384
|
|
41
|
+
use_computer_beta: bool = True
|
|
42
|
+
validate_api_key: bool = True
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ClaudeCreateParams(BaseCreateParams, ClaudeConfig):
|
|
46
|
+
pass
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# -----------------------------------------------------------------------------
|
|
50
|
+
# Gemini
|
|
51
|
+
# -----------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class GeminiConfig(BaseAgentConfig):
|
|
55
|
+
"""Configuration for GeminiAgent."""
|
|
56
|
+
|
|
57
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
58
|
+
|
|
59
|
+
model_name: str = "Gemini"
|
|
60
|
+
model: str = Field(default="gemini-3-pro-preview", validation_alias=_model_alias)
|
|
61
|
+
model_client: Any = None # genai.Client
|
|
62
|
+
temperature: float = 1.0
|
|
63
|
+
top_p: float = 0.95
|
|
64
|
+
top_k: int = 40
|
|
65
|
+
max_output_tokens: int = 8192
|
|
66
|
+
validate_api_key: bool = True
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class GeminiCreateParams(BaseCreateParams, GeminiConfig):
|
|
70
|
+
pass
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GeminiCUAConfig(GeminiConfig):
|
|
74
|
+
"""Configuration for GeminiCUAAgent."""
|
|
75
|
+
|
|
76
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
77
|
+
|
|
78
|
+
model_name: str = "GeminiCUA"
|
|
79
|
+
model: str = Field(
|
|
80
|
+
default="gemini-2.5-computer-use-preview-10-2025", validation_alias=_model_alias
|
|
81
|
+
)
|
|
82
|
+
excluded_predefined_functions: list[str] = Field(default_factory=list)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class GeminiCUACreateParams(BaseCreateParams, GeminiCUAConfig):
|
|
86
|
+
pass
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# -----------------------------------------------------------------------------
|
|
90
|
+
# OpenAI
|
|
91
|
+
# -----------------------------------------------------------------------------
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class OpenAIConfig(BaseAgentConfig):
|
|
95
|
+
"""Configuration for OpenAIAgent."""
|
|
96
|
+
|
|
97
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
98
|
+
|
|
99
|
+
model_name: str = "OpenAI"
|
|
100
|
+
model: str = Field(default="gpt-5.1", validation_alias=_model_alias)
|
|
101
|
+
model_client: Any = None # AsyncOpenAI
|
|
102
|
+
max_output_tokens: int | None = None
|
|
103
|
+
temperature: float | None = None
|
|
104
|
+
reasoning: Any = None # openai Reasoning
|
|
105
|
+
tool_choice: Any = None # openai ToolChoice
|
|
106
|
+
truncation: Literal["auto", "disabled"] | None = None
|
|
107
|
+
parallel_tool_calls: bool | None = None
|
|
108
|
+
validate_api_key: bool = True
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class OpenAICreateParams(BaseCreateParams, OpenAIConfig):
|
|
112
|
+
pass
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class OpenAIChatConfig(BaseAgentConfig):
|
|
116
|
+
"""Configuration for OpenAIChatAgent."""
|
|
117
|
+
|
|
118
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
119
|
+
|
|
120
|
+
model_name: str = "OpenAI Chat"
|
|
121
|
+
model: str = Field(default="gpt-5-mini", validation_alias=_model_alias)
|
|
122
|
+
openai_client: Any = None # AsyncOpenAI
|
|
123
|
+
api_key: str | None = None
|
|
124
|
+
base_url: str | None = None
|
|
125
|
+
completion_kwargs: dict[str, Any] = Field(default_factory=dict)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
|
|
129
|
+
pass
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# -----------------------------------------------------------------------------
|
|
133
|
+
# Operator
|
|
134
|
+
# -----------------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class OperatorConfig(OpenAIConfig):
|
|
138
|
+
"""Configuration for OperatorAgent."""
|
|
139
|
+
|
|
140
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
141
|
+
|
|
142
|
+
model_name: str = "Operator"
|
|
143
|
+
model: str = Field(default="computer-use-preview", validation_alias=_model_alias)
|
|
144
|
+
environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class OperatorCreateParams(BaseCreateParams, OperatorConfig):
|
|
148
|
+
pass
|
hud/cli/__init__.py
CHANGED
|
@@ -741,6 +741,11 @@ def build(
|
|
|
741
741
|
platform: str | None = typer.Option(
|
|
742
742
|
None, "--platform", help="Set Docker target platform (e.g., linux/amd64)"
|
|
743
743
|
),
|
|
744
|
+
secrets: list[str] | None = typer.Option( # noqa: B008
|
|
745
|
+
None,
|
|
746
|
+
"--secret",
|
|
747
|
+
help=("Docker build secret (repeatable), e.g. --secret id=GITHUB_TOKEN,env=GITHUB_TOKEN"),
|
|
748
|
+
),
|
|
744
749
|
remote_cache: str | None = typer.Option(
|
|
745
750
|
None, "--remote-cache", help="Enable remote cache using Amazon ECR with specified repo name"
|
|
746
751
|
),
|
|
@@ -757,7 +762,8 @@ def build(
|
|
|
757
762
|
hud build environments/text_2048 -e API_KEY=secret
|
|
758
763
|
hud build . --tag my-env:v1.0 -e VAR1=value1 -e VAR2=value2
|
|
759
764
|
hud build . --no-cache # Force rebuild
|
|
760
|
-
hud build . --remote-cache my-cache-repo # Use ECR remote cache (requires AWS_ACCOUNT_ID and AWS_DEFAULT_REGION)
|
|
765
|
+
hud build . --remote-cache my-cache-repo # Use ECR remote cache (requires AWS_ACCOUNT_ID and AWS_DEFAULT_REGION)
|
|
766
|
+
hud build . --build-arg NODE_ENV=production # Pass Docker build args[/not dim]
|
|
761
767
|
""" # noqa: E501
|
|
762
768
|
# Parse directory and extra arguments
|
|
763
769
|
if params:
|
|
@@ -767,8 +773,9 @@ def build(
|
|
|
767
773
|
directory = "."
|
|
768
774
|
extra_args = []
|
|
769
775
|
|
|
770
|
-
# Parse environment variables from extra args
|
|
776
|
+
# Parse environment variables and build args from extra args
|
|
771
777
|
env_vars = {}
|
|
778
|
+
build_args = {}
|
|
772
779
|
i = 0
|
|
773
780
|
while i < len(extra_args):
|
|
774
781
|
if extra_args[i] == "-e" and i + 1 < len(extra_args):
|
|
@@ -792,10 +799,34 @@ def build(
|
|
|
792
799
|
key, value = env_arg.split("=", 1)
|
|
793
800
|
env_vars[key] = value
|
|
794
801
|
i += 2
|
|
802
|
+
elif extra_args[i] == "--build-arg" and i + 1 < len(extra_args):
|
|
803
|
+
# Parse --build-arg KEY=VALUE format
|
|
804
|
+
build_arg = extra_args[i + 1]
|
|
805
|
+
if "=" in build_arg:
|
|
806
|
+
key, value = build_arg.split("=", 1)
|
|
807
|
+
build_args[key] = value
|
|
808
|
+
i += 2
|
|
809
|
+
elif extra_args[i].startswith("--build-arg="):
|
|
810
|
+
# Parse --build-arg=KEY=VALUE format
|
|
811
|
+
build_arg = extra_args[i][12:] # Remove --build-arg=
|
|
812
|
+
if "=" in build_arg:
|
|
813
|
+
key, value = build_arg.split("=", 1)
|
|
814
|
+
build_args[key] = value
|
|
815
|
+
i += 1
|
|
795
816
|
else:
|
|
796
817
|
i += 1
|
|
797
818
|
|
|
798
|
-
build_command(
|
|
819
|
+
build_command(
|
|
820
|
+
directory,
|
|
821
|
+
tag,
|
|
822
|
+
no_cache,
|
|
823
|
+
verbose,
|
|
824
|
+
env_vars,
|
|
825
|
+
platform,
|
|
826
|
+
secrets,
|
|
827
|
+
remote_cache,
|
|
828
|
+
build_args or None,
|
|
829
|
+
)
|
|
799
830
|
|
|
800
831
|
|
|
801
832
|
@app.command()
|
hud/cli/build.py
CHANGED
|
@@ -555,11 +555,13 @@ def build_docker_image(
|
|
|
555
555
|
verbose: bool = False,
|
|
556
556
|
build_args: dict[str, str] | None = None,
|
|
557
557
|
platform: str | None = None,
|
|
558
|
+
secrets: list[str] | None = None,
|
|
558
559
|
remote_cache: str | None = None,
|
|
559
560
|
) -> bool:
|
|
560
561
|
"""Build a Docker image from a directory."""
|
|
561
562
|
hud_console = HUDConsole()
|
|
562
563
|
build_args = build_args or {}
|
|
564
|
+
secrets = secrets or []
|
|
563
565
|
|
|
564
566
|
# Check if Dockerfile exists (prefer Dockerfile.hud)
|
|
565
567
|
dockerfile = find_dockerfile(directory)
|
|
@@ -629,6 +631,10 @@ def build_docker_image(
|
|
|
629
631
|
for key, value in build_args.items():
|
|
630
632
|
cmd.extend(["--build-arg", f"{key}={value}"])
|
|
631
633
|
|
|
634
|
+
# Add secrets
|
|
635
|
+
for secret in secrets:
|
|
636
|
+
cmd.extend(["--secret", secret])
|
|
637
|
+
|
|
632
638
|
cmd.append(str(directory))
|
|
633
639
|
|
|
634
640
|
# Always show build output
|
|
@@ -636,7 +642,10 @@ def build_docker_image(
|
|
|
636
642
|
|
|
637
643
|
try:
|
|
638
644
|
# Use Docker's native output formatting - no capture, let Docker handle display
|
|
639
|
-
|
|
645
|
+
env = os.environ.copy()
|
|
646
|
+
if secrets:
|
|
647
|
+
env["DOCKER_BUILDKIT"] = "1"
|
|
648
|
+
result = subprocess.run(cmd, check=False, env=env) # noqa: S603
|
|
640
649
|
return result.returncode == 0
|
|
641
650
|
except Exception as e:
|
|
642
651
|
hud_console.error(f"Build error: {e}")
|
|
@@ -650,11 +659,14 @@ def build_environment(
|
|
|
650
659
|
verbose: bool = False,
|
|
651
660
|
env_vars: dict[str, str] | None = None,
|
|
652
661
|
platform: str | None = None,
|
|
662
|
+
secrets: list[str] | None = None,
|
|
653
663
|
remote_cache: str | None = None,
|
|
664
|
+
build_args: dict[str, str] | None = None,
|
|
654
665
|
) -> None:
|
|
655
666
|
"""Build a HUD environment and generate lock file."""
|
|
656
667
|
hud_console = HUDConsole()
|
|
657
668
|
env_vars = env_vars or {}
|
|
669
|
+
build_args = build_args or {}
|
|
658
670
|
hud_console.header("HUD Environment Build")
|
|
659
671
|
|
|
660
672
|
# Resolve directory
|
|
@@ -721,8 +733,9 @@ def build_environment(
|
|
|
721
733
|
temp_tag,
|
|
722
734
|
no_cache,
|
|
723
735
|
verbose,
|
|
724
|
-
build_args=None,
|
|
736
|
+
build_args=build_args or None,
|
|
725
737
|
platform=platform,
|
|
738
|
+
secrets=secrets,
|
|
726
739
|
remote_cache=remote_cache,
|
|
727
740
|
):
|
|
728
741
|
hud_console.error("Docker build failed")
|
|
@@ -1002,16 +1015,27 @@ def build_environment(
|
|
|
1002
1015
|
if image_tag and image_tag not in [version_tag, latest_tag]:
|
|
1003
1016
|
label_cmd.extend(["-t", image_tag])
|
|
1004
1017
|
|
|
1018
|
+
# Add build args to final image build (same as initial build)
|
|
1019
|
+
for key, value in build_args.items():
|
|
1020
|
+
label_cmd.extend(["--build-arg", f"{key}={value}"])
|
|
1021
|
+
|
|
1022
|
+
# Add secrets to final image build (same as initial build)
|
|
1023
|
+
for secret in secrets or []:
|
|
1024
|
+
label_cmd.extend(["--secret", secret])
|
|
1025
|
+
|
|
1005
1026
|
label_cmd.append(str(env_dir))
|
|
1006
1027
|
|
|
1007
1028
|
# Run rebuild using Docker's native output formatting
|
|
1029
|
+
env = os.environ.copy()
|
|
1030
|
+
if secrets:
|
|
1031
|
+
env["DOCKER_BUILDKIT"] = "1"
|
|
1008
1032
|
if verbose:
|
|
1009
1033
|
# Show Docker's native output when verbose
|
|
1010
|
-
result = subprocess.run(label_cmd, check=False) # noqa: S603
|
|
1034
|
+
result = subprocess.run(label_cmd, check=False, env=env) # noqa: S603
|
|
1011
1035
|
else:
|
|
1012
1036
|
# Capture output for error reporting, but don't show unless it fails
|
|
1013
1037
|
result = subprocess.run( # noqa: S603
|
|
1014
|
-
label_cmd, capture_output=True, text=True, check=False
|
|
1038
|
+
label_cmd, capture_output=True, text=True, check=False, env=env
|
|
1015
1039
|
)
|
|
1016
1040
|
|
|
1017
1041
|
if result.returncode != 0:
|
|
@@ -1105,7 +1129,15 @@ def build_command(
|
|
|
1105
1129
|
verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed output"),
|
|
1106
1130
|
env_vars: dict[str, str] | None = None,
|
|
1107
1131
|
platform: str | None = None,
|
|
1132
|
+
secrets: list[str] | None = typer.Option( # noqa: B008
|
|
1133
|
+
None,
|
|
1134
|
+
"--secret",
|
|
1135
|
+
help=("Docker build secret (repeatable), e.g. --secret id=GITHUB_TOKEN,env=GITHUB_TOKEN"),
|
|
1136
|
+
),
|
|
1108
1137
|
remote_cache: str | None = None,
|
|
1138
|
+
build_args: dict[str, str] | None = None,
|
|
1109
1139
|
) -> None:
|
|
1110
1140
|
"""Build a HUD environment and generate lock file."""
|
|
1111
|
-
build_environment(
|
|
1141
|
+
build_environment(
|
|
1142
|
+
directory, tag, no_cache, verbose, env_vars, platform, secrets, remote_cache, build_args
|
|
1143
|
+
)
|