hud-python 0.4.58__py3-none-any.whl → 0.4.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/__init__.py +2 -0
- hud/agents/gemini.py +492 -0
- hud/agents/tests/test_gemini.py +372 -0
- hud/cli/__init__.py +26 -24
- hud/cli/eval.py +57 -1
- hud/cli/tests/test_eval.py +20 -0
- hud/settings.py +6 -0
- hud/tools/__init__.py +13 -2
- hud/tools/computer/__init__.py +2 -0
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/settings.py +21 -0
- hud/tools/playwright.py +9 -1
- hud/tools/types.py +9 -1
- hud/types.py +1 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.58.dist-info → hud_python-0.4.59.dist-info}/METADATA +2 -1
- {hud_python-0.4.58.dist-info → hud_python-0.4.59.dist-info}/RECORD +21 -18
- {hud_python-0.4.58.dist-info → hud_python-0.4.59.dist-info}/WHEEL +0 -0
- {hud_python-0.4.58.dist-info → hud_python-0.4.59.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.58.dist-info → hud_python-0.4.59.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
"""Tests for Gemini MCP Agent implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
from unittest.mock import AsyncMock, MagicMock, patch
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
from google.genai import types as genai_types
|
|
10
|
+
from mcp import types
|
|
11
|
+
|
|
12
|
+
from hud.agents.gemini import GeminiAgent
|
|
13
|
+
from hud.types import MCPToolCall, MCPToolResult
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TestGeminiAgent:
|
|
17
|
+
"""Test GeminiAgent class."""
|
|
18
|
+
|
|
19
|
+
@pytest.fixture
|
|
20
|
+
def mock_mcp_client(self):
|
|
21
|
+
"""Create a mock MCP client."""
|
|
22
|
+
mcp_client = AsyncMock()
|
|
23
|
+
# Set up the mcp_config attribute as a regular dict, not a coroutine
|
|
24
|
+
mcp_client.mcp_config = {"test_server": {"url": "http://test"}}
|
|
25
|
+
# Mock list_tools to return gemini_computer tool
|
|
26
|
+
mcp_client.list_tools = AsyncMock(
|
|
27
|
+
return_value=[
|
|
28
|
+
types.Tool(
|
|
29
|
+
name="gemini_computer",
|
|
30
|
+
description="Gemini computer use tool",
|
|
31
|
+
inputSchema={},
|
|
32
|
+
)
|
|
33
|
+
]
|
|
34
|
+
)
|
|
35
|
+
mcp_client.initialize = AsyncMock()
|
|
36
|
+
return mcp_client
|
|
37
|
+
|
|
38
|
+
@pytest.fixture
|
|
39
|
+
def mock_gemini_client(self):
|
|
40
|
+
"""Create a mock Gemini client."""
|
|
41
|
+
client = MagicMock()
|
|
42
|
+
client.api_key = "test_key"
|
|
43
|
+
# Mock models.list for validation
|
|
44
|
+
client.models = MagicMock()
|
|
45
|
+
client.models.list = MagicMock(return_value=iter([]))
|
|
46
|
+
return client
|
|
47
|
+
|
|
48
|
+
@pytest.mark.asyncio
|
|
49
|
+
async def test_init(self, mock_mcp_client, mock_gemini_client):
|
|
50
|
+
"""Test agent initialization."""
|
|
51
|
+
agent = GeminiAgent(
|
|
52
|
+
mcp_client=mock_mcp_client,
|
|
53
|
+
model_client=mock_gemini_client,
|
|
54
|
+
model="gemini-2.5-computer-use-preview-10-2025",
|
|
55
|
+
validate_api_key=False, # Skip validation in tests
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
assert agent.model_name == "gemini-2.5-computer-use-preview-10-2025"
|
|
59
|
+
assert agent.model == "gemini-2.5-computer-use-preview-10-2025"
|
|
60
|
+
assert agent.gemini_client == mock_gemini_client
|
|
61
|
+
|
|
62
|
+
@pytest.mark.asyncio
|
|
63
|
+
async def test_init_without_model_client(self, mock_mcp_client):
|
|
64
|
+
"""Test agent initialization without model client."""
|
|
65
|
+
with (
|
|
66
|
+
patch("hud.settings.settings.gemini_api_key", "test_key"),
|
|
67
|
+
patch("hud.agents.gemini.genai.Client") as mock_client_class,
|
|
68
|
+
):
|
|
69
|
+
mock_client = MagicMock()
|
|
70
|
+
mock_client.api_key = "test_key"
|
|
71
|
+
mock_client.models = MagicMock()
|
|
72
|
+
mock_client.models.list = MagicMock(return_value=iter([]))
|
|
73
|
+
mock_client_class.return_value = mock_client
|
|
74
|
+
|
|
75
|
+
agent = GeminiAgent(
|
|
76
|
+
mcp_client=mock_mcp_client,
|
|
77
|
+
model="gemini-2.5-computer-use-preview-10-2025",
|
|
78
|
+
validate_api_key=False,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
assert agent.model_name == "gemini-2.5-computer-use-preview-10-2025"
|
|
82
|
+
assert agent.gemini_client is not None
|
|
83
|
+
|
|
84
|
+
@pytest.mark.asyncio
|
|
85
|
+
async def test_format_blocks(self, mock_mcp_client, mock_gemini_client):
|
|
86
|
+
"""Test formatting content blocks into Gemini messages."""
|
|
87
|
+
agent = GeminiAgent(
|
|
88
|
+
mcp_client=mock_mcp_client,
|
|
89
|
+
model_client=mock_gemini_client,
|
|
90
|
+
validate_api_key=False,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Test with text only
|
|
94
|
+
text_blocks: list[types.ContentBlock] = [
|
|
95
|
+
types.TextContent(type="text", text="Hello, Gemini!")
|
|
96
|
+
]
|
|
97
|
+
messages = await agent.format_blocks(text_blocks)
|
|
98
|
+
assert len(messages) == 1
|
|
99
|
+
assert messages[0].role == "user"
|
|
100
|
+
parts = messages[0].parts
|
|
101
|
+
assert parts is not None
|
|
102
|
+
assert len(parts) == 1
|
|
103
|
+
assert parts[0].text == "Hello, Gemini!"
|
|
104
|
+
|
|
105
|
+
# Test with screenshot
|
|
106
|
+
image_blocks: list[types.ContentBlock] = [
|
|
107
|
+
types.TextContent(type="text", text="Look at this"),
|
|
108
|
+
types.ImageContent(
|
|
109
|
+
type="image",
|
|
110
|
+
data=base64.b64encode(b"fakeimage").decode("utf-8"),
|
|
111
|
+
mimeType="image/png",
|
|
112
|
+
),
|
|
113
|
+
]
|
|
114
|
+
messages = await agent.format_blocks(image_blocks)
|
|
115
|
+
assert len(messages) == 1
|
|
116
|
+
assert messages[0].role == "user"
|
|
117
|
+
parts = messages[0].parts
|
|
118
|
+
assert parts is not None
|
|
119
|
+
assert len(parts) == 2
|
|
120
|
+
# First part is text
|
|
121
|
+
assert parts[0].text == "Look at this"
|
|
122
|
+
# Second part is image - check that it was created from bytes
|
|
123
|
+
assert parts[1].inline_data is not None
|
|
124
|
+
|
|
125
|
+
@pytest.mark.asyncio
|
|
126
|
+
async def test_format_tool_results(self, mock_mcp_client, mock_gemini_client):
|
|
127
|
+
"""Test the agent's format_tool_results method."""
|
|
128
|
+
agent = GeminiAgent(
|
|
129
|
+
mcp_client=mock_mcp_client,
|
|
130
|
+
model_client=mock_gemini_client,
|
|
131
|
+
validate_api_key=False,
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
tool_calls = [
|
|
135
|
+
MCPToolCall(
|
|
136
|
+
name="gemini_computer",
|
|
137
|
+
arguments={"action": "click_at", "x": 100, "y": 200},
|
|
138
|
+
id="call_1", # type: ignore
|
|
139
|
+
gemini_name="click_at", # type: ignore
|
|
140
|
+
),
|
|
141
|
+
]
|
|
142
|
+
|
|
143
|
+
tool_results = [
|
|
144
|
+
MCPToolResult(
|
|
145
|
+
content=[
|
|
146
|
+
types.TextContent(type="text", text="Clicked successfully"),
|
|
147
|
+
types.ImageContent(
|
|
148
|
+
type="image",
|
|
149
|
+
data=base64.b64encode(b"screenshot").decode("utf-8"),
|
|
150
|
+
mimeType="image/png",
|
|
151
|
+
),
|
|
152
|
+
],
|
|
153
|
+
isError=False,
|
|
154
|
+
),
|
|
155
|
+
]
|
|
156
|
+
|
|
157
|
+
messages = await agent.format_tool_results(tool_calls, tool_results)
|
|
158
|
+
|
|
159
|
+
# format_tool_results returns a single user message with function responses
|
|
160
|
+
assert len(messages) == 1
|
|
161
|
+
assert messages[0].role == "user"
|
|
162
|
+
# The content contains function response parts
|
|
163
|
+
parts = messages[0].parts
|
|
164
|
+
assert parts is not None
|
|
165
|
+
assert len(parts) == 1
|
|
166
|
+
function_response = parts[0].function_response
|
|
167
|
+
assert function_response is not None
|
|
168
|
+
assert function_response.name == "click_at"
|
|
169
|
+
response_payload = function_response.response or {}
|
|
170
|
+
assert response_payload.get("success") is True
|
|
171
|
+
|
|
172
|
+
@pytest.mark.asyncio
|
|
173
|
+
async def test_format_tool_results_with_error(self, mock_mcp_client, mock_gemini_client):
|
|
174
|
+
"""Test formatting tool results with errors."""
|
|
175
|
+
agent = GeminiAgent(
|
|
176
|
+
mcp_client=mock_mcp_client,
|
|
177
|
+
model_client=mock_gemini_client,
|
|
178
|
+
validate_api_key=False,
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
tool_calls = [
|
|
182
|
+
MCPToolCall(
|
|
183
|
+
name="gemini_computer",
|
|
184
|
+
arguments={"action": "invalid"},
|
|
185
|
+
id="call_error", # type: ignore
|
|
186
|
+
gemini_name="invalid_action", # type: ignore
|
|
187
|
+
),
|
|
188
|
+
]
|
|
189
|
+
|
|
190
|
+
tool_results = [
|
|
191
|
+
MCPToolResult(
|
|
192
|
+
content=[types.TextContent(type="text", text="Action failed: invalid action")],
|
|
193
|
+
isError=True,
|
|
194
|
+
),
|
|
195
|
+
]
|
|
196
|
+
|
|
197
|
+
messages = await agent.format_tool_results(tool_calls, tool_results)
|
|
198
|
+
|
|
199
|
+
# Check that error is in the response
|
|
200
|
+
assert len(messages) == 1
|
|
201
|
+
assert messages[0].role == "user"
|
|
202
|
+
parts = messages[0].parts
|
|
203
|
+
assert parts is not None
|
|
204
|
+
function_response = parts[0].function_response
|
|
205
|
+
assert function_response is not None
|
|
206
|
+
response_payload = function_response.response or {}
|
|
207
|
+
assert "error" in response_payload
|
|
208
|
+
|
|
209
|
+
@pytest.mark.asyncio
|
|
210
|
+
async def test_get_response(self, mock_mcp_client, mock_gemini_client):
|
|
211
|
+
"""Test getting model response from Gemini API."""
|
|
212
|
+
# Disable telemetry for this test
|
|
213
|
+
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
214
|
+
agent = GeminiAgent(
|
|
215
|
+
mcp_client=mock_mcp_client,
|
|
216
|
+
model_client=mock_gemini_client,
|
|
217
|
+
validate_api_key=False,
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
# Set up available tools
|
|
221
|
+
agent._available_tools = [
|
|
222
|
+
types.Tool(name="gemini_computer", description="Computer tool", inputSchema={})
|
|
223
|
+
]
|
|
224
|
+
|
|
225
|
+
# Mock the API response
|
|
226
|
+
mock_response = MagicMock()
|
|
227
|
+
mock_candidate = MagicMock()
|
|
228
|
+
|
|
229
|
+
# Create text part
|
|
230
|
+
text_part = MagicMock()
|
|
231
|
+
text_part.text = "I will click at coordinates"
|
|
232
|
+
text_part.function_call = None
|
|
233
|
+
|
|
234
|
+
# Create function call part
|
|
235
|
+
function_call_part = MagicMock()
|
|
236
|
+
function_call_part.text = None
|
|
237
|
+
function_call_part.function_call = MagicMock()
|
|
238
|
+
function_call_part.function_call.name = "click_at"
|
|
239
|
+
function_call_part.function_call.args = {"x": 100, "y": 200}
|
|
240
|
+
|
|
241
|
+
mock_candidate.content = MagicMock()
|
|
242
|
+
mock_candidate.content.parts = [text_part, function_call_part]
|
|
243
|
+
|
|
244
|
+
mock_response.candidates = [mock_candidate]
|
|
245
|
+
|
|
246
|
+
mock_gemini_client.models = MagicMock()
|
|
247
|
+
mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
|
|
248
|
+
|
|
249
|
+
messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Click")])]
|
|
250
|
+
response = await agent.get_response(messages)
|
|
251
|
+
|
|
252
|
+
assert response.content == "I will click at coordinates"
|
|
253
|
+
assert len(response.tool_calls) == 1
|
|
254
|
+
assert response.tool_calls[0].arguments == {"action": "click_at", "x": 100, "y": 200}
|
|
255
|
+
assert response.done is False
|
|
256
|
+
|
|
257
|
+
@pytest.mark.asyncio
|
|
258
|
+
async def test_get_response_text_only(self, mock_mcp_client, mock_gemini_client):
|
|
259
|
+
"""Test getting text-only response."""
|
|
260
|
+
# Disable telemetry for this test
|
|
261
|
+
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
262
|
+
agent = GeminiAgent(
|
|
263
|
+
mcp_client=mock_mcp_client,
|
|
264
|
+
model_client=mock_gemini_client,
|
|
265
|
+
validate_api_key=False,
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
# Mock the API response with text only
|
|
269
|
+
mock_response = MagicMock()
|
|
270
|
+
mock_candidate = MagicMock()
|
|
271
|
+
|
|
272
|
+
text_part = MagicMock()
|
|
273
|
+
text_part.text = "Task completed successfully"
|
|
274
|
+
text_part.function_call = None
|
|
275
|
+
|
|
276
|
+
mock_candidate.content = MagicMock()
|
|
277
|
+
mock_candidate.content.parts = [text_part]
|
|
278
|
+
|
|
279
|
+
mock_response.candidates = [mock_candidate]
|
|
280
|
+
|
|
281
|
+
mock_gemini_client.models = MagicMock()
|
|
282
|
+
mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
|
|
283
|
+
|
|
284
|
+
messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Status?")])]
|
|
285
|
+
response = await agent.get_response(messages)
|
|
286
|
+
|
|
287
|
+
assert response.content == "Task completed successfully"
|
|
288
|
+
assert response.tool_calls == []
|
|
289
|
+
assert response.done is True
|
|
290
|
+
|
|
291
|
+
@pytest.mark.asyncio
|
|
292
|
+
async def test_convert_tools_for_gemini(self, mock_mcp_client, mock_gemini_client):
|
|
293
|
+
"""Test converting MCP tools to Gemini format."""
|
|
294
|
+
agent = GeminiAgent(
|
|
295
|
+
mcp_client=mock_mcp_client,
|
|
296
|
+
model_client=mock_gemini_client,
|
|
297
|
+
validate_api_key=False,
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
# Set up available tools
|
|
301
|
+
agent._available_tools = [
|
|
302
|
+
types.Tool(
|
|
303
|
+
name="gemini_computer",
|
|
304
|
+
description="Computer tool",
|
|
305
|
+
inputSchema={"type": "object"},
|
|
306
|
+
),
|
|
307
|
+
types.Tool(
|
|
308
|
+
name="calculator",
|
|
309
|
+
description="Calculator tool",
|
|
310
|
+
inputSchema={
|
|
311
|
+
"type": "object",
|
|
312
|
+
"properties": {"operation": {"type": "string"}},
|
|
313
|
+
},
|
|
314
|
+
),
|
|
315
|
+
]
|
|
316
|
+
|
|
317
|
+
gemini_tools = agent._convert_tools_for_gemini()
|
|
318
|
+
|
|
319
|
+
# Should have 2 tools: computer_use and calculator
|
|
320
|
+
assert len(gemini_tools) == 2
|
|
321
|
+
|
|
322
|
+
# First should be computer use tool
|
|
323
|
+
assert gemini_tools[0].computer_use is not None
|
|
324
|
+
assert (
|
|
325
|
+
gemini_tools[0].computer_use.environment == genai_types.Environment.ENVIRONMENT_BROWSER
|
|
326
|
+
)
|
|
327
|
+
|
|
328
|
+
# Second should be calculator as function declaration
|
|
329
|
+
assert gemini_tools[1].function_declarations is not None
|
|
330
|
+
assert len(gemini_tools[1].function_declarations) == 1
|
|
331
|
+
assert gemini_tools[1].function_declarations[0].name == "calculator"
|
|
332
|
+
|
|
333
|
+
@pytest.mark.asyncio
|
|
334
|
+
async def test_create_user_message(self, mock_mcp_client, mock_gemini_client):
|
|
335
|
+
"""Test creating a user message."""
|
|
336
|
+
agent = GeminiAgent(
|
|
337
|
+
mcp_client=mock_mcp_client,
|
|
338
|
+
model_client=mock_gemini_client,
|
|
339
|
+
validate_api_key=False,
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
message = await agent.create_user_message("Hello Gemini")
|
|
343
|
+
|
|
344
|
+
assert message.role == "user"
|
|
345
|
+
parts = message.parts
|
|
346
|
+
assert parts is not None
|
|
347
|
+
assert len(parts) == 1
|
|
348
|
+
assert parts[0].text == "Hello Gemini"
|
|
349
|
+
|
|
350
|
+
@pytest.mark.asyncio
|
|
351
|
+
async def test_handle_empty_response(self, mock_mcp_client, mock_gemini_client):
|
|
352
|
+
"""Test handling empty response from API."""
|
|
353
|
+
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
354
|
+
agent = GeminiAgent(
|
|
355
|
+
mcp_client=mock_mcp_client,
|
|
356
|
+
model_client=mock_gemini_client,
|
|
357
|
+
validate_api_key=False,
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
# Mock empty response
|
|
361
|
+
mock_response = MagicMock()
|
|
362
|
+
mock_response.candidates = []
|
|
363
|
+
|
|
364
|
+
mock_gemini_client.models = MagicMock()
|
|
365
|
+
mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
|
|
366
|
+
|
|
367
|
+
messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Hi")])]
|
|
368
|
+
response = await agent.get_response(messages)
|
|
369
|
+
|
|
370
|
+
assert response.content == ""
|
|
371
|
+
assert response.tool_calls == []
|
|
372
|
+
assert response.done is True
|
hud/cli/__init__.py
CHANGED
|
@@ -93,14 +93,14 @@ def analyze(
|
|
|
93
93
|
) -> None:
|
|
94
94
|
"""🔍 Analyze MCP environment - discover tools, resources, and capabilities.
|
|
95
95
|
|
|
96
|
-
By default, uses cached metadata for instant results.
|
|
96
|
+
[not dim]By default, uses cached metadata for instant results.
|
|
97
97
|
Use --live to run the container for real-time analysis.
|
|
98
98
|
|
|
99
99
|
Examples:
|
|
100
100
|
hud analyze hudpython/test_init # Fast metadata inspection
|
|
101
101
|
hud analyze my-env --live # Full container analysis
|
|
102
102
|
hud analyze --config mcp-config.json # From MCP config
|
|
103
|
-
hud analyze --cursor text-2048-dev # From Cursor config
|
|
103
|
+
hud analyze --cursor text-2048-dev # From Cursor config[/not dim]
|
|
104
104
|
"""
|
|
105
105
|
if config:
|
|
106
106
|
# Load config from JSON file (always live for configs)
|
|
@@ -177,7 +177,7 @@ def debug(
|
|
|
177
177
|
) -> None:
|
|
178
178
|
"""🐛 Debug MCP environment - test initialization, tools, and readiness.
|
|
179
179
|
|
|
180
|
-
Examples:
|
|
180
|
+
[not dim]Examples:
|
|
181
181
|
hud debug . # Debug current directory
|
|
182
182
|
hud debug environments/browser # Debug specific directory
|
|
183
183
|
hud debug . --build # Build then debug
|
|
@@ -185,7 +185,7 @@ def debug(
|
|
|
185
185
|
hud debug my-mcp-server:v1 -e API_KEY=xxx
|
|
186
186
|
hud debug --config mcp-config.json
|
|
187
187
|
hud debug --cursor text-2048-dev
|
|
188
|
-
hud debug . --max-phase 3 # Stop after phase 3
|
|
188
|
+
hud debug . --max-phase 3 # Stop after phase 3[/not dim]
|
|
189
189
|
"""
|
|
190
190
|
# Import here to avoid circular imports
|
|
191
191
|
from hud.utils.hud_console import HUDConsole
|
|
@@ -403,7 +403,7 @@ def dev(
|
|
|
403
403
|
) -> None:
|
|
404
404
|
"""🔥 Development mode - run MCP server with hot-reload.
|
|
405
405
|
|
|
406
|
-
TWO MODES:
|
|
406
|
+
[not dim]TWO MODES:
|
|
407
407
|
|
|
408
408
|
1. Python Module:
|
|
409
409
|
hud dev # Auto-detects module
|
|
@@ -424,7 +424,7 @@ def dev(
|
|
|
424
424
|
hud dev --watch ../shared # Watch additional directories
|
|
425
425
|
|
|
426
426
|
For environment backend servers, use uvicorn directly:
|
|
427
|
-
uvicorn server:app --reload
|
|
427
|
+
uvicorn server:app --reload[/not dim]
|
|
428
428
|
"""
|
|
429
429
|
# Extract module from params if provided (first param when not --docker)
|
|
430
430
|
module = params[0] if params and not docker else None
|
|
@@ -492,7 +492,7 @@ def run(
|
|
|
492
492
|
) -> None:
|
|
493
493
|
"""🚀 Run Docker image as MCP server.
|
|
494
494
|
|
|
495
|
-
A simple wrapper around 'docker run' that can launch images locally or remotely.
|
|
495
|
+
[not dim]A simple wrapper around 'docker run' that can launch images locally or remotely.
|
|
496
496
|
By default, runs remotely via mcp.hud.so. Use --local to run with local Docker.
|
|
497
497
|
|
|
498
498
|
For local Python development with hot-reload, use 'hud dev' instead.
|
|
@@ -502,7 +502,7 @@ def run(
|
|
|
502
502
|
hud run my-image:latest --local # Run with local Docker
|
|
503
503
|
hud run my-image:latest -e KEY=value # Remote with env vars
|
|
504
504
|
hud run my-image:latest --local -e KEY=val # Local with env vars
|
|
505
|
-
hud run my-image:latest --transport http # Use HTTP transport
|
|
505
|
+
hud run my-image:latest --transport http # Use HTTP transport[/not dim]
|
|
506
506
|
"""
|
|
507
507
|
if not params:
|
|
508
508
|
console.print("[red]❌ Docker image is required[/red]")
|
|
@@ -559,7 +559,7 @@ def clone(
|
|
|
559
559
|
) -> None:
|
|
560
560
|
"""🚀 Clone a git repository quietly with a pretty output.
|
|
561
561
|
|
|
562
|
-
This command wraps 'git clone' with the --quiet flag and displays
|
|
562
|
+
[not dim]This command wraps 'git clone' with the --quiet flag and displays
|
|
563
563
|
a rich formatted success message. If the repository contains a clone
|
|
564
564
|
message in pyproject.toml, it will be displayed as a tutorial.
|
|
565
565
|
|
|
@@ -574,7 +574,7 @@ def clone(
|
|
|
574
574
|
# style = "cyan"
|
|
575
575
|
|
|
576
576
|
Examples:
|
|
577
|
-
hud clone https://github.com/user/repo.git
|
|
577
|
+
hud clone https://github.com/user/repo.git[/not dim]
|
|
578
578
|
"""
|
|
579
579
|
# Run the clone
|
|
580
580
|
success, result = clone_repository(url)
|
|
@@ -605,7 +605,7 @@ def build(
|
|
|
605
605
|
) -> None:
|
|
606
606
|
"""🏗️ Build a HUD environment and generate lock file.
|
|
607
607
|
|
|
608
|
-
This command:
|
|
608
|
+
[not dim]This command:
|
|
609
609
|
- Builds a Docker image from your environment
|
|
610
610
|
- Analyzes the MCP server to extract metadata
|
|
611
611
|
- Generates a hud.lock.yaml file for reproducibility
|
|
@@ -614,7 +614,7 @@ def build(
|
|
|
614
614
|
hud build # Build current directory
|
|
615
615
|
hud build environments/text_2048 -e API_KEY=secret
|
|
616
616
|
hud build . --tag my-env:v1.0 -e VAR1=value1 -e VAR2=value2
|
|
617
|
-
hud build . --no-cache # Force rebuild
|
|
617
|
+
hud build . --no-cache # Force rebuild[/not dim]
|
|
618
618
|
"""
|
|
619
619
|
# Parse directory and extra arguments
|
|
620
620
|
if params:
|
|
@@ -670,14 +670,14 @@ def push(
|
|
|
670
670
|
) -> None:
|
|
671
671
|
"""📤 Push HUD environment to registry.
|
|
672
672
|
|
|
673
|
-
Reads hud.lock.yaml from the directory and pushes to registry.
|
|
673
|
+
[not dim]Reads hud.lock.yaml from the directory and pushes to registry.
|
|
674
674
|
Auto-detects your Docker username if --image not specified.
|
|
675
675
|
|
|
676
676
|
Examples:
|
|
677
677
|
hud push # Push with auto-detected name
|
|
678
678
|
hud push --tag v1.0 # Push with specific tag
|
|
679
679
|
hud push . --image myuser/myenv:v1.0
|
|
680
|
-
hud push --yes # Skip confirmation
|
|
680
|
+
hud push --yes # Skip confirmation[/not dim]
|
|
681
681
|
"""
|
|
682
682
|
push_command(directory, image, tag, sign, yes, verbose)
|
|
683
683
|
|
|
@@ -696,12 +696,12 @@ def pull(
|
|
|
696
696
|
) -> None:
|
|
697
697
|
"""📥 Pull HUD environment from registry with metadata preview.
|
|
698
698
|
|
|
699
|
-
Shows environment details before downloading.
|
|
699
|
+
[not dim]Shows environment details before downloading.
|
|
700
700
|
|
|
701
701
|
Examples:
|
|
702
702
|
hud pull hud.lock.yaml # Pull from lock file
|
|
703
703
|
hud pull myuser/myenv:latest # Pull by image reference
|
|
704
|
-
hud pull myuser/myenv --verify-only # Check metadata only
|
|
704
|
+
hud pull myuser/myenv --verify-only # Check metadata only[/not dim]
|
|
705
705
|
"""
|
|
706
706
|
pull_command(target, lock_file, yes, verify_only, verbose)
|
|
707
707
|
|
|
@@ -717,14 +717,14 @@ def list_environments(
|
|
|
717
717
|
) -> None:
|
|
718
718
|
"""📋 List all HUD environments in local registry.
|
|
719
719
|
|
|
720
|
-
Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
|
|
720
|
+
[not dim]Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
|
|
721
721
|
|
|
722
722
|
Examples:
|
|
723
723
|
hud list # List all environments
|
|
724
724
|
hud list --filter text # Filter by name
|
|
725
725
|
hud list --json # Output as JSON
|
|
726
726
|
hud list --all # Show digest column
|
|
727
|
-
hud list --verbose # Show full descriptions
|
|
727
|
+
hud list --verbose # Show full descriptions[/not dim]
|
|
728
728
|
"""
|
|
729
729
|
list_module.list_command(filter_name, json_output, show_all, verbose)
|
|
730
730
|
|
|
@@ -739,7 +739,7 @@ def remove(
|
|
|
739
739
|
) -> None:
|
|
740
740
|
"""🗑️ Remove HUD environments from local registry.
|
|
741
741
|
|
|
742
|
-
Removes environment metadata from ~/.hud/envs/
|
|
742
|
+
[not dim]Removes environment metadata from ~/.hud/envs/
|
|
743
743
|
Note: This does not remove the Docker images.
|
|
744
744
|
|
|
745
745
|
Examples:
|
|
@@ -747,7 +747,7 @@ def remove(
|
|
|
747
747
|
hud remove text_2048 # Remove by name
|
|
748
748
|
hud remove hudpython/test_init # Remove by full name
|
|
749
749
|
hud remove all # Remove all environments
|
|
750
|
-
hud remove all --yes # Remove all without confirmation
|
|
750
|
+
hud remove all --yes # Remove all without confirmation[/not dim]
|
|
751
751
|
"""
|
|
752
752
|
remove_command(target, yes, verbose)
|
|
753
753
|
|
|
@@ -766,7 +766,7 @@ def init(
|
|
|
766
766
|
) -> None:
|
|
767
767
|
"""🚀 Initialize a new HUD environment with minimal boilerplate.
|
|
768
768
|
|
|
769
|
-
Creates a working MCP environment with:
|
|
769
|
+
[not dim]Creates a working MCP environment with:
|
|
770
770
|
- Dockerfile for containerization
|
|
771
771
|
- pyproject.toml for dependencies
|
|
772
772
|
- Minimal MCP server with context
|
|
@@ -775,7 +775,8 @@ def init(
|
|
|
775
775
|
Examples:
|
|
776
776
|
hud init # Choose preset interactively, create ./preset-name/
|
|
777
777
|
hud init my-env # Create new directory ./my-env/
|
|
778
|
-
hud init my-env --dir /tmp # Create in /tmp/my-env/
|
|
778
|
+
hud init my-env --dir /tmp # Create in /tmp/my-env/[/not dim]
|
|
779
|
+
|
|
779
780
|
"""
|
|
780
781
|
create_environment(name, directory, force, preset)
|
|
781
782
|
|
|
@@ -917,6 +918,7 @@ def eval(
|
|
|
917
918
|
[
|
|
918
919
|
{"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
|
|
919
920
|
{"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
|
|
921
|
+
{"name": "Gemini Computer Use", "value": AgentType.GEMINI},
|
|
920
922
|
{"name": "vLLM (Local Server)", "value": AgentType.VLLM},
|
|
921
923
|
{"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
|
|
922
924
|
]
|
|
@@ -1151,11 +1153,11 @@ def set(
|
|
|
1151
1153
|
) -> None:
|
|
1152
1154
|
"""Persist API keys or other variables for HUD to use by default.
|
|
1153
1155
|
|
|
1154
|
-
Examples:
|
|
1156
|
+
[not dim]Examples:
|
|
1155
1157
|
hud set ANTHROPIC_API_KEY=sk-... OPENAI_API_KEY=sk-...
|
|
1156
1158
|
|
|
1157
1159
|
Values are stored in ~/.hud/.env and are loaded by hud.settings with
|
|
1158
|
-
the lowest precedence (overridden by process env and project .env).
|
|
1160
|
+
the lowest precedence (overridden by process env and project .env).[/not dim]
|
|
1159
1161
|
"""
|
|
1160
1162
|
from hud.utils.hud_console import HUDConsole
|
|
1161
1163
|
|
hud/cli/eval.py
CHANGED
|
@@ -188,6 +188,24 @@ def build_agent(
|
|
|
188
188
|
else:
|
|
189
189
|
return OperatorAgent(verbose=verbose)
|
|
190
190
|
|
|
191
|
+
elif agent_type == AgentType.GEMINI:
|
|
192
|
+
try:
|
|
193
|
+
from hud.agents import GeminiAgent
|
|
194
|
+
except ImportError as e:
|
|
195
|
+
hud_console.error(
|
|
196
|
+
"Gemini agent dependencies are not installed. "
|
|
197
|
+
"Please install with: pip install 'hud-python[agent]'"
|
|
198
|
+
)
|
|
199
|
+
raise typer.Exit(1) from e
|
|
200
|
+
|
|
201
|
+
gemini_kwargs: dict[str, Any] = {
|
|
202
|
+
"model": model or "gemini-2.5-computer-use-preview-10-2025",
|
|
203
|
+
"verbose": verbose,
|
|
204
|
+
}
|
|
205
|
+
if allowed_tools:
|
|
206
|
+
gemini_kwargs["allowed_tools"] = allowed_tools
|
|
207
|
+
return GeminiAgent(**gemini_kwargs)
|
|
208
|
+
|
|
191
209
|
elif agent_type == AgentType.LITELLM:
|
|
192
210
|
try:
|
|
193
211
|
from hud.agents.lite_llm import LiteAgent
|
|
@@ -344,6 +362,17 @@ async def run_single_task(
|
|
|
344
362
|
agent_config = {"verbose": verbose}
|
|
345
363
|
if allowed_tools:
|
|
346
364
|
agent_config["allowed_tools"] = allowed_tools
|
|
365
|
+
elif agent_type == AgentType.GEMINI:
|
|
366
|
+
from hud.agents import GeminiAgent
|
|
367
|
+
|
|
368
|
+
agent_class = GeminiAgent
|
|
369
|
+
agent_config = {
|
|
370
|
+
"model": model or "gemini-2.5-computer-use-preview-10-2025",
|
|
371
|
+
"verbose": verbose,
|
|
372
|
+
"validate_api_key": False,
|
|
373
|
+
}
|
|
374
|
+
if allowed_tools:
|
|
375
|
+
agent_config["allowed_tools"] = allowed_tools
|
|
347
376
|
elif agent_type == AgentType.LITELLM:
|
|
348
377
|
from hud.agents.lite_llm import LiteAgent
|
|
349
378
|
|
|
@@ -534,6 +563,26 @@ async def run_full_dataset(
|
|
|
534
563
|
if allowed_tools:
|
|
535
564
|
agent_config["allowed_tools"] = allowed_tools
|
|
536
565
|
|
|
566
|
+
elif agent_type == AgentType.GEMINI:
|
|
567
|
+
try:
|
|
568
|
+
from hud.agents import GeminiAgent
|
|
569
|
+
|
|
570
|
+
agent_class = GeminiAgent
|
|
571
|
+
except ImportError as e:
|
|
572
|
+
hud_console.error(
|
|
573
|
+
"Gemini agent dependencies are not installed. "
|
|
574
|
+
"Please install with: pip install 'hud-python[agent]'"
|
|
575
|
+
)
|
|
576
|
+
raise typer.Exit(1) from e
|
|
577
|
+
|
|
578
|
+
agent_config = {
|
|
579
|
+
"model": model or "gemini-2.5-computer-use-preview-10-2025",
|
|
580
|
+
"verbose": verbose,
|
|
581
|
+
"validate_api_key": False,
|
|
582
|
+
}
|
|
583
|
+
if allowed_tools:
|
|
584
|
+
agent_config["allowed_tools"] = allowed_tools
|
|
585
|
+
|
|
537
586
|
elif agent_type == AgentType.LITELLM:
|
|
538
587
|
try:
|
|
539
588
|
from hud.agents.lite_llm import LiteAgent
|
|
@@ -641,7 +690,7 @@ def eval_command(
|
|
|
641
690
|
agent: AgentType = typer.Option( # noqa: B008
|
|
642
691
|
AgentType.CLAUDE,
|
|
643
692
|
"--agent",
|
|
644
|
-
help="Agent backend to use (claude, openai, vllm for local servers, or litellm)",
|
|
693
|
+
help="Agent backend to use (claude, gemini, openai, vllm for local servers, or litellm)",
|
|
645
694
|
),
|
|
646
695
|
model: str | None = typer.Option(
|
|
647
696
|
None,
|
|
@@ -757,6 +806,13 @@ def eval_command(
|
|
|
757
806
|
"Set it in your environment or run: hud set ANTHROPIC_API_KEY=your-key-here"
|
|
758
807
|
)
|
|
759
808
|
raise typer.Exit(1)
|
|
809
|
+
elif agent == AgentType.GEMINI:
|
|
810
|
+
if not settings.gemini_api_key:
|
|
811
|
+
hud_console.error("GEMINI_API_KEY is required for Gemini agent")
|
|
812
|
+
hud_console.info(
|
|
813
|
+
"Set it in your environment or run: hud set GEMINI_API_KEY=your-key-here"
|
|
814
|
+
)
|
|
815
|
+
raise typer.Exit(1)
|
|
760
816
|
elif agent == AgentType.OPENAI and not settings.openai_api_key:
|
|
761
817
|
hud_console.error("OPENAI_API_KEY is required for OpenAI agent")
|
|
762
818
|
hud_console.info("Set it in your environment or run: hud set OPENAI_API_KEY=your-key-here")
|