hud-python 0.4.57__py3-none-any.whl → 0.4.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/agents/__init__.py +2 -0
- hud/agents/gemini.py +492 -0
- hud/agents/tests/test_gemini.py +372 -0
- hud/cli/__init__.py +46 -31
- hud/cli/dev.py +111 -1
- hud/cli/eval.py +59 -3
- hud/cli/flows/dev.py +5 -3
- hud/cli/init.py +14 -18
- hud/cli/push.py +2 -2
- hud/cli/rl/__init__.py +1 -1
- hud/cli/rl/celebrate.py +1 -1
- hud/cli/rl/remote_runner.py +3 -3
- hud/cli/tests/test_eval.py +20 -0
- hud/clients/base.py +1 -1
- hud/clients/fastmcp.py +1 -1
- hud/otel/config.py +1 -1
- hud/otel/context.py +2 -2
- hud/server/server.py +283 -36
- hud/settings.py +6 -0
- hud/shared/hints.py +3 -3
- hud/telemetry/job.py +2 -2
- hud/tools/__init__.py +13 -2
- hud/tools/computer/__init__.py +2 -0
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/settings.py +21 -0
- hud/tools/playwright.py +17 -2
- hud/tools/types.py +9 -1
- hud/types.py +2 -1
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.57.dist-info → hud_python-0.4.59.dist-info}/METADATA +2 -1
- {hud_python-0.4.57.dist-info → hud_python-0.4.59.dist-info}/RECORD +35 -32
- {hud_python-0.4.57.dist-info → hud_python-0.4.59.dist-info}/WHEEL +0 -0
- {hud_python-0.4.57.dist-info → hud_python-0.4.59.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.57.dist-info → hud_python-0.4.59.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
"""Tests for Gemini MCP Agent implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
from unittest.mock import AsyncMock, MagicMock, patch
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
from google.genai import types as genai_types
|
|
10
|
+
from mcp import types
|
|
11
|
+
|
|
12
|
+
from hud.agents.gemini import GeminiAgent
|
|
13
|
+
from hud.types import MCPToolCall, MCPToolResult
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TestGeminiAgent:
|
|
17
|
+
"""Test GeminiAgent class."""
|
|
18
|
+
|
|
19
|
+
@pytest.fixture
|
|
20
|
+
def mock_mcp_client(self):
|
|
21
|
+
"""Create a mock MCP client."""
|
|
22
|
+
mcp_client = AsyncMock()
|
|
23
|
+
# Set up the mcp_config attribute as a regular dict, not a coroutine
|
|
24
|
+
mcp_client.mcp_config = {"test_server": {"url": "http://test"}}
|
|
25
|
+
# Mock list_tools to return gemini_computer tool
|
|
26
|
+
mcp_client.list_tools = AsyncMock(
|
|
27
|
+
return_value=[
|
|
28
|
+
types.Tool(
|
|
29
|
+
name="gemini_computer",
|
|
30
|
+
description="Gemini computer use tool",
|
|
31
|
+
inputSchema={},
|
|
32
|
+
)
|
|
33
|
+
]
|
|
34
|
+
)
|
|
35
|
+
mcp_client.initialize = AsyncMock()
|
|
36
|
+
return mcp_client
|
|
37
|
+
|
|
38
|
+
@pytest.fixture
|
|
39
|
+
def mock_gemini_client(self):
|
|
40
|
+
"""Create a mock Gemini client."""
|
|
41
|
+
client = MagicMock()
|
|
42
|
+
client.api_key = "test_key"
|
|
43
|
+
# Mock models.list for validation
|
|
44
|
+
client.models = MagicMock()
|
|
45
|
+
client.models.list = MagicMock(return_value=iter([]))
|
|
46
|
+
return client
|
|
47
|
+
|
|
48
|
+
@pytest.mark.asyncio
|
|
49
|
+
async def test_init(self, mock_mcp_client, mock_gemini_client):
|
|
50
|
+
"""Test agent initialization."""
|
|
51
|
+
agent = GeminiAgent(
|
|
52
|
+
mcp_client=mock_mcp_client,
|
|
53
|
+
model_client=mock_gemini_client,
|
|
54
|
+
model="gemini-2.5-computer-use-preview-10-2025",
|
|
55
|
+
validate_api_key=False, # Skip validation in tests
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
assert agent.model_name == "gemini-2.5-computer-use-preview-10-2025"
|
|
59
|
+
assert agent.model == "gemini-2.5-computer-use-preview-10-2025"
|
|
60
|
+
assert agent.gemini_client == mock_gemini_client
|
|
61
|
+
|
|
62
|
+
@pytest.mark.asyncio
|
|
63
|
+
async def test_init_without_model_client(self, mock_mcp_client):
|
|
64
|
+
"""Test agent initialization without model client."""
|
|
65
|
+
with (
|
|
66
|
+
patch("hud.settings.settings.gemini_api_key", "test_key"),
|
|
67
|
+
patch("hud.agents.gemini.genai.Client") as mock_client_class,
|
|
68
|
+
):
|
|
69
|
+
mock_client = MagicMock()
|
|
70
|
+
mock_client.api_key = "test_key"
|
|
71
|
+
mock_client.models = MagicMock()
|
|
72
|
+
mock_client.models.list = MagicMock(return_value=iter([]))
|
|
73
|
+
mock_client_class.return_value = mock_client
|
|
74
|
+
|
|
75
|
+
agent = GeminiAgent(
|
|
76
|
+
mcp_client=mock_mcp_client,
|
|
77
|
+
model="gemini-2.5-computer-use-preview-10-2025",
|
|
78
|
+
validate_api_key=False,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
assert agent.model_name == "gemini-2.5-computer-use-preview-10-2025"
|
|
82
|
+
assert agent.gemini_client is not None
|
|
83
|
+
|
|
84
|
+
@pytest.mark.asyncio
|
|
85
|
+
async def test_format_blocks(self, mock_mcp_client, mock_gemini_client):
|
|
86
|
+
"""Test formatting content blocks into Gemini messages."""
|
|
87
|
+
agent = GeminiAgent(
|
|
88
|
+
mcp_client=mock_mcp_client,
|
|
89
|
+
model_client=mock_gemini_client,
|
|
90
|
+
validate_api_key=False,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Test with text only
|
|
94
|
+
text_blocks: list[types.ContentBlock] = [
|
|
95
|
+
types.TextContent(type="text", text="Hello, Gemini!")
|
|
96
|
+
]
|
|
97
|
+
messages = await agent.format_blocks(text_blocks)
|
|
98
|
+
assert len(messages) == 1
|
|
99
|
+
assert messages[0].role == "user"
|
|
100
|
+
parts = messages[0].parts
|
|
101
|
+
assert parts is not None
|
|
102
|
+
assert len(parts) == 1
|
|
103
|
+
assert parts[0].text == "Hello, Gemini!"
|
|
104
|
+
|
|
105
|
+
# Test with screenshot
|
|
106
|
+
image_blocks: list[types.ContentBlock] = [
|
|
107
|
+
types.TextContent(type="text", text="Look at this"),
|
|
108
|
+
types.ImageContent(
|
|
109
|
+
type="image",
|
|
110
|
+
data=base64.b64encode(b"fakeimage").decode("utf-8"),
|
|
111
|
+
mimeType="image/png",
|
|
112
|
+
),
|
|
113
|
+
]
|
|
114
|
+
messages = await agent.format_blocks(image_blocks)
|
|
115
|
+
assert len(messages) == 1
|
|
116
|
+
assert messages[0].role == "user"
|
|
117
|
+
parts = messages[0].parts
|
|
118
|
+
assert parts is not None
|
|
119
|
+
assert len(parts) == 2
|
|
120
|
+
# First part is text
|
|
121
|
+
assert parts[0].text == "Look at this"
|
|
122
|
+
# Second part is image - check that it was created from bytes
|
|
123
|
+
assert parts[1].inline_data is not None
|
|
124
|
+
|
|
125
|
+
@pytest.mark.asyncio
|
|
126
|
+
async def test_format_tool_results(self, mock_mcp_client, mock_gemini_client):
|
|
127
|
+
"""Test the agent's format_tool_results method."""
|
|
128
|
+
agent = GeminiAgent(
|
|
129
|
+
mcp_client=mock_mcp_client,
|
|
130
|
+
model_client=mock_gemini_client,
|
|
131
|
+
validate_api_key=False,
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
tool_calls = [
|
|
135
|
+
MCPToolCall(
|
|
136
|
+
name="gemini_computer",
|
|
137
|
+
arguments={"action": "click_at", "x": 100, "y": 200},
|
|
138
|
+
id="call_1", # type: ignore
|
|
139
|
+
gemini_name="click_at", # type: ignore
|
|
140
|
+
),
|
|
141
|
+
]
|
|
142
|
+
|
|
143
|
+
tool_results = [
|
|
144
|
+
MCPToolResult(
|
|
145
|
+
content=[
|
|
146
|
+
types.TextContent(type="text", text="Clicked successfully"),
|
|
147
|
+
types.ImageContent(
|
|
148
|
+
type="image",
|
|
149
|
+
data=base64.b64encode(b"screenshot").decode("utf-8"),
|
|
150
|
+
mimeType="image/png",
|
|
151
|
+
),
|
|
152
|
+
],
|
|
153
|
+
isError=False,
|
|
154
|
+
),
|
|
155
|
+
]
|
|
156
|
+
|
|
157
|
+
messages = await agent.format_tool_results(tool_calls, tool_results)
|
|
158
|
+
|
|
159
|
+
# format_tool_results returns a single user message with function responses
|
|
160
|
+
assert len(messages) == 1
|
|
161
|
+
assert messages[0].role == "user"
|
|
162
|
+
# The content contains function response parts
|
|
163
|
+
parts = messages[0].parts
|
|
164
|
+
assert parts is not None
|
|
165
|
+
assert len(parts) == 1
|
|
166
|
+
function_response = parts[0].function_response
|
|
167
|
+
assert function_response is not None
|
|
168
|
+
assert function_response.name == "click_at"
|
|
169
|
+
response_payload = function_response.response or {}
|
|
170
|
+
assert response_payload.get("success") is True
|
|
171
|
+
|
|
172
|
+
@pytest.mark.asyncio
|
|
173
|
+
async def test_format_tool_results_with_error(self, mock_mcp_client, mock_gemini_client):
|
|
174
|
+
"""Test formatting tool results with errors."""
|
|
175
|
+
agent = GeminiAgent(
|
|
176
|
+
mcp_client=mock_mcp_client,
|
|
177
|
+
model_client=mock_gemini_client,
|
|
178
|
+
validate_api_key=False,
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
tool_calls = [
|
|
182
|
+
MCPToolCall(
|
|
183
|
+
name="gemini_computer",
|
|
184
|
+
arguments={"action": "invalid"},
|
|
185
|
+
id="call_error", # type: ignore
|
|
186
|
+
gemini_name="invalid_action", # type: ignore
|
|
187
|
+
),
|
|
188
|
+
]
|
|
189
|
+
|
|
190
|
+
tool_results = [
|
|
191
|
+
MCPToolResult(
|
|
192
|
+
content=[types.TextContent(type="text", text="Action failed: invalid action")],
|
|
193
|
+
isError=True,
|
|
194
|
+
),
|
|
195
|
+
]
|
|
196
|
+
|
|
197
|
+
messages = await agent.format_tool_results(tool_calls, tool_results)
|
|
198
|
+
|
|
199
|
+
# Check that error is in the response
|
|
200
|
+
assert len(messages) == 1
|
|
201
|
+
assert messages[0].role == "user"
|
|
202
|
+
parts = messages[0].parts
|
|
203
|
+
assert parts is not None
|
|
204
|
+
function_response = parts[0].function_response
|
|
205
|
+
assert function_response is not None
|
|
206
|
+
response_payload = function_response.response or {}
|
|
207
|
+
assert "error" in response_payload
|
|
208
|
+
|
|
209
|
+
@pytest.mark.asyncio
|
|
210
|
+
async def test_get_response(self, mock_mcp_client, mock_gemini_client):
|
|
211
|
+
"""Test getting model response from Gemini API."""
|
|
212
|
+
# Disable telemetry for this test
|
|
213
|
+
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
214
|
+
agent = GeminiAgent(
|
|
215
|
+
mcp_client=mock_mcp_client,
|
|
216
|
+
model_client=mock_gemini_client,
|
|
217
|
+
validate_api_key=False,
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
# Set up available tools
|
|
221
|
+
agent._available_tools = [
|
|
222
|
+
types.Tool(name="gemini_computer", description="Computer tool", inputSchema={})
|
|
223
|
+
]
|
|
224
|
+
|
|
225
|
+
# Mock the API response
|
|
226
|
+
mock_response = MagicMock()
|
|
227
|
+
mock_candidate = MagicMock()
|
|
228
|
+
|
|
229
|
+
# Create text part
|
|
230
|
+
text_part = MagicMock()
|
|
231
|
+
text_part.text = "I will click at coordinates"
|
|
232
|
+
text_part.function_call = None
|
|
233
|
+
|
|
234
|
+
# Create function call part
|
|
235
|
+
function_call_part = MagicMock()
|
|
236
|
+
function_call_part.text = None
|
|
237
|
+
function_call_part.function_call = MagicMock()
|
|
238
|
+
function_call_part.function_call.name = "click_at"
|
|
239
|
+
function_call_part.function_call.args = {"x": 100, "y": 200}
|
|
240
|
+
|
|
241
|
+
mock_candidate.content = MagicMock()
|
|
242
|
+
mock_candidate.content.parts = [text_part, function_call_part]
|
|
243
|
+
|
|
244
|
+
mock_response.candidates = [mock_candidate]
|
|
245
|
+
|
|
246
|
+
mock_gemini_client.models = MagicMock()
|
|
247
|
+
mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
|
|
248
|
+
|
|
249
|
+
messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Click")])]
|
|
250
|
+
response = await agent.get_response(messages)
|
|
251
|
+
|
|
252
|
+
assert response.content == "I will click at coordinates"
|
|
253
|
+
assert len(response.tool_calls) == 1
|
|
254
|
+
assert response.tool_calls[0].arguments == {"action": "click_at", "x": 100, "y": 200}
|
|
255
|
+
assert response.done is False
|
|
256
|
+
|
|
257
|
+
@pytest.mark.asyncio
|
|
258
|
+
async def test_get_response_text_only(self, mock_mcp_client, mock_gemini_client):
|
|
259
|
+
"""Test getting text-only response."""
|
|
260
|
+
# Disable telemetry for this test
|
|
261
|
+
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
262
|
+
agent = GeminiAgent(
|
|
263
|
+
mcp_client=mock_mcp_client,
|
|
264
|
+
model_client=mock_gemini_client,
|
|
265
|
+
validate_api_key=False,
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
# Mock the API response with text only
|
|
269
|
+
mock_response = MagicMock()
|
|
270
|
+
mock_candidate = MagicMock()
|
|
271
|
+
|
|
272
|
+
text_part = MagicMock()
|
|
273
|
+
text_part.text = "Task completed successfully"
|
|
274
|
+
text_part.function_call = None
|
|
275
|
+
|
|
276
|
+
mock_candidate.content = MagicMock()
|
|
277
|
+
mock_candidate.content.parts = [text_part]
|
|
278
|
+
|
|
279
|
+
mock_response.candidates = [mock_candidate]
|
|
280
|
+
|
|
281
|
+
mock_gemini_client.models = MagicMock()
|
|
282
|
+
mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
|
|
283
|
+
|
|
284
|
+
messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Status?")])]
|
|
285
|
+
response = await agent.get_response(messages)
|
|
286
|
+
|
|
287
|
+
assert response.content == "Task completed successfully"
|
|
288
|
+
assert response.tool_calls == []
|
|
289
|
+
assert response.done is True
|
|
290
|
+
|
|
291
|
+
@pytest.mark.asyncio
|
|
292
|
+
async def test_convert_tools_for_gemini(self, mock_mcp_client, mock_gemini_client):
|
|
293
|
+
"""Test converting MCP tools to Gemini format."""
|
|
294
|
+
agent = GeminiAgent(
|
|
295
|
+
mcp_client=mock_mcp_client,
|
|
296
|
+
model_client=mock_gemini_client,
|
|
297
|
+
validate_api_key=False,
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
# Set up available tools
|
|
301
|
+
agent._available_tools = [
|
|
302
|
+
types.Tool(
|
|
303
|
+
name="gemini_computer",
|
|
304
|
+
description="Computer tool",
|
|
305
|
+
inputSchema={"type": "object"},
|
|
306
|
+
),
|
|
307
|
+
types.Tool(
|
|
308
|
+
name="calculator",
|
|
309
|
+
description="Calculator tool",
|
|
310
|
+
inputSchema={
|
|
311
|
+
"type": "object",
|
|
312
|
+
"properties": {"operation": {"type": "string"}},
|
|
313
|
+
},
|
|
314
|
+
),
|
|
315
|
+
]
|
|
316
|
+
|
|
317
|
+
gemini_tools = agent._convert_tools_for_gemini()
|
|
318
|
+
|
|
319
|
+
# Should have 2 tools: computer_use and calculator
|
|
320
|
+
assert len(gemini_tools) == 2
|
|
321
|
+
|
|
322
|
+
# First should be computer use tool
|
|
323
|
+
assert gemini_tools[0].computer_use is not None
|
|
324
|
+
assert (
|
|
325
|
+
gemini_tools[0].computer_use.environment == genai_types.Environment.ENVIRONMENT_BROWSER
|
|
326
|
+
)
|
|
327
|
+
|
|
328
|
+
# Second should be calculator as function declaration
|
|
329
|
+
assert gemini_tools[1].function_declarations is not None
|
|
330
|
+
assert len(gemini_tools[1].function_declarations) == 1
|
|
331
|
+
assert gemini_tools[1].function_declarations[0].name == "calculator"
|
|
332
|
+
|
|
333
|
+
@pytest.mark.asyncio
|
|
334
|
+
async def test_create_user_message(self, mock_mcp_client, mock_gemini_client):
|
|
335
|
+
"""Test creating a user message."""
|
|
336
|
+
agent = GeminiAgent(
|
|
337
|
+
mcp_client=mock_mcp_client,
|
|
338
|
+
model_client=mock_gemini_client,
|
|
339
|
+
validate_api_key=False,
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
message = await agent.create_user_message("Hello Gemini")
|
|
343
|
+
|
|
344
|
+
assert message.role == "user"
|
|
345
|
+
parts = message.parts
|
|
346
|
+
assert parts is not None
|
|
347
|
+
assert len(parts) == 1
|
|
348
|
+
assert parts[0].text == "Hello Gemini"
|
|
349
|
+
|
|
350
|
+
@pytest.mark.asyncio
|
|
351
|
+
async def test_handle_empty_response(self, mock_mcp_client, mock_gemini_client):
|
|
352
|
+
"""Test handling empty response from API."""
|
|
353
|
+
with patch("hud.settings.settings.telemetry_enabled", False):
|
|
354
|
+
agent = GeminiAgent(
|
|
355
|
+
mcp_client=mock_mcp_client,
|
|
356
|
+
model_client=mock_gemini_client,
|
|
357
|
+
validate_api_key=False,
|
|
358
|
+
)
|
|
359
|
+
|
|
360
|
+
# Mock empty response
|
|
361
|
+
mock_response = MagicMock()
|
|
362
|
+
mock_response.candidates = []
|
|
363
|
+
|
|
364
|
+
mock_gemini_client.models = MagicMock()
|
|
365
|
+
mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
|
|
366
|
+
|
|
367
|
+
messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Hi")])]
|
|
368
|
+
response = await agent.get_response(messages)
|
|
369
|
+
|
|
370
|
+
assert response.content == ""
|
|
371
|
+
assert response.tool_calls == []
|
|
372
|
+
assert response.done is True
|
hud/cli/__init__.py
CHANGED
|
@@ -93,14 +93,14 @@ def analyze(
|
|
|
93
93
|
) -> None:
|
|
94
94
|
"""🔍 Analyze MCP environment - discover tools, resources, and capabilities.
|
|
95
95
|
|
|
96
|
-
By default, uses cached metadata for instant results.
|
|
96
|
+
[not dim]By default, uses cached metadata for instant results.
|
|
97
97
|
Use --live to run the container for real-time analysis.
|
|
98
98
|
|
|
99
99
|
Examples:
|
|
100
100
|
hud analyze hudpython/test_init # Fast metadata inspection
|
|
101
101
|
hud analyze my-env --live # Full container analysis
|
|
102
102
|
hud analyze --config mcp-config.json # From MCP config
|
|
103
|
-
hud analyze --cursor text-2048-dev # From Cursor config
|
|
103
|
+
hud analyze --cursor text-2048-dev # From Cursor config[/not dim]
|
|
104
104
|
"""
|
|
105
105
|
if config:
|
|
106
106
|
# Load config from JSON file (always live for configs)
|
|
@@ -177,7 +177,7 @@ def debug(
|
|
|
177
177
|
) -> None:
|
|
178
178
|
"""🐛 Debug MCP environment - test initialization, tools, and readiness.
|
|
179
179
|
|
|
180
|
-
Examples:
|
|
180
|
+
[not dim]Examples:
|
|
181
181
|
hud debug . # Debug current directory
|
|
182
182
|
hud debug environments/browser # Debug specific directory
|
|
183
183
|
hud debug . --build # Build then debug
|
|
@@ -185,7 +185,7 @@ def debug(
|
|
|
185
185
|
hud debug my-mcp-server:v1 -e API_KEY=xxx
|
|
186
186
|
hud debug --config mcp-config.json
|
|
187
187
|
hud debug --cursor text-2048-dev
|
|
188
|
-
hud debug . --max-phase 3 # Stop after phase 3
|
|
188
|
+
hud debug . --max-phase 3 # Stop after phase 3[/not dim]
|
|
189
189
|
"""
|
|
190
190
|
# Import here to avoid circular imports
|
|
191
191
|
from hud.utils.hud_console import HUDConsole
|
|
@@ -253,10 +253,23 @@ def debug(
|
|
|
253
253
|
else:
|
|
254
254
|
# Assume it's an image name
|
|
255
255
|
image = first_param
|
|
256
|
-
from .utils.docker import
|
|
256
|
+
from .utils.docker import create_docker_run_command
|
|
257
257
|
|
|
258
|
-
#
|
|
259
|
-
|
|
258
|
+
# For image mode, check if there's a .env file in current directory
|
|
259
|
+
# and use it if available (similar to hud dev behavior)
|
|
260
|
+
cwd = Path.cwd()
|
|
261
|
+
if (cwd / ".env").exists():
|
|
262
|
+
# Use create_docker_run_command to load .env from current directory
|
|
263
|
+
command = create_docker_run_command(
|
|
264
|
+
image,
|
|
265
|
+
docker_args=docker_args,
|
|
266
|
+
env_dir=cwd, # Load .env from current directory
|
|
267
|
+
)
|
|
268
|
+
else:
|
|
269
|
+
# No .env file, use basic command without env loading
|
|
270
|
+
from .utils.docker import build_run_command
|
|
271
|
+
|
|
272
|
+
command = build_run_command(image, docker_args)
|
|
260
273
|
else:
|
|
261
274
|
console.print(
|
|
262
275
|
"[red]Error: Must specify a directory, Docker image, --config, or --cursor[/red]"
|
|
@@ -390,7 +403,7 @@ def dev(
|
|
|
390
403
|
) -> None:
|
|
391
404
|
"""🔥 Development mode - run MCP server with hot-reload.
|
|
392
405
|
|
|
393
|
-
TWO MODES:
|
|
406
|
+
[not dim]TWO MODES:
|
|
394
407
|
|
|
395
408
|
1. Python Module:
|
|
396
409
|
hud dev # Auto-detects module
|
|
@@ -411,7 +424,7 @@ def dev(
|
|
|
411
424
|
hud dev --watch ../shared # Watch additional directories
|
|
412
425
|
|
|
413
426
|
For environment backend servers, use uvicorn directly:
|
|
414
|
-
uvicorn server:app --reload
|
|
427
|
+
uvicorn server:app --reload[/not dim]
|
|
415
428
|
"""
|
|
416
429
|
# Extract module from params if provided (first param when not --docker)
|
|
417
430
|
module = params[0] if params and not docker else None
|
|
@@ -479,7 +492,7 @@ def run(
|
|
|
479
492
|
) -> None:
|
|
480
493
|
"""🚀 Run Docker image as MCP server.
|
|
481
494
|
|
|
482
|
-
A simple wrapper around 'docker run' that can launch images locally or remotely.
|
|
495
|
+
[not dim]A simple wrapper around 'docker run' that can launch images locally or remotely.
|
|
483
496
|
By default, runs remotely via mcp.hud.so. Use --local to run with local Docker.
|
|
484
497
|
|
|
485
498
|
For local Python development with hot-reload, use 'hud dev' instead.
|
|
@@ -489,7 +502,7 @@ def run(
|
|
|
489
502
|
hud run my-image:latest --local # Run with local Docker
|
|
490
503
|
hud run my-image:latest -e KEY=value # Remote with env vars
|
|
491
504
|
hud run my-image:latest --local -e KEY=val # Local with env vars
|
|
492
|
-
hud run my-image:latest --transport http # Use HTTP transport
|
|
505
|
+
hud run my-image:latest --transport http # Use HTTP transport[/not dim]
|
|
493
506
|
"""
|
|
494
507
|
if not params:
|
|
495
508
|
console.print("[red]❌ Docker image is required[/red]")
|
|
@@ -546,7 +559,7 @@ def clone(
|
|
|
546
559
|
) -> None:
|
|
547
560
|
"""🚀 Clone a git repository quietly with a pretty output.
|
|
548
561
|
|
|
549
|
-
This command wraps 'git clone' with the --quiet flag and displays
|
|
562
|
+
[not dim]This command wraps 'git clone' with the --quiet flag and displays
|
|
550
563
|
a rich formatted success message. If the repository contains a clone
|
|
551
564
|
message in pyproject.toml, it will be displayed as a tutorial.
|
|
552
565
|
|
|
@@ -561,7 +574,7 @@ def clone(
|
|
|
561
574
|
# style = "cyan"
|
|
562
575
|
|
|
563
576
|
Examples:
|
|
564
|
-
hud clone https://github.com/user/repo.git
|
|
577
|
+
hud clone https://github.com/user/repo.git[/not dim]
|
|
565
578
|
"""
|
|
566
579
|
# Run the clone
|
|
567
580
|
success, result = clone_repository(url)
|
|
@@ -592,7 +605,7 @@ def build(
|
|
|
592
605
|
) -> None:
|
|
593
606
|
"""🏗️ Build a HUD environment and generate lock file.
|
|
594
607
|
|
|
595
|
-
This command:
|
|
608
|
+
[not dim]This command:
|
|
596
609
|
- Builds a Docker image from your environment
|
|
597
610
|
- Analyzes the MCP server to extract metadata
|
|
598
611
|
- Generates a hud.lock.yaml file for reproducibility
|
|
@@ -601,7 +614,7 @@ def build(
|
|
|
601
614
|
hud build # Build current directory
|
|
602
615
|
hud build environments/text_2048 -e API_KEY=secret
|
|
603
616
|
hud build . --tag my-env:v1.0 -e VAR1=value1 -e VAR2=value2
|
|
604
|
-
hud build . --no-cache # Force rebuild
|
|
617
|
+
hud build . --no-cache # Force rebuild[/not dim]
|
|
605
618
|
"""
|
|
606
619
|
# Parse directory and extra arguments
|
|
607
620
|
if params:
|
|
@@ -657,14 +670,14 @@ def push(
|
|
|
657
670
|
) -> None:
|
|
658
671
|
"""📤 Push HUD environment to registry.
|
|
659
672
|
|
|
660
|
-
Reads hud.lock.yaml from the directory and pushes to registry.
|
|
673
|
+
[not dim]Reads hud.lock.yaml from the directory and pushes to registry.
|
|
661
674
|
Auto-detects your Docker username if --image not specified.
|
|
662
675
|
|
|
663
676
|
Examples:
|
|
664
677
|
hud push # Push with auto-detected name
|
|
665
678
|
hud push --tag v1.0 # Push with specific tag
|
|
666
679
|
hud push . --image myuser/myenv:v1.0
|
|
667
|
-
hud push --yes # Skip confirmation
|
|
680
|
+
hud push --yes # Skip confirmation[/not dim]
|
|
668
681
|
"""
|
|
669
682
|
push_command(directory, image, tag, sign, yes, verbose)
|
|
670
683
|
|
|
@@ -683,12 +696,12 @@ def pull(
|
|
|
683
696
|
) -> None:
|
|
684
697
|
"""📥 Pull HUD environment from registry with metadata preview.
|
|
685
698
|
|
|
686
|
-
Shows environment details before downloading.
|
|
699
|
+
[not dim]Shows environment details before downloading.
|
|
687
700
|
|
|
688
701
|
Examples:
|
|
689
702
|
hud pull hud.lock.yaml # Pull from lock file
|
|
690
703
|
hud pull myuser/myenv:latest # Pull by image reference
|
|
691
|
-
hud pull myuser/myenv --verify-only # Check metadata only
|
|
704
|
+
hud pull myuser/myenv --verify-only # Check metadata only[/not dim]
|
|
692
705
|
"""
|
|
693
706
|
pull_command(target, lock_file, yes, verify_only, verbose)
|
|
694
707
|
|
|
@@ -704,14 +717,14 @@ def list_environments(
|
|
|
704
717
|
) -> None:
|
|
705
718
|
"""📋 List all HUD environments in local registry.
|
|
706
719
|
|
|
707
|
-
Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
|
|
720
|
+
[not dim]Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
|
|
708
721
|
|
|
709
722
|
Examples:
|
|
710
723
|
hud list # List all environments
|
|
711
724
|
hud list --filter text # Filter by name
|
|
712
725
|
hud list --json # Output as JSON
|
|
713
726
|
hud list --all # Show digest column
|
|
714
|
-
hud list --verbose # Show full descriptions
|
|
727
|
+
hud list --verbose # Show full descriptions[/not dim]
|
|
715
728
|
"""
|
|
716
729
|
list_module.list_command(filter_name, json_output, show_all, verbose)
|
|
717
730
|
|
|
@@ -726,7 +739,7 @@ def remove(
|
|
|
726
739
|
) -> None:
|
|
727
740
|
"""🗑️ Remove HUD environments from local registry.
|
|
728
741
|
|
|
729
|
-
Removes environment metadata from ~/.hud/envs/
|
|
742
|
+
[not dim]Removes environment metadata from ~/.hud/envs/
|
|
730
743
|
Note: This does not remove the Docker images.
|
|
731
744
|
|
|
732
745
|
Examples:
|
|
@@ -734,35 +747,36 @@ def remove(
|
|
|
734
747
|
hud remove text_2048 # Remove by name
|
|
735
748
|
hud remove hudpython/test_init # Remove by full name
|
|
736
749
|
hud remove all # Remove all environments
|
|
737
|
-
hud remove all --yes # Remove all without confirmation
|
|
750
|
+
hud remove all --yes # Remove all without confirmation[/not dim]
|
|
738
751
|
"""
|
|
739
752
|
remove_command(target, yes, verbose)
|
|
740
753
|
|
|
741
754
|
|
|
742
755
|
@app.command()
|
|
743
756
|
def init(
|
|
744
|
-
name: str = typer.Argument(None, help="Environment name (default:
|
|
757
|
+
name: str = typer.Argument(None, help="Environment name (default: chosen preset name)"),
|
|
745
758
|
preset: str | None = typer.Option(
|
|
746
759
|
None,
|
|
747
760
|
"--preset",
|
|
748
761
|
"-p",
|
|
749
762
|
help="Preset to use: blank, deep-research, browser, rubrics. If omitted, you'll choose interactively.", # noqa: E501
|
|
750
763
|
),
|
|
751
|
-
directory: str = typer.Option(".", "--dir", "-d", help="
|
|
764
|
+
directory: str = typer.Option(".", "--dir", "-d", help="Parent directory for the environment"),
|
|
752
765
|
force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
|
|
753
766
|
) -> None:
|
|
754
767
|
"""🚀 Initialize a new HUD environment with minimal boilerplate.
|
|
755
768
|
|
|
756
|
-
Creates a working MCP environment with:
|
|
769
|
+
[not dim]Creates a working MCP environment with:
|
|
757
770
|
- Dockerfile for containerization
|
|
758
771
|
- pyproject.toml for dependencies
|
|
759
772
|
- Minimal MCP server with context
|
|
760
773
|
- Required setup/evaluate tools
|
|
761
774
|
|
|
762
775
|
Examples:
|
|
763
|
-
hud init #
|
|
764
|
-
hud init my-env # Create
|
|
765
|
-
hud init my-env --dir /tmp # Create in /tmp/my-env/
|
|
776
|
+
hud init # Choose preset interactively, create ./preset-name/
|
|
777
|
+
hud init my-env # Create new directory ./my-env/
|
|
778
|
+
hud init my-env --dir /tmp # Create in /tmp/my-env/[/not dim]
|
|
779
|
+
|
|
766
780
|
"""
|
|
767
781
|
create_environment(name, directory, force, preset)
|
|
768
782
|
|
|
@@ -904,6 +918,7 @@ def eval(
|
|
|
904
918
|
[
|
|
905
919
|
{"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
|
|
906
920
|
{"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
|
|
921
|
+
{"name": "Gemini Computer Use", "value": AgentType.GEMINI},
|
|
907
922
|
{"name": "vLLM (Local Server)", "value": AgentType.VLLM},
|
|
908
923
|
{"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
|
|
909
924
|
]
|
|
@@ -1138,11 +1153,11 @@ def set(
|
|
|
1138
1153
|
) -> None:
|
|
1139
1154
|
"""Persist API keys or other variables for HUD to use by default.
|
|
1140
1155
|
|
|
1141
|
-
Examples:
|
|
1156
|
+
[not dim]Examples:
|
|
1142
1157
|
hud set ANTHROPIC_API_KEY=sk-... OPENAI_API_KEY=sk-...
|
|
1143
1158
|
|
|
1144
1159
|
Values are stored in ~/.hud/.env and are loaded by hud.settings with
|
|
1145
|
-
the lowest precedence (overridden by process env and project .env).
|
|
1160
|
+
the lowest precedence (overridden by process env and project .env).[/not dim]
|
|
1146
1161
|
"""
|
|
1147
1162
|
from hud.utils.hud_console import HUDConsole
|
|
1148
1163
|
|