hud-python 0.4.58 → 0.4.59 (py3-none-any wheels)

This diff shows the changes between two publicly available versions of the package, as released to one of the supported public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of hud-python has been flagged as potentially problematic (the link to further details was not preserved in this extract).

@@ -0,0 +1,372 @@
1
+ """Tests for Gemini MCP Agent implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ from unittest.mock import AsyncMock, MagicMock, patch
7
+
8
+ import pytest
9
+ from google.genai import types as genai_types
10
+ from mcp import types
11
+
12
+ from hud.agents.gemini import GeminiAgent
13
+ from hud.types import MCPToolCall, MCPToolResult
14
+
15
+
16
+ class TestGeminiAgent:
17
+ """Test GeminiAgent class."""
18
+
19
+ @pytest.fixture
20
+ def mock_mcp_client(self):
21
+ """Create a mock MCP client."""
22
+ mcp_client = AsyncMock()
23
+ # Set up the mcp_config attribute as a regular dict, not a coroutine
24
+ mcp_client.mcp_config = {"test_server": {"url": "http://test"}}
25
+ # Mock list_tools to return gemini_computer tool
26
+ mcp_client.list_tools = AsyncMock(
27
+ return_value=[
28
+ types.Tool(
29
+ name="gemini_computer",
30
+ description="Gemini computer use tool",
31
+ inputSchema={},
32
+ )
33
+ ]
34
+ )
35
+ mcp_client.initialize = AsyncMock()
36
+ return mcp_client
37
+
38
+ @pytest.fixture
39
+ def mock_gemini_client(self):
40
+ """Create a mock Gemini client."""
41
+ client = MagicMock()
42
+ client.api_key = "test_key"
43
+ # Mock models.list for validation
44
+ client.models = MagicMock()
45
+ client.models.list = MagicMock(return_value=iter([]))
46
+ return client
47
+
48
+ @pytest.mark.asyncio
49
+ async def test_init(self, mock_mcp_client, mock_gemini_client):
50
+ """Test agent initialization."""
51
+ agent = GeminiAgent(
52
+ mcp_client=mock_mcp_client,
53
+ model_client=mock_gemini_client,
54
+ model="gemini-2.5-computer-use-preview-10-2025",
55
+ validate_api_key=False, # Skip validation in tests
56
+ )
57
+
58
+ assert agent.model_name == "gemini-2.5-computer-use-preview-10-2025"
59
+ assert agent.model == "gemini-2.5-computer-use-preview-10-2025"
60
+ assert agent.gemini_client == mock_gemini_client
61
+
62
+ @pytest.mark.asyncio
63
+ async def test_init_without_model_client(self, mock_mcp_client):
64
+ """Test agent initialization without model client."""
65
+ with (
66
+ patch("hud.settings.settings.gemini_api_key", "test_key"),
67
+ patch("hud.agents.gemini.genai.Client") as mock_client_class,
68
+ ):
69
+ mock_client = MagicMock()
70
+ mock_client.api_key = "test_key"
71
+ mock_client.models = MagicMock()
72
+ mock_client.models.list = MagicMock(return_value=iter([]))
73
+ mock_client_class.return_value = mock_client
74
+
75
+ agent = GeminiAgent(
76
+ mcp_client=mock_mcp_client,
77
+ model="gemini-2.5-computer-use-preview-10-2025",
78
+ validate_api_key=False,
79
+ )
80
+
81
+ assert agent.model_name == "gemini-2.5-computer-use-preview-10-2025"
82
+ assert agent.gemini_client is not None
83
+
84
+ @pytest.mark.asyncio
85
+ async def test_format_blocks(self, mock_mcp_client, mock_gemini_client):
86
+ """Test formatting content blocks into Gemini messages."""
87
+ agent = GeminiAgent(
88
+ mcp_client=mock_mcp_client,
89
+ model_client=mock_gemini_client,
90
+ validate_api_key=False,
91
+ )
92
+
93
+ # Test with text only
94
+ text_blocks: list[types.ContentBlock] = [
95
+ types.TextContent(type="text", text="Hello, Gemini!")
96
+ ]
97
+ messages = await agent.format_blocks(text_blocks)
98
+ assert len(messages) == 1
99
+ assert messages[0].role == "user"
100
+ parts = messages[0].parts
101
+ assert parts is not None
102
+ assert len(parts) == 1
103
+ assert parts[0].text == "Hello, Gemini!"
104
+
105
+ # Test with screenshot
106
+ image_blocks: list[types.ContentBlock] = [
107
+ types.TextContent(type="text", text="Look at this"),
108
+ types.ImageContent(
109
+ type="image",
110
+ data=base64.b64encode(b"fakeimage").decode("utf-8"),
111
+ mimeType="image/png",
112
+ ),
113
+ ]
114
+ messages = await agent.format_blocks(image_blocks)
115
+ assert len(messages) == 1
116
+ assert messages[0].role == "user"
117
+ parts = messages[0].parts
118
+ assert parts is not None
119
+ assert len(parts) == 2
120
+ # First part is text
121
+ assert parts[0].text == "Look at this"
122
+ # Second part is image - check that it was created from bytes
123
+ assert parts[1].inline_data is not None
124
+
125
+ @pytest.mark.asyncio
126
+ async def test_format_tool_results(self, mock_mcp_client, mock_gemini_client):
127
+ """Test the agent's format_tool_results method."""
128
+ agent = GeminiAgent(
129
+ mcp_client=mock_mcp_client,
130
+ model_client=mock_gemini_client,
131
+ validate_api_key=False,
132
+ )
133
+
134
+ tool_calls = [
135
+ MCPToolCall(
136
+ name="gemini_computer",
137
+ arguments={"action": "click_at", "x": 100, "y": 200},
138
+ id="call_1", # type: ignore
139
+ gemini_name="click_at", # type: ignore
140
+ ),
141
+ ]
142
+
143
+ tool_results = [
144
+ MCPToolResult(
145
+ content=[
146
+ types.TextContent(type="text", text="Clicked successfully"),
147
+ types.ImageContent(
148
+ type="image",
149
+ data=base64.b64encode(b"screenshot").decode("utf-8"),
150
+ mimeType="image/png",
151
+ ),
152
+ ],
153
+ isError=False,
154
+ ),
155
+ ]
156
+
157
+ messages = await agent.format_tool_results(tool_calls, tool_results)
158
+
159
+ # format_tool_results returns a single user message with function responses
160
+ assert len(messages) == 1
161
+ assert messages[0].role == "user"
162
+ # The content contains function response parts
163
+ parts = messages[0].parts
164
+ assert parts is not None
165
+ assert len(parts) == 1
166
+ function_response = parts[0].function_response
167
+ assert function_response is not None
168
+ assert function_response.name == "click_at"
169
+ response_payload = function_response.response or {}
170
+ assert response_payload.get("success") is True
171
+
172
+ @pytest.mark.asyncio
173
+ async def test_format_tool_results_with_error(self, mock_mcp_client, mock_gemini_client):
174
+ """Test formatting tool results with errors."""
175
+ agent = GeminiAgent(
176
+ mcp_client=mock_mcp_client,
177
+ model_client=mock_gemini_client,
178
+ validate_api_key=False,
179
+ )
180
+
181
+ tool_calls = [
182
+ MCPToolCall(
183
+ name="gemini_computer",
184
+ arguments={"action": "invalid"},
185
+ id="call_error", # type: ignore
186
+ gemini_name="invalid_action", # type: ignore
187
+ ),
188
+ ]
189
+
190
+ tool_results = [
191
+ MCPToolResult(
192
+ content=[types.TextContent(type="text", text="Action failed: invalid action")],
193
+ isError=True,
194
+ ),
195
+ ]
196
+
197
+ messages = await agent.format_tool_results(tool_calls, tool_results)
198
+
199
+ # Check that error is in the response
200
+ assert len(messages) == 1
201
+ assert messages[0].role == "user"
202
+ parts = messages[0].parts
203
+ assert parts is not None
204
+ function_response = parts[0].function_response
205
+ assert function_response is not None
206
+ response_payload = function_response.response or {}
207
+ assert "error" in response_payload
208
+
209
+ @pytest.mark.asyncio
210
+ async def test_get_response(self, mock_mcp_client, mock_gemini_client):
211
+ """Test getting model response from Gemini API."""
212
+ # Disable telemetry for this test
213
+ with patch("hud.settings.settings.telemetry_enabled", False):
214
+ agent = GeminiAgent(
215
+ mcp_client=mock_mcp_client,
216
+ model_client=mock_gemini_client,
217
+ validate_api_key=False,
218
+ )
219
+
220
+ # Set up available tools
221
+ agent._available_tools = [
222
+ types.Tool(name="gemini_computer", description="Computer tool", inputSchema={})
223
+ ]
224
+
225
+ # Mock the API response
226
+ mock_response = MagicMock()
227
+ mock_candidate = MagicMock()
228
+
229
+ # Create text part
230
+ text_part = MagicMock()
231
+ text_part.text = "I will click at coordinates"
232
+ text_part.function_call = None
233
+
234
+ # Create function call part
235
+ function_call_part = MagicMock()
236
+ function_call_part.text = None
237
+ function_call_part.function_call = MagicMock()
238
+ function_call_part.function_call.name = "click_at"
239
+ function_call_part.function_call.args = {"x": 100, "y": 200}
240
+
241
+ mock_candidate.content = MagicMock()
242
+ mock_candidate.content.parts = [text_part, function_call_part]
243
+
244
+ mock_response.candidates = [mock_candidate]
245
+
246
+ mock_gemini_client.models = MagicMock()
247
+ mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
248
+
249
+ messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Click")])]
250
+ response = await agent.get_response(messages)
251
+
252
+ assert response.content == "I will click at coordinates"
253
+ assert len(response.tool_calls) == 1
254
+ assert response.tool_calls[0].arguments == {"action": "click_at", "x": 100, "y": 200}
255
+ assert response.done is False
256
+
257
+ @pytest.mark.asyncio
258
+ async def test_get_response_text_only(self, mock_mcp_client, mock_gemini_client):
259
+ """Test getting text-only response."""
260
+ # Disable telemetry for this test
261
+ with patch("hud.settings.settings.telemetry_enabled", False):
262
+ agent = GeminiAgent(
263
+ mcp_client=mock_mcp_client,
264
+ model_client=mock_gemini_client,
265
+ validate_api_key=False,
266
+ )
267
+
268
+ # Mock the API response with text only
269
+ mock_response = MagicMock()
270
+ mock_candidate = MagicMock()
271
+
272
+ text_part = MagicMock()
273
+ text_part.text = "Task completed successfully"
274
+ text_part.function_call = None
275
+
276
+ mock_candidate.content = MagicMock()
277
+ mock_candidate.content.parts = [text_part]
278
+
279
+ mock_response.candidates = [mock_candidate]
280
+
281
+ mock_gemini_client.models = MagicMock()
282
+ mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
283
+
284
+ messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Status?")])]
285
+ response = await agent.get_response(messages)
286
+
287
+ assert response.content == "Task completed successfully"
288
+ assert response.tool_calls == []
289
+ assert response.done is True
290
+
291
+ @pytest.mark.asyncio
292
+ async def test_convert_tools_for_gemini(self, mock_mcp_client, mock_gemini_client):
293
+ """Test converting MCP tools to Gemini format."""
294
+ agent = GeminiAgent(
295
+ mcp_client=mock_mcp_client,
296
+ model_client=mock_gemini_client,
297
+ validate_api_key=False,
298
+ )
299
+
300
+ # Set up available tools
301
+ agent._available_tools = [
302
+ types.Tool(
303
+ name="gemini_computer",
304
+ description="Computer tool",
305
+ inputSchema={"type": "object"},
306
+ ),
307
+ types.Tool(
308
+ name="calculator",
309
+ description="Calculator tool",
310
+ inputSchema={
311
+ "type": "object",
312
+ "properties": {"operation": {"type": "string"}},
313
+ },
314
+ ),
315
+ ]
316
+
317
+ gemini_tools = agent._convert_tools_for_gemini()
318
+
319
+ # Should have 2 tools: computer_use and calculator
320
+ assert len(gemini_tools) == 2
321
+
322
+ # First should be computer use tool
323
+ assert gemini_tools[0].computer_use is not None
324
+ assert (
325
+ gemini_tools[0].computer_use.environment == genai_types.Environment.ENVIRONMENT_BROWSER
326
+ )
327
+
328
+ # Second should be calculator as function declaration
329
+ assert gemini_tools[1].function_declarations is not None
330
+ assert len(gemini_tools[1].function_declarations) == 1
331
+ assert gemini_tools[1].function_declarations[0].name == "calculator"
332
+
333
+ @pytest.mark.asyncio
334
+ async def test_create_user_message(self, mock_mcp_client, mock_gemini_client):
335
+ """Test creating a user message."""
336
+ agent = GeminiAgent(
337
+ mcp_client=mock_mcp_client,
338
+ model_client=mock_gemini_client,
339
+ validate_api_key=False,
340
+ )
341
+
342
+ message = await agent.create_user_message("Hello Gemini")
343
+
344
+ assert message.role == "user"
345
+ parts = message.parts
346
+ assert parts is not None
347
+ assert len(parts) == 1
348
+ assert parts[0].text == "Hello Gemini"
349
+
350
+ @pytest.mark.asyncio
351
+ async def test_handle_empty_response(self, mock_mcp_client, mock_gemini_client):
352
+ """Test handling empty response from API."""
353
+ with patch("hud.settings.settings.telemetry_enabled", False):
354
+ agent = GeminiAgent(
355
+ mcp_client=mock_mcp_client,
356
+ model_client=mock_gemini_client,
357
+ validate_api_key=False,
358
+ )
359
+
360
+ # Mock empty response
361
+ mock_response = MagicMock()
362
+ mock_response.candidates = []
363
+
364
+ mock_gemini_client.models = MagicMock()
365
+ mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
366
+
367
+ messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Hi")])]
368
+ response = await agent.get_response(messages)
369
+
370
+ assert response.content == ""
371
+ assert response.tool_calls == []
372
+ assert response.done is True
hud/cli/__init__.py CHANGED
@@ -93,14 +93,14 @@ def analyze(
93
93
  ) -> None:
94
94
  """🔍 Analyze MCP environment - discover tools, resources, and capabilities.
95
95
 
96
- By default, uses cached metadata for instant results.
96
+ [not dim]By default, uses cached metadata for instant results.
97
97
  Use --live to run the container for real-time analysis.
98
98
 
99
99
  Examples:
100
100
  hud analyze hudpython/test_init # Fast metadata inspection
101
101
  hud analyze my-env --live # Full container analysis
102
102
  hud analyze --config mcp-config.json # From MCP config
103
- hud analyze --cursor text-2048-dev # From Cursor config
103
+ hud analyze --cursor text-2048-dev # From Cursor config[/not dim]
104
104
  """
105
105
  if config:
106
106
  # Load config from JSON file (always live for configs)
@@ -177,7 +177,7 @@ def debug(
177
177
  ) -> None:
178
178
  """🐛 Debug MCP environment - test initialization, tools, and readiness.
179
179
 
180
- Examples:
180
+ [not dim]Examples:
181
181
  hud debug . # Debug current directory
182
182
  hud debug environments/browser # Debug specific directory
183
183
  hud debug . --build # Build then debug
@@ -185,7 +185,7 @@ def debug(
185
185
  hud debug my-mcp-server:v1 -e API_KEY=xxx
186
186
  hud debug --config mcp-config.json
187
187
  hud debug --cursor text-2048-dev
188
- hud debug . --max-phase 3 # Stop after phase 3
188
+ hud debug . --max-phase 3 # Stop after phase 3[/not dim]
189
189
  """
190
190
  # Import here to avoid circular imports
191
191
  from hud.utils.hud_console import HUDConsole
@@ -403,7 +403,7 @@ def dev(
403
403
  ) -> None:
404
404
  """🔥 Development mode - run MCP server with hot-reload.
405
405
 
406
- TWO MODES:
406
+ [not dim]TWO MODES:
407
407
 
408
408
  1. Python Module:
409
409
  hud dev # Auto-detects module
@@ -424,7 +424,7 @@ def dev(
424
424
  hud dev --watch ../shared # Watch additional directories
425
425
 
426
426
  For environment backend servers, use uvicorn directly:
427
- uvicorn server:app --reload
427
+ uvicorn server:app --reload[/not dim]
428
428
  """
429
429
  # Extract module from params if provided (first param when not --docker)
430
430
  module = params[0] if params and not docker else None
@@ -492,7 +492,7 @@ def run(
492
492
  ) -> None:
493
493
  """🚀 Run Docker image as MCP server.
494
494
 
495
- A simple wrapper around 'docker run' that can launch images locally or remotely.
495
+ [not dim]A simple wrapper around 'docker run' that can launch images locally or remotely.
496
496
  By default, runs remotely via mcp.hud.so. Use --local to run with local Docker.
497
497
 
498
498
  For local Python development with hot-reload, use 'hud dev' instead.
@@ -502,7 +502,7 @@ def run(
502
502
  hud run my-image:latest --local # Run with local Docker
503
503
  hud run my-image:latest -e KEY=value # Remote with env vars
504
504
  hud run my-image:latest --local -e KEY=val # Local with env vars
505
- hud run my-image:latest --transport http # Use HTTP transport
505
+ hud run my-image:latest --transport http # Use HTTP transport[/not dim]
506
506
  """
507
507
  if not params:
508
508
  console.print("[red]❌ Docker image is required[/red]")
@@ -559,7 +559,7 @@ def clone(
559
559
  ) -> None:
560
560
  """🚀 Clone a git repository quietly with a pretty output.
561
561
 
562
- This command wraps 'git clone' with the --quiet flag and displays
562
+ [not dim]This command wraps 'git clone' with the --quiet flag and displays
563
563
  a rich formatted success message. If the repository contains a clone
564
564
  message in pyproject.toml, it will be displayed as a tutorial.
565
565
 
@@ -574,7 +574,7 @@ def clone(
574
574
  # style = "cyan"
575
575
 
576
576
  Examples:
577
- hud clone https://github.com/user/repo.git
577
+ hud clone https://github.com/user/repo.git[/not dim]
578
578
  """
579
579
  # Run the clone
580
580
  success, result = clone_repository(url)
@@ -605,7 +605,7 @@ def build(
605
605
  ) -> None:
606
606
  """🏗️ Build a HUD environment and generate lock file.
607
607
 
608
- This command:
608
+ [not dim]This command:
609
609
  - Builds a Docker image from your environment
610
610
  - Analyzes the MCP server to extract metadata
611
611
  - Generates a hud.lock.yaml file for reproducibility
@@ -614,7 +614,7 @@ def build(
614
614
  hud build # Build current directory
615
615
  hud build environments/text_2048 -e API_KEY=secret
616
616
  hud build . --tag my-env:v1.0 -e VAR1=value1 -e VAR2=value2
617
- hud build . --no-cache # Force rebuild
617
+ hud build . --no-cache # Force rebuild[/not dim]
618
618
  """
619
619
  # Parse directory and extra arguments
620
620
  if params:
@@ -670,14 +670,14 @@ def push(
670
670
  ) -> None:
671
671
  """📤 Push HUD environment to registry.
672
672
 
673
- Reads hud.lock.yaml from the directory and pushes to registry.
673
+ [not dim]Reads hud.lock.yaml from the directory and pushes to registry.
674
674
  Auto-detects your Docker username if --image not specified.
675
675
 
676
676
  Examples:
677
677
  hud push # Push with auto-detected name
678
678
  hud push --tag v1.0 # Push with specific tag
679
679
  hud push . --image myuser/myenv:v1.0
680
- hud push --yes # Skip confirmation
680
+ hud push --yes # Skip confirmation[/not dim]
681
681
  """
682
682
  push_command(directory, image, tag, sign, yes, verbose)
683
683
 
@@ -696,12 +696,12 @@ def pull(
696
696
  ) -> None:
697
697
  """📥 Pull HUD environment from registry with metadata preview.
698
698
 
699
- Shows environment details before downloading.
699
+ [not dim]Shows environment details before downloading.
700
700
 
701
701
  Examples:
702
702
  hud pull hud.lock.yaml # Pull from lock file
703
703
  hud pull myuser/myenv:latest # Pull by image reference
704
- hud pull myuser/myenv --verify-only # Check metadata only
704
+ hud pull myuser/myenv --verify-only # Check metadata only[/not dim]
705
705
  """
706
706
  pull_command(target, lock_file, yes, verify_only, verbose)
707
707
 
@@ -717,14 +717,14 @@ def list_environments(
717
717
  ) -> None:
718
718
  """📋 List all HUD environments in local registry.
719
719
 
720
- Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
720
+ [not dim]Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
721
721
 
722
722
  Examples:
723
723
  hud list # List all environments
724
724
  hud list --filter text # Filter by name
725
725
  hud list --json # Output as JSON
726
726
  hud list --all # Show digest column
727
- hud list --verbose # Show full descriptions
727
+ hud list --verbose # Show full descriptions[/not dim]
728
728
  """
729
729
  list_module.list_command(filter_name, json_output, show_all, verbose)
730
730
 
@@ -739,7 +739,7 @@ def remove(
739
739
  ) -> None:
740
740
  """🗑️ Remove HUD environments from local registry.
741
741
 
742
- Removes environment metadata from ~/.hud/envs/
742
+ [not dim]Removes environment metadata from ~/.hud/envs/
743
743
  Note: This does not remove the Docker images.
744
744
 
745
745
  Examples:
@@ -747,7 +747,7 @@ def remove(
747
747
  hud remove text_2048 # Remove by name
748
748
  hud remove hudpython/test_init # Remove by full name
749
749
  hud remove all # Remove all environments
750
- hud remove all --yes # Remove all without confirmation
750
+ hud remove all --yes # Remove all without confirmation[/not dim]
751
751
  """
752
752
  remove_command(target, yes, verbose)
753
753
 
@@ -766,7 +766,7 @@ def init(
766
766
  ) -> None:
767
767
  """🚀 Initialize a new HUD environment with minimal boilerplate.
768
768
 
769
- Creates a working MCP environment with:
769
+ [not dim]Creates a working MCP environment with:
770
770
  - Dockerfile for containerization
771
771
  - pyproject.toml for dependencies
772
772
  - Minimal MCP server with context
@@ -775,7 +775,8 @@ def init(
775
775
  Examples:
776
776
  hud init # Choose preset interactively, create ./preset-name/
777
777
  hud init my-env # Create new directory ./my-env/
778
- hud init my-env --dir /tmp # Create in /tmp/my-env/
778
+ hud init my-env --dir /tmp # Create in /tmp/my-env/[/not dim]
779
+
779
780
  """
780
781
  create_environment(name, directory, force, preset)
781
782
 
@@ -917,6 +918,7 @@ def eval(
917
918
  [
918
919
  {"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
919
920
  {"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
921
+ {"name": "Gemini Computer Use", "value": AgentType.GEMINI},
920
922
  {"name": "vLLM (Local Server)", "value": AgentType.VLLM},
921
923
  {"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
922
924
  ]
@@ -1151,11 +1153,11 @@ def set(
1151
1153
  ) -> None:
1152
1154
  """Persist API keys or other variables for HUD to use by default.
1153
1155
 
1154
- Examples:
1156
+ [not dim]Examples:
1155
1157
  hud set ANTHROPIC_API_KEY=sk-... OPENAI_API_KEY=sk-...
1156
1158
 
1157
1159
  Values are stored in ~/.hud/.env and are loaded by hud.settings with
1158
- the lowest precedence (overridden by process env and project .env).
1160
+ the lowest precedence (overridden by process env and project .env).[/not dim]
1159
1161
  """
1160
1162
  from hud.utils.hud_console import HUDConsole
1161
1163
 
hud/cli/eval.py CHANGED
@@ -188,6 +188,24 @@ def build_agent(
188
188
  else:
189
189
  return OperatorAgent(verbose=verbose)
190
190
 
191
+ elif agent_type == AgentType.GEMINI:
192
+ try:
193
+ from hud.agents import GeminiAgent
194
+ except ImportError as e:
195
+ hud_console.error(
196
+ "Gemini agent dependencies are not installed. "
197
+ "Please install with: pip install 'hud-python[agent]'"
198
+ )
199
+ raise typer.Exit(1) from e
200
+
201
+ gemini_kwargs: dict[str, Any] = {
202
+ "model": model or "gemini-2.5-computer-use-preview-10-2025",
203
+ "verbose": verbose,
204
+ }
205
+ if allowed_tools:
206
+ gemini_kwargs["allowed_tools"] = allowed_tools
207
+ return GeminiAgent(**gemini_kwargs)
208
+
191
209
  elif agent_type == AgentType.LITELLM:
192
210
  try:
193
211
  from hud.agents.lite_llm import LiteAgent
@@ -344,6 +362,17 @@ async def run_single_task(
344
362
  agent_config = {"verbose": verbose}
345
363
  if allowed_tools:
346
364
  agent_config["allowed_tools"] = allowed_tools
365
+ elif agent_type == AgentType.GEMINI:
366
+ from hud.agents import GeminiAgent
367
+
368
+ agent_class = GeminiAgent
369
+ agent_config = {
370
+ "model": model or "gemini-2.5-computer-use-preview-10-2025",
371
+ "verbose": verbose,
372
+ "validate_api_key": False,
373
+ }
374
+ if allowed_tools:
375
+ agent_config["allowed_tools"] = allowed_tools
347
376
  elif agent_type == AgentType.LITELLM:
348
377
  from hud.agents.lite_llm import LiteAgent
349
378
 
@@ -534,6 +563,26 @@ async def run_full_dataset(
534
563
  if allowed_tools:
535
564
  agent_config["allowed_tools"] = allowed_tools
536
565
 
566
+ elif agent_type == AgentType.GEMINI:
567
+ try:
568
+ from hud.agents import GeminiAgent
569
+
570
+ agent_class = GeminiAgent
571
+ except ImportError as e:
572
+ hud_console.error(
573
+ "Gemini agent dependencies are not installed. "
574
+ "Please install with: pip install 'hud-python[agent]'"
575
+ )
576
+ raise typer.Exit(1) from e
577
+
578
+ agent_config = {
579
+ "model": model or "gemini-2.5-computer-use-preview-10-2025",
580
+ "verbose": verbose,
581
+ "validate_api_key": False,
582
+ }
583
+ if allowed_tools:
584
+ agent_config["allowed_tools"] = allowed_tools
585
+
537
586
  elif agent_type == AgentType.LITELLM:
538
587
  try:
539
588
  from hud.agents.lite_llm import LiteAgent
@@ -641,7 +690,7 @@ def eval_command(
641
690
  agent: AgentType = typer.Option( # noqa: B008
642
691
  AgentType.CLAUDE,
643
692
  "--agent",
644
- help="Agent backend to use (claude, openai, vllm for local server, or litellm)",
693
+ help="Agent backend to use (claude, gemini, openai, vllm for local servers, or litellm)",
645
694
  ),
646
695
  model: str | None = typer.Option(
647
696
  None,
@@ -757,6 +806,13 @@ def eval_command(
757
806
  "Set it in your environment or run: hud set ANTHROPIC_API_KEY=your-key-here"
758
807
  )
759
808
  raise typer.Exit(1)
809
+ elif agent == AgentType.GEMINI:
810
+ if not settings.gemini_api_key:
811
+ hud_console.error("GEMINI_API_KEY is required for Gemini agent")
812
+ hud_console.info(
813
+ "Set it in your environment or run: hud set GEMINI_API_KEY=your-key-here"
814
+ )
815
+ raise typer.Exit(1)
760
816
  elif agent == AgentType.OPENAI and not settings.openai_api_key:
761
817
  hud_console.error("OPENAI_API_KEY is required for OpenAI agent")
762
818
  hud_console.info("Set it in your environment or run: hud set OPENAI_API_KEY=your-key-here")