hud-python 0.4.21__py3-none-any.whl → 0.4.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (63)
  1. hud/agents/base.py +37 -37
  2. hud/agents/claude.py +11 -6
  3. hud/agents/grounded_openai.py +282 -0
  4. hud/agents/misc/response_agent.py +3 -2
  5. hud/agents/openai.py +2 -2
  6. hud/agents/openai_chat_generic.py +3 -1
  7. hud/agents/tests/test_client.py +6 -1
  8. hud/agents/tests/test_grounded_openai_agent.py +155 -0
  9. hud/cli/__init__.py +34 -24
  10. hud/cli/analyze.py +27 -26
  11. hud/cli/build.py +50 -46
  12. hud/cli/debug.py +7 -7
  13. hud/cli/dev.py +107 -99
  14. hud/cli/eval.py +33 -31
  15. hud/cli/hf.py +53 -53
  16. hud/cli/init.py +28 -28
  17. hud/cli/list_func.py +22 -22
  18. hud/cli/pull.py +36 -36
  19. hud/cli/push.py +76 -74
  20. hud/cli/remove.py +42 -40
  21. hud/cli/rl/__init__.py +2 -2
  22. hud/cli/rl/init.py +41 -41
  23. hud/cli/rl/pod.py +97 -91
  24. hud/cli/rl/ssh.py +42 -40
  25. hud/cli/rl/train.py +75 -73
  26. hud/cli/rl/utils.py +10 -10
  27. hud/cli/tests/test_analyze.py +1 -1
  28. hud/cli/tests/test_analyze_metadata.py +2 -2
  29. hud/cli/tests/test_pull.py +45 -45
  30. hud/cli/tests/test_push.py +31 -29
  31. hud/cli/tests/test_registry.py +15 -15
  32. hud/cli/utils/environment.py +11 -11
  33. hud/cli/utils/interactive.py +18 -18
  34. hud/cli/utils/logging.py +12 -12
  35. hud/cli/utils/metadata.py +12 -12
  36. hud/cli/utils/registry.py +5 -5
  37. hud/cli/utils/runner.py +23 -23
  38. hud/cli/utils/server.py +16 -16
  39. hud/settings.py +6 -0
  40. hud/shared/hints.py +7 -7
  41. hud/tools/executors/tests/test_base_executor.py +1 -1
  42. hud/tools/executors/xdo.py +1 -1
  43. hud/tools/grounding/__init__.py +13 -0
  44. hud/tools/grounding/config.py +54 -0
  45. hud/tools/grounding/grounded_tool.py +314 -0
  46. hud/tools/grounding/grounder.py +302 -0
  47. hud/tools/grounding/tests/__init__.py +1 -0
  48. hud/tools/grounding/tests/test_grounded_tool.py +196 -0
  49. hud/tools/tests/test_playwright_tool.py +1 -1
  50. hud/tools/tests/test_tools_init.py +1 -1
  51. hud/tools/tests/test_utils.py +2 -2
  52. hud/types.py +4 -4
  53. hud/utils/__init__.py +3 -3
  54. hud/utils/agent_factories.py +86 -0
  55. hud/utils/{design.py → hud_console.py} +39 -33
  56. hud/utils/pretty_errors.py +6 -6
  57. hud/utils/tests/test_version.py +1 -1
  58. hud/version.py +1 -1
  59. {hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/METADATA +3 -1
  60. {hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/RECORD +63 -54
  61. {hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/WHEEL +0 -0
  62. {hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/entry_points.txt +0 -0
  63. {hud_python-0.4.21.dist-info → hud_python-0.4.23.dist-info}/licenses/LICENSE +0 -0
hud/tools/grounding/grounder.py ADDED
@@ -0,0 +1,302 @@
+"""OpenAI-based grounder for visual element detection."""
+
+from __future__ import annotations
+
+import base64
+import io
+import json
+import re
+
+from openai import AsyncOpenAI
+from opentelemetry import trace
+
+from hud import instrument
+from hud.tools.grounding.config import GrounderConfig  # noqa: TC001
+
+
+class Grounder:
+    """Grounder that uses AsyncOpenAI to call vLLM or other model endpoints for visual grounding.
+
+    This class handles:
+    - Image resizing based on configuration
+    - API calls to grounding models via AsyncOpenAI
+    - Coordinate parsing from model outputs
+    - Coordinate format conversion (pixels, normalized)
+    """
+
+    def __init__(self, config: GrounderConfig) -> None:
+        """Initialize the grounder with configuration.
+
+        Args:
+            config: GrounderConfig with API endpoint, model, and parsing settings
+        """
+        self.config = config
+        self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.api_base)
+
+    def _resize_image(self, image_b64: str) -> tuple[str, tuple[int, int], tuple[int, int]]:
+        """Resize image according to configuration.
+
+        Args:
+            image_b64: Base64-encoded image string
+
+        Returns:
+            Tuple of (processed_base64, (original_width, original_height),
+            (processed_width, processed_height))
+        """
+        # Decode image
+        from PIL import Image
+
+        image_bytes = base64.b64decode(image_b64)
+        img = Image.open(io.BytesIO(image_bytes))
+        original_size = (img.width, img.height)
+
+        if not self.config.resize["enabled"]:
+            return image_b64, original_size, original_size
+
+        # Calculate total pixels
+        total_pixels = img.width * img.height
+        min_pixels = self.config.resize["min_pixels"]
+        max_pixels = self.config.resize["max_pixels"]
+        factor = self.config.resize["factor"]
+
+        # Determine if resizing is needed
+        if total_pixels < min_pixels or total_pixels > max_pixels:
+            # Calculate scaling factor
+            if total_pixels < min_pixels:
+                scale = (min_pixels / total_pixels) ** 0.5
+            else:
+                scale = (max_pixels / total_pixels) ** 0.5
+
+            # Round dimensions to nearest factor
+            new_width = int((img.width * scale) // factor) * factor
+            new_height = int((img.height * scale) // factor) * factor
+
+            # Ensure minimum dimensions
+            new_width = max(new_width, factor)
+            new_height = max(new_height, factor)
+
+            # Resize image
+            img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
+
+            # Convert back to base64
+            buffer = io.BytesIO()
+            img.save(buffer, format="PNG")
+            resized_b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
+            return resized_b64, original_size, (new_width, new_height)
+
+        return image_b64, original_size, original_size
+
+    def _parse_coordinates(self, response_text: str) -> tuple[float, float] | None:
+        """Parse coordinates from model response.
+
+        Handles multiple formats:
+        - (x, y) format from configured regex
+        - [x1, y1, x2, y2] bounding box format (returns center point)
+        - [x, y] point format
+
+        Args:
+            response_text: Text output from the grounding model
+
+        Returns:
+            Tuple of (x, y) coordinates or None if parsing fails
+        """
+        # First try the configured regex pattern
+        match = re.search(self.config.parser_regex, response_text)
+        if match:
+            try:
+                x = float(match.group(1))
+                y = float(match.group(2))
+                return (x, y)
+            except (ValueError, IndexError):
+                # If parsing fails, continue to fallback strategies
+                pass
+
+        # Try to parse as a list/array format [x1, y1, x2, y2] or [x, y]
+        # Also handles (x1, y1, x2, y2)
+        # Updated pattern to handle both integers and floats
+        list_pattern = (
+            r"[\[\(](\d+(?:\.\d+)?)[,\s]+(\d+(?:\.\d+)?)"
+            r"(?:[,\s]+(\d+(?:\.\d+)?)[,\s]+(\d+(?:\.\d+)?))?[\]\)]"
+        )
+        list_match = re.search(list_pattern, response_text)
+        if list_match:
+            x1 = float(list_match.group(1))
+            y1 = float(list_match.group(2))
+
+            # Check if it's a bounding box (4 values) or a point (2 values)
+            if list_match.group(3) and list_match.group(4):
+                # Bounding box format - return center point
+                x2 = float(list_match.group(3))
+                y2 = float(list_match.group(4))
+                center_x = (x1 + x2) / 2
+                center_y = (y1 + y2) / 2
+                return (center_x, center_y)
+            else:
+                # Point format
+                return (x1, y1)
+
+        return None
+
+    def _convert_coordinates(
+        self,
+        coords: tuple[float, float],
+        processed_size: tuple[int, int],
+        original_size: tuple[int, int],
+    ) -> tuple[int, int]:
+        """Convert coordinates based on output format configuration and scale to original size.
+
+        Args:
+            coords: Raw coordinates from model (can be float for normalized formats)
+            processed_size: Dimensions of the processed/resized image (width, height)
+            original_size: Original image dimensions (width, height)
+
+        Returns:
+            Converted coordinates in original image pixels
+        """
+        x, y = coords
+        proc_width, proc_height = processed_size
+        orig_width, orig_height = original_size
+
+        # First convert to pixels in the processed image space
+        if self.config.output_format == "pixels":
+            # Already in pixels of processed image
+            proc_x, proc_y = x, y
+        elif self.config.output_format == "norm_0_1":
+            # Convert from 0-1 normalized to pixels
+            proc_x = x * proc_width
+            proc_y = y * proc_height
+        elif self.config.output_format == "norm_0_999":
+            # Convert from 0-999 normalized to pixels
+            proc_x = x * proc_width / 999
+            proc_y = y * proc_height / 999
+        else:
+            proc_x, proc_y = x, y
+
+        # Scale from processed image coordinates to original image coordinates
+        scale_x = orig_width / proc_width
+        scale_y = orig_height / proc_height
+
+        final_x = int(proc_x * scale_x)
+        final_y = int(proc_y * scale_y)
+
+        return (final_x, final_y)
+
+    @instrument(
+        name="Grounding.predict_click",
+        span_type="agent",
+        record_args=True,
+        record_result=True,
+    )
+    async def predict_click(
+        self, *, image_b64: str, instruction: str, max_retries: int = 3
+    ) -> tuple[int, int] | None:
+        """Predict click coordinates for the given instruction on the image.
+
+        Args:
+            image_b64: Base64-encoded screenshot
+            instruction: Natural language description of the element to click
+            max_retries: Maximum number of retry attempts (default: 3)
+
+        Returns:
+            Tuple of (x, y) pixel coordinates or None if grounding fails
+        """
+
+        # Resize image once outside the retry loop
+        processed_image, original_size, processed_size = self._resize_image(image_b64)
+
+        # Build messages once
+        messages = []
+
+        # Add system prompt if configured
+        if self.config.system_prompt:
+            messages.append(
+                {
+                    "role": "system",
+                    "content": (
+                        self.config.system_prompt
+                        + f" The image resolution is height {processed_size[1]} "
+                        + f"and width {processed_size[0]}."
+                    ),
+                }
+            )
+
+        # Add user message with image and instruction
+        messages.append(
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/png;base64,{processed_image}"},
+                    },
+                    {"type": "text", "text": instruction},
+                ],
+            }
+        )
+
+        # Retry loop
+        for attempt in range(max_retries):
+            try:
+                # Call the grounding model via AsyncOpenAI
+                response = await self.client.chat.completions.create(
+                    model=self.config.model,
+                    messages=messages,
+                    temperature=0.0,
+                    max_tokens=50,
+                )
+
+                # Extract response text
+                response_text = response.choices[0].message.content
+
+                # Manually record the raw response in the span
+                span = trace.get_current_span()
+                if span and span.is_recording():
+                    span.set_attribute("grounder.raw_response", json.dumps(response.model_dump()))
+                    span.set_attribute("grounder.attempt", attempt + 1)
+
+                # Parse coordinates from response
+                if response_text is None:
+                    if attempt < max_retries - 1:
+                        continue
+                    return None
+
+                coords = self._parse_coordinates(response_text)
+                if coords is None:
+                    if attempt < max_retries - 1:
+                        continue
+                    return None
+
+                # Convert coordinates to original image pixels based on output format and scaling
+                pixel_coords = self._convert_coordinates(coords, processed_size, original_size)
+
+                # Validate coordinates are within image bounds
+                x, y = pixel_coords
+                if x < 0 or y < 0 or x >= original_size[0] or y >= original_size[1]:
+                    # Clamp to image bounds
+                    x = max(0, min(x, original_size[0] - 1))
+                    y = max(0, min(y, original_size[1] - 1))
+                    pixel_coords = (x, y)
+
+                # Record successful grounding in span
+                span = trace.get_current_span()
+                if span and span.is_recording():
+                    span.set_attribute("grounder.success", True)
+                    span.set_attribute(
+                        "grounder.final_coords", f"{pixel_coords[0]},{pixel_coords[1]}"
+                    )
+                    span.set_attribute("grounder.total_attempts", attempt + 1)
+
+                return pixel_coords
+
+            except Exception:
+                if attempt < max_retries - 1:
+                    continue
+
+                # Record failure in span
+                span = trace.get_current_span()
+                if span and span.is_recording():
+                    span.set_attribute("grounder.success", False)
+                    span.set_attribute("grounder.total_attempts", max_retries)
+                    span.set_attribute("grounder.failure_reason", "All attempts exhausted")
+
+        return None
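The coordinate pipeline above is easier to follow with concrete numbers. The sketch below first replays the _convert_coordinates arithmetic for an assumed output_format of "norm_0_999" and an assumed 1288x728 processed image downscaled from a 1920x1080 screenshot (neither value is a package default), then shows a minimal predict_click call; the endpoint, model name, and key are placeholders.

import asyncio

from hud.tools.grounding.config import GrounderConfig
from hud.tools.grounding.grounder import Grounder

# Worked example: the model answers (500, 250) in 0-999 normalized space
# on a 1288x728 processed image that came from a 1920x1080 screenshot.
proc_x = 500 * 1288 / 999  # ~644.6 px in processed-image space
proc_y = 250 * 728 / 999  # ~182.2 px in processed-image space
final_x = int(proc_x * 1920 / 1288)  # 960 px in the original screenshot
final_y = int(proc_y * 1080 / 728)  # 270 px in the original screenshot

# Minimal usage sketch, e.g. against a local vLLM server:
config = GrounderConfig(
    api_base="http://localhost:8000/v1",
    model="qwen/qwen-2.5-vl-7b-instruct",
    api_key="EMPTY",
)
grounder = Grounder(config)

PNG_1X1 = (  # 1x1 PNG from the test suite below; a real screenshot in practice
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGMAAQAABQAB"
    "J2n0mQAAAABJRU5ErkJggg=="
)

async def main() -> None:
    coords = await grounder.predict_click(
        image_b64=PNG_1X1,
        instruction="the blue 'Submit' button",
    )
    print(coords)  # (x, y) in original-image pixels, or None on failure

asyncio.run(main())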
hud/tools/grounding/tests/__init__.py ADDED
@@ -0,0 +1 @@
+"""Tests for grounding tools."""
hud/tools/grounding/tests/test_grounded_tool.py ADDED
@@ -0,0 +1,196 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+import mcp.types as types
+import pytest
+
+from hud.tools.grounding.grounded_tool import GroundedComputerTool
+from hud.types import MCPToolCall, MCPToolResult
+
+
+@dataclass
+class FakeResult:
+    content: list[types.ContentBlock]
+    isError: bool = False
+    structuredContent: dict | None = None
+
+
+class FakeMCPClient:
+    """Fake MCP client that implements AgentMCPClient protocol."""
+
+    _initialized: bool
+
+    def __init__(self) -> None:
+        self.calls: list[tuple[str, dict[str, Any]]] = []
+        self._initialized = False
+
+    @property
+    def mcp_config(self) -> dict[str, dict[str, Any]]:
+        return {"test": {"command": "echo", "args": ["test"]}}
+
+    @property
+    def is_connected(self) -> bool:
+        return self._initialized
+
+    async def initialize(self, mcp_config: dict[str, dict[str, Any]] | None = None) -> None:
+        self._initialized = True
+
+    async def list_tools(self) -> list[types.Tool]:
+        return [types.Tool(name="computer", description="Test tool", inputSchema={})]
+
+    async def call_tool(self, tool_call: MCPToolCall) -> MCPToolResult:
+        self.calls.append((tool_call.name, tool_call.arguments or {}))
+        return MCPToolResult(content=[types.TextContent(text="ok", type="text")], isError=False)
+
+    async def shutdown(self) -> None:
+        self._initialized = False
+
+
+class FakeGrounder:
+    """Fake grounder that implements Grounder interface."""
+
+    def __init__(self, coords: tuple[int, int] | None = (10, 20)) -> None:
+        self.coords = coords
+        self.calls: list[tuple[str, str]] = []
+
+    async def predict_click(
+        self, *, image_b64: str, instruction: str, max_retries: int = 3
+    ) -> tuple[int, int] | None:
+        self.calls.append((image_b64[:10], instruction))
+        return self.coords
+
+
+def _png_b64() -> str:
+    # 1x1 transparent PNG base64 (valid minimal image)
+    return (
+        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGMAAQAABQAB"
+        "J2n0mQAAAABJRU5ErkJggg=="
+    )
+
+
+@pytest.mark.asyncio
+async def test_click_action_grounds_and_calls_mcp() -> None:
+    client = FakeMCPClient()
+    grounder = FakeGrounder(coords=(123, 456))
+    tool = GroundedComputerTool(grounder=grounder, mcp_client=client)  # type: ignore
+
+    blocks = await tool(
+        action="click",
+        element_description="red button",
+        screenshot_b64=_png_b64(),
+        button="left",
+    )
+
+    assert isinstance(blocks, list)
+    # Grounder called once
+    assert len(grounder.calls) == 1
+    # MCP called with resolved coordinates
+    assert client.calls == [("computer", {"action": "click", "x": 123, "y": 456, "button": "left"})]
+
+
+@pytest.mark.asyncio
+async def test_move_and_scroll_require_element_description_and_screenshot() -> None:
+    client = FakeMCPClient()
+    grounder = FakeGrounder(coords=(5, 6))
+    tool = GroundedComputerTool(grounder=grounder, mcp_client=client)  # type: ignore
+
+    # Missing element_description
+    with pytest.raises(Exception) as ei:
+        await tool(action="move", screenshot_b64=_png_b64())
+    assert "element_description is required" in str(ei.value)
+
+    # Missing screenshot
+    with pytest.raises(Exception) as ei2:
+        await tool(action="scroll", element_description="list", scroll_y=100)
+    assert "No screenshot available" in str(ei2.value)
+
+
+@pytest.mark.asyncio
+async def test_drag_grounds_both_points_and_calls_mcp() -> None:
+    client = FakeMCPClient()
+    grounder = FakeGrounder(coords=(10, 20))
+    tool = GroundedComputerTool(grounder=grounder, mcp_client=client)  # type: ignore
+
+    await tool(
+        action="drag",
+        start_element_description="source",
+        end_element_description="target",
+        screenshot_b64=_png_b64(),
+        button="left",
+    )
+
+    # Two grounding calls (start and end)
+    assert len(grounder.calls) == 2
+    # Drag path contains two points, same coords from fake grounder
+    name, args = client.calls[0]
+    assert name == "computer"
+    assert args["action"] == "drag"
+    assert args["button"] == "left"
+    assert args["path"] == [(10, 20), (10, 20)]
+
+
+@pytest.mark.asyncio
+async def test_drag_requires_both_descriptions_and_screenshot() -> None:
+    client = FakeMCPClient()
+    grounder = FakeGrounder()
+    tool = GroundedComputerTool(grounder=grounder, mcp_client=client)  # type: ignore
+
+    with pytest.raises(Exception) as ei:
+        await tool(action="drag", start_element_description="a", screenshot_b64=_png_b64())
+    assert "start_element_description and end_element_description" in str(ei.value)
+
+    with pytest.raises(Exception) as ei2:
+        await tool(
+            action="drag",
+            start_element_description="a",
+            end_element_description="b",
+        )
+    assert "No screenshot available" in str(ei2.value)
+
+
+@pytest.mark.asyncio
+async def test_direct_actions_bypass_grounding_and_call_mcp() -> None:
+    client = FakeMCPClient()
+    grounder = FakeGrounder()
+    tool = GroundedComputerTool(grounder=grounder, mcp_client=client)  # type: ignore
+
+    # Actions that bypass grounding
+    for action, extra in [
+        ("screenshot", {}),
+        ("type", {"text": "hello"}),
+        ("keypress", {"keys": ["ctrl", "a"]}),
+        ("wait", {}),
+        ("get_current_url", {}),
+        ("get_dimensions", {}),
+        ("get_environment", {}),
+    ]:
+        client.calls.clear()
+        _ = await tool(action=action, **extra)
+        assert client.calls and client.calls[0][0] == "computer"
+        assert client.calls[0][1]["action"] == action
+    # Grounder not invoked for these
+    assert grounder.calls == []
+
+
+@pytest.mark.asyncio
+async def test_unsupported_action_raises() -> None:
+    client = FakeMCPClient()
+    grounder = FakeGrounder()
+    tool = GroundedComputerTool(grounder=grounder, mcp_client=client)  # type: ignore
+
+    with pytest.raises(Exception) as ei:
+        await tool(action="zoom")
+    assert "Unsupported action" in str(ei.value)
+
+
+@pytest.mark.asyncio
+async def test_grounding_failure_propagates_as_error() -> None:
+    client = FakeMCPClient()
+    grounder = FakeGrounder(coords=None)
+    tool = GroundedComputerTool(grounder=grounder, mcp_client=client)  # type: ignore
+
+    with pytest.raises(Exception) as ei:
+        await tool(action="click", element_description="x", screenshot_b64=_png_b64())
+    assert "Could not locate element" in str(ei.value)
hud/tools/tests/test_playwright_tool.py CHANGED
@@ -52,7 +52,7 @@ class TestPlaywrightTool:
         assert any(isinstance(b, TextContent) for b in blocks)
         # The actual call includes wait_until parameter with a Field object
         mock_page.goto.assert_called_once()
-        args, kwargs = mock_page.goto.call_args
+        args, _kwargs = mock_page.goto.call_args
         assert args[0] == "https://example.com"
         mock_ensure.assert_called_once()
 
hud/tools/tests/test_tools_init.py CHANGED
@@ -33,7 +33,7 @@ class TestToolsInit:
         """Test lazy import with invalid attribute name."""
         import hud.tools as tools_module
 
-        with pytest.raises(AttributeError, match="module '.*' has no attribute 'InvalidTool'"):
+        with pytest.raises(AttributeError, match=r"module '.*' has no attribute 'InvalidTool'"):
            _ = tools_module.InvalidTool
 
    def test_direct_imports_available(self):
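The raw-string change here is worth a note: pytest applies match= with re.search, so the argument is a regular expression, and writing it as an r-string is the conventional way to mark that (and avoids invalid-escape warnings once a pattern contains backslashes). A tiny illustration, not from the package:

import pytest

def test_match_is_a_regex() -> None:
    # match= is searched as a regex against str(exception), so \d+ below
    # matches the digit in the message; the r-prefix keeps the backslash
    # literal instead of becoming a (deprecated) string escape.
    with pytest.raises(ValueError, match=r"expected \d+ items"):
        raise ValueError("expected 3 items")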
hud/tools/tests/test_utils.py CHANGED
@@ -58,7 +58,7 @@ class TestRun:
         mock_proc.communicate = AsyncMock(return_value=(b"processed", b""))
 
         with patch("asyncio.create_subprocess_shell", return_value=mock_proc):
-            return_code, stdout, stderr = await run("cat", input="test input")
+            return_code, stdout, _stderr = await run("cat", input="test input")
 
         assert return_code == 0
         assert stdout == "processed"
@@ -91,7 +91,7 @@ class TestRun:
         ):
             mock_wait_for.return_value = (b"done", b"")
 
-            return_code, stdout, stderr = await run("sleep 1", timeout=5.0)
+            _return_code, _stdout, _stderr = await run("sleep 1", timeout=5.0)
 
         # Check that wait_for was called with the correct timeout
         mock_wait_for.assert_called_once()
hud/types.py CHANGED
@@ -29,9 +29,9 @@ class MCPToolCall(CallToolRequestParams):
 
     def __rich__(self) -> str:
         """Rich representation with color formatting."""
-        from hud.utils.design import design
+        from hud.utils.hud_console import hud_console
 
-        return design.format_tool_call(self.name, self.arguments)
+        return hud_console.format_tool_call(self.name, self.arguments)
 
 
 class MCPToolResult(CallToolResult):
@@ -74,10 +74,10 @@ class MCPToolResult(CallToolResult):
 
     def __rich__(self) -> str:
         """Rich representation with color formatting."""
-        from hud.utils.design import design
+        from hud.utils.hud_console import hud_console
 
         content_summary = self._get_content_summary()
-        return design.format_tool_result(content_summary, self.isError)
+        return hud_console.format_tool_result(content_summary, self.isError)
 
 
 class AgentResponse(BaseModel):
hud/utils/__init__.py CHANGED
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
-from .design import HUDDesign, design
+from .hud_console import HUDConsole, hud_console
 from .telemetry import stream
 
 __all__ = [
-    "HUDDesign",
-    "design",
+    "HUDConsole",
+    "hud_console",
     "stream",
 ]
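For downstream code, the design → hud_console rename in this hunk and in hud/types.py above is a breaking import change. A minimal migration sketch based only on the renames visible in this diff:

# Before (0.4.21):
#   from hud.utils import HUDDesign, design
#   design.format_tool_call("computer", {"action": "screenshot"})

# After (0.4.23):
from hud.utils import HUDConsole, hud_console

summary = hud_console.format_tool_call("computer", {"action": "screenshot"})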
hud/utils/agent_factories.py ADDED
@@ -0,0 +1,86 @@
+"""Factory functions for creating agents compatible with run_dataset."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from openai import AsyncOpenAI
+
+from hud.agents.grounded_openai import GroundedOpenAIChatAgent
+from hud.agents.openai_chat_generic import GenericOpenAIChatAgent
+from hud.tools.grounding import GrounderConfig
+
+
+def create_openai_agent(**kwargs: Any) -> GenericOpenAIChatAgent:
+    """Factory for GenericOpenAIChatAgent with run_dataset compatibility.
+
+    Args:
+        api_key: OpenAI API key
+        base_url: Optional custom API endpoint
+        model_name: Model to use (e.g., "gpt-4o-mini")
+        **kwargs: Additional arguments passed to GenericOpenAIChatAgent
+
+    Returns:
+        Configured GenericOpenAIChatAgent instance
+
+    Example:
+        >>> from hud.datasets import run_dataset
+        >>> from hud.utils.agent_factories import create_openai_agent
+        >>> results = await run_dataset(
+        ...     "My Eval",
+        ...     "hud-evals/SheetBench-50",
+        ...     create_openai_agent,
+        ...     {"api_key": "your-key", "model_name": "gpt-4o-mini"},
+        ... )
+    """
+    api_key = kwargs.pop("api_key", None)
+    base_url = kwargs.pop("base_url", None)
+
+    openai_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
+
+    return GenericOpenAIChatAgent(openai_client=openai_client, **kwargs)
+
+
+def create_grounded_agent(**kwargs: Any) -> GroundedOpenAIChatAgent:
+    """Factory for GroundedOpenAIChatAgent with run_dataset compatibility.
+
+    Args:
+        api_key: OpenAI API key for planning model
+        base_url: Optional custom API endpoint for planning model
+        model_name: Planning model to use (e.g., "gpt-4o-mini")
+        grounder_api_key: API key for grounding model
+        grounder_api_base: API base URL for grounding model (default: OpenRouter)
+        grounder_model: Grounding model to use (default: qwen/qwen-2.5-vl-7b-instruct)
+        **kwargs: Additional arguments passed to GroundedOpenAIChatAgent
+
+    Returns:
+        Configured GroundedOpenAIChatAgent instance
+
+    Example:
+        >>> from hud.datasets import run_dataset
+        >>> from hud.utils.agent_factories import create_grounded_agent
+        >>> results = await run_dataset(
+        ...     "Grounded Eval",
+        ...     dataset,
+        ...     create_grounded_agent,
+        ...     {
+        ...         "api_key": "openai-key",
+        ...         "grounder_api_key": "openrouter-key",
+        ...         "model_name": "gpt-4o-mini",
+        ...     },
+        ... )
+    """
+    api_key = kwargs.pop("api_key", None)
+    base_url = kwargs.pop("base_url", None)
+    grounder_api_key = kwargs.pop("grounder_api_key", None)
+    grounder_api_base = kwargs.pop("grounder_api_base", "https://openrouter.ai/api/v1")
+    grounder_model = kwargs.pop("grounder_model", "qwen/qwen-2.5-vl-7b-instruct")
+
+    openai_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
+    grounder_config = GrounderConfig(
+        api_base=grounder_api_base, model=grounder_model, api_key=grounder_api_key
+    )
+
+    return GroundedOpenAIChatAgent(
+        openai_client=openai_client, grounder_config=grounder_config, **kwargs
+    )
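Both factories are thin wrappers that pop their connection kwargs and forward the rest. For reference, here is a sketch of the direct construction that create_grounded_agent performs, with its defaults spelled out; the keys are placeholders, and model_name is assumed to be forwarded to the agent as documented in the Args above.

from openai import AsyncOpenAI

from hud.agents.grounded_openai import GroundedOpenAIChatAgent
from hud.tools.grounding import GrounderConfig

agent = GroundedOpenAIChatAgent(
    openai_client=AsyncOpenAI(api_key="openai-key"),  # planning-model client
    grounder_config=GrounderConfig(
        api_base="https://openrouter.ai/api/v1",  # factory default
        model="qwen/qwen-2.5-vl-7b-instruct",  # factory default
        api_key="openrouter-key",
    ),
    model_name="gpt-4o-mini",
)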