hud-python 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- hud/__main__.py +8 -0
- hud/agents/base.py +7 -8
- hud/agents/langchain.py +2 -2
- hud/agents/tests/test_openai.py +3 -1
- hud/cli/__init__.py +114 -52
- hud/cli/build.py +121 -71
- hud/cli/debug.py +2 -2
- hud/cli/{mcp_server.py → dev.py} +101 -38
- hud/cli/eval.py +175 -90
- hud/cli/init.py +442 -64
- hud/cli/list_func.py +72 -71
- hud/cli/pull.py +1 -2
- hud/cli/push.py +35 -23
- hud/cli/remove.py +35 -41
- hud/cli/tests/test_analyze.py +2 -1
- hud/cli/tests/test_analyze_metadata.py +42 -49
- hud/cli/tests/test_build.py +28 -52
- hud/cli/tests/test_cursor.py +1 -1
- hud/cli/tests/test_debug.py +1 -1
- hud/cli/tests/test_list_func.py +75 -64
- hud/cli/tests/test_main_module.py +30 -0
- hud/cli/tests/test_mcp_server.py +3 -3
- hud/cli/tests/test_pull.py +30 -61
- hud/cli/tests/test_push.py +70 -89
- hud/cli/tests/test_registry.py +36 -38
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/utils/__init__.py +1 -0
- hud/cli/{docker_utils.py → utils/docker.py} +36 -0
- hud/cli/{env_utils.py → utils/environment.py} +7 -7
- hud/cli/{interactive.py → utils/interactive.py} +91 -19
- hud/cli/{analyze_metadata.py → utils/metadata.py} +12 -8
- hud/cli/{registry.py → utils/registry.py} +28 -30
- hud/cli/{remote_runner.py → utils/remote_runner.py} +1 -1
- hud/cli/utils/runner.py +134 -0
- hud/cli/utils/server.py +250 -0
- hud/clients/base.py +1 -1
- hud/clients/fastmcp.py +5 -13
- hud/clients/mcp_use.py +6 -10
- hud/server/server.py +35 -5
- hud/shared/exceptions.py +11 -0
- hud/shared/tests/test_exceptions.py +22 -0
- hud/telemetry/tests/__init__.py +0 -0
- hud/telemetry/tests/test_replay.py +40 -0
- hud/telemetry/tests/test_trace.py +63 -0
- hud/tools/base.py +20 -3
- hud/tools/computer/hud.py +15 -6
- hud/tools/executors/tests/test_base_executor.py +27 -0
- hud/tools/response.py +12 -8
- hud/tools/tests/test_response.py +60 -0
- hud/tools/tests/test_tools_init.py +49 -0
- hud/utils/design.py +19 -8
- hud/utils/mcp.py +17 -5
- hud/utils/tests/test_mcp.py +112 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/METADATA +16 -13
- {hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/RECORD +62 -52
- hud/cli/runner.py +0 -160
- /hud/cli/{cursor.py → utils/cursor.py} +0 -0
- /hud/cli/{utils.py → utils/logging.py} +0 -0
- {hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/WHEEL +0 -0
- {hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/licenses/LICENSE +0 -0

hud/tools/computer/hud.py
CHANGED
```diff
@@ -58,20 +58,29 @@ class HudComputerTool(BaseTool):
             title: Human-readable display name for the tool (auto-generated from class name)
             description: Tool description (auto-generated from docstring if not provided)
         """
+        # This is the width and height the agent thinks it operates in
+        # By default, use subclass's width and height
+        # If specifically set to None, use environment width and height
+        self.width = width or computer_settings.DISPLAY_WIDTH
+        self.height = height or computer_settings.DISPLAY_HEIGHT
+
+        # Build metadata with resolution info
+        meta = {
+            "resolution": {
+                "width": self.width,
+                "height": self.height,
+            }
+        }
+
         # Initialize base tool with executor as env
         super().__init__(
             env=executor,
             name=name or "computer",
             title=title or "Computer Control",
             description=description or "Control computer with mouse, keyboard, and screenshots",
+            meta=meta,
         )
 
-        # This is the width and height the agent thinks it operates in
-        # By default, use subclass's width and height
-        # If specifically set to None, use environment width and height
-        self.width = width or computer_settings.DISPLAY_WIDTH
-        self.height = height or computer_settings.DISPLAY_HEIGHT
-
         # This is the static width and height of the environment screen
         # And the width and height of the screenshots taken by the tool
         self.environment_width = computer_settings.DISPLAY_WIDTH
```
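
The notable change is that the agent-facing resolution is now resolved before `super().__init__()` and attached to the tool as `meta`. A standalone sketch of that fallback-and-metadata flow, using stand-in values rather than the library's real `computer_settings` defaults:

```python
# Standalone sketch of the new resolution flow; DISPLAY_WIDTH/HEIGHT are stand-in
# values, not the library's actual computer_settings defaults.
DISPLAY_WIDTH, DISPLAY_HEIGHT = 1920, 1080


def resolve_resolution(width: int | None = None, height: int | None = None):
    # `or` falls back to the environment defaults when the caller passes None
    w = width or DISPLAY_WIDTH
    h = height or DISPLAY_HEIGHT
    # The resolved values now travel with the tool as metadata via BaseTool(meta=...)
    meta = {"resolution": {"width": w, "height": h}}
    return w, h, meta


print(resolve_resolution(1280, 720))  # explicit size wins: (1280, 720, {...})
print(resolve_resolution())           # falls back to (1920, 1080, {...})
```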

hud/tools/executors/tests/test_base_executor.py
CHANGED
```diff
@@ -336,3 +336,30 @@ class TestBaseExecutor:
 
         assert result1.base64_image == screenshot1
         assert result2.base64_image == screenshot1
+
+
+class TestLazyImports:
+    """Tests for lazy import functionality in executors module."""
+
+    def test_lazy_import_pyautogui_executor(self):
+        """Test lazy import of PyAutoGUIExecutor."""
+        # This should trigger the __getattr__ function and import PyAutoGUIExecutor
+        from hud.tools.executors import PyAutoGUIExecutor
+
+        # Verify it's imported correctly
+        assert PyAutoGUIExecutor.__name__ == "PyAutoGUIExecutor"
+
+    def test_lazy_import_xdo_executor(self):
+        """Test lazy import of XDOExecutor."""
+        # This should trigger the __getattr__ function and import XDOExecutor
+        from hud.tools.executors import XDOExecutor
+
+        # Verify it's imported correctly
+        assert XDOExecutor.__name__ == "XDOExecutor"
+
+    def test_lazy_import_invalid_attribute(self):
+        """Test lazy import with invalid attribute name."""
+        import hud.tools.executors as executors_module
+
+        with pytest.raises(AttributeError, match="module '.*' has no attribute 'InvalidExecutor'"):
+            _ = executors_module.InvalidExecutor
```
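
These tests exercise a module-level `__getattr__` (PEP 562) in `hud.tools.executors`. The implementation itself is not shown in this diff; a minimal sketch of the pattern, with assumed submodule names, would look like this inside the package's `__init__.py`:

```python
# Minimal sketch of a lazy-import __getattr__ as it might appear in a package
# __init__.py; the submodule paths below are assumptions, not hud's real layout.
import importlib

_LAZY_IMPORTS = {
    "PyAutoGUIExecutor": ".pyautogui",
    "XDOExecutor": ".xdo",
}


def __getattr__(name: str):
    if name in _LAZY_IMPORTS:
        module = importlib.import_module(_LAZY_IMPORTS[name], __package__)
        return getattr(module, name)
    # This message shape is what the tests above match against
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```

The point of the pattern is that importing `hud.tools.executors` stays cheap; heavy optional dependencies are only pulled in when the corresponding executor is first accessed.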

hud/tools/response.py
CHANGED
```diff
@@ -12,7 +12,7 @@ if TYPE_CHECKING:
 class ResponseTool(BaseTool):
     """
     Protocol for handling responses within environments.
-
+
     This abstract tool defines the interface for response handling in environments.
     Subclasses should implement the __call__ method to handle responses according
     to their specific needs.
@@ -36,18 +36,22 @@
         return blocks
     """
 
-
+    name: str = "response"
+    title: str = "Response Tool"
+    description: str = "Send a text response or list of messages to the environment"
+
+    def __init__(
+        self, name: str | None = None, title: str | None = None, description: str | None = None
+    ) -> None:
         super().__init__(
-            name=name or
-            title=title or
-            description=description or
+            name=name or self.name,
+            title=title or self.title,
+            description=description or self.description,
         )
 
     @abstractmethod
     async def __call__(
-        self,
-        response: str | None = None,
-        messages: list[ContentBlock] | None = None
+        self, response: str | None = None, messages: list[ContentBlock] | None = None
     ) -> list[ContentBlock]:
         """Handle response or messages and return as ContentBlocks.
 
```
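
With the defaults promoted to class attributes, a subclass can override the advertised name, title, and description declaratively instead of passing constructor arguments. A minimal sketch (the subclass name is hypothetical, and it assumes plain class-attribute semantics as shown in the diff):

```python
# Hypothetical subclass; only __call__ is required, and the class attributes
# override the defaults that ResponseTool.__init__ now reads via self.name etc.
from mcp.types import TextContent

from hud.tools.response import ResponseTool


class EchoResponseTool(ResponseTool):
    name = "echo_response"
    title = "Echo Response"
    description = "Echo the agent's final answer back to the environment"

    async def __call__(self, response: str | None = None, messages=None):
        return messages or [TextContent(text=response or "", type="text")]
```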

hud/tools/tests/test_response.py
ADDED
```diff
@@ -0,0 +1,60 @@
+"""Tests for ResponseTool class."""
+
+from __future__ import annotations
+
+import pytest
+
+from hud.tools.response import ResponseTool
+
+
+class ConcreteResponseTool(ResponseTool):
+    """Concrete implementation for testing."""
+
+    async def __call__(self, response: str | None = None, messages=None):
+        """Concrete implementation."""
+        from mcp.types import TextContent
+
+        return [TextContent(text=response or "test", type="text")]
+
+
+class TestResponseTool:
+    """Tests for ResponseTool abstract class."""
+
+    def test_init_with_defaults(self):
+        """Test initialization with default values."""
+        tool = ConcreteResponseTool()
+        assert tool.name == "response"
+        assert tool.title == "Response Tool"
+        assert tool.description == "Send a text response or list of messages to the environment"
+
+    def test_init_with_custom_values(self):
+        """Test initialization with custom values."""
+        tool = ConcreteResponseTool(
+            name="custom_response", title="Custom Response Tool", description="Custom description"
+        )
+        assert tool.name == "custom_response"
+        assert tool.title == "Custom Response Tool"
+        assert tool.description == "Custom description"
+
+    def test_abstract_method_not_implemented(self):
+        """Test that abstract method raises NotImplementedError when not implemented."""
+
+        # Create a concrete tool to test the abstract method's NotImplementedError
+        tool = ConcreteResponseTool()
+
+        # This should trigger the NotImplementedError in the abstract method
+        with pytest.raises(NotImplementedError, match="Subclasses must implement __call__"):
+            # Call the parent abstract method directly to hit the raise line
+            import asyncio
+
+            asyncio.run(ResponseTool.__call__(tool, "test"))  # type: ignore[attr-defined]
+
+    @pytest.mark.asyncio
+    async def test_concrete_implementation(self):
+        """Test that concrete implementation works correctly."""
+        tool = ConcreteResponseTool()
+        result = await tool("Hello, World!")
+
+        assert len(result) == 1
+        assert result[0].text == "Hello, World!"
+        assert result[0].type == "text"
```

hud/tools/tests/test_tools_init.py
ADDED
```diff
@@ -0,0 +1,49 @@
+"""Tests for hud.tools.__init__ module."""
+
+from __future__ import annotations
+
+import pytest
+
+
+class TestToolsInit:
+    """Tests for the tools package initialization."""
+
+    def test_lazy_import_anthropic_computer_tool(self):
+        """Test lazy import of AnthropicComputerTool."""
+        from hud.tools import AnthropicComputerTool
+
+        # Verify it's imported correctly
+        assert AnthropicComputerTool.__name__ == "AnthropicComputerTool"
+
+    def test_lazy_import_hud_computer_tool(self):
+        """Test lazy import of HudComputerTool."""
+        from hud.tools import HudComputerTool
+
+        # Verify it's imported correctly
+        assert HudComputerTool.__name__ == "HudComputerTool"
+
+    def test_lazy_import_openai_computer_tool(self):
+        """Test lazy import of OpenAIComputerTool."""
+        from hud.tools import OpenAIComputerTool
+
+        # Verify it's imported correctly
+        assert OpenAIComputerTool.__name__ == "OpenAIComputerTool"
+
+    def test_lazy_import_invalid_attribute(self):
+        """Test lazy import with invalid attribute name."""
+        import hud.tools as tools_module
+
+        with pytest.raises(AttributeError, match="module '.*' has no attribute 'InvalidTool'"):
+            _ = tools_module.InvalidTool
+
+    def test_direct_imports_available(self):
+        """Test that directly imported tools are available."""
+        from hud.tools import BaseHub, BaseTool, BashTool, EditTool, PlaywrightTool, ResponseTool
+
+        # All should be available
+        assert BaseHub is not None
+        assert BaseTool is not None
+        assert BashTool is not None
+        assert EditTool is not None
+        assert PlaywrightTool is not None
+        assert ResponseTool is not None
```

hud/utils/design.py
CHANGED
```diff
@@ -93,10 +93,10 @@ class HUDDesign:
         """
         console = self._stderr_console if stderr else self._stdout_console
         console.print(f"[default not bold]{message}[/default not bold]")
-
+
     def print(self, message: str, stderr: bool = True) -> None:
         """Print a message.
-
+
         Args:
             message: The message to print
             stderr: If True, output to stderr (default), otherwise stdout
@@ -136,7 +136,9 @@ class HUDDesign:
         console = self._stderr_console if stderr else self._stdout_console
         console.print(f"[default not bold]{json_str}[/default not bold]")
 
-    def key_value_table(
+    def key_value_table(
+        self, data: dict[str, str], show_header: bool = False, stderr: bool = True
+    ) -> None:
         """Print a key-value table.
 
         Args:
@@ -197,7 +199,14 @@ class HUDDesign:
         console = self._stderr_console if stderr else self._stdout_console
         console.print(f"\n[yellow]💡 Hint: {hint}[/yellow]")
 
-    def status_item(
+    def status_item(
+        self,
+        label: str,
+        value: str,
+        status: str = "success",
+        primary: bool = False,
+        stderr: bool = True,
+    ) -> None:
         """Print a status item with indicator.
 
         Args:
@@ -211,18 +220,20 @@ class HUDDesign:
             "success": f"[{GREEN} not bold]✓[/{GREEN} not bold]",
             "error": f"[{RED} not bold]✗[/{RED} not bold]",
             "warning": "[yellow]⚠[/yellow]",
-            "info": f"[{DIM}]•[/{DIM}]"
+            "info": f"[{DIM}]•[/{DIM}]",
         }
-
+
         indicator = indicators.get(status, indicators["info"])
         console = self._stderr_console if stderr else self._stdout_console
-
+
         if primary:
             console.print(f"{indicator} {label}: [bold cyan]{value}[/bold cyan]")
         else:
             console.print(f"{indicator} {label}: {value}")
 
-    def command_example(
+    def command_example(
+        self, command: str, description: str | None = None, stderr: bool = True
+    ) -> None:
         """Print a command example with cyan highlighting.
 
         Args:
```
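
The `HUDDesign` changes are formatting-only (signatures reflowed onto multiple lines, a trailing comma added), but for reference the reflowed helpers would be called roughly as below. The no-argument constructor and the exact rendered output are assumptions; the signatures come from the diff:

```python
# Hedged usage sketch; signatures are taken from the diff, construction is assumed.
from hud.utils.design import HUDDesign

design = HUDDesign()
design.key_value_table({"Image": "my-env:latest", "Status": "built"}, show_header=False)
design.status_item("Server", "running", status="success", primary=True)
design.command_example("echo hello", description="Any shell command string to highlight")
```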

hud/utils/mcp.py
CHANGED
```diff
@@ -23,7 +23,7 @@ def patch_mcp_config(mcp_config: dict[str, dict[str, Any]], patch: MCPConfigPatch
 
     for server_cfg in mcp_config.values():
         url = server_cfg.get("url", "")
-
+
         # 1) HTTP header lane (only for hud MCP servers)
         if hud_mcp_url in url and patch.headers:
             for key, value in patch.headers.items():
@@ -37,9 +37,11 @@ def patch_mcp_config(mcp_config: dict[str, dict[str, Any]], patch: MCPConfigPatch
                 meta.setdefault(key, value)
 
 
-def setup_hud_telemetry(
+def setup_hud_telemetry(
+    mcp_config: dict[str, dict[str, Any]], auto_trace: bool = True
+) -> Any | None:
     """Setup telemetry for hud servers.
-
+
     Returns:
         The auto-created trace context manager if one was created, None otherwise.
         Caller is responsible for exiting the context manager.
@@ -47,12 +49,22 @@ def setup_hud_telemetry(mcp_config: dict[str, dict[str, Any]], auto_trace: bool
     if not mcp_config:
         raise ValueError("Please run initialize() before setting up client-side telemetry")
 
+    # Check if there are any HUD servers to setup telemetry for
+    hud_mcp_url = settings.hud_mcp_url
+    has_hud_servers = any(
+        hud_mcp_url in server_cfg.get("url", "") for server_cfg in mcp_config.values()
+    )
+
+    # If no HUD servers, no need for telemetry setup
+    if not has_hud_servers:
+        return None
+
     from hud.otel import get_current_task_run_id
     from hud.telemetry import trace
 
     run_id = get_current_task_run_id()
     auto_trace_cm = None
-
+
     if not run_id and auto_trace:
         auto_trace_cm = trace("My Trace")
         run_id = auto_trace_cm.__enter__()
@@ -63,5 +75,5 @@ def setup_hud_telemetry(mcp_config: dict[str, dict[str, Any]], auto_trace: bool
         mcp_config,
         MCPConfigPatch(headers={"Run-Id": run_id}, meta={"run_id": run_id}),
     )
-
+
     return auto_trace_cm
```
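
The behavioral change in `setup_hud_telemetry` is the early `return None` when the config contains no HUD MCP servers. The docstring's caller contract (exit any auto-created trace yourself) would be honored roughly as in the sketch below; the config shape and URL are illustrative, not taken from the library:

```python
# Illustrative caller sketch; the URL and config shape are assumptions.
from hud.utils.mcp import setup_hud_telemetry

mcp_config = {"local": {"url": "http://localhost:8765/mcp"}}

auto_trace_cm = setup_hud_telemetry(mcp_config, auto_trace=True)
try:
    ...  # run the task while any auto-created trace is active
finally:
    # New in 0.4.13: None is returned when mcp_config contains no HUD servers,
    # so there is nothing to exit in that case.
    if auto_trace_cm is not None:
        auto_trace_cm.__exit__(None, None, None)
```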

hud/utils/tests/test_mcp.py
ADDED
```diff
@@ -0,0 +1,112 @@
+"""Tests for MCP utility functions."""
+
+from __future__ import annotations
+
+import pytest
+
+from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
+
+
+class TestPatchMCPConfig:
+    """Tests for patch_mcp_config function."""
+
+    def test_patch_headers_for_hud_servers(self):
+        """Test patching headers for HUD MCP servers."""
+        from hud.settings import get_settings
+
+        settings = get_settings()
+
+        # Create an MCP config with a HUD server URL
+        mcp_config = {"test_server": {"url": f"{settings.hud_mcp_url}/test"}}
+
+        # Create patch with headers
+        patch = MCPConfigPatch(headers={"X-Test-Header": "test-value"}, meta=None)
+
+        # Apply patch
+        patch_mcp_config(mcp_config, patch)
+
+        # Verify headers were added
+        assert "headers" in mcp_config["test_server"]
+        assert mcp_config["test_server"]["headers"]["X-Test-Header"] == "test-value"  # type: ignore[index]
+
+    def test_patch_headers_preserves_existing(self):
+        """Test that existing headers are preserved."""
+        from hud.settings import get_settings
+
+        settings = get_settings()
+
+        # Create config with existing headers
+        mcp_config = {
+            "test_server": {
+                "url": f"{settings.hud_mcp_url}/test",
+                "headers": {"Existing-Header": "existing-value"},
+            }
+        }
+
+        patch = MCPConfigPatch(
+            headers={"X-Test-Header": "test-value", "Existing-Header": "new-value"},
+            meta=None,
+        )
+
+        patch_mcp_config(mcp_config, patch)
+
+        # Existing header should be preserved, new one added
+        assert mcp_config["test_server"]["headers"]["Existing-Header"] == "existing-value"
+        assert mcp_config["test_server"]["headers"]["X-Test-Header"] == "test-value"
+
+    def test_patch_meta_for_all_servers(self):
+        """Test patching metadata for all servers."""
+        mcp_config = {
+            "server1": {"url": "http://example.com"},
+            "server2": {"url": "http://other.com"},
+        }
+
+        patch = MCPConfigPatch(headers=None, meta={"test_key": "test_value"})
+
+        patch_mcp_config(mcp_config, patch)
+
+        # Meta should be added to both servers
+        assert mcp_config["server1"]["meta"]["test_key"] == "test_value"  # type: ignore[index]
+        assert mcp_config["server2"]["meta"]["test_key"] == "test_value"  # type: ignore[index]
+
+    def test_patch_meta_preserves_existing(self):
+        """Test that existing meta is preserved."""
+        mcp_config = {
+            "test_server": {"url": "http://example.com", "meta": {"existing_key": "existing_value"}}
+        }
+
+        patch = MCPConfigPatch(
+            headers=None,
+            meta={"test_key": "test_value", "existing_key": "new_value"},
+        )
+
+        patch_mcp_config(mcp_config, patch)
+
+        # Existing meta should be preserved, new one added
+        assert mcp_config["test_server"]["meta"]["existing_key"] == "existing_value"
+        assert mcp_config["test_server"]["meta"]["test_key"] == "test_value"
+
+
+class TestSetupHUDTelemetry:
+    """Tests for setup_hud_telemetry function."""
+
+    def test_empty_config_raises_error(self):
+        """Test that empty config raises ValueError."""
+        with pytest.raises(
+            ValueError, match="Please run initialize\\(\\) before setting up client-side telemetry"
+        ):
+            setup_hud_telemetry({})
+
+    def test_none_config_raises_error(self):
+        """Test that None config raises ValueError."""
+        with pytest.raises(
+            ValueError, match="Please run initialize\\(\\) before setting up client-side telemetry"
+        ):
+            setup_hud_telemetry(None)  # type: ignore[arg-type]
+
+    def test_valid_config_returns_none_when_no_hud_servers(self):
+        """Test that valid config with no HUD servers returns None."""
+        mcp_config = {"test_server": {"url": "http://example.com"}}
+
+        result = setup_hud_telemetry(mcp_config)
+        assert result is None
```
hud/utils/tests/test_version.py
CHANGED
hud/version.py
CHANGED

{hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/METADATA
CHANGED
````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.11
+Version: 0.4.13
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -35,10 +35,9 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Python: <3.14,>=3.11
-Requires-Dist: fastmcp>=2.11.2
 Requires-Dist: httpx<1,>=0.23.0
-Requires-Dist: hud-
-Requires-Dist: mcp>=
+Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
+Requires-Dist: hud-mcp-python-sdk>=3.13.2
 Requires-Dist: opentelemetry-api>=1.34.1
 Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
 Requires-Dist: opentelemetry-instrumentation-mcp>=0.44.1
@@ -56,7 +55,11 @@ Provides-Extra: agent
 Requires-Dist: anthropic; extra == 'agent'
 Requires-Dist: datasets>=2.14.0; extra == 'agent'
 Requires-Dist: dotenv>=0.9.9; extra == 'agent'
-Requires-Dist: hud-mcp-use-python-sdk>=
+Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agent'
+Requires-Dist: ipykernel; extra == 'agent'
+Requires-Dist: ipython<9; extra == 'agent'
+Requires-Dist: jupyter-client; extra == 'agent'
+Requires-Dist: jupyter-core; extra == 'agent'
 Requires-Dist: langchain; extra == 'agent'
 Requires-Dist: langchain-anthropic; extra == 'agent'
 Requires-Dist: langchain-openai; extra == 'agent'
@@ -66,7 +69,11 @@ Provides-Extra: agents
 Requires-Dist: anthropic; extra == 'agents'
 Requires-Dist: datasets>=2.14.0; extra == 'agents'
 Requires-Dist: dotenv>=0.9.9; extra == 'agents'
-Requires-Dist: hud-mcp-use-python-sdk>=
+Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agents'
+Requires-Dist: ipykernel; extra == 'agents'
+Requires-Dist: ipython<9; extra == 'agents'
+Requires-Dist: jupyter-client; extra == 'agents'
+Requires-Dist: jupyter-core; extra == 'agents'
 Requires-Dist: langchain; extra == 'agents'
 Requires-Dist: langchain-anthropic; extra == 'agents'
 Requires-Dist: langchain-openai; extra == 'agents'
@@ -77,7 +84,7 @@ Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
 Requires-Dist: anthropic; extra == 'dev'
 Requires-Dist: datasets>=2.14.0; extra == 'dev'
 Requires-Dist: dotenv>=0.9.9; extra == 'dev'
-Requires-Dist: hud-mcp-use-python-sdk>=
+Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'dev'
 Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
 Requires-Dist: ipykernel; extra == 'dev'
 Requires-Dist: ipython<9; extra == 'dev'
@@ -233,7 +240,7 @@ Any hud MCP environment and evaluation works with our RL pipeline. Even our remo
 
 This is Claude Computer Use running on our proprietary financial analyst benchmark [SheetBench-50](https://huggingface.co/datasets/hud-evals/SheetBench-50):
 
-
+
 
 > [See this trace on _app.hud.so_](https://app.hud.so/trace/9e212e9e-3627-4f1f-9eb5-c6d03c59070a)
 
@@ -385,7 +392,7 @@ result = await ClaudeAgent().run({ # See all agents: https://docs.hud.so/refere
 
 All leaderboards are publicly available on [app.hud.so/leaderboards](https://app.hud.so/leaderboards) (see [docs](https://docs.hud.so/evaluate-agents/leaderboards))
 
-
+
 
 We highly suggest running 3-5 evaluations per dataset for the most consistent results across multiple jobs.
 
@@ -430,10 +437,6 @@ graph LR
     Trace --> Dashboard
     AnyMCP -->|"MCP"| API
 
-    style Dashboard fill:#e0e7ff,stroke:#6366f1,stroke-width:2px
-    style SDK fill:#fef3c7,stroke:#f59e0b,stroke-width:2px
-    style RemoteEnv fill:#d1fae5,stroke:#10b981,stroke-width:2px
-    style AnyMCP fill:#fce7f3,stroke:#ec4899,stroke-width:2px,stroke-dasharray: 5 5
 ```
 
 ## CLI reference
````