hud-python 0.4.19 (py3-none-any.whl) → 0.4.21 (py3-none-any.whl)
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +7 -0
- hud/agents/base.py +40 -10
- hud/agents/claude.py +44 -25
- hud/agents/tests/test_client.py +6 -27
- hud/cli/__init__.py +50 -20
- hud/cli/build.py +3 -44
- hud/cli/eval.py +25 -6
- hud/cli/init.py +4 -4
- hud/cli/push.py +3 -1
- hud/cli/tests/test_push.py +6 -6
- hud/clients/__init__.py +3 -2
- hud/clients/base.py +25 -26
- hud/clients/mcp_use.py +44 -22
- hud/datasets/task.py +11 -2
- hud/native/__init__.py +6 -0
- hud/native/comparator.py +546 -0
- hud/native/tests/__init__.py +1 -0
- hud/native/tests/test_comparator.py +539 -0
- hud/native/tests/test_native_init.py +79 -0
- hud/otel/instrumentation.py +0 -2
- hud/server/server.py +9 -2
- hud/shared/exceptions.py +204 -31
- hud/shared/hints.py +177 -0
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +385 -144
- hud/tools/__init__.py +2 -0
- hud/tools/playwright.py +1 -1
- hud/tools/submit.py +66 -0
- hud/types.py +33 -5
- hud/utils/design.py +57 -0
- hud/utils/mcp.py +6 -0
- hud/utils/pretty_errors.py +68 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.4.19.dist-info → hud_python-0.4.21.dist-info}/METADATA +2 -4
- {hud_python-0.4.19.dist-info → hud_python-0.4.21.dist-info}/RECORD +39 -31
- {hud_python-0.4.19.dist-info → hud_python-0.4.21.dist-info}/WHEEL +0 -0
- {hud_python-0.4.19.dist-info → hud_python-0.4.21.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.19.dist-info → hud_python-0.4.21.dist-info}/licenses/LICENSE +0 -0
hud/clients/base.py
CHANGED
|
@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any, Protocol, overload, runtime_checkable
|
|
|
9
9
|
|
|
10
10
|
from mcp.types import Implementation
|
|
11
11
|
|
|
12
|
+
from hud.shared.exceptions import HudAuthenticationError, HudException
|
|
12
13
|
from hud.types import MCPToolCall, MCPToolResult
|
|
13
14
|
from hud.utils.mcp import setup_hud_telemetry
|
|
14
15
|
from hud.version import __version__ as hud_version
|
|
@@ -120,8 +121,10 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
120
121
|
|
|
121
122
|
self._mcp_config = mcp_config or self._mcp_config
|
|
122
123
|
if self._mcp_config is None:
|
|
123
|
-
|
|
124
|
-
|
|
124
|
+
from hud.shared.exceptions import HudConfigError
|
|
125
|
+
|
|
126
|
+
raise HudConfigError(
|
|
127
|
+
"An MCP server configuration is required. "
|
|
125
128
|
"Either pass it to the constructor or call initialize with a configuration"
|
|
126
129
|
)
|
|
127
130
|
|
|
@@ -130,31 +133,23 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
130
133
|
logger.debug("Initializing MCP client...")
|
|
131
134
|
|
|
132
135
|
try:
|
|
136
|
+
# Check if API key is set for HUD API
|
|
137
|
+
for server_config in self._mcp_config.values():
|
|
138
|
+
url = server_config.get("url", "")
|
|
139
|
+
headers = server_config.get("headers", {})
|
|
140
|
+
if "mcp.hud.so" in url and len(headers.get("Authorization", "")) < 10:
|
|
141
|
+
raise HudAuthenticationError(
|
|
142
|
+
f'Sending authorization "{headers.get("Authorization", "")}", which may'
|
|
143
|
+
" be incomplete. Ensure HUD_API_KEY environment variable is set or send it"
|
|
144
|
+
" as a header. You can get an API key at https://app.hud.so"
|
|
145
|
+
)
|
|
133
146
|
# Subclasses implement connection
|
|
134
147
|
await self._connect(self._mcp_config)
|
|
135
|
-
except
|
|
136
|
-
# Re-raise authentication errors with clear message
|
|
137
|
-
if "Authentication failed" in str(e):
|
|
138
|
-
raise
|
|
148
|
+
except HudException:
|
|
139
149
|
raise
|
|
140
150
|
except Exception as e:
|
|
141
|
-
#
|
|
142
|
-
|
|
143
|
-
if "401" in error_msg or "Unauthorized" in error_msg:
|
|
144
|
-
# Check if connecting to HUD API
|
|
145
|
-
for server_config in self._mcp_config.values():
|
|
146
|
-
url = server_config.get("url", "")
|
|
147
|
-
if "mcp.hud.so" in url:
|
|
148
|
-
raise RuntimeError(
|
|
149
|
-
"Authentication failed for HUD API. "
|
|
150
|
-
"Please ensure your HUD_API_KEY environment variable is set correctly. "
|
|
151
|
-
"You can get an API key at https://app.hud.so"
|
|
152
|
-
) from e
|
|
153
|
-
raise RuntimeError(
|
|
154
|
-
"Authentication failed (401 Unauthorized). "
|
|
155
|
-
"Please check your credentials or API key."
|
|
156
|
-
) from e
|
|
157
|
-
raise
|
|
151
|
+
# Auto-converts to appropriate HUD exception type with hints
|
|
152
|
+
raise HudException from e
|
|
158
153
|
|
|
159
154
|
# Common hud behavior - fetch telemetry
|
|
160
155
|
await self._fetch_telemetry()
|
|
@@ -180,7 +175,7 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
180
175
|
self._initialized = False
|
|
181
176
|
logger.info("Client disconnected")
|
|
182
177
|
else:
|
|
183
|
-
logger.
|
|
178
|
+
logger.debug("Client was not initialized, skipping disconnect")
|
|
184
179
|
|
|
185
180
|
@overload
|
|
186
181
|
async def call_tool(self, tool_call: MCPToolCall, /) -> MCPToolResult: ...
|
|
@@ -248,7 +243,9 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
248
243
|
def mcp_config(self) -> dict[str, dict[str, Any]]:
|
|
249
244
|
"""Get the MCP config."""
|
|
250
245
|
if self._mcp_config is None:
|
|
251
|
-
|
|
246
|
+
from hud.shared.exceptions import HudConfigError
|
|
247
|
+
|
|
248
|
+
raise HudConfigError("Please initialize the client with a valid MCP config")
|
|
252
249
|
return self._mcp_config
|
|
253
250
|
|
|
254
251
|
async def __aenter__(self: Any) -> Any:
|
|
@@ -317,7 +314,9 @@ class BaseHUDClient(AgentMCPClient):
|
|
|
317
314
|
- metadata: Environment metadata
|
|
318
315
|
"""
|
|
319
316
|
if not self._initialized:
|
|
320
|
-
|
|
317
|
+
from hud.shared.exceptions import HudClientError
|
|
318
|
+
|
|
319
|
+
raise HudClientError("Client must be initialized before analyzing the environment")
|
|
321
320
|
|
|
322
321
|
analysis: dict[str, Any] = {
|
|
323
322
|
"tools": [],
|
hud/clients/mcp_use.py
CHANGED
|
@@ -3,10 +3,12 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Any
|
|
7
7
|
|
|
8
|
-
from mcp import Implementation
|
|
8
|
+
from mcp import Implementation, types
|
|
9
9
|
from mcp.shared.exceptions import McpError
|
|
10
|
+
from mcp_use.client import MCPClient as MCPUseClient
|
|
11
|
+
from mcp_use.session import MCPSession as MCPUseSession
|
|
10
12
|
from pydantic import AnyUrl
|
|
11
13
|
|
|
12
14
|
from hud.types import MCPToolCall, MCPToolResult
|
|
@@ -14,18 +16,6 @@ from hud.version import __version__ as hud_version
|
|
|
14
16
|
|
|
15
17
|
from .base import BaseHUDClient
|
|
16
18
|
|
|
17
|
-
if TYPE_CHECKING:
|
|
18
|
-
from mcp import types
|
|
19
|
-
from mcp_use.client import MCPClient as MCPUseClient # type: ignore[attr-defined]
|
|
20
|
-
from mcp_use.session import MCPSession as MCPUseSession # type: ignore[attr-defined]
|
|
21
|
-
|
|
22
|
-
try:
|
|
23
|
-
from mcp_use.client import MCPClient as MCPUseClient # type: ignore[attr-defined]
|
|
24
|
-
from mcp_use.session import MCPSession as MCPUseSession # type: ignore[attr-defined]
|
|
25
|
-
except ImportError:
|
|
26
|
-
MCPUseClient = None # type: ignore[misc, assignment]
|
|
27
|
-
MCPUseSession = None # type: ignore[misc, assignment]
|
|
28
|
-
|
|
29
19
|
logger = logging.getLogger(__name__)
|
|
30
20
|
|
|
31
21
|
|
|
@@ -53,7 +43,9 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
53
43
|
)
|
|
54
44
|
|
|
55
45
|
self._sessions: dict[str, Any] = {} # Will be MCPUseSession when available
|
|
56
|
-
self._tool_map: dict[
|
|
46
|
+
self._tool_map: dict[
|
|
47
|
+
str, tuple[str, types.Tool, types.Tool]
|
|
48
|
+
] = {} # server_name, original_tool, prefixed_tool
|
|
57
49
|
self._client: Any | None = None # Will be MCPUseClient when available
|
|
58
50
|
|
|
59
51
|
async def _connect(self, mcp_config: dict[str, dict[str, Any]]) -> None:
|
|
@@ -106,14 +98,23 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
106
98
|
logger.info("Check that the MCP server is running and accessible")
|
|
107
99
|
raise
|
|
108
100
|
|
|
101
|
+
# Populate tool map during initialization
|
|
102
|
+
await self.list_tools()
|
|
103
|
+
|
|
109
104
|
async def list_tools(self) -> list[types.Tool]:
|
|
110
105
|
"""List all available tools from all sessions."""
|
|
111
106
|
if self._client is None or not self._sessions:
|
|
112
107
|
raise ValueError("Client is not connected, call initialize() first")
|
|
113
108
|
|
|
109
|
+
if self._tool_map:
|
|
110
|
+
return [tool[2] for tool in self._tool_map.values()]
|
|
111
|
+
|
|
114
112
|
all_tools = []
|
|
115
113
|
self._tool_map = {}
|
|
116
114
|
|
|
115
|
+
# Check if we need to prefix (more than one server)
|
|
116
|
+
use_prefix = len(self._sessions) > 1
|
|
117
|
+
|
|
117
118
|
for server_name, session in self._sessions.items():
|
|
118
119
|
try:
|
|
119
120
|
# Ensure session is initialized
|
|
@@ -136,10 +137,26 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
136
137
|
[tool.name for tool in tools_result.tools],
|
|
137
138
|
)
|
|
138
139
|
|
|
139
|
-
# Add to collections
|
|
140
|
+
# Add to collections with optional prefix
|
|
140
141
|
for tool in tools_result.tools:
|
|
141
|
-
|
|
142
|
-
|
|
142
|
+
if use_prefix:
|
|
143
|
+
# Create a new tool with prefixed name
|
|
144
|
+
prefixed_name = f"{server_name}_{tool.name}"
|
|
145
|
+
# Create a new tool instance with prefixed name
|
|
146
|
+
from mcp import types as mcp_types
|
|
147
|
+
|
|
148
|
+
prefixed_tool = mcp_types.Tool(
|
|
149
|
+
name=prefixed_name,
|
|
150
|
+
description=tool.description,
|
|
151
|
+
inputSchema=tool.inputSchema,
|
|
152
|
+
)
|
|
153
|
+
all_tools.append(prefixed_tool)
|
|
154
|
+
# Map prefixed name to (server_name, original_tool)
|
|
155
|
+
self._tool_map[prefixed_name] = (server_name, tool, prefixed_tool)
|
|
156
|
+
else:
|
|
157
|
+
# Single server - no prefix needed
|
|
158
|
+
all_tools.append(tool)
|
|
159
|
+
self._tool_map[tool.name] = (server_name, tool, tool)
|
|
143
160
|
|
|
144
161
|
# Log detailed tool info in verbose mode
|
|
145
162
|
if self.verbose:
|
|
@@ -164,15 +181,20 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
164
181
|
raise ValueError("Client is not connected, call initialize() first")
|
|
165
182
|
|
|
166
183
|
if tool_call.name not in self._tool_map:
|
|
167
|
-
|
|
184
|
+
return MCPToolResult(
|
|
185
|
+
content=[types.TextContent(type="text", text=f"Tool '{tool_call.name}' not found")],
|
|
186
|
+
isError=True,
|
|
187
|
+
structuredContent=None,
|
|
188
|
+
)
|
|
168
189
|
|
|
169
|
-
server_name, _ = self._tool_map[tool_call.name]
|
|
190
|
+
server_name, original_tool, _ = self._tool_map[tool_call.name]
|
|
170
191
|
session = self._sessions[server_name]
|
|
171
192
|
|
|
172
193
|
if self.verbose:
|
|
173
194
|
logger.debug(
|
|
174
|
-
"Calling tool '%s' on server '%s' with arguments: %s",
|
|
195
|
+
"Calling tool '%s' (original: '%s') on server '%s' with arguments: %s",
|
|
175
196
|
tool_call.name,
|
|
197
|
+
original_tool.name,
|
|
176
198
|
server_name,
|
|
177
199
|
tool_call.arguments,
|
|
178
200
|
)
|
|
@@ -181,7 +203,7 @@ class MCPUseHUDClient(BaseHUDClient):
|
|
|
181
203
|
raise ValueError(f"Client session not initialized for {server_name}")
|
|
182
204
|
|
|
183
205
|
result = await session.connector.client_session.call_tool(
|
|
184
|
-
name=
|
|
206
|
+
name=original_tool.name, # Use original tool name, not prefixed
|
|
185
207
|
arguments=tool_call.arguments or {},
|
|
186
208
|
)
|
|
187
209
|
|
hud/datasets/task.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
|
+
import logging
|
|
6
7
|
from collections import defaultdict
|
|
7
8
|
from string import Template
|
|
8
9
|
from typing import Any
|
|
@@ -12,6 +13,8 @@ from pydantic import BaseModel, Field, field_validator
|
|
|
12
13
|
from hud.settings import settings
|
|
13
14
|
from hud.types import MCPToolCall
|
|
14
15
|
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
15
18
|
|
|
16
19
|
class Task(BaseModel):
|
|
17
20
|
"""
|
|
@@ -48,7 +51,9 @@ class Task(BaseModel):
|
|
|
48
51
|
try:
|
|
49
52
|
return json.loads(v)
|
|
50
53
|
except json.JSONDecodeError as e:
|
|
51
|
-
|
|
54
|
+
from hud.shared.exceptions import HudConfigError
|
|
55
|
+
|
|
56
|
+
raise HudConfigError(f"Invalid JSON string: {e}") from e
|
|
52
57
|
return v
|
|
53
58
|
|
|
54
59
|
@field_validator("setup_tool", "evaluate_tool", mode="before")
|
|
@@ -63,7 +68,9 @@ class Task(BaseModel):
|
|
|
63
68
|
try:
|
|
64
69
|
v = json.loads(v)
|
|
65
70
|
except json.JSONDecodeError as e:
|
|
66
|
-
|
|
71
|
+
from hud.shared.exceptions import HudConfigError
|
|
72
|
+
|
|
73
|
+
raise HudConfigError(f"Invalid JSON string: {e}") from e
|
|
67
74
|
|
|
68
75
|
if isinstance(v, dict):
|
|
69
76
|
return MCPToolCall(**v)
|
|
@@ -90,6 +97,8 @@ class Task(BaseModel):
|
|
|
90
97
|
|
|
91
98
|
if settings.api_key:
|
|
92
99
|
mapping["HUD_API_KEY"] = settings.api_key
|
|
100
|
+
else:
|
|
101
|
+
logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
|
|
93
102
|
|
|
94
103
|
def substitute_in_value(obj: Any) -> Any:
|
|
95
104
|
"""Recursively substitute variables in nested structures."""
|