PyPI - hud-python - Versions diffs - 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl - Mend

hud-python 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (39) hide show

hud/__init__.py +7 -0
hud/agents/base.py +40 -10
hud/agents/claude.py +44 -25
hud/agents/tests/test_client.py +6 -27
hud/cli/__init__.py +50 -20
hud/cli/build.py +3 -44
hud/cli/eval.py +25 -6
hud/cli/init.py +4 -4
hud/cli/push.py +3 -1
hud/cli/tests/test_push.py +6 -6
hud/clients/__init__.py +3 -2
hud/clients/base.py +25 -26
hud/clients/mcp_use.py +44 -22
hud/datasets/task.py +11 -2
hud/native/__init__.py +6 -0
hud/native/comparator.py +546 -0
hud/native/tests/__init__.py +1 -0
hud/native/tests/test_comparator.py +539 -0
hud/native/tests/test_native_init.py +79 -0
hud/otel/instrumentation.py +0 -2
hud/server/server.py +9 -2
hud/shared/exceptions.py +204 -31
hud/shared/hints.py +177 -0
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +385 -144
hud/tools/__init__.py +2 -0
hud/tools/playwright.py +1 -1
hud/tools/submit.py +66 -0
hud/types.py +33 -5
hud/utils/design.py +57 -0
hud/utils/mcp.py +6 -0
hud/utils/pretty_errors.py +68 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.19.dist-info → hud_python-0.4.21.dist-info}/METADATA +2 -4
{hud_python-0.4.19.dist-info → hud_python-0.4.21.dist-info}/RECORD +39 -31
{hud_python-0.4.19.dist-info → hud_python-0.4.21.dist-info}/WHEEL +0 -0
{hud_python-0.4.19.dist-info → hud_python-0.4.21.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.19.dist-info → hud_python-0.4.21.dist-info}/licenses/LICENSE +0 -0

hud/clients/base.py CHANGED Viewed

@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any, Protocol, overload, runtime_checkable
 from mcp.types import Implementation
+from hud.shared.exceptions import HudAuthenticationError, HudException
 from hud.types import MCPToolCall, MCPToolResult
 from hud.utils.mcp import setup_hud_telemetry
 from hud.version import __version__ as hud_version
@@ -120,8 +121,10 @@ class BaseHUDClient(AgentMCPClient):
         self._mcp_config = mcp_config or self._mcp_config
         if self._mcp_config is None:
-            raise ValueError(
-                "An MCP server configuration is required"
+            from hud.shared.exceptions import HudConfigError
+            raise HudConfigError(
+                "An MCP server configuration is required. "
                 "Either pass it to the constructor or call initialize with a configuration"
             )
@@ -130,31 +133,23 @@ class BaseHUDClient(AgentMCPClient):
         logger.debug("Initializing MCP client...")
         try:
+            # Check if API key is set for HUD API
+            for server_config in self._mcp_config.values():
+                url = server_config.get("url", "")
+                headers = server_config.get("headers", {})
+                if "mcp.hud.so" in url and len(headers.get("Authorization", "")) < 10:
+                    raise HudAuthenticationError(
+                        f'Sending authorization "{headers.get("Authorization", "")}", which may'
+                        " be incomplete. Ensure HUD_API_KEY environment variable is set or send it"
+                        " as a header. You can get an API key at https://app.hud.so"
+                    )
             # Subclasses implement connection
             await self._connect(self._mcp_config)
-        except RuntimeError as e:
-            # Re-raise authentication errors with clear message
-            if "Authentication failed" in str(e):
-                raise
+        except HudException:
             raise
         except Exception as e:
-            # Check for authentication errors in the exception chain
-            error_msg = str(e)
-            if "401" in error_msg or "Unauthorized" in error_msg:
-                # Check if connecting to HUD API
-                for server_config in self._mcp_config.values():
-                    url = server_config.get("url", "")
-                    if "mcp.hud.so" in url:
-                        raise RuntimeError(
-                            "Authentication failed for HUD API. "
-                            "Please ensure your HUD_API_KEY environment variable is set correctly. "
-                            "You can get an API key at https://app.hud.so"
-                        ) from e
-                raise RuntimeError(
-                    "Authentication failed (401 Unauthorized). "
-                    "Please check your credentials or API key."
-                ) from e
-            raise
+            # Auto-converts to appropriate HUD exception type with hints
+            raise HudException from e
         # Common hud behavior - fetch telemetry
         await self._fetch_telemetry()
@@ -180,7 +175,7 @@ class BaseHUDClient(AgentMCPClient):
             self._initialized = False
             logger.info("Client disconnected")
         else:
-            logger.warning("Client is not running, cannot disconnect")
+            logger.debug("Client was not initialized, skipping disconnect")
     @overload
     async def call_tool(self, tool_call: MCPToolCall, /) -> MCPToolResult: ...
@@ -248,7 +243,9 @@ class BaseHUDClient(AgentMCPClient):
     def mcp_config(self) -> dict[str, dict[str, Any]]:
         """Get the MCP config."""
         if self._mcp_config is None:
-            raise ValueError("Please initialize the client with a valid MCP config")
+            from hud.shared.exceptions import HudConfigError
+            raise HudConfigError("Please initialize the client with a valid MCP config")
         return self._mcp_config
     async def __aenter__(self: Any) -> Any:
@@ -317,7 +314,9 @@ class BaseHUDClient(AgentMCPClient):
             - metadata: Environment metadata
         """
         if not self._initialized:
-            raise ValueError("Client must be initialized before analyzing the environment")
+            from hud.shared.exceptions import HudClientError
+            raise HudClientError("Client must be initialized before analyzing the environment")
         analysis: dict[str, Any] = {
             "tools": [],

hud/clients/mcp_use.py CHANGED Viewed

@@ -3,10 +3,12 @@
 from __future__ import annotations
 import logging
-from typing import TYPE_CHECKING, Any
+from typing import Any
-from mcp import Implementation
+from mcp import Implementation, types
 from mcp.shared.exceptions import McpError
+from mcp_use.client import MCPClient as MCPUseClient
+from mcp_use.session import MCPSession as MCPUseSession
 from pydantic import AnyUrl
 from hud.types import MCPToolCall, MCPToolResult
@@ -14,18 +16,6 @@ from hud.version import __version__ as hud_version
 from .base import BaseHUDClient
-if TYPE_CHECKING:
-    from mcp import types
-    from mcp_use.client import MCPClient as MCPUseClient  # type: ignore[attr-defined]
-    from mcp_use.session import MCPSession as MCPUseSession  # type: ignore[attr-defined]
-try:
-    from mcp_use.client import MCPClient as MCPUseClient  # type: ignore[attr-defined]
-    from mcp_use.session import MCPSession as MCPUseSession  # type: ignore[attr-defined]
-except ImportError:
-    MCPUseClient = None  # type: ignore[misc, assignment]
-    MCPUseSession = None  # type: ignore[misc, assignment]
 logger = logging.getLogger(__name__)
@@ -53,7 +43,9 @@ class MCPUseHUDClient(BaseHUDClient):
             )
         self._sessions: dict[str, Any] = {}  # Will be MCPUseSession when available
-        self._tool_map: dict[str, tuple[str, types.Tool]] = {}
+        self._tool_map: dict[
+            str, tuple[str, types.Tool, types.Tool]
+        ] = {}  # server_name, original_tool, prefixed_tool
         self._client: Any | None = None  # Will be MCPUseClient when available
     async def _connect(self, mcp_config: dict[str, dict[str, Any]]) -> None:
@@ -106,14 +98,23 @@ class MCPUseHUDClient(BaseHUDClient):
                 logger.info("Check that the MCP server is running and accessible")
             raise
+        # Populate tool map during initialization
+        await self.list_tools()
     async def list_tools(self) -> list[types.Tool]:
         """List all available tools from all sessions."""
         if self._client is None or not self._sessions:
             raise ValueError("Client is not connected, call initialize() first")
+        if self._tool_map:
+            return [tool[2] for tool in self._tool_map.values()]
         all_tools = []
         self._tool_map = {}
+        # Check if we need to prefix (more than one server)
+        use_prefix = len(self._sessions) > 1
         for server_name, session in self._sessions.items():
             try:
                 # Ensure session is initialized
@@ -136,10 +137,26 @@ class MCPUseHUDClient(BaseHUDClient):
                     [tool.name for tool in tools_result.tools],
                 )
-                # Add to collections
+                # Add to collections with optional prefix
                 for tool in tools_result.tools:
-                    all_tools.append(tool)
-                    self._tool_map[tool.name] = (server_name, tool)
+                    if use_prefix:
+                        # Create a new tool with prefixed name
+                        prefixed_name = f"{server_name}_{tool.name}"
+                        # Create a new tool instance with prefixed name
+                        from mcp import types as mcp_types
+                        prefixed_tool = mcp_types.Tool(
+                            name=prefixed_name,
+                            description=tool.description,
+                            inputSchema=tool.inputSchema,
+                        )
+                        all_tools.append(prefixed_tool)
+                        # Map prefixed name to (server_name, original_tool)
+                        self._tool_map[prefixed_name] = (server_name, tool, prefixed_tool)
+                    else:
+                        # Single server - no prefix needed
+                        all_tools.append(tool)
+                        self._tool_map[tool.name] = (server_name, tool, tool)
                 # Log detailed tool info in verbose mode
                 if self.verbose:
@@ -164,15 +181,20 @@ class MCPUseHUDClient(BaseHUDClient):
             raise ValueError("Client is not connected, call initialize() first")
         if tool_call.name not in self._tool_map:
-            raise ValueError(f"Tool '{tool_call.name}' not found")
+            return MCPToolResult(
+                content=[types.TextContent(type="text", text=f"Tool '{tool_call.name}' not found")],
+                isError=True,
+                structuredContent=None,
+            )
-        server_name, _ = self._tool_map[tool_call.name]
+        server_name, original_tool, _ = self._tool_map[tool_call.name]
         session = self._sessions[server_name]
         if self.verbose:
             logger.debug(
-                "Calling tool '%s' on server '%s' with arguments: %s",
+                "Calling tool '%s' (original: '%s') on server '%s' with arguments: %s",
                 tool_call.name,
+                original_tool.name,
                 server_name,
                 tool_call.arguments,
             )
@@ -181,7 +203,7 @@ class MCPUseHUDClient(BaseHUDClient):
             raise ValueError(f"Client session not initialized for {server_name}")
         result = await session.connector.client_session.call_tool(
-            name=tool_call.name,
+            name=original_tool.name,  # Use original tool name, not prefixed
             arguments=tool_call.arguments or {},
         )

hud/datasets/task.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
 import json
+import logging
 from collections import defaultdict
 from string import Template
 from typing import Any
@@ -12,6 +13,8 @@ from pydantic import BaseModel, Field, field_validator
 from hud.settings import settings
 from hud.types import MCPToolCall
+logger = logging.getLogger(__name__)
 class Task(BaseModel):
     """
@@ -48,7 +51,9 @@ class Task(BaseModel):
             try:
                 return json.loads(v)
             except json.JSONDecodeError as e:
-                raise ValueError(f"Invalid JSON string: {e}") from e
+                from hud.shared.exceptions import HudConfigError
+                raise HudConfigError(f"Invalid JSON string: {e}") from e
         return v
     @field_validator("setup_tool", "evaluate_tool", mode="before")
@@ -63,7 +68,9 @@ class Task(BaseModel):
             try:
                 v = json.loads(v)
             except json.JSONDecodeError as e:
-                raise ValueError(f"Invalid JSON string: {e}") from e
+                from hud.shared.exceptions import HudConfigError
+                raise HudConfigError(f"Invalid JSON string: {e}") from e
         if isinstance(v, dict):
             return MCPToolCall(**v)
@@ -90,6 +97,8 @@ class Task(BaseModel):
         if settings.api_key:
             mapping["HUD_API_KEY"] = settings.api_key
+        else:
+            logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
         def substitute_in_value(obj: Any) -> Any:
             """Recursively substitute variables in nested structures."""

hud/native/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Native Python MCP servers for HUD.
+These servers run as pure Python processes without containerization.
+They can be run standalone or mounted into other servers, providing
+lightweight evaluation and comparison capabilities.
+"""

hud-python 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl