PyPI - hud-python - Versions diffs - 0.4.18__py3-none-any.whl → 0.4.20__py3-none-any.whl - Mend

hud-python 0.4.18__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (13) hide show

hud/agents/base.py +30 -11
hud/agents/claude.py +41 -27
hud/agents/openai_chat_generic.py +11 -12
hud/clients/base.py +10 -22
hud/datasets/task.py +5 -0
hud/tools/playwright.py +1 -1
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.18.dist-info → hud_python-0.4.20.dist-info}/METADATA +1 -1
{hud_python-0.4.18.dist-info → hud_python-0.4.20.dist-info}/RECORD +13 -13
{hud_python-0.4.18.dist-info → hud_python-0.4.20.dist-info}/WHEEL +0 -0
{hud_python-0.4.18.dist-info → hud_python-0.4.20.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.18.dist-info → hud_python-0.4.20.dist-info}/licenses/LICENSE +0 -0

hud/agents/base.py CHANGED Viewed

@@ -30,9 +30,19 @@ class MCPAgent(ABC):
     """
     Base class for MCP-enabled agents.
-    This class provides the foundation for agents that interact with MCP servers,
-    handling tool discovery and filtering while leaving provider-specific
-    implementation details to subclasses.
+    Provides common behavior for agents that interact with MCP servers, including:
+    - Client management: accepts an `AgentMCPClient` or auto-creates one at
+      runtime when `run()` is called with a `Task` that includes `mcp_config`.
+    - Tool lifecycle: discovery, filtering (`allowed_tools`, `disallowed_tools`),
+      and automatic marking of lifecycle tools (setup/evaluate) from a `Task`.
+    - Messaging: system prompt handling, optional inclusion of setup output on
+      the first turn, and control over initial screenshots.
+    - Telemetry & UX: standardized logging/printing via `HUDDesign` and optional
+      automatic tracing (`auto_trace`).
+    Subclasses implement provider-specific formatting and response fetching
+    by overriding these abstract methods: `get_system_messages`, `get_response`,
+    `format_blocks`, and `format_tool_results`.
     """
     metadata: dict[str, Any]
@@ -59,14 +69,23 @@ class MCPAgent(ABC):
         Initialize the base MCP agent.
         Args:
-            mcp_client: AgentMCPClient instance for server connections
-            allowed_tools: List of tool names to allow (None = all tools)
-            disallowed_tools: List of tool names to disallow
-            lifecycle_tools: List of tool names to use for lifecycle tools
-            initial_screenshot: Whether to capture screenshot before first prompt
-            system_prompt: System prompt to use
-            append_setup_output: Whether to append setup tool output to initial messages
-            verbose: If True, sets logging level to INFO. If False, only WARNING and above.
+            mcp_client: Client for connecting to MCP servers. If None, a client
+                is auto-created at runtime when `run()` is called with a `Task`
+                that provides `mcp_config`.
+            allowed_tools: Names of tools to allow (None means allow all).
+            disallowed_tools: Names of tools to always exclude.
+            lifecycle_tools: Tools reserved for lifecycle phases (e.g., setup,
+                evaluate). These are hidden from normal tool calling.
+            system_prompt: System prompt to seed the conversation.
+            append_setup_output: Whether to append setup tool output to the
+                first turn's messages.
+            initial_screenshot: Whether to include an initial screenshot before
+                the first prompt (when supported by the environment).
+            model_name: Label used in telemetry/logging to identify the model.
+            response_agent: Optional automation that can respond to the model's
+                outputs to keep the loop going (e.g., auto-continue/stop).
+            auto_trace: If True, automatically creates a trace/span for runs.
+            verbose: If True, increases logging verbosity for developer UX.
         """
         self.mcp_client = mcp_client

hud/agents/claude.py CHANGED Viewed

@@ -306,35 +306,49 @@ class ClaudeAgent(MCPAgent):
         """Convert MCP tools to Claude tool format."""
         claude_tools = []
         self._claude_to_mcp_tool_map = {}  # Reset mapping
+        # Find computer tool by priority
+        computer_tool_priority = ["anthropic_computer", "computer_anthropic", "computer"]
+        selected_computer_tool = None
+        for priority_name in computer_tool_priority:
+            for tool in self._available_tools:
+                if tool.name == priority_name:
+                    selected_computer_tool = tool
+                    break
+            if selected_computer_tool:
+                break
+        # Add the selected computer tool if found
+        if selected_computer_tool:
+            claude_tool = {
+                "type": "computer_20250124",
+                "name": "computer",
+                "display_width_px": self.metadata["display_width"],
+                "display_height_px": self.metadata["display_height"],
+            }
+            # Map Claude's "computer" back to the actual MCP tool name
+            self._claude_to_mcp_tool_map["computer"] = selected_computer_tool.name
+            claude_tools.append(claude_tool)
+            logger.debug(f"Using {selected_computer_tool.name} as computer tool for Claude")
+        # Add other non-computer tools
         for tool in self._available_tools:
-            # Special handling for computer use tools
-            if tool.name in ["computer", "computer_anthropic", "anthropic_computer"]:
-                # Use Claude's native computer use format with configurable dimensions
-                claude_tool = {
-                    "type": "computer_20250124",
-                    "name": "computer",
-                    "display_width_px": self.metadata["display_width"],
-                    "display_height_px": self.metadata["display_height"],
-                }
-                # Map Claude's "computer" back to the actual MCP tool name
-                self._claude_to_mcp_tool_map["computer"] = tool.name
-            elif tool.name not in self.lifecycle_tools:
-                # Convert regular tools
-                claude_tool = {
-                    "name": tool.name,
-                    "description": tool.description or f"Execute {tool.name}",
-                    "input_schema": tool.inputSchema
-                    or {
-                        "type": "object",
-                        "properties": {},
-                    },
-                }
-                # Direct mapping for non-computer tools
-                self._claude_to_mcp_tool_map[tool.name] = tool.name
-            else:
+            # Skip computer tools (already handled) and lifecycle tools
+            if tool.name in computer_tool_priority or tool.name in self.lifecycle_tools:
                 continue
+            claude_tool = {
+                "name": tool.name,
+                "description": tool.description or f"Execute {tool.name}",
+                "input_schema": tool.inputSchema
+                or {
+                    "type": "object",
+                    "properties": {},
+                },
+            }
+            # Direct mapping for non-computer tools
+            self._claude_to_mcp_tool_map[tool.name] = tool.name
             claude_tools.append(claude_tool)
         self.claude_tools = claude_tools

hud/agents/openai_chat_generic.py CHANGED Viewed

@@ -7,7 +7,7 @@ through the existing :class:`hud.agent.MCPAgent` scaffolding.
 Key points:
 - Stateless, no special server-side conversation state is assumed.
 - Accepts an :class:`openai.AsyncOpenAI` client, caller can supply their own
-  base_url / api_key (e.g. ART, llama.cpp, together.ai, …)
+  base_url / api_key (e.g. llama.cpp, together.ai, …)
 - All HUD features (step_count, OTel spans, tool filtering, screenshots, …)
   come from the ``MCPAgent`` base class, we only implement the three abstract
   methods
@@ -30,8 +30,6 @@ if TYPE_CHECKING:
     from openai import AsyncOpenAI
     from openai.types.chat import ChatCompletionToolParam
-    from hud.clients import AgentMCPClient
 logger = logging.getLogger(__name__)
@@ -40,19 +38,19 @@ class GenericOpenAIChatAgent(MCPAgent):
     def __init__(
         self,
-        mcp_client: AgentMCPClient,
         *,
         openai_client: AsyncOpenAI,
         model_name: str = "gpt-4o-mini",
         parallel_tool_calls: bool = False,
-        logprobs: bool = False,
+        completion_kwargs: dict[str, Any] | None = None,
         **agent_kwargs: Any,
     ) -> None:
-        super().__init__(mcp_client=mcp_client, **agent_kwargs)
+        # Accept base-agent settings via **agent_kwargs (e.g., mcp_client, system_prompt, etc.)
+        super().__init__(**agent_kwargs)
         self.oai = openai_client
         self.model_name = model_name
         self.parallel_tool_calls = parallel_tool_calls
-        self.logprobs = logprobs
+        self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
         self.conversation_history = []
     @staticmethod
@@ -177,12 +175,15 @@ class GenericOpenAIChatAgent(MCPAgent):
         # Convert MCP tool schemas to OpenAI format
         mcp_schemas = self.get_tool_schemas()
+        protected_keys = {"model", "messages", "tools", "parallel_tool_calls"}
+        extra = {k: v for k, v in (self.completion_kwargs or {}).items() if k not in protected_keys}
         response = await self.oai.chat.completions.create(
             model=self.model_name,
             messages=messages,
             tools=cast("list[ChatCompletionToolParam]", mcp_schemas),
             parallel_tool_calls=self.parallel_tool_calls,
-            logprobs=self.logprobs,
+            **extra,
         )
         choice = response.choices[0]
@@ -247,9 +248,7 @@ class GenericOpenAIChatAgent(MCPAgent):
                         image_parts.append(
                             {
                                 "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:{mime_type};base64,{data}"
-                                },
+                                "image_url": {"url": f"data:{mime_type};base64,{data}"},
                             }
                         )
                 elif isinstance(item, types.TextContent):
@@ -276,7 +275,7 @@ class GenericOpenAIChatAgent(MCPAgent):
                 # Add a user message with the images
                 content_with_images = [
                     {"type": "text", "text": "Tool returned the following:"},
-                    *image_parts
+                    *image_parts,
                 ]
                 rendered.append(
                     {

hud/clients/base.py CHANGED Viewed

@@ -130,31 +130,19 @@ class BaseHUDClient(AgentMCPClient):
         logger.debug("Initializing MCP client...")
         try:
+            # Check if API key is set for HUD API
+            for server_config in self._mcp_config.values():
+                url = server_config.get("url", "")
+                headers = server_config.get("headers", {})
+                if "mcp.hud.so" in url and len(headers.get("Authorization", "")) < 10:
+                    raise RuntimeError(
+                        "Please ensure your HUD_API_KEY environment variable is set correctly."
+                        "You can get an API key at https://app.hud.so"
+                    )
             # Subclasses implement connection
             await self._connect(self._mcp_config)
-        except RuntimeError as e:
-            # Re-raise authentication errors with clear message
-            if "Authentication failed" in str(e):
-                raise
-            raise
         except Exception as e:
-            # Check for authentication errors in the exception chain
-            error_msg = str(e)
-            if "401" in error_msg or "Unauthorized" in error_msg:
-                # Check if connecting to HUD API
-                for server_config in self._mcp_config.values():
-                    url = server_config.get("url", "")
-                    if "mcp.hud.so" in url:
-                        raise RuntimeError(
-                            "Authentication failed for HUD API. "
-                            "Please ensure your HUD_API_KEY environment variable is set correctly. "
-                            "You can get an API key at https://app.hud.so"
-                        ) from e
-                raise RuntimeError(
-                    "Authentication failed (401 Unauthorized). "
-                    "Please check your credentials or API key."
-                ) from e
-            raise
+            raise e
         # Common hud behavior - fetch telemetry
         await self._fetch_telemetry()

hud/datasets/task.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
 import json
+import logging
 from collections import defaultdict
 from string import Template
 from typing import Any
@@ -12,6 +13,8 @@ from pydantic import BaseModel, Field, field_validator
 from hud.settings import settings
 from hud.types import MCPToolCall
+logger = logging.getLogger(__name__)
 class Task(BaseModel):
     """
@@ -90,6 +93,8 @@ class Task(BaseModel):
         if settings.api_key:
             mapping["HUD_API_KEY"] = settings.api_key
+        else:
+            logger.error("HUD_API_KEY is not set, tracing and remote training will not work")
         def substitute_in_value(obj: Any) -> Any:
             """Recursively substitute variables in nested structures."""

hud/tools/playwright.py CHANGED Viewed

@@ -153,7 +153,7 @@ class PlaywrightTool(BaseTool):
         """Ensure browser is launched and ready."""
         if self._browser is None or not self._browser.is_connected():
             if self._cdp_url:
-                logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
+                logger.info("Connecting to remote browser via CDP")
             else:
                 logger.info("Launching Playwright browser...")

hud/utils/tests/test_version.py CHANGED Viewed

@@ -5,4 +5,4 @@ def test_import():
     """Test that the package can be imported."""
     import hud
-    assert hud.__version__ == "0.4.18"
+    assert hud.__version__ == "0.4.20"

hud/version.py CHANGED Viewed

@@ -4,4 +4,4 @@ Version information for the HUD SDK.
 from __future__ import annotations
-__version__ = "0.4.18"
+__version__ = "0.4.20"

{hud_python-0.4.18.dist-info → hud_python-0.4.20.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.18
+Version: 0.4.20
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues

{hud_python-0.4.18.dist-info → hud_python-0.4.20.dist-info}/RECORD RENAMED Viewed

@@ -2,13 +2,13 @@ hud/__init__.py,sha256=BjAhZtsHbGN371Q8t3o4v4jltedkmDE85xW0yOILU9g,397
 hud/__main__.py,sha256=YR8Dq8OhINOsVfQ55PmRXXg4fEK84Rt_-rMtJ5rvhWo,145
 hud/settings.py,sha256=q9aZiHjvbL4oLE-N8AttTW4rmzS8zPMnsca-iMGyEGc,2362
 hud/types.py,sha256=gNnyS1G7aYHIR5sT3k3bOfSTFnPylUO6lNGLWbjbeYk,5149
-hud/version.py,sha256=8Ag1N-qzwxUt5QwVLTJ5Z43L6M6O6FLpCKva6zONOfc,105
+hud/version.py,sha256=pgW9sHjEdTZlk7884zAV7kzAGXkPVC1P6p_MTwNJTSI,105
 hud/agents/__init__.py,sha256=UoIkljWdbq4bM0LD-mSaw6w826EqdEjOk7r6glNYwYQ,286
-hud/agents/base.py,sha256=rbwYP_a6XTwhY_5CaBlE7SWflnTq1EOuDiNY2XeUWdM,28275
-hud/agents/claude.py,sha256=_eD_XKZhVJ6grkHQfbS6JskztueomQcmJeGJMbfNdmE,14534
+hud/agents/base.py,sha256=t3bPRTKzGuejhSeo1jLNprlUv6zNU9ezQfP16tX_pXw,29562
+hud/agents/claude.py,sha256=v061ulKO4n-1dIm3iuY5E1PcEQiErFQbeKsP0GynIWA,15062
 hud/agents/langchain.py,sha256=1EgCy8jfjunsWxlPC5XfvfLS6_XZVrIF1ZjtHcrvhYw,9584
 hud/agents/openai.py,sha256=tvFYsZ5yaoLkfjMnHe-COxRttMsLRXBLPdSqgeipQRk,14257
-hud/agents/openai_chat_generic.py,sha256=Q6eKlKQIF2o04eGpIcBAyqpdcgRvuolbxmgWTT6ktEQ,10478
+hud/agents/openai_chat_generic.py,sha256=PQAD4GGE6sHs8R95qpgDBHEbSOJ7WXCYGYFmd3Nic1g,10628
 hud/agents/misc/__init__.py,sha256=BYi4Ytp9b_vycpZFXnr5Oyw6ncKLNNGml8Jrb7bWUb4,136
 hud/agents/misc/response_agent.py,sha256=pnaomb4H-QJm1YKU3tC1YnZXxOlDbTHIXaIH-6Nkb6I,3102
 hud/agents/tests/__init__.py,sha256=W-O-_4i34d9TTyEHV-O_q1Ai1gLhzwDaaPo02_TWQIY,34
@@ -66,7 +66,7 @@ hud/cli/utils/runner.py,sha256=qZI1lFNZIFn6d919awUkMtjQ36TfhAvyqGRzQmkal8c,4269
 hud/cli/utils/server.py,sha256=uSx2DjG5vX-PFoD8zNH-gBHbkTNSHveFSVdAfmp09Tc,7341
 hud/clients/README.md,sha256=XNE3mch95ozDgVqfwCGcrhlHY9CwT1GKfNANNboowto,3826
 hud/clients/__init__.py,sha256=bcPIa7dwH5ENsjh7CzjsJ84fm7Ma93NBc2lGfSjGAKM,328
-hud/clients/base.py,sha256=F8wq-UGoW1J_MguHq5w_Tcr0mJ4awSWbFOE8xP7sSDA,14129
+hud/clients/base.py,sha256=rWh6PbB53HRrbuVJhv1-zuLeEE0bJMWJf9zUCSoii2Q,13592
 hud/clients/fastmcp.py,sha256=KJGi8bmds0Q6rHnkTXb_Hw9ZqWmSo0OfjW05SSuyEJU,9182
 hud/clients/mcp_use.py,sha256=tgvQ5MyY1cJeCR1M7dwYMfDmPnxOQuXPjZeKCr98CJc,11962
 hud/clients/tests/__init__.py,sha256=sKOtJFFa4mDIXh1U6O8ZUHjigE8CiRMQ2PzJTIBZuVE,33
@@ -76,7 +76,7 @@ hud/clients/tests/test_protocol.py,sha256=aK4CS4g3j1D5jPo83ykzZuHUvcZFAulYtIq9T9
 hud/clients/utils/__init__.py,sha256=ucYJqOVpEsN-D9OFE2YTNLG628MgxcZAzfYhnbzx02k,32
 hud/clients/utils/retry_transport.py,sha256=Rsq25eiKKt_pM1bas78QEZvO0illK97X_3opmaS3A3w,6809
 hud/datasets/__init__.py,sha256=74T4mrjELKtE04XkZKwU8QAJcg2wjqXLqRO9s4GlPr4,678
-hud/datasets/task.py,sha256=V82HzRb2_c2MO9EG5ZcY-PMsLt3234Uks7WlkMta5HY,3615
+hud/datasets/task.py,sha256=HjkUS6uFfQkQ1Is3fbsfw0a3pq7FBwfqcnzFVv6txZA,3776
 hud/datasets/utils.py,sha256=3hKvZTkZuCRkTeITB86nNdA1dtHZAqFfAdSPMtcTUhs,4275
 hud/datasets/execution/__init__.py,sha256=4m1AEpMQaUSJFVN_iAXvY6zFttVgZKwE6oQtC0Rrk7U,330
 hud/datasets/execution/parallel.py,sha256=4aL1XpS3vOBqZjgs0vrMZJ4eAoi86Td8C-m5SUtVxMs,25231
@@ -116,7 +116,7 @@ hud/tools/__init__.py,sha256=dT-s4zs2B5GsOZ_K2tZZLKuSIp4u3RIvNYMJ_eUpkrE,960
 hud/tools/base.py,sha256=4qm5LS3SAkrq_lyfToWYCN9tNvTHohKJNH2siHkE364,15824
 hud/tools/bash.py,sha256=LJViMGb3lTGBm_gequVVTM7ySh1Xh9bOOIZXU29Lmrw,5209
 hud/tools/edit.py,sha256=N0AYFXp07-vAJy2li7lvHOL6hfgJOU4LL3iLSZrbRWU,12745
-hud/tools/playwright.py,sha256=lF7NxyEu8YbB7tpmCoTf8p9HxIrejahC67x3Xs0Jjb4,15007
+hud/tools/playwright.py,sha256=iyMrQ-ZKyeFia2fBp0yguXswTcXfGqdZcTXXCfUupFU,14988
 hud/tools/response.py,sha256=t6Oc8NM4u951A1XMCBaIkFyu3VNEQ8dcWURyTygfZmA,2228
 hud/tools/types.py,sha256=g-CWnUUDSxxIfUy54S1bpY1nfTzdYO1R_nPKYReABjQ,2734
 hud/tools/utils.py,sha256=bfVyYMcBOJvr1QdptCjVb6jaHVGIL5WUxmY59kzMekQ,1447
@@ -157,10 +157,10 @@ hud/utils/tests/test_init.py,sha256=2QLQSGgyP9wJhOvPCusm_zjJad0qApOZi1BXpxcdHXQ,
 hud/utils/tests/test_mcp.py,sha256=0pUa16mL-bqbZDXp5NHBnt1gO5o10BOg7zTMHZ1DNPM,4023
 hud/utils/tests/test_progress.py,sha256=QSF7Kpi03Ff_l3mAeqW9qs1nhK50j9vBiSobZq7T4f4,7394
 hud/utils/tests/test_telemetry.py,sha256=5jl7bEx8C8b-FfFUko5pf4UY-mPOR-9HaeL98dGtVHM,2781
-hud/utils/tests/test_version.py,sha256=Ur5o4UVJbPy4rYJUIc3yBCTK-mk9CAf_7bHv2qSPJEI,160
+hud/utils/tests/test_version.py,sha256=UgaAapQpzHdJPFqy5Mhn8AT45nMCWwiy75_dhLcUlic,160
 hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hud_python-0.4.18.dist-info/METADATA,sha256=vvUR4EBJmH6WqrLg2OxsupIJLs_6S8aVPaCRJjN3sJI,20287
-hud_python-0.4.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hud_python-0.4.18.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
-hud_python-0.4.18.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
-hud_python-0.4.18.dist-info/RECORD,,
+hud_python-0.4.20.dist-info/METADATA,sha256=YSbi6IhmvzoZl2h_RR_XyoHXe5caBtiUFlkEn7PjTnQ,20287
+hud_python-0.4.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hud_python-0.4.20.dist-info/entry_points.txt,sha256=jJbodNFg1m0-CDofe5AHvB4zKBq7sSdP97-ohaQ3ae4,63
+hud_python-0.4.20.dist-info/licenses/LICENSE,sha256=yIzBheVUf86FC1bztAcr7RYWWNxyd3B-UJQ3uddg1HA,1078
+hud_python-0.4.20.dist-info/RECORD,,

{hud_python-0.4.18.dist-info → hud_python-0.4.20.dist-info}/WHEEL RENAMED Viewed

File without changes

{hud_python-0.4.18.dist-info → hud_python-0.4.20.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{hud_python-0.4.18.dist-info → hud_python-0.4.20.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hud-python 0.4.18__py3-none-any.whl → 0.4.20__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.18__py3-none-any.whl → 0.4.20__py3-none-any.whl