PyPI - hud-python - Versions diffs - 0.4.66__tar.gz → 0.4.68__tar.gz - Mend

hud-python 0.4.66__tar.gz → 0.4.68__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (332)

{hud_python-0.4.66 → hud_python-0.4.68}/.gitignore RENAMED Viewed

@@ -53,4 +53,5 @@ hud/rl/checkpoints_test/
 .ck/
-.hud_eval_config
+.hud_eval_config
+.hud_eval.toml

{hud_python-0.4.66 → hud_python-0.4.68}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.66
+Version: 0.4.68
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -35,7 +35,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Python: <3.13,>=3.11
-Requires-Dist: anthropic
+Requires-Dist: anthropic>=0.75
 Requires-Dist: blessed>=1.20.0
 Requires-Dist: datasets>=2.14.0
 Requires-Dist: google-genai
@@ -45,7 +45,7 @@ Requires-Dist: hud-mcp-python-sdk>=3.13.2
 Requires-Dist: hud-mcp-use-python-sdk==2.3.20
 Requires-Dist: langchain==0.3.27
 Requires-Dist: numpy>=1.24.0
-Requires-Dist: openai
+Requires-Dist: openai>=2.8.1
 Requires-Dist: opentelemetry-api>=1.34.1
 Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
 Requires-Dist: opentelemetry-instrumentation-mcp==0.47.0
@@ -64,74 +64,50 @@ Requires-Dist: typer>=0.9.0
 Requires-Dist: watchfiles>=0.21.0
 Requires-Dist: wrapt>=1.14.0
 Provides-Extra: agent
-Requires-Dist: aiodocker>=0.24.0; extra == 'agent'
 Requires-Dist: dotenv>=0.9.9; extra == 'agent'
-Requires-Dist: inspect-ai>=0.3.80; extra == 'agent'
 Requires-Dist: ipykernel; extra == 'agent'
 Requires-Dist: ipython<9; extra == 'agent'
 Requires-Dist: jupyter-client; extra == 'agent'
 Requires-Dist: jupyter-core; extra == 'agent'
-Requires-Dist: langchain; extra == 'agent'
-Requires-Dist: langchain-anthropic; extra == 'agent'
-Requires-Dist: langchain-openai; extra == 'agent'
-Requires-Dist: litellm>=1.55.0; extra == 'agent'
 Requires-Dist: pillow>=11.1.0; extra == 'agent'
 Requires-Dist: playwright; extra == 'agent'
 Requires-Dist: pyautogui>=0.9.54; extra == 'agent'
-Requires-Dist: pyright==1.1.401; extra == 'agent'
+Requires-Dist: pyright==1.1.407; extra == 'agent'
 Requires-Dist: pytest-asyncio; extra == 'agent'
 Requires-Dist: pytest-cov; extra == 'agent'
 Requires-Dist: pytest-mock; extra == 'agent'
 Requires-Dist: pytest<9,>=8.1.1; extra == 'agent'
 Requires-Dist: ruff>=0.11.8; extra == 'agent'
-Requires-Dist: setuptools; extra == 'agent'
-Requires-Dist: textdistance<5,>=4.5.0; extra == 'agent'
 Provides-Extra: agents
-Requires-Dist: aiodocker>=0.24.0; extra == 'agents'
 Requires-Dist: dotenv>=0.9.9; extra == 'agents'
-Requires-Dist: inspect-ai>=0.3.80; extra == 'agents'
 Requires-Dist: ipykernel; extra == 'agents'
 Requires-Dist: ipython<9; extra == 'agents'
 Requires-Dist: jupyter-client; extra == 'agents'
 Requires-Dist: jupyter-core; extra == 'agents'
-Requires-Dist: langchain; extra == 'agents'
-Requires-Dist: langchain-anthropic; extra == 'agents'
-Requires-Dist: langchain-openai; extra == 'agents'
-Requires-Dist: litellm>=1.55.0; extra == 'agents'
 Requires-Dist: pillow>=11.1.0; extra == 'agents'
 Requires-Dist: playwright; extra == 'agents'
 Requires-Dist: pyautogui>=0.9.54; extra == 'agents'
-Requires-Dist: pyright==1.1.401; extra == 'agents'
+Requires-Dist: pyright==1.1.407; extra == 'agents'
 Requires-Dist: pytest-asyncio; extra == 'agents'
 Requires-Dist: pytest-cov; extra == 'agents'
 Requires-Dist: pytest-mock; extra == 'agents'
 Requires-Dist: pytest<9,>=8.1.1; extra == 'agents'
 Requires-Dist: ruff>=0.11.8; extra == 'agents'
-Requires-Dist: setuptools; extra == 'agents'
-Requires-Dist: textdistance<5,>=4.5.0; extra == 'agents'
 Provides-Extra: dev
-Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
 Requires-Dist: dotenv>=0.9.9; extra == 'dev'
-Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
 Requires-Dist: ipykernel; extra == 'dev'
 Requires-Dist: ipython<9; extra == 'dev'
 Requires-Dist: jupyter-client; extra == 'dev'
 Requires-Dist: jupyter-core; extra == 'dev'
-Requires-Dist: langchain; extra == 'dev'
-Requires-Dist: langchain-anthropic; extra == 'dev'
-Requires-Dist: langchain-openai; extra == 'dev'
-Requires-Dist: litellm>=1.55.0; extra == 'dev'
 Requires-Dist: pillow>=11.1.0; extra == 'dev'
 Requires-Dist: playwright; extra == 'dev'
 Requires-Dist: pyautogui>=0.9.54; extra == 'dev'
-Requires-Dist: pyright==1.1.401; extra == 'dev'
+Requires-Dist: pyright==1.1.407; extra == 'dev'
 Requires-Dist: pytest-asyncio; extra == 'dev'
 Requires-Dist: pytest-cov; extra == 'dev'
 Requires-Dist: pytest-mock; extra == 'dev'
 Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
 Requires-Dist: ruff>=0.11.8; extra == 'dev'
-Requires-Dist: setuptools; extra == 'dev'
-Requires-Dist: textdistance<5,>=4.5.0; extra == 'dev'
 Provides-Extra: rl
 Requires-Dist: bitsandbytes>=0.41.0; (sys_platform == 'linux') and extra == 'rl'
 Requires-Dist: liger-kernel>=0.5.0; (sys_platform == 'linux') and extra == 'rl'
@@ -151,15 +127,15 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
 [![PyPI version](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
 [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
-[![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLnNvL21jcCJ9)
+[![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
 [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
 [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
 [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
-### Are you a startup building agents?
+### Are you an enterprise building agents?
-[📅 Hop on a call](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.ai](mailto:founders@hud.ai)
+[📅 Hop on a call](https://cal.com/jay-hud) or [📧 founders@hud.ai](mailto:founders@hud.ai)
 ## Highlights
@@ -179,7 +155,7 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
 pip install hud-python
 # CLI - RL pipeline, environment design
-uv tool install hud-python
+uv tool install hud-python@latest
 # uv tool update-shell
 ```
@@ -439,7 +415,7 @@ Train with the new interactive `hud rl` flow:
 ```bash
 # Install CLI
-uv tool install hud-python
+uv tool install hud-python@latest
 # Option A: Run directly from a HuggingFace dataset
 hud rl hud-evals/2048-basic

{hud_python-0.4.66 → hud_python-0.4.68}/README.md RENAMED Viewed

@@ -10,15 +10,15 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
 [![PyPI version](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
 [![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
-[![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLnNvL21jcCJ9)
+[![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
 [![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
 [![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
 [![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
-### Are you a startup building agents?
+### Are you an enterprise building agents?
-[📅 Hop on a call](https://cal.com/jay-ram-z6st6w/demo) or [📧 founders@hud.ai](mailto:founders@hud.ai)
+[📅 Hop on a call](https://cal.com/jay-hud) or [📧 founders@hud.ai](mailto:founders@hud.ai)
 ## Highlights
@@ -38,7 +38,7 @@ OSS RL environment + evals toolkit. Wrap software as environments, run benchmark
 pip install hud-python
 # CLI - RL pipeline, environment design
-uv tool install hud-python
+uv tool install hud-python@latest
 # uv tool update-shell
 ```
@@ -298,7 +298,7 @@ Train with the new interactive `hud rl` flow:
 ```bash
 # Install CLI
-uv tool install hud-python
+uv tool install hud-python@latest
 # Option A: Run directly from a HuggingFace dataset
 hud rl hud-evals/2048-basic

{hud_python-0.4.66 → hud_python-0.4.68}/environments/README.md RENAMED Viewed

@@ -60,7 +60,7 @@ The HUD SDK includes a powerful CLI for debugging and analyzing MCP environments
 ```bash
 # Install HUD CLI globally with uv (recommended)
-uv tool install hud-python
+uv tool install hud-python@latest
 # Or use without installing
 uvx --from hud-python hud --help

{hud_python-0.4.66 → hud_python-0.4.68}/environments/browser/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "hud-browser-controller"
 version = "0.1.0"
 description = "HUD Browser Controller - MCP interface for browser environments"
 requires-python = ">=3.11,<3.14"
-dependencies = [ "pydantic>=2.6,<3", "pydantic-settings>=2.2,<3", "hud-python@git+https://github.com/hud-evals/hud-python@env-cli-improvements", "playwright", "pyautogui", "httpx", "typer", "fastapi>=0.104.1", "uvicorn[standard]>=0.24.0", "python-multipart>=0.0.6",]
+dependencies = [ "pydantic>=2.6,<3", "pydantic-settings>=2.2,<3", "hud-python>=0.4.68", "playwright", "pyautogui", "httpx", "typer", "fastapi>=0.104.1", "uvicorn[standard]>=0.24.0", "python-multipart>=0.0.6",]
 [build-system]
 requires = [ "hatchling",]

{hud_python-0.4.66 → hud_python-0.4.68}/environments/online_mind2web/pyproject.toml RENAMED Viewed

@@ -3,7 +3,7 @@ name = "hud-om2w"
 version = "0.1.0"
 description = "HUD Remote Browser Controller with MCP tools for cloud browser providers"
 requires-python = ">=3.11,<3.13"
-dependencies = [ "hud-python==0.4.61", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
+dependencies = [ "hud-python>=0.4.68", "anthropic>=0.74.0", "pyautogui", "playwright", "httpx", "typer", "google-api-python-client", "google-auth",]
 [build-system]
 requires = [ "hatchling",]

{hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/__init__.py RENAMED Viewed

@@ -3,13 +3,15 @@ from __future__ import annotations
 from .base import MCPAgent
 from .claude import ClaudeAgent
 from .gemini import GeminiAgent
-from .openai import OperatorAgent
-from .openai_chat_generic import GenericOpenAIChatAgent
+from .openai import OpenAIAgent
+from .openai_chat import OpenAIChatAgent
+from .operator import OperatorAgent
 __all__ = [
     "ClaudeAgent",
     "GeminiAgent",
-    "GenericOpenAIChatAgent",
     "MCPAgent",
+    "OpenAIAgent",
+    "OpenAIChatAgent",
     "OperatorAgent",
 ]

{hud_python-0.4.66 → hud_python-0.4.68}/hud/agents/base.py RENAMED Viewed

@@ -10,22 +10,32 @@ from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any, ClassVar, Literal
 import mcp.types as types
+from pydantic import BaseModel, ConfigDict
 from hud.agents.utils import log_agent_metadata_to_status, log_task_config_to_current_trace
-from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
+from hud.clients.base import AgentMCPClient
+from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
 from hud.utils.hud_console import HUDConsole
 from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
 if TYPE_CHECKING:
-    from hud.clients.base import AgentMCPClient
     from hud.datasets import Task
-    from .misc import ResponseAgent
 logger = logging.getLogger(__name__)
+class BaseCreateParams(BaseModel):
+    """Runtime parameters for agent creation."""
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    mcp_client: AgentMCPClient | None = None
+    auto_trace: bool = True
+    auto_respond: bool = False
+    verbose: bool = False
 class MCPAgent(ABC):
     """
     Base class for MCP-enabled agents.
@@ -45,80 +55,67 @@ class MCPAgent(ABC):
     `format_blocks`, and `format_tool_results`.
     """
-    metadata: dict[str, Any] | None = None
+    metadata: ClassVar[dict[str, Any] | None] = None
     required_tools: ClassVar[list[str]] = []  # Tools that must be available
+    config_cls: ClassVar[type[BaseAgentConfig]] = BaseAgentConfig
-    def __init__(
-        self,
-        mcp_client: AgentMCPClient | None = None,
-        # Filtering
-        allowed_tools: list[str] | None = None,
-        disallowed_tools: list[str] | None = None,
-        response_tool_name: str | None = None,
-        # Messages
-        system_prompt: str | None = None,
-        append_setup_output: bool = True,
-        initial_screenshot: bool = True,
-        # Misc
-        model_name: str = "mcp-agent",
-        checkpoint_name: str | None = None,
-        response_agent: ResponseAgent | None = None,
-        auto_trace: bool = True,
-        verbose: bool = False,
-    ) -> None:
-        """
-        Initialize the base MCP agent.
+    def __init__(self, params: BaseCreateParams | None = None, **kwargs: Any) -> None:
+        if params is None:
+            import warnings
-        Args:
-            mcp_client: Client for connecting to MCP servers. If None, a client
-                is auto-created at runtime when `run()` is called with a `Task`
-                that provides `mcp_config`.
-            allowed_tools: Names of tools to allow (None means allow all).
-            disallowed_tools: Names of tools to always exclude.
-            response_tool_name: Name of the tool to use for response.
-            system_prompt: System prompt to seed the conversation.
-            append_setup_output: Whether to append setup tool output to the
-                first turn's messages.
-            initial_screenshot: Whether to include an initial screenshot before
-                the first prompt (when supported by the environment).
-            model_name: Label used in telemetry/logging to identify the model.
-            response_agent: Optional automation that can respond to the model's
-                outputs to keep the loop going (e.g., auto-continue/stop).
-            auto_trace: If True, automatically creates a trace/span for runs.
-            verbose: If True, increases logging verbosity for developer UX.
-        """
+            warnings.warn(
+                f"Passing kwargs to {self.__class__.__name__}() is deprecated. "
+                f"Use {self.__class__.__name__}.create(...) instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            CreateParams = type(
+                f"{self.config_cls.__name__}CreateParams",
+                (BaseCreateParams, self.config_cls),
+                {"__module__": self.config_cls.__module__},
+            )
+            params = CreateParams(**kwargs)
+        config_kwargs = {
+            k: getattr(params, k) for k in self.config_cls.model_fields if hasattr(params, k)
+        }
+        self.config = self.config_cls(**config_kwargs)
-        self.mcp_client = mcp_client
-        self._auto_created_client = False  # Track if we created the client
+        self.mcp_client = params.mcp_client
+        self.model_name: str = getattr(params, "model_name", "MCPAgent")
+        self.checkpoint_name: str = getattr(params, "checkpoint_name", "unknown")
+        self.auto_respond = params.auto_respond
-        self.model_name = model_name
-        self.checkpoint_name = checkpoint_name
         self.console = HUDConsole(logger=logger)
-        # Set verbose mode if requested
-        if verbose:
+        if params.verbose:
             self.console.set_verbose(True)
-        # User filtering
-        self.allowed_tools: list[str] | None = allowed_tools
-        self.disallowed_tools: list[str] | None = disallowed_tools
-        self._available_tools: list[types.Tool] | None = None
-        # Messages
-        self.system_prompt = system_prompt
-        self.append_setup_output = append_setup_output
-        self.initial_screenshot = initial_screenshot
+        self.allowed_tools = self.config.allowed_tools
+        self.disallowed_tools = self.config.disallowed_tools
+        self.system_prompt = self.config.system_prompt
+        self.append_setup_output = self.config.append_setup_output
+        self.initial_screenshot = self.config.initial_screenshot
+        self.response_tool_name = self.config.response_tool_name
-        # Initialize these here so methods can be called before initialize()
-        self._tool_map: dict[str, types.Tool] = {}  # Simplified: just name to tool
-        self.response_tool_name = response_tool_name
+        self._available_tools: list[types.Tool] | None = None
+        self._tool_map: dict[str, types.Tool] = {}
         # Trace
-        self._auto_trace = auto_trace
-        self._auto_trace_cm: Any | None = None  # Store auto-created trace context manager
+        self._auto_trace = params.auto_trace
+        self._auto_trace_cm: Any | None = None
-        # Response agent to automatically interact with the model
-        self.response_agent = response_agent
+    @classmethod
+    def create(cls, **kwargs: Any) -> MCPAgent:
+        """
+        Factory method to create an agent with typed parameters.
+        """
+        CreateParams = type(
+            f"{cls.config_cls.__name__}CreateParams",
+            (BaseCreateParams, cls.config_cls),
+            {"__module__": cls.config_cls.__module__},
+        )
+        return cls(params=CreateParams(**kwargs))
     async def initialize(self, task: str | Task | None = None) -> None:
         """Initialize the agent with task-specific configuration."""
@@ -129,7 +126,6 @@ class MCPAgent(ABC):
             from hud.clients import MCPClient
             self.mcp_client = MCPClient(mcp_config=task.mcp_config)
-            self._auto_created_client = True
             self.console.debug("Auto-created MCPClient from task.mcp_config")
         # Ensure we have a client
@@ -148,41 +144,41 @@ class MCPAgent(ABC):
         try:
             await self.mcp_client.initialize()
         except Exception as e:
+            self.console.error_log(f"Failed to initialize MCP client: {e}")
             self._handle_connection_error(e)
         # If task is provided, apply agent_config and add lifecycle tools
         if isinstance(task, Task) and task.agent_config:
-            if task.agent_config.get("system_prompt"):
+            agent_cfg = task.agent_config
+            if agent_cfg.system_prompt:
                 if self.system_prompt is None:
-                    self.system_prompt = task.agent_config["system_prompt"]
+                    self.system_prompt = agent_cfg.system_prompt
                 else:
-                    self.system_prompt += "\n\n" + task.agent_config["system_prompt"]
-            if "append_setup_output" in task.agent_config:
-                self.append_setup_output = task.agent_config["append_setup_output"]
-            if "initial_screenshot" in task.agent_config:
-                self.initial_screenshot = task.agent_config["initial_screenshot"]
-            if "allowed_tools" in task.agent_config:
+                    self.system_prompt += "\n\n" + agent_cfg.system_prompt
+            if "append_setup_output" in agent_cfg.model_fields_set:
+                self.append_setup_output = agent_cfg.append_setup_output
+            if "initial_screenshot" in agent_cfg.model_fields_set:
+                self.initial_screenshot = agent_cfg.initial_screenshot
+            if agent_cfg.allowed_tools is not None:
                 # If allowed_tools has already been set, we take the intersection of the two
                 # If the list had been empty, we were allowing all tools, so we overwrite this
                 if isinstance(self.allowed_tools, list) and len(self.allowed_tools) > 0:
                     # If task allows "*", keep CLI's allowed_tools unchanged
-                    if "*" not in task.agent_config["allowed_tools"]:
+                    if "*" not in agent_cfg.allowed_tools:
                         self.allowed_tools = [
-                            tool
-                            for tool in self.allowed_tools
-                            if tool in task.agent_config["allowed_tools"]
+                            tool for tool in self.allowed_tools if tool in agent_cfg.allowed_tools
                         ]
                     # else: task allows all tools, so CLI's allowed_tools takes precedence
                 else:  # If allowed_tools is None, we overwrite it
-                    self.allowed_tools = task.agent_config["allowed_tools"]
-            if "disallowed_tools" in task.agent_config:
+                    self.allowed_tools = agent_cfg.allowed_tools
+            if agent_cfg.disallowed_tools is not None:
                 # If disallowed_tools has already been set, we take the union of the two
                 if isinstance(self.disallowed_tools, list):
-                    self.disallowed_tools.extend(task.agent_config["disallowed_tools"])
+                    self.disallowed_tools.extend(agent_cfg.disallowed_tools)
                 else:  # If disallowed_tools is None, we overwrite it
-                    self.disallowed_tools = task.agent_config["disallowed_tools"]
-            if "response_tool_name" in task.agent_config:
-                self.response_tool_name = task.agent_config["response_tool_name"]
+                    self.disallowed_tools = agent_cfg.disallowed_tools
+            if agent_cfg.response_tool_name is not None:
+                self.response_tool_name = agent_cfg.response_tool_name
         all_tools = await self.mcp_client.list_tools()
         self._available_tools = []
@@ -201,6 +197,15 @@ class MCPAgent(ABC):
                 continue
             self._available_tools.append(tool)
+        # Validate required tools are present
+        available_tool_names = {t.name for t in self._available_tools}
+        missing_tools = [tool for tool in self.required_tools if tool not in available_tool_names]
+        if missing_tools:
+            raise ValueError(
+                f"Required tools are missing: {missing_tools}. "
+                f"Available tools: {sorted(available_tool_names)}"
+            )
         self.console.info(
             f"Agent initialized with {len(self.get_available_tools())} tools: {', '.join([t.name for t in self.get_available_tools()])}"  # noqa: E501
         )
@@ -290,6 +295,10 @@ class MCPAgent(ABC):
                 self.console.progress_log(f"Setting up tool phase: {task.setup_tool}")
                 results = await self.call_tools(task.setup_tool)
                 if any(result.isError for result in results):
+                    for result in results:
+                        if result.isError:
+                            self.console.error_log(f"Error in setup tool: {result}")
                     return Trace(
                         reward=0.0,
                         done=True,
@@ -389,6 +398,8 @@ class MCPAgent(ABC):
         final_response = None
         error = None
+        messages: list[Any] = []
         try:
             # Start with system messages
             messages = await self.get_system_messages()
@@ -413,15 +424,16 @@ class MCPAgent(ABC):
                     # Check if we should stop
                     if response.done or not response.tool_calls:
-                        # Optional external ResponseAgent to decide whether to stop
-                        decision = "STOP"
-                        if self.response_agent is not None and response.content:
+                        # Use auto_respond to decide whether to stop
+                        decision: Literal["STOP", "CONTINUE"] = "STOP"
+                        if self.auto_respond and response.content:
                             try:
-                                decision = await self.response_agent.determine_response(
-                                    response.content
-                                )
+                                from hud.agents.misc import ResponseAgent
+                                response_agent = ResponseAgent()
+                                decision = await response_agent.determine_response(response.content)
                             except Exception as e:
-                                self.console.warning_log(f"ResponseAgent failed: {e}")
+                                self.console.warning_log(f"Auto-respond failed: {e}")
                         if decision == "STOP":
                             # Try to submit response through lifecycle tool
                             await self._maybe_submit_response(response, messages)
@@ -436,11 +448,7 @@ class MCPAgent(ABC):
                     # 2. Execute tools
                     tool_calls = response.tool_calls
-                    for tool_call in tool_calls:
-                        self.console.info_log(f"{tool_call}")
                     tool_results = await self.call_tools(tool_calls)
-                    for tool_result in tool_results:
-                        self.console.info_log(f"{tool_result}")
                     # 3. Format tool results and add to messages
                     tool_messages = await self.format_tool_results(tool_calls, tool_results)
@@ -699,8 +707,8 @@ class MCPAgent(ABC):
             finally:
                 self._auto_trace_cm = None
-        # Clean up auto-created client
-        if self._auto_created_client and self.mcp_client:
+        # Always clean up the client
+        if self.mcp_client:
             try:
                 await self.mcp_client.shutdown()
                 self.console.debug("Closed auto-created MCPClient")
@@ -708,7 +716,6 @@ class MCPAgent(ABC):
                 self.console.warning_log(f"Failed to close auto-created client: {e}")
             finally:
                 self.mcp_client = None
-                self._auto_created_client = False
     def _is_connection_error(self, e: Exception) -> bool:
         """Check if an exception is a connection error."""

hud-python 0.4.66__tar.gz → 0.4.68__tar.gz

hud-python 0.4.66__tar.gz → 0.4.68__tar.gz