PyPI - hud-python - Versions diffs - 0.4.28__py3-none-any.whl → 0.4.29__py3-none-any.whl - Mend

hud-python 0.4.28__py3-none-any.whl → 0.4.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (75)

hud/__init__.py +2 -1
hud/agents/base.py +73 -45
hud/agents/claude.py +8 -4
hud/agents/openai_chat_generic.py +65 -40
hud/agents/tests/test_base.py +0 -4
hud/agents/tests/test_openai.py +1 -1
hud/cli/__init__.py +182 -52
hud/cli/dev.py +8 -9
hud/cli/eval.py +317 -119
hud/cli/flows/__init__.py +0 -0
hud/cli/flows/tasks.py +0 -0
hud/cli/get.py +160 -0
hud/cli/rl/__init__.py +563 -71
hud/cli/rl/config.py +94 -0
hud/cli/rl/display.py +133 -0
hud/cli/rl/gpu.py +63 -0
hud/cli/rl/gpu_utils.py +318 -0
hud/cli/rl/presets.py +96 -0
hud/cli/rl/remote_runner.py +348 -0
hud/cli/rl/rl_api.py +150 -0
hud/cli/rl/vllm.py +177 -0
hud/cli/tests/test_analyze_metadata.py +0 -1
hud/cli/utils/tasks.py +26 -0
hud/clients/base.py +21 -23
hud/clients/mcp_use.py +36 -44
hud/clients/tests/test_mcp_use_retry.py +10 -10
hud/datasets/__init__.py +4 -3
hud/datasets/{execution/parallel.py → parallel.py} +1 -1
hud/datasets/{execution/runner.py → runner.py} +1 -1
hud/datasets/utils.py +1 -1
hud/native/tests/test_native_init.py +1 -1
hud/otel/config.py +1 -1
hud/otel/instrumentation.py +35 -0
hud/rl/README.md +31 -0
hud/rl/__init__.py +1 -0
hud/rl/actor.py +174 -0
hud/rl/buffer.py +371 -0
hud/rl/chat_template.jinja +101 -0
hud/rl/config.py +184 -0
hud/rl/distributed.py +95 -0
hud/rl/learner.py +586 -0
hud/rl/tests/__init__.py +1 -0
hud/rl/tests/test_learner.py +171 -0
hud/rl/train.py +354 -0
hud/rl/types.py +101 -0
hud/rl/utils/start_vllm_server.sh +30 -0
hud/rl/utils.py +524 -0
hud/rl/vllm_adapter.py +125 -0
hud/settings.py +6 -0
hud/telemetry/__init__.py +2 -1
hud/telemetry/job.py +46 -3
hud/telemetry/tests/test_trace.py +3 -3
hud/telemetry/trace.py +85 -13
hud/tools/tests/test_computer.py +3 -3
hud/tools/tests/test_computer_actions.py +1 -1
hud/types.py +123 -2
hud/utils/group_eval.py +223 -0
hud/utils/hud_console.py +113 -13
hud/utils/tasks.py +119 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/METADATA +20 -2
{hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/RECORD +66 -46
hud/cli/hf.py +0 -406
hud/cli/rl/README.md +0 -243
hud/cli/rl/init.py +0 -370
hud/cli/rl/pod.py +0 -501
hud/cli/rl/ssh.py +0 -322
hud/cli/rl/train.py +0 -562
hud/cli/rl/utils.py +0 -165
hud/datasets/execution/__init__.py +0 -13
hud/datasets/task.py +0 -116
{hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/WHEEL +0 -0
{hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.28.dist-info → hud_python-0.4.29.dist-info}/licenses/LICENSE +0 -0

hud/__init__.py CHANGED Viewed

@@ -5,9 +5,10 @@ tools for building, evaluating, and training AI agents.
 from __future__ import annotations
-from .telemetry import clear_trace, create_job, get_trace, instrument, job, trace
+from .telemetry import Trace, clear_trace, create_job, get_trace, instrument, job, trace
 __all__ = [
+    "Trace",
     "clear_trace",
     "create_job",
     "get_trace",

hud/agents/base.py CHANGED Viewed

@@ -45,7 +45,7 @@ class MCPAgent(ABC):
     `format_blocks`, and `format_tool_results`.
     """
-    metadata: dict[str, Any]
+    metadata: dict[str, Any] | None = None
     required_tools: ClassVar[list[str]] = []  # Tools that must be available
     def __init__(
@@ -54,7 +54,6 @@ class MCPAgent(ABC):
         # Filtering
         allowed_tools: list[str] | None = None,
         disallowed_tools: list[str] | None = None,
-        lifecycle_tools: list[str] | None = None,
         # Messages
         system_prompt: str = GLOBAL_SYSTEM_PROMPT,
         append_setup_output: bool = True,
@@ -74,8 +73,6 @@ class MCPAgent(ABC):
                 that provides `mcp_config`.
             allowed_tools: Names of tools to allow (None means allow all).
             disallowed_tools: Names of tools to always exclude.
-            lifecycle_tools: Tools reserved for lifecycle phases (e.g., setup,
-                evaluate). These are hidden from normal tool calling.
             system_prompt: System prompt to seed the conversation.
             append_setup_output: Whether to append setup tool output to the
                 first turn's messages.
@@ -98,10 +95,13 @@ class MCPAgent(ABC):
         if verbose:
             self.console.set_verbose(True)
-        # Filtering
+        # User filtering
         self.allowed_tools = allowed_tools
         self.disallowed_tools = disallowed_tools or []
-        self.lifecycle_tools = lifecycle_tools or []
+        # Task filtering
+        self.agent_tools = None
+        self.lifecycle_tools = []
         # Messages
         self.system_prompt = system_prompt
@@ -112,7 +112,6 @@ class MCPAgent(ABC):
         self._available_tools: list[types.Tool] = []
         self._tool_map: dict[str, types.Tool] = {}  # Simplified: just name to tool
         self.response_tool_name = None
-        self.initialization_complete = False
         # Trace
         self._auto_trace = auto_trace
@@ -131,7 +130,7 @@ class MCPAgent(ABC):
             self.mcp_client = MCPClient(mcp_config=task.mcp_config)
             self._auto_created_client = True
-            self.console.info_log("Auto-created MCPClient from task.mcp_config")
+            self.console.debug("Auto-created MCPClient from task.mcp_config")
         # Ensure we have a client
         if self.mcp_client is None:
@@ -149,17 +148,21 @@ class MCPAgent(ABC):
         # If task is provided, add lifecycle tools
         if isinstance(task, Task):
+            if task.agent_tools:
+                self.agent_tools = task.agent_tools
             if task.setup_tool:
                 if isinstance(task.setup_tool, list):
                     for tool in task.setup_tool:
-                        self.lifecycle_tools.append(tool.name)
-                else:
+                        if self.agent_tools and tool.name not in self.agent_tools:
+                            self.lifecycle_tools.append(tool.name)
+                elif self.agent_tools and task.setup_tool.name not in self.agent_tools:
                     self.lifecycle_tools.append(task.setup_tool.name)
             if task.evaluate_tool:
                 if isinstance(task.evaluate_tool, list):
                     for tool in task.evaluate_tool:
-                        self.lifecycle_tools.append(tool.name)
-                else:
+                        if self.agent_tools and tool.name not in self.agent_tools:
+                            self.lifecycle_tools.append(tool.name)
+                elif self.agent_tools and task.evaluate_tool.name not in self.agent_tools:
                     self.lifecycle_tools.append(task.evaluate_tool.name)
             if task.system_prompt:
                 self.system_prompt += "\n\n" + task.system_prompt
@@ -167,11 +170,6 @@ class MCPAgent(ABC):
         # Re-apply filtering with updated lifecycle tools
         await self._filter_tools()
-        num_tools = len(self._available_tools)
-        self.console.success_log(
-            f"Agent initialized with {num_tools} available tools (after filtering)"
-        )
     async def run(self, prompt_or_task: str | Task | dict[str, Any], max_steps: int = 10) -> Trace:
         """
         Run the agent with the given prompt or task.
@@ -188,12 +186,12 @@ class MCPAgent(ABC):
         if isinstance(prompt_or_task, dict):
             prompt_or_task = Task(**prompt_or_task)
+        elif not isinstance(prompt_or_task, str) and not isinstance(prompt_or_task, Task):
+            raise TypeError(f"prompt_or_task must be str or Task, got {type(prompt_or_task)}")
         try:
             # Establish the connection with the MCP server/Environment
-            if not self.initialization_complete:
-                await self.initialize(prompt_or_task)
-                self.initialization_complete = True
+            await self.initialize(prompt_or_task)
             # Handle Task objects with full lifecycle
             if isinstance(prompt_or_task, Task):
@@ -204,8 +202,6 @@ class MCPAgent(ABC):
                 context = text_to_blocks(prompt_or_task)
                 return await self._run_context(context, max_steps=max_steps)
-            else:
-                raise TypeError(f"prompt_or_task must be str or Task, got {type(prompt_or_task)}")
         except Exception as e:
             # Always return a Trace object for any exception
             if self._is_connection_error(e):
@@ -240,8 +236,6 @@ class MCPAgent(ABC):
         Returns:
             Trace with reward from evaluation
         """
-        prompt_result = None
         try:
             # Setup phase
             start_context: list[types.ContentBlock] = []
@@ -255,7 +249,13 @@ class MCPAgent(ABC):
                 self.console.progress_log(f"Setting up tool phase: {task.setup_tool}")
                 results = await self.call_tools(task.setup_tool)
                 if any(result.isError for result in results):
-                    raise RuntimeError(f"{results}")
+                    return Trace(
+                        reward=0.0,
+                        done=True,
+                        content=f"Setup tool failed: {results}",
+                        isError=True,
+                        task=task,
+                    )
                 if self.append_setup_output and isinstance(results[0].content, list):
                     start_context.extend(results[0].content)
@@ -268,13 +268,12 @@ class MCPAgent(ABC):
         except Exception as e:
             self.console.error_log(f"Task execution failed: {e}")
             # Create an error result but don't return yet - we still want to evaluate
-            prompt_result = Trace(reward=0.0, done=True, content=str(e), isError=True)
+            prompt_result = Trace(reward=0.0, done=True, content=str(e), isError=True, task=task)
             prompt_result.populate_from_context()
         # Always evaluate if we have evaluate tool, regardless of errors
         if task.evaluate_tool is not None:
             try:
-                self.console.progress_log(f"Evaluating tool phase: {task.evaluate_tool}")
                 results = await self.call_tools(task.evaluate_tool)
                 if any(result.isError for result in results):
@@ -286,18 +285,24 @@ class MCPAgent(ABC):
                             done=True,
                             content="Task failed before evaluation",
                             isError=True,
+                            task=task,
                         )
                     prompt_result.reward = 0.0  # Default to 0 on error
                 else:
                     # Extract reward and content from evaluation
                     if results:
                         reward = find_reward(results[0])
+                        self.console.info_log(f"Eval: {reward:.4f} {task.evaluate_tool}")
                         eval_content = find_content(results[0])
                         # Update the prompt result with evaluation reward
                         if prompt_result is None:
                             prompt_result = Trace(
-                                reward=reward, done=True, content=eval_content or "", isError=False
+                                reward=reward,
+                                done=True,
+                                content=eval_content or "",
+                                isError=False,
+                                task=task,
                             )
                         else:
                             prompt_result.reward = reward
@@ -316,14 +321,16 @@ class MCPAgent(ABC):
                 # Ensure we have a result even if evaluation failed
                 if prompt_result is None:
                     prompt_result = Trace(
-                        reward=0.0, done=True, content=f"Evaluation failed: {e}", isError=True
+                        reward=0.0,
+                        done=True,
+                        content=f"Evaluation failed: {e}",
+                        isError=True,
+                        task=task,
                     )
-        return (
-            prompt_result
-            if prompt_result
-            else Trace(reward=0.0, done=True, content="No result available", isError=True)
-        )
+        prompt_result.task = task
+        return prompt_result
     async def _run_context(
         self, context: list[types.ContentBlock], *, max_steps: int = 10
@@ -388,7 +395,11 @@ class MCPAgent(ABC):
                     # 2. Execute tools
                     tool_calls = response.tool_calls
+                    for tool_call in tool_calls:
+                        self.console.info_log(f"{tool_call}")
                     tool_results = await self.call_tools(tool_calls)
+                    for tool_result in tool_results:
+                        self.console.info_log(f"{tool_result}")
                     # 3. Format tool results and add to messages
                     tool_messages = await self.format_tool_results(tool_calls, tool_results)
@@ -422,13 +433,23 @@ class MCPAgent(ABC):
             error = str(e)
         # Build result
-        trace_result = Trace(
-            reward=0.0,  # Default - will be set by task evaluation if applicable
-            done=True,
-            content=final_response.content if final_response else None,
-            isError=error is not None,
-            info={"error": error} if error else {},
-        )
+        if error is not None or (
+            final_response and hasattr(final_response, "isError") and final_response.isError
+        ):
+            is_error = True
+        else:
+            is_error = False
+        # Ensure all parameters are the correct type
+        trace_params = {
+            "reward": 0.0,
+            "done": True,
+            "messages": messages,
+            "content": final_response.content if final_response else error,
+            "isError": is_error,
+            "info": {"error": error} if error else {},
+        }
+        trace_result = Trace(**trace_params)
         # Populate trace steps from current context
         trace_result.populate_from_context()
@@ -474,16 +495,14 @@ class MCPAgent(ABC):
         return results
     @abstractmethod
-    async def get_system_messages(self) -> list[Any]:
+    async def get_system_messages(self) -> list[types.ContentBlock]:
         """
         Get the system prompt.
         """
         raise NotImplementedError
     @abstractmethod
-    async def get_response(
-        self, messages: list[Any]
-    ) -> AgentResponse:  # maybe type messages as list[types.ContentBlock]
+    async def get_response(self, messages: list[Any]) -> AgentResponse:
         """
         Get response from the model including any tool calls.
@@ -607,6 +626,7 @@ class MCPAgent(ABC):
         self.console.debug(f"All tools: {[t.name for t in all_tools]}")
         self.console.debug(f"Allowed tools: {self.allowed_tools}")
+        self.console.debug(f"Agent tools: {self.agent_tools}")
         self.console.debug(f"Disallowed tools: {self.disallowed_tools}")
         self.console.debug(f"Lifecycle tools: {self.lifecycle_tools}")
@@ -619,6 +639,9 @@ class MCPAgent(ABC):
                 if self.allowed_tools and tool.name not in self.allowed_tools:
                     self.console.debug(f"Skipping tool '{tool.name}' - not in allowed_tools")
                     continue
+                if self.agent_tools and tool.name not in self.agent_tools:
+                    self.console.debug(f"Skipping tool '{tool.name}' - not in agent_tools")
+                    continue
                 if tool.name in self.disallowed_tools:
                     self.console.debug(f"Skipping tool '{tool.name}' - in disallowed_tools")
                     continue
@@ -641,6 +664,11 @@ class MCPAgent(ABC):
                     f"Available tools: {list(available_tool_names)}"
                 )
+        available_tools = self.get_available_tools()
+        self.console.info(
+            f"Agent initialized with {len(available_tools)} tools: {', '.join([t.name for t in available_tools])}"  # noqa: E501
+        )
     async def _maybe_submit_response(self, response: AgentResponse, messages: list[Any]) -> None:
         """Submit response through lifecycle tool if available.

hud/agents/claude.py CHANGED Viewed

@@ -28,6 +28,7 @@ import mcp.types as types
 from hud.settings import settings
 from hud.tools.computer.settings import computer_settings
 from hud.types import AgentResponse, MCPToolCall, MCPToolResult
+from hud.utils.hud_console import HUDConsole
 from .base import MCPAgent
@@ -78,6 +79,7 @@ class ClaudeAgent(MCPAgent):
         self.model = model
         self.max_tokens = max_tokens
         self.use_computer_beta = use_computer_beta
+        self.hud_console = HUDConsole(logger=logger)
         self.model_name = self.model
@@ -149,7 +151,7 @@ class ClaudeAgent(MCPAgent):
                 )
             else:
                 # For other types, try to cast but log a warning
-                logger.warning("Unknown content block type: %s", type(block))
+                self.hud_console.log(f"Unknown content block type: {type(block)}", level="warning")
                 anthropic_blocks.append(cast("BetaContentBlockParam", block))
         return [
@@ -201,7 +203,7 @@ class ClaudeAgent(MCPAgent):
                     or "request_too_large" in str(e)
                     or e.status_code == 413
                 ):
-                    logger.warning("Prompt too long, truncating message history")
+                    self.hud_console.warning("Prompt too long, truncating message history")
                     # Keep first message and last 20 messages
                     if len(current_messages) > 21:
                         current_messages = [current_messages[0], *current_messages[-20:]]
@@ -266,7 +268,7 @@ class ClaudeAgent(MCPAgent):
             # Extract Claude-specific metadata from extra fields
             tool_use_id = tool_call.id
             if not tool_use_id:
-                logger.warning("No tool_use_id found for %s", tool_call.name)
+                self.hud_console.warning(f"No tool_use_id found for {tool_call.name}")
                 continue
             # Convert MCP tool results to Claude format
@@ -335,7 +337,9 @@ class ClaudeAgent(MCPAgent):
             # Map Claude's "computer" back to the actual MCP tool name
             self._claude_to_mcp_tool_map["computer"] = selected_computer_tool.name
             claude_tools.append(claude_tool)
-            logger.debug("Using %s as computer tool for Claude", selected_computer_tool.name)
+            self.hud_console.debug(
+                f"Using {selected_computer_tool.name} as computer tool for Claude"
+            )
         # Add other non-computer tools
         for tool in self._available_tools:

hud/agents/openai_chat_generic.py CHANGED Viewed

@@ -23,6 +23,7 @@ import mcp.types as types
 from hud import instrument
 from hud.types import AgentResponse, MCPToolCall, MCPToolResult
+from hud.utils.hud_console import HUDConsole
 from .base import MCPAgent
@@ -43,7 +44,6 @@ class GenericOpenAIChatAgent(MCPAgent):
         *,
         openai_client: AsyncOpenAI,
         model_name: str = "gpt-4o-mini",
-        parallel_tool_calls: bool = False,
         completion_kwargs: dict[str, Any] | None = None,
         **agent_kwargs: Any,
     ) -> None:
@@ -51,17 +51,22 @@ class GenericOpenAIChatAgent(MCPAgent):
         super().__init__(**agent_kwargs)
         self.oai = openai_client
         self.model_name = model_name
-        self.parallel_tool_calls = parallel_tool_calls
         self.completion_kwargs: dict[str, Any] = completion_kwargs or {}
-        self.conversation_history = []
+        self.mcp_schemas = []
+        self.hud_console = HUDConsole(logger=logger)
     @staticmethod
     def _oai_to_mcp(tool_call: Any) -> MCPToolCall:  # type: ignore[valid-type]
         """Convert an OpenAI ``tool_call`` to :class:`MCPToolCall`."""
+        args = json.loads(tool_call.function.arguments or "{}")
+        if isinstance(args, list):
+            args = args[0]
+        if not isinstance(args, dict):
+            args = {}
         return MCPToolCall(
             id=tool_call.id,
             name=tool_call.function.name,
-            arguments=json.loads(tool_call.function.arguments or "{}"),
+            arguments=args,
         )
     async def get_system_messages(self) -> list[Any]:
@@ -177,45 +182,65 @@ class GenericOpenAIChatAgent(MCPAgent):
         # Convert MCP tool schemas to OpenAI format
         mcp_schemas = self.get_tool_schemas()
-        protected_keys = {"model", "messages", "tools", "parallel_tool_calls"}
+        protected_keys = {"model", "messages", "tools"}
         extra = {k: v for k, v in (self.completion_kwargs or {}).items() if k not in protected_keys}
-        response = await self.oai.chat.completions.create(
-            model=self.model_name,
-            messages=messages,
-            tools=cast("list[ChatCompletionToolParam]", mcp_schemas),
-            parallel_tool_calls=self.parallel_tool_calls,
-            **extra,
-        )
+        try:
+            response = await self.oai.chat.completions.create(
+                model=self.model_name,
+                messages=messages,
+                tools=cast("list[ChatCompletionToolParam]", mcp_schemas),
+                **extra,
+            )
+        except Exception as e:
+            error_content = f"Error getting response {e}"
+            if "Invalid JSON" in str(e):
+                error_content = "Invalid JSON, response was truncated"
+            self.hud_console.warning_log(error_content)
+            return AgentResponse(
+                content=error_content,
+                tool_calls=[],
+                done=True,
+                isError=True,
+                raw=None,
+            )
         choice = response.choices[0]
         msg = choice.message
         assistant_msg: dict[str, Any] = {"role": "assistant"}
         if msg.content:
             assistant_msg["content"] = msg.content
         if msg.tool_calls:
-            assistant_msg["tool_calls"] = msg.tool_calls
+            serialized_tool_calls = []
+            for tc in msg.tool_calls:
+                serialized_tc = {
+                    "id": tc.id,
+                    "type": "function",
+                    "function": {"name": tc.function.name, "arguments": tc.function.arguments},
+                }
+                serialized_tool_calls.append(serialized_tc)
+            assistant_msg["tool_calls"] = serialized_tool_calls
         messages.append(assistant_msg)
-        # Store the complete conversation history
-        self.conversation_history = messages.copy()
         tool_calls = []
         if msg.tool_calls:
             for tc in msg.tool_calls:
                 if tc.function.name is not None:  # type: ignore
-                    tool_calls.append(self._oai_to_mcp(tc))
-                    if not self.parallel_tool_calls:
-                        break
+                    tool_calls.extend(self._oai_to_mcp(tc))
+        # Only stop on length (token limit), never on "stop"
+        done = choice.finish_reason == "length"
+        if done:
+            self.hud_console.info_log(f"Done decision: finish_reason={choice.finish_reason}")
         return AgentResponse(
             content=msg.content or "",
             tool_calls=tool_calls,
-            done=choice.finish_reason in ("stop", "length"),
+            done=done,
             raw=response,  # Include raw response for access to Choice objects
         )
@@ -230,15 +255,15 @@ class GenericOpenAIChatAgent(MCPAgent):
         When images are present, we return both a tool message and a user message.
         """
         rendered: list[dict[str, Any]] = []
+        # Separate text and image content
+        image_parts = []
         for call, res in zip(tool_calls, tool_results, strict=False):
             # Use structuredContent.result if available, otherwise use content
-            items = res.content
-            if res.structuredContent and isinstance(res.structuredContent, dict):
-                items = res.structuredContent.get("result", res.content)
-            # Separate text and image content
             text_parts = []
-            image_parts = []
+            items = res.content
+            if not res.content and res.structuredContent:
+                items = [res.structuredContent.get("result", res.content)]
             for item in items:
                 if isinstance(item, dict):
@@ -272,18 +297,18 @@ class GenericOpenAIChatAgent(MCPAgent):
                 }
             )
-            # If there are images, add them as a separate user message
-            if image_parts:
-                # Add a user message with the images
-                content_with_images = [
-                    {"type": "text", "text": "Tool returned the following:"},
-                    *image_parts,
-                ]
-                rendered.append(
-                    {
-                        "role": "user",
-                        "content": content_with_images,
-                    }
-                )
+        # If there are images, add them as a separate user message
+        if image_parts:
+            # Add a user message with the images
+            content_with_images = [
+                {"type": "text", "text": "Tool returned the following:"},
+                image_parts[-1],
+            ]
+            rendered.append(
+                {
+                    "role": "user",
+                    "content": content_with_images,
+                }
+            )
         return rendered

hud/agents/tests/test_base.py CHANGED Viewed

@@ -97,7 +97,6 @@ class TestBaseMCPAgent:
         assert agent.disallowed_tools == []
         assert agent.initial_screenshot is True
         assert agent.system_prompt is not None  # Default system prompt is set
-        assert agent.lifecycle_tools == []
     def test_init_with_params(self):
         """Test initialization with custom parameters."""
@@ -108,7 +107,6 @@ class TestBaseMCPAgent:
             disallowed_tools=["bad_tool"],
             initial_screenshot=True,
             system_prompt="Custom prompt",
-            lifecycle_tools=["custom_setup", "custom_eval"],
         )
         assert agent.mcp_client == client
@@ -116,7 +114,6 @@ class TestBaseMCPAgent:
         assert agent.disallowed_tools == ["bad_tool"]
         assert agent.initial_screenshot is True
         assert agent.system_prompt == "Custom prompt"
-        assert agent.lifecycle_tools == ["custom_setup", "custom_eval"]
     @pytest.mark.asyncio
     async def test_init_no_client_no_task(self):
@@ -631,7 +628,6 @@ class TestMCPAgentExtended:
         # Lifecycle tools are specified by name, not as objects
         agent = MockAgentExtended(
             mcp_client=mock_client,
-            lifecycle_tools=["screenshot"],  # Use tool name
             responses=[{"role": "assistant", "content": "Done", "tool_calls": []}],
         )

hud/agents/tests/test_openai.py CHANGED Viewed

@@ -156,7 +156,7 @@ class TestOperatorAgent:
         messages = [{"prompt": "What's on the screen?", "screenshot": None}]
         response = await agent.get_response(messages)
-        assert response.content == "I can see the screen content."
+        assert response.content[0].text == "I can see the screen content."
         assert response.done is True
     @pytest.mark.asyncio

hud-python 0.4.28__py3-none-any.whl → 0.4.29__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.28__py3-none-any.whl → 0.4.29__py3-none-any.whl