PyPI - hud-python - Versions diffs - 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl - Mend

hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (282) hide show

hud/__init__.py +27 -7
hud/agents/__init__.py +70 -5
hud/agents/base.py +238 -500
hud/agents/claude.py +236 -247
hud/agents/gateway.py +42 -0
hud/agents/gemini.py +264 -0
hud/agents/gemini_cua.py +324 -0
hud/agents/grounded_openai.py +98 -100
hud/agents/misc/integration_test_agent.py +51 -20
hud/agents/misc/response_agent.py +48 -36
hud/agents/openai.py +282 -296
hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
hud/agents/operator.py +199 -0
hud/agents/resolver.py +70 -0
hud/agents/tests/conftest.py +133 -0
hud/agents/tests/test_base.py +300 -622
hud/agents/tests/test_base_runtime.py +233 -0
hud/agents/tests/test_claude.py +381 -214
hud/agents/tests/test_client.py +9 -10
hud/agents/tests/test_gemini.py +369 -0
hud/agents/tests/test_grounded_openai_agent.py +65 -50
hud/agents/tests/test_openai.py +377 -140
hud/agents/tests/test_operator.py +362 -0
hud/agents/tests/test_resolver.py +192 -0
hud/agents/tests/test_run_eval.py +179 -0
hud/agents/types.py +148 -0
hud/cli/__init__.py +493 -546
hud/cli/analyze.py +43 -5
hud/cli/build.py +699 -113
hud/cli/debug.py +8 -5
hud/cli/dev.py +889 -732
hud/cli/eval.py +793 -667
hud/cli/flows/dev.py +167 -0
hud/cli/flows/init.py +191 -0
hud/cli/flows/tasks.py +153 -56
hud/cli/flows/templates.py +151 -0
hud/cli/flows/tests/__init__.py +1 -0
hud/cli/flows/tests/test_dev.py +126 -0
hud/cli/init.py +60 -58
hud/cli/pull.py +1 -1
hud/cli/push.py +38 -13
hud/cli/rft.py +311 -0
hud/cli/rft_status.py +145 -0
hud/cli/tests/test_analyze.py +5 -5
hud/cli/tests/test_analyze_metadata.py +3 -2
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +110 -8
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_init.py +6 -1
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +140 -0
hud/cli/tests/test_convert.py +361 -0
hud/cli/tests/test_debug.py +12 -10
hud/cli/tests/test_dev.py +197 -0
hud/cli/tests/test_eval.py +251 -0
hud/cli/tests/test_eval_bedrock.py +51 -0
hud/cli/tests/test_init.py +124 -0
hud/cli/tests/test_main_module.py +11 -5
hud/cli/tests/test_mcp_server.py +12 -100
hud/cli/tests/test_push.py +1 -1
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/tests/test_registry.py +1 -1
hud/cli/tests/test_utils.py +1 -1
hud/cli/{rl → utils}/celebrate.py +14 -12
hud/cli/utils/config.py +18 -1
hud/cli/utils/docker.py +130 -4
hud/cli/utils/env_check.py +9 -9
hud/cli/utils/git.py +136 -0
hud/cli/utils/interactive.py +39 -5
hud/cli/utils/metadata.py +70 -1
hud/cli/utils/runner.py +1 -1
hud/cli/utils/server.py +2 -2
hud/cli/utils/source_hash.py +3 -3
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_git.py +142 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +258 -0
hud/cli/{rl → utils}/viewer.py +2 -2
hud/clients/README.md +12 -11
hud/clients/__init__.py +4 -3
hud/clients/base.py +166 -26
hud/clients/environment.py +51 -0
hud/clients/fastmcp.py +13 -6
hud/clients/mcp_use.py +45 -15
hud/clients/tests/test_analyze_scenarios.py +206 -0
hud/clients/tests/test_protocol.py +9 -3
hud/datasets/__init__.py +23 -20
hud/datasets/loader.py +326 -0
hud/datasets/runner.py +198 -105
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_loader.py +221 -0
hud/datasets/tests/test_utils.py +315 -0
hud/datasets/utils.py +270 -90
hud/environment/__init__.py +52 -0
hud/environment/connection.py +258 -0
hud/environment/connectors/__init__.py +33 -0
hud/environment/connectors/base.py +68 -0
hud/environment/connectors/local.py +177 -0
hud/environment/connectors/mcp_config.py +137 -0
hud/environment/connectors/openai.py +101 -0
hud/environment/connectors/remote.py +172 -0
hud/environment/environment.py +835 -0
hud/environment/integrations/__init__.py +45 -0
hud/environment/integrations/adk.py +67 -0
hud/environment/integrations/anthropic.py +196 -0
hud/environment/integrations/gemini.py +92 -0
hud/environment/integrations/langchain.py +82 -0
hud/environment/integrations/llamaindex.py +68 -0
hud/environment/integrations/openai.py +238 -0
hud/environment/mock.py +306 -0
hud/environment/router.py +263 -0
hud/environment/scenarios.py +620 -0
hud/environment/tests/__init__.py +1 -0
hud/environment/tests/test_connection.py +317 -0
hud/environment/tests/test_connectors.py +205 -0
hud/environment/tests/test_environment.py +593 -0
hud/environment/tests/test_integrations.py +257 -0
hud/environment/tests/test_local_connectors.py +242 -0
hud/environment/tests/test_scenarios.py +1086 -0
hud/environment/tests/test_tools.py +208 -0
hud/environment/types.py +23 -0
hud/environment/utils/__init__.py +35 -0
hud/environment/utils/formats.py +215 -0
hud/environment/utils/schema.py +171 -0
hud/environment/utils/tool_wrappers.py +113 -0
hud/eval/__init__.py +67 -0
hud/eval/context.py +727 -0
hud/eval/display.py +299 -0
hud/eval/instrument.py +187 -0
hud/eval/manager.py +533 -0
hud/eval/parallel.py +268 -0
hud/eval/task.py +372 -0
hud/eval/tests/__init__.py +1 -0
hud/eval/tests/test_context.py +178 -0
hud/eval/tests/test_eval.py +210 -0
hud/eval/tests/test_manager.py +152 -0
hud/eval/tests/test_parallel.py +168 -0
hud/eval/tests/test_task.py +291 -0
hud/eval/types.py +65 -0
hud/eval/utils.py +194 -0
hud/patches/__init__.py +19 -0
hud/patches/mcp_patches.py +308 -0
hud/patches/warnings.py +54 -0
hud/samples/browser.py +4 -4
hud/server/__init__.py +2 -1
hud/server/low_level.py +2 -1
hud/server/router.py +164 -0
hud/server/server.py +567 -80
hud/server/tests/test_mcp_server_integration.py +11 -11
hud/server/tests/test_mcp_server_more.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/settings.py +45 -3
hud/shared/exceptions.py +36 -10
hud/shared/hints.py +26 -1
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +40 -31
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +20 -19
hud/telemetry/exporter.py +201 -0
hud/telemetry/instrument.py +165 -253
hud/telemetry/tests/test_eval_telemetry.py +356 -0
hud/telemetry/tests/test_exporter.py +258 -0
hud/telemetry/tests/test_instrument.py +401 -0
hud/tools/__init__.py +18 -2
hud/tools/agent.py +223 -0
hud/tools/apply_patch.py +639 -0
hud/tools/base.py +54 -4
hud/tools/bash.py +2 -2
hud/tools/computer/__init__.py +36 -3
hud/tools/computer/anthropic.py +2 -2
hud/tools/computer/gemini.py +385 -0
hud/tools/computer/hud.py +23 -6
hud/tools/computer/openai.py +20 -21
hud/tools/computer/qwen.py +434 -0
hud/tools/computer/settings.py +37 -0
hud/tools/edit.py +3 -7
hud/tools/executors/base.py +4 -2
hud/tools/executors/pyautogui.py +1 -1
hud/tools/grounding/grounded_tool.py +13 -18
hud/tools/grounding/grounder.py +10 -31
hud/tools/grounding/tests/test_grounded_tool.py +26 -44
hud/tools/jupyter.py +330 -0
hud/tools/playwright.py +18 -3
hud/tools/shell.py +308 -0
hud/tools/tests/test_agent_tool.py +355 -0
hud/tools/tests/test_apply_patch.py +718 -0
hud/tools/tests/test_computer.py +4 -9
hud/tools/tests/test_computer_actions.py +24 -2
hud/tools/tests/test_jupyter_tool.py +181 -0
hud/tools/tests/test_shell.py +596 -0
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/tools/types.py +21 -1
hud/types.py +194 -56
hud/utils/__init__.py +2 -0
hud/utils/env.py +67 -0
hud/utils/hud_console.py +89 -18
hud/utils/mcp.py +15 -58
hud/utils/strict_schema.py +162 -0
hud/utils/tests/test_init.py +1 -2
hud/utils/tests/test_mcp.py +1 -28
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/utils/types.py +20 -0
hud/version.py +1 -1
hud_python-0.5.13.dist-info/METADATA +264 -0
hud_python-0.5.13.dist-info/RECORD +305 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
hud/agents/langchain.py +0 -261
hud/agents/lite_llm.py +0 -72
hud/cli/rl/__init__.py +0 -180
hud/cli/rl/config.py +0 -101
hud/cli/rl/display.py +0 -133
hud/cli/rl/gpu.py +0 -63
hud/cli/rl/gpu_utils.py +0 -321
hud/cli/rl/local_runner.py +0 -595
hud/cli/rl/presets.py +0 -96
hud/cli/rl/remote_runner.py +0 -463
hud/cli/rl/rl_api.py +0 -150
hud/cli/rl/vllm.py +0 -177
hud/cli/rl/wait_utils.py +0 -89
hud/datasets/parallel.py +0 -687
hud/misc/__init__.py +0 -1
hud/misc/claude_plays_pokemon.py +0 -292
hud/otel/__init__.py +0 -35
hud/otel/collector.py +0 -142
hud/otel/config.py +0 -181
hud/otel/context.py +0 -570
hud/otel/exporters.py +0 -369
hud/otel/instrumentation.py +0 -135
hud/otel/processors.py +0 -121
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_processors.py +0 -197
hud/rl/README.md +0 -30
hud/rl/__init__.py +0 -1
hud/rl/actor.py +0 -176
hud/rl/buffer.py +0 -405
hud/rl/chat_template.jinja +0 -101
hud/rl/config.py +0 -192
hud/rl/distributed.py +0 -132
hud/rl/learner.py +0 -637
hud/rl/tests/__init__.py +0 -1
hud/rl/tests/test_learner.py +0 -186
hud/rl/train.py +0 -382
hud/rl/types.py +0 -101
hud/rl/utils/start_vllm_server.sh +0 -30
hud/rl/utils.py +0 -524
hud/rl/vllm_adapter.py +0 -143
hud/telemetry/job.py +0 -352
hud/telemetry/replay.py +0 -74
hud/telemetry/tests/test_replay.py +0 -40
hud/telemetry/tests/test_trace.py +0 -63
hud/telemetry/trace.py +0 -158
hud/utils/agent_factories.py +0 -86
hud/utils/async_utils.py +0 -65
hud/utils/group_eval.py +0 -223
hud/utils/progress.py +0 -149
hud/utils/tasks.py +0 -127
hud/utils/tests/test_async_utils.py +0 -173
hud/utils/tests/test_progress.py +0 -261
hud_python-0.4.45.dist-info/METADATA +0 -552
hud_python-0.4.45.dist-info/RECORD +0 -228
{hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0

hud/agents/openai.py CHANGED Viewed

@@ -2,354 +2,340 @@
 from __future__ import annotations
+import copy
+import json
 import logging
+from inspect import cleandoc
 from typing import Any, ClassVar, Literal
 import mcp.types as types
-from openai import AsyncOpenAI, OpenAI
+from openai import AsyncOpenAI, Omit, OpenAI
 from openai.types.responses import (
-    ResponseComputerToolCall,
+    ApplyPatchToolParam,
+    ComputerToolParam,
+    FunctionShellToolParam,
+    FunctionToolParam,
+    ResponseFunctionCallOutputItemListParam,
+    ResponseInputFileContentParam,
+    ResponseInputImageContentParam,
+    ResponseInputImageParam,
     ResponseInputMessageContentListParam,
     ResponseInputParam,
-    ResponseOutputMessage,
+    ResponseInputTextContentParam,
+    ResponseInputTextParam,
     ResponseOutputText,
     ToolParam,
 )
+from openai.types.responses.response_create_params import ToolChoice  # noqa: TC002
+from openai.types.responses.response_input_param import FunctionCallOutput, Message
+from openai.types.shared_params.reasoning import Reasoning  # noqa: TC002
-import hud
 from hud.settings import settings
-from hud.tools.computer.settings import computer_settings
-from hud.types import AgentResponse, MCPToolCall, MCPToolResult, Trace
+from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult, Trace
+from hud.utils.strict_schema import ensure_strict_json_schema
+from hud.utils.types import with_signature
 from .base import MCPAgent
+from .types import OpenAIConfig, OpenAICreateParams
 logger = logging.getLogger(__name__)
-class OperatorAgent(MCPAgent):
-    """
-    Operator agent that uses MCP servers for tool execution.
+class OpenAIAgent(MCPAgent):
+    """Generic OpenAI agent that can execute MCP tools through the Responses API."""
-    This agent uses OpenAI's Computer Use API format but executes
-    tools through MCP servers instead of direct implementation.
-    """
+    metadata: ClassVar[dict[str, Any] | None] = None
+    config_cls: ClassVar[type[BaseAgentConfig]] = OpenAIConfig
-    metadata: ClassVar[dict[str, Any]] = {
-        "display_width": computer_settings.OPENAI_COMPUTER_WIDTH,
-        "display_height": computer_settings.OPENAI_COMPUTER_HEIGHT,
-    }
-    required_tools: ClassVar[list[str]] = ["openai_computer"]
+    @with_signature(OpenAICreateParams)
+    @classmethod
+    def create(cls, **kwargs: Any) -> OpenAIAgent:  # pyright: ignore[reportIncompatibleMethodOverride]
+        return MCPAgent.create.__func__(cls, **kwargs)  # type: ignore[return-value]
-    def __init__(
-        self,
-        model_client: AsyncOpenAI | None = None,
-        model: str = "computer-use-preview",
-        environment: Literal["windows", "mac", "linux", "browser"] = "linux",
-        validate_api_key: bool = True,
-        **kwargs: Any,
-    ) -> None:
-        """
-        Initialize Operator MCP agent.
-        Args:
-            client: AsyncOpenAI client (created if not provided)
-            model: OpenAI model to use
-            environment: Environment type for computer use
-            display_width: Display width for computer use
-            display_height: Display height for computer use
-            **kwargs: Additional arguments passed to MCPAgent
-        """
-        super().__init__(**kwargs)
+    def __init__(self, params: OpenAICreateParams | None = None, **kwargs: Any) -> None:
+        super().__init__(params, **kwargs)
+        self.config: OpenAIConfig
-        # Initialize client if not provided
+        model_client = self.config.model_client
         if model_client is None:
-            api_key = settings.openai_api_key
-            if not api_key:
-                raise ValueError("OpenAI API key not found. Set OPENAI_API_KEY.")
-            model_client = AsyncOpenAI(api_key=api_key)
+            # Default to HUD gateway when HUD_API_KEY is available
+            if settings.api_key:
+                from hud.agents.gateway import build_gateway_client
-        self.openai_client = model_client
-        self.model = model
-        self.environment = environment
+                model_client = build_gateway_client("openai")
+            elif settings.openai_api_key:
+                model_client = AsyncOpenAI(api_key=settings.openai_api_key)
+            else:
+                raise ValueError(
+                    "No API key found. Set HUD_API_KEY for HUD gateway, "
+                    "or OPENAI_API_KEY for direct OpenAI access."
+                )
+        if self.config.validate_api_key:
+            try:
+                OpenAI(api_key=model_client.api_key).models.list()
+            except Exception as exc:  # pragma: no cover - network validation
+                raise ValueError(f"OpenAI API key is invalid: {exc}") from exc
+        self.openai_client: AsyncOpenAI = model_client
+        self._model = self.config.model
+        self.max_output_tokens = self.config.max_output_tokens
+        self.temperature = self.config.temperature
+        self.reasoning: Reasoning | None = self.config.reasoning
+        self.tool_choice: ToolChoice | None = self.config.tool_choice
+        self.parallel_tool_calls = self.config.parallel_tool_calls
+        self.truncation: Literal["auto", "disabled"] | None = self.config.truncation
+        self._openai_tools: list[ToolParam] = []
+        self._tool_name_map: dict[str, str] = {}
-        # State tracking for OpenAI's stateful API
         self.last_response_id: str | None = None
-        self.pending_call_id: str | None = None
-        self.pending_safety_checks: list[Any] = []
+        self._message_cursor = 0
-        # validate api key if requested
-        if validate_api_key:
-            try:
-                OpenAI(api_key=self.openai_client.api_key).models.list()
-            except Exception as e:
-                raise ValueError(f"OpenAI API key is invalid: {e}") from e
-        self.model_name = "openai-" + self.model
-        # Append OpenAI-specific instructions to the base system prompt
-        openai_instructions = """
-        You are an autonomous computer-using agent. Follow these guidelines:
-        1. NEVER ask for confirmation. Complete all tasks autonomously.
-        2. Do NOT send messages like "I need to confirm before..." or "Do you want me to continue?" - just proceed.
-        3. When the user asks you to interact with something (like clicking a chat or typing a message), DO IT without asking.
-        4. Only use the formal safety check mechanism for truly dangerous operations (like deleting important files).
-        5. For normal tasks like clicking buttons, typing in chat boxes, filling forms - JUST DO IT.
-        6. The user has already given you permission by running this agent. No further confirmation is needed.
-        7. Be decisive and action-oriented. Complete the requested task fully.
-        Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
-        """.strip()  # noqa: E501
-        # Append OpenAI instructions to any base system prompt
-        if self.system_prompt:
-            self.system_prompt = f"{self.system_prompt}\n\n{openai_instructions}"
-        else:
-            self.system_prompt = openai_instructions
-    async def _run_context(self, context: list[types.ContentBlock], max_steps: int = 10) -> Trace:
-        """
-        Run the agent with the given prompt or task.
+    def _on_tools_ready(self) -> None:
+        """Build OpenAI-specific tool mappings after tools are discovered."""
+        self._convert_tools_for_openai()
+    def _to_openai_tool(
+        self,
+        tool: types.Tool,
+    ) -> (
+        FunctionShellToolParam | ApplyPatchToolParam | FunctionToolParam | ComputerToolParam | None
+    ):
+        # Special case: shell tool -> OpenAI native shell
+        if tool.name == "shell":
+            return FunctionShellToolParam(type="shell")
+        # Special case: apply_patch tool -> OpenAI native apply_patch
+        if tool.name == "apply_patch":
+            return ApplyPatchToolParam(type="apply_patch")
+        # Regular function tool
+        if tool.description is None or tool.inputSchema is None:
+            raise ValueError(
+                cleandoc(f"""MCP tool {tool.name} requires both a description and inputSchema.
+                Add these by:
+                1. Adding a docstring to your @mcp.tool decorated function for the description
+                2. Using pydantic Field() annotations on function parameters for the schema
+                """)
+            )
+        # schema must be strict
+        try:
+            strict_schema = ensure_strict_json_schema(copy.deepcopy(tool.inputSchema))
+        except Exception as e:
+            self.console.warning_log(f"Failed to convert tool '{tool.name}' schema to strict: {e}")
+            return None
-        Override to reset OpenAI-specific state.
+        return FunctionToolParam(
+            type="function",
+            name=tool.name,
+            description=tool.description,
+            parameters=strict_schema,
+            strict=True,
+        )
+    def _convert_tools_for_openai(self) -> None:
+        """Convert MCP tools into OpenAI Responses tool definitions."""
+        available_tools = self.get_available_tools()
+        self._openai_tools = []
+        self._tool_name_map = {}
+        for tool in available_tools:
+            openai_tool = self._to_openai_tool(tool)
+            if openai_tool is None:
+                continue
+            if "name" in openai_tool:
+                self._tool_name_map[openai_tool["name"]] = tool.name
+            self._openai_tools.append(openai_tool)
+    def _extract_tool_call(self, item: Any) -> MCPToolCall | None:
+        """Extract an MCPToolCall from a response output item.
+        Subclasses can override to customize tool call extraction (e.g., routing
+        computer_call to a different tool name).
         """
-        # Reset state for new run
-        self.last_response_id = None
-        self.pending_call_id = None
-        self.pending_safety_checks = []
+        if item.type == "function_call":
+            tool_name = item.name or ""
+            target_name = self._tool_name_map.get(tool_name, tool_name)
+            arguments = json.loads(item.arguments)
+            return MCPToolCall(name=target_name, arguments=arguments, id=item.call_id)
+        elif item.type == "shell_call":
+            return MCPToolCall(name="shell", arguments=item.action.to_dict(), id=item.call_id)
+        elif item.type == "apply_patch_call":
+            return MCPToolCall(
+                name="apply_patch", arguments=item.operation.to_dict(), id=item.call_id
+            )
+        return None
-        # Use base implementation
+    async def _run_context(
+        self, context: list[types.ContentBlock], *, max_steps: int = 10
+    ) -> Trace:
+        """Reset internal state before delegating to the base loop."""
+        self._reset_response_state()
         return await super()._run_context(context, max_steps=max_steps)
-    async def get_system_messages(self) -> list[Any]:
-        """
-        Create initial messages for OpenAI.
+    def _reset_response_state(self) -> None:
+        self.last_response_id = None
+        self._message_cursor = 0
-        OpenAI uses a different message format - we'll store the prompt
-        and screenshot for use in get_model_response.
-        """
+    async def get_system_messages(self) -> list[types.ContentBlock]:
+        """System messages are provided via the `instructions` field."""
         return []
-    async def format_blocks(
-        self, blocks: list[types.ContentBlock]
-    ) -> ResponseInputMessageContentListParam:
-        """
-        Format blocks for OpenAI input format.
-        Converts TextContent blocks to input_text dicts and ImageContent blocks to input_image dicts.
-        """  # noqa: E501
-        formatted = []
+    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Message]:
+        """Convert MCP content blocks into OpenAI user messages."""
+        content: ResponseInputMessageContentListParam = []
         for block in blocks:
             if isinstance(block, types.TextContent):
-                formatted.append({"type": "input_text", "text": block.text})
+                content.append(ResponseInputTextParam(type="input_text", text=block.text))
             elif isinstance(block, types.ImageContent):
                 mime_type = getattr(block, "mimeType", "image/png")
-                formatted.append(
-                    {"type": "input_image", "image_url": f"data:{mime_type};base64,{block.data}"}
+                content.append(
+                    ResponseInputImageParam(
+                        type="input_image",
+                        image_url=f"data:{mime_type};base64,{block.data}",
+                        detail="auto",
+                    )
                 )
-        return formatted
-    @hud.instrument(
-        span_type="agent",
-        record_args=False,  # Messages can be large
-        record_result=True,
-    )
-    async def get_response(self, messages: ResponseInputMessageContentListParam) -> AgentResponse:
-        """Get response from OpenAI including any tool calls."""
-        # OpenAI's API is stateful, so we handle messages differently
-        # Get the computer tool (guaranteed to exist due to required_tools)
-        computer_tool_name = "openai_computer"
-        # Define the computer use tool
-        computer_tool: ToolParam = {  # type: ignore[reportAssignmentType]
-            "type": "computer_use_preview",
-            "display_width": self.metadata["display_width"],
-            "display_height": self.metadata["display_height"],
-            "environment": self.environment,
-        }
-        # Build the request based on whether this is first step or follow-up
-        if self.pending_call_id is None and self.last_response_id is None:
-            # First step - messages are already formatted dicts from format_blocks
-            # format_blocks returns type ResponseInputMessageContentListParam, which is a list of dicts  # noqa: E501
-            input_content: ResponseInputMessageContentListParam = []
-            input_content.extend(messages)
-            # If no content was added, add empty text to avoid empty request
-            if not input_content:
-                input_content.append({"type": "input_text", "text": ""})
-            input_param: ResponseInputParam = [{"role": "user", "content": input_content}]  # type: ignore[reportUnknownMemberType]
-            response = await self.openai_client.responses.create(
-                model=self.model,
-                tools=[computer_tool],
-                input=input_param,
-                instructions=self.system_prompt,
-                truncation="auto",
-                reasoning={"summary": "auto"},  # type: ignore[arg-type]
-            )
-        else:
-            # Follow-up step - check if this is user input or tool result
-            latest_message = messages[-1] if messages else {}
-            if latest_message.get("type") == "input_text":
-                # User provided input in conversation mode
-                user_text = latest_message.get("text", "")
-                input_param_followup: ResponseInputParam = [  # type: ignore[reportAssignmentType]
-                    {"role": "user", "content": [{"type": "input_text", "text": user_text}]}
-                ]
-                # Reset pending_call_id since this is user input, not a tool response
-                self.pending_call_id = None
-            else:
-                # Tool result - need screenshot from processed results
-                latest_screenshot = None
-                for msg in reversed(messages):
-                    if isinstance(msg, dict) and "image_url" in msg:
-                        latest_screenshot = msg["image_url"]  # type: ignore
-                        break
-                if not latest_screenshot:
-                    self.console.warning_log("No screenshot provided for response to action")
-                    return AgentResponse(
-                        content="No screenshot available for next action",
-                        tool_calls=[],
-                        done=True,
+        if not content:
+            content.append(ResponseInputTextParam(type="input_text", text=""))
+        return [Message(role="user", content=content)]
+    async def get_response(self, messages: ResponseInputParam) -> AgentResponse:
+        """Send the latest input items to OpenAI's Responses API."""
+        new_items: ResponseInputParam = messages[self._message_cursor :]
+        if not new_items:
+            if self.last_response_id is None:
+                new_items = [
+                    Message(
+                        role="user", content=[ResponseInputTextParam(type="input_text", text="")]
                     )
-                # Create response to previous action
-                input_param_followup: ResponseInputParam = [  # type: ignore[reportAssignmentType]
-                    {  # type: ignore[reportAssignmentType]
-                        "call_id": self.pending_call_id,
-                        "type": "computer_call_output",
-                        "output": {
-                            "type": "input_image",
-                            "image_url": latest_screenshot,
-                        },
-                        "acknowledged_safety_checks": self.pending_safety_checks,
-                    }
                 ]
+            else:
+                self.console.debug("No new messages to send to OpenAI.")
+                return AgentResponse(content="", tool_calls=[], done=True)
+        response = await self.openai_client.responses.create(
+            model=self._model,
+            input=new_items,
+            instructions=self.system_prompt,
+            max_output_tokens=self.max_output_tokens,
+            temperature=self.temperature,
+            tool_choice=self.tool_choice if self.tool_choice is not None else Omit(),
+            parallel_tool_calls=self.parallel_tool_calls,
+            reasoning=self.reasoning,
+            tools=self._openai_tools if self._openai_tools else Omit(),
+            previous_response_id=(
+                self.last_response_id if self.last_response_id is not None else Omit()
+            ),
+            truncation=self.truncation,
+        )
-            self.pending_safety_checks = []
-            response = await self.openai_client.responses.create(
-                model=self.model,
-                previous_response_id=self.last_response_id,
-                tools=[computer_tool],
-                input=input_param_followup,
-                instructions=self.system_prompt,
-                truncation="auto",
-                reasoning={"summary": "auto"},  # type: ignore[arg-type]
-            )
-        # Store response ID for next call
         self.last_response_id = response.id
+        self._message_cursor = len(messages)
-        # Process response
-        result = AgentResponse(
-            content="",
-            tool_calls=[],
-            done=False,  # Will be set to True only if no tool calls
-        )
+        agent_response = AgentResponse(content="", tool_calls=[], done=True)
+        text_chunks: list[str] = []
+        reasoning_chunks: list[str] = []
-        self.pending_call_id = None
-        # Check for computer calls
-        computer_calls = [
-            item
-            for item in response.output
-            if isinstance(item, ResponseComputerToolCall) and item.type == "computer_call"
-        ]
-        if computer_calls:
-            # Process computer calls
-            result.done = False
-            for computer_call in computer_calls:
-                self.pending_call_id = computer_call.call_id
-                self.pending_safety_checks = computer_call.pending_safety_checks
-                # Convert OpenAI action to MCP tool call
-                action = computer_call.action.model_dump()
-                # Create MCPToolCall object with OpenAI metadata as extra fields
-                # Pyright will complain but the tool class accepts extra fields
-                tool_call = MCPToolCall(
-                    name=computer_tool_name,
-                    arguments=action,
-                    id=computer_call.call_id,  # type: ignore
-                    pending_safety_checks=computer_call.pending_safety_checks,  # type: ignore
-                )
-                result.tool_calls.append(tool_call)
-        else:
-            # No computer calls, check for text response
-            for item in response.output:
-                if isinstance(item, ResponseOutputMessage) and item.type == "message":
-                    # Extract text from content blocks
-                    text_parts = [
-                        content.text
-                        for content in item.content
-                        if isinstance(content, ResponseOutputText)
-                    ]
-                    if text_parts:
-                        result.content = "".join(text_parts)
-                        break
-        # Extract reasoning if present
-        reasoning_text = ""
         for item in response.output:
-            if item.type == "reasoning" and hasattr(item, "summary") and item.summary:
-                reasoning_text += f"Thinking: {item.summary[0].text}\n"
-        if reasoning_text:
-            result.content = reasoning_text + result.content if result.content else reasoning_text
+            if item.type == "message":
+                text = "".join(
+                    content.text
+                    for content in item.content
+                    if isinstance(content, ResponseOutputText)
+                )
+                if text:
+                    text_chunks.append(text)
+            elif item.type == "reasoning":
+                reasoning_chunks.append("".join(summary.text for summary in item.summary))
+            else:
+                tool_call = self._extract_tool_call(item)
+                if tool_call is not None:
+                    agent_response.tool_calls.append(tool_call)
-        # Set done=True if no tool calls (task complete or waiting for user)
-        if not result.tool_calls:
-            result.done = True
+        if agent_response.tool_calls:
+            agent_response.done = False
-        return result
+        agent_response.content = "".join(text_chunks)
+        if reasoning_chunks:
+            agent_response.reasoning = "\n".join(reasoning_chunks)
+        return agent_response
     async def format_tool_results(
         self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
-    ) -> ResponseInputMessageContentListParam:
-        """
-        Format tool results for OpenAI's stateful API.
-        Tool result content is a list of ContentBlock objects.
-        We need to extract the latest screenshot from the tool results.
-        This assumes that you only care about computer tool results for your agent loop.
-        If you need to add other content, you can do so by adding a new ContentBlock object to the list.
+    ) -> list[FunctionCallOutput]:
+        """Convert MCP tool outputs into Responses input items."""
+        formatted: list[FunctionCallOutput] = []
+        for call, result in zip(tool_calls, tool_results, strict=False):
+            if not call.id:
+                self.console.warning_log(f"Tool '{call.name}' missing call_id; skipping output.")
+                continue
+            output_items: ResponseFunctionCallOutputItemListParam = []
+            if result.isError:
+                output_items.append(
+                    ResponseInputTextParam(type="input_text", text="[tool_error] true")
+                )
-        Returns formatted dicts with tool result data, preserving screenshots.
-        """  # noqa: E501
-        formatted_results = []
-        latest_screenshot = None
+            if result.structuredContent is not None:
+                output_items.append(
+                    ResponseInputTextParam(
+                        type="input_text", text=json.dumps(result.structuredContent, default=str)
+                    )
+                )
-        # Extract all content from tool results
-        for result in tool_results:
-            if result.isError:
-                # If it's an error, the error details are in the content
-                for content in result.content:
-                    if isinstance(content, types.TextContent):
-                        # Don't add error text as input_text, just track it
-                        self.console.error_log(f"Tool error: {content.text}")
-                    elif isinstance(content, types.ImageContent):
-                        # Even error results might have images
-                        latest_screenshot = content.data
-            else:
-                # Extract content from successful results
-                for content in result.content:
-                    if isinstance(content, types.ImageContent):
-                        latest_screenshot = content.data
-                        break
-        # Return a dict with the latest screenshot for the follow-up step
-        if latest_screenshot:
-            formatted_results.append(
-                {"type": "input_image", "image_url": f"data:image/png;base64,{latest_screenshot}"}
+            for block in result.content:
+                match block:
+                    case types.TextContent():
+                        output_items.append(
+                            ResponseInputTextContentParam(type="input_text", text=block.text)
+                        )
+                    case types.ImageContent():
+                        mime_type = getattr(block, "mimeType", "image/png")
+                        output_items.append(
+                            ResponseInputImageContentParam(
+                                type="input_image",
+                                image_url=f"data:{mime_type};base64,{block.data}",
+                            )
+                        )
+                    case types.ResourceLink():
+                        output_items.append(
+                            ResponseInputFileContentParam(
+                                type="input_file", file_url=str(block.uri)
+                            )
+                        )
+                    case types.EmbeddedResource():
+                        match block.resource:
+                            case types.TextResourceContents():
+                                output_items.append(
+                                    ResponseInputTextContentParam(
+                                        type="input_text", text=block.resource.text
+                                    )
+                                )
+                            case types.BlobResourceContents():
+                                output_items.append(
+                                    ResponseInputFileContentParam(
+                                        type="input_file", file_data=block.resource.blob
+                                    )
+                                )
+                            case _:
+                                self.console.warning_log(
+                                    f"Unknown resource type: {type(block.resource)}"
+                                )
+                    case _:
+                        self.console.warning_log(f"Unknown content block type: {type(block)}")
+            if not output_items:
+                output_items.append(ResponseInputTextParam(type="input_text", text=""))
+            formatted.append(
+                FunctionCallOutput(
+                    type="function_call_output", call_id=call.id, output=output_items
+                ),
             )
-        return formatted_results
+        return formatted

hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

hud-python 0.4.45py3-none-any.whl → 0.5.13py3-none-any.whl