hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +11 -5
- hud/agents/base.py +220 -500
- hud/agents/claude.py +200 -240
- hud/agents/gemini.py +275 -0
- hud/agents/gemini_cua.py +335 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +41 -36
- hud/agents/openai.py +291 -292
- hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
- hud/agents/operator.py +211 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +379 -210
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +376 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/cli/__init__.py +461 -545
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +664 -110
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +882 -734
- hud/cli/eval.py +782 -668
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/push.py +29 -11
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +108 -6
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +69 -0
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +40 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +327 -0
- hud/datasets/runner.py +192 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +50 -0
- hud/environment/connection.py +206 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +109 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +694 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +112 -0
- hud/environment/scenarios.py +493 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +218 -0
- hud/environment/tests/test_environment.py +161 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +201 -0
- hud/environment/tests/test_scenarios.py +280 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +674 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +185 -0
- hud/eval/manager.py +466 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +340 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +145 -0
- hud/eval/types.py +63 -0
- hud/eval/utils.py +183 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +151 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +158 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +16 -2
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +4 -0
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +167 -57
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +61 -3
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.1.dist-info/METADATA +264 -0
- hud_python-0.5.1.dist-info/RECORD +299 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/agents/{openai_chat_generic.py → openai_chat.py} RENAMED

@@ -1,4 +1,4 @@
-"""
+"""OpenAI Chat Completions Agent.
 
 This class provides the minimal glue required to connect any endpoint that
 implements the OpenAI compatible *chat.completions* API with MCP tool calling
@@ -6,6 +6,7 @@ through the existing :class:`hud.agent.MCPAgent` scaffolding.
 
 Key points:
 - Stateless, no special server-side conversation state is assumed.
+- Defaults to HUD inference gateway (inference.hud.ai) when HUD_API_KEY is set
 - Accepts an :class:`openai.AsyncOpenAI` client, caller can supply their own
   base_url / api_key (e.g. llama.cpp, together.ai, …)
 - All HUD features (step_count, OTel spans, tool filtering, screenshots, …)
@@ -20,39 +21,85 @@ import logging
 from typing import TYPE_CHECKING, Any, ClassVar, cast
 
 import mcp.types as types
+from openai import AsyncOpenAI
+from pydantic import ConfigDict, Field
 
-from hud import
-from hud.types import AgentResponse, MCPToolCall, MCPToolResult
+from hud.settings import settings
+from hud.types import AgentResponse, BaseAgentConfig, MCPToolCall, MCPToolResult
 from hud.utils.hud_console import HUDConsole
+from hud.utils.types import with_signature
 
-from .base import MCPAgent
+from .base import BaseCreateParams, MCPAgent
 
 if TYPE_CHECKING:
-    from openai import AsyncOpenAI
     from openai.types.chat import ChatCompletionToolParam
 
+
 logger = logging.getLogger(__name__)
 
 
-class
+class OpenAIChatConfig(BaseAgentConfig):
+    """Configuration for `OpenAIChatAgent`."""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    model_name: str = "OpenAI Chat"
+    model: str = "gpt-5-mini"
+    openai_client: AsyncOpenAI | None = None
+    api_key: str | None = None
+    base_url: str | None = None
+    completion_kwargs: dict[str, Any] = Field(default_factory=dict)
+
+
+class OpenAIChatCreateParams(BaseCreateParams, OpenAIChatConfig):
+    pass
+
+
+class OpenAIChatAgent(MCPAgent):
     """MCP-enabled agent that speaks the OpenAI *chat.completions* protocol."""
 
-    metadata: ClassVar[dict[str, Any]] =
+    metadata: ClassVar[dict[str, Any] | None] = None
+    config_cls: ClassVar[type[BaseAgentConfig]] = OpenAIChatConfig
+
+    @with_signature(OpenAIChatCreateParams)
+    @classmethod
+    def create(cls, **kwargs: Any) -> OpenAIChatAgent:  # pyright: ignore[reportIncompatibleMethodOverride]
+        return MCPAgent.create.__func__(cls, **kwargs)  # type: ignore[return-value]
+
+    def __init__(self, params: OpenAIChatCreateParams | None = None, **kwargs: Any) -> None:
+        super().__init__(params, **kwargs)
+        self.config: OpenAIChatConfig
+
+        if (
+            self.config.api_key
+            and self.config.base_url
+            and settings.hud_gateway_url in self.config.base_url
+            and settings.api_key
+            and self.config.api_key != settings.api_key
+        ):
+            raise ValueError(
+                "OpenAIChatAgent api_key is not allowed with HUD Gateway. "
+                "Use HUD_API_KEY for gateway auth and BYOK headers for provider keys."
+            )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if self.config.openai_client is not None:
+            self.oai = self.config.openai_client
+        elif self.config.api_key is not None or self.config.base_url is not None:
+            self.oai = AsyncOpenAI(api_key=self.config.api_key, base_url=self.config.base_url)
+        elif settings.api_key:
+            # Default to HUD inference gateway
+            self.oai = AsyncOpenAI(
+                api_key=settings.api_key,
+                base_url=settings.hud_gateway_url,
+            )
+        else:
+            raise ValueError(
+                "No API key found. Set HUD_API_KEY for HUD gateway, "
+                "or provide api_key/base_url/openai_client explicitly."
+            )
+
+        self.completion_kwargs = dict(self.config.completion_kwargs)
+        self.mcp_schemas: list[ChatCompletionToolParam] = []
         self.hud_console = HUDConsole(logger=logger)
 
     @staticmethod
@@ -69,11 +116,14 @@ class GenericOpenAIChatAgent(MCPAgent):
             arguments=args,
         )
 
-    async def get_system_messages(self) -> list[Any]:
+    async def get_system_messages(self) -> list[dict[str, Any]]:
         """Get system messages for OpenAI."""
-
+        if self.system_prompt is not None:
+            return [{"role": "system", "content": self.system_prompt}]
+        else:
+            return []
 
-    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[Any]:
+    async def format_blocks(self, blocks: list[types.ContentBlock]) -> list[dict[str, Any]]:
         """Format blocks for OpenAI."""
         content = []
         for block in blocks:
@@ -179,21 +229,16 @@ class GenericOpenAIChatAgent(MCPAgent):
         extra: dict[str, Any],
    ) -> Any:
        if self.oai is None:
-            raise ValueError("openai_client is required for
+            raise ValueError("openai_client is required for OpenAIChatAgent")
        # default transport = OpenAI SDK
        return await self.oai.chat.completions.create(
-            model=self.
+            model=self.config.model,
            messages=messages,
            tools=tools,  # type: ignore ready ChatCompletionToolParam-shaped
            **extra,
        )  # type: ignore
 
-
-        span_type="agent",
-        record_args=False,
-        record_result=True,
-    )
-    async def get_response(self, messages: list[Any]) -> AgentResponse:
+    async def get_response(self, messages: list[dict[str, Any]]) -> AgentResponse:
        """Send chat request to OpenAI and convert the response."""
 
        # Convert MCP tool schemas to OpenAI format
@@ -256,16 +301,17 @@ class GenericOpenAIChatAgent(MCPAgent):
 
        return AgentResponse(
            content=msg.content or "",
+            reasoning=getattr(msg, "reasoning_content", None),
            tool_calls=tool_calls,
            done=done,
-            raw=response,
+            raw=response,
        )
 
    async def format_tool_results(
        self,
        tool_calls: list[MCPToolCall],
        tool_results: list[MCPToolResult],
-    ) -> list[Any]:
+    ) -> list[dict[str, Any]]:
        """Render MCP tool results as OpenAI messages.
 
        Note: OpenAI tool messages only support string content.
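The rename from GenericOpenAIChatAgent to OpenAIChatAgent also moves construction onto the shared config / create() path. A minimal construction sketch, assuming the module path shown above; the model, api_key, and base_url values are placeholders, and driving the agent through MCPAgent afterwards is not part of this diff:

    from openai import AsyncOpenAI

    from hud.agents.openai_chat import OpenAIChatAgent

    # Gateway default: with HUD_API_KEY set, create() falls through to the
    # HUD inference gateway branch shown in __init__ above.
    agent = OpenAIChatAgent.create(model="gpt-5-mini")

    # Bring-your-own endpoint: any OpenAI-compatible chat.completions server.
    # base_url, api_key, and model below are placeholders, not values from this release.
    client = AsyncOpenAI(api_key="sk-placeholder", base_url="http://localhost:8000/v1")
    agent = OpenAIChatAgent.create(openai_client=client, model="local-model")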
hud/agents/operator.py ADDED

@@ -0,0 +1,211 @@
+"""Operator agent built on top of OpenAIAgent."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, ClassVar, Literal
+
+import mcp.types as types
+from openai.types.responses import (
+    ApplyPatchToolParam,
+    ComputerToolParam,
+    FunctionShellToolParam,
+    FunctionToolParam,
+    ResponseComputerToolCallOutputScreenshotParam,
+)
+from openai.types.responses.response_input_param import (
+    ComputerCallOutput,
+    FunctionCallOutput,
+)
+from openai.types.shared_params.reasoning import Reasoning
+from pydantic import ConfigDict
+
+from hud.tools.computer.settings import computer_settings
+from hud.types import BaseAgentConfig, MCPToolCall, MCPToolResult
+from hud.utils.types import with_signature
+
+from .base import BaseCreateParams, MCPAgent
+from .openai import OpenAIAgent, OpenAIConfig
+
+if TYPE_CHECKING:
+    from openai.types.responses.response_computer_tool_call import PendingSafetyCheck
+
+OPERATOR_INSTRUCTIONS = """
+You are an autonomous computer-using agent. Follow these guidelines:
+
+1. NEVER ask for confirmation. Complete all tasks autonomously.
+2. Do NOT send messages like "I need to confirm before..." or "Do you want me to
+continue?" - just proceed.
+3. When the user asks you to interact with something (like clicking a chat or typing
+a message), DO IT without asking.
+4. Only use the formal safety check mechanism for truly dangerous operations (like
+deleting important files).
+5. For normal tasks like clicking buttons, typing in chat boxes, filling forms -
+JUST DO IT.
+6. The user has already given you permission by running this agent. No further
+confirmation is needed.
+7. Be decisive and action-oriented. Complete the requested task fully.
+
+Remember: You are expected to complete tasks autonomously. The user trusts you to do
+what they asked.
+""".strip()
+
+
+class OperatorConfig(OpenAIConfig):
+    """Configuration model for `OperatorAgent`."""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    model_name: str = "Operator"
+    model: str = "computer-use-preview"
+    environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = "linux"
+
+
+class OperatorCreateParams(BaseCreateParams, OperatorConfig):
+    pass
+
+
+class OperatorAgent(OpenAIAgent):
+    """
+    Backwards-compatible Operator agent built on top of OpenAIAgent.
+    """
+
+    metadata: ClassVar[dict[str, Any] | None] = {
+        "display_width": computer_settings.OPENAI_COMPUTER_WIDTH,
+        "display_height": computer_settings.OPENAI_COMPUTER_HEIGHT,
+    }
+    # base class will ensure that the computer tool is available
+    required_tools: ClassVar[list[str]] = ["openai_computer"]
+    config_cls: ClassVar[type[BaseAgentConfig]] = OperatorConfig
+
+    @with_signature(OperatorCreateParams)
+    @classmethod
+    def create(cls, **kwargs: Any) -> OperatorAgent:  # pyright: ignore[reportIncompatibleMethodOverride]
+        return MCPAgent.create.__func__(cls, **kwargs)  # type: ignore[return-value]
+
+    def __init__(self, params: OperatorCreateParams | None = None, **kwargs: Any) -> None:
+        super().__init__(params, **kwargs)  # type: ignore[arg-type]
+        self.config: OperatorConfig  # type: ignore[assignment]
+
+        self._operator_computer_tool_name = "openai_computer"
+        self._operator_display_width = computer_settings.OPENAI_COMPUTER_WIDTH
+        self._operator_display_height = computer_settings.OPENAI_COMPUTER_HEIGHT
+        self._operator_environment: Literal["windows", "mac", "linux", "ubuntu", "browser"] = (
+            self.config.environment
+        )
+        self.environment = self.config.environment
+
+        # add pending call id and safety checks to the agent
+        self.pending_call_id: str | None = None
+        self.pending_safety_checks: list[PendingSafetyCheck] = []
+
+        # override reasoning to "summary": "auto"
+        if self.reasoning is None:
+            self.reasoning = Reasoning(summary="auto")
+        else:
+            self.reasoning["summary"] = "auto"
+
+        # override truncation to "auto"
+        self.truncation = "auto"
+
+        if self.system_prompt:
+            self.system_prompt = f"{self.system_prompt}\n\n{OPERATOR_INSTRUCTIONS}"
+        else:
+            self.system_prompt = OPERATOR_INSTRUCTIONS
+
+    def _reset_response_state(self) -> None:
+        super()._reset_response_state()
+        self.pending_call_id = None
+        self.pending_safety_checks = []
+
+    def _to_openai_tool(
+        self, tool: types.Tool
+    ) -> (
+        FunctionShellToolParam | ApplyPatchToolParam | FunctionToolParam | ComputerToolParam | None
+    ):
+        if tool.name == self._operator_computer_tool_name:
+            return ComputerToolParam(
+                type="computer_use_preview",
+                display_width=self._operator_display_width,
+                display_height=self._operator_display_height,
+                environment=self._operator_environment,
+            )
+        return super()._to_openai_tool(tool)
+
+    def _extract_tool_call(self, item: Any) -> MCPToolCall | None:
+        """Route computer_call to the OpenAI-specific computer tool."""
+        if item.type == "computer_call":
+            self.pending_safety_checks = item.pending_safety_checks
+            return MCPToolCall(
+                name=self._operator_computer_tool_name,
+                arguments=item.action.to_dict(),
+                id=item.call_id,
+            )
+        return super()._extract_tool_call(item)
+
+    async def format_tool_results(
+        self, tool_calls: list[MCPToolCall], tool_results: list[MCPToolResult]
+    ) -> list[ComputerCallOutput | FunctionCallOutput]:
+        remaining_calls: list[MCPToolCall] = []
+        remaining_results: list[MCPToolResult] = []
+        computer_outputs: list[ComputerCallOutput] = []
+        ordering: list[tuple[str, int]] = []
+
+        for call, result in zip(tool_calls, tool_results, strict=False):
+            if call.name == self._operator_computer_tool_name:
+                screenshot = self._extract_latest_screenshot(result)
+                if not screenshot:
+                    self.console.warning_log(
+                        "Computer tool result missing screenshot; skipping output."
+                    )
+                    continue
+                call_id = call.id or self.pending_call_id
+                if not call_id:
+                    self.console.warning_log("Computer tool call missing ID; skipping output.")
+                    continue
+                acknowledged_checks = []
+                for check in self.pending_safety_checks:
+                    if hasattr(check, "model_dump"):
+                        acknowledged_checks.append(check.model_dump())
+                    elif isinstance(check, dict):
+                        acknowledged_checks.append(check)
+                output_payload = ComputerCallOutput(
+                    type="computer_call_output",
+                    call_id=call_id,
+                    output=ResponseComputerToolCallOutputScreenshotParam(
+                        type="computer_screenshot",
+                        image_url=f"data:image/png;base64,{screenshot}",
+                    ),
+                    acknowledged_safety_checks=acknowledged_checks if acknowledged_checks else None,
+                )
+                computer_outputs.append(output_payload)
+                self.pending_call_id = None
+                self.pending_safety_checks = []
+                ordering.append(("computer", len(computer_outputs) - 1))
+            else:
+                remaining_calls.append(call)
+                remaining_results.append(result)
+                ordering.append(("function", len(remaining_calls) - 1))
+
+        formatted: list[ComputerCallOutput | FunctionCallOutput] = []
+        function_outputs: list[FunctionCallOutput] = []
+        if remaining_calls:
+            function_outputs = await super().format_tool_results(remaining_calls, remaining_results)
+
+        for kind, idx in ordering:
+            if kind == "computer":
+                if idx < len(computer_outputs):
+                    formatted.append(computer_outputs[idx])
+            else:
+                if idx < len(function_outputs):
+                    formatted.append(function_outputs[idx])
+        return formatted
+
+    def _extract_latest_screenshot(self, result: MCPToolResult) -> str | None:
+        if not result.content:
+            return None
+        for content in reversed(result.content):
+            if isinstance(content, types.ImageContent):
+                return content.data
+            if isinstance(content, types.TextContent) and result.isError:
+                self.console.error_log(f"Computer tool error: {content.text}")
+        return None
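OperatorAgent reintroduces the computer-use Operator behaviour (computer_use_preview tool, auto reasoning summaries, auto truncation, autonomous-operation instructions) on top of the new OpenAIAgent. A minimal construction sketch, assuming defaults for every field not listed; wiring the agent to an environment that exposes the required openai_computer tool is outside this diff:

    from hud.agents.operator import OperatorAgent

    # environment is forwarded to the computer_use_preview tool definition;
    # display width/height come from hud.tools.computer.settings.computer_settings.
    # All other config fields are left at their defaults here.
    agent = OperatorAgent.create(environment="browser")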
hud/agents/tests/conftest.py ADDED

@@ -0,0 +1,133 @@
+"""Shared test fixtures for agent tests."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from mcp import types
+
+from hud.environment.router import ToolRouter
+from hud.eval.context import EvalContext
+from hud.types import MCPToolCall, MCPToolResult
+
+
+class MockEvalContext(EvalContext):
+    """Mock EvalContext for testing agents.
+
+    This provides a minimal EvalContext implementation that can be used
+    to test agent initialization and tool calling without a real environment.
+    """
+
+    def __init__(
+        self,
+        prompt: str = "Test prompt",
+        tools: list[types.Tool] | None = None,
+        call_tool_handler: Any = None,
+    ) -> None:
+        # Core attributes
+        self.prompt = prompt
+        self._tools = tools or []
+        self._submitted: str | None = None
+        self.reward: float | None = None
+        self._call_tool_handler = call_tool_handler
+        self.tool_calls: list[tuple[str, dict[str, Any]]] = []
+
+        # Environment attributes
+        self._router = ToolRouter()
+        self._agent_include: list[str] | None = None
+        self._agent_exclude: list[str] | None = None
+
+        # EvalContext attributes
+        self._task = None
+        self.trace_id = "test-trace-id"
+        self.eval_name = "test-eval"
+        self.job_id: str | None = None
+        self.group_id: str | None = None
+        self.index = 0
+        self.variants: dict[str, Any] = {}
+        self.answer: str | None = None
+        self.system_prompt: str | None = None
+        self.error: BaseException | None = None
+        self.metadata: dict[str, Any] = {}
+        self.results: list[Any] = []
+        self._is_summary = False
+
+    def as_tools(self) -> list[types.Tool]:
+        return self._tools
+
+    @property
+    def has_scenario(self) -> bool:
+        return False
+
+    async def list_tools(self) -> list[types.Tool]:
+        return self._tools
+
+    async def call_tool(self, call: Any, /, **kwargs: Any) -> MCPToolResult:
+        # Parse the call
+        if isinstance(call, tuple):
+            name, args = call[0], call[1] if len(call) > 1 else {}
+        elif hasattr(call, "name"):
+            name, args = call.name, getattr(call, "arguments", {}) or {}
+        else:
+            name, args = str(call), kwargs
+
+        self.tool_calls.append((name, args))
+
+        if self._call_tool_handler:
+            tc = MCPToolCall(name=name, arguments=args)
+            return self._call_tool_handler(tc)
+
+        return MCPToolResult(
+            content=[types.TextContent(type="text", text=f"Result from {name}")],
+            isError=False,
+        )
+
+    async def submit(self, answer: str) -> None:
+        self._submitted = answer
+
+
+@pytest.fixture
+def mock_eval_context() -> MockEvalContext:
+    """Create a basic mock EvalContext."""
+    return MockEvalContext()
+
+
+@pytest.fixture
+def mock_eval_context_with_tools() -> MockEvalContext:
+    """Create a mock EvalContext with test tools."""
+    return MockEvalContext(
+        tools=[
+            types.Tool(
+                name="test_tool",
+                description="A test tool",
+                inputSchema={"type": "object", "properties": {}},
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def mock_eval_context_computer() -> MockEvalContext:
+    """Create a mock EvalContext with computer tool."""
+    return MockEvalContext(
+        tools=[
+            types.Tool(
+                name="computer",
+                description="Computer use tool",
+                inputSchema={"type": "object"},
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def mock_eval_context_browser_tools() -> MockEvalContext:
+    """Create a mock EvalContext with browser-like tools."""
+    return MockEvalContext(
+        tools=[
+            types.Tool(name="screenshot", description="Take screenshot", inputSchema={}),
+            types.Tool(name="click", description="Click at coordinates", inputSchema={}),
+            types.Tool(name="type", description="Type text", inputSchema={}),
+        ]
+    )