PyPI - hud-python - Versions diffs - 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (274) hide show

hud/__init__.py +27 -7
hud/agents/__init__.py +11 -5
hud/agents/base.py +220 -500
hud/agents/claude.py +200 -240
hud/agents/gemini.py +275 -0
hud/agents/gemini_cua.py +335 -0
hud/agents/grounded_openai.py +98 -100
hud/agents/misc/integration_test_agent.py +51 -20
hud/agents/misc/response_agent.py +41 -36
hud/agents/openai.py +291 -292
hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
hud/agents/operator.py +211 -0
hud/agents/tests/conftest.py +133 -0
hud/agents/tests/test_base.py +300 -622
hud/agents/tests/test_base_runtime.py +233 -0
hud/agents/tests/test_claude.py +379 -210
hud/agents/tests/test_client.py +9 -10
hud/agents/tests/test_gemini.py +369 -0
hud/agents/tests/test_grounded_openai_agent.py +65 -50
hud/agents/tests/test_openai.py +376 -140
hud/agents/tests/test_operator.py +362 -0
hud/agents/tests/test_run_eval.py +179 -0
hud/cli/__init__.py +461 -545
hud/cli/analyze.py +43 -5
hud/cli/build.py +664 -110
hud/cli/debug.py +8 -5
hud/cli/dev.py +882 -734
hud/cli/eval.py +782 -668
hud/cli/flows/dev.py +167 -0
hud/cli/flows/init.py +191 -0
hud/cli/flows/tasks.py +153 -56
hud/cli/flows/templates.py +151 -0
hud/cli/flows/tests/__init__.py +1 -0
hud/cli/flows/tests/test_dev.py +126 -0
hud/cli/init.py +60 -58
hud/cli/push.py +29 -11
hud/cli/rft.py +311 -0
hud/cli/rft_status.py +145 -0
hud/cli/tests/test_analyze.py +5 -5
hud/cli/tests/test_analyze_metadata.py +3 -2
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +108 -6
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_init.py +6 -1
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +140 -0
hud/cli/tests/test_convert.py +361 -0
hud/cli/tests/test_debug.py +12 -10
hud/cli/tests/test_dev.py +197 -0
hud/cli/tests/test_eval.py +251 -0
hud/cli/tests/test_eval_bedrock.py +51 -0
hud/cli/tests/test_init.py +124 -0
hud/cli/tests/test_main_module.py +11 -5
hud/cli/tests/test_mcp_server.py +12 -100
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/tests/test_registry.py +1 -1
hud/cli/tests/test_utils.py +1 -1
hud/cli/{rl → utils}/celebrate.py +14 -12
hud/cli/utils/config.py +18 -1
hud/cli/utils/docker.py +130 -4
hud/cli/utils/env_check.py +9 -9
hud/cli/utils/git.py +136 -0
hud/cli/utils/interactive.py +39 -5
hud/cli/utils/metadata.py +69 -0
hud/cli/utils/runner.py +1 -1
hud/cli/utils/server.py +2 -2
hud/cli/utils/source_hash.py +3 -3
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_git.py +142 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +258 -0
hud/cli/{rl → utils}/viewer.py +2 -2
hud/clients/README.md +12 -11
hud/clients/__init__.py +4 -3
hud/clients/base.py +166 -26
hud/clients/environment.py +51 -0
hud/clients/fastmcp.py +13 -6
hud/clients/mcp_use.py +40 -15
hud/clients/tests/test_analyze_scenarios.py +206 -0
hud/clients/tests/test_protocol.py +9 -3
hud/datasets/__init__.py +23 -20
hud/datasets/loader.py +327 -0
hud/datasets/runner.py +192 -105
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_loader.py +221 -0
hud/datasets/tests/test_utils.py +315 -0
hud/datasets/utils.py +270 -90
hud/environment/__init__.py +50 -0
hud/environment/connection.py +206 -0
hud/environment/connectors/__init__.py +33 -0
hud/environment/connectors/base.py +68 -0
hud/environment/connectors/local.py +177 -0
hud/environment/connectors/mcp_config.py +109 -0
hud/environment/connectors/openai.py +101 -0
hud/environment/connectors/remote.py +172 -0
hud/environment/environment.py +694 -0
hud/environment/integrations/__init__.py +45 -0
hud/environment/integrations/adk.py +67 -0
hud/environment/integrations/anthropic.py +196 -0
hud/environment/integrations/gemini.py +92 -0
hud/environment/integrations/langchain.py +82 -0
hud/environment/integrations/llamaindex.py +68 -0
hud/environment/integrations/openai.py +238 -0
hud/environment/mock.py +306 -0
hud/environment/router.py +112 -0
hud/environment/scenarios.py +493 -0
hud/environment/tests/__init__.py +1 -0
hud/environment/tests/test_connection.py +317 -0
hud/environment/tests/test_connectors.py +218 -0
hud/environment/tests/test_environment.py +161 -0
hud/environment/tests/test_integrations.py +257 -0
hud/environment/tests/test_local_connectors.py +201 -0
hud/environment/tests/test_scenarios.py +280 -0
hud/environment/tests/test_tools.py +208 -0
hud/environment/types.py +23 -0
hud/environment/utils/__init__.py +35 -0
hud/environment/utils/formats.py +215 -0
hud/environment/utils/schema.py +171 -0
hud/environment/utils/tool_wrappers.py +113 -0
hud/eval/__init__.py +67 -0
hud/eval/context.py +674 -0
hud/eval/display.py +299 -0
hud/eval/instrument.py +185 -0
hud/eval/manager.py +466 -0
hud/eval/parallel.py +268 -0
hud/eval/task.py +340 -0
hud/eval/tests/__init__.py +1 -0
hud/eval/tests/test_context.py +178 -0
hud/eval/tests/test_eval.py +210 -0
hud/eval/tests/test_manager.py +152 -0
hud/eval/tests/test_parallel.py +168 -0
hud/eval/tests/test_task.py +145 -0
hud/eval/types.py +63 -0
hud/eval/utils.py +183 -0
hud/patches/__init__.py +19 -0
hud/patches/mcp_patches.py +151 -0
hud/patches/warnings.py +54 -0
hud/samples/browser.py +4 -4
hud/server/__init__.py +2 -1
hud/server/low_level.py +2 -1
hud/server/router.py +164 -0
hud/server/server.py +567 -80
hud/server/tests/test_mcp_server_integration.py +11 -11
hud/server/tests/test_mcp_server_more.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/settings.py +45 -3
hud/shared/exceptions.py +36 -10
hud/shared/hints.py +26 -1
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +40 -31
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +20 -19
hud/telemetry/exporter.py +201 -0
hud/telemetry/instrument.py +158 -253
hud/telemetry/tests/test_eval_telemetry.py +356 -0
hud/telemetry/tests/test_exporter.py +258 -0
hud/telemetry/tests/test_instrument.py +401 -0
hud/tools/__init__.py +16 -2
hud/tools/apply_patch.py +639 -0
hud/tools/base.py +54 -4
hud/tools/bash.py +2 -2
hud/tools/computer/__init__.py +4 -0
hud/tools/computer/anthropic.py +2 -2
hud/tools/computer/gemini.py +385 -0
hud/tools/computer/hud.py +23 -6
hud/tools/computer/openai.py +20 -21
hud/tools/computer/qwen.py +434 -0
hud/tools/computer/settings.py +37 -0
hud/tools/edit.py +3 -7
hud/tools/executors/base.py +4 -2
hud/tools/executors/pyautogui.py +1 -1
hud/tools/grounding/grounded_tool.py +13 -18
hud/tools/grounding/grounder.py +10 -31
hud/tools/grounding/tests/test_grounded_tool.py +26 -44
hud/tools/jupyter.py +330 -0
hud/tools/playwright.py +18 -3
hud/tools/shell.py +308 -0
hud/tools/tests/test_apply_patch.py +718 -0
hud/tools/tests/test_computer.py +4 -9
hud/tools/tests/test_computer_actions.py +24 -2
hud/tools/tests/test_jupyter_tool.py +181 -0
hud/tools/tests/test_shell.py +596 -0
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/tools/types.py +21 -1
hud/types.py +167 -57
hud/utils/__init__.py +2 -0
hud/utils/env.py +67 -0
hud/utils/hud_console.py +61 -3
hud/utils/mcp.py +15 -58
hud/utils/strict_schema.py +162 -0
hud/utils/tests/test_init.py +1 -2
hud/utils/tests/test_mcp.py +1 -28
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/utils/types.py +20 -0
hud/version.py +1 -1
hud_python-0.5.1.dist-info/METADATA +264 -0
hud_python-0.5.1.dist-info/RECORD +299 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
hud/agents/langchain.py +0 -261
hud/agents/lite_llm.py +0 -72
hud/cli/rl/__init__.py +0 -180
hud/cli/rl/config.py +0 -101
hud/cli/rl/display.py +0 -133
hud/cli/rl/gpu.py +0 -63
hud/cli/rl/gpu_utils.py +0 -321
hud/cli/rl/local_runner.py +0 -595
hud/cli/rl/presets.py +0 -96
hud/cli/rl/remote_runner.py +0 -463
hud/cli/rl/rl_api.py +0 -150
hud/cli/rl/vllm.py +0 -177
hud/cli/rl/wait_utils.py +0 -89
hud/datasets/parallel.py +0 -687
hud/misc/__init__.py +0 -1
hud/misc/claude_plays_pokemon.py +0 -292
hud/otel/__init__.py +0 -35
hud/otel/collector.py +0 -142
hud/otel/config.py +0 -181
hud/otel/context.py +0 -570
hud/otel/exporters.py +0 -369
hud/otel/instrumentation.py +0 -135
hud/otel/processors.py +0 -121
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_processors.py +0 -197
hud/rl/README.md +0 -30
hud/rl/__init__.py +0 -1
hud/rl/actor.py +0 -176
hud/rl/buffer.py +0 -405
hud/rl/chat_template.jinja +0 -101
hud/rl/config.py +0 -192
hud/rl/distributed.py +0 -132
hud/rl/learner.py +0 -637
hud/rl/tests/__init__.py +0 -1
hud/rl/tests/test_learner.py +0 -186
hud/rl/train.py +0 -382
hud/rl/types.py +0 -101
hud/rl/utils/start_vllm_server.sh +0 -30
hud/rl/utils.py +0 -524
hud/rl/vllm_adapter.py +0 -143
hud/telemetry/job.py +0 -352
hud/telemetry/replay.py +0 -74
hud/telemetry/tests/test_replay.py +0 -40
hud/telemetry/tests/test_trace.py +0 -63
hud/telemetry/trace.py +0 -158
hud/utils/agent_factories.py +0 -86
hud/utils/async_utils.py +0 -65
hud/utils/group_eval.py +0 -223
hud/utils/progress.py +0 -149
hud/utils/tasks.py +0 -127
hud/utils/tests/test_async_utils.py +0 -173
hud/utils/tests/test_progress.py +0 -261
hud_python-0.4.45.dist-info/METADATA +0 -552
hud_python-0.4.45.dist-info/RECORD +0 -228
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0

hud/agents/tests/test_client.py CHANGED Viewed

@@ -15,7 +15,6 @@ from hud.types import MCPToolResult
 logger = logging.getLogger(__name__)
-@patch("hud.clients.base.setup_hud_telemetry")
 class TestMCPClient:
     """Test MCPClient class."""
@@ -34,7 +33,7 @@ class TestMCPClient:
             yield mock_instance
     @pytest.mark.asyncio
-    async def test_connect_single_server(self, mock_telemetry, mock_mcp_use_client):
+    async def test_connect_single_server(self, mock_mcp_use_client):
         """Test connecting to a single server."""
         config = {"test_server": {"command": "python", "args": ["-m", "test_server"]}}
@@ -77,7 +76,7 @@ class TestMCPClient:
         assert names == {"tool1", "tool2"}
     @pytest.mark.asyncio
-    async def test_connect_multiple_servers(self, mock_telemetry, mock_mcp_use_client):
+    async def test_connect_multiple_servers(self, mock_mcp_use_client):
         """Test connecting to multiple servers."""
         config = {
             "server1": {"command": "python", "args": ["-m", "server1"]},
@@ -129,7 +128,7 @@ class TestMCPClient:
         assert names == {"server1_tool1", "server2_tool2"}
     @pytest.mark.asyncio
-    async def test_call_tool(self, mock_telemetry, mock_mcp_use_client):
+    async def test_call_tool(self, mock_mcp_use_client):
         """Test calling a tool."""
         config = {"test": {"command": "test"}}
         client = MCPClient(mcp_config=config)
@@ -180,7 +179,7 @@ class TestMCPClient:
         )
     @pytest.mark.asyncio
-    async def test_call_tool_not_found(self, mock_telemetry, mock_mcp_use_client):
+    async def test_call_tool_not_found(self, mock_mcp_use_client):
         """Test calling a non-existent tool."""
         config = {"test": {"command": "test"}}
         client = MCPClient(mcp_config=config)
@@ -208,7 +207,7 @@ class TestMCPClient:
         assert "Tool 'nonexistent' not found" in text_content
     @pytest.mark.asyncio
-    async def test_get_telemetry_data(self, mock_telemetry, mock_mcp_use_client):
+    async def test_get_telemetry_data(self, mock_mcp_use_client):
         """Test getting telemetry data."""
         config = {"test": {"command": "test"}}
         client = MCPClient(mcp_config=config)
@@ -245,7 +244,7 @@ class TestMCPClient:
         assert isinstance(telemetry_data, dict)
     @pytest.mark.asyncio
-    async def test_close(self, mock_telemetry, mock_mcp_use_client):
+    async def test_close(self, mock_mcp_use_client):
         """Test closing client connections."""
         config = {"test": {"command": "test"}}
         client = MCPClient(mcp_config=config)
@@ -267,7 +266,7 @@ class TestMCPClient:
         mock_mcp_use_client.close_all_sessions.assert_called_once()
     @pytest.mark.asyncio
-    async def test_context_manager(self, mock_telemetry, mock_mcp_use_client):
+    async def test_context_manager(self, mock_mcp_use_client):
         """Test using client as context manager."""
         mock_session = MagicMock()
         mock_session.connector = MagicMock()
@@ -291,7 +290,7 @@ class TestMCPClient:
         mock_mcp_use_client.close_all_sessions.assert_called_once()
     @pytest.mark.asyncio
-    async def test_get_available_tools(self, mock_telemetry, mock_mcp_use_client):
+    async def test_get_available_tools(self, mock_mcp_use_client):
         """Test getting available tools."""
         config = {"test": {"command": "test"}}
         client = MCPClient(mcp_config=config)
@@ -319,7 +318,7 @@ class TestMCPClient:
         assert names == {"tool1", "tool2"}
     @pytest.mark.asyncio
-    async def test_get_tool_map(self, mock_telemetry, mock_mcp_use_client):
+    async def test_get_tool_map(self, mock_mcp_use_client):
         """Test getting tool map."""
         config = {"test": {"command": "test"}}
         client = MCPClient(mcp_config=config)

hud/agents/tests/test_gemini.py ADDED Viewed

@@ -0,0 +1,369 @@
+"""Tests for Gemini MCP Agent implementation."""
+from __future__ import annotations
+import base64
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock, patch
+import pytest
+from google import genai
+from google.genai import types as genai_types
+from mcp import types
+from hud.agents.gemini import GeminiAgent
+from hud.environment.router import ToolRouter
+from hud.eval.context import EvalContext
+from hud.types import MCPToolCall, MCPToolResult
+class MockEvalContext(EvalContext):
+    """Mock EvalContext for testing."""
+    def __init__(self, tools: list[types.Tool] | None = None) -> None:
+        # Core attributes
+        self.prompt = "Test prompt"
+        self._tools = tools or []
+        self._submitted: str | None = None
+        self.reward: float | None = None
+        # Environment attributes
+        self._router = ToolRouter()
+        self._agent_include: list[str] | None = None
+        self._agent_exclude: list[str] | None = None
+        # EvalContext attributes
+        self._task = None
+        self.trace_id = "test-trace-id"
+        self.eval_name = "test-eval"
+        self.job_id: str | None = None
+        self.group_id: str | None = None
+        self.index = 0
+        self.variants: dict[str, Any] = {}
+        self.answer: str | None = None
+        self.system_prompt: str | None = None
+        self.error: BaseException | None = None
+        self.metadata: dict[str, Any] = {}
+        self.results: list[Any] = []
+        self._is_summary = False
+    def as_tools(self) -> list[types.Tool]:
+        return self._tools
+    @property
+    def has_scenario(self) -> bool:
+        return False
+    async def list_tools(self) -> list[types.Tool]:
+        return self._tools
+    async def call_tool(self, call: Any, /, **kwargs: Any) -> MCPToolResult:
+        return MCPToolResult(
+            content=[types.TextContent(type="text", text="ok")],
+            isError=False,
+        )
+    async def submit(self, answer: str) -> None:
+        self._submitted = answer
+class TestGeminiAgent:
+    """Test GeminiAgent base class."""
+    @pytest.fixture
+    def mock_gemini_client(self) -> MagicMock:
+        """Create a stub Gemini client."""
+        client = MagicMock(spec=genai.Client)
+        client.api_key = "test_key"
+        client.models = MagicMock()
+        client.models.list = MagicMock(return_value=iter([]))
+        client.models.generate_content = MagicMock()
+        # Set up async interface (aio.models.generate_content)
+        client.aio = MagicMock()
+        client.aio.models = MagicMock()
+        client.aio.models.generate_content = AsyncMock()
+        return client
+    @pytest.mark.asyncio
+    async def test_init(self, mock_gemini_client: MagicMock) -> None:
+        """Test agent initialization."""
+        agent = GeminiAgent.create(
+            model_client=mock_gemini_client,
+            model="gemini-2.5-flash",
+            validate_api_key=False,
+        )
+        assert agent.model_name == "Gemini"
+        assert agent.config.model == "gemini-2.5-flash"
+        assert agent.gemini_client == mock_gemini_client
+    @pytest.mark.asyncio
+    async def test_init_without_model_client(self) -> None:
+        """Test agent initialization without model client."""
+        with (
+            patch("hud.settings.settings.gemini_api_key", "test_key"),
+            patch("hud.agents.gemini.genai.Client") as mock_client_class,
+        ):
+            mock_client = MagicMock()
+            mock_client.api_key = "test_key"
+            mock_client.models = MagicMock()
+            mock_client.models.list = MagicMock(return_value=iter([]))
+            mock_client_class.return_value = mock_client
+            agent = GeminiAgent.create(
+                model="gemini-2.5-flash",
+                validate_api_key=False,
+            )
+            assert agent.gemini_client is not None
+    @pytest.mark.asyncio
+    async def test_format_blocks_text_only(self, mock_gemini_client: MagicMock) -> None:
+        """Test formatting text content blocks."""
+        agent = GeminiAgent.create(
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        blocks: list[types.ContentBlock] = [
+            types.TextContent(type="text", text="Hello, world!"),
+            types.TextContent(type="text", text="How are you?"),
+        ]
+        messages = await agent.format_blocks(blocks)
+        assert len(messages) == 1
+        assert messages[0].role == "user"
+        assert messages[0].parts is not None
+        assert len(messages[0].parts) == 2
+    @pytest.mark.asyncio
+    async def test_format_blocks_with_image(self, mock_gemini_client: MagicMock) -> None:
+        """Test formatting image content blocks."""
+        agent = GeminiAgent.create(
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        # Create a tiny valid base64 PNG
+        png_data = base64.b64encode(b"\x89PNG\r\n\x1a\n").decode()
+        blocks: list[types.ContentBlock] = [
+            types.TextContent(type="text", text="Look at this:"),
+            types.ImageContent(type="image", data=png_data, mimeType="image/png"),
+        ]
+        messages = await agent.format_blocks(blocks)
+        assert len(messages) == 1
+        assert messages[0].parts is not None
+        assert len(messages[0].parts) == 2
+    @pytest.mark.asyncio
+    async def test_format_tool_results(self, mock_gemini_client: MagicMock) -> None:
+        """Test formatting tool results."""
+        agent = GeminiAgent.create(
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        tool_calls = [MCPToolCall(id="call_123", name="test_tool", arguments={})]
+        tool_results = [
+            MCPToolResult(
+                content=[types.TextContent(type="text", text="Tool output")],
+                isError=False,
+            )
+        ]
+        messages = await agent.format_tool_results(tool_calls, tool_results)
+        assert len(messages) == 1
+        assert messages[0].role == "user"
+    @pytest.mark.asyncio
+    async def test_get_system_messages(self, mock_gemini_client: MagicMock) -> None:
+        """Test that system messages return empty (Gemini uses system_instruction)."""
+        agent = GeminiAgent.create(
+            model_client=mock_gemini_client,
+            system_prompt="You are a helpful assistant.",
+            validate_api_key=False,
+        )
+        messages = await agent.get_system_messages()
+        # Gemini doesn't use system messages in the message list
+        assert messages == []
+    @pytest.mark.asyncio
+    async def test_get_response_text_only(self, mock_gemini_client: MagicMock) -> None:
+        """Test getting text-only response."""
+        # Disable telemetry for this test
+        with patch("hud.settings.settings.telemetry_enabled", False):
+            agent = GeminiAgent.create(
+                model_client=mock_gemini_client,
+                validate_api_key=False,
+            )
+            # Set up agent as initialized (no tools needed for this test)
+            agent.gemini_tools = []
+            agent._initialized = True
+            # Mock the API response with text only
+            mock_response = MagicMock()
+            mock_candidate = MagicMock()
+            text_part = MagicMock()
+            text_part.text = "Task completed successfully"
+            text_part.function_call = None
+            mock_candidate.content = MagicMock()
+            mock_candidate.content.parts = [text_part]
+            mock_response.candidates = [mock_candidate]
+            mock_gemini_client.aio.models.generate_content = AsyncMock(return_value=mock_response)
+            messages = [
+                genai_types.Content(role="user", parts=[genai_types.Part.from_text(text="Status?")])
+            ]
+            response = await agent.get_response(messages)
+            assert response.content == "Task completed successfully"
+            assert response.tool_calls == []
+            assert response.done is True
+    @pytest.mark.asyncio
+    async def test_get_response_with_thinking(self, mock_gemini_client: MagicMock) -> None:
+        """Test getting response with thinking content."""
+        with patch("hud.settings.settings.telemetry_enabled", False):
+            agent = GeminiAgent.create(
+                model_client=mock_gemini_client,
+                validate_api_key=False,
+            )
+            # Set up agent as initialized (no tools needed for this test)
+            agent.gemini_tools = []
+            agent._initialized = True
+            mock_response = MagicMock()
+            mock_candidate = MagicMock()
+            thinking_part = MagicMock()
+            thinking_part.text = "Let me reason through this..."
+            thinking_part.function_call = None
+            thinking_part.thought = True
+            text_part = MagicMock()
+            text_part.text = "Here is my answer"
+            text_part.function_call = None
+            text_part.thought = False
+            mock_candidate.content = MagicMock()
+            mock_candidate.content.parts = [thinking_part, text_part]
+            mock_response.candidates = [mock_candidate]
+            mock_gemini_client.aio.models.generate_content = AsyncMock(return_value=mock_response)
+            messages = [
+                genai_types.Content(
+                    role="user", parts=[genai_types.Part.from_text(text="Hard question")]
+                )
+            ]
+            response = await agent.get_response(messages)
+            assert response.content == "Here is my answer"
+            assert response.reasoning == "Let me reason through this..."
+    @pytest.mark.asyncio
+    async def test_convert_tools_for_gemini(self, mock_gemini_client: MagicMock) -> None:
+        """Test converting MCP tools to Gemini format."""
+        tools = [
+            types.Tool(
+                name="my_tool",
+                description="A test tool",
+                inputSchema={"type": "object", "properties": {"x": {"type": "string"}}},
+            )
+        ]
+        ctx = MockEvalContext(tools=tools)
+        agent = GeminiAgent.create(
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        agent.ctx = ctx
+        await agent._initialize_from_ctx(ctx)
+        # Check that tools were converted
+        assert len(agent.gemini_tools) == 1
+        # Gemini tools have function_declarations - cast to genai Tool type
+        gemini_tool = agent.gemini_tools[0]
+        assert isinstance(gemini_tool, genai_types.Tool)
+        assert gemini_tool.function_declarations is not None
+        assert gemini_tool.function_declarations[0].name == "my_tool"
+class TestGeminiToolConversion:
+    """Tests for tool conversion to Gemini format."""
+    @pytest.fixture
+    def mock_gemini_client(self) -> MagicMock:
+        """Create a stub Gemini client."""
+        client = MagicMock(spec=genai.Client)
+        client.api_key = "test_key"
+        client.models = MagicMock()
+        client.models.list = MagicMock(return_value=iter([]))
+        # Set up async interface
+        client.aio = MagicMock()
+        client.aio.models = MagicMock()
+        client.aio.models.generate_content = AsyncMock()
+        return client
+    @pytest.mark.asyncio
+    async def test_tool_with_properties(self, mock_gemini_client: MagicMock) -> None:
+        """Test tool with input properties."""
+        tools = [
+            types.Tool(
+                name="search",
+                description="Search the web",
+                inputSchema={
+                    "type": "object",
+                    "properties": {
+                        "query": {"type": "string", "description": "Search query"},
+                        "limit": {"type": "integer", "description": "Max results"},
+                    },
+                    "required": ["query"],
+                },
+            )
+        ]
+        ctx = MockEvalContext(tools=tools)
+        agent = GeminiAgent.create(
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        agent.ctx = ctx
+        await agent._initialize_from_ctx(ctx)
+        assert len(agent.gemini_tools) == 1
+        gemini_tool = agent.gemini_tools[0]
+        # Gemini tools have function_declarations - cast to genai Tool type
+        assert isinstance(gemini_tool, genai_types.Tool)
+        assert gemini_tool.function_declarations is not None
+        assert gemini_tool.function_declarations[0].name == "search"
+        assert gemini_tool.function_declarations[0].parameters_json_schema is not None
+    @pytest.mark.asyncio
+    async def test_tool_without_schema(self, mock_gemini_client: MagicMock) -> None:
+        """Test tool without description raises error."""
+        # Create a tool with inputSchema but no description
+        tools = [
+            types.Tool(
+                name="incomplete",
+                description=None,
+                inputSchema={"type": "object"},
+            )
+        ]
+        ctx = MockEvalContext(tools=tools)
+        agent = GeminiAgent.create(
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        agent.ctx = ctx
+        with pytest.raises(ValueError, match="requires both a description"):
+            await agent._initialize_from_ctx(ctx)

hud/agents/tests/test_grounded_openai_agent.py CHANGED Viewed

@@ -1,60 +1,16 @@
 from __future__ import annotations
-import json
 from typing import Any
 import mcp.types as types
 import pytest
+from openai import AsyncOpenAI
 from hud.agents.grounded_openai import GroundedOpenAIChatAgent
 from hud.tools.grounding import GrounderConfig
 from hud.types import MCPToolCall, MCPToolResult
-class DummyOpenAI:
-    class chat:  # type: ignore[no-redef]
-        class completions:
-            @staticmethod
-            async def create(**kwargs: Any) -> Any:
-                # Return a minimal object mimicking OpenAI response
-                class Msg:
-                    def __init__(self) -> None:
-                        self.content = "Thinking..."
-                        self.tool_calls = [
-                            type(
-                                "ToolCall",
-                                (),
-                                {
-                                    "id": "call_1",
-                                    "function": type(
-                                        "Fn",
-                                        (),
-                                        {
-                                            "name": "computer",
-                                            "arguments": json.dumps(
-                                                {
-                                                    "action": "click",
-                                                    "element_description": "blue button",
-                                                }
-                                            ),
-                                        },
-                                    ),
-                                },
-                            )()
-                        ]
-                class Choice:
-                    def __init__(self) -> None:
-                        self.message = Msg()
-                        self.finish_reason = "tool_calls"
-                class Resp:
-                    def __init__(self) -> None:
-                        self.choices = [Choice()]
-                return Resp()
 class FakeMCPClient:
     def __init__(self) -> None:
         self.tools: list[types.Tool] = [
@@ -62,6 +18,7 @@ class FakeMCPClient:
             types.Tool(name="setup", description="internal functions", inputSchema={}),
         ]
         self.called: list[MCPToolCall] = []
+        self._initialized = True
     async def initialize(self, mcp_config: dict[str, dict[str, Any]] | None = None) -> None:
         return None
@@ -77,6 +34,10 @@ class FakeMCPClient:
     def mcp_config(self) -> dict[str, dict[str, Any]]:
         return {"local": {"command": "echo", "args": ["ok"]}}
+    @property
+    def is_connected(self) -> bool:
+        return self._initialized
     async def shutdown(self) -> None:
         return None
@@ -109,19 +70,20 @@ class DummyGroundedTool:
 @pytest.mark.asyncio
 async def test_call_tools_injects_screenshot_and_delegates(monkeypatch: pytest.MonkeyPatch) -> None:
-    # Agent with fake OpenAI client and fake MCP client
+    # Agent with fake OpenAI client
     grounder_cfg = GrounderConfig(api_base="http://example", model="qwen")
-    agent = GroundedOpenAIChatAgent(
+    fake_openai = AsyncOpenAI(api_key="test")
+    agent = GroundedOpenAIChatAgent.create(
         grounder_config=grounder_cfg,
-        openai_client=DummyOpenAI(),
-        model_name="gpt-4o-mini",
-        mcp_client=FakeMCPClient(),
+        openai_client=fake_openai,
+        model="gpt-4o-mini",
         initial_screenshot=False,
     )
     # Inject a dummy grounded tool to observe args without full initialization
     dummy_tool = DummyGroundedTool()
     agent.grounded_tool = dummy_tool  # type: ignore
+    agent._initialized = True  # Mark as initialized to skip context initialization
     # Seed conversation history with a user image
     png_b64 = (
@@ -153,3 +115,56 @@ async def test_call_tools_injects_screenshot_and_delegates(monkeypatch: pytest.M
     assert dummy_tool.last_args["element_description"] == "blue button"
     assert "screenshot_b64" in dummy_tool.last_args
     assert isinstance(dummy_tool.last_args["screenshot_b64"], str)
+@pytest.mark.asyncio
+async def test_get_response_with_reasoning() -> None:
+    """Test that reasoning content is extracted from the response."""
+    from unittest.mock import AsyncMock, MagicMock, patch
+    grounder_cfg = GrounderConfig(api_base="http://example", model="qwen")
+    fake_openai = AsyncOpenAI(api_key="test")
+    with patch("hud.settings.settings.telemetry_enabled", False):
+        agent = GroundedOpenAIChatAgent.create(
+            grounder_config=grounder_cfg,
+            openai_client=fake_openai,
+            model="gpt-4o-mini",
+            initial_screenshot=False,
+        )
+        mock_response = MagicMock()
+        mock_choice = MagicMock()
+        mock_message = MagicMock()
+        mock_message.content = "Here is my answer"
+        mock_message.reasoning_content = "Let me think step by step..."
+        mock_message.tool_calls = None
+        mock_choice.message = mock_message
+        mock_choice.finish_reason = "stop"
+        mock_response.choices = [mock_choice]
+        agent.oai.chat.completions.create = AsyncMock(return_value=mock_response)
+        agent._initialized = True  # Mark as initialized to skip context initialization
+        # Include an image so get_response doesn't try to take a screenshot via ctx
+        png_b64 = (
+            "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGMAAQAABQAB"
+            "J2n0mQAAAABJRU5ErkJggg=="
+        )
+        agent.conversation_history = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{png_b64}"}},
+                    {"type": "text", "text": "Hard question"},
+                ],
+            }
+        ]
+        response = await agent.get_response(agent.conversation_history)
+        assert response.content == "Here is my answer"
+        assert response.reasoning == "Let me think step by step..."

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl