PyPI - hud-python - Versions diffs - 0.4.57__py3-none-any.whl → 0.4.59__py3-none-any.whl - Mend

hud-python 0.4.57__py3-none-any.whl → 0.4.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (35)

hud/agents/__init__.py +2 -0
hud/agents/gemini.py +492 -0
hud/agents/tests/test_gemini.py +372 -0
hud/cli/__init__.py +46 -31
hud/cli/dev.py +111 -1
hud/cli/eval.py +59 -3
hud/cli/flows/dev.py +5 -3
hud/cli/init.py +14 -18
hud/cli/push.py +2 -2
hud/cli/rl/__init__.py +1 -1
hud/cli/rl/celebrate.py +1 -1
hud/cli/rl/remote_runner.py +3 -3
hud/cli/tests/test_eval.py +20 -0
hud/clients/base.py +1 -1
hud/clients/fastmcp.py +1 -1
hud/otel/config.py +1 -1
hud/otel/context.py +2 -2
hud/server/server.py +283 -36
hud/settings.py +6 -0
hud/shared/hints.py +3 -3
hud/telemetry/job.py +2 -2
hud/tools/__init__.py +13 -2
hud/tools/computer/__init__.py +2 -0
hud/tools/computer/gemini.py +385 -0
hud/tools/computer/settings.py +21 -0
hud/tools/playwright.py +17 -2
hud/tools/types.py +9 -1
hud/types.py +2 -1
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.57.dist-info → hud_python-0.4.59.dist-info}/METADATA +2 -1
{hud_python-0.4.57.dist-info → hud_python-0.4.59.dist-info}/RECORD +35 -32
{hud_python-0.4.57.dist-info → hud_python-0.4.59.dist-info}/WHEEL +0 -0
{hud_python-0.4.57.dist-info → hud_python-0.4.59.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.57.dist-info → hud_python-0.4.59.dist-info}/licenses/LICENSE +0 -0

hud/agents/tests/test_gemini.py ADDED Viewed

@@ -0,0 +1,372 @@
+"""Tests for Gemini MCP Agent implementation."""
+from __future__ import annotations
+import base64
+from unittest.mock import AsyncMock, MagicMock, patch
+import pytest
+from google.genai import types as genai_types
+from mcp import types
+from hud.agents.gemini import GeminiAgent
+from hud.types import MCPToolCall, MCPToolResult
+class TestGeminiAgent:
+    """Test GeminiAgent class."""
+    @pytest.fixture
+    def mock_mcp_client(self):
+        """Create a mock MCP client."""
+        mcp_client = AsyncMock()
+        # Set up the mcp_config attribute as a regular dict, not a coroutine
+        mcp_client.mcp_config = {"test_server": {"url": "http://test"}}
+        # Mock list_tools to return gemini_computer tool
+        mcp_client.list_tools = AsyncMock(
+            return_value=[
+                types.Tool(
+                    name="gemini_computer",
+                    description="Gemini computer use tool",
+                    inputSchema={},
+                )
+            ]
+        )
+        mcp_client.initialize = AsyncMock()
+        return mcp_client
+    @pytest.fixture
+    def mock_gemini_client(self):
+        """Create a mock Gemini client."""
+        client = MagicMock()
+        client.api_key = "test_key"
+        # Mock models.list for validation
+        client.models = MagicMock()
+        client.models.list = MagicMock(return_value=iter([]))
+        return client
+    @pytest.mark.asyncio
+    async def test_init(self, mock_mcp_client, mock_gemini_client):
+        """Test agent initialization."""
+        agent = GeminiAgent(
+            mcp_client=mock_mcp_client,
+            model_client=mock_gemini_client,
+            model="gemini-2.5-computer-use-preview-10-2025",
+            validate_api_key=False,  # Skip validation in tests
+        )
+        assert agent.model_name == "gemini-2.5-computer-use-preview-10-2025"
+        assert agent.model == "gemini-2.5-computer-use-preview-10-2025"
+        assert agent.gemini_client == mock_gemini_client
+    @pytest.mark.asyncio
+    async def test_init_without_model_client(self, mock_mcp_client):
+        """Test agent initialization without model client."""
+        with (
+            patch("hud.settings.settings.gemini_api_key", "test_key"),
+            patch("hud.agents.gemini.genai.Client") as mock_client_class,
+        ):
+            mock_client = MagicMock()
+            mock_client.api_key = "test_key"
+            mock_client.models = MagicMock()
+            mock_client.models.list = MagicMock(return_value=iter([]))
+            mock_client_class.return_value = mock_client
+            agent = GeminiAgent(
+                mcp_client=mock_mcp_client,
+                model="gemini-2.5-computer-use-preview-10-2025",
+                validate_api_key=False,
+            )
+            assert agent.model_name == "gemini-2.5-computer-use-preview-10-2025"
+            assert agent.gemini_client is not None
+    @pytest.mark.asyncio
+    async def test_format_blocks(self, mock_mcp_client, mock_gemini_client):
+        """Test formatting content blocks into Gemini messages."""
+        agent = GeminiAgent(
+            mcp_client=mock_mcp_client,
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        # Test with text only
+        text_blocks: list[types.ContentBlock] = [
+            types.TextContent(type="text", text="Hello, Gemini!")
+        ]
+        messages = await agent.format_blocks(text_blocks)
+        assert len(messages) == 1
+        assert messages[0].role == "user"
+        parts = messages[0].parts
+        assert parts is not None
+        assert len(parts) == 1
+        assert parts[0].text == "Hello, Gemini!"
+        # Test with screenshot
+        image_blocks: list[types.ContentBlock] = [
+            types.TextContent(type="text", text="Look at this"),
+            types.ImageContent(
+                type="image",
+                data=base64.b64encode(b"fakeimage").decode("utf-8"),
+                mimeType="image/png",
+            ),
+        ]
+        messages = await agent.format_blocks(image_blocks)
+        assert len(messages) == 1
+        assert messages[0].role == "user"
+        parts = messages[0].parts
+        assert parts is not None
+        assert len(parts) == 2
+        # First part is text
+        assert parts[0].text == "Look at this"
+        # Second part is image - check that it was created from bytes
+        assert parts[1].inline_data is not None
+    @pytest.mark.asyncio
+    async def test_format_tool_results(self, mock_mcp_client, mock_gemini_client):
+        """Test the agent's format_tool_results method."""
+        agent = GeminiAgent(
+            mcp_client=mock_mcp_client,
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        tool_calls = [
+            MCPToolCall(
+                name="gemini_computer",
+                arguments={"action": "click_at", "x": 100, "y": 200},
+                id="call_1",  # type: ignore
+                gemini_name="click_at",  # type: ignore
+            ),
+        ]
+        tool_results = [
+            MCPToolResult(
+                content=[
+                    types.TextContent(type="text", text="Clicked successfully"),
+                    types.ImageContent(
+                        type="image",
+                        data=base64.b64encode(b"screenshot").decode("utf-8"),
+                        mimeType="image/png",
+                    ),
+                ],
+                isError=False,
+            ),
+        ]
+        messages = await agent.format_tool_results(tool_calls, tool_results)
+        # format_tool_results returns a single user message with function responses
+        assert len(messages) == 1
+        assert messages[0].role == "user"
+        # The content contains function response parts
+        parts = messages[0].parts
+        assert parts is not None
+        assert len(parts) == 1
+        function_response = parts[0].function_response
+        assert function_response is not None
+        assert function_response.name == "click_at"
+        response_payload = function_response.response or {}
+        assert response_payload.get("success") is True
+    @pytest.mark.asyncio
+    async def test_format_tool_results_with_error(self, mock_mcp_client, mock_gemini_client):
+        """Test formatting tool results with errors."""
+        agent = GeminiAgent(
+            mcp_client=mock_mcp_client,
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        tool_calls = [
+            MCPToolCall(
+                name="gemini_computer",
+                arguments={"action": "invalid"},
+                id="call_error",  # type: ignore
+                gemini_name="invalid_action",  # type: ignore
+            ),
+        ]
+        tool_results = [
+            MCPToolResult(
+                content=[types.TextContent(type="text", text="Action failed: invalid action")],
+                isError=True,
+            ),
+        ]
+        messages = await agent.format_tool_results(tool_calls, tool_results)
+        # Check that error is in the response
+        assert len(messages) == 1
+        assert messages[0].role == "user"
+        parts = messages[0].parts
+        assert parts is not None
+        function_response = parts[0].function_response
+        assert function_response is not None
+        response_payload = function_response.response or {}
+        assert "error" in response_payload
+    @pytest.mark.asyncio
+    async def test_get_response(self, mock_mcp_client, mock_gemini_client):
+        """Test getting model response from Gemini API."""
+        # Disable telemetry for this test
+        with patch("hud.settings.settings.telemetry_enabled", False):
+            agent = GeminiAgent(
+                mcp_client=mock_mcp_client,
+                model_client=mock_gemini_client,
+                validate_api_key=False,
+            )
+            # Set up available tools
+            agent._available_tools = [
+                types.Tool(name="gemini_computer", description="Computer tool", inputSchema={})
+            ]
+            # Mock the API response
+            mock_response = MagicMock()
+            mock_candidate = MagicMock()
+            # Create text part
+            text_part = MagicMock()
+            text_part.text = "I will click at coordinates"
+            text_part.function_call = None
+            # Create function call part
+            function_call_part = MagicMock()
+            function_call_part.text = None
+            function_call_part.function_call = MagicMock()
+            function_call_part.function_call.name = "click_at"
+            function_call_part.function_call.args = {"x": 100, "y": 200}
+            mock_candidate.content = MagicMock()
+            mock_candidate.content.parts = [text_part, function_call_part]
+            mock_response.candidates = [mock_candidate]
+            mock_gemini_client.models = MagicMock()
+            mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
+            messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Click")])]
+            response = await agent.get_response(messages)
+            assert response.content == "I will click at coordinates"
+            assert len(response.tool_calls) == 1
+            assert response.tool_calls[0].arguments == {"action": "click_at", "x": 100, "y": 200}
+            assert response.done is False
+    @pytest.mark.asyncio
+    async def test_get_response_text_only(self, mock_mcp_client, mock_gemini_client):
+        """Test getting text-only response."""
+        # Disable telemetry for this test
+        with patch("hud.settings.settings.telemetry_enabled", False):
+            agent = GeminiAgent(
+                mcp_client=mock_mcp_client,
+                model_client=mock_gemini_client,
+                validate_api_key=False,
+            )
+            # Mock the API response with text only
+            mock_response = MagicMock()
+            mock_candidate = MagicMock()
+            text_part = MagicMock()
+            text_part.text = "Task completed successfully"
+            text_part.function_call = None
+            mock_candidate.content = MagicMock()
+            mock_candidate.content.parts = [text_part]
+            mock_response.candidates = [mock_candidate]
+            mock_gemini_client.models = MagicMock()
+            mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
+            messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Status?")])]
+            response = await agent.get_response(messages)
+            assert response.content == "Task completed successfully"
+            assert response.tool_calls == []
+            assert response.done is True
+    @pytest.mark.asyncio
+    async def test_convert_tools_for_gemini(self, mock_mcp_client, mock_gemini_client):
+        """Test converting MCP tools to Gemini format."""
+        agent = GeminiAgent(
+            mcp_client=mock_mcp_client,
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        # Set up available tools
+        agent._available_tools = [
+            types.Tool(
+                name="gemini_computer",
+                description="Computer tool",
+                inputSchema={"type": "object"},
+            ),
+            types.Tool(
+                name="calculator",
+                description="Calculator tool",
+                inputSchema={
+                    "type": "object",
+                    "properties": {"operation": {"type": "string"}},
+                },
+            ),
+        ]
+        gemini_tools = agent._convert_tools_for_gemini()
+        # Should have 2 tools: computer_use and calculator
+        assert len(gemini_tools) == 2
+        # First should be computer use tool
+        assert gemini_tools[0].computer_use is not None
+        assert (
+            gemini_tools[0].computer_use.environment == genai_types.Environment.ENVIRONMENT_BROWSER
+        )
+        # Second should be calculator as function declaration
+        assert gemini_tools[1].function_declarations is not None
+        assert len(gemini_tools[1].function_declarations) == 1
+        assert gemini_tools[1].function_declarations[0].name == "calculator"
+    @pytest.mark.asyncio
+    async def test_create_user_message(self, mock_mcp_client, mock_gemini_client):
+        """Test creating a user message."""
+        agent = GeminiAgent(
+            mcp_client=mock_mcp_client,
+            model_client=mock_gemini_client,
+            validate_api_key=False,
+        )
+        message = await agent.create_user_message("Hello Gemini")
+        assert message.role == "user"
+        parts = message.parts
+        assert parts is not None
+        assert len(parts) == 1
+        assert parts[0].text == "Hello Gemini"
+    @pytest.mark.asyncio
+    async def test_handle_empty_response(self, mock_mcp_client, mock_gemini_client):
+        """Test handling empty response from API."""
+        with patch("hud.settings.settings.telemetry_enabled", False):
+            agent = GeminiAgent(
+                mcp_client=mock_mcp_client,
+                model_client=mock_gemini_client,
+                validate_api_key=False,
+            )
+            # Mock empty response
+            mock_response = MagicMock()
+            mock_response.candidates = []
+            mock_gemini_client.models = MagicMock()
+            mock_gemini_client.models.generate_content = MagicMock(return_value=mock_response)
+            messages = [genai_types.Content(role="user", parts=[genai_types.Part(text="Hi")])]
+            response = await agent.get_response(messages)
+            assert response.content == ""
+            assert response.tool_calls == []
+            assert response.done is True

hud/cli/__init__.py CHANGED Viewed

@@ -93,14 +93,14 @@ def analyze(
 ) -> None:
     """🔍 Analyze MCP environment - discover tools, resources, and capabilities.
-    By default, uses cached metadata for instant results.
+    [not dim]By default, uses cached metadata for instant results.
     Use --live to run the container for real-time analysis.
     Examples:
         hud analyze hudpython/test_init      # Fast metadata inspection
         hud analyze my-env --live            # Full container analysis
         hud analyze --config mcp-config.json # From MCP config
-        hud analyze --cursor text-2048-dev   # From Cursor config
+        hud analyze --cursor text-2048-dev   # From Cursor config[/not dim]
     """
     if config:
         # Load config from JSON file (always live for configs)
@@ -177,7 +177,7 @@ def debug(
 ) -> None:
     """🐛 Debug MCP environment - test initialization, tools, and readiness.
-    Examples:
+    [not dim]Examples:
         hud debug .                              # Debug current directory
         hud debug environments/browser           # Debug specific directory
         hud debug . --build                      # Build then debug
@@ -185,7 +185,7 @@ def debug(
         hud debug my-mcp-server:v1 -e API_KEY=xxx
         hud debug --config mcp-config.json
         hud debug --cursor text-2048-dev
-        hud debug . --max-phase 3               # Stop after phase 3
+        hud debug . --max-phase 3               # Stop after phase 3[/not dim]
     """
     # Import here to avoid circular imports
     from hud.utils.hud_console import HUDConsole
@@ -253,10 +253,23 @@ def debug(
         else:
             # Assume it's an image name
             image = first_param
-            from .utils.docker import build_run_command
+            from .utils.docker import create_docker_run_command
-            # Image-only mode: do not auto-inject local .env
-            command = build_run_command(image, docker_args)
+            # For image mode, check if there's a .env file in current directory
+            # and use it if available (similar to hud dev behavior)
+            cwd = Path.cwd()
+            if (cwd / ".env").exists():
+                # Use create_docker_run_command to load .env from current directory
+                command = create_docker_run_command(
+                    image,
+                    docker_args=docker_args,
+                    env_dir=cwd,  # Load .env from current directory
+                )
+            else:
+                # No .env file, use basic command without env loading
+                from .utils.docker import build_run_command
+                command = build_run_command(image, docker_args)
     else:
         console.print(
             "[red]Error: Must specify a directory, Docker image, --config, or --cursor[/red]"
@@ -390,7 +403,7 @@ def dev(
 ) -> None:
     """🔥 Development mode - run MCP server with hot-reload.
-    TWO MODES:
+    [not dim]TWO MODES:
     1. Python Module:
        hud dev                    # Auto-detects module
@@ -411,7 +424,7 @@ def dev(
         hud dev --watch ../shared    # Watch additional directories
     For environment backend servers, use uvicorn directly:
-        uvicorn server:app --reload
+        uvicorn server:app --reload[/not dim]
     """
     # Extract module from params if provided (first param when not --docker)
     module = params[0] if params and not docker else None
@@ -479,7 +492,7 @@ def run(
 ) -> None:
     """🚀 Run Docker image as MCP server.
-    A simple wrapper around 'docker run' that can launch images locally or remotely.
+    [not dim]A simple wrapper around 'docker run' that can launch images locally or remotely.
     By default, runs remotely via mcp.hud.so. Use --local to run with local Docker.
     For local Python development with hot-reload, use 'hud dev' instead.
@@ -489,7 +502,7 @@ def run(
         hud run my-image:latest --local            # Run with local Docker
         hud run my-image:latest -e KEY=value       # Remote with env vars
         hud run my-image:latest --local -e KEY=val # Local with env vars
-        hud run my-image:latest --transport http   # Use HTTP transport
+        hud run my-image:latest --transport http   # Use HTTP transport[/not dim]
     """
     if not params:
         console.print("[red]❌ Docker image is required[/red]")
@@ -546,7 +559,7 @@ def clone(
 ) -> None:
     """🚀 Clone a git repository quietly with a pretty output.
-    This command wraps 'git clone' with the --quiet flag and displays
+    [not dim]This command wraps 'git clone' with the --quiet flag and displays
     a rich formatted success message. If the repository contains a clone
     message in pyproject.toml, it will be displayed as a tutorial.
@@ -561,7 +574,7 @@ def clone(
     # style = "cyan"
     Examples:
-        hud clone https://github.com/user/repo.git
+        hud clone https://github.com/user/repo.git[/not dim]
     """
     # Run the clone
     success, result = clone_repository(url)
@@ -592,7 +605,7 @@ def build(
 ) -> None:
     """🏗️ Build a HUD environment and generate lock file.
-    This command:
+    [not dim]This command:
     - Builds a Docker image from your environment
     - Analyzes the MCP server to extract metadata
     - Generates a hud.lock.yaml file for reproducibility
@@ -601,7 +614,7 @@ def build(
         hud build                    # Build current directory
         hud build environments/text_2048 -e API_KEY=secret
         hud build . --tag my-env:v1.0 -e VAR1=value1 -e VAR2=value2
-        hud build . --no-cache       # Force rebuild
+        hud build . --no-cache       # Force rebuild[/not dim]
     """
     # Parse directory and extra arguments
     if params:
@@ -657,14 +670,14 @@ def push(
 ) -> None:
     """📤 Push HUD environment to registry.
-    Reads hud.lock.yaml from the directory and pushes to registry.
+    [not dim]Reads hud.lock.yaml from the directory and pushes to registry.
     Auto-detects your Docker username if --image not specified.
     Examples:
         hud push                     # Push with auto-detected name
         hud push --tag v1.0          # Push with specific tag
         hud push . --image myuser/myenv:v1.0
-        hud push --yes               # Skip confirmation
+        hud push --yes               # Skip confirmation[/not dim]
     """
     push_command(directory, image, tag, sign, yes, verbose)
@@ -683,12 +696,12 @@ def pull(
 ) -> None:
     """📥 Pull HUD environment from registry with metadata preview.
-    Shows environment details before downloading.
+    [not dim]Shows environment details before downloading.
     Examples:
         hud pull hud.lock.yaml               # Pull from lock file
         hud pull myuser/myenv:latest        # Pull by image reference
-        hud pull myuser/myenv --verify-only # Check metadata only
+        hud pull myuser/myenv --verify-only # Check metadata only[/not dim]
     """
     pull_command(target, lock_file, yes, verify_only, verbose)
@@ -704,14 +717,14 @@ def list_environments(
 ) -> None:
     """📋 List all HUD environments in local registry.
-    Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
+    [not dim]Shows environments pulled with 'hud pull' stored in ~/.hud/envs/
     Examples:
         hud list                    # List all environments
         hud list --filter text      # Filter by name
         hud list --json            # Output as JSON
         hud list --all             # Show digest column
-        hud list --verbose         # Show full descriptions
+        hud list --verbose         # Show full descriptions[/not dim]
     """
     list_module.list_command(filter_name, json_output, show_all, verbose)
@@ -726,7 +739,7 @@ def remove(
 ) -> None:
     """🗑️ Remove HUD environments from local registry.
-    Removes environment metadata from ~/.hud/envs/
+    [not dim]Removes environment metadata from ~/.hud/envs/
     Note: This does not remove the Docker images.
     Examples:
@@ -734,35 +747,36 @@ def remove(
         hud remove text_2048           # Remove by name
         hud remove hudpython/test_init # Remove by full name
         hud remove all                 # Remove all environments
-        hud remove all --yes           # Remove all without confirmation
+        hud remove all --yes           # Remove all without confirmation[/not dim]
     """
     remove_command(target, yes, verbose)
 @app.command()
 def init(
-    name: str = typer.Argument(None, help="Environment name (default: current directory name)"),
+    name: str = typer.Argument(None, help="Environment name (default: chosen preset name)"),
     preset: str | None = typer.Option(
         None,
         "--preset",
         "-p",
         help="Preset to use: blank, deep-research, browser, rubrics. If omitted, you'll choose interactively.",  # noqa: E501
     ),
-    directory: str = typer.Option(".", "--dir", "-d", help="Target directory"),
+    directory: str = typer.Option(".", "--dir", "-d", help="Parent directory for the environment"),
     force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing files"),
 ) -> None:
     """🚀 Initialize a new HUD environment with minimal boilerplate.
-    Creates a working MCP environment with:
+    [not dim]Creates a working MCP environment with:
     - Dockerfile for containerization
     - pyproject.toml for dependencies
     - Minimal MCP server with context
     - Required setup/evaluate tools
     Examples:
-        hud init                    # Use current directory name
-        hud init my-env             # Create in ./my-env/
-        hud init my-env --dir /tmp  # Create in /tmp/my-env/
+        hud init                    # Choose preset interactively, create ./preset-name/
+        hud init my-env             # Create new directory ./my-env/
+        hud init my-env --dir /tmp  # Create in /tmp/my-env/[/not dim]
     """
     create_environment(name, directory, force, preset)
@@ -904,6 +918,7 @@ def eval(
             [
                 {"name": "Claude 4 Sonnet", "value": AgentType.CLAUDE},
                 {"name": "OpenAI Computer Use", "value": AgentType.OPENAI},
+                {"name": "Gemini Computer Use", "value": AgentType.GEMINI},
                 {"name": "vLLM (Local Server)", "value": AgentType.VLLM},
                 {"name": "LiteLLM (Multi-provider)", "value": AgentType.LITELLM},
             ]
@@ -1138,11 +1153,11 @@ def set(
 ) -> None:
     """Persist API keys or other variables for HUD to use by default.
-    Examples:
+    [not dim]Examples:
         hud set ANTHROPIC_API_KEY=sk-... OPENAI_API_KEY=sk-...
     Values are stored in ~/.hud/.env and are loaded by hud.settings with
-    the lowest precedence (overridden by process env and project .env).
+    the lowest precedence (overridden by process env and project .env).[/not dim]
     """
     from hud.utils.hud_console import HUDConsole

hud-python 0.4.57__py3-none-any.whl → 0.4.59__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.57__py3-none-any.whl → 0.4.59__py3-none-any.whl