PyPI - hud-python - Versions diffs - 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

hud-python: 0.4.45-py3-none-any.whl → 0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (274)

hud/__init__.py +27 -7
hud/agents/__init__.py +11 -5
hud/agents/base.py +220 -500
hud/agents/claude.py +200 -240
hud/agents/gemini.py +275 -0
hud/agents/gemini_cua.py +335 -0
hud/agents/grounded_openai.py +98 -100
hud/agents/misc/integration_test_agent.py +51 -20
hud/agents/misc/response_agent.py +41 -36
hud/agents/openai.py +291 -292
hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
hud/agents/operator.py +211 -0
hud/agents/tests/conftest.py +133 -0
hud/agents/tests/test_base.py +300 -622
hud/agents/tests/test_base_runtime.py +233 -0
hud/agents/tests/test_claude.py +379 -210
hud/agents/tests/test_client.py +9 -10
hud/agents/tests/test_gemini.py +369 -0
hud/agents/tests/test_grounded_openai_agent.py +65 -50
hud/agents/tests/test_openai.py +376 -140
hud/agents/tests/test_operator.py +362 -0
hud/agents/tests/test_run_eval.py +179 -0
hud/cli/__init__.py +461 -545
hud/cli/analyze.py +43 -5
hud/cli/build.py +664 -110
hud/cli/debug.py +8 -5
hud/cli/dev.py +882 -734
hud/cli/eval.py +782 -668
hud/cli/flows/dev.py +167 -0
hud/cli/flows/init.py +191 -0
hud/cli/flows/tasks.py +153 -56
hud/cli/flows/templates.py +151 -0
hud/cli/flows/tests/__init__.py +1 -0
hud/cli/flows/tests/test_dev.py +126 -0
hud/cli/init.py +60 -58
hud/cli/push.py +29 -11
hud/cli/rft.py +311 -0
hud/cli/rft_status.py +145 -0
hud/cli/tests/test_analyze.py +5 -5
hud/cli/tests/test_analyze_metadata.py +3 -2
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +108 -6
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_init.py +6 -1
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +140 -0
hud/cli/tests/test_convert.py +361 -0
hud/cli/tests/test_debug.py +12 -10
hud/cli/tests/test_dev.py +197 -0
hud/cli/tests/test_eval.py +251 -0
hud/cli/tests/test_eval_bedrock.py +51 -0
hud/cli/tests/test_init.py +124 -0
hud/cli/tests/test_main_module.py +11 -5
hud/cli/tests/test_mcp_server.py +12 -100
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/tests/test_registry.py +1 -1
hud/cli/tests/test_utils.py +1 -1
hud/cli/{rl → utils}/celebrate.py +14 -12
hud/cli/utils/config.py +18 -1
hud/cli/utils/docker.py +130 -4
hud/cli/utils/env_check.py +9 -9
hud/cli/utils/git.py +136 -0
hud/cli/utils/interactive.py +39 -5
hud/cli/utils/metadata.py +69 -0
hud/cli/utils/runner.py +1 -1
hud/cli/utils/server.py +2 -2
hud/cli/utils/source_hash.py +3 -3
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_git.py +142 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +258 -0
hud/cli/{rl → utils}/viewer.py +2 -2
hud/clients/README.md +12 -11
hud/clients/__init__.py +4 -3
hud/clients/base.py +166 -26
hud/clients/environment.py +51 -0
hud/clients/fastmcp.py +13 -6
hud/clients/mcp_use.py +40 -15
hud/clients/tests/test_analyze_scenarios.py +206 -0
hud/clients/tests/test_protocol.py +9 -3
hud/datasets/__init__.py +23 -20
hud/datasets/loader.py +327 -0
hud/datasets/runner.py +192 -105
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_loader.py +221 -0
hud/datasets/tests/test_utils.py +315 -0
hud/datasets/utils.py +270 -90
hud/environment/__init__.py +50 -0
hud/environment/connection.py +206 -0
hud/environment/connectors/__init__.py +33 -0
hud/environment/connectors/base.py +68 -0
hud/environment/connectors/local.py +177 -0
hud/environment/connectors/mcp_config.py +109 -0
hud/environment/connectors/openai.py +101 -0
hud/environment/connectors/remote.py +172 -0
hud/environment/environment.py +694 -0
hud/environment/integrations/__init__.py +45 -0
hud/environment/integrations/adk.py +67 -0
hud/environment/integrations/anthropic.py +196 -0
hud/environment/integrations/gemini.py +92 -0
hud/environment/integrations/langchain.py +82 -0
hud/environment/integrations/llamaindex.py +68 -0
hud/environment/integrations/openai.py +238 -0
hud/environment/mock.py +306 -0
hud/environment/router.py +112 -0
hud/environment/scenarios.py +493 -0
hud/environment/tests/__init__.py +1 -0
hud/environment/tests/test_connection.py +317 -0
hud/environment/tests/test_connectors.py +218 -0
hud/environment/tests/test_environment.py +161 -0
hud/environment/tests/test_integrations.py +257 -0
hud/environment/tests/test_local_connectors.py +201 -0
hud/environment/tests/test_scenarios.py +280 -0
hud/environment/tests/test_tools.py +208 -0
hud/environment/types.py +23 -0
hud/environment/utils/__init__.py +35 -0
hud/environment/utils/formats.py +215 -0
hud/environment/utils/schema.py +171 -0
hud/environment/utils/tool_wrappers.py +113 -0
hud/eval/__init__.py +67 -0
hud/eval/context.py +674 -0
hud/eval/display.py +299 -0
hud/eval/instrument.py +185 -0
hud/eval/manager.py +466 -0
hud/eval/parallel.py +268 -0
hud/eval/task.py +340 -0
hud/eval/tests/__init__.py +1 -0
hud/eval/tests/test_context.py +178 -0
hud/eval/tests/test_eval.py +210 -0
hud/eval/tests/test_manager.py +152 -0
hud/eval/tests/test_parallel.py +168 -0
hud/eval/tests/test_task.py +145 -0
hud/eval/types.py +63 -0
hud/eval/utils.py +183 -0
hud/patches/__init__.py +19 -0
hud/patches/mcp_patches.py +151 -0
hud/patches/warnings.py +54 -0
hud/samples/browser.py +4 -4
hud/server/__init__.py +2 -1
hud/server/low_level.py +2 -1
hud/server/router.py +164 -0
hud/server/server.py +567 -80
hud/server/tests/test_mcp_server_integration.py +11 -11
hud/server/tests/test_mcp_server_more.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/settings.py +45 -3
hud/shared/exceptions.py +36 -10
hud/shared/hints.py +26 -1
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +40 -31
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +20 -19
hud/telemetry/exporter.py +201 -0
hud/telemetry/instrument.py +158 -253
hud/telemetry/tests/test_eval_telemetry.py +356 -0
hud/telemetry/tests/test_exporter.py +258 -0
hud/telemetry/tests/test_instrument.py +401 -0
hud/tools/__init__.py +16 -2
hud/tools/apply_patch.py +639 -0
hud/tools/base.py +54 -4
hud/tools/bash.py +2 -2
hud/tools/computer/__init__.py +4 -0
hud/tools/computer/anthropic.py +2 -2
hud/tools/computer/gemini.py +385 -0
hud/tools/computer/hud.py +23 -6
hud/tools/computer/openai.py +20 -21
hud/tools/computer/qwen.py +434 -0
hud/tools/computer/settings.py +37 -0
hud/tools/edit.py +3 -7
hud/tools/executors/base.py +4 -2
hud/tools/executors/pyautogui.py +1 -1
hud/tools/grounding/grounded_tool.py +13 -18
hud/tools/grounding/grounder.py +10 -31
hud/tools/grounding/tests/test_grounded_tool.py +26 -44
hud/tools/jupyter.py +330 -0
hud/tools/playwright.py +18 -3
hud/tools/shell.py +308 -0
hud/tools/tests/test_apply_patch.py +718 -0
hud/tools/tests/test_computer.py +4 -9
hud/tools/tests/test_computer_actions.py +24 -2
hud/tools/tests/test_jupyter_tool.py +181 -0
hud/tools/tests/test_shell.py +596 -0
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/tools/types.py +21 -1
hud/types.py +167 -57
hud/utils/__init__.py +2 -0
hud/utils/env.py +67 -0
hud/utils/hud_console.py +61 -3
hud/utils/mcp.py +15 -58
hud/utils/strict_schema.py +162 -0
hud/utils/tests/test_init.py +1 -2
hud/utils/tests/test_mcp.py +1 -28
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/utils/types.py +20 -0
hud/version.py +1 -1
hud_python-0.5.1.dist-info/METADATA +264 -0
hud_python-0.5.1.dist-info/RECORD +299 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
hud/agents/langchain.py +0 -261
hud/agents/lite_llm.py +0 -72
hud/cli/rl/__init__.py +0 -180
hud/cli/rl/config.py +0 -101
hud/cli/rl/display.py +0 -133
hud/cli/rl/gpu.py +0 -63
hud/cli/rl/gpu_utils.py +0 -321
hud/cli/rl/local_runner.py +0 -595
hud/cli/rl/presets.py +0 -96
hud/cli/rl/remote_runner.py +0 -463
hud/cli/rl/rl_api.py +0 -150
hud/cli/rl/vllm.py +0 -177
hud/cli/rl/wait_utils.py +0 -89
hud/datasets/parallel.py +0 -687
hud/misc/__init__.py +0 -1
hud/misc/claude_plays_pokemon.py +0 -292
hud/otel/__init__.py +0 -35
hud/otel/collector.py +0 -142
hud/otel/config.py +0 -181
hud/otel/context.py +0 -570
hud/otel/exporters.py +0 -369
hud/otel/instrumentation.py +0 -135
hud/otel/processors.py +0 -121
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_processors.py +0 -197
hud/rl/README.md +0 -30
hud/rl/__init__.py +0 -1
hud/rl/actor.py +0 -176
hud/rl/buffer.py +0 -405
hud/rl/chat_template.jinja +0 -101
hud/rl/config.py +0 -192
hud/rl/distributed.py +0 -132
hud/rl/learner.py +0 -637
hud/rl/tests/__init__.py +0 -1
hud/rl/tests/test_learner.py +0 -186
hud/rl/train.py +0 -382
hud/rl/types.py +0 -101
hud/rl/utils/start_vllm_server.sh +0 -30
hud/rl/utils.py +0 -524
hud/rl/vllm_adapter.py +0 -143
hud/telemetry/job.py +0 -352
hud/telemetry/replay.py +0 -74
hud/telemetry/tests/test_replay.py +0 -40
hud/telemetry/tests/test_trace.py +0 -63
hud/telemetry/trace.py +0 -158
hud/utils/agent_factories.py +0 -86
hud/utils/async_utils.py +0 -65
hud/utils/group_eval.py +0 -223
hud/utils/progress.py +0 -149
hud/utils/tasks.py +0 -127
hud/utils/tests/test_async_utils.py +0 -173
hud/utils/tests/test_progress.py +0 -261
hud_python-0.4.45.dist-info/METADATA +0 -552
hud_python-0.4.45.dist-info/RECORD +0 -228
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0

hud/tools/base.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import logging
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any, cast
@@ -8,7 +9,7 @@ from fastmcp import FastMCP
 from hud.tools.types import ContentBlock, EvaluationResult
 if TYPE_CHECKING:
-    from collections.abc import Callable
+    from collections.abc import Awaitable, Callable
     from fastmcp.tools import FunctionTool
     from fastmcp.tools.tool import Tool, ToolResult
@@ -16,6 +17,8 @@ if TYPE_CHECKING:
 # Basic result types for tools
 BaseResult = list[ContentBlock] | EvaluationResult
+logger = logging.getLogger(__name__)
 class BaseTool(ABC):
     """
@@ -58,6 +61,10 @@ class BaseTool(ABC):
         self.title = title or self.__class__.__name__.replace("Tool", "").replace("_", " ").title()
         self.description = description or (self.__doc__.strip() if self.__doc__ else None)
         self.meta = meta
+        self._callbacks: dict[
+            str,
+            list[Callable[..., Awaitable[Any]]],
+        ] = {}  # {"event_name": [callback_functions]}
         # Expose attributes FastMCP expects when registering an instance directly
         self.__name__ = self.name  # FastMCP uses fn.__name__ if name param omitted
@@ -100,13 +107,48 @@ class BaseTool(ABC):
             )
         return self._mcp_tool
+    def add_callback(self, event_type: str, callback: Callable[..., Awaitable[Any]]) -> None:
+        """Register a callback function for specific event
+        Args:
+            event_type: (Required) Specific event name to trigger callback
+                        e.g. "after_click", "before_navigate"
+            callback: (Required) Async function to call. Must be defined by `async def f(...)`
+        """
+        if event_type not in self._callbacks:
+            self._callbacks[event_type] = []
+        self._callbacks[event_type].append(callback)
+    def remove_callback(self, event_type: str, callback: Callable[..., Awaitable[Any]]) -> None:
+        """Remove a registered callback
+        Args:
+            event_type: (Required) Specific event name to trigger callback
+                        e.g. "after_click", "before_navigate"
+            callback: (Required) Function to remove from callback list.
+        """
+        if (event_type in self._callbacks) and (callback in self._callbacks[event_type]):
+            self._callbacks[event_type].remove(callback)
+    async def _trigger_callbacks(self, event_type: str, **kwargs: Any) -> None:
+        """Trigger all registered callback functions of an event type"""
+        callback_list = self._callbacks.get(event_type, [])
+        for callback in callback_list:
+            try:
+                await callback(**kwargs)
+            except Exception as e:
+                logger.warning("Callback failed for %s: %s", event_type, e)
 # Prefix for internal tool names
 _INTERNAL_PREFIX = "int_"
 class BaseHub(FastMCP):
-    """A composition-friendly FastMCP server that holds an internal tool dispatcher."""
+    """A composition-friendly FastMCP server that holds an internal tool dispatcher.
+    Note: BaseHub can be used standalone or to wrap existing routers. For the newer
+    FastAPI-like pattern, consider using HiddenRouter from hud.server instead.
+    """
     env: Any
@@ -129,6 +171,10 @@ class BaseHub(FastMCP):
             Optional long-lived environment object. Stored on the server
             instance (``layer.env``) and therefore available to every request
             via ``ctx.fastmcp.env``.
+        title:
+            Optional title for the dispatcher tool.
+        description:
+            Optional description for the dispatcher tool.
         meta:
             Metadata to include in MCP tool listing.
         """
@@ -370,8 +416,12 @@ class BaseHub(FastMCP):
                 }
     # Override _list_tools to hide internal tools when mounted
-    async def _list_tools(self) -> list[Tool]:
-        """Override _list_tools to hide internal tools when mounted."""
+    async def _list_tools(self, context: Any = None) -> list[Tool]:
+        """Override _list_tools to hide internal tools when mounted.
+        Args:
+            context: MiddlewareContext passed by FastMCP (optional for backwards compat)
+        """
         return [
             tool
             for key, tool in self._tool_manager._tools.items()

hud/tools/bash.py CHANGED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 import asyncio
 import os
 import sys
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 from .base import BaseTool
 from .types import ContentResult, ToolError
@@ -140,7 +140,7 @@ class BashTool(BaseTool):
         self.env = value
     async def __call__(
-        self, command: str | None = None, restart: bool = False, **kwargs: Any
+        self, command: str | None = None, restart: bool = False
     ) -> list[ContentBlock]:
         if restart:
             if self.session:

hud/tools/computer/__init__.py CHANGED Viewed

@@ -3,13 +3,17 @@
 from __future__ import annotations
 from .anthropic import AnthropicComputerTool
+from .gemini import GeminiComputerTool
 from .hud import HudComputerTool
 from .openai import OpenAIComputerTool
+from .qwen import QwenComputerTool
 from .settings import computer_settings
 __all__ = [
     "AnthropicComputerTool",
+    "GeminiComputerTool",
     "HudComputerTool",
     "OpenAIComputerTool",
+    "QwenComputerTool",
     "computer_settings",
 ]

hud/tools/computer/anthropic.py CHANGED Viewed

@@ -141,13 +141,13 @@ class AnthropicComputerTool(HudComputerTool):
     async def __call__(
         self,
         action: str = Field(..., description="The action to perform on the computer"),
-        coordinate: list[int] | tuple[int, int] | None = Field(
+        coordinate: list[int] | None = Field(
             None, description="The coordinate to interact with on the computer [x, y]"
         ),
         text: str | None = Field(
             None, description="The text to type on the computer or key to press"
         ),
-        start_coordinate: list[int] | tuple[int, int] | None = Field(
+        start_coordinate: list[int] | None = Field(
             None, description="The starting coordinate for drag actions [x, y]"
         ),
         scroll_direction: str | None = Field(

hud/tools/computer/gemini.py ADDED Viewed

@@ -0,0 +1,385 @@
+from __future__ import annotations
+import logging
+import platform
+from typing import TYPE_CHECKING, Any, Literal
+from mcp import ErrorData, McpError
+from mcp.types import INVALID_PARAMS, ContentBlock
+from pydantic import Field
+from hud.tools.types import ContentResult
+from .hud import HudComputerTool
+from .settings import computer_settings
+if TYPE_CHECKING:
+    from hud.tools.executors.base import BaseExecutor
+logger = logging.getLogger(__name__)
+ACTION_FIELD = Field(..., description="Gemini Computer Use action to perform")
+X_FIELD = Field(None, description="X coordinate (pixels in agent space)")
+Y_FIELD = Field(None, description="Y coordinate (pixels in agent space)")
+TEXT_FIELD = Field(None, description="Text to type")
+PRESS_ENTER_FIELD = Field(None, description="Whether to press Enter after typing (type_text_at)")
+CLEAR_BEFORE_TYPING_FIELD = Field(
+    None, description="Whether to select-all before typing (type_text_at)"
+)
+DIRECTION_FIELD = Field(None, description="Scroll direction for scroll_document/scroll_at")
+MAGNITUDE_FIELD = Field(None, description="Scroll magnitude (pixels in agent space)")
+URL_FIELD = Field(None, description="Target URL for navigate")
+KEYS_FIELD = Field(None, description="Keys for key_combination")
+DESTINATION_X_FIELD = Field(None, description="Destination X for drag_and_drop (agent space)")
+DESTINATION_Y_FIELD = Field(None, description="Destination Y for drag_and_drop (agent space)")
+TAKE_SCREENSHOT_ON_CLICK_FIELD = Field(
+    True, description="Whether to include a screenshot for interactive actions"
+)
+class GeminiComputerTool(HudComputerTool):
+    """
+    Gemini Computer Use tool for interacting with a computer via MCP.
+    Maps Gemini's predefined function names (open_web_browser, click_at, hover_at,
+    type_text_at, scroll_document, scroll_at, wait_5_seconds, go_back, go_forward,
+    search, navigate, key_combination, drag_and_drop) to executor actions.
+    """
+    def __init__(
+        self,
+        # Define within environment based on platform
+        executor: BaseExecutor | None = None,
+        platform_type: Literal["auto", "xdo", "pyautogui"] = "auto",
+        display_num: int | None = None,
+        # Overrides for what dimensions the agent thinks it operates in
+        width: int = computer_settings.GEMINI_COMPUTER_WIDTH,
+        height: int = computer_settings.GEMINI_COMPUTER_HEIGHT,
+        rescale_images: bool = computer_settings.GEMINI_RESCALE_IMAGES,
+        # What the agent sees as the tool's name, title, and description
+        name: str | None = None,
+        title: str | None = None,
+        description: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Initialize with Gemini's default dimensions.
+        """
+        super().__init__(
+            executor=executor,
+            platform_type=platform_type,
+            display_num=display_num,
+            width=width,
+            height=height,
+            rescale_images=rescale_images,
+            name=name or "gemini_computer",
+            title=title or "Gemini Computer Tool",
+            description=description or "Control computer with mouse, keyboard, and screenshots",
+            **kwargs,
+        )
+    async def __call__(
+        self,
+        action: str = ACTION_FIELD,
+        # Common coordinates
+        x: int | None = X_FIELD,
+        y: int | None = Y_FIELD,
+        # Text input
+        text: str | None = TEXT_FIELD,
+        press_enter: bool | None = PRESS_ENTER_FIELD,
+        clear_before_typing: bool | None = CLEAR_BEFORE_TYPING_FIELD,
+        # Scroll parameters
+        direction: Literal["up", "down", "left", "right"] | None = DIRECTION_FIELD,
+        magnitude: int | None = MAGNITUDE_FIELD,
+        # Navigation
+        url: str | None = URL_FIELD,
+        # Key combos
+        keys: list[str] | str | None = KEYS_FIELD,
+        # Drag parameters
+        destination_x: int | None = DESTINATION_X_FIELD,
+        destination_y: int | None = DESTINATION_Y_FIELD,
+        # Behavior
+        take_screenshot_on_click: bool = TAKE_SCREENSHOT_ON_CLICK_FIELD,
+    ) -> list[ContentBlock]:
+        """
+        Handle Gemini Computer Use API calls by mapping to executor actions.
+        Returns:
+            List of MCP content blocks
+        """
+        logger.info("GeminiComputerTool received action: %s", action)
+        # Helper to finalize ContentResult: rescale if requested and ensure URL metadata
+        async def _finalize(
+            result: ContentResult, requested_url: str | None = None
+        ) -> list[ContentBlock]:
+            if result.base64_image and self.rescale_images:
+                try:
+                    result.base64_image = await self._rescale_screenshot(result.base64_image)
+                except Exception as e:
+                    logger.warning("Failed to rescale screenshot: %s", e)
+            # Always include URL metadata if provided; otherwise default to about:blank
+            result.url = requested_url or result.url or "about:blank"
+            return result.to_content_blocks()
+        # Scale coordinates helper
+        def _scale(xv: int | None, yv: int | None) -> tuple[int | None, int | None]:
+            return self._scale_coordinates(xv, yv)
+        # Gemini emits coordinates/magnitudes in a 0-1000 normalized space.
+        def _denormalize(value: float | None, axis: Literal["x", "y"]) -> int | None:
+            if value is None:
+                return None
+            try:
+                numeric = float(value)
+            except (TypeError, ValueError):
+                try:
+                    return int(value)  # type: ignore[arg-type]
+                except (TypeError, ValueError):
+                    return None
+            # Treat values within the normalized range (including defaults like 800).
+            if 0 <= numeric <= 1000:
+                target = self.width if axis == "x" else self.height
+                numeric = numeric / 1000 * target
+            return round(numeric)
+        def _scale_distance(value: int | None, axis: Literal["x", "y"]) -> int | None:
+            if value is None:
+                return None
+            scale = self.scale_x if axis == "x" else self.scale_y
+            if scale != 1.0:
+                return round(value / scale)
+            return value
+        # Map actions
+        if action == "open_web_browser":
+            screenshot = await self.executor.screenshot()
+            if screenshot:
+                result = ContentResult(base64_image=screenshot, url="about:blank")
+            else:
+                result = ContentResult(error="Failed to take screenshot", url="about:blank")
+            return await _finalize(result)
+        elif action == "click_at":
+            if x is None or y is None:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="x and y are required"))
+            dx = _denormalize(x, "x")
+            dy = _denormalize(y, "y")
+            sx, sy = _scale(dx, dy)
+            result = await self.executor.click(x=sx, y=sy)
+            return await _finalize(result)
+        elif action == "hover_at":
+            if x is None or y is None:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="x and y are required"))
+            dx = _denormalize(x, "x")
+            dy = _denormalize(y, "y")
+            sx, sy = _scale(dx, dy)
+            result = await self.executor.move(x=sx, y=sy)
+            return await _finalize(result)
+        elif action == "type_text_at":
+            if x is None or y is None:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="x and y are required"))
+            if text is None:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="text is required"))
+            dx = _denormalize(x, "x")
+            dy = _denormalize(y, "y")
+            sx, sy = _scale(dx, dy)
+            # Focus the field
+            await self.executor.move(x=sx, y=sy, take_screenshot=False)
+            await self.executor.click(x=sx, y=sy, take_screenshot=False)
+            # Clear existing text if requested
+            if clear_before_typing is None or clear_before_typing:
+                is_mac = platform.system().lower() == "darwin"
+                combo = ["cmd", "a"] if is_mac else ["ctrl", "a"]
+                await self.executor.press(keys=combo, take_screenshot=False)
+                delete_key = "backspace" if is_mac else "delete"
+                await self.executor.press(keys=[delete_key], take_screenshot=False)
+            # Type (optionally press enter after)
+            result = await self.executor.write(text=text, enter_after=bool(press_enter))
+            return await _finalize(result)
+        elif action == "scroll_document":
+            if direction is None:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="direction is required"))
+            # Default magnitude similar to reference implementation
+            mag = magnitude if magnitude is not None else 800
+            # Convert to environment units while preserving sign
+            if direction in ("down", "up"):
+                distance = _denormalize(mag, "y")
+                if distance is None:
+                    raise McpError(
+                        ErrorData(
+                            code=INVALID_PARAMS, message="Unable to determine scroll magnitude"
+                        )
+                    )
+                distance = _scale_distance(distance, "y")
+                if distance is None:
+                    raise McpError(
+                        ErrorData(
+                            code=INVALID_PARAMS,
+                            message="Unable to determine scroll magnitude",
+                        )
+                    )
+                scroll_y = distance if direction == "down" else -distance
+                scroll_x = None
+            elif direction in ("right", "left"):
+                distance = _denormalize(mag, "x")
+                if distance is None:
+                    raise McpError(
+                        ErrorData(
+                            code=INVALID_PARAMS, message="Unable to determine scroll magnitude"
+                        )
+                    )
+                distance = _scale_distance(distance, "x")
+                if distance is None:
+                    raise McpError(
+                        ErrorData(
+                            code=INVALID_PARAMS,
+                            message="Unable to determine scroll magnitude",
+                        )
+                    )
+                scroll_x = distance if direction == "right" else -distance
+                scroll_y = None
+            else:
+                raise McpError(
+                    ErrorData(code=INVALID_PARAMS, message=f"Invalid direction: {direction}")
+                )
+            result = await self.executor.scroll(scroll_x=scroll_x, scroll_y=scroll_y)
+            return await _finalize(result)
+        elif action == "scroll_at":
+            if direction is None:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="direction is required"))
+            if x is None or y is None:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="x and y are required"))
+            mag = magnitude if magnitude is not None else 800
+            dx = _denormalize(x, "x")
+            dy = _denormalize(y, "y")
+            sx, sy = _scale(dx, dy)
+            if direction in ("down", "up"):
+                distance = _denormalize(mag, "y")
+                if distance is None:
+                    raise McpError(
+                        ErrorData(
+                            code=INVALID_PARAMS, message="Unable to determine scroll magnitude"
+                        )
+                    )
+                distance = _scale_distance(distance, "y")
+                if distance is None:
+                    raise McpError(
+                        ErrorData(
+                            code=INVALID_PARAMS,
+                            message="Unable to determine scroll magnitude",
+                        )
+                    )
+                scroll_y = distance if direction == "down" else -distance
+                scroll_x = None
+            elif direction in ("right", "left"):
+                distance = _denormalize(mag, "x")
+                if distance is None:
+                    raise McpError(
+                        ErrorData(
+                            code=INVALID_PARAMS, message="Unable to determine scroll magnitude"
+                        )
+                    )
+                distance = _scale_distance(distance, "x")
+                if distance is None:
+                    raise McpError(
+                        ErrorData(
+                            code=INVALID_PARAMS,
+                            message="Unable to determine scroll magnitude",
+                        )
+                    )
+                scroll_x = distance if direction == "right" else -distance
+                scroll_y = None
+            else:
+                raise McpError(
+                    ErrorData(code=INVALID_PARAMS, message=f"Invalid direction: {direction}")
+                )
+            result = await self.executor.scroll(x=sx, y=sy, scroll_x=scroll_x, scroll_y=scroll_y)
+            return await _finalize(result)
+        elif action == "wait_5_seconds":
+            result = await self.executor.wait(time=5000)
+            return await _finalize(result)
+        elif action == "go_back":
+            is_mac = platform.system().lower() == "darwin"
+            combo = ["cmd", "["] if is_mac else ["alt", "left"]
+            result = await self.executor.press(keys=combo)
+            return await _finalize(result)
+        elif action == "go_forward":
+            is_mac = platform.system().lower() == "darwin"
+            combo = ["cmd", "]"] if is_mac else ["alt", "right"]
+            result = await self.executor.press(keys=combo)
+            return await _finalize(result)
+        elif action == "search":
+            # Best-effort navigate to a default search page
+            target = url or "https://www.google.com"
+            is_mac = platform.system().lower() == "darwin"
+            await self.executor.press(
+                keys=["cmd", "l"] if is_mac else ["ctrl", "l"], take_screenshot=False
+            )
+            result = await self.executor.write(text=target, enter_after=True)
+            return await _finalize(result, requested_url=target)
+        elif action == "navigate":
+            if not url:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="url is required"))
+            is_mac = platform.system().lower() == "darwin"
+            await self.executor.press(
+                keys=["cmd", "l"] if is_mac else ["ctrl", "l"], take_screenshot=False
+            )
+            result = await self.executor.write(text=url, enter_after=True)
+            return await _finalize(result, requested_url=url)
+        elif action == "key_combination":
+            if keys is None:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="keys is required"))
+            if isinstance(keys, str):
+                # Accept formats like "ctrl+c" or "ctrl+shift+t"
+                key_list = [k.strip() for k in keys.split("+") if k.strip()]
+            else:
+                key_list = keys
+            result = await self.executor.press(keys=key_list)
+            return await _finalize(result)
+        elif action == "drag_and_drop":
+            if x is None or y is None or destination_x is None or destination_y is None:
+                raise McpError(
+                    ErrorData(
+                        code=INVALID_PARAMS,
+                        message="x, y, destination_x, and destination_y are required",
+                    )
+                )
+            sx_norm = _denormalize(x, "x")
+            sy_norm = _denormalize(y, "y")
+            dx_norm = _denormalize(destination_x, "x")
+            dy_norm = _denormalize(destination_y, "y")
+            sx, sy = _scale(sx_norm, sy_norm)
+            dx_scaled, dy_scaled = _scale(dx_norm, dy_norm)
+            # Build a two-point path
+            path = []  # type: list[tuple[int, int]]
+            if (
+                sx is not None
+                and sy is not None
+                and dx_scaled is not None
+                and dy_scaled is not None
+            ):
+                path = [(sx, sy), (dx_scaled, dy_scaled)]
+            result = await self.executor.drag(path=path)
+            return await _finalize(result)
+        else:
+            raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))

hud/tools/computer/hud.py CHANGED Viewed

@@ -13,7 +13,7 @@ from hud.tools.base import BaseTool
 from hud.tools.executors.base import BaseExecutor
 from hud.tools.executors.pyautogui import PyAutoGUIExecutor
 from hud.tools.executors.xdo import XDOExecutor
-from hud.tools.types import ContentResult, ToolError
+from hud.tools.types import ContentResult, Coordinate, ToolError
 from .settings import computer_settings
@@ -231,7 +231,23 @@ class HudComputerTool(BaseTool):
     async def __call__(
         self,
-        action: str = Field(..., description="The action name (click, press, write, move, etc.)"),
+        action: Literal[
+            "click",
+            "press",
+            "keydown",
+            "keyup",
+            "write",
+            "scroll",
+            "move",
+            "wait",
+            "drag",
+            "response",
+            "screenshot",
+            "position",
+            "hold_key",
+            "mouse_down",
+            "mouse_up",
+        ] = Field(..., description="The action name (click, press, write, move, etc.)"),
         # Click parameters
         x: int | None = Field(None, description="X coordinate for click/move/scroll actions"),
         y: int | None = Field(None, description="Y coordinate for click/move/scroll actions"),
@@ -254,8 +270,8 @@ class HudComputerTool(BaseTool):
         offset_x: int | None = Field(None, description="X offset for relative move"),
         offset_y: int | None = Field(None, description="Y offset for relative move"),
         # Drag parameters
-        path: list[tuple[int, int]] | None = Field(
-            None, description="Path for drag actions as list of (x, y) coordinates"
+        path: list[Coordinate] | None = Field(
+            None, description="Path for drag actions as list of {x, y} coordinates"
         ),
         # Wait parameter
         time: int | None = Field(None, description="Time in milliseconds for wait action"),
@@ -332,8 +348,9 @@ class HudComputerTool(BaseTool):
             elif action == "drag":
                 if path is None:
                     raise ToolError("path parameter is required for drag")
-                # Scale path from client space to screen space
-                scaled_path = self._scale_path(path)
+                # Convert Coordinate objects to tuples and scale from client space to screen space
+                path_tuples = [(point.x, point.y) for point in path]
+                scaled_path = self._scale_path(path_tuples)
                 result = await self.executor.drag(
                     path=scaled_path, pattern=pattern, hold_keys=hold_keys
                 )

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl