hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic.
- hud/__init__.py +22 -22
- hud/agents/__init__.py +13 -15
- hud/agents/base.py +599 -599
- hud/agents/claude.py +373 -373
- hud/agents/langchain.py +261 -250
- hud/agents/misc/__init__.py +7 -7
- hud/agents/misc/response_agent.py +82 -80
- hud/agents/openai.py +352 -352
- hud/agents/openai_chat_generic.py +154 -154
- hud/agents/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -742
- hud/agents/tests/test_claude.py +324 -324
- hud/agents/tests/test_client.py +363 -363
- hud/agents/tests/test_openai.py +237 -237
- hud/cli/__init__.py +617 -617
- hud/cli/__main__.py +8 -8
- hud/cli/analyze.py +371 -371
- hud/cli/analyze_metadata.py +230 -230
- hud/cli/build.py +498 -427
- hud/cli/clone.py +185 -185
- hud/cli/cursor.py +92 -92
- hud/cli/debug.py +392 -392
- hud/cli/docker_utils.py +83 -83
- hud/cli/init.py +280 -281
- hud/cli/interactive.py +353 -353
- hud/cli/mcp_server.py +764 -756
- hud/cli/pull.py +330 -336
- hud/cli/push.py +404 -370
- hud/cli/remote_runner.py +311 -311
- hud/cli/runner.py +160 -160
- hud/cli/tests/__init__.py +3 -3
- hud/cli/tests/test_analyze.py +284 -284
- hud/cli/tests/test_cli_init.py +265 -265
- hud/cli/tests/test_cli_main.py +27 -27
- hud/cli/tests/test_clone.py +142 -142
- hud/cli/tests/test_cursor.py +253 -253
- hud/cli/tests/test_debug.py +453 -453
- hud/cli/tests/test_mcp_server.py +139 -139
- hud/cli/tests/test_utils.py +388 -388
- hud/cli/utils.py +263 -263
- hud/clients/README.md +143 -143
- hud/clients/__init__.py +16 -16
- hud/clients/base.py +378 -379
- hud/clients/fastmcp.py +222 -222
- hud/clients/mcp_use.py +298 -278
- hud/clients/tests/__init__.py +1 -1
- hud/clients/tests/test_client_integration.py +111 -111
- hud/clients/tests/test_fastmcp.py +342 -342
- hud/clients/tests/test_protocol.py +188 -188
- hud/clients/utils/__init__.py +1 -1
- hud/clients/utils/retry_transport.py +160 -160
- hud/datasets.py +327 -322
- hud/misc/__init__.py +1 -1
- hud/misc/claude_plays_pokemon.py +292 -292
- hud/otel/__init__.py +35 -35
- hud/otel/collector.py +142 -142
- hud/otel/config.py +164 -164
- hud/otel/context.py +536 -536
- hud/otel/exporters.py +366 -366
- hud/otel/instrumentation.py +97 -97
- hud/otel/processors.py +118 -118
- hud/otel/tests/__init__.py +1 -1
- hud/otel/tests/test_processors.py +197 -197
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -114
- hud/server/helper/__init__.py +5 -5
- hud/server/low_level.py +132 -132
- hud/server/server.py +170 -166
- hud/server/tests/__init__.py +3 -3
- hud/settings.py +73 -73
- hud/shared/__init__.py +5 -5
- hud/shared/exceptions.py +180 -180
- hud/shared/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -157
- hud/shared/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -25
- hud/telemetry/instrument.py +379 -379
- hud/telemetry/job.py +309 -309
- hud/telemetry/replay.py +74 -74
- hud/telemetry/trace.py +83 -83
- hud/tools/__init__.py +33 -33
- hud/tools/base.py +365 -365
- hud/tools/bash.py +161 -161
- hud/tools/computer/__init__.py +15 -15
- hud/tools/computer/anthropic.py +437 -437
- hud/tools/computer/hud.py +376 -376
- hud/tools/computer/openai.py +295 -295
- hud/tools/computer/settings.py +82 -82
- hud/tools/edit.py +314 -314
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -539
- hud/tools/executors/pyautogui.py +621 -621
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -511
- hud/tools/playwright.py +412 -412
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -282
- hud/tools/tests/test_bash.py +158 -158
- hud/tools/tests/test_bash_extended.py +197 -197
- hud/tools/tests/test_computer.py +425 -425
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -259
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -145
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -72
- hud/tools/utils.py +50 -50
- hud/types.py +136 -136
- hud/utils/__init__.py +10 -10
- hud/utils/async_utils.py +65 -65
- hud/utils/design.py +236 -168
- hud/utils/mcp.py +55 -55
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -173
- hud/utils/tests/test_init.py +17 -17
- hud/utils/tests/test_progress.py +261 -261
- hud/utils/tests/test_telemetry.py +82 -82
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
- hud_python-0.4.3.dist-info/RECORD +131 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
- hud/agents/art.py +0 -101
- hud_python-0.4.1.dist-info/RECORD +0 -132
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/tools/computer/hud.py
CHANGED
@@ -1,376 +1,376 @@
The diff viewer re-emits the entire file: every line is shown as removed and then re-added, and the removed (-) and added (+) sides are line-for-line identical, so the file content is reproduced once below.

# flake8: noqa: B008
from __future__ import annotations

import logging
import platform
from typing import Literal

from mcp import ErrorData, McpError
from mcp.types import INVALID_PARAMS, ContentBlock, TextContent
from pydantic import Field

from hud.tools.base import BaseTool
from hud.tools.executors.base import BaseExecutor
from hud.tools.executors.pyautogui import PyAutoGUIExecutor
from hud.tools.executors.xdo import XDOExecutor
from hud.tools.types import ContentResult, ToolError

from .settings import computer_settings

logger = logging.getLogger(__name__)


class HudComputerTool(BaseTool):
    """
    A tool that allows the agent to control the computer.
    """

    def __init__(
        self,
        # Define within environment based on platform
        executor: BaseExecutor | None = None,
        platform_type: Literal["auto", "xdo", "pyautogui"] = "auto",
        display_num: int | None = None,
        # Overrides for what dimensions the agent thinks it operates in
        # Define per subclass (e.g., Anthropic, OpenAI)
        width: int | None = computer_settings.HUD_COMPUTER_WIDTH,
        height: int | None = computer_settings.HUD_COMPUTER_HEIGHT,
        rescale_images: bool = computer_settings.HUD_RESCALE_IMAGES,
        # What the agent sees as the tool's name, title, and description
        name: str | None = None,
        title: str | None = None,
        description: str | None = None,
    ) -> None:
        """
        Initialize the HUD computer tool.

        Args:
            executor: Executor to use for the tool
            platform_type: Which executor to use if executor not provided:
                - "auto": Automatically detect based on platform
                - "xdo": Use XDOExecutor (Linux/X11 only)
                - "pyautogui": Use PyAutoGUIExecutor (cross-platform)
            display_num: X display number
            width: Target width for rescaling (None = use environment width)
            height: Target height for rescaling (None = use environment height)
            rescale_images: If True, rescale screenshots. If False, only rescale action coordinates
            name: Tool name for MCP registration (auto-generated from class name if not provided)
            title: Human-readable display name for the tool (auto-generated from class name)
            description: Tool description (auto-generated from docstring if not provided)
        """
        # Initialize base tool with executor as env
        super().__init__(
            env=executor,
            name=name or "computer",
            title=title or "Computer Control",
            description=description or "Control computer with mouse, keyboard, and screenshots",
        )

        # This is the width and height the agent thinks it operates in
        # By default, use subclass's width and height
        # If specifically set to None, use environment width and height
        self.width = width or computer_settings.DISPLAY_WIDTH
        self.height = height or computer_settings.DISPLAY_HEIGHT

        # This is the static width and height of the environment screen
        # And the width and height of the screenshots taken by the tool
        self.environment_width = computer_settings.DISPLAY_WIDTH
        self.environment_height = computer_settings.DISPLAY_HEIGHT

        # Some APIs rescale screenshots automatically to the agent's width and height, some don't
        # Defined per subclass (e.g., Anthropic, OpenAI)
        # In case you need your agent to receive pre-formatted screenshots, set env variable True
        self.rescale_images = rescale_images

        logger.debug(
            "Agent Screen Width: %s, Agent Screen Height: %s",
            self.width,
            self.height,
            "Environment Screen Width: %s, Environment Screen Height: %s",
            self.environment_width,
            self.environment_height,
        )

        # Calculate scaling factors from base screen size to target size
        self.scale_x = self.width / self.environment_width
        self.scale_y = self.height / self.environment_height

        # Check if we need to scale
        self.needs_scaling = min(self.scale_x, self.scale_y) != 1.0

        # Use environment settings for display number
        self.display_num = display_num or computer_settings.DISPLAY_NUM

        logger.debug("Display number: %s", self.display_num)

        # If no executor provided, create one based on platform
        if self.env is None:
            self._choose_executor(platform_type, self.display_num)

    @property
    def executor(self) -> BaseExecutor:
        """Get the executor (alias for context)."""
        return self.env

    @executor.setter
    def executor(self, value: BaseExecutor) -> None:
        """Set the executor (alias for context)."""
        self.env = value

    def _choose_executor(
        self,
        platform_type: Literal["auto", "xdo", "pyautogui"],
        display_num: int | None,
    ) -> None:
        """Choose executor based on platform_type."""
        # Choose executor based on platform_type
        if platform_type == "auto":
            # Auto-detect based on platform
            system = platform.system().lower()
            if system == "linux":
                # Try XDO first on Linux
                if XDOExecutor.is_available():
                    self.executor = XDOExecutor(display_num=display_num)
                    logger.info("Using XDOExecutor")
                elif PyAutoGUIExecutor.is_available():
                    self.executor = PyAutoGUIExecutor(display_num=display_num)
                    logger.info("Using PyAutoGUIExecutor")
                else:
                    self.executor = BaseExecutor(display_num=display_num)
                    logger.info("No display available, using BaseExecutor (simulation mode)")
            else:
                # Windows/macOS - try PyAutoGUI
                if PyAutoGUIExecutor.is_available():
                    self.executor = PyAutoGUIExecutor(display_num=display_num)
                    logger.info("Using PyAutoGUIExecutor")
                else:
                    self.executor = BaseExecutor(display_num=display_num)
                    logger.info("PyAutoGUI not available, using BaseExecutor (simulation mode)")

        elif platform_type == "xdo":
            if XDOExecutor.is_available():
                self.executor = XDOExecutor(display_num=display_num)
                logger.info("Using XDOExecutor")
            else:
                self.executor = BaseExecutor(display_num=display_num)
                logger.warning("XDO not available, using BaseExecutor (simulation mode)")

        elif platform_type == "pyautogui":
            if PyAutoGUIExecutor.is_available():
                self.executor = PyAutoGUIExecutor(display_num=display_num)
                logger.info("Using PyAutoGUIExecutor")
            else:
                self.executor = BaseExecutor(display_num=display_num)
                logger.warning("PyAutoGUI not available, using BaseExecutor (simulation mode)")
        else:
            raise ValueError(f"Invalid platform_type: {platform_type}")

    def _scale_coordinates(self, x: int | None, y: int | None) -> tuple[int | None, int | None]:
        """Scale coordinates from target space to screen space."""
        if x is not None and self.scale_x != 1.0:
            x = int(x / self.scale_x)
        if y is not None and self.scale_y != 1.0:
            y = int(y / self.scale_y)

        return x, y

    def _scale_path(self, path: list[tuple[int, int]]) -> list[tuple[int, int]]:
        """Scale a path from target space to screen space."""
        scaled_path = []
        for x, y in path:
            scaled_x, scaled_y = self._scale_coordinates(x, y)
            if scaled_x is not None and scaled_y is not None:
                scaled_path.append((scaled_x, scaled_y))

        return scaled_path

    async def _rescale_screenshot(self, screenshot_base64: str) -> str:
        """Rescale a screenshot if rescale_images is True."""
        if not self.rescale_images or not self.needs_scaling:
            return screenshot_base64

        try:
            import base64
            from io import BytesIO

            from PIL import Image  # type: ignore[import-not-found]

            # Decode base64 to image
            image_data = base64.b64decode(screenshot_base64)
            image = Image.open(BytesIO(image_data))

            logger.info(
                "Resizing screenshot from %s x %s to %s x %s",
                image.width,
                image.height,
                self.width,
                self.height,
            )

            # Resize to exact target dimensions
            resized = image.resize((self.width, self.height), Image.Resampling.LANCZOS)

            # Convert back to base64
            buffer = BytesIO()
            resized.save(buffer, format="PNG")
            resized_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")

            return resized_base64
        except Exception as e:
            logger.warning("Failed to rescale screenshot: %s", e)
            return screenshot_base64

    async def __call__(
        self,
        action: str = Field(..., description="The action name (click, type, move, etc.)"),
        # Click parameters
        x: int | None = Field(None, description="X coordinate for click/move/scroll actions"),
        y: int | None = Field(None, description="Y coordinate for click/move/scroll actions"),
        button: Literal["left", "right", "middle", "back", "forward"] | None = Field(
            None, description="Mouse button for click actions"
        ),
        pattern: list[int] | None = Field(
            None, description="Click pattern for multi-clicks (e.g., [100] for double-click)"
        ),
        # Key/Type parameters
        text: str | None = Field(None, description="Text for type/response actions"),
        keys: list[str] | None = Field(None, description="Keys for press/keydown/keyup actions"),
        enter_after: bool | None = Field(None, description="Whether to press Enter after typing"),
        # Scroll parameters
        scroll_x: int | None = Field(
            None, description="Horizontal scroll amount (positive = right)"
        ),
        scroll_y: int | None = Field(None, description="Vertical scroll amount (positive = down)"),
        # Move parameters
        offset_x: int | None = Field(None, description="X offset for relative move"),
        offset_y: int | None = Field(None, description="Y offset for relative move"),
        # Drag parameters
        path: list[tuple[int, int]] | None = Field(
            None, description="Path for drag actions as list of (x, y) coordinates"
        ),
        # Wait parameter
        time: int | None = Field(None, description="Time in milliseconds for wait action"),
        # General parameters
        hold_keys: list[str] | None = Field(None, description="Keys to hold during action"),
        # hold_key specific
        duration: float | None = Field(None, description="Duration in seconds for hold_key action"),
    ) -> list[ContentBlock]:
        """
        Execute a computer control action by name.

        Returns:
            List of MCP content blocks
        """
        logger.info("HudComputerTool executing action: %s", action)

        try:
            # Delegate to executor based on action
            if action == "click":
                # Scale coordinates from client space to screen space
                scaled_x, scaled_y = self._scale_coordinates(x, y)
                result = await self.executor.click(
                    x=scaled_x,
                    y=scaled_y,
                    button=button or "left",
                    pattern=pattern,
                    hold_keys=hold_keys,
                )

            elif action == "press":
                if keys is None:
                    raise ToolError("keys parameter is required for press")
                result = await self.executor.press(keys=keys)

            elif action == "keydown":
                if keys is None:
                    raise ToolError("keys parameter is required for keydown")
                result = await self.executor.keydown(keys=keys)

            elif action == "keyup":
                if keys is None:
                    raise ToolError("keys parameter is required for keyup")
                result = await self.executor.keyup(keys=keys)

            elif action == "type":
                if text is None:
                    raise ToolError("text parameter is required for type")
                result = await self.executor.write(text=text, enter_after=enter_after or False)

            elif action == "scroll":
                # Scale coordinates from client space to screen space
                scaled_x, scaled_y = self._scale_coordinates(x, y)
                result = await self.executor.scroll(
                    x=scaled_x,
                    y=scaled_y,
                    scroll_x=scroll_x,
                    scroll_y=scroll_y,
                    hold_keys=hold_keys,
                )

            elif action == "move":
                # Scale coordinates from client space to screen space
                scaled_x, scaled_y = self._scale_coordinates(x, y)
                scaled_offset_x, scaled_offset_y = self._scale_coordinates(offset_x, offset_y)
                result = await self.executor.move(
                    x=scaled_x, y=scaled_y, offset_x=scaled_offset_x, offset_y=scaled_offset_y
                )

            elif action == "wait":
                if time is None:
                    raise ToolError("time parameter is required for wait")
                result = await self.executor.wait(time=time)

            elif action == "drag":
                if path is None:
                    raise ToolError("path parameter is required for drag")
                # Scale path from client space to screen space
                scaled_path = self._scale_path(path)
                result = await self.executor.drag(
                    path=scaled_path, pattern=pattern, hold_keys=hold_keys
                )

            elif action == "response":
                if text is None:
                    raise ToolError("text parameter is required for response")
                return [TextContent(text=text, type="text")]

            elif action == "screenshot":
                screenshot = await self.executor.screenshot()
                if screenshot:
                    # Rescale screenshot if requested
                    screenshot = await self._rescale_screenshot(screenshot)
                    result = ContentResult(base64_image=screenshot)
                else:
                    result = ContentResult(error="Failed to take screenshot")

            elif action == "position":
                result = await self.executor.position()

            elif action == "hold_key":
                if text is None:
                    raise ToolError("text parameter is required for hold_key")
                if duration is None:
                    raise ToolError("duration parameter is required for hold_key")
                result = await self.executor.hold_key(key=text, duration=duration)

            elif action == "mouse_down":
                result = await self.executor.mouse_down(button=button or "left")

            elif action == "mouse_up":
                result = await self.executor.mouse_up(button=button or "left")

            else:
                raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))

            # Rescale screenshot in result if present
            if isinstance(result, ContentResult) and result.base64_image and self.rescale_images:
                rescaled_image = await self._rescale_screenshot(result.base64_image)
                result.base64_image = rescaled_image

            # Convert result to content blocks
            return result.to_content_blocks()

        except TypeError as e:
            raise McpError(
                ErrorData(code=INVALID_PARAMS, message=f"Invalid parameters for {action}: {e!s}")
            ) from e
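HudComputerTool keeps two coordinate spaces: the agent-facing width/height and the environment's DISPLAY_WIDTH/DISPLAY_HEIGHT, with scale_x and scale_y bridging them; _scale_coordinates divides agent coordinates by those factors before they reach the executor. A minimal standalone sketch of that arithmetic (hypothetical dimensions, not values from the package) illustrates the mapping:

# Illustrative sketch only; mirrors HudComputerTool._scale_coordinates with made-up dimensions.
agent_width, agent_height = 1280, 720    # what the agent believes the screen size is
env_width, env_height = 1920, 1080       # actual environment / screenshot size

scale_x = agent_width / env_width        # ~0.667
scale_y = agent_height / env_height      # ~0.667

def scale_coordinates(x: int | None, y: int | None) -> tuple[int | None, int | None]:
    """Map agent-space coordinates to environment screen space."""
    if x is not None and scale_x != 1.0:
        x = int(x / scale_x)
    if y is not None and scale_y != 1.0:
        y = int(y / scale_y)
    return x, y

print(scale_coordinates(640, 360))       # -> (960, 540): the agent-space center lands at the screen center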
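For orientation only, a hedged sketch of how the class in the listing might be driven directly rather than through an MCP server. It assumes direct keyword invocation of __call__ and that a PyAutoGUI (or simulated) executor is acceptable; the argument names mirror the signature above, but this is an illustration, not documented package usage.

# Hypothetical usage sketch, not part of the diffed package.
import asyncio

from hud.tools.computer.hud import HudComputerTool


async def main() -> None:
    # Force the cross-platform executor; "auto" would prefer XDO on Linux/X11.
    tool = HudComputerTool(platform_type="pyautogui")

    # The "click" branch only reads x, y, button, pattern and hold_keys, so pass
    # them explicitly instead of relying on the Field(...) defaults.
    blocks = await tool(
        action="click", x=100, y=200, button="left", pattern=None, hold_keys=None
    )
    print(blocks)

    # "screenshot" takes no extra parameters and returns image content blocks,
    # rescaled to the agent dimensions when rescale_images is enabled.
    blocks = await tool(action="screenshot")
    print(blocks)


if __name__ == "__main__":
    asyncio.run(main())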