PyPI - hud-python - Versions diffs - 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

hud-python 0.3.4 (py3-none-any.whl) → 0.4.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (192) hide show

hud/__init__.py +22 -89
hud/agents/__init__.py +17 -0
hud/agents/art.py +101 -0
hud/agents/base.py +599 -0
hud/{mcp → agents}/claude.py +373 -321
hud/{mcp → agents}/langchain.py +250 -250
hud/agents/misc/__init__.py +7 -0
hud/{agent → agents}/misc/response_agent.py +80 -80
hud/{mcp → agents}/openai.py +352 -334
hud/agents/openai_chat_generic.py +154 -0
hud/{mcp → agents}/tests/__init__.py +1 -1
hud/agents/tests/test_base.py +742 -0
hud/agents/tests/test_claude.py +324 -0
hud/{mcp → agents}/tests/test_client.py +363 -324
hud/{mcp → agents}/tests/test_openai.py +237 -238
hud/cli/__init__.py +617 -0
hud/cli/__main__.py +8 -0
hud/cli/analyze.py +371 -0
hud/cli/analyze_metadata.py +230 -0
hud/cli/build.py +427 -0
hud/cli/clone.py +185 -0
hud/cli/cursor.py +92 -0
hud/cli/debug.py +392 -0
hud/cli/docker_utils.py +83 -0
hud/cli/init.py +281 -0
hud/cli/interactive.py +353 -0
hud/cli/mcp_server.py +756 -0
hud/cli/pull.py +336 -0
hud/cli/push.py +379 -0
hud/cli/remote_runner.py +311 -0
hud/cli/runner.py +160 -0
hud/cli/tests/__init__.py +3 -0
hud/cli/tests/test_analyze.py +284 -0
hud/cli/tests/test_cli_init.py +265 -0
hud/cli/tests/test_cli_main.py +27 -0
hud/cli/tests/test_clone.py +142 -0
hud/cli/tests/test_cursor.py +253 -0
hud/cli/tests/test_debug.py +453 -0
hud/cli/tests/test_mcp_server.py +139 -0
hud/cli/tests/test_utils.py +388 -0
hud/cli/utils.py +263 -0
hud/clients/README.md +143 -0
hud/clients/__init__.py +16 -0
hud/clients/base.py +354 -0
hud/clients/fastmcp.py +202 -0
hud/clients/mcp_use.py +278 -0
hud/clients/tests/__init__.py +1 -0
hud/clients/tests/test_client_integration.py +111 -0
hud/clients/tests/test_fastmcp.py +342 -0
hud/clients/tests/test_protocol.py +188 -0
hud/clients/utils/__init__.py +1 -0
hud/clients/utils/retry_transport.py +160 -0
hud/datasets.py +322 -192
hud/misc/__init__.py +1 -0
hud/{agent → misc}/claude_plays_pokemon.py +292 -283
hud/otel/__init__.py +35 -0
hud/otel/collector.py +142 -0
hud/otel/config.py +164 -0
hud/otel/context.py +536 -0
hud/otel/exporters.py +366 -0
hud/otel/instrumentation.py +97 -0
hud/otel/processors.py +118 -0
hud/otel/tests/__init__.py +1 -0
hud/otel/tests/test_processors.py +197 -0
hud/server/__init__.py +5 -5
hud/server/context.py +114 -0
hud/server/helper/__init__.py +5 -0
hud/server/low_level.py +132 -0
hud/server/server.py +166 -0
hud/server/tests/__init__.py +3 -0
hud/settings.py +73 -79
hud/shared/__init__.py +5 -0
hud/{exceptions.py → shared/exceptions.py} +180 -180
hud/{server → shared}/requests.py +264 -264
hud/shared/tests/test_exceptions.py +157 -0
hud/{server → shared}/tests/test_requests.py +275 -275
hud/telemetry/__init__.py +25 -30
hud/telemetry/instrument.py +379 -0
hud/telemetry/job.py +309 -141
hud/telemetry/replay.py +74 -0
hud/telemetry/trace.py +83 -0
hud/tools/__init__.py +33 -34
hud/tools/base.py +365 -65
hud/tools/bash.py +161 -137
hud/tools/computer/__init__.py +15 -13
hud/tools/computer/anthropic.py +437 -414
hud/tools/computer/hud.py +376 -328
hud/tools/computer/openai.py +295 -286
hud/tools/computer/settings.py +82 -0
hud/tools/edit.py +314 -290
hud/tools/executors/__init__.py +30 -30
hud/tools/executors/base.py +539 -532
hud/tools/executors/pyautogui.py +621 -619
hud/tools/executors/tests/__init__.py +1 -1
hud/tools/executors/tests/test_base_executor.py +338 -338
hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
hud/tools/executors/xdo.py +511 -503
hud/tools/{playwright_tool.py → playwright.py} +412 -379
hud/tools/tests/__init__.py +3 -3
hud/tools/tests/test_base.py +282 -0
hud/tools/tests/test_bash.py +158 -152
hud/tools/tests/test_bash_extended.py +197 -0
hud/tools/tests/test_computer.py +425 -52
hud/tools/tests/test_computer_actions.py +34 -34
hud/tools/tests/test_edit.py +259 -240
hud/tools/tests/test_init.py +27 -27
hud/tools/tests/test_playwright_tool.py +183 -183
hud/tools/tests/test_tools.py +145 -157
hud/tools/tests/test_utils.py +156 -156
hud/tools/types.py +72 -0
hud/tools/utils.py +50 -50
hud/types.py +136 -89
hud/utils/__init__.py +10 -16
hud/utils/async_utils.py +65 -0
hud/utils/design.py +168 -0
hud/utils/mcp.py +55 -0
hud/utils/progress.py +149 -149
hud/utils/telemetry.py +66 -66
hud/utils/tests/test_async_utils.py +173 -0
hud/utils/tests/test_init.py +17 -21
hud/utils/tests/test_progress.py +261 -225
hud/utils/tests/test_telemetry.py +82 -37
hud/utils/tests/test_version.py +8 -8
hud/version.py +7 -7
hud_python-0.4.0.dist-info/METADATA +474 -0
hud_python-0.4.0.dist-info/RECORD +132 -0
hud_python-0.4.0.dist-info/entry_points.txt +3 -0
{hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
hud/adapters/__init__.py +0 -8
hud/adapters/claude/__init__.py +0 -5
hud/adapters/claude/adapter.py +0 -180
hud/adapters/claude/tests/__init__.py +0 -1
hud/adapters/claude/tests/test_adapter.py +0 -519
hud/adapters/common/__init__.py +0 -6
hud/adapters/common/adapter.py +0 -178
hud/adapters/common/tests/test_adapter.py +0 -289
hud/adapters/common/types.py +0 -446
hud/adapters/operator/__init__.py +0 -5
hud/adapters/operator/adapter.py +0 -108
hud/adapters/operator/tests/__init__.py +0 -1
hud/adapters/operator/tests/test_adapter.py +0 -370
hud/agent/__init__.py +0 -19
hud/agent/base.py +0 -126
hud/agent/claude.py +0 -271
hud/agent/langchain.py +0 -215
hud/agent/misc/__init__.py +0 -3
hud/agent/operator.py +0 -268
hud/agent/tests/__init__.py +0 -1
hud/agent/tests/test_base.py +0 -202
hud/env/__init__.py +0 -11
hud/env/client.py +0 -35
hud/env/docker_client.py +0 -349
hud/env/environment.py +0 -446
hud/env/local_docker_client.py +0 -358
hud/env/remote_client.py +0 -212
hud/env/remote_docker_client.py +0 -292
hud/gym.py +0 -130
hud/job.py +0 -773
hud/mcp/__init__.py +0 -17
hud/mcp/base.py +0 -631
hud/mcp/client.py +0 -312
hud/mcp/tests/test_base.py +0 -512
hud/mcp/tests/test_claude.py +0 -294
hud/task.py +0 -149
hud/taskset.py +0 -237
hud/telemetry/_trace.py +0 -347
hud/telemetry/context.py +0 -230
hud/telemetry/exporter.py +0 -575
hud/telemetry/instrumentation/__init__.py +0 -3
hud/telemetry/instrumentation/mcp.py +0 -259
hud/telemetry/instrumentation/registry.py +0 -59
hud/telemetry/mcp_models.py +0 -270
hud/telemetry/tests/__init__.py +0 -1
hud/telemetry/tests/test_context.py +0 -210
hud/telemetry/tests/test_trace.py +0 -312
hud/tools/helper/README.md +0 -56
hud/tools/helper/__init__.py +0 -9
hud/tools/helper/mcp_server.py +0 -78
hud/tools/helper/server_initialization.py +0 -115
hud/tools/helper/utils.py +0 -58
hud/trajectory.py +0 -94
hud/utils/agent.py +0 -37
hud/utils/common.py +0 -256
hud/utils/config.py +0 -120
hud/utils/deprecation.py +0 -115
hud/utils/misc.py +0 -53
hud/utils/tests/test_common.py +0 -277
hud/utils/tests/test_config.py +0 -129
hud_python-0.3.4.dist-info/METADATA +0 -284
hud_python-0.3.4.dist-info/RECORD +0 -120
hud/{adapters/common → shared}/tests/__init__.py +0 -0
{hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0

hud/tools/computer/hud.py CHANGED Viewed

@@ -1,328 +1,376 @@
-# flake8: noqa: B008
-from __future__ import annotations
-import logging
-import platform
-from typing import Literal
-from mcp import ErrorData, McpError
-from mcp.types import INVALID_PARAMS, ImageContent, TextContent
-from pydantic import Field
-from hud.tools.base import ToolError, ToolResult, tool_result_to_content_blocks
-from hud.tools.executors.base import BaseExecutor
-from hud.tools.executors.pyautogui import PyAutoGUIExecutor
-from hud.tools.executors.xdo import XDOExecutor
-logger = logging.getLogger(__name__)
-BASE_SCREEN_WIDTH = 1920
-BASE_SCREEN_HEIGHT = 1080
-class HudComputerTool:
-    """
-    A tool that allows the agent to control the computer.
-    """
-    def __init__(
-        self,
-        width: int | None = None,
-        height: int | None = None,
-        display_num: int | None = None,
-        platform_type: Literal["auto", "xdo", "pyautogui"] = "auto",
-        custom_executor: BaseExecutor | None = None,
-        rescale_images: bool = False,
-    ) -> None:
-        """
-        Initialize the HUD computer tool.
-        Args:
-            width: Target width for rescaling (None = use actual screen width)
-            height: Target height for rescaling (None = use actual screen height)
-            display_num: X display number
-            platform_type: Which executor to use:
-                - "auto": Automatically detect based on platform
-                - "xdo": Use XDOExecutor (Linux/X11 only)
-                - "pyautogui": Use PyAutoGUIExecutor (cross-platform)
-            custom_executor: If None, executor class will be determined based on platform_type.
-            rescale_images: If True, rescale screenshots. If False, only rescale action coordinates
-        """
-        # Use provided dimensions or defaults
-        self.width = width or BASE_SCREEN_WIDTH
-        self.height = height or BASE_SCREEN_HEIGHT
-        self.rescale_images = rescale_images
-        logger.info("Width: %s, Height: %s", self.width, self.height)
-        logger.info(
-            "Base Screen Width: %s, Base Screen Height: %s",
-            BASE_SCREEN_WIDTH,
-            BASE_SCREEN_HEIGHT,
-        )
-        # Calculate scaling factors from base screen size to target size
-        self.scale_x = self.width / BASE_SCREEN_WIDTH
-        self.scale_y = self.height / BASE_SCREEN_HEIGHT
-        logger.info("Scale X: %s, Scale Y: %s", self.scale_x, self.scale_y)
-        self.scale = min(self.scale_x, self.scale_y)
-        logger.info("Scaling factor: %s", self.scale)
-        # Check if we need to scale
-        self.needs_scaling = self.scale != 1.0
-        if custom_executor is None:
-            self._choose_executor(platform_type, display_num)
-        else:
-            self.executor = custom_executor
-    def _choose_executor(
-        self,
-        platform_type: Literal["auto", "xdo", "pyautogui"],
-        display_num: int | None,
-    ) -> None:
-        """Choose executor based on platform_type."""
-        # Choose executor based on platform_type
-        if platform_type == "auto":
-            # Auto-detect based on platform
-            system = platform.system().lower()
-            if system == "linux":
-                # Try XDO first on Linux
-                if XDOExecutor.is_available():
-                    self.executor = XDOExecutor(display_num=display_num)
-                    logger.info("Using XDOExecutor")
-                elif PyAutoGUIExecutor.is_available():
-                    self.executor = PyAutoGUIExecutor(display_num=display_num)
-                    logger.info("Using PyAutoGUIExecutor")
-                else:
-                    self.executor = BaseExecutor(display_num=display_num)
-                    logger.info("No display available, using BaseExecutor (simulation mode)")
-            else:
-                # Windows/macOS - try PyAutoGUI
-                if PyAutoGUIExecutor.is_available():
-                    self.executor = PyAutoGUIExecutor(display_num=display_num)
-                    logger.info("Using PyAutoGUIExecutor")
-                else:
-                    self.executor = BaseExecutor(display_num=display_num)
-                    logger.info("PyAutoGUI not available, using BaseExecutor (simulation mode)")
-        elif platform_type == "xdo":
-            if XDOExecutor.is_available():
-                self.executor = XDOExecutor(display_num=display_num)
-                logger.info("Using XDOExecutor")
-            else:
-                self.executor = BaseExecutor(display_num=display_num)
-                logger.warning("XDO not available, using BaseExecutor (simulation mode)")
-        elif platform_type == "pyautogui":
-            if PyAutoGUIExecutor.is_available():
-                self.executor = PyAutoGUIExecutor(display_num=display_num)
-                logger.info("Using PyAutoGUIExecutor")
-            else:
-                self.executor = BaseExecutor(display_num=display_num)
-                logger.warning("PyAutoGUI not available, using BaseExecutor (simulation mode)")
-        else:
-            raise ValueError(f"Invalid platform_type: {platform_type}")
-    def _scale_coordinates(self, x: int | None, y: int | None) -> tuple[int | None, int | None]:
-        """Scale coordinates from target space to screen space."""
-        if x is not None:
-            x = int(x / self.scale_x)
-        if y is not None:
-            y = int(y / self.scale_y)
-        return x, y
-    def _scale_path(self, path: list[tuple[int, int]]) -> list[tuple[int, int]]:
-        """Scale a path from target space to screen space."""
-        scaled_path = []
-        for x, y in path:
-            scaled_x, scaled_y = self._scale_coordinates(x, y)
-            if scaled_x is not None and scaled_y is not None:
-                scaled_path.append((scaled_x, scaled_y))
-        return scaled_path
-    async def _rescale_screenshot(self, screenshot_base64: str) -> str:
-        """Rescale a screenshot if rescale_images is True."""
-        if not self.rescale_images or not self.needs_scaling:
-            return screenshot_base64
-        try:
-            import base64
-            from io import BytesIO
-            from PIL import Image
-            # Decode base64 to image
-            image_data = base64.b64decode(screenshot_base64)
-            image = Image.open(BytesIO(image_data))
-            # Resize to exact target dimensions
-            resized = image.resize((self.width, self.height), Image.Resampling.LANCZOS)
-            # Convert back to base64
-            buffer = BytesIO()
-            resized.save(buffer, format="PNG")
-            resized_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
-            return resized_base64
-        except Exception as e:
-            logger.warning("Failed to rescale screenshot: %s", e)
-            return screenshot_base64
-    async def __call__(
-        self,
-        action: str = Field(..., description="The action name (click, type, move, etc.)"),
-        # Click parameters
-        x: int | None = Field(None, description="X coordinate for click/move/scroll actions"),
-        y: int | None = Field(None, description="Y coordinate for click/move/scroll actions"),
-        button: Literal["left", "right", "middle", "back", "forward"] | None = Field(
-            None, description="Mouse button for click actions"
-        ),
-        pattern: list[int] | None = Field(
-            None, description="Click pattern for multi-clicks (e.g., [100] for double-click)"
-        ),
-        # Key/Type parameters
-        text: str | None = Field(None, description="Text for type/response actions"),
-        keys: list[str] | None = Field(None, description="Keys for press/keydown/keyup actions"),
-        enter_after: bool | None = Field(None, description="Whether to press Enter after typing"),
-        # Scroll parameters
-        scroll_x: int | None = Field(
-            None, description="Horizontal scroll amount (positive = right)"
-        ),
-        scroll_y: int | None = Field(None, description="Vertical scroll amount (positive = down)"),
-        # Move parameters
-        offset_x: int | None = Field(None, description="X offset for relative move"),
-        offset_y: int | None = Field(None, description="Y offset for relative move"),
-        # Drag parameters
-        path: list[tuple[int, int]] | None = Field(
-            None, description="Path for drag actions as list of (x, y) coordinates"
-        ),
-        # Wait parameter
-        time: int | None = Field(None, description="Time in milliseconds for wait action"),
-        # General parameters
-        hold_keys: list[str] | None = Field(None, description="Keys to hold during action"),
-        # hold_key specific
-        duration: float | None = Field(None, description="Duration in seconds for hold_key action"),
-    ) -> list[ImageContent | TextContent]:
-        """
-        Execute a computer control action by name.
-        Returns:
-            List of MCP content blocks
-        """
-        logger.info("HudComputerTool executing action: %s", action)
-        try:
-            # Delegate to executor based on action
-            if action == "click":
-                # Scale coordinates from client space to screen space
-                scaled_x, scaled_y = self._scale_coordinates(x, y)
-                result = await self.executor.click(
-                    x=scaled_x,
-                    y=scaled_y,
-                    button=button or "left",
-                    pattern=pattern,
-                    hold_keys=hold_keys,
-                )
-            elif action == "press":
-                if keys is None:
-                    raise ToolError("keys parameter is required for press")
-                result = await self.executor.press(keys=keys)
-            elif action == "keydown":
-                if keys is None:
-                    raise ToolError("keys parameter is required for keydown")
-                result = await self.executor.keydown(keys=keys)
-            elif action == "keyup":
-                if keys is None:
-                    raise ToolError("keys parameter is required for keyup")
-                result = await self.executor.keyup(keys=keys)
-            elif action == "type":
-                if text is None:
-                    raise ToolError("text parameter is required for type")
-                result = await self.executor.type(text=text, enter_after=enter_after or False)
-            elif action == "scroll":
-                # Scale coordinates from client space to screen space
-                scaled_x, scaled_y = self._scale_coordinates(x, y)
-                result = await self.executor.scroll(
-                    x=scaled_x,
-                    y=scaled_y,
-                    scroll_x=scroll_x,
-                    scroll_y=scroll_y,
-                    hold_keys=hold_keys,
-                )
-            elif action == "move":
-                # Scale coordinates from client space to screen space
-                scaled_x, scaled_y = self._scale_coordinates(x, y)
-                scaled_offset_x, scaled_offset_y = self._scale_coordinates(offset_x, offset_y)
-                result = await self.executor.move(
-                    x=scaled_x, y=scaled_y, offset_x=scaled_offset_x, offset_y=scaled_offset_y
-                )
-            elif action == "wait":
-                if time is None:
-                    raise ToolError("time parameter is required for wait")
-                result = await self.executor.wait(time=time)
-            elif action == "drag":
-                if path is None:
-                    raise ToolError("path parameter is required for drag")
-                # Scale path from client space to screen space
-                scaled_path = self._scale_path(path)
-                result = await self.executor.drag(
-                    path=scaled_path, pattern=pattern, hold_keys=hold_keys
-                )
-            elif action == "response":
-                if text is None:
-                    raise ToolError("text parameter is required for response")
-                return [TextContent(text=text, type="text")]
-            elif action == "screenshot":
-                screenshot = await self.executor.screenshot()
-                if screenshot:
-                    # Rescale screenshot if requested
-                    screenshot = await self._rescale_screenshot(screenshot)
-                    result = ToolResult(base64_image=screenshot)
-                else:
-                    result = ToolResult(error="Failed to take screenshot")
-            elif action == "position":
-                result = await self.executor.position()
-            elif action == "hold_key":
-                if text is None:
-                    raise ToolError("text parameter is required for hold_key")
-                if duration is None:
-                    raise ToolError("duration parameter is required for hold_key")
-                result = await self.executor.hold_key(key=text, duration=duration)
-            elif action == "mouse_down":
-                result = await self.executor.mouse_down(button=button or "left")
-            elif action == "mouse_up":
-                result = await self.executor.mouse_up(button=button or "left")
-            else:
-                raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))
-            # Rescale screenshot in result if present
-            if isinstance(result, ToolResult) and result.base64_image and self.rescale_images:
-                rescaled_image = await self._rescale_screenshot(result.base64_image)
-                result = result.replace(base64_image=rescaled_image)
-            # Convert result to content blocks
-            return tool_result_to_content_blocks(result)
-        except TypeError as e:
-            raise McpError(
-                ErrorData(code=INVALID_PARAMS, message=f"Invalid parameters for {action}: {e!s}")
-            ) from e
+# flake8: noqa: B008
+from __future__ import annotations
+import logging
+import platform
+from typing import Literal
+from mcp import ErrorData, McpError
+from mcp.types import INVALID_PARAMS, ContentBlock, TextContent
+from pydantic import Field
+from hud.tools.base import BaseTool
+from hud.tools.executors.base import BaseExecutor
+from hud.tools.executors.pyautogui import PyAutoGUIExecutor
+from hud.tools.executors.xdo import XDOExecutor
+from hud.tools.types import ContentResult, ToolError
+from .settings import computer_settings
+logger = logging.getLogger(__name__)
+class HudComputerTool(BaseTool):
+    """
+    A tool that allows the agent to control the computer.
+    """
+    def __init__(
+        self,
+        # Define within environment based on platform
+        executor: BaseExecutor | None = None,
+        platform_type: Literal["auto", "xdo", "pyautogui"] = "auto",
+        display_num: int | None = None,
+        # Overrides for what dimensions the agent thinks it operates in
+        # Define per subclass (e.g., Anthropic, OpenAI)
+        width: int | None = computer_settings.HUD_COMPUTER_WIDTH,
+        height: int | None = computer_settings.HUD_COMPUTER_HEIGHT,
+        rescale_images: bool = computer_settings.HUD_RESCALE_IMAGES,
+        # What the agent sees as the tool's name, title, and description
+        name: str | None = None,
+        title: str | None = None,
+        description: str | None = None,
+    ) -> None:
+        """
+        Initialize the HUD computer tool.
+        Args:
+            executor: Executor to use for the tool. When None, one is created
+                by ``_choose_executor`` according to ``platform_type``.
+            platform_type: Which executor to use if executor not provided:
+                - "auto": Automatically detect based on platform
+                - "xdo": Use XDOExecutor (Linux/X11 only)
+                - "pyautogui": Use PyAutoGUIExecutor (cross-platform)
+            display_num: X display number (None = use computer_settings.DISPLAY_NUM)
+            width: Target width for rescaling (None = use environment width)
+            height: Target height for rescaling (None = use environment height)
+            rescale_images: If True, rescale screenshots. If False, only rescale action coordinates
+            name: Tool name passed to the base tool (defaults to "computer")
+            title: Human-readable display name (defaults to "Computer Control")
+            description: Tool description (defaults to a generic mouse/keyboard/screenshot summary)
+        """
+        # Initialize base tool with executor as env
+        super().__init__(
+            env=executor,
+            name=name or "computer",
+            title=title or "Computer Control",
+            description=description or "Control computer with mouse, keyboard, and screenshots",
+        )
+        # This is the width and height the agent thinks it operates in
+        # By default, use subclass's width and height
+        # If specifically set to None, use environment width and height
+        self.width = width or computer_settings.DISPLAY_WIDTH
+        self.height = height or computer_settings.DISPLAY_HEIGHT
+        # This is the static width and height of the environment screen
+        # And the width and height of the screenshots taken by the tool
+        self.environment_width = computer_settings.DISPLAY_WIDTH
+        self.environment_height = computer_settings.DISPLAY_HEIGHT
+        # Some APIs rescale screenshots automatically to the agent's width and height, some don't
+        # Defined per subclass (e.g., Anthropic, OpenAI)
+        # In case you need your agent to receive pre-formatted screenshots, set env variable True
+        self.rescale_images = rescale_images
+        # One format string with one placeholder per argument: the previous call
+        # passed a second format string as a positional argument, which makes
+        # logging fail to format the record whenever DEBUG is enabled.
+        logger.debug(
+            "Agent Screen Width: %s, Agent Screen Height: %s, "
+            "Environment Screen Width: %s, Environment Screen Height: %s",
+            self.width,
+            self.height,
+            self.environment_width,
+            self.environment_height,
+        )
+        # Calculate scaling factors from environment screen size to agent size
+        self.scale_x = self.width / self.environment_width
+        self.scale_y = self.height / self.environment_height
+        # Scaling is needed as soon as EITHER axis differs from 1.0. Taking the
+        # min() of both factors missed the case where one axis is exactly 1.0
+        # and the other is larger.
+        self.needs_scaling = self.scale_x != 1.0 or self.scale_y != 1.0
+        # Use environment settings for display number only when none was given;
+        # an explicit 0 is falsy but must not be replaced by the default.
+        self.display_num = (
+            display_num if display_num is not None else computer_settings.DISPLAY_NUM
+        )
+        logger.debug("Display number: %s", self.display_num)
+        # If no executor provided, create one based on platform
+        if self.env is None:
+            self._choose_executor(platform_type, self.display_num)
+    @property
+    def executor(self) -> BaseExecutor:
+        """Return the executor, which is stored on the instance as ``self.env``."""
+        return self.env
+    @executor.setter
+    def executor(self, value: BaseExecutor) -> None:
+        """Store ``value`` as the executor by assigning it to ``self.env``."""
+        self.env = value
+    def _choose_executor(
+        self,
+        platform_type: Literal["auto", "xdo", "pyautogui"],
+        display_num: int | None,
+    ) -> None:
+        """
+        Pick an executor for ``platform_type`` and store it in ``self.executor``.
+        An explicit "xdo" or "pyautogui" request falls back to BaseExecutor with a
+        warning when that backend is unavailable. "auto" tries XDOExecutor first on
+        Linux, then PyAutoGUIExecutor, then BaseExecutor, logging at info level.
+        Args:
+            platform_type: "auto", "xdo" or "pyautogui"
+            display_num: Display number forwarded to the executor constructor
+        Raises:
+            ValueError: If platform_type is none of the accepted values
+        """
+        # Explicitly requested backends
+        if platform_type == "xdo":
+            if XDOExecutor.is_available():
+                self.executor = XDOExecutor(display_num=display_num)
+                logger.info("Using XDOExecutor")
+                return
+            self.executor = BaseExecutor(display_num=display_num)
+            logger.warning("XDO not available, using BaseExecutor (simulation mode)")
+            return
+        if platform_type == "pyautogui":
+            if PyAutoGUIExecutor.is_available():
+                self.executor = PyAutoGUIExecutor(display_num=display_num)
+                logger.info("Using PyAutoGUIExecutor")
+                return
+            self.executor = BaseExecutor(display_num=display_num)
+            logger.warning("PyAutoGUI not available, using BaseExecutor (simulation mode)")
+            return
+        if platform_type != "auto":
+            raise ValueError(f"Invalid platform_type: {platform_type}")
+        # Auto-detection: XDO is only probed on Linux, PyAutoGUI everywhere
+        on_linux = platform.system().lower() == "linux"
+        if on_linux and XDOExecutor.is_available():
+            self.executor = XDOExecutor(display_num=display_num)
+            logger.info("Using XDOExecutor")
+            return
+        if PyAutoGUIExecutor.is_available():
+            self.executor = PyAutoGUIExecutor(display_num=display_num)
+            logger.info("Using PyAutoGUIExecutor")
+            return
+        # Nothing usable: fall back to BaseExecutor
+        self.executor = BaseExecutor(display_num=display_num)
+        if on_linux:
+            logger.info("No display available, using BaseExecutor (simulation mode)")
+        else:
+            logger.info("PyAutoGUI not available, using BaseExecutor (simulation mode)")
+    def _scale_coordinates(self, x: int | None, y: int | None) -> tuple[int | None, int | None]:
+        """
+        Convert a coordinate pair from agent (target) space to screen space.
+        Each axis is divided by its scale factor and truncated to int. A None
+        coordinate, or an axis whose factor is exactly 1.0, is passed through as is.
+        """
+        screen_x = x if x is None or self.scale_x == 1.0 else int(x / self.scale_x)
+        screen_y = y if y is None or self.scale_y == 1.0 else int(y / self.scale_y)
+        return screen_x, screen_y
+    def _scale_path(self, path: list[tuple[int, int]]) -> list[tuple[int, int]]:
+        """
+        Convert every point of ``path`` from agent (target) space to screen space.
+        Points are converted with ``_scale_coordinates``; a point is dropped if
+        either converted coordinate comes back as None.
+        """
+        converted = (self._scale_coordinates(px, py) for px, py in path)
+        return [(sx, sy) for sx, sy in converted if sx is not None and sy is not None]
+    async def _rescale_screenshot(self, screenshot_base64: str) -> str:
+        """
+        Resize a base64 screenshot to the agent's width and height.
+        The input is returned unchanged when ``rescale_images`` is off, when no
+        scaling is needed, or when decoding/resizing fails (a warning is logged).
+        Otherwise the image is resized with LANCZOS resampling and re-encoded
+        as a base64 PNG.
+        """
+        if not (self.rescale_images and self.needs_scaling):
+            return screenshot_base64
+        try:
+            import base64
+            from io import BytesIO
+            from PIL import Image  # type: ignore[import-not-found]
+            # base64 text -> PIL image
+            source = Image.open(BytesIO(base64.b64decode(screenshot_base64)))
+            logger.info(
+                "Resizing screenshot from %s x %s to %s x %s",
+                source.width,
+                source.height,
+                self.width,
+                self.height,
+            )
+            # Stretch to the exact agent dimensions, then PNG -> base64 text
+            target_size = (self.width, self.height)
+            png_bytes = BytesIO()
+            source.resize(target_size, Image.Resampling.LANCZOS).save(png_bytes, format="PNG")
+            return base64.b64encode(png_bytes.getvalue()).decode("utf-8")
+        except Exception as exc:
+            # Best effort: hand back the unscaled screenshot rather than fail the action
+            logger.warning("Failed to rescale screenshot: %s", exc)
+            return screenshot_base64
+    async def __call__(
+        self,
+        action: str = Field(..., description="The action name (click, type, move, etc.)"),
+        # Click parameters
+        x: int | None = Field(None, description="X coordinate for click/move/scroll actions"),
+        y: int | None = Field(None, description="Y coordinate for click/move/scroll actions"),
+        button: Literal["left", "right", "middle", "back", "forward"] | None = Field(
+            None, description="Mouse button for click actions"
+        ),
+        pattern: list[int] | None = Field(
+            None, description="Click pattern for multi-clicks (e.g., [100] for double-click)"
+        ),
+        # Key/Type parameters
+        text: str | None = Field(None, description="Text for type/response actions"),
+        keys: list[str] | None = Field(None, description="Keys for press/keydown/keyup actions"),
+        enter_after: bool | None = Field(None, description="Whether to press Enter after typing"),
+        # Scroll parameters
+        scroll_x: int | None = Field(
+            None, description="Horizontal scroll amount (positive = right)"
+        ),
+        scroll_y: int | None = Field(None, description="Vertical scroll amount (positive = down)"),
+        # Move parameters
+        offset_x: int | None = Field(None, description="X offset for relative move"),
+        offset_y: int | None = Field(None, description="Y offset for relative move"),
+        # Drag parameters
+        path: list[tuple[int, int]] | None = Field(
+            None, description="Path for drag actions as list of (x, y) coordinates"
+        ),
+        # Wait parameter
+        time: int | None = Field(None, description="Time in milliseconds for wait action"),
+        # General parameters
+        hold_keys: list[str] | None = Field(None, description="Keys to hold during action"),
+        # hold_key specific
+        duration: float | None = Field(None, description="Duration in seconds for hold_key action"),
+    ) -> list[ContentBlock]:
+        """
+        Execute a computer control action by name.
+        Returns:
+            List of MCP content blocks
+        """
+        logger.info("HudComputerTool executing action: %s", action)
+        try:
+            # Delegate to executor based on action
+            if action == "click":
+                # Scale coordinates from client space to screen space
+                scaled_x, scaled_y = self._scale_coordinates(x, y)
+                result = await self.executor.click(
+                    x=scaled_x,
+                    y=scaled_y,
+                    button=button or "left",
+                    pattern=pattern,
+                    hold_keys=hold_keys,
+                )
+            elif action == "press":
+                if keys is None:
+                    raise ToolError("keys parameter is required for press")
+                result = await self.executor.press(keys=keys)
+            elif action == "keydown":
+                if keys is None:
+                    raise ToolError("keys parameter is required for keydown")
+                result = await self.executor.keydown(keys=keys)
+            elif action == "keyup":
+                if keys is None:
+                    raise ToolError("keys parameter is required for keyup")
+                result = await self.executor.keyup(keys=keys)
+            elif action == "type":
+                if text is None:
+                    raise ToolError("text parameter is required for type")
+                result = await self.executor.write(text=text, enter_after=enter_after or False)
+            elif action == "scroll":
+                # Scale coordinates from client space to screen space
+                scaled_x, scaled_y = self._scale_coordinates(x, y)
+                result = await self.executor.scroll(
+                    x=scaled_x,
+                    y=scaled_y,
+                    scroll_x=scroll_x,
+                    scroll_y=scroll_y,
+                    hold_keys=hold_keys,
+                )
+            elif action == "move":
+                # Scale coordinates from client space to screen space
+                scaled_x, scaled_y = self._scale_coordinates(x, y)
+                scaled_offset_x, scaled_offset_y = self._scale_coordinates(offset_x, offset_y)
+                result = await self.executor.move(
+                    x=scaled_x, y=scaled_y, offset_x=scaled_offset_x, offset_y=scaled_offset_y
+                )
+            elif action == "wait":
+                if time is None:
+                    raise ToolError("time parameter is required for wait")
+                result = await self.executor.wait(time=time)
+            elif action == "drag":
+                if path is None:
+                    raise ToolError("path parameter is required for drag")
+                # Scale path from client space to screen space
+                scaled_path = self._scale_path(path)
+                result = await self.executor.drag(
+                    path=scaled_path, pattern=pattern, hold_keys=hold_keys
+                )
+            elif action == "response":
+                if text is None:
+                    raise ToolError("text parameter is required for response")
+                return [TextContent(text=text, type="text")]
+            elif action == "screenshot":
+                screenshot = await self.executor.screenshot()
+                if screenshot:
+                    # Rescale screenshot if requested
+                    screenshot = await self._rescale_screenshot(screenshot)
+                    result = ContentResult(base64_image=screenshot)
+                else:
+                    result = ContentResult(error="Failed to take screenshot")
+            elif action == "position":
+                result = await self.executor.position()
+            elif action == "hold_key":
+                if text is None:
+                    raise ToolError("text parameter is required for hold_key")
+                if duration is None:
+                    raise ToolError("duration parameter is required for hold_key")
+                result = await self.executor.hold_key(key=text, duration=duration)
+            elif action == "mouse_down":
+                result = await self.executor.mouse_down(button=button or "left")
+            elif action == "mouse_up":
+                result = await self.executor.mouse_up(button=button or "left")
+            else:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))
+            # Rescale screenshot in result if present
+            if isinstance(result, ContentResult) and result.base64_image and self.rescale_images:
+                rescaled_image = await self._rescale_screenshot(result.base64_image)
+                result.base64_image = rescaled_image
+            # Convert result to content blocks
+            return result.to_content_blocks()
+        except TypeError as e:
+            raise McpError(
+                ErrorData(code=INVALID_PARAMS, message=f"Invalid parameters for {action}: {e!s}")
+            ) from e

hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

Potentially problematic release.

hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl