PyPI - hud-python - Versions diffs - 0.4.46__py3-none-any.whl → 0.4.48__py3-none-any.whl - Mend

hud-python 0.4.46__py3-none-any.whl → 0.4.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (33) hide show

hud/agents/base.py +49 -142
hud/agents/claude.py +5 -6
hud/agents/misc/integration_test_agent.py +2 -0
hud/agents/tests/test_base.py +2 -5
hud/cli/__init__.py +2 -2
hud/cli/eval.py +14 -9
hud/cli/flows/tasks.py +2 -4
hud/cli/rl/local_runner.py +25 -13
hud/cli/rl/vllm.py +2 -0
hud/cli/tests/test_analyze_metadata.py +3 -2
hud/cli/tests/test_eval.py +525 -0
hud/cli/tests/test_utils.py +1 -1
hud/datasets/parallel.py +0 -12
hud/datasets/runner.py +1 -4
hud/rl/actor.py +4 -2
hud/rl/distributed.py +1 -1
hud/rl/learner.py +2 -1
hud/rl/train.py +1 -1
hud/telemetry/trace.py +1 -1
hud/tools/base.py +11 -9
hud/tools/computer/__init__.py +2 -0
hud/tools/computer/qwen.py +431 -0
hud/tools/computer/settings.py +16 -0
hud/tools/executors/pyautogui.py +1 -1
hud/tools/playwright.py +1 -1
hud/types.py +2 -3
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.46.dist-info → hud_python-0.4.48.dist-info}/METADATA +1 -1
{hud_python-0.4.46.dist-info → hud_python-0.4.48.dist-info}/RECORD +33 -31
{hud_python-0.4.46.dist-info → hud_python-0.4.48.dist-info}/WHEEL +0 -0
{hud_python-0.4.46.dist-info → hud_python-0.4.48.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.46.dist-info → hud_python-0.4.48.dist-info}/licenses/LICENSE +0 -0

hud/tools/base.py CHANGED Viewed

@@ -1,14 +1,15 @@
 from __future__ import annotations
+import logging
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, cast, Awaitable
+from typing import TYPE_CHECKING, Any, cast
 from fastmcp import FastMCP
 from hud.tools.types import ContentBlock, EvaluationResult
 if TYPE_CHECKING:
-    from collections.abc import Callable
+    from collections.abc import Awaitable, Callable
     from fastmcp.tools import FunctionTool
     from fastmcp.tools.tool import Tool, ToolResult
@@ -16,9 +17,9 @@ if TYPE_CHECKING:
 # Basic result types for tools
 BaseResult = list[ContentBlock] | EvaluationResult
-import logging
 logger = logging.getLogger(__name__)
 class BaseTool(ABC):
     """
     Base helper class for all MCP tools to constrain their output.
@@ -106,9 +107,9 @@ class BaseTool(ABC):
             )
         return self._mcp_tool
-    def add_callback(self, event_type: str, callback: Callable[..., Awaitable[Any]]):
+    def add_callback(self, event_type: str, callback: Callable[..., Awaitable[Any]]) -> None:
         """Register a callback function for specific event
         Args:
             event_type: (Required) Specific event name to trigger callback
                         e.g. "after_click", "before_navigate"
@@ -118,7 +119,7 @@ class BaseTool(ABC):
             self._callbacks[event_type] = []
         self._callbacks[event_type].append(callback)
-    def remove_callback(self, event_type: str, callback: Callable[..., Awaitable[Any]]):
+    def remove_callback(self, event_type: str, callback: Callable[..., Awaitable[Any]]) -> None:
         """Remove a registered callback
         Args:
             event_type: (Required) Specific event name to trigger callback
@@ -127,15 +128,16 @@ class BaseTool(ABC):
         """
         if (event_type in self._callbacks) and (callback in self._callbacks[event_type]):
             self._callbacks[event_type].remove(callback)
-    async def _trigger_callbacks(self, event_type: str, **kwargs):
+    async def _trigger_callbacks(self, event_type: str, **kwargs: Any) -> None:
         """Trigger all registered callback functions of an event type"""
         callback_list = self._callbacks.get(event_type, [])
         for callback in callback_list:
             try:
                 await callback(**kwargs)
             except Exception as e:
-                logger.warning(f"Callback failed for {event_type}: {e}")
+                logger.warning("Callback failed for %s: %s", event_type, e)
 # Prefix for internal tool names
 _INTERNAL_PREFIX = "int_"

hud/tools/computer/__init__.py CHANGED Viewed

@@ -5,11 +5,13 @@ from __future__ import annotations
 from .anthropic import AnthropicComputerTool
 from .hud import HudComputerTool
 from .openai import OpenAIComputerTool
+from .qwen import QwenComputerTool
 from .settings import computer_settings
 __all__ = [
     "AnthropicComputerTool",
     "HudComputerTool",
     "OpenAIComputerTool",
+    "QwenComputerTool",
     "computer_settings",
 ]

hud/tools/computer/qwen.py ADDED Viewed

@@ -0,0 +1,431 @@
+# flake8: noqa: B008
+from __future__ import annotations
+import logging
+import re
+from typing import TYPE_CHECKING, Any, Literal
+from mcp import ErrorData, McpError
+from mcp.types import INTERNAL_ERROR, INVALID_PARAMS, ContentBlock
+from pydantic import Field
+from hud.tools.types import ContentResult
+from .hud import HudComputerTool
+from .settings import computer_settings
+if TYPE_CHECKING:
+    from hud.tools.executors.base import BaseExecutor
+logger = logging.getLogger(__name__)
+class QwenComputerTool(HudComputerTool):
+    """
+    Qwen Computer Use tool for interacting with the computer.
+    """
+    name: str = "computer_use"
+    api_type: str = "computer_use"
+    def __init__(
+        self,
+        # Define within environment based on platform
+        executor: BaseExecutor | None = None,
+        platform_type: Literal["auto", "xdo", "pyautogui"] = "auto",
+        display_num: int | None = None,
+        # Overrides for what dimensions the agent thinks it operates in
+        width: int = computer_settings.QWEN_COMPUTER_WIDTH,
+        height: int = computer_settings.QWEN_COMPUTER_HEIGHT,
+        rescale_images: bool = computer_settings.QWEN_RESCALE_IMAGES,
+        # What the agent sees as the tool's name, title, and description
+        name: str | None = None,
+        title: str | None = None,
+        description: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Initialize with Qwen's default dimensions.
+        Args:
+            width: Target width for rescaling (None = use environment width)
+            height: Target height for rescaling (None = use environment height)
+            rescale_images: If True, rescale screenshots. If False, only rescale action coordinates
+            name: Tool name for MCP registration (auto-generated from class name if not provided)
+            title: Human-readable display name for the tool (auto-generated from class name)
+            description: Tool description (auto-generated from docstring if not provided)
+        """
+        # Store dimensions for description
+        self.display_width_px = width
+        self.display_height_px = height
+        # Build custom description with resolution info
+        custom_description = (
+            description
+            or f"""
+Use a mouse and keyboard to interact with a computer, and take screenshots.
+* This is an interface to a desktop GUI. You do not have access to a terminal or
+applications menu. You must click on desktop icons to start applications.
+* Some applications may take time to start or process actions, so you may need to
+wait and take successive screenshots to see the results of your actions. E.g. if you
+click on Firefox and a window doesn't open, try wait and taking another screenshot.
+* The screen's resolution is {width}x{height}.
+* Whenever you intend to move the cursor to click on an element like an icon, you
+should consult a screenshot to determine the coordinates of the element before
+moving the cursor.
+* If you tried clicking on a program or link but it failed to load, even after
+waiting, try adjusting your cursor position so that the tip of the cursor visually
+falls on the element that you want to click.
+* Make sure to click any buttons, links, icons, etc with the cursor tip in the
+center of the element. Don't click boxes on their edges.
+""".strip()
+        )
+        super().__init__(
+            executor=executor,
+            platform_type=platform_type,
+            display_num=display_num,
+            width=width,
+            height=height,
+            rescale_images=rescale_images,
+            name=name or "qwen_computer",
+            title=title or "Qwen Computer Tool",
+            description=custom_description,
+            **kwargs,
+        )
+    def to_params(self) -> dict:
+        """Convert to Qwen tool parameters."""
+        return {
+            "type": self.api_type,
+            "name": self.name,
+            "display_width_px": self.display_width_px,
+            "display_height_px": self.display_height_px,
+            "description": self.description,
+            "parameters": {
+                "properties": {
+                    "action": {
+                        "description": """
+The action to perform. The available actions are:
+* `key`: Performs key down presses on the arguments passed in order, then performs
+key releases in reverse order.
+* `type`: Type a string of text on the keyboard.
+* `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the
+screen.
+* `left_click`: Click the left mouse button at a specified (x, y) pixel coordinate
+on the screen.
+* `left_click_drag`: Click and drag the cursor to a specified (x, y) pixel
+coordinate on the screen.
+* `right_click`: Click the right mouse button at a specified (x, y) pixel
+coordinate on the screen.
+* `middle_click`: Click the middle mouse button at a specified (x, y) pixel
+coordinate on the screen.
+* `double_click`: Double-click the left mouse button at a specified (x, y) pixel
+coordinate on the screen.
+* `triple_click`: Triple-click the left mouse button at a specified (x, y) pixel
+coordinate on the screen.
+* `scroll`: Performs a scroll of the mouse scroll wheel.
+* `hscroll`: Performs a horizontal scroll.
+* `wait`: Wait specified seconds for the change to happen.
+* `terminate`: Terminate the current task and report its completion status
+(NOT SUPPORTED).
+* `answer`: Answer a question (NOT SUPPORTED).
+""".strip(),
+                        "enum": [
+                            "key",
+                            "type",
+                            "mouse_move",
+                            "left_click",
+                            "left_click_drag",
+                            "right_click",
+                            "middle_click",
+                            "double_click",
+                            "triple_click",
+                            "scroll",
+                            "hscroll",
+                            "wait",
+                            "terminate",
+                            "answer",
+                        ],
+                        "type": "string",
+                    },
+                    "keys": {
+                        "description": "Required only by `action=key`.",
+                        "type": "array",
+                    },
+                    "text": {
+                        "description": "Required only by `action=type` and `action=answer`.",
+                        "type": "string",
+                    },
+                    "coordinate": {
+                        "description": (
+                            "(x, y): The x (pixels from the left edge) and y "
+                            "(pixels from the top edge) coordinates to move the mouse to."
+                        ),
+                        "type": "array",
+                    },
+                    "pixels": {
+                        "description": (
+                            "The amount of scrolling to perform. Positive values scroll up, "
+                            "negative values scroll down. Required only by `action=scroll` "
+                            "and `action=hscroll`."
+                        ),
+                        "type": "number",
+                    },
+                    "time": {
+                        "description": "The seconds to wait. Required only by `action=wait`.",
+                        "type": "number",
+                    },
+                    "status": {
+                        "description": (
+                            "The status of the task. Required only by `action=terminate`."
+                        ),
+                        "type": "string",
+                        "enum": ["success", "failure"],
+                    },
+                },
+                "required": ["action"],
+                "type": "object",
+            },
+        }
+    async def __call__(
+        self,
+        action: str = Field(..., description="The action to perform on the computer"),
+        keys: list[str] | None = Field(None, description="Keys for key action"),
+        text: str | None = Field(None, description="Text to type"),
+        coordinate: list[int] | tuple[int, int] | None = Field(
+            None, description="The coordinate to interact with on the computer [x, y]"
+        ),
+        pixels: int | None = Field(None, description="Pixels to scroll"),
+        time: float | None = Field(None, description="Time to wait in seconds"),
+        status: str | None = Field(None, description="Status for terminate action"),
+    ) -> list[ContentBlock]:
+        """
+        Handle Qwen Computer Use API calls.
+        This converts Qwen's action format to HudComputerTool's format.
+        Returns:
+            List of MCP content blocks
+        """
+        logger.info("QwenComputerTool received action: %s", action)
+        # Handle non-computer actions that should raise errors
+        if action == "terminate":
+            raise McpError(
+                ErrorData(
+                    code=INVALID_PARAMS,
+                    message=(
+                        "terminate action is not supported for computer control. This is a no-op."
+                    ),
+                )
+            )
+        if action == "answer":
+            raise McpError(
+                ErrorData(
+                    code=INVALID_PARAMS,
+                    message="answer action is not supported for computer control. This is a no-op.",
+                )
+            )
+        # Convert lists to tuples if needed
+        coord_tuple = None
+        if coordinate:
+            coord_tuple = tuple(coordinate) if isinstance(coordinate, list) else coordinate
+        # Map Qwen actions to HudComputerTool actions
+        if action == "left_click":
+            if coord_tuple and len(coord_tuple) >= 2:
+                scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
+                logger.info("Scaled coordinates: %s, %s", scaled_x, scaled_y)
+                result = await self.executor.click(x=scaled_x, y=scaled_y)
+            else:
+                raise McpError(
+                    ErrorData(code=INVALID_PARAMS, message="coordinate is required for left_click")
+                )
+        elif action == "double_click":
+            if coord_tuple and len(coord_tuple) >= 2:
+                # Use pattern for double-click
+                scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
+                result = await self.executor.click(x=scaled_x, y=scaled_y, pattern=[100])
+            else:
+                raise McpError(
+                    ErrorData(
+                        code=INVALID_PARAMS, message="coordinate is required for double_click"
+                    )
+                )
+        elif action == "triple_click":
+            if coord_tuple and len(coord_tuple) >= 2:
+                # Use pattern for triple-click (simulated as double-click)
+                scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
+                # Note: triple-click simulated as double-click as per requirement
+                result = await self.executor.click(x=scaled_x, y=scaled_y, pattern=[100])
+            else:
+                raise McpError(
+                    ErrorData(
+                        code=INVALID_PARAMS, message="coordinate is required for triple_click"
+                    )
+                )
+        elif action == "right_click":
+            if coord_tuple and len(coord_tuple) >= 2:
+                scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
+                result = await self.executor.click(x=scaled_x, y=scaled_y, button="right")
+            else:
+                raise McpError(
+                    ErrorData(code=INVALID_PARAMS, message="coordinate is required for right_click")
+                )
+        elif action == "middle_click":
+            if coord_tuple and len(coord_tuple) >= 2:
+                scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
+                result = await self.executor.click(x=scaled_x, y=scaled_y, button="middle")
+            else:
+                raise McpError(
+                    ErrorData(
+                        code=INVALID_PARAMS, message="coordinate is required for middle_click"
+                    )
+                )
+        elif action == "mouse_move":
+            if coord_tuple and len(coord_tuple) >= 2:
+                scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
+                result = await self.executor.move(x=scaled_x, y=scaled_y)
+            else:
+                raise McpError(
+                    ErrorData(code=INVALID_PARAMS, message="coordinate is required for mouse_move")
+                )
+        elif action == "type":
+            if text:
+                result = await self.executor.write(text=text)
+            else:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="text is required for type"))
+        elif action == "key":
+            if keys:
+                # Qwen sends an array of keys to press
+                result = await self.executor.press(keys=keys)
+            else:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="keys is required for key"))
+        elif action == "scroll":
+            if pixels is None:
+                raise McpError(
+                    ErrorData(code=INVALID_PARAMS, message="pixels is required for scroll")
+                )
+            # Qwen's pixels: positive scrolls up, negative scrolls down
+            # HUD's scroll_y: positive scrolls down, negative scrolls up
+            # So we need to negate the value
+            scroll_y = -pixels
+            if coord_tuple and len(coord_tuple) >= 2:
+                scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
+                result = await self.executor.scroll(x=scaled_x, y=scaled_y, scroll_y=scroll_y)
+            else:
+                result = await self.executor.scroll(scroll_y=scroll_y)
+        elif action == "hscroll":
+            if pixels is None:
+                raise McpError(
+                    ErrorData(code=INVALID_PARAMS, message="pixels is required for hscroll")
+                )
+            # For horizontal scroll, positive values scroll right, negative scroll left
+            scroll_x = pixels
+            if coord_tuple and len(coord_tuple) >= 2:
+                scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
+                result = await self.executor.scroll(x=scaled_x, y=scaled_y, scroll_x=scroll_x)
+            else:
+                result = await self.executor.scroll(scroll_x=scroll_x)
+        elif action == "left_click_drag":
+            if coord_tuple and len(coord_tuple) >= 2:
+                # For drag, we need a path. Qwen provides the end coordinate.
+                # We'll get the current position and drag from there to the target
+                current_pos = await self.executor.position()
+                if isinstance(current_pos, ContentResult) and current_pos.output:
+                    # Parse the position from the output
+                    match = re.search(r"x=(\d+), y=(\d+)", current_pos.output)
+                    if match:
+                        # Current position is in screen coordinates
+                        screen_start_x, screen_start_y = int(match.group(1)), int(match.group(2))
+                        # End position is in agent coordinates, needs scaling
+                        scaled_end_x, scaled_end_y = self._scale_coordinates(
+                            coord_tuple[0], coord_tuple[1]
+                        )
+                        # Create path in screen coordinates
+                        path = [(screen_start_x, screen_start_y), (scaled_end_x, scaled_end_y)]
+                        # Path is already in screen coordinates, no need to scale again
+                        result = await self.executor.drag(path=path)
+                    else:
+                        raise McpError(
+                            ErrorData(
+                                code=INTERNAL_ERROR, message="Failed to parse current position"
+                            )
+                        )
+                else:
+                    raise McpError(
+                        ErrorData(code=INTERNAL_ERROR, message="Failed to get current position")
+                    )
+            else:
+                raise McpError(
+                    ErrorData(
+                        code=INVALID_PARAMS, message="coordinate is required for left_click_drag"
+                    )
+                )
+        elif action == "wait":
+            if time is None:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="time is required for wait"))
+            if time < 0:
+                raise McpError(ErrorData(code=INVALID_PARAMS, message="time must be non-negative"))
+            # Convert seconds to milliseconds for HudComputerTool
+            result = await self.executor.wait(time=int(time * 1000))
+        else:
+            # Unknown action
+            raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Invalid action: {action}"))
+        # Rescale screenshot in result if present
+        if isinstance(result, ContentResult) and result.base64_image and self.rescale_images:
+            rescaled_image = await self._rescale_screenshot(result.base64_image)
+            result.base64_image = rescaled_image
+        # Auto-add screenshot for interactive actions
+        interactive_actions = {
+            "left_click",
+            "double_click",
+            "triple_click",
+            "right_click",
+            "middle_click",
+            "mouse_move",
+            "type",
+            "key",
+            "scroll",
+            "hscroll",
+            "left_click_drag",
+        }
+        if (
+            action in interactive_actions
+            and isinstance(result, ContentResult)
+            and not result.base64_image
+        ):
+            screenshot_base64 = await self.executor.screenshot()
+            if screenshot_base64:
+                # Rescale screenshot if requested
+                screenshot_base64 = await self._rescale_screenshot(screenshot_base64)
+                result = ContentResult(
+                    output=result.output, error=result.error, base64_image=screenshot_base64
+                )
+        # Convert to content blocks
+        return result.to_content_blocks()

hud/tools/computer/settings.py CHANGED Viewed

@@ -62,6 +62,17 @@ class ComputerSettings(BaseSettings):
         validation_alias="OPENAI_COMPUTER_HEIGHT",
     )
+    QWEN_COMPUTER_WIDTH: int = Field(
+        default=1920,
+        description="Width of the display to use for the Qwen computer tools",
+        validation_alias="QWEN_COMPUTER_WIDTH",
+    )
+    QWEN_COMPUTER_HEIGHT: int = Field(
+        default=1080,
+        description="Height of the display to use for the Qwen computer tools",
+        validation_alias="QWEN_COMPUTER_HEIGHT",
+    )
     HUD_RESCALE_IMAGES: bool = Field(
         default=False,
         description="Whether to rescale images to the agent width and height",
@@ -77,6 +88,11 @@ class ComputerSettings(BaseSettings):
         description="Whether to rescale images to the agent width and height",
         validation_alias="OPENAI_RESCALE_IMAGES",
     )
+    QWEN_RESCALE_IMAGES: bool = Field(
+        default=True,
+        description="Whether to rescale images to the agent width and height",
+        validation_alias="QWEN_RESCALE_IMAGES",
+    )
 computer_settings = ComputerSettings()

hud/tools/executors/pyautogui.py CHANGED Viewed

@@ -31,7 +31,7 @@ def _get_pyautogui() -> Any | None:
             try:
                 from hud.tools.computer import computer_settings
-                os.environ["DISPLAY"] = str(computer_settings.DISPLAY_NUM)
+                os.environ["DISPLAY"] = f":{computer_settings.DISPLAY_NUM}"
             except (ImportError, AttributeError):
                 os.environ["DISPLAY"] = ":0"

hud/tools/playwright.py CHANGED Viewed

@@ -280,7 +280,7 @@ class PlaywrightTool(BaseTool):
         try:
             # Always return base64 encoded screenshot as ToolResult
-            screenshot_bytes = await self.page.screenshot(full_page=True)
+            screenshot_bytes = await self.page.screenshot(full_page=False)
             import base64
             screenshot_b64 = base64.b64encode(screenshot_bytes).decode()

hud/types.py CHANGED Viewed

@@ -43,11 +43,10 @@ class Task(BaseModel):
     setup_tool: MCPToolCall | list[MCPToolCall] | None = None
     evaluate_tool: MCPToolCall | list[MCPToolCall] | None = None
     integration_test_tool: MCPToolCall | list[MCPToolCall] | None = None
-    agent_tools: list[str] | None = None
-    system_prompt: str | None = None
+    agent_config: dict[str, Any] | None = None
     metadata: dict[str, Any] = Field(default_factory=dict)
-    @field_validator("mcp_config", "metadata", mode="before")
+    @field_validator("mcp_config", "metadata", "agent_config", mode="before")
     @classmethod
     def parse_json_strings(cls, v: Any) -> Any:
         """Parse JSON strings into dictionaries."""

hud/utils/tests/test_version.py CHANGED Viewed

@@ -5,4 +5,4 @@ def test_import():
     """Test that the package can be imported."""
     import hud
-    assert hud.__version__ == "0.4.46"
+    assert hud.__version__ == "0.4.48"

hud/version.py CHANGED Viewed

@@ -4,4 +4,4 @@ Version information for the HUD SDK.
 from __future__ import annotations
-__version__ = "0.4.46"
+__version__ = "0.4.48"

{hud_python-0.4.46.dist-info → hud_python-0.4.48.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.46
+Version: 0.4.48
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues

hud-python 0.4.46__py3-none-any.whl → 0.4.48__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.46__py3-none-any.whl → 0.4.48__py3-none-any.whl