PyPI - hud-python - Versions diffs - 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl - Mend

hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (86)

hud/__init__.py +20 -8
hud/adapters/common/adapter.py +14 -3
hud/adapters/common/tests/test_adapter.py +16 -4
hud/datasets.py +188 -0
hud/env/docker_client.py +15 -3
hud/env/environment.py +10 -7
hud/env/local_docker_client.py +29 -7
hud/env/remote_client.py +1 -1
hud/env/remote_docker_client.py +2 -2
hud/exceptions.py +2 -1
hud/gym.py +0 -9
hud/mcp/__init__.py +17 -0
hud/mcp/base.py +631 -0
hud/mcp/claude.py +321 -0
hud/mcp/client.py +312 -0
hud/mcp/langchain.py +250 -0
hud/mcp/openai.py +334 -0
hud/mcp/tests/__init__.py +1 -0
hud/mcp/tests/test_base.py +512 -0
hud/mcp/tests/test_claude.py +294 -0
hud/mcp/tests/test_client.py +324 -0
hud/mcp/tests/test_openai.py +238 -0
hud/settings.py +20 -2
hud/task.py +5 -88
hud/taskset.py +2 -23
hud/telemetry/__init__.py +16 -7
hud/telemetry/_trace.py +246 -72
hud/telemetry/context.py +88 -27
hud/telemetry/exporter.py +171 -11
hud/telemetry/instrumentation/mcp.py +174 -410
hud/telemetry/job.py +141 -0
hud/telemetry/mcp_models.py +13 -74
hud/telemetry/tests/test_context.py +9 -6
hud/telemetry/tests/test_trace.py +120 -78
hud/tools/__init__.py +34 -0
hud/tools/base.py +65 -0
hud/tools/bash.py +137 -0
hud/tools/computer/__init__.py +13 -0
hud/tools/computer/anthropic.py +411 -0
hud/tools/computer/hud.py +315 -0
hud/tools/computer/openai.py +283 -0
hud/tools/edit.py +290 -0
hud/tools/executors/__init__.py +30 -0
hud/tools/executors/base.py +331 -0
hud/tools/executors/pyautogui.py +619 -0
hud/tools/executors/tests/__init__.py +1 -0
hud/tools/executors/tests/test_base_executor.py +338 -0
hud/tools/executors/tests/test_pyautogui_executor.py +165 -0
hud/tools/executors/xdo.py +503 -0
hud/tools/helper/README.md +56 -0
hud/tools/helper/__init__.py +9 -0
hud/tools/helper/mcp_server.py +78 -0
hud/tools/helper/server_initialization.py +115 -0
hud/tools/helper/utils.py +58 -0
hud/tools/playwright_tool.py +379 -0
hud/tools/tests/__init__.py +3 -0
hud/tools/tests/test_bash.py +152 -0
hud/tools/tests/test_computer.py +52 -0
hud/tools/tests/test_computer_actions.py +34 -0
hud/tools/tests/test_edit.py +240 -0
hud/tools/tests/test_init.py +27 -0
hud/tools/tests/test_playwright_tool.py +183 -0
hud/tools/tests/test_tools.py +157 -0
hud/tools/tests/test_utils.py +156 -0
hud/tools/utils.py +50 -0
hud/trajectory.py +5 -1
hud/types.py +10 -1
hud/utils/tests/test_init.py +21 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/METADATA +27 -18
hud_python-0.3.1.dist-info/RECORD +119 -0
hud/evaluators/__init__.py +0 -9
hud/evaluators/base.py +0 -32
hud/evaluators/inspect.py +0 -24
hud/evaluators/judge.py +0 -189
hud/evaluators/match.py +0 -156
hud/evaluators/remote.py +0 -65
hud/evaluators/tests/__init__.py +0 -0
hud/evaluators/tests/test_inspect.py +0 -12
hud/evaluators/tests/test_judge.py +0 -231
hud/evaluators/tests/test_match.py +0 -115
hud/evaluators/tests/test_remote.py +0 -98
hud_python-0.2.10.dist-info/RECORD +0 -85
{hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
{hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0

hud/tools/helper/server_initialization.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""Helper for MCP server initialization with progress notifications.
+Applying ``mcp_intialize_wrapper`` replaces ``ServerSession._received_request`` with a
+patched handler that awaits the registered function when an ``InitializeRequest`` arrives.
+Example:
+    ```python
+    from mcp.server.fastmcp import FastMCP
+    from hud.tools.helper import mcp_intialize_wrapper
+    @mcp_intialize_wrapper
+    async def initialize_environment(session=None, progress_token=None):
+        # Send progress if available
+        if session and progress_token:
+            await session.send_progress_notification(
+                progress_token=progress_token, progress=0, total=100, message="Starting services..."
+            )
+        # Your initialization code works with or without session
+        start_services()
+    # Create and run server - initialization happens automatically
+    mcp = FastMCP("My Server")
+    mcp.run()
+    ```
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import mcp.types as types
+from mcp.server.session import ServerSession
+if TYPE_CHECKING:
+    from collections.abc import Awaitable, Callable
+    from mcp.shared.session import RequestResponder
+# Store the original _received_request method so the patched handler can delegate to it
+_original_received_request = ServerSession._received_request
+# Init function registered through mcp_intialize_wrapper (None until one is registered)
+_init_function: Callable | None = None
+# Guard read by _patched_received_request so the init function is not run again
+_initialized = False
+async def _patched_received_request(
+    self: ServerSession, responder: RequestResponder[types.ClientRequest, types.ServerResult]
+) -> types.ServerResult | None:
+    """Intercept initialization to run custom setup with progress notifications.
+    Replacement for ``ServerSession._received_request``. When the incoming request is an
+    ``InitializeRequest`` and a registered init function has not completed yet, that
+    function is awaited before the original handler runs. Every request, initialization
+    or not, is then delegated to the original handler.
+    Args:
+        self: The server session that received the request.
+        responder: Responder wrapping the incoming client request.
+    Returns:
+        Whatever the original ``_received_request`` returns.
+    Raises:
+        Exception: Anything raised by the init function is re-raised after the failure
+            has been reported through a progress notification (when a token was sent).
+    """
+    global _initialized
+    # Only the initialization handshake triggers the custom setup
+    is_init_request = isinstance(responder.request.root, types.InitializeRequest)
+    if is_init_request and _init_function and not _initialized:
+        params = responder.request.root.params
+        # Extract progress token if present
+        meta = getattr(params, "meta", None)
+        progress_token = getattr(meta, "progressToken", None) if meta else None
+        try:
+            await _init_function(session=self, progress_token=progress_token)
+        except Exception as e:
+            # Compare against None: an integer progress token of 0 is still a token
+            if progress_token is not None:
+                await self.send_progress_notification(
+                    progress_token=progress_token,
+                    progress=0,
+                    total=100,
+                    message=f"Initialization failed: {e!s}",
+                )
+            raise
+        # Mark setup as done only once it has actually succeeded. Setting the flag after
+        # every delegated request would let an earlier non-initialize request (such as a
+        # ping) suppress the init function for good.
+        _initialized = True
+        # Setup is complete, so later requests can go straight to the original handler
+        ServerSession._received_request = _original_received_request
+    # Call the original handler to send the InitializeResult
+    return await _original_received_request(self, responder)
+def mcp_intialize_wrapper(
+    init_function: Callable[[ServerSession | None, str | None], Awaitable[None]] | None = None,
+) -> Callable:
+    """Register an async init function to run during MCP server initialization.
+    The registered function is awaited with ``session`` and ``progress_token`` keyword
+    arguments, which it can use to send progress updates. Either may be ``None``, so the
+    function should work without them.
+    Usage:
+        @mcp_intialize_wrapper
+        async def initialize(session=None, progress_token=None):
+            if session and progress_token:
+                await session.send_progress_notification(...)
+            # Your init code here
+    Must be applied before creating FastMCP instance or calling mcp.run().
+    Args:
+        init_function: The function to register when the decorator is applied bare.
+            Leave as ``None`` to get a decorator back instead.
+    Returns:
+        The registered function unchanged, or a decorator when called without one.
+    """
+    def _register(hook: Callable[[ServerSession | None, str | None], Awaitable[None]]) -> Callable:
+        global _init_function
+        # Remember the hook so the patched request handler can await it
+        _init_function = hook
+        # Install the monkey patch once; registering again only swaps the hook
+        already_patched = ServerSession._received_request == _patched_received_request
+        if not already_patched:
+            ServerSession._received_request = _patched_received_request  # type: ignore[assignment]
+        return hook
+    # Bare usage (@mcp_intialize_wrapper) receives the function directly and registers it;
+    # called usage (@mcp_intialize_wrapper()) gets the registrar back to apply afterwards.
+    return _register if init_function is None else _register(init_function)

hud/tools/helper/utils.py ADDED Viewed

@@ -0,0 +1,58 @@
+from __future__ import annotations
+import asyncio
+import inspect
+from functools import wraps
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    from collections.abc import Callable
+    from mcp.server.fastmcp import FastMCP
+def register_instance_tool(mcp: FastMCP, name: str, instance: Any) -> Callable[..., Any]:
+    """Register ``instance.__call__`` as a FastMCP tool.
+    The callable is wrapped in an async function whose advertised signature has
+    ``*args``/``**kwargs`` removed and every annotation replaced by ``Any``.
+    Parameters
+    ----------
+    mcp:
+        A :class:`mcp.server.fastmcp.FastMCP` instance.
+    name:
+        Public tool name.
+    instance:
+        Object with an ``async def __call__`` (or sync) implementing the tool.
+    Returns
+    -------
+    Callable[..., Any]
+        Whatever ``mcp.tool(name=name)`` returns for the wrapper.
+    Raises
+    ------
+    TypeError
+        If ``instance`` is a class rather than an instance.
+    """
+    if inspect.isclass(instance):
+        class_name = instance.__name__
+        raise TypeError(
+            f"register_instance_tool() expects an instance, but got class '{class_name}'. "
+            f"Use: register_instance_tool(mcp, '{name}', {class_name}()) "
+            f"Not: register_instance_tool(mcp, '{name}', {class_name})"
+        )
+    call_fn = instance.__call__
+    # Remove *args/**kwargs so Pydantic doesn't treat them as required fields.
+    # Keyword-only parameters keep their kind: promoting a required keyword-only
+    # parameter that follows a defaulted one would make inspect.Signature raise
+    # "non-default argument follows default argument".
+    filtered = [
+        p.replace(
+            kind=p.kind if p.kind is p.KEYWORD_ONLY else p.POSITIONAL_OR_KEYWORD,
+            annotation=Any,
+        )
+        for p in inspect.signature(call_fn).parameters.values()
+        if p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD)
+    ]
+    public_sig = inspect.Signature(parameters=filtered, return_annotation=Any)
+    @wraps(call_fn)
+    async def _wrapper(*args: Any, **kwargs: Any) -> Any:  # type: ignore[override]
+        result = call_fn(*args, **kwargs)
+        # Await coroutines as well as any other awaitable (e.g. futures or tasks)
+        if inspect.isawaitable(result):
+            result = await result
+        return result
+    # An explicit __signature__ takes precedence over the one wraps() would expose
+    _wrapper.__signature__ = public_sig  # type: ignore[attr-defined]
+    return mcp.tool(name=name)(_wrapper)

hud/tools/playwright_tool.py ADDED Viewed

@@ -0,0 +1,379 @@
+"""Playwright web automation tool for HUD."""
+from __future__ import annotations
+import logging
+import os
+from typing import TYPE_CHECKING, Any, Literal
+from mcp import ErrorData, McpError
+from mcp.types import INVALID_PARAMS, ImageContent, TextContent
+from pydantic import Field
+from hud.tools.base import ToolResult, tool_result_to_content_blocks
+if TYPE_CHECKING:
+    from playwright.async_api import Browser, BrowserContext, Page
+logger = logging.getLogger(__name__)
+class PlaywrightTool:
+    """Playwright tool for web automation.
+    The browser is created lazily: the first action that needs it either attaches to a
+    remote browser over CDP (when ``cdp_url`` was given) or launches a local Chromium,
+    then opens a page. Instances are callable and return MCP content blocks.
+    """
+    def __init__(self, cdp_url: str | None = None) -> None:
+        """Create the tool without starting a browser.
+        Args:
+            cdp_url: CDP endpoint of a remote browser to attach to. When ``None``, a
+                local Chromium instance is launched on first use.
+        """
+        super().__init__()
+        self._cdp_url = cdp_url
+        # Playwright driver handle; created on first use by _ensure_browser()
+        self._playwright: Any = None
+        self._browser: Browser | None = None
+        self._context: BrowserContext | None = None
+        self._page: Page | None = None
+    @property
+    def page(self) -> Page:
+        """Get the current page, raising an error if not initialized.
+        Raises:
+            RuntimeError: If no page has been opened yet.
+        """
+        if self._page is None:
+            raise RuntimeError("Browser page is not initialized. Call _ensure_browser() first.")
+        return self._page
+    @staticmethod
+    def _invalid_params(message: str) -> McpError:
+        """Build the ``McpError`` used for a missing or unsupported argument."""
+        return McpError(ErrorData(code=INVALID_PARAMS, message=message))
+    async def __call__(
+        self,
+        action: str = Field(
+            ...,
+            description="The action to perform (navigate, screenshot, click, type, get_page_info, wait_for_element)",  # noqa: E501
+        ),
+        url: str | None = Field(None, description="URL to navigate to (for navigate action)"),
+        selector: str | None = Field(
+            None, description="CSS selector for element (for click, type, wait_for_element actions)"
+        ),
+        text: str | None = Field(None, description="Text to type (for type action)"),
+        wait_for_load_state: Literal["commit", "domcontentloaded", "load", "networkidle"]
+        | None = Field(
+            None,
+            description="State to wait for: commit, domcontentloaded, load, networkidle (default: networkidle)",  # noqa: E501
+        ),
+    ) -> list[ImageContent | TextContent]:
+        """
+        Execute a Playwright web automation action.
+        Dispatches ``action`` to the matching method and converts its result into MCP
+        content blocks.
+        NOTE(review): the ``Field(...)`` defaults are only meaningful when the call goes
+        through a layer that resolves them (presumably FastMCP/Pydantic); direct callers
+        should pass every argument explicitly.
+        Returns:
+            List of MCP content blocks
+        Raises:
+            McpError: If a required argument is missing, the action is unknown, or the
+                action raises unexpectedly.
+        """
+        logger.info("PlaywrightTool executing action: %s", action)
+        try:
+            if action == "navigate":
+                if url is None:
+                    raise self._invalid_params("url parameter is required for navigate")
+                result = await self.navigate(url, wait_for_load_state or "networkidle")
+            elif action == "screenshot":
+                result = await self.screenshot()
+            elif action == "click":
+                if selector is None:
+                    raise self._invalid_params("selector parameter is required for click")
+                result = await self.click(selector)
+            elif action == "type":
+                if selector is None:
+                    raise self._invalid_params("selector parameter is required for type")
+                if text is None:
+                    raise self._invalid_params("text parameter is required for type")
+                result = await self.type_text(selector, text)
+            elif action == "get_page_info":
+                result = await self.get_page_info()
+            elif action == "wait_for_element":
+                if selector is None:
+                    raise self._invalid_params(
+                        "selector parameter is required for wait_for_element"
+                    )
+                result = await self.wait_for_element(selector)
+            else:
+                raise self._invalid_params(f"Unknown action: {action}")
+            # Convert dict result to ToolResult
+            if isinstance(result, dict):
+                if result.get("success"):
+                    tool_result = ToolResult(output=result.get("message", ""))
+                else:
+                    tool_result = ToolResult(error=result.get("error", "Unknown error"))
+            elif isinstance(result, ToolResult):
+                tool_result = result
+            else:
+                tool_result = ToolResult(output=str(result))
+            # Convert result to content blocks
+            return tool_result_to_content_blocks(tool_result)
+        except McpError:
+            # Argument errors raised above already carry the right payload
+            raise
+        except Exception as e:
+            logger.error("PlaywrightTool error: %s", e)
+            raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Playwright error: {e}")) from e
+    async def _ensure_browser(self) -> None:
+        """Ensure browser is launched and ready.
+        Does nothing while a connected browser exists. Otherwise starts Playwright if
+        needed, attaches over CDP or launches a local Chromium, and opens a new page.
+        Raises:
+            ImportError: If the ``playwright`` package is not installed.
+            RuntimeError: If the browser or its context could not be created.
+        """
+        if self._browser is not None and self._browser.is_connected():
+            return
+        if self._cdp_url:
+            logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
+        else:
+            logger.info("Launching Playwright browser...")
+            # Ensure DISPLAY is set (only needed for local browser)
+            os.environ.setdefault("DISPLAY", ":1")
+        if self._playwright is None:
+            # Keep the try body to the import so only a missing package is rewritten
+            try:
+                from playwright.async_api import async_playwright
+            except ImportError:
+                raise ImportError(
+                    "Playwright is not installed. Please install with: pip install playwright"
+                ) from None
+            self._playwright = await async_playwright().start()
+        # Connect via CDP URL or launch local browser
+        if self._cdp_url:
+            # Connect to remote browser via CDP
+            self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url)
+            if self._browser is None:
+                raise RuntimeError("Failed to connect to remote browser")
+            # Use existing context or create new one
+            contexts = self._browser.contexts
+            if contexts:
+                self._context = contexts[0]
+            else:
+                self._context = await self._browser.new_context(
+                    viewport={"width": 1920, "height": 1080},
+                    ignore_https_errors=True,
+                )
+        else:
+            # Launch local browser
+            self._browser = await self._playwright.chromium.launch(
+                headless=False,
+                args=[
+                    "--no-sandbox",
+                    "--disable-dev-shm-usage",
+                    "--disable-gpu",
+                    "--disable-web-security",
+                    # One merged list: a repeated --disable-features switch is presumably
+                    # resolved to its last occurrence, dropping the earlier features.
+                    # NOTE(review): confirm against Chromium's switch handling.
+                    "--disable-features=IsolateOrigins,site-per-process,TranslateUI",
+                    "--disable-blink-features=AutomationControlled",
+                    "--window-size=1920,1080",
+                    "--window-position=0,0",
+                    "--start-maximized",
+                    "--disable-background-timer-throttling",
+                    "--disable-backgrounding-occluded-windows",
+                    "--disable-renderer-backgrounding",
+                    "--disable-ipc-flooding-protection",
+                    "--disable-default-apps",
+                    "--no-first-run",
+                    "--disable-sync",
+                    "--no-default-browser-check",
+                ],
+            )
+            if self._browser is None:
+                raise RuntimeError("Browser failed to initialize")
+            self._context = await self._browser.new_context(
+                viewport={"width": 1920, "height": 1080},
+                ignore_https_errors=True,
+            )
+        if self._context is None:
+            raise RuntimeError("Browser context failed to initialize")
+        self._page = await self._context.new_page()
+        logger.info("Playwright browser launched successfully")
+    async def navigate(
+        self,
+        url: str,
+        wait_for_load_state: Literal[
+            "commit", "domcontentloaded", "load", "networkidle"
+        ] = "networkidle",
+    ) -> dict[str, Any]:
+        """Navigate to a URL.
+        Args:
+            url: URL to navigate to
+            wait_for_load_state: Load state to wait for
+                (commit, domcontentloaded, load, networkidle)
+        Returns:
+            Dict with navigation result; failures are reported with ``success`` False
+            instead of being raised
+        """
+        await self._ensure_browser()
+        logger.info("Navigating to %s", url)
+        try:
+            await self.page.goto(url, wait_until=wait_for_load_state)
+            current_url = self.page.url
+            title = await self.page.title()
+            return {
+                "success": True,
+                "url": current_url,
+                "title": title,
+                "message": f"Successfully navigated to {url}",
+            }
+        except Exception as e:
+            logger.error("Navigation failed: %s", e)
+            return {
+                "success": False,
+                "error": str(e),
+                "message": f"Failed to navigate to {url}: {e}",
+            }
+    async def screenshot(self) -> ToolResult:
+        """Take a full-page screenshot of the current page.
+        Returns:
+            ToolResult with base64_image, or with error set if the capture failed
+        """
+        await self._ensure_browser()
+        try:
+            # Always return base64 encoded screenshot as ToolResult
+            screenshot_bytes = await self.page.screenshot(full_page=True)
+            import base64
+            screenshot_b64 = base64.b64encode(screenshot_bytes).decode()
+            return ToolResult(base64_image=screenshot_b64)
+        except Exception as e:
+            logger.error("Screenshot failed: %s", e)
+            return ToolResult(error=f"Failed to take screenshot: {e}")
+    async def click(self, selector: str) -> dict[str, Any]:
+        """Click an element by selector.
+        Args:
+            selector: CSS selector for element to click
+        Returns:
+            Dict with click result; failures are reported with ``success`` False
+        """
+        await self._ensure_browser()
+        try:
+            await self.page.click(selector)
+            return {"success": True, "message": f"Clicked element: {selector}"}
+        except Exception as e:
+            logger.error("Click failed: %s", e)
+            return {
+                "success": False,
+                "error": str(e),
+                "message": f"Failed to click {selector}: {e}",
+            }
+    async def type_text(self, selector: str, text: str) -> dict[str, Any]:
+        """Type text into an element.
+        The text is set through ``page.fill`` on the selected element.
+        Args:
+            selector: CSS selector for input element
+            text: Text to type
+        Returns:
+            Dict with type result; failures are reported with ``success`` False
+        """
+        await self._ensure_browser()
+        try:
+            await self.page.fill(selector, text)
+            return {"success": True, "message": f"Typed '{text}' into {selector}"}
+        except Exception as e:
+            logger.error("Type failed: %s", e)
+            return {
+                "success": False,
+                "error": str(e),
+                "message": f"Failed to type into {selector}: {e}",
+            }
+    async def get_page_info(self) -> dict[str, Any]:
+        """Get current page information.
+        Returns:
+            Dict with the page URL and title; failures are reported with ``success`` False
+        """
+        await self._ensure_browser()
+        try:
+            url = self.page.url
+            title = await self.page.title()
+            return {
+                "success": True,
+                "url": url,
+                "title": title,
+                "message": f"Current page: {title} ({url})",
+            }
+        except Exception as e:
+            logger.error("Get page info failed: %s", e)
+            return {"success": False, "error": str(e), "message": f"Failed to get page info: {e}"}
+    async def wait_for_element(self, selector: str) -> dict[str, Any]:
+        """Wait up to 30000 ms for an element to appear.
+        Args:
+            selector: CSS selector for element
+        Returns:
+            Dict with wait result; failures are reported with ``success`` False
+        """
+        await self._ensure_browser()
+        try:
+            await self.page.wait_for_selector(selector, timeout=30000)
+            return {"success": True, "message": f"Element {selector} appeared"}
+        except Exception as e:
+            logger.error("Wait for element failed: %s", e)
+            return {
+                "success": False,
+                "error": str(e),
+                "message": f"Element {selector} did not appear within 30000ms: {e}",
+            }
+    async def close(self) -> None:
+        """Close browser and cleanup.
+        Errors while closing the browser or stopping Playwright are logged rather than
+        raised, and all handles are reset afterwards.
+        """
+        if self._browser:
+            try:
+                await self._browser.close()
+                logger.info("Browser closed")
+            except Exception as e:
+                logger.error("Error closing browser: %s", e)
+        if self._playwright:
+            try:
+                await self._playwright.stop()
+            except Exception as e:
+                logger.error("Error stopping playwright: %s", e)
+        self._browser = None
+        self._context = None
+        self._page = None
+        self._playwright = None

hud/tools/tests/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from __future__ import annotations
+# The tests package exports no public names
+__all__ = []

hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl

Potentially problematic release.

hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl