PyPI - hud-python - Versions diffs - 0.1.0__py3-none-any.whl - Mend

hud-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (21) hide show

hud/__init__.py +22 -0
hud/adapters/__init__.py +5 -0
hud/adapters/claude/__init__.py +6 -0
hud/adapters/claude/adapter.py +131 -0
hud/adapters/common/__init__.py +6 -0
hud/adapters/common/adapter.py +167 -0
hud/adapters/common/types.py +92 -0
hud/client.py +184 -0
hud/env.py +258 -0
hud/gym.py +22 -0
hud/py.typed +0 -0
hud/run.py +157 -0
hud/server/__init__.py +5 -0
hud/server/requests.py +79 -0
hud/settings.py +39 -0
hud/utils/__init__.py +5 -0
hud/utils/config.py +7 -0
hud_python-0.1.0.dist-info/METADATA +125 -0
hud_python-0.1.0.dist-info/RECORD +21 -0
hud_python-0.1.0.dist-info/WHEEL +4 -0
hud_python-0.1.0.dist-info/licenses/LICENSE +21 -0

hud/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""
+HUD Gym SDK - A Python SDK for interacting with HUD environments.
+"""
+from __future__ import annotations
+from hud.client import HUDClient
+from hud.env import Env, EvalSet, Observation, TaskResult
+from hud.gym import Gym
+from hud.run import Run
+__version__ = "0.1.0"
+__all__ = [
+    "Env",
+    "EvalSet",
+    "Gym",
+    "HUDClient",
+    "Observation",
+    "Run",
+    "TaskResult",
+]

hud/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from __future__ import annotations
+from .common import Adapter
+__all__ = ["Adapter"]

hud/adapters/claude/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from __future__ import annotations
+from .adapter import ClaudeAdapter
+__all__ = ["ClaudeAdapter"]

hud/adapters/claude/adapter.py ADDED Viewed

@@ -0,0 +1,131 @@
+# ruff: noqa: S101
+from __future__ import annotations
+from typing import Any
+from hud.adapters.common import CLA, Adapter
+from hud.adapters.common.types import (
+    ClickAction,
+    DragAction,
+    MoveAction,
+    Point,
+    PositionFetch,
+    PressAction,
+    ScreenshotFetch,
+    ScrollAction,
+    TypeAction,
+    WaitAction,
+)
+class ClaudeAdapter(Adapter):
+    """Adapter translating Claude computer-use tool calls into CLA actions.
+    The agent-side screen size is set to 1024x768; the rescaling inherited from
+    ``Adapter`` maps coordinates back to the environment size.
+    """
+    def __init__(self) -> None:
+        super().__init__()
+        self.agent_width = 1024  # Claude's preferred width
+        self.agent_height = 768  # Claude's preferred height
+    @staticmethod
+    def _require(data: Any, key: str) -> Any:
+        """Return ``data[key]``, raising ``ValueError`` when the key is absent."""
+        # Explicit raise instead of ``assert`` so validation survives ``python -O``.
+        if key not in data:
+            raise ValueError(f"Invalid action: {data}")
+        return data[key]
+    @classmethod
+    def _point(cls, data: Any) -> Point:
+        """Build a ``Point`` from the ``[x, y]`` list stored under "coordinate"."""
+        coord = cls._require(data, "coordinate")
+        if not isinstance(coord, list) or len(coord) != 2:
+            raise ValueError(f"Invalid action: {data}")
+        return Point(x=coord[0], y=coord[1])
+    def convert(self, data: Any) -> CLA:
+        """
+        Convert one Claude action dictionary into the matching CLA action.
+        Args:
+            data: Mapping with an "action" key naming the action, plus the fields
+                that action needs ("text", "coordinate", "scroll_direction",
+                "scroll_amount" or "duration")
+        Returns:
+            The equivalent CLA action model
+        Raises:
+            ValueError: If a required field is missing or malformed, the action
+                type or scroll direction is unsupported, or a drag is not
+                preceded by a move/click action that carries a point
+        """
+        action_type = data.get("action")
+        if action_type == "key":
+            text = self._require(data, "text")
+            # Hotkeys arrive joined by "+", e.g. "ctrl+c"; a single key passes through.
+            keys = text.split("+") if "+" in text else [text]
+            return PressAction(keys=keys)
+        if action_type == "type":
+            return TypeAction(text=self._require(data, "text"), enter_after=False)
+        if action_type == "mouse_move":
+            return MoveAction(point=self._point(data))
+        if action_type == "left_click":
+            return ClickAction(point=self._point(data), button="left")
+        if action_type == "left_click_drag":
+            end = self._point(data)
+            previous = self.memory[-1] if self.memory else None
+            # memory holds action *instances*, so this must be an isinstance check;
+            # comparing an instance to the class with `is not` is always true and
+            # made every drag fail.
+            if not isinstance(previous, (MoveAction, ClickAction)) or previous.point is None:
+                raise ValueError("Left click drag must be preceded by a move or click action")
+            # The drag starts where the previous move/click ended.
+            return DragAction(path=[previous.point, end])
+        if action_type == "right_click":
+            return ClickAction(point=self._point(data), button="right")
+        if action_type == "middle_click":
+            return ClickAction(point=self._point(data), button="wheel")
+        if action_type == "double_click":
+            return ClickAction(point=self._point(data), button="left", pattern=[100])
+        if action_type == "scroll":
+            direction = self._require(data, "scroll_direction")
+            if direction not in ("up", "down", "left", "right"):
+                raise ValueError(f"Unsupported scroll direction: {direction}")
+            amount = self._require(data, "scroll_amount")
+            # Up/left are negative, down/right positive.
+            if direction == "up":
+                scroll = Point(x=0, y=-amount)
+            elif direction == "down":
+                scroll = Point(x=0, y=amount)
+            elif direction == "left":
+                scroll = Point(x=-amount, y=0)
+            else:
+                scroll = Point(x=amount, y=0)
+            return ScrollAction(point=self._point(data), scroll=scroll)
+        if action_type == "screenshot":
+            return ScreenshotFetch()
+        if action_type == "cursor_position":
+            return PositionFetch()
+        if action_type == "wait":
+            # NOTE(review): WaitAction.time is documented as milliseconds; confirm
+            # the unit of Claude's "duration" matches before relying on this.
+            return WaitAction(time=self._require(data, "duration"))
+        raise ValueError(f"Unsupported action type: {action_type}")

hud/adapters/common/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+from __future__ import annotations
+from .adapter import Adapter
+from .types import CLA
+__all__ = ["CLA", "Adapter"]

hud/adapters/common/adapter.py ADDED Viewed

@@ -0,0 +1,167 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
+from PIL import Image
+from pydantic import TypeAdapter, ValidationError
+from .types import CLA
+if TYPE_CHECKING:
+    import numpy as np  # type: ignore
+    from typing_extensions import TypeAlias, TypeIs
+    ImageType: TypeAlias = "np.ndarray[Any, Any] | Image.Image | str | None"
+def _is_numpy_array(observation: Any) -> TypeIs[np.ndarray]:
+    """Report whether *observation* is a numpy array; False when numpy is unavailable."""
+    try:
+        # Imported lazily so numpy stays an optional dependency.
+        from numpy import ndarray  # type: ignore
+    except (ModuleNotFoundError, NameError):
+        return False
+    return isinstance(observation, ndarray)
+class Adapter:
+    """Base adapter that turns agent actions into rescaled CLA dictionaries.
+    It tracks an agent-side and an environment-side screen size so images can be
+    resized for the agent and action coordinates mapped back to the environment.
+    """
+    def __init__(self) -> None:
+        self.memory: list[Any] = []  # converted CLA actions, in the order adapted
+        self.agent_width = 1920
+        self.agent_height = 1080
+        self.env_width = 1920
+        self.env_height = 1080
+    def preprocess(self, action: Any) -> Any:
+        """Hook run before conversion; the base implementation returns *action* unchanged."""
+        return action
+    def convert(self, action: Any) -> CLA:
+        """
+        Validate *action* against the CLA union.
+        Args:
+            action: Raw action data accepted by the CLA models
+        Returns:
+            The validated CLA action model
+        Raises:
+            ValueError: If *action* is None or fails validation
+        """
+        if action is None:
+            raise ValueError("Please provide a valid action")
+        try:
+            return TypeAdapter(CLA).validate_python(action)
+        except ValidationError as e:
+            raise ValueError(f"Invalid action type in conversion: {action}") from e
+    def json(self, action: CLA) -> Any:
+        """
+        Dump a CLA action to a plain dictionary.
+        Args:
+            action: CLA action to serialise
+        Returns:
+            The result of ``model_dump()`` on the validated action
+        Raises:
+            ValueError: If *action* is None or fails validation
+        """
+        if action is None:
+            raise ValueError("Please provide a valid action")
+        try:
+            validated = TypeAdapter(CLA).validate_python(action)
+            return validated.model_dump()
+        except ValidationError as e:
+            raise ValueError(f"Invalid action type in json creation: {action}") from e
+    def rescale(self, observation: ImageType) -> str | None:
+        """
+        Resize the observation (image) to agent-specific dimensions.
+        As a side effect, records the image's original size as the environment size.
+        Args:
+            observation: Image data, which can be:
+                - numpy array
+                - PIL Image
+                - base64 string (PNG)
+        Returns:
+            Base64-encoded string of the resized image (PNG format), or None when
+            *observation* is None
+        Raises:
+            ValueError: If the base64 string cannot be decoded or the observation
+                type is unsupported
+        """
+        if observation is None:
+            return None
+        # Imported once here instead of twice further down.
+        import base64
+        import io
+        # Handle different input types.
+        if _is_numpy_array(observation):
+            # Convert numpy array to PIL Image
+            img = Image.fromarray(observation)
+        elif isinstance(observation, Image.Image):
+            img = observation
+        elif isinstance(observation, str):
+            # Assume it's a base64 string
+            try:
+                # Remove header if present (e.g., 'data:image/png;base64,')
+                if "," in observation:
+                    observation = observation.split(",")[1]
+                # Decode base64 string to bytes
+                img_bytes = base64.b64decode(observation)
+                # Convert to PIL Image
+                img = Image.open(io.BytesIO(img_bytes))
+            except Exception as e:
+                raise ValueError(f"Failed to decode base64 image: {e}") from None
+        else:
+            raise ValueError(f"Unsupported observation type: {type(observation)}")
+        # Update environment dimensions
+        self.env_width, self.env_height = img.size
+        # Resize to agent dimensions
+        resized_img = img.resize((self.agent_width, self.agent_height), Image.Resampling.LANCZOS)
+        # Always convert to base64 string
+        buffered = io.BytesIO()
+        resized_img.save(buffered, format="PNG")
+        return base64.b64encode(buffered.getvalue()).decode("utf-8")
+    def postprocess_action(self, action: dict[str, Any]) -> dict[str, Any]:
+        """
+        Rescale action coordinates from agent dimensions to environment dimensions.
+        The input dictionary, including its nested point dictionaries, is left
+        untouched; a rescaled copy is returned.
+        Args:
+            action: Action dictionary with coordinates
+        Returns:
+            Action with rescaled coordinates (the input itself when it is empty)
+        """
+        if not action:
+            return action
+        import copy
+        # Calculate scaling factors
+        x_scale = self.env_width / self.agent_width
+        y_scale = self.env_height / self.agent_height
+        # A real deep copy: dict.copy() would share the nested point/path/scroll
+        # dicts with the caller and rescale them in place.
+        processed_action = copy.deepcopy(action)
+        # Rescale based on action type and structure
+        if "point" in processed_action and processed_action["point"] is not None:
+            # For actions with a single point (click, move)
+            processed_action["point"]["x"] = int(processed_action["point"]["x"] * x_scale)
+            processed_action["point"]["y"] = int(processed_action["point"]["y"] * y_scale)
+        if (path := processed_action.get("path")) is not None:
+            # For actions with a path (drag)
+            for point in path:
+                point["x"] = int(point["x"] * x_scale)
+                point["y"] = int(point["y"] * y_scale)
+        if "scroll" in processed_action and processed_action["scroll"] is not None:
+            # For scroll actions
+            processed_action["scroll"]["x"] = int(processed_action["scroll"]["x"] * x_scale)
+            processed_action["scroll"]["y"] = int(processed_action["scroll"]["y"] * y_scale)
+        return processed_action
+    def adapt(self, action: Any) -> dict[str, Any]:
+        """
+        Run the full pipeline on one action: preprocess, convert, remember, dump, rescale.
+        Args:
+            action: Raw agent action
+        Returns:
+            Action dictionary with coordinates in environment dimensions
+        """
+        # any preprocessing steps
+        action = self.preprocess(action)
+        # convert to CLA
+        action = self.convert(action)
+        # remembered in agent coordinates, before rescaling
+        self.memory.append(action)
+        # convert to json
+        action_dict = self.json(action)
+        # apply coordinate rescaling
+        rescaled_action = self.postprocess_action(action_dict)
+        return rescaled_action
+    def adapt_list(self, actions: list[Any]) -> list[dict[str, Any]]:
+        """
+        Adapt every action in *actions*, in order.
+        Raises:
+            ValueError: If *actions* is not a list
+        """
+        if not isinstance(actions, list):
+            raise ValueError("Please provide a list of actions")
+        return [self.adapt(action) for action in actions]

hud/adapters/common/types.py ADDED Viewed

@@ -0,0 +1,92 @@
+from __future__ import annotations
+from typing import Annotated, Literal, Union
+from pydantic import BaseModel, Field
+# Base class for all actions.
+# Each concrete action narrows `type` to a single Literal value; the CLA union
+# defined in this module uses that field as its discriminator.
+class CLAAction(BaseModel):
+    type: str
+# Basic Point model for coordinates.
+# Also reused for scroll amounts (ScrollAction.scroll) and offsets (MoveAction.offset).
+class Point(BaseModel):
+    x: int  # horizontal component
+    y: int  # vertical component
+# CLICK ACTION (supports extra options)
+class ClickAction(CLAAction):
+    type: Literal["click"] = "click"
+    point: Point | None = None  # click location when targeting by coordinates
+    selector: str | None = None  # presumably an element selector; format not shown here, TODO confirm
+    button: Literal["left", "right", "wheel", "back", "forward"] = "left"  # "wheel" is what ClaudeAdapter sends for a middle click
+    pattern: list[int] | None = None  # [delay_1, delay_2, ...]
+# PRESS ACTION for key presses/hotkeys
+class PressAction(CLAAction):
+    type: Literal["press"] = "press"
+    keys: list[str]  # one entry per key; a hotkey such as "ctrl+c" is split into its parts by ClaudeAdapter
+# TYPE ACTION for text typing
+class TypeAction(CLAAction):
+    type: Literal["type"] = "type"
+    text: str  # text to type
+    enter_after: bool | None = False  # presumably presses Enter after typing when true; TODO confirm with the consumer
+# SCROLL ACTION
+class ScrollAction(CLAAction):
+    type: Literal["scroll"] = "scroll"
+    point: Point | None = None  # position at which to scroll
+    scroll: Point | None = None  # scroll amount per axis; ClaudeAdapter uses negative y for up and negative x for left
+# MOVE ACTION for mouse movement
+class MoveAction(CLAAction):
+    type: Literal["move"] = "move"
+    point: Point | None = None  # target location when moving by coordinates
+    selector: str | None = None  # presumably an element selector; format not shown here, TODO confirm
+    offset: Point | None = None  # NOTE(review): reference point for the offset is not visible here, confirm
+# WAIT ACTION
+class WaitAction(CLAAction):
+    type: Literal["wait"] = "wait"
+    time: int  # in milliseconds
+# DRAG ACTION
+class DragAction(CLAAction):
+    type: Literal["drag"] = "drag"
+    path: list[Point]  # points of the drag in order; ClaudeAdapter supplies [start, end]
+    pattern: list[int] | None = None  # [delay_1, delay_2, ...]
+# SCREENSHOT ACTION
+# Carries no fields beyond the discriminating `type`.
+class ScreenshotFetch(CLAAction):
+    type: Literal["screenshot"] = "screenshot"
+# POSITION FETCH: produced by ClaudeAdapter for Claude's "cursor_position" action.
+# Carries no fields beyond the discriminating `type`.
+class PositionFetch(CLAAction):
+    type: Literal["position"] = "position"
+# Union of all possible actions.
+# Declared as a pydantic discriminated union: the `type` field of the incoming
+# data selects which member model is used for validation.
+CLA = Annotated[
+    Union[
+        ClickAction,
+        PressAction,
+        TypeAction,
+        ScrollAction,
+        MoveAction,
+        WaitAction,
+        DragAction,
+        ScreenshotFetch,
+        PositionFetch,
+    ],
+    Field(discriminator="type"),
+]

hud/client.py ADDED Viewed

@@ -0,0 +1,184 @@
+"""
+HUD client for interacting with the API.
+"""
+from __future__ import annotations
+import json
+from typing import Any
+from .adapters.common import Adapter
+from .env import EvalSet
+from .gym import Gym
+from .run import Run, RunResponse
+from .server import make_request, make_sync_request
+from .settings import settings
+class HUDClient:
+    """
+    Main entry point of the SDK: a thin client over the HUD API.
+    Offers coroutines for loading gyms, evalsets and runs, and a synchronous
+    helper for creating runs.
+    """
+    def __init__(self, api_key: str) -> None:
+        """
+        Keep the API key on the client and on the shared settings object.
+        Args:
+            api_key: API key for authentication with the HUD API
+        """
+        self.api_key = api_key
+        settings.api_key = api_key  # Set global config
+    async def load_gym(self, id: str) -> Gym:
+        """
+        Fetch a gym by ID and wrap it in a Gym object.
+        Args:
+            id: The ID of the gym to load
+        Returns:
+            Gym: Built from the "id" and "name" fields of the response
+        """
+        gym_info = await make_request(
+            method="GET",
+            url=f"{settings.base_url}/gyms/{id}",
+            api_key=self.api_key,
+        )
+        return Gym(id=gym_info["id"], name=gym_info["name"])
+    async def load_evalset(self, id: str) -> EvalSet:
+        """
+        Fetch an evalset by ID and wrap it in an EvalSet object.
+        Args:
+            id: The ID of the evalset to load
+        Returns:
+            EvalSet: Built from the "id" and "name" fields of the response
+        """
+        evalset_info = await make_request(
+            method="GET",
+            url=f"{settings.base_url}/evalsets/{id}",
+            api_key=self.api_key,
+        )
+        return EvalSet(id=evalset_info["id"], name=evalset_info["name"])
+    async def list_gyms(self) -> list[str]:
+        """
+        List all available gyms.
+        Returns:
+            list[str]: The value stored under "gyms" in the response
+        """
+        listing = await make_request(
+            method="GET", url=f"{settings.base_url}/gyms", api_key=self.api_key
+        )
+        return listing["gyms"]
+    async def get_runs(self) -> list[Run]:
+        """
+        Get all runs associated with the API key.
+        Returns:
+            list[Run]: The value stored under "runs" in the response
+        """
+        # NOTE(review): the payload is returned as received; nothing here wraps
+        # the entries in Run objects, so confirm it matches the annotation.
+        listing = await make_request(
+            method="GET", url=f"{settings.base_url}/runs", api_key=self.api_key
+        )
+        return listing["runs"]
+    async def load_run(self, id: str, adapter: Adapter | None = None) -> Run | None:
+        """
+        Fetch a run by ID and rebuild it together with its gym and evalset.
+        Args:
+            id: The ID of the run to load
+            adapter: Optional adapter for action conversion; a default Adapter
+                is created when omitted
+        Returns:
+            Run: The loaded run object, or None if the response is empty
+        """
+        adapter = adapter or Adapter()
+        payload = await make_request(
+            method="GET",
+            url=f"{settings.base_url}/runs/{id}",
+            api_key=self.api_key,
+        )
+        # Nothing came back: report "not found" to the caller.
+        if not payload:
+            return None
+        response = RunResponse(**payload)
+        evalset_info = response.evalset
+        gym_info = response.gym
+        return Run(
+            id=response.id,
+            name=response.name,
+            gym=Gym(id=gym_info["id"], name=gym_info["name"]),
+            evalset=EvalSet(
+                id=evalset_info["id"],
+                name=evalset_info["name"],
+                tasks=evalset_info["tasks"],
+            ),
+            adapter=adapter,
+            config=response.config,
+            metadata=response.metadata,
+        )
+    def create_run(
+        self,
+        name: str,
+        gym: Gym,
+        evalset: EvalSet,
+        config: dict[str, Any] | None = None,
+        metadata: dict[str, Any] | None = None,
+        adapter: Adapter | None = None,
+    ) -> Run:
+        """
+        Create a new run through a synchronous POST and return it.
+        Args:
+            name: Name of the run
+            gym: Gym to use for the run
+            evalset: Evalset to use for the run
+            config: Optional configuration parameters (defaults to an empty dict)
+            metadata: Optional metadata for the run (defaults to an empty dict)
+            adapter: Optional adapter for action conversion; a default Adapter
+                is created when omitted
+        Returns:
+            Run: The created run object, carrying the ID from the response
+        """
+        adapter = adapter or Adapter()
+        config = {} if config is None else config
+        metadata = {} if metadata is None else metadata
+        # config and metadata are sent as JSON-encoded strings.
+        request_body = {
+            "name": name,
+            "gym_id": gym.id,
+            "evalset_id": evalset.id,
+            "config": json.dumps(config),
+            "metadata": json.dumps(metadata),
+        }
+        created = make_sync_request(
+            method="POST",
+            url=f"{settings.base_url}/runs",
+            json=request_body,
+            api_key=self.api_key,
+        )
+        return Run(
+            id=created["id"],
+            name=name,
+            gym=gym,
+            evalset=evalset,
+            adapter=adapter,
+            config=config,
+            metadata=metadata,
+        )