PyPI - hud-python - Versions diffs - 0.1.0b3__py3-none-any.whl → 0.1.2a0__py3-none-any.whl - Mend

hud-python 0.1.0b3__py3-none-any.whl → 0.1.2a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (8) hide show

hud/__init__.py CHANGED Viewed

@@ -9,7 +9,7 @@ from hud.environment import Environment, EvalSet, Observation, TaskResult
 from hud.gym import Gym
 from hud.run import Run
-__version__ = "0.1.0b3"
+__version__ = "0.1.2-alpha"
 __all__ = [
     "Environment",

hud/adapters/claude/adapter.py CHANGED Viewed

@@ -2,10 +2,11 @@
 from __future__ import annotations
-from typing import Any
+from typing import Any, ClassVar
 from hud.adapters.common import CLA, Adapter
 from hud.adapters.common.types import (
+    CLAKey,
     ClickAction,
     DragAction,
     MoveAction,
@@ -20,11 +21,17 @@ from hud.adapters.common.types import (
 class ClaudeAdapter(Adapter):
+    KEY_MAP: ClassVar[dict[str, CLAKey]] = {"Return": "enter"}
     def __init__(self) -> None:
         super().__init__()
         self.agent_width = 1024  # Claude's preferred width
         self.agent_height = 768  # Claude's preferred height
+    def _map_key(self, key: str) -> CLAKey:
+        """Map a key to its standardized form."""
+        return self.KEY_MAP.get(key, key.lower())  # type: ignore
     def convert(self, data: Any) -> CLA:
         try:
             action_type = data.get("action")
@@ -32,10 +39,12 @@ class ClaudeAdapter(Adapter):
             if action_type == "key":
                 assert "text" in data
                 if "+" in data["text"]:
-                    keys = data["text"].split("+")
+                    keys: list[CLAKey] = [
+                        self._map_key(k) for k in (data["text"].split("+"))
+                    ]
                     assert len(keys) > 0
                     return PressAction(keys=keys)
-                return PressAction(keys=[data["text"]])
+                return PressAction(keys=[self._map_key(data["text"])])
             elif action_type == "type":
                 assert "text" in data
@@ -66,12 +75,19 @@ class ClaudeAdapter(Adapter):
                 assert len(coord) == 2
                 if (
                     len(self.memory) == 0
-                    or (self.memory[-1] is not MoveAction and self.memory[-1] is not ClickAction)
+                    or (
+                        self.memory[-1] is not MoveAction
+                        and self.memory[-1] is not ClickAction
+                    )
                     or self.memory[-1].point is None
                 ):
-                    raise ValueError("Left click drag must be preceded by a move or click action")
+                    raise ValueError(
+                        "Left click drag must be preceded by a move or click action"
+                    )
                 else:
-                    return DragAction(path=[self.memory[-1].point, Point(x=coord[0], y=coord[1])])
+                    return DragAction(
+                        path=[self.memory[-1].point, Point(x=coord[0], y=coord[1])]
+                    )
             elif action_type == "right_click":
                 assert "coordinate" in data
@@ -96,6 +112,17 @@ class ClaudeAdapter(Adapter):
                     point=Point(x=coord[0], y=coord[1]), button="left", pattern=[100]
                 )
+            elif action_type == "triple_click":
+                assert "coordinate" in data
+                coord = data["coordinate"]
+                assert isinstance(coord, list)
+                assert len(coord) == 2
+                return ClickAction(
+                    point=Point(x=coord[0], y=coord[1]),
+                    button="left",
+                    pattern=[100, 100],
+                )
             elif action_type == "scroll":
                 assert "scroll_direction" in data
                 direction = data["scroll_direction"]
@@ -112,7 +139,8 @@ class ClaudeAdapter(Adapter):
                     raise ValueError(f"Unsupported scroll direction: {direction}")
                 return ScrollAction(
-                    point=Point(x=data["coordinate"][0], y=data["coordinate"][1]), scroll=scroll
+                    point=Point(x=data["coordinate"][0], y=data["coordinate"][1]),
+                    scroll=scroll,
                 )
             elif action_type == "screenshot":
@@ -124,7 +152,6 @@ class ClaudeAdapter(Adapter):
             elif action_type == "wait":
                 assert "duration" in data
                 return WaitAction(time=data["duration"])
             else:
                 raise ValueError(f"Unsupported action type: {action_type}")
         except AssertionError:

hud/adapters/common/types.py CHANGED Viewed

@@ -78,6 +78,10 @@ class PositionFetch(CLAAction):
     type: Literal["position"] = "position"
+class CustomAction(CLAAction):
+    type: Literal["custom"] = "custom"
+    action: str
 # Union of all possible actions
 CLA = Annotated[
     Union[
@@ -90,6 +94,7 @@ CLA = Annotated[
         DragAction,
         ScreenshotFetch,
         PositionFetch,
+        CustomAction,
     ],
     Field(discriminator="type"),
 ]

hud/environment.py CHANGED Viewed

@@ -13,6 +13,17 @@ from hud.settings import settings
 if TYPE_CHECKING:
     from .adapters.common import Adapter
+class BaseResponseWithLogs(BaseModel):
+    """Base model for API responses that include logs."""
+    logs: str | None = None
+    error: str | None = None
+class RewardResponse(BaseResponseWithLogs):
+    reward: float
 logger = logging.getLogger("hud.environment")
@@ -199,19 +210,19 @@ class Environment:
             return self.adapter.adapt_list(action)
         return [self.adapter.adapt(action)]
-    async def evaluate(self) -> float:
+    async def evaluate(self) -> RewardResponse:
         """
         Get final evaluation score.
         Returns:
-            float: The evaluation score
+            RewardResponse: The evaluation response containing reward, logs, and possible error
         """
         data = await make_request(
             method="POST",
             url=f"{settings.base_url}/evaluation/{self.id}",
             api_key=settings.api_key,
         )
-        return data["reward"]
+        return RewardResponse(**data)
     async def close(self) -> None:
         """
@@ -273,6 +284,7 @@ class EvalSet:
         id: str,
         name: str,
         tasks: list[str] | None = None,
+        configs: dict[str, Any] | None = None,
     ) -> None:
         """
         Initialize an evaluation set.
@@ -285,6 +297,7 @@ class EvalSet:
         self.id = id
         self.name = name
         self.tasks = tasks or []
+        self.configs = configs or {}
     async def fetch_tasks(self) -> list[str]:
         """
@@ -298,5 +311,7 @@ class EvalSet:
             url=f"{settings.base_url}/evalsets/{self.id}/tasks",
             api_key=settings.api_key,
         )
+        # Extracts a list of task ids and list of config objects for the evalset
         self.tasks = data["tasks"]
+        self.configs = data["evalset"]
         return self.tasks

{hud_python-0.1.0b3.dist-info → hud_python-0.1.2a0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.1.0b3
+Version: 0.1.2a0
 Summary: SDK for the HUD evaluation platform.
 Project-URL: Homepage, https://github.com/Human-Data/hud-sdk
 Project-URL: Bug Tracker, https://github.com/Human-Data/hud-sdk/issues
@@ -57,9 +57,9 @@ Description-Content-Type: text/markdown
 # HUD
-A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models. Visit [hud.so](https://hud.so).
+A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models.
-> **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is evolving and may change in future releases as we gather feedback and improve functionality.
+> **Alpha Release Notice**: This SDK is currently in early release status. The API is evolving and may change in future releases as we gather feedback and improve functionality.
 [![PyPI version](https://img.shields.io/pypi/v/hud-python)](https://pypi.org/project/hud-python/)
@@ -70,13 +70,12 @@ A Python SDK for interacting with HUD environments and evaluation benchmarks for
 [RECOMMENDED] To set get started with an agent, see the [Claude Computer use example](https://github.com/Human-Data/hud-sdk/tree/main/examples).
-Otherwise, install the package with Python>=3.9:
+Install the package with Python>=3.9:
 ```bash
 pip install hud-python
 ```
-Make sure to setup your account [here](https://hud.so/settings) and add your API key to the environment variables:
+Make sure to setup your account with us (email founders@hud.so) and add your API key to the environment variables:
 ```bash
 HUD_API_KEY=<your-api-key>
 ```
@@ -117,20 +116,9 @@ if __name__ == "__main__":
     asyncio.run(main())
 ```
-## Features
-- Connect to HUD evaluation environments
-- Run benchmarks across various tasks
-- Support for different agent adapters
-- Asynchronous API
 ## Documentation
-For comprehensive guides, examples, and API reference, visit:
-- [Getting Started](https://docs.hud.so/introduction)
-- [Installation](https://docs.hud.so/installation)
-- [API Reference](https://docs.hud.so/api-reference)
-- [Examples](https://docs.hud.so/examples)
+For comprehensive guides, examples, and API reference, visit [our docs](https://docs.hud.so/introduction)
 ## License

{hud_python-0.1.0b3.dist-info → hud_python-0.1.2a0.dist-info}/RECORD RENAMED Viewed

@@ -1,21 +1,21 @@
-hud/__init__.py,sha256=Xam6plJLHFqKPKcnVhwLQf4bsApDuxZ8BJF0FEAjkos,416
+hud/__init__.py,sha256=IEEme8kZA7zs9URZV-C35gqBOC5sxMEU7e6NjSAgXR4,420
 hud/client.py,sha256=7WHXTQhVK-T9Rj4ZooADE_c1pah5Bc1DJ9ZRqUyUnuQ,5724
-hud/environment.py,sha256=39tna-Cpzg9T6HqKebPARP2DXaF2n0xPr1W0qx8y160,8401
+hud/environment.py,sha256=9r8eK3OVqr-wpPGlhnrpuDt-z6FIp3S3oukTZ7swN3o,8899
 hud/gym.py,sha256=aanBHtlsXrJwrFax9SbXWwk_By-X8wE3M9deS-E_s4c,463
 hud/run.py,sha256=5ukjuRNLjj5fczaWxpR_5NebFbQpoy8w81eRYy309Vg,6401
 hud/settings.py,sha256=1ScSac0ta03LkckkH2gi6SyKY2M7nr15vRGugo2C_xs,1015
 hud/adapters/__init__.py,sha256=y3H7yMl7rC-rrXG2WvePdSojoNFSui02eYTH17Xd7OY,87
 hud/adapters/claude/__init__.py,sha256=i7QEF-29FLb9qxp1eYtXs-adIk_tG54tL-9g6d3xodk,100
-hud/adapters/claude/adapter.py,sha256=oi2lvO42g7i-L151tIWIGQGA80skcYRwzQ52-0f2OpA,4840
+hud/adapters/claude/adapter.py,sha256=ekYZixANKfx-4lENlXGaomh6Ecw4SRKtLWD5quGNWdM,5782
 hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0mp8,118
 hud/adapters/common/adapter.py,sha256=SCtOuRjW5Szzd45LXCaqDEaKr2lhA-nIqSEMJ9KLsKI,5799
-hud/adapters/common/types.py,sha256=d9tIF06tjK7VCb-yBJ9epwHlXRHlObo9YWetrv33s8c,4511
+hud/adapters/common/types.py,sha256=Kgj0ZhiWOU6V95qxrvf-mMCvodLV_6rGBHwP1FQdMBk,4620
 hud/server/__init__.py,sha256=VPrhyyqg3inge9J7BjcmDBNJRuvkCA9ZDXS_R5Q8ZtY,129
 hud/server/requests.py,sha256=pPPaMpwqmA1RyWwzQN1ybgAnbSHJLeeIaW6MJwhJYks,6052
 hud/utils/__init__.py,sha256=0m8klSLnMLeIJT23ipBXfFACk4hNWPsA6ZNqZDpv6oY,99
 hud/utils/config.py,sha256=dze0BGE4q14omjj9822kL9BeiIgWQvJyuU29A2wa1SE,193
 hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hud_python-0.1.0b3.dist-info/METADATA,sha256=mWp4cHyIzYuzxk3alNFspztWL2S8_6ZlvGP0UqzIF48,5146
-hud_python-0.1.0b3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hud_python-0.1.0b3.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
-hud_python-0.1.0b3.dist-info/RECORD,,
+hud_python-0.1.2a0.dist-info/METADATA,sha256=oGcQYuTzPo9FSMeeARPiPws3ERRoWNPNu8tM8W_1Uow,4787
+hud_python-0.1.2a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hud_python-0.1.2a0.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
+hud_python-0.1.2a0.dist-info/RECORD,,

{hud_python-0.1.0b3.dist-info → hud_python-0.1.2a0.dist-info}/WHEEL RENAMED Viewed

File without changes

{hud_python-0.1.0b3.dist-info → hud_python-0.1.2a0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hud-python 0.1.0b3__py3-none-any.whl → 0.1.2a0__py3-none-any.whl

Potentially problematic release.

hud-python 0.1.0b3__py3-none-any.whl → 0.1.2a0__py3-none-any.whl