hud-python 0.1.0b3__py3-none-any.whl → 0.1.2a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/__init__.py CHANGED
@@ -9,7 +9,7 @@ from hud.environment import Environment, EvalSet, Observation, TaskResult
9
9
  from hud.gym import Gym
10
10
  from hud.run import Run
11
11
 
12
- __version__ = "0.1.0b3"
12
+ __version__ = "0.1.2-alpha"
13
13
 
14
14
  __all__ = [
15
15
  "Environment",
@@ -2,10 +2,11 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Any
5
+ from typing import Any, ClassVar
6
6
 
7
7
  from hud.adapters.common import CLA, Adapter
8
8
  from hud.adapters.common.types import (
9
+ CLAKey,
9
10
  ClickAction,
10
11
  DragAction,
11
12
  MoveAction,
@@ -20,11 +21,17 @@ from hud.adapters.common.types import (
20
21
 
21
22
 
22
23
  class ClaudeAdapter(Adapter):
24
+ KEY_MAP: ClassVar[dict[str, CLAKey]] = {"Return": "enter"}
25
+
23
26
  def __init__(self) -> None:
24
27
  super().__init__()
25
28
  self.agent_width = 1024 # Claude's preferred width
26
29
  self.agent_height = 768 # Claude's preferred height
27
30
 
31
+ def _map_key(self, key: str) -> CLAKey:
32
+ """Map a key to its standardized form."""
33
+ return self.KEY_MAP.get(key, key.lower()) # type: ignore
34
+
28
35
  def convert(self, data: Any) -> CLA:
29
36
  try:
30
37
  action_type = data.get("action")
@@ -32,10 +39,12 @@ class ClaudeAdapter(Adapter):
32
39
  if action_type == "key":
33
40
  assert "text" in data
34
41
  if "+" in data["text"]:
35
- keys = data["text"].split("+")
42
+ keys: list[CLAKey] = [
43
+ self._map_key(k) for k in (data["text"].split("+"))
44
+ ]
36
45
  assert len(keys) > 0
37
46
  return PressAction(keys=keys)
38
- return PressAction(keys=[data["text"]])
47
+ return PressAction(keys=[self._map_key(data["text"])])
39
48
 
40
49
  elif action_type == "type":
41
50
  assert "text" in data
@@ -66,12 +75,19 @@ class ClaudeAdapter(Adapter):
66
75
  assert len(coord) == 2
67
76
  if (
68
77
  len(self.memory) == 0
69
- or (self.memory[-1] is not MoveAction and self.memory[-1] is not ClickAction)
78
+ or (
79
+ self.memory[-1] is not MoveAction
80
+ and self.memory[-1] is not ClickAction
81
+ )
70
82
  or self.memory[-1].point is None
71
83
  ):
72
- raise ValueError("Left click drag must be preceded by a move or click action")
84
+ raise ValueError(
85
+ "Left click drag must be preceded by a move or click action"
86
+ )
73
87
  else:
74
- return DragAction(path=[self.memory[-1].point, Point(x=coord[0], y=coord[1])])
88
+ return DragAction(
89
+ path=[self.memory[-1].point, Point(x=coord[0], y=coord[1])]
90
+ )
75
91
 
76
92
  elif action_type == "right_click":
77
93
  assert "coordinate" in data
@@ -96,6 +112,17 @@ class ClaudeAdapter(Adapter):
96
112
  point=Point(x=coord[0], y=coord[1]), button="left", pattern=[100]
97
113
  )
98
114
 
115
+ elif action_type == "triple_click":
116
+ assert "coordinate" in data
117
+ coord = data["coordinate"]
118
+ assert isinstance(coord, list)
119
+ assert len(coord) == 2
120
+ return ClickAction(
121
+ point=Point(x=coord[0], y=coord[1]),
122
+ button="left",
123
+ pattern=[100, 100],
124
+ )
125
+
99
126
  elif action_type == "scroll":
100
127
  assert "scroll_direction" in data
101
128
  direction = data["scroll_direction"]
@@ -112,7 +139,8 @@ class ClaudeAdapter(Adapter):
112
139
  raise ValueError(f"Unsupported scroll direction: {direction}")
113
140
 
114
141
  return ScrollAction(
115
- point=Point(x=data["coordinate"][0], y=data["coordinate"][1]), scroll=scroll
142
+ point=Point(x=data["coordinate"][0], y=data["coordinate"][1]),
143
+ scroll=scroll,
116
144
  )
117
145
 
118
146
  elif action_type == "screenshot":
@@ -124,7 +152,6 @@ class ClaudeAdapter(Adapter):
124
152
  elif action_type == "wait":
125
153
  assert "duration" in data
126
154
  return WaitAction(time=data["duration"])
127
-
128
155
  else:
129
156
  raise ValueError(f"Unsupported action type: {action_type}")
130
157
  except AssertionError:
@@ -78,6 +78,10 @@ class PositionFetch(CLAAction):
78
78
  type: Literal["position"] = "position"
79
79
 
80
80
 
81
+ class CustomAction(CLAAction):
82
+ type: Literal["custom"] = "custom"
83
+ action: str
84
+
81
85
  # Union of all possible actions
82
86
  CLA = Annotated[
83
87
  Union[
@@ -90,6 +94,7 @@ CLA = Annotated[
90
94
  DragAction,
91
95
  ScreenshotFetch,
92
96
  PositionFetch,
97
+ CustomAction,
93
98
  ],
94
99
  Field(discriminator="type"),
95
100
  ]
hud/environment.py CHANGED
@@ -13,6 +13,17 @@ from hud.settings import settings
13
13
  if TYPE_CHECKING:
14
14
  from .adapters.common import Adapter
15
15
 
16
+
17
+ class BaseResponseWithLogs(BaseModel):
18
+ """Base model for API responses that include logs."""
19
+ logs: str | None = None
20
+ error: str | None = None
21
+
22
+
23
+ class RewardResponse(BaseResponseWithLogs):
24
+ reward: float
25
+
26
+
16
27
  logger = logging.getLogger("hud.environment")
17
28
 
18
29
 
@@ -199,19 +210,19 @@ class Environment:
199
210
  return self.adapter.adapt_list(action)
200
211
  return [self.adapter.adapt(action)]
201
212
 
202
- async def evaluate(self) -> float:
213
+ async def evaluate(self) -> RewardResponse:
203
214
  """
204
215
  Get final evaluation score.
205
216
 
206
217
  Returns:
207
- float: The evaluation score
218
+ RewardResponse: The evaluation response containing reward, logs, and possible error
208
219
  """
209
220
  data = await make_request(
210
221
  method="POST",
211
222
  url=f"{settings.base_url}/evaluation/{self.id}",
212
223
  api_key=settings.api_key,
213
224
  )
214
- return data["reward"]
225
+ return RewardResponse(**data)
215
226
 
216
227
  async def close(self) -> None:
217
228
  """
@@ -273,6 +284,7 @@ class EvalSet:
273
284
  id: str,
274
285
  name: str,
275
286
  tasks: list[str] | None = None,
287
+ configs: dict[str, Any] | None = None,
276
288
  ) -> None:
277
289
  """
278
290
  Initialize an evaluation set.
@@ -285,6 +297,7 @@ class EvalSet:
285
297
  self.id = id
286
298
  self.name = name
287
299
  self.tasks = tasks or []
300
+ self.configs = configs or {}
288
301
 
289
302
  async def fetch_tasks(self) -> list[str]:
290
303
  """
@@ -298,5 +311,7 @@ class EvalSet:
298
311
  url=f"{settings.base_url}/evalsets/{self.id}/tasks",
299
312
  api_key=settings.api_key,
300
313
  )
314
+ # Extracts a list of task ids and list of config objects for the evalset
301
315
  self.tasks = data["tasks"]
316
+ self.configs = data["evalset"]
302
317
  return self.tasks
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.1.0b3
3
+ Version: 0.1.2a0
4
4
  Summary: SDK for the HUD evaluation platform.
5
5
  Project-URL: Homepage, https://github.com/Human-Data/hud-sdk
6
6
  Project-URL: Bug Tracker, https://github.com/Human-Data/hud-sdk/issues
@@ -57,9 +57,9 @@ Description-Content-Type: text/markdown
57
57
 
58
58
  # HUD
59
59
 
60
- A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models. Visit [hud.so](https://hud.so).
60
+ A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models.
61
61
 
62
- > **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is evolving and may change in future releases as we gather feedback and improve functionality.
62
+ > **Alpha Release Notice**: This SDK is currently in early release status. The API is evolving and may change in future releases as we gather feedback and improve functionality.
63
63
 
64
64
  [![PyPI version](https://img.shields.io/pypi/v/hud-python)](https://pypi.org/project/hud-python/)
65
65
 
@@ -70,13 +70,12 @@ A Python SDK for interacting with HUD environments and evaluation benchmarks for
70
70
 
71
71
  [RECOMMENDED] To get started with an agent, see the [Claude Computer use example](https://github.com/Human-Data/hud-sdk/tree/main/examples).
72
72
 
73
-
74
- Otherwise, install the package with Python>=3.9:
73
+ Install the package with Python>=3.9:
75
74
  ```bash
76
75
  pip install hud-python
77
76
  ```
78
77
 
79
- Make sure to setup your account [here](https://hud.so/settings) and add your API key to the environment variables:
78
+ Make sure to set up your account with us (email founders@hud.so) and add your API key to the environment variables:
80
79
  ```bash
81
80
  HUD_API_KEY=<your-api-key>
82
81
  ```
@@ -117,20 +116,9 @@ if __name__ == "__main__":
117
116
  asyncio.run(main())
118
117
  ```
119
118
 
120
- ## Features
121
-
122
- - Connect to HUD evaluation environments
123
- - Run benchmarks across various tasks
124
- - Support for different agent adapters
125
- - Asynchronous API
126
-
127
119
  ## Documentation
128
120
 
129
- For comprehensive guides, examples, and API reference, visit:
130
- - [Getting Started](https://docs.hud.so/introduction)
131
- - [Installation](https://docs.hud.so/installation)
132
- - [API Reference](https://docs.hud.so/api-reference)
133
- - [Examples](https://docs.hud.so/examples)
121
+ For comprehensive guides, examples, and API reference, visit [our docs](https://docs.hud.so/introduction).
134
122
 
135
123
  ## License
136
124
 
@@ -1,21 +1,21 @@
1
- hud/__init__.py,sha256=Xam6plJLHFqKPKcnVhwLQf4bsApDuxZ8BJF0FEAjkos,416
1
+ hud/__init__.py,sha256=IEEme8kZA7zs9URZV-C35gqBOC5sxMEU7e6NjSAgXR4,420
2
2
  hud/client.py,sha256=7WHXTQhVK-T9Rj4ZooADE_c1pah5Bc1DJ9ZRqUyUnuQ,5724
3
- hud/environment.py,sha256=39tna-Cpzg9T6HqKebPARP2DXaF2n0xPr1W0qx8y160,8401
3
+ hud/environment.py,sha256=9r8eK3OVqr-wpPGlhnrpuDt-z6FIp3S3oukTZ7swN3o,8899
4
4
  hud/gym.py,sha256=aanBHtlsXrJwrFax9SbXWwk_By-X8wE3M9deS-E_s4c,463
5
5
  hud/run.py,sha256=5ukjuRNLjj5fczaWxpR_5NebFbQpoy8w81eRYy309Vg,6401
6
6
  hud/settings.py,sha256=1ScSac0ta03LkckkH2gi6SyKY2M7nr15vRGugo2C_xs,1015
7
7
  hud/adapters/__init__.py,sha256=y3H7yMl7rC-rrXG2WvePdSojoNFSui02eYTH17Xd7OY,87
8
8
  hud/adapters/claude/__init__.py,sha256=i7QEF-29FLb9qxp1eYtXs-adIk_tG54tL-9g6d3xodk,100
9
- hud/adapters/claude/adapter.py,sha256=oi2lvO42g7i-L151tIWIGQGA80skcYRwzQ52-0f2OpA,4840
9
+ hud/adapters/claude/adapter.py,sha256=ekYZixANKfx-4lENlXGaomh6Ecw4SRKtLWD5quGNWdM,5782
10
10
  hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0mp8,118
11
11
  hud/adapters/common/adapter.py,sha256=SCtOuRjW5Szzd45LXCaqDEaKr2lhA-nIqSEMJ9KLsKI,5799
12
- hud/adapters/common/types.py,sha256=d9tIF06tjK7VCb-yBJ9epwHlXRHlObo9YWetrv33s8c,4511
12
+ hud/adapters/common/types.py,sha256=Kgj0ZhiWOU6V95qxrvf-mMCvodLV_6rGBHwP1FQdMBk,4620
13
13
  hud/server/__init__.py,sha256=VPrhyyqg3inge9J7BjcmDBNJRuvkCA9ZDXS_R5Q8ZtY,129
14
14
  hud/server/requests.py,sha256=pPPaMpwqmA1RyWwzQN1ybgAnbSHJLeeIaW6MJwhJYks,6052
15
15
  hud/utils/__init__.py,sha256=0m8klSLnMLeIJT23ipBXfFACk4hNWPsA6ZNqZDpv6oY,99
16
16
  hud/utils/config.py,sha256=dze0BGE4q14omjj9822kL9BeiIgWQvJyuU29A2wa1SE,193
17
17
  hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- hud_python-0.1.0b3.dist-info/METADATA,sha256=mWp4cHyIzYuzxk3alNFspztWL2S8_6ZlvGP0UqzIF48,5146
19
- hud_python-0.1.0b3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
20
- hud_python-0.1.0b3.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
21
- hud_python-0.1.0b3.dist-info/RECORD,,
18
+ hud_python-0.1.2a0.dist-info/METADATA,sha256=oGcQYuTzPo9FSMeeARPiPws3ERRoWNPNu8tM8W_1Uow,4787
19
+ hud_python-0.1.2a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
20
+ hud_python-0.1.2a0.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
21
+ hud_python-0.1.2a0.dist-info/RECORD,,