hud-python 0.1.0b2__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/__init__.py CHANGED
@@ -9,7 +9,7 @@ from hud.environment import Environment, EvalSet, Observation, TaskResult
9
9
  from hud.gym import Gym
10
10
  from hud.run import Run
11
11
 
12
- __version__ = "0.1.0b2"
12
+ __version__ = "0.1.1"
13
13
 
14
14
  __all__ = [
15
15
  "Environment",
@@ -3,4 +3,3 @@ from __future__ import annotations
3
3
  from .adapter import ClaudeAdapter
4
4
 
5
5
  __all__ = ["ClaudeAdapter"]
6
-
@@ -2,10 +2,11 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Any
5
+ from typing import Any, ClassVar
6
6
 
7
7
  from hud.adapters.common import CLA, Adapter
8
8
  from hud.adapters.common.types import (
9
+ CLAKey,
9
10
  ClickAction,
10
11
  DragAction,
11
12
  MoveAction,
@@ -20,11 +21,17 @@ from hud.adapters.common.types import (
20
21
 
21
22
 
22
23
  class ClaudeAdapter(Adapter):
24
+ KEY_MAP: ClassVar[dict[str, CLAKey]] = {"Return": "enter"}
25
+
23
26
  def __init__(self) -> None:
24
27
  super().__init__()
25
28
  self.agent_width = 1024 # Claude's preferred width
26
29
  self.agent_height = 768 # Claude's preferred height
27
30
 
31
+ def _map_key(self, key: str) -> CLAKey:
32
+ """Map a key to its standardized form."""
33
+ return self.KEY_MAP.get(key, key.lower()) # type: ignore
34
+
28
35
  def convert(self, data: Any) -> CLA:
29
36
  try:
30
37
  action_type = data.get("action")
@@ -32,10 +39,12 @@ class ClaudeAdapter(Adapter):
32
39
  if action_type == "key":
33
40
  assert "text" in data
34
41
  if "+" in data["text"]:
35
- keys = data["text"].split("+")
42
+ keys: list[CLAKey] = [
43
+ self._map_key(k) for k in (data["text"].split("+"))
44
+ ]
36
45
  assert len(keys) > 0
37
46
  return PressAction(keys=keys)
38
- return PressAction(keys=[data["text"]])
47
+ return PressAction(keys=[self._map_key(data["text"])])
39
48
 
40
49
  elif action_type == "type":
41
50
  assert "text" in data
@@ -66,12 +75,19 @@ class ClaudeAdapter(Adapter):
66
75
  assert len(coord) == 2
67
76
  if (
68
77
  len(self.memory) == 0
69
- or (self.memory[-1] is not MoveAction and self.memory[-1] is not ClickAction)
78
+ or (
79
+ self.memory[-1] is not MoveAction
80
+ and self.memory[-1] is not ClickAction
81
+ )
70
82
  or self.memory[-1].point is None
71
83
  ):
72
- raise ValueError("Left click drag must be preceded by a move or click action")
84
+ raise ValueError(
85
+ "Left click drag must be preceded by a move or click action"
86
+ )
73
87
  else:
74
- return DragAction(path=[self.memory[-1].point, Point(x=coord[0], y=coord[1])])
88
+ return DragAction(
89
+ path=[self.memory[-1].point, Point(x=coord[0], y=coord[1])]
90
+ )
75
91
 
76
92
  elif action_type == "right_click":
77
93
  assert "coordinate" in data
@@ -96,6 +112,17 @@ class ClaudeAdapter(Adapter):
96
112
  point=Point(x=coord[0], y=coord[1]), button="left", pattern=[100]
97
113
  )
98
114
 
115
+ elif action_type == "triple_click":
116
+ assert "coordinate" in data
117
+ coord = data["coordinate"]
118
+ assert isinstance(coord, list)
119
+ assert len(coord) == 2
120
+ return ClickAction(
121
+ point=Point(x=coord[0], y=coord[1]),
122
+ button="left",
123
+ pattern=[100, 100],
124
+ )
125
+
99
126
  elif action_type == "scroll":
100
127
  assert "scroll_direction" in data
101
128
  direction = data["scroll_direction"]
@@ -112,7 +139,8 @@ class ClaudeAdapter(Adapter):
112
139
  raise ValueError(f"Unsupported scroll direction: {direction}")
113
140
 
114
141
  return ScrollAction(
115
- point=Point(x=data["coordinate"][0], y=data["coordinate"][1]), scroll=scroll
142
+ point=Point(x=data["coordinate"][0], y=data["coordinate"][1]),
143
+ scroll=scroll,
116
144
  )
117
145
 
118
146
  elif action_type == "screenshot":
@@ -124,7 +152,6 @@ class ClaudeAdapter(Adapter):
124
152
  elif action_type == "wait":
125
153
  assert "duration" in data
126
154
  return WaitAction(time=data["duration"])
127
-
128
155
  else:
129
156
  raise ValueError(f"Unsupported action type: {action_type}")
130
157
  except AssertionError:
@@ -23,12 +23,13 @@ class ClickAction(CLAAction):
23
23
  selector: str | None = None
24
24
  button: Literal["left", "right", "wheel", "back", "forward"] = "left"
25
25
  pattern: list[int] | None = None # [delay_1, delay_2, ...]
26
+ hold_keys: list[CLAKey] | None = None
26
27
 
27
28
 
28
29
  # PRESS ACTION for key presses/hotkeys
29
30
  class PressAction(CLAAction):
30
31
  type: Literal["press"] = "press"
31
- keys: list[str]
32
+ keys: list[CLAKey]
32
33
 
33
34
 
34
35
  # TYPE ACTION for text typing
@@ -43,6 +44,7 @@ class ScrollAction(CLAAction):
43
44
  type: Literal["scroll"] = "scroll"
44
45
  point: Point | None = None
45
46
  scroll: Point | None = None
47
+ hold_keys: list[CLAKey] | None = None
46
48
 
47
49
 
48
50
  # MOVE ACTION for mouse movement
@@ -64,6 +66,7 @@ class DragAction(CLAAction):
64
66
  type: Literal["drag"] = "drag"
65
67
  path: list[Point]
66
68
  pattern: list[int] | None = None # [delay_1, delay_2, ...]
69
+ hold_keys: list[CLAKey] | None = None
67
70
 
68
71
 
69
72
  # SCREENSHOT ACTION
@@ -90,3 +93,201 @@ CLA = Annotated[
90
93
  ],
91
94
  Field(discriminator="type"),
92
95
  ]
96
+
97
+
98
+ CLAKey = Literal[
99
+ # Control keys
100
+ "backspace",
101
+ "tab",
102
+ "enter",
103
+ "shift",
104
+ "shiftleft",
105
+ "shiftright",
106
+ "ctrl",
107
+ "ctrlleft",
108
+ "ctrlright",
109
+ "alt",
110
+ "altleft",
111
+ "altright",
112
+ "pause",
113
+ "capslock",
114
+ "esc",
115
+ "escape",
116
+ "space",
117
+ "pageup",
118
+ "pagedown",
119
+ "end",
120
+ "home",
121
+ "left",
122
+ "up",
123
+ "right",
124
+ "down",
125
+ "select",
126
+ "print",
127
+ "execute",
128
+ "printscreen",
129
+ "prtsc",
130
+ "insert",
131
+ "delete",
132
+ "help",
133
+ "sleep",
134
+ # Special keys
135
+ "numlock",
136
+ "scrolllock",
137
+ "clear",
138
+ "separator",
139
+ "modechange",
140
+ "apps",
141
+ "browserback",
142
+ "browserfavorites",
143
+ "browserforward",
144
+ "browserhome",
145
+ "browserrefresh",
146
+ "browsersearch",
147
+ "browserstop",
148
+ "launchapp1",
149
+ "launchapp2",
150
+ "launchmail",
151
+ "launchmediaselect",
152
+ "playpause",
153
+ "stop",
154
+ "prevtrack",
155
+ "nexttrack",
156
+ "volumemute",
157
+ "volumeup",
158
+ "volumedown",
159
+ "zoom",
160
+ # Modifier keys
161
+ "win",
162
+ "winleft",
163
+ "winright",
164
+ "command",
165
+ "option",
166
+ "optionleft",
167
+ "optionright",
168
+ "fn",
169
+ # Numpad keys
170
+ "num0",
171
+ "num1",
172
+ "num2",
173
+ "num3",
174
+ "num4",
175
+ "num5",
176
+ "num6",
177
+ "num7",
178
+ "num8",
179
+ "num9",
180
+ "multiply",
181
+ "add",
182
+ "subtract",
183
+ "decimal",
184
+ "divide",
185
+ # Function keys
186
+ "f1",
187
+ "f2",
188
+ "f3",
189
+ "f4",
190
+ "f5",
191
+ "f6",
192
+ "f7",
193
+ "f8",
194
+ "f9",
195
+ "f10",
196
+ "f11",
197
+ "f12",
198
+ "f13",
199
+ "f14",
200
+ "f15",
201
+ "f16",
202
+ "f17",
203
+ "f18",
204
+ "f19",
205
+ "f20",
206
+ "f21",
207
+ "f22",
208
+ "f23",
209
+ "f24",
210
+ # Language-specific keys
211
+ "hanguel",
212
+ "hangul",
213
+ "hanja",
214
+ "kana",
215
+ "kanji",
216
+ "junja",
217
+ "convert",
218
+ "nonconvert",
219
+ "yen",
220
+ # Characters
221
+ "\t",
222
+ "\n",
223
+ "\r",
224
+ " ",
225
+ "!",
226
+ '"',
227
+ "#",
228
+ "$",
229
+ "%",
230
+ "&",
231
+ "'",
232
+ "(",
233
+ ")",
234
+ "*",
235
+ "+",
236
+ ",",
237
+ "-",
238
+ ".",
239
+ "/",
240
+ "0",
241
+ "1",
242
+ "2",
243
+ "3",
244
+ "4",
245
+ "5",
246
+ "6",
247
+ "7",
248
+ "8",
249
+ "9",
250
+ ":",
251
+ ";",
252
+ "<",
253
+ "=",
254
+ ">",
255
+ "?",
256
+ "@",
257
+ "[",
258
+ "\\",
259
+ "]",
260
+ "^",
261
+ "_",
262
+ "`",
263
+ "a",
264
+ "b",
265
+ "c",
266
+ "d",
267
+ "e",
268
+ "f",
269
+ "g",
270
+ "h",
271
+ "i",
272
+ "j",
273
+ "k",
274
+ "l",
275
+ "m",
276
+ "n",
277
+ "o",
278
+ "p",
279
+ "q",
280
+ "r",
281
+ "s",
282
+ "t",
283
+ "u",
284
+ "v",
285
+ "w",
286
+ "x",
287
+ "y",
288
+ "z",
289
+ "{",
290
+ "|",
291
+ "}",
292
+ "~",
293
+ ]
hud/client.py CHANGED
@@ -11,14 +11,14 @@ from .adapters.common import Adapter
11
11
  from .environment import EvalSet
12
12
  from .gym import Gym
13
13
  from .run import Run, RunResponse
14
- from .server import make_request, make_sync_request
14
+ from .server import make_request
15
15
  from .settings import settings
16
16
 
17
17
 
18
18
  class HUDClient:
19
19
  """
20
20
  Client for interacting with the HUD API.
21
-
21
+
22
22
  This is the main entry point for the SDK, providing methods to load gyms,
23
23
  evalsets, and create runs.
24
24
  """
@@ -26,7 +26,7 @@ class HUDClient:
26
26
  def __init__(self, api_key: str | None = None) -> None:
27
27
  """
28
28
  Initialize the HUD client with an API key.
29
-
29
+
30
30
  Args:
31
31
  api_key: API key for authentication with the HUD API
32
32
  """
@@ -36,10 +36,10 @@ class HUDClient:
36
36
  async def load_gym(self, id: str) -> Gym:
37
37
  """
38
38
  Load a gym by ID from the HUD API.
39
-
39
+
40
40
  Args:
41
41
  id: The ID of the gym to load
42
-
42
+
43
43
  Returns:
44
44
  Gym: The loaded gym object
45
45
  """
@@ -54,10 +54,10 @@ class HUDClient:
54
54
  async def load_evalset(self, id: str) -> EvalSet:
55
55
  """
56
56
  Load an evalset by ID from the HUD API.
57
-
57
+
58
58
  Args:
59
59
  id: The ID of the evalset to load
60
-
60
+
61
61
  Returns:
62
62
  EvalSet: The loaded evalset object
63
63
  """
@@ -72,7 +72,7 @@ class HUDClient:
72
72
  async def list_gyms(self) -> list[str]:
73
73
  """
74
74
  List all available gyms.
75
-
75
+
76
76
  Returns:
77
77
  list[str]: List of gym IDs
78
78
  """
@@ -85,7 +85,7 @@ class HUDClient:
85
85
  async def get_runs(self) -> list[Run]:
86
86
  """
87
87
  Get all runs associated with the API key.
88
-
88
+
89
89
  Returns:
90
90
  list[Run]: List of run objects
91
91
  """
@@ -98,11 +98,11 @@ class HUDClient:
98
98
  async def load_run(self, id: str, adapter: Adapter | None = None) -> Run | None:
99
99
  """
100
100
  Load a run by ID from the HUD API.
101
-
101
+
102
102
  Args:
103
103
  id: The ID of the run to load
104
104
  adapter: Optional adapter for action conversion
105
-
105
+
106
106
  Returns:
107
107
  Run: The loaded run object, or None if not found
108
108
  """
@@ -132,7 +132,7 @@ class HUDClient:
132
132
  )
133
133
  return None
134
134
 
135
- def create_run(
135
+ async def create_run(
136
136
  self,
137
137
  name: str,
138
138
  gym: Gym,
@@ -143,7 +143,7 @@ class HUDClient:
143
143
  ) -> Run:
144
144
  """
145
145
  Create a new run in the HUD system.
146
-
146
+
147
147
  Args:
148
148
  name: Name of the run
149
149
  gym: Gym to use for the run
@@ -151,7 +151,7 @@ class HUDClient:
151
151
  config: Optional configuration parameters
152
152
  metadata: Optional metadata for the run
153
153
  adapter: Optional adapter for action conversion
154
-
154
+
155
155
  Returns:
156
156
  Run: The created run object
157
157
  """
@@ -161,7 +161,7 @@ class HUDClient:
161
161
  metadata = {}
162
162
  if config is None:
163
163
  config = {}
164
- data = make_sync_request(
164
+ data = await make_request(
165
165
  method="POST",
166
166
  url=f"{settings.base_url}/runs",
167
167
  json={
@@ -188,6 +188,7 @@ class HUDClient:
188
188
  Display a stream in the HUD system.
189
189
  """
190
190
  from IPython.display import HTML, display
191
+
191
192
  html_content = f"""
192
193
  <div style="width: 960px; height: 540px; overflow: hidden;">
193
194
  <div style="transform: scale(0.5); transform-origin: top left;">
hud/environment.py CHANGED
@@ -13,16 +13,29 @@ from hud.settings import settings
13
13
  if TYPE_CHECKING:
14
14
  from .adapters.common import Adapter
15
15
 
16
+
17
+ class BaseResponseWithLogs(BaseModel):
18
+ """Base model for API responses that include logs."""
19
+ logs: str | None = None
20
+ error: str | None = None
21
+
22
+
23
+ class RewardResponse(BaseResponseWithLogs):
24
+ reward: float
25
+
26
+
16
27
  logger = logging.getLogger("hud.environment")
17
28
 
29
+
18
30
  class Observation(BaseModel):
19
31
  """
20
32
  Observation from the environment.
21
-
33
+
22
34
  Attributes:
23
35
  screenshot: Base64 encoded PNG string of the screen
24
36
  text: Text observation, if available
25
37
  """
38
+
26
39
  screenshot: str | None = None # base64 string png
27
40
  text: str | None = None
28
41
 
@@ -30,18 +43,20 @@ class Observation(BaseModel):
30
43
  class TaskResult(BaseModel):
31
44
  """
32
45
  Result of a task step.
33
-
46
+
34
47
  Attributes:
35
48
  observation: The current observation
36
49
  reward: Reward value from the step
37
50
  terminated: Whether the task is complete
38
51
  info: Additional information from the environment
39
52
  """
53
+
40
54
  observation: Observation
41
55
  reward: float
42
56
  terminated: bool
43
57
  info: dict[str, Any]
44
58
 
59
+
45
60
  class EnvironmentStatus(str, enum.Enum):
46
61
  """
47
62
  Status of the environment.
@@ -52,6 +67,7 @@ class EnvironmentStatus(str, enum.Enum):
52
67
  COMPLETED: The environment is completed
53
68
  ERROR: The environment is in an error state
54
69
  """
70
+
55
71
  INITIALIZING = "initializing"
56
72
  RUNNING = "running"
57
73
  COMPLETED = "completed"
@@ -64,27 +80,30 @@ status_messages = {
64
80
  EnvironmentStatus.COMPLETED.value: "completed",
65
81
  }
66
82
 
83
+
67
84
  class Environment:
68
85
  """
69
86
  Environment interface for agent interactions.
70
-
87
+
71
88
  This class handles the environment state and interactions, including
72
89
  creating the environment, retrieving state, and executing actions.
73
90
  """
74
91
 
75
92
  def __init__(
76
93
  self,
77
- run_id: str,
78
94
  adapter: Adapter,
95
+ run_id: str,
96
+ id: str | None = None,
79
97
  config: dict[str, Any] | None = None,
80
98
  metadata: dict[str, Any] | None = None,
81
99
  ) -> None:
82
100
  """
83
101
  Initialize an environment.
84
-
102
+
85
103
  Args:
86
- run_id: ID of the run this environment belongs to
87
104
  adapter: Adapter for converting actions
105
+ run_id: ID of the run this environment belongs to
106
+ id: Optional ID of an existing environment
88
107
  config: Optional configuration parameters
89
108
  metadata: Optional metadata for the environment
90
109
  """
@@ -96,16 +115,14 @@ class Environment:
96
115
  self.config = config
97
116
  self.adapter = adapter
98
117
  self.metadata = metadata
99
- # task_run_id is created when the environment is created (create_environment)
100
- # or provided if env already exists.
101
118
  self.final_response: None | str = None
102
- self.id = None
119
+ self.id = id
103
120
  self.vnc_url = None
104
121
 
105
122
  async def create_environment(self) -> str:
106
123
  """
107
124
  Initialize the environment and return the task_run_id.
108
-
125
+
109
126
  Returns:
110
127
  str: The environment ID
111
128
  """
@@ -121,7 +138,7 @@ class Environment:
121
138
  async def get_vnc_url(self) -> str:
122
139
  """
123
140
  Get the VNC URL for the environment.
124
-
141
+
125
142
  Returns:
126
143
  str: The VNC URL for remote viewing/control
127
144
  """
@@ -136,7 +153,7 @@ class Environment:
136
153
  async def get_env_state(self) -> str:
137
154
  """
138
155
  Get the state of the environment.
139
-
156
+
140
157
  Returns:
141
158
  str: The current state (e.g., "running", "error")
142
159
  """
@@ -152,10 +169,10 @@ class Environment:
152
169
  ) -> tuple[Observation, float, bool, dict[str, Any]]:
153
170
  """
154
171
  Send action to environment and get result.
155
-
172
+
156
173
  Args:
157
174
  action: The action to take, or None for no action
158
-
175
+
159
176
  Returns:
160
177
  tuple: (observation, reward, terminated, info)
161
178
  """
@@ -181,10 +198,10 @@ class Environment:
181
198
  def translate_action(self, action: Any) -> list:
182
199
  """
183
200
  Translate action to the correct format.
184
-
201
+
185
202
  Args:
186
203
  action: The action to translate
187
-
204
+
188
205
  Returns:
189
206
  list: List of translated actions in the CLA format
190
207
  """
@@ -193,19 +210,19 @@ class Environment:
193
210
  return self.adapter.adapt_list(action)
194
211
  return [self.adapter.adapt(action)]
195
212
 
196
- async def evaluate(self) -> float:
213
+ async def evaluate(self) -> RewardResponse:
197
214
  """
198
215
  Get final evaluation score.
199
-
216
+
200
217
  Returns:
201
- float: The evaluation score
218
+ RewardResponse: The evaluation response containing reward, logs, and possible error
202
219
  """
203
220
  data = await make_request(
204
221
  method="POST",
205
222
  url=f"{settings.base_url}/evaluation/{self.id}",
206
223
  api_key=settings.api_key,
207
224
  )
208
- return data["reward"]
225
+ return RewardResponse(**data)
209
226
 
210
227
  async def close(self) -> None:
211
228
  """
@@ -217,16 +234,14 @@ class Environment:
217
234
  api_key=settings.api_key,
218
235
  )
219
236
 
220
- async def reset(
221
- self, task_id: str, metadata: dict[str, Any] | None = None
222
- ) -> Observation:
237
+ async def reset(self, task_id: str, metadata: dict[str, Any] | None = None) -> Observation:
223
238
  """
224
239
  Reset the environment to the task.
225
-
240
+
226
241
  Args:
227
242
  task_id: ID of the task to reset to
228
243
  metadata: Optional metadata for the reset
229
-
244
+
230
245
  Returns:
231
246
  Observation: Initial observation for the task
232
247
  """
@@ -253,10 +268,11 @@ class Environment:
253
268
  break
254
269
  await asyncio.sleep(10)
255
270
 
271
+
256
272
  class EvalSet:
257
273
  """
258
274
  Evaluation set containing tasks for benchmarking.
259
-
275
+
260
276
  Attributes:
261
277
  id: Unique identifier for the evalset
262
278
  name: Human-readable name
@@ -268,10 +284,11 @@ class EvalSet:
268
284
  id: str,
269
285
  name: str,
270
286
  tasks: list[str] | None = None,
287
+ configs: dict[str, Any] | None = None,
271
288
  ) -> None:
272
289
  """
273
290
  Initialize an evaluation set.
274
-
291
+
275
292
  Args:
276
293
  id: Unique identifier
277
294
  name: Human-readable name
@@ -280,11 +297,12 @@ class EvalSet:
280
297
  self.id = id
281
298
  self.name = name
282
299
  self.tasks = tasks or []
300
+ self.configs = configs or {}
283
301
 
284
302
  async def fetch_tasks(self) -> list[str]:
285
303
  """
286
304
  Fetch all tasks in this evalset from the API.
287
-
305
+
288
306
  Returns:
289
307
  list[str]: List of task IDs
290
308
  """
@@ -293,5 +311,7 @@ class EvalSet:
293
311
  url=f"{settings.base_url}/evalsets/{self.id}/tasks",
294
312
  api_key=settings.api_key,
295
313
  )
314
+ # Extracts a list of task ids and list of config objects for the evalset
296
315
  self.tasks = data["tasks"]
316
+ self.configs = data["evalset"]
297
317
  return self.tasks
hud/gym.py CHANGED
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
  class Gym:
5
5
  """
6
6
  Represents a simulation environment in the HUD system.
7
-
7
+
8
8
  Attributes:
9
9
  id: Unique identifier for the gym
10
10
  name: Human-readable name of the gym
@@ -13,7 +13,7 @@ class Gym:
13
13
  def __init__(self, id: str, name: str) -> None:
14
14
  """
15
15
  Initialize a gym.
16
-
16
+
17
17
  Args:
18
18
  id: Unique identifier
19
19
  name: Human-readable name
hud/run.py CHANGED
@@ -19,21 +19,20 @@ if TYPE_CHECKING:
19
19
  class RunResponse(BaseModel):
20
20
  """
21
21
  Response model for run data from the API.
22
-
22
+
23
23
  Attributes:
24
24
  id: Unique identifier for the run
25
25
  name: Human-readable name of the run
26
26
  gym: Dictionary containing gym information
27
27
  evalset: Dictionary containing evalset information
28
- adapter: Dictionary containing adapter information
29
28
  config: Dictionary containing configuration parameters
30
29
  metadata: Dictionary containing metadata
31
30
  """
31
+
32
32
  id: str
33
33
  name: str
34
34
  gym: dict[str, Any]
35
35
  evalset: dict[str, Any]
36
- adapter: dict[str, Any]
37
36
  config: dict[str, Any]
38
37
  metadata: dict[str, Any]
39
38
 
@@ -41,7 +40,7 @@ class RunResponse(BaseModel):
41
40
  class RunAnalyticsResponse(BaseModel):
42
41
  """
43
42
  Model for Run analytics data.
44
-
43
+
45
44
  Attributes:
46
45
  id: Unique identifier for the run
47
46
  name: Human-readable name of the run
@@ -54,6 +53,7 @@ class RunAnalyticsResponse(BaseModel):
54
53
  created_at: When the run was created
55
54
  raw_data: Detailed data about tasks and environments
56
55
  """
56
+
57
57
  id: str
58
58
  name: str
59
59
  status_counts: dict[str, int] # e.g. {"completed": 5, "running": 2, "error": 1}
@@ -69,7 +69,7 @@ class RunAnalyticsResponse(BaseModel):
69
69
 
70
70
  def __str__(self) -> str:
71
71
  return self.visualize()
72
-
72
+
73
73
  def visualize(self) -> str:
74
74
  """
75
75
  Generate an ASCII bar chart visualization of run analytics.
@@ -82,9 +82,7 @@ class RunAnalyticsResponse(BaseModel):
82
82
  """
83
83
  max_width = 50
84
84
 
85
- completion_rate = (
86
- self.completion_rate if self.completion_rate is not None else 0
87
- )
85
+ completion_rate = self.completion_rate if self.completion_rate is not None else 0
88
86
 
89
87
  result = [
90
88
  f"Run: {self.name} (ID: {self.id})",
@@ -123,7 +121,7 @@ class RunAnalyticsResponse(BaseModel):
123
121
  class Run:
124
122
  """
125
123
  A run represents a collection of tasks and environments.
126
-
124
+
127
125
  This class provides methods to fetch task IDs, create environments,
128
126
  and access analytics for the run.
129
127
  """
@@ -140,7 +138,7 @@ class Run:
140
138
  ) -> None:
141
139
  """
142
140
  Initialize a run.
143
-
141
+
144
142
  Args:
145
143
  id: Unique identifier
146
144
  name: Human-readable name
@@ -167,7 +165,7 @@ class Run:
167
165
  async def fetch_task_ids(self) -> list[str]:
168
166
  """
169
167
  Fetch task IDs for this run from the evalset.
170
-
168
+
171
169
  Returns:
172
170
  list[str]: List of task IDs
173
171
  """
@@ -176,10 +174,10 @@ class Run:
176
174
  async def make(self, metadata: dict[str, Any] | None = None) -> Environment:
177
175
  """
178
176
  Create a new environment for this run.
179
-
177
+
180
178
  Args:
181
179
  metadata: Metadata for the environment
182
-
180
+
183
181
  Returns:
184
182
  Environment: The created environment
185
183
  """
@@ -197,7 +195,7 @@ class Run:
197
195
  async def get_analytics(self) -> RunAnalyticsResponse:
198
196
  """
199
197
  Get analytics for this run.
200
-
198
+
201
199
  Returns:
202
200
  RunAnalyticsResponse: Analytics data including status counts,
203
201
  average score, and other metrics
hud/server/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- from .requests import RequestError, make_request, make_sync_request
3
+ from .requests import RequestError, make_request
4
4
 
5
- __all__ = ["RequestError", "make_request", "make_sync_request"]
5
+ __all__ = ["RequestError", "make_request"]
hud/server/requests.py CHANGED
@@ -4,22 +4,27 @@ HTTP request utilities for the HUD API.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
+ import asyncio
7
8
  import logging
8
9
  from typing import Any
9
10
 
10
11
  import httpx
11
12
 
13
+ # Set up logger
12
14
  logger = logging.getLogger("hud.http")
15
+ logger.setLevel(logging.DEBUG)
16
+
13
17
 
14
18
  class RequestError(Exception):
15
19
  """Custom exception for API request errors"""
20
+
16
21
  def __init__(
17
22
  self,
18
23
  message: str,
19
24
  status_code: int | None = None,
20
25
  response_text: str | None = None,
21
26
  response_json: dict[str, Any] | None = None,
22
- response_headers: dict[str, str] | None = None
27
+ response_headers: dict[str, str] | None = None,
23
28
  ) -> None:
24
29
  self.message = message
25
30
  self.status_code = status_code
@@ -33,17 +38,22 @@ class RequestError(Exception):
33
38
 
34
39
  if self.status_code:
35
40
  parts.append(f"Status: {self.status_code}")
41
+
36
42
  if self.response_text:
37
43
  parts.append(f"Response Text: {self.response_text}")
44
+
38
45
  if self.response_json:
39
46
  parts.append(f"Response JSON: {self.response_json}")
47
+
40
48
  if self.response_headers:
41
49
  parts.append(f"Headers: {self.response_headers}")
42
50
 
43
51
  return " | ".join(parts)
44
52
 
45
53
  @classmethod
46
- def from_http_error(cls, error: httpx.HTTPStatusError) -> RequestError:
54
+ def from_http_error(
55
+ cls, error: httpx.HTTPStatusError, context: str = ""
56
+ ) -> RequestError:
47
57
  """Create a RequestError from an HTTP error response"""
48
58
  response = error.response
49
59
  status_code = response.status_code
@@ -68,6 +78,10 @@ class RequestError(Exception):
68
78
  # Fallback to simple message if JSON parsing fails
69
79
  message = f"Request failed with status {status_code}"
70
80
 
81
+ # Add context if provided
82
+ if context:
83
+ message = f"{context}: {message}"
84
+
71
85
  # Log the error details
72
86
  logger.error(
73
87
  "HTTP error from HUD SDK: %s | URL: %s | Status: %s | Response: %s%s",
@@ -75,7 +89,7 @@ class RequestError(Exception):
75
89
  response.url,
76
90
  status_code,
77
91
  response_text[:500],
78
- "..." if len(response_text) > 500 else ""
92
+ "..." if len(response_text) > 500 else "",
79
93
  )
80
94
 
81
95
  return cls(
@@ -86,61 +100,44 @@ class RequestError(Exception):
86
100
  response_headers=response_headers,
87
101
  )
88
102
 
89
- async def make_request(
90
- method: str, url: str, json: Any | None = None, api_key: str | None = None
91
- ) -> dict[str, Any]:
92
- """
93
- Make an asynchronous HTTP request to the HUD API.
94
-
95
- Args:
96
- method: HTTP method (GET, POST, etc.)
97
- url: Full URL for the request
98
- json: Optional JSON serializable data
99
- api_key: API key for authentication
100
-
101
- Returns:
102
- dict: JSON response from the server
103
-
104
- Raises:
105
- RequestError: If API key is missing or request fails
106
- """
107
- if not api_key:
108
- raise RequestError("API key is required but not provided")
109
-
110
- headers = {"Authorization": f"Bearer {api_key}"}
111
103
 
112
- async with httpx.AsyncClient(timeout=240.0) as client:
113
- try:
114
- response = await client.request(
115
- method=method, url=url, json=json, headers=headers
116
- )
117
- response.raise_for_status()
118
- result = response.json()
119
- return result
120
- except httpx.HTTPStatusError as e:
121
- raise RequestError.from_http_error(e) from None
122
- except httpx.RequestError as e:
123
- raise RequestError(f"Network error: {e!s}") from None
124
- except Exception as e:
125
- # Catch-all for unexpected errors
126
- raise RequestError(f"Unexpected error: {e!s}") from None
104
+ async def _handle_retry(
105
+ attempt: int, max_retries: int, retry_delay: float, url: str, error_msg: str
106
+ ) -> None:
107
+ """Helper function to handle retry logic and logging."""
108
+ retry_time = retry_delay * (2 ** (attempt - 1)) # Exponential backoff
109
+ logger.warning(
110
+ "%s from %s, retrying in %.2f seconds (attempt %d/%d)",
111
+ error_msg,
112
+ url,
113
+ retry_time,
114
+ attempt,
115
+ max_retries,
116
+ )
117
+ await asyncio.sleep(retry_time)
127
118
 
128
119
 
129
- def make_sync_request(
130
- method: str, url: str, json: Any | None = None, api_key: str | None = None
120
+ async def make_request(
121
+ method: str,
122
+ url: str,
123
+ json: Any | None = None,
124
+ api_key: str | None = None,
125
+ max_retries: int = 4,
126
+ retry_delay: float = 2.0,
131
127
  ) -> dict[str, Any]:
132
128
  """
133
- Make a synchronous HTTP request to the HUD API.
134
-
129
+ Make an asynchronous HTTP request to the HUD API.
130
+
135
131
  Args:
136
132
  method: HTTP method (GET, POST, etc.)
137
133
  url: Full URL for the request
138
134
  json: Optional JSON serializable data
139
135
  api_key: API key for authentication
140
-
136
+ max_retries: Maximum number of retries
137
+ retry_delay: Delay between retries
141
138
  Returns:
142
139
  dict: JSON response from the server
143
-
140
+
144
141
  Raises:
145
142
  RequestError: If API key is missing or request fails
146
143
  """
@@ -148,19 +145,49 @@ def make_sync_request(
148
145
  raise RequestError("API key is required but not provided")
149
146
 
150
147
  headers = {"Authorization": f"Bearer {api_key}"}
148
+ retry_status_codes = [502, 503, 504]
149
+ attempt = 0
150
+
151
+ while attempt <= max_retries:
152
+ attempt += 1
151
153
 
152
- with httpx.Client(timeout=240.0) as client:
153
154
  try:
154
- response = client.request(
155
- method=method, url=url, json=json, headers=headers
156
- )
155
+ async with httpx.AsyncClient(
156
+ timeout=240.0,
157
+ limits=httpx.Limits(
158
+ max_connections=1000,
159
+ max_keepalive_connections=1000,
160
+ keepalive_expiry=10.0,
161
+ ),
162
+ ) as client:
163
+ response = await client.request(
164
+ method=method, url=url, json=json, headers=headers
165
+ )
166
+
167
+ # Check if we got a retriable status code
168
+ if response.status_code in retry_status_codes and attempt <= max_retries:
169
+ await _handle_retry(
170
+ attempt,
171
+ max_retries,
172
+ retry_delay,
173
+ url,
174
+ f"Received status {response.status_code}",
175
+ )
176
+ continue
177
+
157
178
  response.raise_for_status()
158
179
  result = response.json()
159
180
  return result
160
181
  except httpx.HTTPStatusError as e:
161
182
  raise RequestError.from_http_error(e) from None
162
183
  except httpx.RequestError as e:
163
- raise RequestError(f"Network error: {e!s}") from None
184
+ if attempt <= max_retries:
185
+ await _handle_retry(
186
+ attempt, max_retries, retry_delay, url, f"Network error: {e}"
187
+ )
188
+ continue
189
+ else:
190
+ raise RequestError(f"Network error: {e!s}") from None
164
191
  except Exception as e:
165
- # Catch-all for unexpected errors
166
192
  raise RequestError(f"Unexpected error: {e!s}") from None
193
+ raise RequestError(f"Request failed after {max_retries} retries with unknown error")
hud/settings.py CHANGED
@@ -7,32 +7,30 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
7
7
  class Settings(BaseSettings):
8
8
  """
9
9
  Global settings for the HUD SDK.
10
-
10
+
11
11
  This class manages configuration values loaded from environment variables
12
12
  and provides global access to settings throughout the application.
13
13
  """
14
- model_config = SettingsConfigDict(
15
- env_file=".env",
16
- env_file_encoding="utf-8",
17
- extra="allow"
18
- )
14
+
15
+ model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="allow")
19
16
 
20
17
  base_url: str = Field(
21
18
  default="https://orchestrator.hud.live/hud-gym/api/v1",
22
19
  description="Base URL for the HUD API",
23
- validation_alias="base_url"
20
+ validation_alias="base_url",
24
21
  )
25
22
 
26
23
  api_key: str | None = Field(
27
24
  default=None,
28
25
  description="API key for authentication with the HUD API",
29
- validation_alias="HUD_API_KEY"
26
+ validation_alias="HUD_API_KEY",
30
27
  )
31
28
 
32
29
 
33
30
  # Create a singleton instance
34
31
  settings = Settings()
35
32
 
33
+
36
34
  # Add utility functions for backwards compatibility
37
35
  def get_settings() -> Settings:
38
36
  """Get the global settings instance."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.1.0b2
3
+ Version: 0.1.1
4
4
  Summary: SDK for the HUD evaluation platform.
5
5
  Project-URL: Homepage, https://github.com/Human-Data/hud-sdk
6
6
  Project-URL: Bug Tracker, https://github.com/Human-Data/hud-sdk/issues
@@ -57,9 +57,9 @@ Description-Content-Type: text/markdown
57
57
 
58
58
  # HUD
59
59
 
60
- A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models. Visit [hud.so](https://hud.so).
60
+ A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models.
61
61
 
62
- > **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is evolving and may change in future releases as we gather feedback and improve functionality.
62
+ > **Alpha Release Notice**: This SDK is currently in early release status. The API is evolving and may change in future releases as we gather feedback and improve functionality.
63
63
 
64
64
  [![PyPI version](https://img.shields.io/pypi/v/hud-python)](https://pypi.org/project/hud-python/)
65
65
 
@@ -70,13 +70,12 @@ A Python SDK for interacting with HUD environments and evaluation benchmarks for
70
70
 
71
71
  [RECOMMENDED] To get started with an agent, see the [Claude Computer use example](https://github.com/Human-Data/hud-sdk/tree/main/examples).
72
72
 
73
-
74
- Otherwise, install the package with Python>=3.9:
73
+ Install the package with Python>=3.9:
75
74
  ```bash
76
75
  pip install hud-python
77
76
  ```
78
77
 
79
- Make sure to setup your account [here](https://hud.so/settings) and add your API key to the environment variables:
78
+ Make sure to set up your account with us (email founders@hud.so) and add your API key to the environment variables:
80
79
  ```bash
81
80
  HUD_API_KEY=<your-api-key>
82
81
  ```
@@ -95,7 +94,7 @@ async def main():
95
94
  evalset = await client.load_evalset(id="OSWorld-Ubuntu")
96
95
 
97
96
  # Create a run and environment
98
- run = client.create_run(name="example-run", gym=gym, evalset=evalset)
97
+ run = await client.create_run(name="example-run", gym=gym, evalset=evalset)
99
98
  env = await run.make(metadata={"agent_id": "OSWORLD-1"})
100
99
  await env.wait_for_ready()
101
100
 
@@ -117,20 +116,9 @@ if __name__ == "__main__":
117
116
  asyncio.run(main())
118
117
  ```
119
118
 
120
- ## Features
121
-
122
- - Connect to HUD evaluation environments
123
- - Run benchmarks across various tasks
124
- - Support for different agent adapters
125
- - Asynchronous API
126
-
127
119
  ## Documentation
128
120
 
129
- For comprehensive guides, examples, and API reference, visit:
130
- - [Getting Started](https://docs.hud.so/introduction)
131
- - [Installation](https://docs.hud.so/installation)
132
- - [API Reference](https://docs.hud.so/api-reference)
133
- - [Examples](https://docs.hud.so/examples)
121
+ For comprehensive guides, examples, and API reference, visit [our docs](https://docs.hud.so/introduction).
134
122
 
135
123
  ## License
136
124
 
@@ -0,0 +1,21 @@
1
+ hud/__init__.py,sha256=8zbB2Wts-sHJ6pLPxwFDvM3Fo1RNT2r5m1H1NpMvyeA,414
2
+ hud/client.py,sha256=7WHXTQhVK-T9Rj4ZooADE_c1pah5Bc1DJ9ZRqUyUnuQ,5724
3
+ hud/environment.py,sha256=9r8eK3OVqr-wpPGlhnrpuDt-z6FIp3S3oukTZ7swN3o,8899
4
+ hud/gym.py,sha256=aanBHtlsXrJwrFax9SbXWwk_By-X8wE3M9deS-E_s4c,463
5
+ hud/run.py,sha256=5ukjuRNLjj5fczaWxpR_5NebFbQpoy8w81eRYy309Vg,6401
6
+ hud/settings.py,sha256=1ScSac0ta03LkckkH2gi6SyKY2M7nr15vRGugo2C_xs,1015
7
+ hud/adapters/__init__.py,sha256=y3H7yMl7rC-rrXG2WvePdSojoNFSui02eYTH17Xd7OY,87
8
+ hud/adapters/claude/__init__.py,sha256=i7QEF-29FLb9qxp1eYtXs-adIk_tG54tL-9g6d3xodk,100
9
+ hud/adapters/claude/adapter.py,sha256=ekYZixANKfx-4lENlXGaomh6Ecw4SRKtLWD5quGNWdM,5782
10
+ hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0mp8,118
11
+ hud/adapters/common/adapter.py,sha256=SCtOuRjW5Szzd45LXCaqDEaKr2lhA-nIqSEMJ9KLsKI,5799
12
+ hud/adapters/common/types.py,sha256=d9tIF06tjK7VCb-yBJ9epwHlXRHlObo9YWetrv33s8c,4511
13
+ hud/server/__init__.py,sha256=VPrhyyqg3inge9J7BjcmDBNJRuvkCA9ZDXS_R5Q8ZtY,129
14
+ hud/server/requests.py,sha256=pPPaMpwqmA1RyWwzQN1ybgAnbSHJLeeIaW6MJwhJYks,6052
15
+ hud/utils/__init__.py,sha256=0m8klSLnMLeIJT23ipBXfFACk4hNWPsA6ZNqZDpv6oY,99
16
+ hud/utils/config.py,sha256=dze0BGE4q14omjj9822kL9BeiIgWQvJyuU29A2wa1SE,193
17
+ hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ hud_python-0.1.1.dist-info/METADATA,sha256=rH6sSZl4J7sfQdUp2QxNRUp9scqRMG2TxI390KD3Lr4,4785
19
+ hud_python-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
20
+ hud_python-0.1.1.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
21
+ hud_python-0.1.1.dist-info/RECORD,,
@@ -1,21 +0,0 @@
1
- hud/__init__.py,sha256=GmX-LujM2oZR6_tP_mOW09BY8HeK41lLF-P0sMW_1pY,416
2
- hud/client.py,sha256=ztWPiAJyJUdJxdxGqDmsQnVK-_jccinWQUUXmq0OOmY,5843
3
- hud/environment.py,sha256=R-t-21V0gveuHL6LlVLnBp0gYGm8tn5FbLcq_rRlH9g,8587
4
- hud/gym.py,sha256=dKmf0Ol0-XyLhji034pF_5dXnhW1IgIr-dJUg4KfslE,475
5
- hud/run.py,sha256=_K7POPjJyqcJ_DVLAO7hRmvLUcg9gg2KrLHw_26DB9I,6570
6
- hud/settings.py,sha256=FbZHI1q6bDHe7Awl32SDPb-syqtkLI3C7gIIXuMXCiQ,1045
7
- hud/adapters/__init__.py,sha256=y3H7yMl7rC-rrXG2WvePdSojoNFSui02eYTH17Xd7OY,87
8
- hud/adapters/claude/__init__.py,sha256=GsMxaBL5ZuKV6-jJsLfw23n_Ml9e88SXIddYDGkIUKE,101
9
- hud/adapters/claude/adapter.py,sha256=oi2lvO42g7i-L151tIWIGQGA80skcYRwzQ52-0f2OpA,4840
10
- hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0mp8,118
11
- hud/adapters/common/adapter.py,sha256=SCtOuRjW5Szzd45LXCaqDEaKr2lhA-nIqSEMJ9KLsKI,5799
12
- hud/adapters/common/types.py,sha256=LlWxH9sWucYgnIv6DKrgqToh3k7Bu-xdTxNFU4L8Xg8,1962
13
- hud/server/__init__.py,sha256=HeIXBGb-bxtq3xF20jP4IrOy77PlsqhClOf3bZ9wrwI,169
14
- hud/server/requests.py,sha256=M_pK1oCd4QjIE0yguD6iaybJ_mempOWDQYEpdOkophU,5522
15
- hud/utils/__init__.py,sha256=0m8klSLnMLeIJT23ipBXfFACk4hNWPsA6ZNqZDpv6oY,99
16
- hud/utils/config.py,sha256=dze0BGE4q14omjj9822kL9BeiIgWQvJyuU29A2wa1SE,193
17
- hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- hud_python-0.1.0b2.dist-info/METADATA,sha256=5skHs5IfSJP4DQAGzpuo_yjO7l65XxHIPWGitHQt0Ug,5140
19
- hud_python-0.1.0b2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
20
- hud_python-0.1.0b2.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
21
- hud_python-0.1.0b2.dist-info/RECORD,,