hud-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ """
2
+ HUD Gym SDK - A Python SDK for interacting with HUD environments.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from hud.client import HUDClient
8
+ from hud.env import Env, EvalSet, Observation, TaskResult
9
+ from hud.gym import Gym
10
+ from hud.run import Run
11
+
12
+ __version__ = "0.1.0"
13
+
14
+ __all__ = [
15
+ "Env",
16
+ "EvalSet",
17
+ "Gym",
18
+ "HUDClient",
19
+ "Observation",
20
+ "Run",
21
+ "TaskResult",
22
+ ]
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ from .common import Adapter
4
+
5
+ __all__ = ["Adapter"]
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ from .adapter import ClaudeAdapter
4
+
5
+ __all__ = ["ClaudeAdapter"]
6
+
@@ -0,0 +1,131 @@
1
+ # ruff: noqa: S101
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from hud.adapters.common import CLA, Adapter
8
+ from hud.adapters.common.types import (
9
+ ClickAction,
10
+ DragAction,
11
+ MoveAction,
12
+ Point,
13
+ PositionFetch,
14
+ PressAction,
15
+ ScreenshotFetch,
16
+ ScrollAction,
17
+ TypeAction,
18
+ WaitAction,
19
+ )
20
+
21
+
22
class ClaudeAdapter(Adapter):
    """
    Adapter that converts Claude computer-use tool inputs into CLA actions.

    The agent resolution is set to 1024x768; the coordinate rescaling itself
    is inherited from ``Adapter``.
    """

    # Claude single-click action name -> CLA mouse button.
    _CLICK_BUTTONS = {
        "left_click": "left",
        "right_click": "right",
        "middle_click": "wheel",
    }

    def __init__(self) -> None:
        super().__init__()
        self.agent_width = 1024  # Claude's preferred width
        self.agent_height = 768  # Claude's preferred height

    @staticmethod
    def _require(condition: bool, data: Any) -> None:
        """Raise the adapter's standard ``ValueError`` unless *condition* holds."""
        # Explicit raise instead of `assert`: assertions are removed under
        # `python -O`, which would silently disable input validation.
        if not condition:
            raise ValueError(f"Invalid action: {data}")

    def _point_from(self, data: Any) -> Point:
        """Build a ``Point`` from ``data["coordinate"]``, expected as a list ``[x, y]``."""
        self._require("coordinate" in data, data)
        coord = data["coordinate"]
        self._require(isinstance(coord, list) and len(coord) == 2, data)
        return Point(x=coord[0], y=coord[1])

    def convert(self, data: Any) -> CLA:
        """
        Convert one Claude action into the matching CLA model.

        Supported ``data["action"]`` values: ``key``, ``type``, ``mouse_move``,
        ``left_click``, ``right_click``, ``middle_click``, ``double_click``,
        ``left_click_drag``, ``scroll``, ``screenshot``, ``cursor_position``
        and ``wait``.

        Args:
            data: Mapping holding ``"action"`` plus the fields that action needs
                (``text``, ``coordinate``, ``scroll_direction``,
                ``scroll_amount``, ``duration``).

        Returns:
            The CLA action model for ``data``.

        Raises:
            ValueError: If ``data`` is None, a required field is missing or
                malformed, the action type or scroll direction is unsupported,
                or a drag is not preceded by a move/click with a point.
        """
        if data is None:
            # Same message as the base Adapter.convert uses for a missing action.
            raise ValueError("Please provide a valid action")

        action_type = data.get("action")

        if action_type == "key":
            self._require("text" in data, data)
            text = data["text"]
            self._require(isinstance(text, str), data)
            # "ctrl+c" -> ["ctrl", "c"]; text without "+" yields a one-key list.
            return PressAction(keys=text.split("+"))

        if action_type == "type":
            self._require("text" in data, data)
            return TypeAction(text=data["text"], enter_after=False)

        if action_type == "mouse_move":
            return MoveAction(point=self._point_from(data))

        if action_type in self._CLICK_BUTTONS:
            return ClickAction(
                point=self._point_from(data),
                button=self._CLICK_BUTTONS[action_type],
            )

        if action_type == "double_click":
            return ClickAction(point=self._point_from(data), button="left", pattern=[100])

        if action_type == "left_click_drag":
            end = self._point_from(data)
            # Claude only sends the drag destination; the start is the point of
            # the previous action, which must therefore be a move or a click.
            previous = self.memory[-1] if self.memory else None
            # isinstance, not `is`: an instance is never identical to its class,
            # so an identity test would reject every drag.
            if not isinstance(previous, (MoveAction, ClickAction)) or previous.point is None:
                raise ValueError("Left click drag must be preceded by a move or click action")
            return DragAction(path=[previous.point, end])

        if action_type == "scroll":
            self._require("scroll_direction" in data, data)
            self._require("scroll_amount" in data, data)
            self._require("coordinate" in data, data)
            direction = data["scroll_direction"]
            amount = data["scroll_amount"]

            if direction == "up":
                scroll = Point(x=0, y=-amount)
            elif direction == "down":
                scroll = Point(x=0, y=amount)
            elif direction == "left":
                scroll = Point(x=-amount, y=0)
            elif direction == "right":
                scroll = Point(x=amount, y=0)
            else:
                raise ValueError(f"Unsupported scroll direction: {direction}")

            coord = data["coordinate"]
            return ScrollAction(point=Point(x=coord[0], y=coord[1]), scroll=scroll)

        if action_type == "screenshot":
            return ScreenshotFetch()

        if action_type == "cursor_position":
            return PositionFetch()

        if action_type == "wait":
            self._require("duration" in data, data)
            # NOTE(review): `duration` is forwarded unchanged, while
            # WaitAction.time is annotated as milliseconds - confirm the unit
            # Claude sends.
            return WaitAction(time=data["duration"])

        raise ValueError(f"Unsupported action type: {action_type}")
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ from .adapter import Adapter
4
+ from .types import CLA
5
+
6
+ __all__ = ["CLA", "Adapter"]
@@ -0,0 +1,167 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ from PIL import Image
6
+ from pydantic import TypeAdapter, ValidationError
7
+
8
+ from .types import CLA
9
+
10
+ if TYPE_CHECKING:
11
+ import numpy as np # type: ignore
12
+ from typing_extensions import TypeAlias, TypeIs
13
+
14
+ ImageType: TypeAlias = "np.ndarray[Any, Any] | Image.Image | str | None"
15
+
16
+
17
def _is_numpy_array(observation: Any) -> TypeIs[np.ndarray]:
    """Report whether *observation* is a ``numpy.ndarray``.

    numpy is imported lazily so it stays an optional dependency: when the
    import fails the answer is simply ``False``.
    """
    try:
        import numpy  # type: ignore

        is_array = isinstance(observation, numpy.ndarray)
    except (ModuleNotFoundError, NameError):
        is_array = False
    return is_array
25
+
26
+
27
class Adapter:
    """
    Base adapter between an agent's output and the environment's action format.

    ``adapt`` runs the pipeline ``preprocess`` -> ``convert`` (validate as a
    CLA) -> ``json`` (dump to a dict) -> ``postprocess_action`` (scale
    coordinates from agent to environment resolution). ``rescale`` handles the
    other direction for observations and records the environment size that
    ``postprocess_action`` uses.
    """

    def __init__(self) -> None:
        # Every action produced by convert() inside adapt(), oldest first.
        self.memory: list[Any] = []

        # Resolution shown to the agent vs. resolution of the environment.
        # env_* is overwritten by rescale() with the size of each observation.
        self.agent_width = 1920
        self.agent_height = 1080
        self.env_width = 1920
        self.env_height = 1080

    def preprocess(self, action: Any) -> Any:
        """Hook applied before conversion; the base implementation returns *action* unchanged."""
        return action

    def convert(self, action: Any) -> CLA:
        """
        Validate *action* against the CLA union.

        Raises:
            ValueError: If *action* is None or fails validation.
        """
        if action is None:
            raise ValueError("Please provide a valid action")
        try:
            return TypeAdapter(CLA).validate_python(action)
        except ValidationError as e:
            raise ValueError(f"Invalid action type in conversion: {action}") from e

    def json(self, action: CLA) -> Any:
        """
        Validate *action* and return its ``model_dump()`` representation.

        Raises:
            ValueError: If *action* is None or fails validation.
        """
        if action is None:
            raise ValueError("Please provide a valid action")
        try:
            validated = TypeAdapter(CLA).validate_python(action)
            return validated.model_dump()
        except ValidationError as e:
            raise ValueError(f"Invalid action type in json creation: {action}") from e

    def rescale(self, observation: ImageType) -> str | None:
        """
        Resize the observation (image) to agent-specific dimensions.

        The size of the incoming image is stored as the environment
        dimensions used later by ``postprocess_action``.

        Args:
            observation: Image data, which can be:
                - numpy array
                - PIL Image
                - base64 string (optionally with a ``data:...;base64,`` header)
                - None

        Returns:
            Base64-encoded string of the resized image (PNG format), or None
            when *observation* is None.

        Raises:
            ValueError: If the base64 string cannot be decoded into an image
                or the observation type is unsupported.
        """
        if observation is None:
            return None

        import base64
        import io

        # Handle different input types.
        if _is_numpy_array(observation):
            img = Image.fromarray(observation)
        elif isinstance(observation, Image.Image):
            img = observation
        elif isinstance(observation, str):
            # Assume it's a base64 string
            try:
                # Remove header if present (e.g., 'data:image/png;base64,')
                if "," in observation:
                    observation = observation.split(",")[1]
                img_bytes = base64.b64decode(observation)
                img = Image.open(io.BytesIO(img_bytes))
            except Exception as e:
                raise ValueError(f"Failed to decode base64 image: {e}") from None
        else:
            raise ValueError(f"Unsupported observation type: {type(observation)}")

        # Update environment dimensions
        self.env_width, self.env_height = img.size

        # Resize to agent dimensions
        resized_img = img.resize((self.agent_width, self.agent_height), Image.Resampling.LANCZOS)

        # Always convert to base64 string
        buffered = io.BytesIO()
        resized_img.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    def postprocess_action(self, action: dict[str, Any]) -> dict[str, Any]:
        """
        Rescale action coordinates from agent dimensions to environment dimensions.

        The ``point``, every entry of ``path`` and the ``scroll`` vector are
        multiplied by ``env / agent`` per axis and truncated with ``int``.

        Args:
            action: Action dictionary with coordinates; it is not modified.

        Returns:
            A rescaled copy of *action*; a falsy *action* is returned as-is.
        """
        if not action:
            return action

        import copy

        # Calculate scaling factors
        x_scale = self.env_width / self.agent_width
        y_scale = self.env_height / self.agent_height

        # Deep copy: the coordinates live in nested dicts, so a shallow
        # dict.copy() would still rescale the caller's data in place.
        processed_action = copy.deepcopy(action)

        # For actions with a single point (click, move)
        if (point := processed_action.get("point")) is not None:
            point["x"] = int(point["x"] * x_scale)
            point["y"] = int(point["y"] * y_scale)

        # For actions with a path (drag)
        if (path := processed_action.get("path")) is not None:
            for step in path:
                step["x"] = int(step["x"] * x_scale)
                step["y"] = int(step["y"] * y_scale)

        # For scroll actions: the scroll vector is scaled like a coordinate
        if (scroll := processed_action.get("scroll")) is not None:
            scroll["x"] = int(scroll["x"] * x_scale)
            scroll["y"] = int(scroll["y"] * y_scale)

        return processed_action

    def adapt(self, action: Any) -> dict[str, Any]:
        """Run the full pipeline on one action and return the rescaled action dict."""
        # any preprocessing steps
        action = self.preprocess(action)

        # convert to CLA; remembered so later conversions can look at history
        action = self.convert(action)
        self.memory.append(action)

        # convert to json
        action_dict = self.json(action)

        # apply coordinate rescaling
        return self.postprocess_action(action_dict)

    def adapt_list(self, actions: list[Any]) -> list[dict[str, Any]]:
        """
        Apply ``adapt`` to each action in order.

        Raises:
            ValueError: If *actions* is not a list.
        """
        if not isinstance(actions, list):
            raise ValueError("Please provide a list of actions")
        return [self.adapt(action) for action in actions]
@@ -0,0 +1,92 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Annotated, Literal, Union
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
+ # Base class for all actions
9
class CLAAction(BaseModel):
    # Common parent of every action model in this module. Each subclass
    # narrows `type` to a single Literal value, which the CLA union uses as
    # its discriminator.
    type: str
11
+
12
+
13
+ # Basic Point model for coordinates
14
class Point(BaseModel):
    # Integer x/y pair. The action models below use it both for positions
    # (`point`, `path`, `offset`) and for the scroll vector (`scroll`).
    x: int
    y: int
17
+
18
+
19
+ # CLICK ACTION (supports extra options)
20
class ClickAction(CLAAction):
    # Mouse click. Both `point` and `selector` are optional at the model level.
    type: Literal["click"] = "click"
    point: Point | None = None
    selector: str | None = None  # presumably an element selector - TODO confirm with the consumer
    button: Literal["left", "right", "wheel", "back", "forward"] = "left"
    pattern: list[int] | None = None  # [delay_1, delay_2, ...]
26
+
27
+
28
+ # PRESS ACTION for key presses/hotkeys
29
class PressAction(CLAAction):
    # Key press; `keys` holds one entry per key, several entries for a hotkey.
    type: Literal["press"] = "press"
    keys: list[str]
32
+
33
+
34
+ # TYPE ACTION for text typing
35
class TypeAction(CLAAction):
    # Text entry.
    type: Literal["type"] = "type"
    text: str
    # presumably "press Enter once the text is typed" - TODO confirm with the consumer
    enter_after: bool | None = False
39
+
40
+
41
+ # SCROLL ACTION
42
class ScrollAction(CLAAction):
    # Scroll; `point` is a position and `scroll` a signed x/y scroll vector.
    # Both are optional at the model level.
    type: Literal["scroll"] = "scroll"
    point: Point | None = None
    scroll: Point | None = None
46
+
47
+
48
+ # MOVE ACTION for mouse movement
49
class MoveAction(CLAAction):
    # Mouse move. All targeting fields are optional at the model level.
    type: Literal["move"] = "move"
    point: Point | None = None
    selector: str | None = None  # presumably an element selector - TODO confirm with the consumer
    offset: Point | None = None  # NOTE(review): reference frame of the offset is not visible here
54
+
55
+
56
+ # WAIT ACTION
57
class WaitAction(CLAAction):
    # Pause; `time` is required.
    type: Literal["wait"] = "wait"
    time: int  # in milliseconds
60
+
61
+
62
+ # DRAG ACTION
63
class DragAction(CLAAction):
    # Mouse drag along `path`, an ordered list of points (required).
    type: Literal["drag"] = "drag"
    path: list[Point]
    pattern: list[int] | None = None  # [delay_1, delay_2, ...]
67
+
68
+
69
+ # SCREENSHOT ACTION
70
class ScreenshotFetch(CLAAction):
    # Carries no data beyond its `type` tag.
    type: Literal["screenshot"] = "screenshot"
72
+
73
+
74
class PositionFetch(CLAAction):
    # Carries no data beyond its `type` tag.
    type: Literal["position"] = "position"
76
+
77
+
78
+ # Union of all possible actions
79
# Discriminated union: the `type` field of the input selects which member
# model is used for validation.
CLA = Annotated[
    Union[
        ClickAction,
        PressAction,
        TypeAction,
        ScrollAction,
        MoveAction,
        WaitAction,
        DragAction,
        ScreenshotFetch,
        PositionFetch,
    ],
    Field(discriminator="type"),
]
hud/client.py ADDED
@@ -0,0 +1,184 @@
1
+ """
2
+ HUD client for interacting with the API.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ from typing import Any
9
+
10
+ from .adapters.common import Adapter
11
+ from .env import EvalSet
12
+ from .gym import Gym
13
+ from .run import Run, RunResponse
14
+ from .server import make_request, make_sync_request
15
+ from .settings import settings
16
+
17
+
18
class HUDClient:
    """
    Entry point of the SDK: talks to the HUD API on behalf of one API key.

    Offers loaders for gyms, evalsets and runs, listing endpoints, and run
    creation.
    """

    def __init__(self, api_key: str) -> None:
        """
        Store the API key on the client and in the global settings.

        Args:
            api_key: API key for authentication with the HUD API
        """
        settings.api_key = api_key  # Set global config
        self.api_key = api_key

    async def _get(self, path: str) -> Any:
        """Issue an authenticated GET for *path* (relative to the base URL)."""
        return await make_request(
            method="GET",
            url=f"{settings.base_url}{path}",
            api_key=self.api_key,
        )

    async def load_gym(self, id: str) -> Gym:
        """
        Fetch a gym from the HUD API.

        Args:
            id: The ID of the gym to load

        Returns:
            Gym: Built from the ``id`` and ``name`` of the response
        """
        gym_info = await self._get(f"/gyms/{id}")
        return Gym(id=gym_info["id"], name=gym_info["name"])

    async def load_evalset(self, id: str) -> EvalSet:
        """
        Fetch an evalset from the HUD API.

        Args:
            id: The ID of the evalset to load

        Returns:
            EvalSet: Built from the ``id`` and ``name`` of the response
        """
        evalset_info = await self._get(f"/evalsets/{id}")
        return EvalSet(id=evalset_info["id"], name=evalset_info["name"])

    async def list_gyms(self) -> list[str]:
        """
        List all available gyms.

        Returns:
            list[str]: The ``gyms`` entry of the response
        """
        listing = await self._get("/gyms")
        return listing["gyms"]

    async def get_runs(self) -> list[Run]:
        """
        Get all runs associated with the API key.

        Returns:
            list[Run]: The ``runs`` entry of the response, passed through
            as received
        """
        listing = await self._get("/runs")
        return listing["runs"]

    async def load_run(self, id: str, adapter: Adapter | None = None) -> Run | None:
        """
        Fetch a run from the HUD API and rebuild it with its gym and evalset.

        Args:
            id: The ID of the run to load
            adapter: Optional adapter for action conversion; a default
                ``Adapter`` is created when omitted

        Returns:
            Run: The loaded run object, or None if the response is empty
        """
        adapter = adapter or Adapter()
        payload = await self._get(f"/runs/{id}")
        if not payload:
            return None

        parsed = RunResponse(**payload)
        run_gym = Gym(id=parsed.gym["id"], name=parsed.gym["name"])
        run_evalset = EvalSet(
            id=parsed.evalset["id"],
            name=parsed.evalset["name"],
            tasks=parsed.evalset["tasks"],
        )
        return Run(
            id=parsed.id,
            name=parsed.name,
            gym=run_gym,
            evalset=run_evalset,
            adapter=adapter,
            config=parsed.config,
            metadata=parsed.metadata,
        )

    def create_run(
        self,
        name: str,
        gym: Gym,
        evalset: EvalSet,
        config: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
        adapter: Adapter | None = None,
    ) -> Run:
        """
        Register a new run with the HUD API (synchronous request).

        Args:
            name: Name of the run
            gym: Gym to use for the run
            evalset: Evalset to use for the run
            config: Optional configuration parameters (defaults to ``{}``)
            metadata: Optional metadata for the run (defaults to ``{}``)
            adapter: Optional adapter for action conversion; a default
                ``Adapter`` is created when omitted

        Returns:
            Run: The created run, carrying the ID returned by the API
        """
        adapter = adapter or Adapter()
        metadata = {} if metadata is None else metadata
        config = {} if config is None else config

        endpoint = f"{settings.base_url}/runs"
        # config and metadata are sent as JSON-encoded strings
        body = {
            "name": name,
            "gym_id": gym.id,
            "evalset_id": evalset.id,
            "config": json.dumps(config),
            "metadata": json.dumps(metadata),
        }
        created = make_sync_request(
            method="POST",
            url=endpoint,
            json=body,
            api_key=self.api_key,
        )
        return Run(
            id=created["id"],
            name=name,
            gym=gym,
            evalset=evalset,
            adapter=adapter,
            config=config,
            metadata=metadata,
        )