hud-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/env.py ADDED
@@ -0,0 +1,258 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from hud.server import make_request
8
+ from hud.settings import settings
9
+
10
+ if TYPE_CHECKING:
11
+ from .adapters.common import Adapter
12
+
13
+
14
class Observation(BaseModel):
    """
    A single snapshot of what the environment exposes to the agent.

    Both fields are optional and default to None.

    Attributes:
        screenshot: Screen capture as a base64-encoded PNG string
        text: Textual observation, when one is available
    """

    screenshot: str | None = None  # PNG image, base64-encoded
    text: str | None = None
24
+
25
+
26
class TaskResult(BaseModel):
    """
    Outcome of one environment step.

    All four fields are required.

    Attributes:
        observation: Observation produced by the step
        reward: Reward value reported for the step
        terminated: True once the task is complete
        info: Extra key/value information from the environment
    """

    observation: Observation
    reward: float
    terminated: bool
    info: dict[str, Any]
40
+
41
+
42
class Env:
    """
    Environment interface for agent interactions.

    Wraps the HUD environment endpoints: creating the remote environment,
    querying its state, stepping it with agent actions, resetting it to a
    task, evaluating it and closing it.

    Attributes:
        run_id: ID of the run this environment belongs to
        config: Configuration parameters (empty dict if none were given)
        adapter: Adapter used to translate agent actions
        metadata: Metadata sent when the environment is created
        final_response: Starts as None; this class never writes to it
        id: Environment ID, None until create_environment() has run
        vnc_url: VNC URL, None until get_vnc_url() has run
        current_observation: Latest observation returned by step() or
            reset(), None before either has been called
    """

    def __init__(
        self,
        run_id: str,
        adapter: Adapter,
        config: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize an environment.

        No request is made here; call create_environment() to obtain an ID.

        Args:
            run_id: ID of the run this environment belongs to
            adapter: Adapter for converting actions
            config: Optional configuration parameters
            metadata: Optional metadata for the environment
        """
        self.run_id = run_id
        self.adapter = adapter
        # A fresh dict per instance when the caller passes nothing.
        self.config = {} if config is None else config
        self.metadata = {} if metadata is None else metadata
        self.final_response: None | str = None
        # Assigned by create_environment(); None until then.
        self.id: str | None = None
        # Assigned by get_vnc_url(); None until then.
        self.vnc_url: str | None = None
        # Defined up front so reading it before the first step()/reset()
        # yields None instead of raising AttributeError.
        self.current_observation: Observation | None = None

    async def create_environment(self) -> str:
        """
        Create the remote environment and remember its ID.

        Returns:
            str: The environment ID (also stored on ``self.id``)
        """
        data = await make_request(
            method="POST",
            url=f"{settings.base_url}/create_environment",
            json={"run_id": self.run_id, "metadata": self.metadata},
            api_key=settings.api_key,
        )
        self.id = data["id"]
        return self.id

    async def get_vnc_url(self) -> str:
        """
        Get the VNC URL for the environment.

        Returns:
            str: The VNC URL (also stored on ``self.vnc_url``)
        """
        data = await make_request(
            method="GET",
            url=f"{settings.base_url}/environment/{self.id}/vnc",
            api_key=settings.api_key,
        )
        # The response carries the URL under the "vm_url" key.
        self.vnc_url = data["vm_url"]
        return self.vnc_url

    async def get_env_state(self) -> str:
        """
        Get the state of the environment.

        Returns:
            str: The current state (e.g., "running", "error")
        """
        data = await make_request(
            method="GET",
            url=f"{settings.base_url}/get_env_state/{self.id}",
            api_key=settings.api_key,
        )
        return data["state"]

    async def step(
        self, action: Any | None = None
    ) -> tuple[Observation, float, bool, dict[str, Any]]:
        """
        Send action to environment and get result.

        Args:
            action: The action (or list of actions) to take, or None to
                send an empty action list

        Returns:
            tuple: (observation, reward, terminated, info)
        """
        action_list = self.translate_action(action) if action is not None else []
        data = await make_request(
            method="POST",
            url=f"{settings.base_url}/execute_step/{self.id}",
            json=action_list,
            api_key=settings.api_key,
        )
        # Convert the raw observation to the correct type and remember it.
        self.current_observation = Observation(**data["observation"])
        data["observation"] = self.current_observation
        # Building a TaskResult validates the remaining fields.
        task_result = TaskResult(**data)
        return (
            task_result.observation,
            task_result.reward,
            task_result.terminated,
            task_result.info,
        )

    def translate_action(self, action: Any) -> list:
        """
        Translate action to the correct format.

        Args:
            action: A single action, or a list of actions

        Returns:
            list: List of translated actions in the CLA format
        """
        # A list goes through the adapter's batch method; a single action is
        # adapted on its own and wrapped so the result is always a list.
        if isinstance(action, list):
            return self.adapter.adapt_list(action)
        return [self.adapter.adapt(action)]

    async def evaluate(self) -> float:
        """
        Get final evaluation score.

        Returns:
            float: The "reward" value from the evaluation response
        """
        data = await make_request(
            method="POST",
            url=f"{settings.base_url}/evaluation/{self.id}",
            api_key=settings.api_key,
        )
        return data["reward"]

    async def close(self) -> None:
        """
        Close the environment.
        """
        await make_request(
            method="POST",
            url=f"{settings.base_url}/close/{self.id}",
            api_key=settings.api_key,
        )

    async def reset(self, task_id: str, metadata: dict[str, Any] | None = None) -> Observation:
        """
        Reset the environment to the task.

        Args:
            task_id: ID of the task to reset to
            metadata: Optional metadata for the reset

        Returns:
            Observation: Initial observation for the task (also stored on
                ``self.current_observation``)
        """
        if metadata is None:
            metadata = {}
        data = await make_request(
            method="POST",
            url=f"{settings.base_url}/environments/{self.id}/reset",
            json={"task_id": task_id, "metadata": metadata},
            api_key=settings.api_key,
        )
        # Record the initial observation the same way step() records later ones.
        self.current_observation = Observation(**data["observation"])
        return self.current_observation
215
+
216
+
217
class EvalSet:
    """
    A named collection of task IDs used for benchmarking.

    Attributes:
        id: Unique identifier for the evalset
        name: Human-readable name
        tasks: Task IDs belonging to this evalset
    """

    def __init__(
        self,
        id: str,
        name: str,
        tasks: list[str] | None = None,
    ) -> None:
        """
        Build an evaluation set.

        Args:
            id: Unique identifier
            name: Human-readable name
            tasks: Optional task IDs; None or an empty list gives a new
                empty list
        """
        self.id = id
        self.name = name
        # Any falsy value (None or an empty list) is swapped for a new list.
        self.tasks = tasks if tasks else []

    async def fetch_tasks(self) -> list[str]:
        """
        Load this evalset's task IDs from the API and cache them.

        Returns:
            list[str]: The task IDs, which are also stored on ``self.tasks``
        """
        response = await make_request(
            method="GET",
            url=f"{settings.base_url}/evalsets/{self.id}/tasks",
            api_key=settings.api_key,
        )
        task_ids = response["tasks"]
        self.tasks = task_ids
        return self.tasks
hud/gym.py ADDED
@@ -0,0 +1,22 @@
1
+ from __future__ import annotations
2
+
3
+
4
class Gym:
    """
    A simulation environment known to the HUD system.

    Attributes:
        id: Unique identifier for the gym
        name: Human-readable name of the gym
    """

    def __init__(self, id: str, name: str) -> None:
        """
        Store the gym's identity.

        Args:
            id: Unique identifier
            name: Human-readable name
        """
        self.id, self.name = id, name
hud/py.typed ADDED
File without changes
hud/run.py ADDED
@@ -0,0 +1,157 @@
1
from __future__ import annotations

# Imported at runtime (not under TYPE_CHECKING): pydantic must resolve the
# `datetime` annotation on RunAnalyticsResponse when it builds the model.
from datetime import datetime
from typing import TYPE_CHECKING, Any

from pydantic import BaseModel, Field

from .adapters.common import Adapter
from .env import Env, EvalSet
from .server import make_request
from .settings import settings

if TYPE_CHECKING:
    from .gym import Gym
16
+
17
+
18
class RunResponse(BaseModel):
    """
    Shape of the run data returned by the API.

    All fields are required.

    Attributes:
        id: Unique identifier for the run
        name: Human-readable name of the run
        gym: Gym information as a dictionary
        evalset: Evalset information as a dictionary
        adapter: Adapter information as a dictionary
        config: Configuration parameters as a dictionary
        metadata: Metadata as a dictionary
    """

    # Identity
    id: str
    name: str

    # Nested payloads, kept as plain dictionaries
    gym: dict[str, Any]
    evalset: dict[str, Any]
    adapter: dict[str, Any]
    config: dict[str, Any]
    metadata: dict[str, Any]
38
+
39
+
40
class RunAnalyticsResponse(BaseModel):
    """
    Model for Run analytics data.

    Attributes:
        id: Unique identifier for the run
        name: Human-readable name of the run
        status_counts: Counts of tasks in different states
        avg_score: Average score across all tasks, if available
        completion_rate: Percentage of tasks completed
        total_tasks: Total number of tasks in the run
        completed_tasks: Number of completed tasks
        running_time: Total runtime in seconds, if available
        created_at: When the run was created
        raw_data: Detailed data about tasks and environments
    """

    id: str
    name: str
    status_counts: dict[str, int]  # e.g. {"completed": 5, "running": 2, "error": 1}
    avg_score: float | None = None
    completion_rate: float | None = None  # percentage of tasks completed
    total_tasks: int
    completed_tasks: int
    running_time: float | None = None  # runtime in seconds if available
    # NOTE: annotations in this module are postponed strings, and pydantic
    # evaluates them against the module namespace when building the model.
    # `datetime` therefore has to be a real runtime import; if it is only
    # imported under TYPE_CHECKING the model cannot be fully defined and
    # instantiation fails.
    created_at: datetime
    # A new dict with empty "tasks" and "environments" lists per instance.
    raw_data: dict[str, list[dict[str, Any]]] = Field(
        default_factory=lambda: {"tasks": [], "environments": []}
    )
68
+
69
+
70
class Run:
    """
    Groups the tasks and environments that belong to one run.

    Offers helpers to fetch the run's task IDs, spawn environments and
    retrieve analytics.
    """

    def __init__(
        self,
        id: str,
        name: str,
        gym: Gym,
        evalset: EvalSet,
        config: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
        adapter: Adapter | None = None,
    ) -> None:
        """
        Set up a run.

        Args:
            id: Unique identifier
            name: Human-readable name
            gym: Gym object for this run
            evalset: EvalSet object containing tasks
            config: Optional configuration parameters
            metadata: Optional metadata
            adapter: Optional adapter for action conversion; a default
                Adapter is created when none (or a falsy one) is passed
        """
        # Resolve the adapter first; any falsy value gets the default.
        if not adapter:
            adapter = Adapter()

        self.id = id
        self.name = name
        self.gym = gym
        self.evalset = evalset
        self.adapter = adapter
        self.config = {} if config is None else config
        self.metadata = {} if metadata is None else metadata
        # Every environment created through make() is tracked here.
        self.envs: list[Env] = []

    async def fetch_task_ids(self) -> list[str]:
        """
        Ask the evalset for the task IDs of this run.

        Returns:
            list[str]: List of task IDs
        """
        task_ids = await self.evalset.fetch_tasks()
        return task_ids

    async def make(self, metadata: dict[str, Any]) -> Env:
        """
        Spawn a new environment attached to this run.

        Args:
            metadata: Metadata for the environment

        Returns:
            Env: The created environment, already registered in ``self.envs``
        """
        new_env = Env(
            run_id=self.id,
            adapter=self.adapter,
            config=self.config,
            metadata=metadata,
        )
        # Only track the environment once it has been created.
        await new_env.create_environment()
        self.envs.append(new_env)
        return new_env

    async def get_analytics(self) -> RunAnalyticsResponse:
        """
        Retrieve analytics for this run.

        Returns:
            RunAnalyticsResponse: Analytics data including status counts,
                average score, and other metrics
        """
        payload = await make_request(
            method="GET",
            url=f"{settings.base_url}/runs/{self.id}/analytics",
            api_key=settings.api_key,
        )
        return RunAnalyticsResponse(**payload)
hud/server/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ from .requests import RequestError, make_request, make_sync_request
4
+
5
+ __all__ = ["RequestError", "make_request", "make_sync_request"]
hud/server/requests.py ADDED
@@ -0,0 +1,79 @@
1
+ """
2
+ HTTP request utilities for the HUD API.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any
8
+
9
+ import httpx
10
+
11
+
12
class RequestError(Exception):
    """Raised by the request helpers when an API call cannot be completed."""
16
+
17
+
18
async def make_request(
    method: str, url: str, json: Any | None = None, api_key: str | None = None
) -> dict[str, Any]:
    """
    Make an asynchronous HTTP request to the HUD API.

    A new client with a 240 second timeout is opened for each call, and the
    API key is sent as a Bearer token in the Authorization header.

    Args:
        method: HTTP method (GET, POST, etc.)
        url: Full URL for the request
        json: Optional JSON serializable data
        api_key: API key for authentication

    Returns:
        dict: JSON response from the server

    Raises:
        RequestError: If the API key is missing, the request fails, the
            server answers with an error status, or the response body is
            not valid JSON
    """
    if not api_key:
        raise RequestError("API key is required but not provided")

    headers = {"Authorization": f"Bearer {api_key}"}

    async with httpx.AsyncClient(timeout=240.0) as client:
        try:
            response = await client.request(method=method, url=url, json=json, headers=headers)
            response.raise_for_status()
        except httpx.HTTPError as e:
            raise RequestError(f"Request failed: {e!s}") from None

        # A successful status does not guarantee a JSON body; surface a
        # decode failure as RequestError instead of leaking a raw ValueError.
        try:
            return response.json()
        except ValueError as e:
            raise RequestError(f"Request failed: invalid JSON response: {e!s}") from None
48
+
49
+
50
def make_sync_request(
    method: str, url: str, json: Any | None = None, api_key: str | None = None
) -> dict[str, Any]:
    """
    Make a synchronous HTTP request to the HUD API.

    A new client with a 240 second timeout is opened for each call, and the
    API key is sent as a Bearer token in the Authorization header.

    Args:
        method: HTTP method (GET, POST, etc.)
        url: Full URL for the request
        json: Optional JSON serializable data
        api_key: API key for authentication

    Returns:
        dict: JSON response from the server

    Raises:
        RequestError: If the API key is missing, the request fails, the
            server answers with an error status, or the response body is
            not valid JSON
    """
    if not api_key:
        raise RequestError("API key is required but not provided")

    headers = {"Authorization": f"Bearer {api_key}"}

    with httpx.Client(timeout=240.0) as client:
        try:
            response = client.request(method=method, url=url, json=json, headers=headers)
            response.raise_for_status()
        except httpx.HTTPError as e:
            raise RequestError(f"Request failed: {e!s}") from None

        # A successful status does not guarantee a JSON body; surface a
        # decode failure as RequestError instead of leaking a raw ValueError.
        try:
            return response.json()
        except ValueError as e:
            raise RequestError(f"Request failed: invalid JSON response: {e!s}") from None
hud/settings.py ADDED
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import Field
4
+ from pydantic_settings import BaseSettings, SettingsConfigDict
5
+
6
+
7
class Settings(BaseSettings):
    """
    Configuration for the HUD SDK.

    Values come from environment variables, with a UTF-8 ``.env`` file also
    configured as a source. Extra, undeclared settings are allowed.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="allow")

    # Read under the name "base_url"; defaults to the hosted orchestrator.
    base_url: str = Field(
        validation_alias="base_url",
        default="https://orchestrator.hud.live/hud-gym/api/v1",
        description="Base URL for the HUD API",
    )

    # Read under the name "HUD_API_KEY"; None when it is not set.
    api_key: str | None = Field(
        validation_alias="HUD_API_KEY",
        default=None,
        description="API key for authentication with the HUD API",
    )
31
+
32
+
33
# Module-wide singleton, built once when this module is imported.
settings = Settings()


# Accessor kept for backwards compatibility.
def get_settings() -> Settings:
    """Return the shared module-level ``settings`` instance."""
    return settings
hud/utils/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ from .config import configuration
4
+
5
+ __all__ = ["configuration"]
hud/utils/config.py ADDED
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from hud.settings import settings
4
+
5
# Backwards-compatible alias: 'configuration' is the same object as the
# shared settings instance, not a copy.
configuration = settings
@@ -0,0 +1,125 @@
1
+ Metadata-Version: 2.4
2
+ Name: hud-python
3
+ Version: 0.1.0
4
+ Summary: SDK for the HUD evaluation platform.
5
+ Project-URL: Homepage, https://github.com/Human-Data/hud-sdk
6
+ Project-URL: Bug Tracker, https://github.com/Human-Data/hud-sdk/issues
7
+ Project-URL: Documentation, https://hud.so
8
+ Author-email: Human Union Data SDK <founders@hud.so>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2025 Human Data Company
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Classifier: Development Status :: 4 - Beta
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: Programming Language :: Python :: 3.9
35
+ Classifier: Programming Language :: Python :: 3.10
36
+ Classifier: Programming Language :: Python :: 3.11
37
+ Classifier: Programming Language :: Python :: 3.12
38
+ Classifier: Programming Language :: Python :: 3.13
39
+ Requires-Python: <3.14,>=3.9
40
+ Requires-Dist: eval-type-backport>=0.2.2
41
+ Requires-Dist: httpx<1,>=0.23.0
42
+ Requires-Dist: pillow<12,>=11
43
+ Requires-Dist: pydantic-settings<3,>=2
44
+ Requires-Dist: pydantic<3,>=2
45
+ Provides-Extra: dev
46
+ Requires-Dist: anthropic; extra == 'dev'
47
+ Requires-Dist: ipykernel; extra == 'dev'
48
+ Requires-Dist: ipython<9; extra == 'dev'
49
+ Requires-Dist: jupyter-client; extra == 'dev'
50
+ Requires-Dist: jupyter-core; extra == 'dev'
51
+ Requires-Dist: openai; extra == 'dev'
52
+ Requires-Dist: pyright==1.1.364; extra == 'dev'
53
+ Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
54
+ Requires-Dist: ruff==0.9.8; extra == 'dev'
55
+ Description-Content-Type: text/markdown
56
+
57
+ # HUD SDK (Alpha Release)
58
+
59
+ A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models.
60
+
61
+ Visit [hud.so](https://hud.so) for more information about HUD.
62
+
63
+ > **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0). The API is still evolving and may change in future releases as we gather feedback and improve functionality.
64
+
65
+ [![PyPI version](https://img.shields.io/pypi/v/hud-python)](https://pypi.org/project/hud-python/)
66
+
67
+ [📚 Documentation](https://docs.hud.so) | [🏠 Homepage](https://hud.so)
68
+
69
+ ## Quick Start
70
+
71
+ ```bash
72
+ # Install the latest stable release
73
+ pip install hud-python
74
+
75
+ # Install the latest alpha release (may include breaking changes)
76
+ pip install --pre hud-python
77
+
78
+ # Install a specific version
79
+ pip install hud-python==0.1.0
80
+ ```
81
+
82
+ ```python
83
+ import asyncio
84
+ from hud import HUDClient
85
+
86
+ async def main():
87
+ # Initialize client with API key
88
+ client = HUDClient(api_key="your-api-key")
89
+
90
+ # Load a gym and evaluation set
91
+ gym = await client.load_gym(id="OSWorld-Ubuntu")
92
+ evalset = await client.load_evalset(id="OSWorld-Ubuntu")
93
+
94
+ # Create a run and environment
95
+ run = client.create_run(name="example-run", gym=gym, evalset=evalset)
96
+ env = await run.make(metadata={"agent_id": "example"})
97
+
98
+ # Agent loop goes here
99
+ # For complete examples and usage guides, see our documentation
100
+
101
+ # Close the environment when done
102
+ await env.close()
103
+
104
+ if __name__ == "__main__":
105
+ asyncio.run(main())
106
+ ```
107
+
108
+ ## Key Features
109
+
110
+ - Connect to HUD evaluation environments
111
+ - Run benchmarks across various tasks
112
+ - Support for different agent adapters
113
+ - Asynchronous API for efficient interaction
114
+
115
+ ## Documentation
116
+
117
+ For comprehensive guides, examples, and API reference, visit:
118
+ - [Getting Started](https://docs.hud.so/introduction)
119
+ - [Installation](https://docs.hud.so/installation)
120
+ - [API Reference](https://docs.hud.so/api-reference)
121
+ - [Examples](https://docs.hud.so/examples)
122
+
123
+ ## License
124
+
125
+ [MIT License](LICENSE)