hud-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -0
- hud/adapters/__init__.py +5 -0
- hud/adapters/claude/__init__.py +6 -0
- hud/adapters/claude/adapter.py +131 -0
- hud/adapters/common/__init__.py +6 -0
- hud/adapters/common/adapter.py +167 -0
- hud/adapters/common/types.py +92 -0
- hud/client.py +184 -0
- hud/env.py +258 -0
- hud/gym.py +22 -0
- hud/py.typed +0 -0
- hud/run.py +157 -0
- hud/server/__init__.py +5 -0
- hud/server/requests.py +79 -0
- hud/settings.py +39 -0
- hud/utils/__init__.py +5 -0
- hud/utils/config.py +7 -0
- hud_python-0.1.0.dist-info/METADATA +125 -0
- hud_python-0.1.0.dist-info/RECORD +21 -0
- hud_python-0.1.0.dist-info/WHEEL +4 -0
- hud_python-0.1.0.dist-info/licenses/LICENSE +21 -0
hud/env.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
from hud.server import make_request
|
|
8
|
+
from hud.settings import settings
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from .adapters.common import Adapter
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Observation(BaseModel):
    """
    What the environment reports back to the agent.

    Both fields are optional and default to ``None``.

    Attributes:
        screenshot: Base64 encoded PNG string of the screen
        text: Text observation, if available
    """

    # PNG image carried as a base64 string.
    screenshot: str | None = None
    text: str | None = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TaskResult(BaseModel):
    """
    Validated payload describing the outcome of one task step.

    All four fields are required.

    Attributes:
        observation: The current observation
        reward: Reward value from the step
        terminated: Whether the task is complete
        info: Additional information from the environment
    """

    observation: Observation
    reward: float
    terminated: bool
    # Free-form mapping; its keys are not constrained by this model.
    info: dict[str, Any]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Env:
    """
    Environment interface for agent interactions.

    This class handles the environment state and interactions, including
    creating the environment, retrieving state, and executing actions.
    Every remote operation is an HTTP call made through ``make_request``
    against ``settings.base_url`` using ``settings.api_key``.

    Attributes:
        run_id: ID of the run this environment belongs to
        config: Configuration parameters (empty dict if none were given)
        adapter: Adapter used to translate actions
        metadata: Metadata sent when the environment is created
        final_response: Initialized to ``None``; not modified by this class
        id: Environment ID, ``None`` until ``create_environment`` succeeds
        vnc_url: VNC URL, ``None`` until ``get_vnc_url`` succeeds
        current_observation: Latest observation produced by ``step`` or
            ``reset``; ``None`` before either has been called
    """

    def __init__(
        self,
        run_id: str,
        adapter: Adapter,
        config: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize an environment.

        No request is made here; call ``create_environment`` to create the
        environment and obtain its ID.

        Args:
            run_id: ID of the run this environment belongs to
            adapter: Adapter for converting actions
            config: Optional configuration parameters
            metadata: Optional metadata for the environment
        """
        if metadata is None:
            metadata = {}
        if config is None:
            config = {}
        self.run_id = run_id
        self.config = config
        self.adapter = adapter
        self.metadata = metadata
        self.final_response: None | str = None
        # The environment ID is assigned by create_environment(); the other
        # request methods interpolate it into their URLs.
        self.id: str | None = None
        # Filled in by get_vnc_url().
        self.vnc_url: str | None = None
        # Declared here so it can be read before the first step()/reset()
        # instead of raising AttributeError.
        self.current_observation: Observation | None = None

    async def create_environment(self) -> str:
        """
        Create the environment and remember its ID.

        Posts ``run_id`` and ``metadata`` to the ``create_environment``
        endpoint and stores the returned ``id`` on ``self.id``.

        Returns:
            str: The environment ID
        """
        data = await make_request(
            method="POST",
            url=f"{settings.base_url}/create_environment",
            json={"run_id": self.run_id, "metadata": self.metadata},
            api_key=settings.api_key,
        )
        env_id: str = data["id"]
        self.id = env_id
        return env_id

    async def get_vnc_url(self) -> str:
        """
        Get the VNC URL for the environment.

        The value is read from the ``vm_url`` key of the response and cached
        on ``self.vnc_url``.

        Returns:
            str: The VNC URL for remote viewing/control
        """
        data = await make_request(
            method="GET",
            url=f"{settings.base_url}/environment/{self.id}/vnc",
            api_key=settings.api_key,
        )
        vnc_url: str = data["vm_url"]
        self.vnc_url = vnc_url
        return vnc_url

    async def get_env_state(self) -> str:
        """
        Get the state of the environment.

        Returns:
            str: The current state (e.g., "running", "error")
        """
        data = await make_request(
            method="GET",
            url=f"{settings.base_url}/get_env_state/{self.id}",
            api_key=settings.api_key,
        )
        return data["state"]

    async def step(
        self, action: Any | None = None
    ) -> tuple[Observation, float, bool, dict[str, Any]]:
        """
        Send action to environment and get result.

        The action is translated with ``translate_action`` first; ``None``
        sends an empty action list. The returned observation is also stored
        on ``self.current_observation``.

        Args:
            action: The action to take, or None for no action

        Returns:
            tuple: (observation, reward, terminated, info)
        """
        action_list = self.translate_action(action) if action is not None else []
        data = await make_request(
            method="POST",
            url=f"{settings.base_url}/execute_step/{self.id}",
            json=action_list,
            api_key=settings.api_key,
        )
        # Convert the raw observation to the correct type before validating
        # the whole payload.
        self.current_observation = Observation(**data["observation"])
        data["observation"] = self.current_observation
        task_result = TaskResult(**data)
        return (
            task_result.observation,
            task_result.reward,
            task_result.terminated,
            task_result.info,
        )

    def translate_action(self, action: Any) -> list:
        """
        Translate action to the correct format.

        A list is passed to ``adapter.adapt_list``; anything else is passed
        to ``adapter.adapt`` and wrapped in a single-element list.

        Args:
            action: The action to translate

        Returns:
            list: List of translated actions in the CLA format
        """
        if isinstance(action, list):
            return self.adapter.adapt_list(action)
        return [self.adapter.adapt(action)]

    async def evaluate(self) -> float:
        """
        Get final evaluation score.

        Returns:
            float: The evaluation score (the ``reward`` key of the response)
        """
        data = await make_request(
            method="POST",
            url=f"{settings.base_url}/evaluation/{self.id}",
            api_key=settings.api_key,
        )
        return data["reward"]

    async def close(self) -> None:
        """
        Close the environment.
        """
        await make_request(
            method="POST",
            url=f"{settings.base_url}/close/{self.id}",
            api_key=settings.api_key,
        )

    async def reset(self, task_id: str, metadata: dict[str, Any] | None = None) -> Observation:
        """
        Reset the environment to the task.

        The initial observation is also stored on
        ``self.current_observation``, matching what ``step`` does.

        Args:
            task_id: ID of the task to reset to
            metadata: Optional metadata for the reset

        Returns:
            Observation: Initial observation for the task
        """
        if metadata is None:
            metadata = {}
        data = await make_request(
            method="POST",
            url=f"{settings.base_url}/environments/{self.id}/reset",
            json={"task_id": task_id, "metadata": metadata},
            api_key=settings.api_key,
        )
        observation = Observation(**data["observation"])
        self.current_observation = observation
        return observation
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class EvalSet:
    """
    A named collection of task IDs used for benchmarking.

    Attributes:
        id: Unique identifier for the evalset
        name: Human-readable name
        tasks: List of task IDs in this evalset
    """

    def __init__(self, id: str, name: str, tasks: list[str] | None = None) -> None:
        """
        Build an evaluation set.

        Args:
            id: Unique identifier
            name: Human-readable name
            tasks: Optional list of task IDs; a missing or empty value is
                replaced by a new empty list
        """
        self.id, self.name = id, name
        self.tasks = tasks if tasks else []

    async def fetch_tasks(self) -> list[str]:
        """
        Download the task IDs of this evalset and cache them on ``tasks``.

        Returns:
            list[str]: List of task IDs
        """
        endpoint = f"{settings.base_url}/evalsets/{self.id}/tasks"
        payload = await make_request(method="GET", url=endpoint, api_key=settings.api_key)
        self.tasks = payload["tasks"]
        return self.tasks
|
hud/gym.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Gym:
    """
    A simulation environment in the HUD system, identified by ID and name.

    Attributes:
        id: Unique identifier for the gym
        name: Human-readable name of the gym
    """

    def __init__(self, id: str, name: str) -> None:
        """
        Store the identifying fields of a gym.

        Args:
            id: Unique identifier
            name: Human-readable name
        """
        self.id, self.name = id, name
|
hud/py.typed
ADDED
|
File without changes
|
hud/run.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from __future__ import annotations

# `datetime` annotates a pydantic field (RunAnalyticsResponse.created_at), so
# pydantic must be able to resolve it at runtime; it cannot live under
# TYPE_CHECKING.
from datetime import datetime  # noqa: TC003
from typing import TYPE_CHECKING, Any

from pydantic import BaseModel, Field

from .adapters.common import Adapter
from .env import Env, EvalSet
from .server import make_request
from .settings import settings

if TYPE_CHECKING:
    from .gym import Gym
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class RunResponse(BaseModel):
    """
    Validated shape of a run as returned by the API.

    All fields are required.

    Attributes:
        id: Unique identifier for the run
        name: Human-readable name of the run
        gym: Dictionary containing gym information
        evalset: Dictionary containing evalset information
        adapter: Dictionary containing adapter information
        config: Dictionary containing configuration parameters
        metadata: Dictionary containing metadata
    """

    # Identity
    id: str
    name: str

    # Nested API objects, kept as plain dictionaries
    gym: dict[str, Any]
    evalset: dict[str, Any]
    adapter: dict[str, Any]

    # Free-form settings
    config: dict[str, Any]
    metadata: dict[str, Any]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RunAnalyticsResponse(BaseModel):
    """
    Validated analytics summary for a run.

    Attributes:
        id: Unique identifier for the run
        name: Human-readable name of the run
        status_counts: Counts of tasks in different states
        avg_score: Average score across all tasks, if available
        completion_rate: Percentage of tasks completed
        total_tasks: Total number of tasks in the run
        completed_tasks: Number of completed tasks
        running_time: Total runtime in seconds, if available
        created_at: When the run was created
        raw_data: Detailed data about tasks and environments
    """

    id: str
    name: str
    # Example: {"completed": 5, "running": 2, "error": 1}
    status_counts: dict[str, int]
    avg_score: float | None = None
    # Percentage of tasks completed
    completion_rate: float | None = None
    total_tasks: int
    completed_tasks: int
    # Runtime in seconds, when available
    running_time: float | None = None
    # pydantic resolves this annotation at runtime, so `datetime` has to be
    # importable at module level, not only under TYPE_CHECKING.
    created_at: datetime
    # Each instance gets its own pair of empty lists.
    raw_data: dict[str, list[dict[str, Any]]] = Field(
        default_factory=lambda: {"tasks": [], "environments": []}
    )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class Run:
    """
    A run represents a collection of tasks and environments.

    This class provides methods to fetch task IDs, create environments,
    and access analytics for the run.

    Attributes:
        id: Unique identifier of the run
        name: Human-readable name
        gym: Gym object for this run
        evalset: EvalSet object containing tasks
        adapter: Adapter handed to every environment created by ``make``
        config: Configuration handed to every environment created by ``make``
        metadata: Run-level metadata
        envs: Environments created through ``make``, in creation order
    """

    def __init__(
        self,
        id: str,
        name: str,
        gym: Gym,
        evalset: EvalSet,
        config: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
        adapter: Adapter | None = None,
    ) -> None:
        """
        Initialize a run.

        Args:
            id: Unique identifier
            name: Human-readable name
            gym: Gym object for this run
            evalset: EvalSet object containing tasks
            config: Optional configuration parameters
            metadata: Optional metadata
            adapter: Optional adapter for action conversion; a default
                ``Adapter()`` is created only when this is ``None``
        """
        # Compare against None explicitly so that a caller-supplied adapter
        # is never replaced just because it evaluates as falsy.
        if adapter is None:
            adapter = Adapter()
        if metadata is None:
            metadata = {}
        if config is None:
            config = {}
        self.id = id
        self.name = name
        self.gym = gym
        self.evalset = evalset
        self.adapter = adapter
        self.config = config
        self.metadata = metadata
        self.envs: list[Env] = []

    async def fetch_task_ids(self) -> list[str]:
        """
        Fetch task IDs for this run from the evalset.

        Returns:
            list[str]: List of task IDs
        """
        return await self.evalset.fetch_tasks()

    async def make(self, metadata: dict[str, Any] | None = None) -> Env:
        """
        Create a new environment for this run.

        The environment receives this run's ID, config and adapter, is
        created via ``Env.create_environment`` and is appended to ``envs``.

        Args:
            metadata: Optional metadata for the environment; defaults to an
                empty dict

        Returns:
            Env: The created environment
        """
        env = Env(
            run_id=self.id,
            config=self.config,
            adapter=self.adapter,
            metadata=metadata if metadata is not None else {},
        )
        await env.create_environment()
        # Only environments that were created successfully are tracked.
        self.envs.append(env)
        return env

    async def get_analytics(self) -> RunAnalyticsResponse:
        """
        Get analytics for this run.

        Returns:
            RunAnalyticsResponse: Analytics data including status counts,
                average score, and other metrics
        """
        data = await make_request(
            method="GET",
            url=f"{settings.base_url}/runs/{self.id}/analytics",
            api_key=settings.api_key,
        )
        return RunAnalyticsResponse(**data)
|
hud/server/__init__.py
ADDED
hud/server/requests.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""
|
|
2
|
+
HTTP request utilities for the HUD API.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RequestError(Exception):
    """
    Raised by the request helpers in this module.

    Used when the API key is missing or when an HTTP request fails.
    """
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
async def make_request(
    method: str, url: str, json: Any | None = None, api_key: str | None = None
) -> dict[str, Any]:
    """
    Make an asynchronous HTTP request to the HUD API.

    The API key is sent as a ``Bearer`` token in the ``Authorization``
    header. A new ``httpx.AsyncClient`` with a 240 second timeout is used
    for each call.

    Args:
        method: HTTP method (GET, POST, etc.)
        url: Full URL for the request
        json: Optional JSON serializable data
        api_key: API key for authentication

    Returns:
        dict: JSON response from the server

    Raises:
        RequestError: If API key is missing, the request fails, the server
            answers with an error status, or the body is not valid JSON
    """
    if not api_key:
        raise RequestError("API key is required but not provided")

    headers = {"Authorization": f"Bearer {api_key}"}

    async with httpx.AsyncClient(timeout=240.0) as client:
        try:
            response = await client.request(method=method, url=url, json=json, headers=headers)
            response.raise_for_status()
        except httpx.HTTPError as e:
            # Chain the httpx error so callers can still inspect it
            # (e.g. the response of an HTTPStatusError) via __cause__.
            raise RequestError(f"Request failed: {e!s}") from e

        try:
            return response.json()
        except ValueError as e:
            # An empty or non-JSON body raises a ValueError subclass; report
            # it through the documented exception type instead.
            raise RequestError(f"Request failed: invalid JSON in response: {e!s}") from e
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def make_sync_request(
    method: str, url: str, json: Any | None = None, api_key: str | None = None
) -> dict[str, Any]:
    """
    Make a synchronous HTTP request to the HUD API.

    The API key is sent as a ``Bearer`` token in the ``Authorization``
    header. A new ``httpx.Client`` with a 240 second timeout is used for
    each call.

    Args:
        method: HTTP method (GET, POST, etc.)
        url: Full URL for the request
        json: Optional JSON serializable data
        api_key: API key for authentication

    Returns:
        dict: JSON response from the server

    Raises:
        RequestError: If API key is missing, the request fails, the server
            answers with an error status, or the body is not valid JSON
    """
    if not api_key:
        raise RequestError("API key is required but not provided")

    headers = {"Authorization": f"Bearer {api_key}"}

    with httpx.Client(timeout=240.0) as client:
        try:
            response = client.request(method=method, url=url, json=json, headers=headers)
            response.raise_for_status()
        except httpx.HTTPError as e:
            # Chain the httpx error so callers can still inspect it
            # (e.g. the response of an HTTPStatusError) via __cause__.
            raise RequestError(f"Request failed: {e!s}") from e

        try:
            return response.json()
        except ValueError as e:
            # An empty or non-JSON body raises a ValueError subclass; report
            # it through the documented exception type instead.
            raise RequestError(f"Request failed: invalid JSON in response: {e!s}") from e
|
hud/settings.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Settings(BaseSettings):
    """
    Configuration values for the HUD SDK.

    Values are loaded from environment variables and from a UTF-8 encoded
    ``.env`` file. Entries that are not declared as fields are allowed
    (``extra="allow"``).

    Attributes:
        base_url: Base URL for the HUD API (validation alias ``base_url``)
        api_key: API key for the HUD API (validation alias ``HUD_API_KEY``);
            ``None`` when not configured
    """

    model_config = SettingsConfigDict(
        extra="allow",
        env_file=".env",
        env_file_encoding="utf-8",
    )

    base_url: str = Field(
        validation_alias="base_url",
        description="Base URL for the HUD API",
        default="https://orchestrator.hud.live/hud-gym/api/v1",
    )

    api_key: str | None = Field(
        validation_alias="HUD_API_KEY",
        description="API key for authentication with the HUD API",
        default=None,
    )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Module-wide singleton, built once when this module is imported.
settings = Settings()


# Accessor kept for backwards compatibility.
def get_settings() -> Settings:
    """
    Get the global settings instance.

    Returns:
        Settings: The module-level ``settings`` object (always the same
            instance)
    """
    return settings
|
hud/utils/__init__.py
ADDED
hud/utils/config.py
ADDED
hud_python-0.1.0.dist-info/METADATA
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hud-python
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: SDK for the HUD evaluation platform.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Human-Data/hud-sdk
|
|
6
|
+
Project-URL: Bug Tracker, https://github.com/Human-Data/hud-sdk/issues
|
|
7
|
+
Project-URL: Documentation, https://hud.so
|
|
8
|
+
Author-email: Human Union Data SDK <founders@hud.so>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2025 Human Data Company
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Classifier: Development Status :: 4 - Beta
|
|
32
|
+
Classifier: Intended Audience :: Developers
|
|
33
|
+
Classifier: Programming Language :: Python :: 3
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
39
|
+
Requires-Python: <3.14,>=3.9
|
|
40
|
+
Requires-Dist: eval-type-backport>=0.2.2
|
|
41
|
+
Requires-Dist: httpx<1,>=0.23.0
|
|
42
|
+
Requires-Dist: pillow<12,>=11
|
|
43
|
+
Requires-Dist: pydantic-settings<3,>=2
|
|
44
|
+
Requires-Dist: pydantic<3,>=2
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Requires-Dist: anthropic; extra == 'dev'
|
|
47
|
+
Requires-Dist: ipykernel; extra == 'dev'
|
|
48
|
+
Requires-Dist: ipython<9; extra == 'dev'
|
|
49
|
+
Requires-Dist: jupyter-client; extra == 'dev'
|
|
50
|
+
Requires-Dist: jupyter-core; extra == 'dev'
|
|
51
|
+
Requires-Dist: openai; extra == 'dev'
|
|
52
|
+
Requires-Dist: pyright==1.1.364; extra == 'dev'
|
|
53
|
+
Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
|
|
54
|
+
Requires-Dist: ruff==0.9.8; extra == 'dev'
|
|
55
|
+
Description-Content-Type: text/markdown
|
|
56
|
+
|
|
57
|
+
# HUD SDK (Alpha Release)
|
|
58
|
+
|
|
59
|
+
A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models.
|
|
60
|
+
|
|
61
|
+
Visit [hud.so](https://hud.so) for more information about HUD.
|
|
62
|
+
|
|
63
|
+
> **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0). The API is still evolving and may change in future releases as we gather feedback and improve functionality.
|
|
64
|
+
|
|
65
|
+
[hud-python on PyPI](https://pypi.org/project/hud-python/)
|
|
66
|
+
|
|
67
|
+
[📚 Documentation](https://docs.hud.so) | [🏠 Homepage](https://hud.so)
|
|
68
|
+
|
|
69
|
+
## Quick Start
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
# Install the latest stable release
|
|
73
|
+
pip install hud-python
|
|
74
|
+
|
|
75
|
+
# Install the latest alpha release (may include breaking changes)
|
|
76
|
+
pip install --pre hud-python
|
|
77
|
+
|
|
78
|
+
# Install a specific version
|
|
79
|
+
pip install hud-python==0.1.0
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
import asyncio
|
|
84
|
+
from hud import HUDClient
|
|
85
|
+
|
|
86
|
+
async def main():
|
|
87
|
+
# Initialize client with API key
|
|
88
|
+
client = HUDClient(api_key="your-api-key")
|
|
89
|
+
|
|
90
|
+
# Load a gym and evaluation set
|
|
91
|
+
gym = await client.load_gym(id="OSWorld-Ubuntu")
|
|
92
|
+
evalset = await client.load_evalset(id="OSWorld-Ubuntu")
|
|
93
|
+
|
|
94
|
+
# Create a run and environment
|
|
95
|
+
run = client.create_run(name="example-run", gym=gym, evalset=evalset)
|
|
96
|
+
env = await run.make(metadata={"agent_id": "example"})
|
|
97
|
+
|
|
98
|
+
# Agent loop goes here
|
|
99
|
+
# For complete examples and usage guides, see our documentation
|
|
100
|
+
|
|
101
|
+
# Close the environment when done
|
|
102
|
+
await env.close()
|
|
103
|
+
|
|
104
|
+
if __name__ == "__main__":
|
|
105
|
+
asyncio.run(main())
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Key Features
|
|
109
|
+
|
|
110
|
+
- Connect to HUD evaluation environments
|
|
111
|
+
- Run benchmarks across various tasks
|
|
112
|
+
- Support for different agent adapters
|
|
113
|
+
- Asynchronous API for efficient interaction
|
|
114
|
+
|
|
115
|
+
## Documentation
|
|
116
|
+
|
|
117
|
+
For comprehensive guides, examples, and API reference, visit:
|
|
118
|
+
- [Getting Started](https://docs.hud.so/introduction)
|
|
119
|
+
- [Installation](https://docs.hud.so/installation)
|
|
120
|
+
- [API Reference](https://docs.hud.so/api-reference)
|
|
121
|
+
- [Examples](https://docs.hud.so/examples)
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
[MIT License](LICENSE)
|