hud-python 0.1.0__py3-none-any.whl → 0.1.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +3 -3
- hud/client.py +19 -4
- hud/{env.py → environment.py} +41 -2
- hud/run.py +62 -9
- hud/server/requests.py +98 -11
- {hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/METADATA +32 -20
- {hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/RECORD +9 -9
- {hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/WHEEL +0 -0
- {hud_python-0.1.0.dist-info → hud_python-0.1.0b2.dist-info}/licenses/LICENSE +0 -0
hud/__init__.py
CHANGED
|
@@ -5,14 +5,14 @@ HUD Gym SDK - A Python SDK for interacting with HUD environments.
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
7
|
from hud.client import HUDClient
|
|
8
|
-
from hud.
|
|
8
|
+
from hud.environment import Environment, EvalSet, Observation, TaskResult
|
|
9
9
|
from hud.gym import Gym
|
|
10
10
|
from hud.run import Run
|
|
11
11
|
|
|
12
|
-
__version__ = "0.1.
|
|
12
|
+
__version__ = "0.1.0b2"
|
|
13
13
|
|
|
14
14
|
__all__ = [
|
|
15
|
-
"
|
|
15
|
+
"Environment",
|
|
16
16
|
"EvalSet",
|
|
17
17
|
"Gym",
|
|
18
18
|
"HUDClient",
|
hud/client.py
CHANGED
|
@@ -8,7 +8,7 @@ import json
|
|
|
8
8
|
from typing import Any
|
|
9
9
|
|
|
10
10
|
from .adapters.common import Adapter
|
|
11
|
-
from .
|
|
11
|
+
from .environment import EvalSet
|
|
12
12
|
from .gym import Gym
|
|
13
13
|
from .run import Run, RunResponse
|
|
14
14
|
from .server import make_request, make_sync_request
|
|
@@ -23,15 +23,15 @@ class HUDClient:
|
|
|
23
23
|
evalsets, and create runs.
|
|
24
24
|
"""
|
|
25
25
|
|
|
26
|
-
def __init__(self, api_key: str) -> None:
|
|
26
|
+
def __init__(self, api_key: str | None = None) -> None:
|
|
27
27
|
"""
|
|
28
28
|
Initialize the HUD client with an API key.
|
|
29
29
|
|
|
30
30
|
Args:
|
|
31
31
|
api_key: API key for authentication with the HUD API
|
|
32
32
|
"""
|
|
33
|
-
self.api_key = api_key
|
|
34
|
-
settings.api_key = api_key
|
|
33
|
+
self.api_key = api_key or settings.api_key
|
|
34
|
+
settings.api_key = self.api_key
|
|
35
35
|
|
|
36
36
|
async def load_gym(self, id: str) -> Gym:
|
|
37
37
|
"""
|
|
@@ -182,3 +182,18 @@ class HUDClient:
|
|
|
182
182
|
config=config,
|
|
183
183
|
metadata=metadata,
|
|
184
184
|
)
|
|
185
|
+
|
|
186
|
+
def display_stream(self, live_url: str) -> None:
|
|
187
|
+
"""
|
|
188
|
+
Display a stream in the HUD system.
|
|
189
|
+
"""
|
|
190
|
+
from IPython.display import HTML, display
|
|
191
|
+
html_content = f"""
|
|
192
|
+
<div style="width: 960px; height: 540px; overflow: hidden;">
|
|
193
|
+
<div style="transform: scale(0.5); transform-origin: top left;">
|
|
194
|
+
<iframe src="{live_url}" width="1920" height="1080" style="border: 1px solid #ddd;">
|
|
195
|
+
</iframe>
|
|
196
|
+
</div>
|
|
197
|
+
</div>
|
|
198
|
+
"""
|
|
199
|
+
display(HTML(html_content))
|
hud/{env.py → environment.py}
RENAMED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
4
|
+
import enum
|
|
5
|
+
import logging
|
|
3
6
|
from typing import TYPE_CHECKING, Any
|
|
4
7
|
|
|
5
8
|
from pydantic import BaseModel
|
|
@@ -10,6 +13,7 @@ from hud.settings import settings
|
|
|
10
13
|
if TYPE_CHECKING:
|
|
11
14
|
from .adapters.common import Adapter
|
|
12
15
|
|
|
16
|
+
logger = logging.getLogger("hud.environment")
|
|
13
17
|
|
|
14
18
|
class Observation(BaseModel):
|
|
15
19
|
"""
|
|
@@ -38,8 +42,29 @@ class TaskResult(BaseModel):
|
|
|
38
42
|
terminated: bool
|
|
39
43
|
info: dict[str, Any]
|
|
40
44
|
|
|
45
|
+
class EnvironmentStatus(str, enum.Enum):
|
|
46
|
+
"""
|
|
47
|
+
Status of the environment.
|
|
48
|
+
|
|
49
|
+
Attributes:
|
|
50
|
+
INITIALIZING: The environment is initializing
|
|
51
|
+
RUNNING: The environment is running
|
|
52
|
+
COMPLETED: The environment is completed
|
|
53
|
+
ERROR: The environment is in an error state
|
|
54
|
+
"""
|
|
55
|
+
INITIALIZING = "initializing"
|
|
56
|
+
RUNNING = "running"
|
|
57
|
+
COMPLETED = "completed"
|
|
58
|
+
ERROR = "error"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
status_messages = {
|
|
62
|
+
EnvironmentStatus.RUNNING.value: "is running",
|
|
63
|
+
EnvironmentStatus.ERROR.value: "had an error initializing",
|
|
64
|
+
EnvironmentStatus.COMPLETED.value: "completed",
|
|
65
|
+
}
|
|
41
66
|
|
|
42
|
-
class
|
|
67
|
+
class Environment:
|
|
43
68
|
"""
|
|
44
69
|
Environment interface for agent interactions.
|
|
45
70
|
|
|
@@ -192,7 +217,9 @@ class Env:
|
|
|
192
217
|
api_key=settings.api_key,
|
|
193
218
|
)
|
|
194
219
|
|
|
195
|
-
async def reset(
|
|
220
|
+
async def reset(
|
|
221
|
+
self, task_id: str, metadata: dict[str, Any] | None = None
|
|
222
|
+
) -> Observation:
|
|
196
223
|
"""
|
|
197
224
|
Reset the environment to the task.
|
|
198
225
|
|
|
@@ -213,6 +240,18 @@ class Env:
|
|
|
213
240
|
)
|
|
214
241
|
return Observation(**data["observation"])
|
|
215
242
|
|
|
243
|
+
async def wait_for_ready(self) -> None:
|
|
244
|
+
"""Wait for the environment to be ready"""
|
|
245
|
+
while True:
|
|
246
|
+
state = await self.get_env_state()
|
|
247
|
+
if state in (
|
|
248
|
+
EnvironmentStatus.RUNNING.value,
|
|
249
|
+
EnvironmentStatus.ERROR.value,
|
|
250
|
+
EnvironmentStatus.COMPLETED.value,
|
|
251
|
+
):
|
|
252
|
+
logger.info("Environment %s %s", self.id, status_messages.get(state))
|
|
253
|
+
break
|
|
254
|
+
await asyncio.sleep(10)
|
|
216
255
|
|
|
217
256
|
class EvalSet:
|
|
218
257
|
"""
|
hud/run.py
CHANGED
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import datetime
|
|
3
4
|
from typing import TYPE_CHECKING, Any
|
|
4
5
|
|
|
5
6
|
from pydantic import BaseModel, Field
|
|
6
7
|
|
|
7
8
|
from .adapters.common import Adapter
|
|
8
|
-
from .
|
|
9
|
+
from .environment import Environment, EvalSet
|
|
9
10
|
from .server import make_request
|
|
10
11
|
from .settings import settings
|
|
11
12
|
|
|
12
13
|
if TYPE_CHECKING:
|
|
13
|
-
|
|
14
|
+
import datetime
|
|
14
15
|
|
|
15
16
|
from .gym import Gym
|
|
16
17
|
|
|
@@ -61,11 +62,63 @@ class RunAnalyticsResponse(BaseModel):
|
|
|
61
62
|
total_tasks: int
|
|
62
63
|
completed_tasks: int
|
|
63
64
|
running_time: float | None = None # runtime in seconds if available
|
|
64
|
-
created_at: datetime
|
|
65
|
+
created_at: datetime.datetime
|
|
65
66
|
raw_data: dict[str, list[dict[str, Any]]] = Field(
|
|
66
67
|
default_factory=lambda: {"tasks": [], "environments": []}
|
|
67
68
|
)
|
|
68
69
|
|
|
70
|
+
def __str__(self) -> str:
|
|
71
|
+
return self.visualize()
|
|
72
|
+
|
|
73
|
+
def visualize(self) -> str:
|
|
74
|
+
"""
|
|
75
|
+
Generate an ASCII bar chart visualization of run analytics.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
data: The run analytics data to visualize
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
A string containing an ASCII visualization
|
|
82
|
+
"""
|
|
83
|
+
max_width = 50
|
|
84
|
+
|
|
85
|
+
completion_rate = (
|
|
86
|
+
self.completion_rate if self.completion_rate is not None else 0
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
result = [
|
|
90
|
+
f"Run: {self.name} (ID: {self.id})",
|
|
91
|
+
f"Created: {self.created_at.strftime('%Y-%m-%d %H:%M:%S')}",
|
|
92
|
+
"-" * 60,
|
|
93
|
+
f"""Progress: {self.completed_tasks}/{self.total_tasks} tasks completed (
|
|
94
|
+
{completion_rate:.1f}% completion rate)""",
|
|
95
|
+
"",
|
|
96
|
+
]
|
|
97
|
+
|
|
98
|
+
result.append("Status Distribution:")
|
|
99
|
+
total = sum(self.status_counts.values())
|
|
100
|
+
for status, count in self.status_counts.items():
|
|
101
|
+
percentage = (count / total) * 100
|
|
102
|
+
bar_length = int((count / total) * max_width)
|
|
103
|
+
bar = "█" * bar_length
|
|
104
|
+
result.append(f"{status.ljust(10)}: {bar} {count} ({percentage:.1f}%)")
|
|
105
|
+
|
|
106
|
+
if self.avg_score is not None:
|
|
107
|
+
result.append("")
|
|
108
|
+
result.append(f"Average Score: {self.avg_score:.2f}")
|
|
109
|
+
|
|
110
|
+
score_bar_length = int((self.avg_score / 100) * max_width)
|
|
111
|
+
score_bar = "█" * score_bar_length
|
|
112
|
+
result.append(f"Score: {score_bar} {self.avg_score:.2f}/1.00")
|
|
113
|
+
|
|
114
|
+
if self.running_time is not None:
|
|
115
|
+
hours, remainder = divmod(self.running_time, 3600)
|
|
116
|
+
minutes, seconds = divmod(remainder, 60)
|
|
117
|
+
runtime_str = f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
|
|
118
|
+
result.append(f"Total Runtime: {runtime_str}")
|
|
119
|
+
|
|
120
|
+
return "\n".join(result)
|
|
121
|
+
|
|
69
122
|
|
|
70
123
|
class Run:
|
|
71
124
|
"""
|
|
@@ -109,7 +162,7 @@ class Run:
|
|
|
109
162
|
self.adapter = adapter
|
|
110
163
|
self.config = config
|
|
111
164
|
self.metadata = metadata
|
|
112
|
-
self.
|
|
165
|
+
self.environments: list[Environment] = []
|
|
113
166
|
|
|
114
167
|
async def fetch_task_ids(self) -> list[str]:
|
|
115
168
|
"""
|
|
@@ -120,7 +173,7 @@ class Run:
|
|
|
120
173
|
"""
|
|
121
174
|
return await self.evalset.fetch_tasks()
|
|
122
175
|
|
|
123
|
-
async def make(self, metadata: dict[str, Any]) ->
|
|
176
|
+
async def make(self, metadata: dict[str, Any] | None = None) -> Environment:
|
|
124
177
|
"""
|
|
125
178
|
Create a new environment for this run.
|
|
126
179
|
|
|
@@ -128,17 +181,17 @@ class Run:
|
|
|
128
181
|
metadata: Metadata for the environment
|
|
129
182
|
|
|
130
183
|
Returns:
|
|
131
|
-
|
|
184
|
+
Environment: The created environment
|
|
132
185
|
"""
|
|
133
186
|
# Make the env class
|
|
134
|
-
env =
|
|
187
|
+
env = Environment(
|
|
135
188
|
run_id=self.id,
|
|
136
189
|
config=self.config,
|
|
137
190
|
adapter=self.adapter,
|
|
138
|
-
metadata=metadata,
|
|
191
|
+
metadata=metadata or {},
|
|
139
192
|
)
|
|
140
193
|
await env.create_environment()
|
|
141
|
-
self.
|
|
194
|
+
self.environments.append(env)
|
|
142
195
|
return env
|
|
143
196
|
|
|
144
197
|
async def get_analytics(self) -> RunAnalyticsResponse:
|
hud/server/requests.py
CHANGED
|
@@ -4,16 +4,87 @@ HTTP request utilities for the HUD API.
|
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
6
|
|
|
7
|
+
import logging
|
|
7
8
|
from typing import Any
|
|
8
9
|
|
|
9
10
|
import httpx
|
|
10
11
|
|
|
12
|
+
logger = logging.getLogger("hud.http")
|
|
11
13
|
|
|
12
14
|
class RequestError(Exception):
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
"""Custom exception for API request errors"""
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
message: str,
|
|
19
|
+
status_code: int | None = None,
|
|
20
|
+
response_text: str | None = None,
|
|
21
|
+
response_json: dict[str, Any] | None = None,
|
|
22
|
+
response_headers: dict[str, str] | None = None
|
|
23
|
+
) -> None:
|
|
24
|
+
self.message = message
|
|
25
|
+
self.status_code = status_code
|
|
26
|
+
self.response_text = response_text
|
|
27
|
+
self.response_json = response_json
|
|
28
|
+
self.response_headers = response_headers
|
|
29
|
+
super().__init__(message)
|
|
30
|
+
|
|
31
|
+
def __str__(self) -> str:
|
|
32
|
+
parts = [self.message]
|
|
33
|
+
|
|
34
|
+
if self.status_code:
|
|
35
|
+
parts.append(f"Status: {self.status_code}")
|
|
36
|
+
if self.response_text:
|
|
37
|
+
parts.append(f"Response Text: {self.response_text}")
|
|
38
|
+
if self.response_json:
|
|
39
|
+
parts.append(f"Response JSON: {self.response_json}")
|
|
40
|
+
if self.response_headers:
|
|
41
|
+
parts.append(f"Headers: {self.response_headers}")
|
|
42
|
+
|
|
43
|
+
return " | ".join(parts)
|
|
44
|
+
|
|
45
|
+
@classmethod
|
|
46
|
+
def from_http_error(cls, error: httpx.HTTPStatusError) -> RequestError:
|
|
47
|
+
"""Create a RequestError from an HTTP error response"""
|
|
48
|
+
response = error.response
|
|
49
|
+
status_code = response.status_code
|
|
50
|
+
response_text = response.text
|
|
51
|
+
response_headers = dict(response.headers)
|
|
52
|
+
|
|
53
|
+
# Try to get detailed error info from JSON if available
|
|
54
|
+
response_json = None
|
|
55
|
+
try:
|
|
56
|
+
response_json = response.json()
|
|
57
|
+
detail = response_json.get("detail")
|
|
58
|
+
if detail:
|
|
59
|
+
message = f"Request failed: {detail}"
|
|
60
|
+
else:
|
|
61
|
+
# If no detail field but we have JSON, include a summary
|
|
62
|
+
message = f"Request failed with status {status_code}"
|
|
63
|
+
if (
|
|
64
|
+
len(response_json) <= 5
|
|
65
|
+
): # If it's a small object, include it in the message
|
|
66
|
+
message += f" - JSON response: {response_json}"
|
|
67
|
+
except Exception:
|
|
68
|
+
# Fallback to simple message if JSON parsing fails
|
|
69
|
+
message = f"Request failed with status {status_code}"
|
|
70
|
+
|
|
71
|
+
# Log the error details
|
|
72
|
+
logger.error(
|
|
73
|
+
"HTTP error from HUD SDK: %s | URL: %s | Status: %s | Response: %s%s",
|
|
74
|
+
message,
|
|
75
|
+
response.url,
|
|
76
|
+
status_code,
|
|
77
|
+
response_text[:500],
|
|
78
|
+
"..." if len(response_text) > 500 else ""
|
|
79
|
+
)
|
|
16
80
|
|
|
81
|
+
return cls(
|
|
82
|
+
message=message,
|
|
83
|
+
status_code=status_code,
|
|
84
|
+
response_text=response_text,
|
|
85
|
+
response_json=response_json,
|
|
86
|
+
response_headers=response_headers,
|
|
87
|
+
)
|
|
17
88
|
|
|
18
89
|
async def make_request(
|
|
19
90
|
method: str, url: str, json: Any | None = None, api_key: str | None = None
|
|
@@ -40,11 +111,19 @@ async def make_request(
|
|
|
40
111
|
|
|
41
112
|
async with httpx.AsyncClient(timeout=240.0) as client:
|
|
42
113
|
try:
|
|
43
|
-
response = await client.request(
|
|
114
|
+
response = await client.request(
|
|
115
|
+
method=method, url=url, json=json, headers=headers
|
|
116
|
+
)
|
|
44
117
|
response.raise_for_status()
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
118
|
+
result = response.json()
|
|
119
|
+
return result
|
|
120
|
+
except httpx.HTTPStatusError as e:
|
|
121
|
+
raise RequestError.from_http_error(e) from None
|
|
122
|
+
except httpx.RequestError as e:
|
|
123
|
+
raise RequestError(f"Network error: {e!s}") from None
|
|
124
|
+
except Exception as e:
|
|
125
|
+
# Catch-all for unexpected errors
|
|
126
|
+
raise RequestError(f"Unexpected error: {e!s}") from None
|
|
48
127
|
|
|
49
128
|
|
|
50
129
|
def make_sync_request(
|
|
@@ -72,8 +151,16 @@ def make_sync_request(
|
|
|
72
151
|
|
|
73
152
|
with httpx.Client(timeout=240.0) as client:
|
|
74
153
|
try:
|
|
75
|
-
response = client.request(
|
|
154
|
+
response = client.request(
|
|
155
|
+
method=method, url=url, json=json, headers=headers
|
|
156
|
+
)
|
|
76
157
|
response.raise_for_status()
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
158
|
+
result = response.json()
|
|
159
|
+
return result
|
|
160
|
+
except httpx.HTTPStatusError as e:
|
|
161
|
+
raise RequestError.from_http_error(e) from None
|
|
162
|
+
except httpx.RequestError as e:
|
|
163
|
+
raise RequestError(f"Network error: {e!s}") from None
|
|
164
|
+
except Exception as e:
|
|
165
|
+
# Catch-all for unexpected errors
|
|
166
|
+
raise RequestError(f"Unexpected error: {e!s}") from None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hud-python
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.0b2
|
|
4
4
|
Summary: SDK for the HUD evaluation platform.
|
|
5
5
|
Project-URL: Homepage, https://github.com/Human-Data/hud-sdk
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/Human-Data/hud-sdk/issues
|
|
@@ -44,6 +44,7 @@ Requires-Dist: pydantic-settings<3,>=2
|
|
|
44
44
|
Requires-Dist: pydantic<3,>=2
|
|
45
45
|
Provides-Extra: dev
|
|
46
46
|
Requires-Dist: anthropic; extra == 'dev'
|
|
47
|
+
Requires-Dist: dotenv; extra == 'dev'
|
|
47
48
|
Requires-Dist: ipykernel; extra == 'dev'
|
|
48
49
|
Requires-Dist: ipython<9; extra == 'dev'
|
|
49
50
|
Requires-Dist: jupyter-client; extra == 'dev'
|
|
@@ -54,38 +55,40 @@ Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
|
|
|
54
55
|
Requires-Dist: ruff==0.9.8; extra == 'dev'
|
|
55
56
|
Description-Content-Type: text/markdown
|
|
56
57
|
|
|
57
|
-
# HUD
|
|
58
|
+
# HUD
|
|
58
59
|
|
|
59
|
-
A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models.
|
|
60
|
+
A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models. Visit [hud.so](https://hud.so).
|
|
60
61
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
> **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is still evolving and may change in future releases as we gather feedback and improve functionality.
|
|
62
|
+
> **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is evolving and may change in future releases as we gather feedback and improve functionality.
|
|
64
63
|
|
|
65
64
|
[](https://pypi.org/project/hud-python/)
|
|
66
65
|
|
|
67
|
-
[📚 Documentation](https://
|
|
66
|
+
[📚 Documentation](https://documentation.hud.so) | [🏠 Homepage](https://hud.so)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
## Quick start
|
|
68
70
|
|
|
69
|
-
|
|
71
|
+
[RECOMMENDED] To get started with an agent, see the [Claude Computer use example](https://github.com/Human-Data/hud-sdk/tree/main/examples).
|
|
70
72
|
|
|
73
|
+
|
|
74
|
+
Otherwise, install the package with Python>=3.9:
|
|
71
75
|
```bash
|
|
72
|
-
# Install the latest stable release
|
|
73
76
|
pip install hud-python
|
|
77
|
+
```
|
|
74
78
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
# Install a specific alpha version
|
|
79
|
-
pip install hud-python==0.1.0-alpha
|
|
79
|
+
Make sure to set up your account [here](https://hud.so/settings) and add your API key to the environment variables:
|
|
80
|
+
```bash
|
|
81
|
+
HUD_API_KEY=<your-api-key>
|
|
80
82
|
```
|
|
81
83
|
|
|
84
|
+
Load in your agent and create a run! Go to the [examples](https://github.com/Human-Data/hud-sdk/tree/main/examples) folder for more examples.
|
|
82
85
|
```python
|
|
83
86
|
import asyncio
|
|
84
87
|
from hud import HUDClient
|
|
85
88
|
|
|
86
89
|
async def main():
|
|
87
90
|
# Initialize client with API key
|
|
88
|
-
client = HUDClient(api_key="
|
|
91
|
+
client = HUDClient(api_key=os.getenv("HUD_API_KEY"))
|
|
89
92
|
|
|
90
93
|
# Load a gym and evaluation set
|
|
91
94
|
gym = await client.load_gym(id="OSWorld-Ubuntu")
|
|
@@ -93,24 +96,33 @@ async def main():
|
|
|
93
96
|
|
|
94
97
|
# Create a run and environment
|
|
95
98
|
run = client.create_run(name="example-run", gym=gym, evalset=evalset)
|
|
96
|
-
env = await run.make(metadata={"agent_id": "
|
|
99
|
+
env = await run.make(metadata={"agent_id": "OSWORLD-1"})
|
|
100
|
+
await env.wait_for_ready()
|
|
101
|
+
|
|
102
|
+
###
|
|
103
|
+
### Agent loop goes here, see example in /examples
|
|
104
|
+
###
|
|
97
105
|
|
|
98
|
-
#
|
|
99
|
-
|
|
106
|
+
# Evaluate the environment
|
|
107
|
+
result = await env.evaluate()
|
|
100
108
|
|
|
101
109
|
# Close the environment when done
|
|
102
110
|
await env.close()
|
|
103
111
|
|
|
112
|
+
# Get analytics for the run such as rewards, task completions, etc.
|
|
113
|
+
analytics = await run.get_analytics()
|
|
114
|
+
print(analytics)
|
|
115
|
+
|
|
104
116
|
if __name__ == "__main__":
|
|
105
117
|
asyncio.run(main())
|
|
106
118
|
```
|
|
107
119
|
|
|
108
|
-
##
|
|
120
|
+
## Features
|
|
109
121
|
|
|
110
122
|
- Connect to HUD evaluation environments
|
|
111
123
|
- Run benchmarks across various tasks
|
|
112
124
|
- Support for different agent adapters
|
|
113
|
-
- Asynchronous API
|
|
125
|
+
- Asynchronous API
|
|
114
126
|
|
|
115
127
|
## Documentation
|
|
116
128
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
hud/__init__.py,sha256=
|
|
2
|
-
hud/client.py,sha256=
|
|
3
|
-
hud/
|
|
1
|
+
hud/__init__.py,sha256=GmX-LujM2oZR6_tP_mOW09BY8HeK41lLF-P0sMW_1pY,416
|
|
2
|
+
hud/client.py,sha256=ztWPiAJyJUdJxdxGqDmsQnVK-_jccinWQUUXmq0OOmY,5843
|
|
3
|
+
hud/environment.py,sha256=R-t-21V0gveuHL6LlVLnBp0gYGm8tn5FbLcq_rRlH9g,8587
|
|
4
4
|
hud/gym.py,sha256=dKmf0Ol0-XyLhji034pF_5dXnhW1IgIr-dJUg4KfslE,475
|
|
5
|
-
hud/run.py,sha256=
|
|
5
|
+
hud/run.py,sha256=_K7POPjJyqcJ_DVLAO7hRmvLUcg9gg2KrLHw_26DB9I,6570
|
|
6
6
|
hud/settings.py,sha256=FbZHI1q6bDHe7Awl32SDPb-syqtkLI3C7gIIXuMXCiQ,1045
|
|
7
7
|
hud/adapters/__init__.py,sha256=y3H7yMl7rC-rrXG2WvePdSojoNFSui02eYTH17Xd7OY,87
|
|
8
8
|
hud/adapters/claude/__init__.py,sha256=GsMxaBL5ZuKV6-jJsLfw23n_Ml9e88SXIddYDGkIUKE,101
|
|
@@ -11,11 +11,11 @@ hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0m
|
|
|
11
11
|
hud/adapters/common/adapter.py,sha256=SCtOuRjW5Szzd45LXCaqDEaKr2lhA-nIqSEMJ9KLsKI,5799
|
|
12
12
|
hud/adapters/common/types.py,sha256=LlWxH9sWucYgnIv6DKrgqToh3k7Bu-xdTxNFU4L8Xg8,1962
|
|
13
13
|
hud/server/__init__.py,sha256=HeIXBGb-bxtq3xF20jP4IrOy77PlsqhClOf3bZ9wrwI,169
|
|
14
|
-
hud/server/requests.py,sha256=
|
|
14
|
+
hud/server/requests.py,sha256=M_pK1oCd4QjIE0yguD6iaybJ_mempOWDQYEpdOkophU,5522
|
|
15
15
|
hud/utils/__init__.py,sha256=0m8klSLnMLeIJT23ipBXfFACk4hNWPsA6ZNqZDpv6oY,99
|
|
16
16
|
hud/utils/config.py,sha256=dze0BGE4q14omjj9822kL9BeiIgWQvJyuU29A2wa1SE,193
|
|
17
17
|
hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
-
hud_python-0.1.
|
|
19
|
-
hud_python-0.1.
|
|
20
|
-
hud_python-0.1.
|
|
21
|
-
hud_python-0.1.
|
|
18
|
+
hud_python-0.1.0b2.dist-info/METADATA,sha256=5skHs5IfSJP4DQAGzpuo_yjO7l65XxHIPWGitHQt0Ug,5140
|
|
19
|
+
hud_python-0.1.0b2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
20
|
+
hud_python-0.1.0b2.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
|
|
21
|
+
hud_python-0.1.0b2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|