hud-python 0.1.0__py3-none-any.whl → 0.1.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

hud/__init__.py CHANGED
@@ -5,14 +5,14 @@ HUD Gym SDK - A Python SDK for interacting with HUD environments.
5
5
  from __future__ import annotations
6
6
 
7
7
  from hud.client import HUDClient
8
- from hud.env import Env, EvalSet, Observation, TaskResult
8
+ from hud.environment import Environment, EvalSet, Observation, TaskResult
9
9
  from hud.gym import Gym
10
10
  from hud.run import Run
11
11
 
12
- __version__ = "0.1.0"
12
+ __version__ = "0.1.0b2"
13
13
 
14
14
  __all__ = [
15
- "Env",
15
+ "Environment",
16
16
  "EvalSet",
17
17
  "Gym",
18
18
  "HUDClient",
hud/client.py CHANGED
@@ -8,7 +8,7 @@ import json
8
8
  from typing import Any
9
9
 
10
10
  from .adapters.common import Adapter
11
- from .env import EvalSet
11
+ from .environment import EvalSet
12
12
  from .gym import Gym
13
13
  from .run import Run, RunResponse
14
14
  from .server import make_request, make_sync_request
@@ -23,15 +23,15 @@ class HUDClient:
23
23
  evalsets, and create runs.
24
24
  """
25
25
 
26
- def __init__(self, api_key: str) -> None:
26
+ def __init__(self, api_key: str | None = None) -> None:
27
27
  """
28
28
  Initialize the HUD client with an API key.
29
29
 
30
30
  Args:
31
31
  api_key: API key for authentication with the HUD API
32
32
  """
33
- self.api_key = api_key
34
- settings.api_key = api_key # Set global config
33
+ self.api_key = api_key or settings.api_key
34
+ settings.api_key = self.api_key
35
35
 
36
36
  async def load_gym(self, id: str) -> Gym:
37
37
  """
@@ -182,3 +182,18 @@ class HUDClient:
182
182
  config=config,
183
183
  metadata=metadata,
184
184
  )
185
+
186
+ def display_stream(self, live_url: str) -> None:
187
+ """
188
+ Display a stream in the HUD system.
189
+ """
190
+ from IPython.display import HTML, display
191
+ html_content = f"""
192
+ <div style="width: 960px; height: 540px; overflow: hidden;">
193
+ <div style="transform: scale(0.5); transform-origin: top left;">
194
+ <iframe src="{live_url}" width="1920" height="1080" style="border: 1px solid #ddd;">
195
+ </iframe>
196
+ </div>
197
+ </div>
198
+ """
199
+ display(HTML(html_content))
@@ -1,5 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import asyncio
4
+ import enum
5
+ import logging
3
6
  from typing import TYPE_CHECKING, Any
4
7
 
5
8
  from pydantic import BaseModel
@@ -10,6 +13,7 @@ from hud.settings import settings
10
13
  if TYPE_CHECKING:
11
14
  from .adapters.common import Adapter
12
15
 
16
+ logger = logging.getLogger("hud.environment")
13
17
 
14
18
  class Observation(BaseModel):
15
19
  """
@@ -38,8 +42,29 @@ class TaskResult(BaseModel):
38
42
  terminated: bool
39
43
  info: dict[str, Any]
40
44
 
45
+ class EnvironmentStatus(str, enum.Enum):
46
+ """
47
+ Status of the environment.
48
+
49
+ Attributes:
50
+ INITIALIZING: The environment is initializing
51
+ RUNNING: The environment is running
52
+ COMPLETED: The environment is completed
53
+ ERROR: The environment is in an error state
54
+ """
55
+ INITIALIZING = "initializing"
56
+ RUNNING = "running"
57
+ COMPLETED = "completed"
58
+ ERROR = "error"
59
+
60
+
61
+ status_messages = {
62
+ EnvironmentStatus.RUNNING.value: "is running",
63
+ EnvironmentStatus.ERROR.value: "had an error initializing",
64
+ EnvironmentStatus.COMPLETED.value: "completed",
65
+ }
41
66
 
42
- class Env:
67
+ class Environment:
43
68
  """
44
69
  Environment interface for agent interactions.
45
70
 
@@ -192,7 +217,9 @@ class Env:
192
217
  api_key=settings.api_key,
193
218
  )
194
219
 
195
- async def reset(self, task_id: str, metadata: dict[str, Any] | None = None) -> Observation:
220
+ async def reset(
221
+ self, task_id: str, metadata: dict[str, Any] | None = None
222
+ ) -> Observation:
196
223
  """
197
224
  Reset the environment to the task.
198
225
 
@@ -213,6 +240,18 @@ class Env:
213
240
  )
214
241
  return Observation(**data["observation"])
215
242
 
243
+ async def wait_for_ready(self) -> None:
244
+ """Wait for the environment to be ready"""
245
+ while True:
246
+ state = await self.get_env_state()
247
+ if state in (
248
+ EnvironmentStatus.RUNNING.value,
249
+ EnvironmentStatus.ERROR.value,
250
+ EnvironmentStatus.COMPLETED.value,
251
+ ):
252
+ logger.info("Environment %s %s", self.id, status_messages.get(state))
253
+ break
254
+ await asyncio.sleep(10)
216
255
 
217
256
  class EvalSet:
218
257
  """
hud/run.py CHANGED
@@ -1,16 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import datetime
3
4
  from typing import TYPE_CHECKING, Any
4
5
 
5
6
  from pydantic import BaseModel, Field
6
7
 
7
8
  from .adapters.common import Adapter
8
- from .env import Env, EvalSet
9
+ from .environment import Environment, EvalSet
9
10
  from .server import make_request
10
11
  from .settings import settings
11
12
 
12
13
  if TYPE_CHECKING:
13
- from datetime import datetime
14
+ import datetime
14
15
 
15
16
  from .gym import Gym
16
17
 
@@ -61,11 +62,63 @@ class RunAnalyticsResponse(BaseModel):
61
62
  total_tasks: int
62
63
  completed_tasks: int
63
64
  running_time: float | None = None # runtime in seconds if available
64
- created_at: datetime
65
+ created_at: datetime.datetime
65
66
  raw_data: dict[str, list[dict[str, Any]]] = Field(
66
67
  default_factory=lambda: {"tasks": [], "environments": []}
67
68
  )
68
69
 
70
+ def __str__(self) -> str:
71
+ return self.visualize()
72
+
73
+ def visualize(self) -> str:
74
+ """
75
+ Generate an ASCII bar chart visualization of run analytics.
76
+
77
+ Args:
78
+ data: The run analytics data to visualize
79
+
80
+ Returns:
81
+ A string containing an ASCII visualization
82
+ """
83
+ max_width = 50
84
+
85
+ completion_rate = (
86
+ self.completion_rate if self.completion_rate is not None else 0
87
+ )
88
+
89
+ result = [
90
+ f"Run: {self.name} (ID: {self.id})",
91
+ f"Created: {self.created_at.strftime('%Y-%m-%d %H:%M:%S')}",
92
+ "-" * 60,
93
+ f"""Progress: {self.completed_tasks}/{self.total_tasks} tasks completed (
94
+ {completion_rate:.1f}% completion rate)""",
95
+ "",
96
+ ]
97
+
98
+ result.append("Status Distribution:")
99
+ total = sum(self.status_counts.values())
100
+ for status, count in self.status_counts.items():
101
+ percentage = (count / total) * 100
102
+ bar_length = int((count / total) * max_width)
103
+ bar = "█" * bar_length
104
+ result.append(f"{status.ljust(10)}: {bar} {count} ({percentage:.1f}%)")
105
+
106
+ if self.avg_score is not None:
107
+ result.append("")
108
+ result.append(f"Average Score: {self.avg_score:.2f}")
109
+
110
+ score_bar_length = int((self.avg_score / 100) * max_width)
111
+ score_bar = "█" * score_bar_length
112
+ result.append(f"Score: {score_bar} {self.avg_score:.2f}/1.00")
113
+
114
+ if self.running_time is not None:
115
+ hours, remainder = divmod(self.running_time, 3600)
116
+ minutes, seconds = divmod(remainder, 60)
117
+ runtime_str = f"{int(hours)}h {int(minutes)}m {int(seconds)}s"
118
+ result.append(f"Total Runtime: {runtime_str}")
119
+
120
+ return "\n".join(result)
121
+
69
122
 
70
123
  class Run:
71
124
  """
@@ -109,7 +162,7 @@ class Run:
109
162
  self.adapter = adapter
110
163
  self.config = config
111
164
  self.metadata = metadata
112
- self.envs: list[Env] = []
165
+ self.environments: list[Environment] = []
113
166
 
114
167
  async def fetch_task_ids(self) -> list[str]:
115
168
  """
@@ -120,7 +173,7 @@ class Run:
120
173
  """
121
174
  return await self.evalset.fetch_tasks()
122
175
 
123
- async def make(self, metadata: dict[str, Any]) -> Env:
176
+ async def make(self, metadata: dict[str, Any] | None = None) -> Environment:
124
177
  """
125
178
  Create a new environment for this run.
126
179
 
@@ -128,17 +181,17 @@ class Run:
128
181
  metadata: Metadata for the environment
129
182
 
130
183
  Returns:
131
- Env: The created environment
184
+ Environment: The created environment
132
185
  """
133
186
  # Make the env class
134
- env = Env(
187
+ env = Environment(
135
188
  run_id=self.id,
136
189
  config=self.config,
137
190
  adapter=self.adapter,
138
- metadata=metadata,
191
+ metadata=metadata or {},
139
192
  )
140
193
  await env.create_environment()
141
- self.envs.append(env)
194
+ self.environments.append(env)
142
195
  return env
143
196
 
144
197
  async def get_analytics(self) -> RunAnalyticsResponse:
hud/server/requests.py CHANGED
@@ -4,16 +4,87 @@ HTTP request utilities for the HUD API.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
+ import logging
7
8
  from typing import Any
8
9
 
9
10
  import httpx
10
11
 
12
+ logger = logging.getLogger("hud.http")
11
13
 
12
14
  class RequestError(Exception):
13
- """
14
- Custom exception for API request errors.
15
- """
15
+ """Custom exception for API request errors"""
16
+ def __init__(
17
+ self,
18
+ message: str,
19
+ status_code: int | None = None,
20
+ response_text: str | None = None,
21
+ response_json: dict[str, Any] | None = None,
22
+ response_headers: dict[str, str] | None = None
23
+ ) -> None:
24
+ self.message = message
25
+ self.status_code = status_code
26
+ self.response_text = response_text
27
+ self.response_json = response_json
28
+ self.response_headers = response_headers
29
+ super().__init__(message)
30
+
31
+ def __str__(self) -> str:
32
+ parts = [self.message]
33
+
34
+ if self.status_code:
35
+ parts.append(f"Status: {self.status_code}")
36
+ if self.response_text:
37
+ parts.append(f"Response Text: {self.response_text}")
38
+ if self.response_json:
39
+ parts.append(f"Response JSON: {self.response_json}")
40
+ if self.response_headers:
41
+ parts.append(f"Headers: {self.response_headers}")
42
+
43
+ return " | ".join(parts)
44
+
45
+ @classmethod
46
+ def from_http_error(cls, error: httpx.HTTPStatusError) -> RequestError:
47
+ """Create a RequestError from an HTTP error response"""
48
+ response = error.response
49
+ status_code = response.status_code
50
+ response_text = response.text
51
+ response_headers = dict(response.headers)
52
+
53
+ # Try to get detailed error info from JSON if available
54
+ response_json = None
55
+ try:
56
+ response_json = response.json()
57
+ detail = response_json.get("detail")
58
+ if detail:
59
+ message = f"Request failed: {detail}"
60
+ else:
61
+ # If no detail field but we have JSON, include a summary
62
+ message = f"Request failed with status {status_code}"
63
+ if (
64
+ len(response_json) <= 5
65
+ ): # If it's a small object, include it in the message
66
+ message += f" - JSON response: {response_json}"
67
+ except Exception:
68
+ # Fallback to simple message if JSON parsing fails
69
+ message = f"Request failed with status {status_code}"
70
+
71
+ # Log the error details
72
+ logger.error(
73
+ "HTTP error from HUD SDK: %s | URL: %s | Status: %s | Response: %s%s",
74
+ message,
75
+ response.url,
76
+ status_code,
77
+ response_text[:500],
78
+ "..." if len(response_text) > 500 else ""
79
+ )
16
80
 
81
+ return cls(
82
+ message=message,
83
+ status_code=status_code,
84
+ response_text=response_text,
85
+ response_json=response_json,
86
+ response_headers=response_headers,
87
+ )
17
88
 
18
89
  async def make_request(
19
90
  method: str, url: str, json: Any | None = None, api_key: str | None = None
@@ -40,11 +111,19 @@ async def make_request(
40
111
 
41
112
  async with httpx.AsyncClient(timeout=240.0) as client:
42
113
  try:
43
- response = await client.request(method=method, url=url, json=json, headers=headers)
114
+ response = await client.request(
115
+ method=method, url=url, json=json, headers=headers
116
+ )
44
117
  response.raise_for_status()
45
- return response.json()
46
- except httpx.HTTPError as e:
47
- raise RequestError(f"Request failed: {e!s}") from None
118
+ result = response.json()
119
+ return result
120
+ except httpx.HTTPStatusError as e:
121
+ raise RequestError.from_http_error(e) from None
122
+ except httpx.RequestError as e:
123
+ raise RequestError(f"Network error: {e!s}") from None
124
+ except Exception as e:
125
+ # Catch-all for unexpected errors
126
+ raise RequestError(f"Unexpected error: {e!s}") from None
48
127
 
49
128
 
50
129
  def make_sync_request(
@@ -72,8 +151,16 @@ def make_sync_request(
72
151
 
73
152
  with httpx.Client(timeout=240.0) as client:
74
153
  try:
75
- response = client.request(method=method, url=url, json=json, headers=headers)
154
+ response = client.request(
155
+ method=method, url=url, json=json, headers=headers
156
+ )
76
157
  response.raise_for_status()
77
- return response.json()
78
- except httpx.HTTPError as e:
79
- raise RequestError(f"Request failed: {e!s}") from None
158
+ result = response.json()
159
+ return result
160
+ except httpx.HTTPStatusError as e:
161
+ raise RequestError.from_http_error(e) from None
162
+ except httpx.RequestError as e:
163
+ raise RequestError(f"Network error: {e!s}") from None
164
+ except Exception as e:
165
+ # Catch-all for unexpected errors
166
+ raise RequestError(f"Unexpected error: {e!s}") from None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.1.0
3
+ Version: 0.1.0b2
4
4
  Summary: SDK for the HUD evaluation platform.
5
5
  Project-URL: Homepage, https://github.com/Human-Data/hud-sdk
6
6
  Project-URL: Bug Tracker, https://github.com/Human-Data/hud-sdk/issues
@@ -44,6 +44,7 @@ Requires-Dist: pydantic-settings<3,>=2
44
44
  Requires-Dist: pydantic<3,>=2
45
45
  Provides-Extra: dev
46
46
  Requires-Dist: anthropic; extra == 'dev'
47
+ Requires-Dist: dotenv; extra == 'dev'
47
48
  Requires-Dist: ipykernel; extra == 'dev'
48
49
  Requires-Dist: ipython<9; extra == 'dev'
49
50
  Requires-Dist: jupyter-client; extra == 'dev'
@@ -54,38 +55,40 @@ Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
54
55
  Requires-Dist: ruff==0.9.8; extra == 'dev'
55
56
  Description-Content-Type: text/markdown
56
57
 
57
- # HUD SDK (Alpha Release)
58
+ # HUD
58
59
 
59
- A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models.
60
+ A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models. Visit [hud.so](https://hud.so).
60
61
 
61
- Visit [hud.so](https://hud.so) for more information about HUD.
62
-
63
- > **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is still evolving and may change in future releases as we gather feedback and improve functionality.
62
+ > **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is evolving and may change in future releases as we gather feedback and improve functionality.
64
63
 
65
64
  [![PyPI version](https://img.shields.io/pypi/v/hud-python)](https://pypi.org/project/hud-python/)
66
65
 
67
- [📚 Documentation](https://docs.hud.so) | [🏠 Homepage](https://hud.so)
66
+ [📚 Documentation](https://documentation.hud.so) | [🏠 Homepage](https://hud.so)
67
+
68
+
69
+ ## Quick start
68
70
 
69
- ## Quick Start
71
+ [RECOMMENDED] To get started with an agent, see the [Claude Computer use example](https://github.com/Human-Data/hud-sdk/tree/main/examples).
70
72
 
73
+
74
+ Otherwise, install the package with Python>=3.9:
71
75
  ```bash
72
- # Install the latest stable release
73
76
  pip install hud-python
77
+ ```
74
78
 
75
- # Install the latest alpha release (may include breaking changes)
76
- pip install --pre hud-python
77
-
78
- # Install a specific alpha version
79
- pip install hud-python==0.1.0-alpha
79
+ Make sure to set up your account [here](https://hud.so/settings) and add your API key to the environment variables:
80
+ ```bash
81
+ HUD_API_KEY=<your-api-key>
80
82
  ```
81
83
 
84
+ Load in your agent and create a run! Go to the [examples](https://github.com/Human-Data/hud-sdk/tree/main/examples) folder for more examples.
82
85
  ```python
83
86
  import asyncio
84
87
  from hud import HUDClient
85
88
 
86
89
  async def main():
87
90
  # Initialize client with API key
88
- client = HUDClient(api_key="your-api-key")
91
+ client = HUDClient(api_key=os.getenv("HUD_API_KEY"))
89
92
 
90
93
  # Load a gym and evaluation set
91
94
  gym = await client.load_gym(id="OSWorld-Ubuntu")
@@ -93,24 +96,33 @@ async def main():
93
96
 
94
97
  # Create a run and environment
95
98
  run = client.create_run(name="example-run", gym=gym, evalset=evalset)
96
- env = await run.make(metadata={"agent_id": "example"})
99
+ env = await run.make(metadata={"agent_id": "OSWORLD-1"})
100
+ await env.wait_for_ready()
101
+
102
+ ###
103
+ ### Agent loop goes here, see example in /examples
104
+ ###
97
105
 
98
- # Agent loop goes here
99
- # For complete examples and usage guides, see our documentation
106
+ # Evaluate the environment
107
+ result = await env.evaluate()
100
108
 
101
109
  # Close the environment when done
102
110
  await env.close()
103
111
 
112
+ # Get analytics for the run such as rewards, task completions, etc.
113
+ analytics = await run.get_analytics()
114
+ print(analytics)
115
+
104
116
  if __name__ == "__main__":
105
117
  asyncio.run(main())
106
118
  ```
107
119
 
108
- ## Key Features
120
+ ## Features
109
121
 
110
122
  - Connect to HUD evaluation environments
111
123
  - Run benchmarks across various tasks
112
124
  - Support for different agent adapters
113
- - Asynchronous API for efficient interaction
125
+ - Asynchronous API
114
126
 
115
127
  ## Documentation
116
128
 
@@ -1,8 +1,8 @@
1
- hud/__init__.py,sha256=8o5QRfwUHXHoXvjzLM1HLZ3-DWpViMTumNexqVcXgRA,390
2
- hud/client.py,sha256=JNJGsuzBNDRShZ8OKSGeuABZZYpKwE_XC1lVbRMaUE8,5262
3
- hud/env.py,sha256=mgVbOpRX8ilG4BiIAN4ZFhzY5TAJBB9HX5LUh0FIcII,7383
1
+ hud/__init__.py,sha256=GmX-LujM2oZR6_tP_mOW09BY8HeK41lLF-P0sMW_1pY,416
2
+ hud/client.py,sha256=ztWPiAJyJUdJxdxGqDmsQnVK-_jccinWQUUXmq0OOmY,5843
3
+ hud/environment.py,sha256=R-t-21V0gveuHL6LlVLnBp0gYGm8tn5FbLcq_rRlH9g,8587
4
4
  hud/gym.py,sha256=dKmf0Ol0-XyLhji034pF_5dXnhW1IgIr-dJUg4KfslE,475
5
- hud/run.py,sha256=rxWtw1Pgm-KysxB2b_aqW4hVjBn5X0FkhD1489P3g8Y,4633
5
+ hud/run.py,sha256=_K7POPjJyqcJ_DVLAO7hRmvLUcg9gg2KrLHw_26DB9I,6570
6
6
  hud/settings.py,sha256=FbZHI1q6bDHe7Awl32SDPb-syqtkLI3C7gIIXuMXCiQ,1045
7
7
  hud/adapters/__init__.py,sha256=y3H7yMl7rC-rrXG2WvePdSojoNFSui02eYTH17Xd7OY,87
8
8
  hud/adapters/claude/__init__.py,sha256=GsMxaBL5ZuKV6-jJsLfw23n_Ml9e88SXIddYDGkIUKE,101
@@ -11,11 +11,11 @@ hud/adapters/common/__init__.py,sha256=BjdZWJVs_AKtpFrt-tNsdQRjnz7D97DFEQirJ-r0m
11
11
  hud/adapters/common/adapter.py,sha256=SCtOuRjW5Szzd45LXCaqDEaKr2lhA-nIqSEMJ9KLsKI,5799
12
12
  hud/adapters/common/types.py,sha256=LlWxH9sWucYgnIv6DKrgqToh3k7Bu-xdTxNFU4L8Xg8,1962
13
13
  hud/server/__init__.py,sha256=HeIXBGb-bxtq3xF20jP4IrOy77PlsqhClOf3bZ9wrwI,169
14
- hud/server/requests.py,sha256=kEMWt3k1DrvWa4iO1RyzD7PI0tEW29vkQzElAxMjHsQ,2240
14
+ hud/server/requests.py,sha256=M_pK1oCd4QjIE0yguD6iaybJ_mempOWDQYEpdOkophU,5522
15
15
  hud/utils/__init__.py,sha256=0m8klSLnMLeIJT23ipBXfFACk4hNWPsA6ZNqZDpv6oY,99
16
16
  hud/utils/config.py,sha256=dze0BGE4q14omjj9822kL9BeiIgWQvJyuU29A2wa1SE,193
17
17
  hud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- hud_python-0.1.0.dist-info/METADATA,sha256=F0D0V3taE7Bvtdzg_AgvaxWQrDppQm4pX_406SeQWXo,4663
19
- hud_python-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
20
- hud_python-0.1.0.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
21
- hud_python-0.1.0.dist-info/RECORD,,
18
+ hud_python-0.1.0b2.dist-info/METADATA,sha256=5skHs5IfSJP4DQAGzpuo_yjO7l65XxHIPWGitHQt0Ug,5140
19
+ hud_python-0.1.0b2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
20
+ hud_python-0.1.0b2.dist-info/licenses/LICENSE,sha256=IVdfcZ8xq5apYGJS5GzRLLbm9r03Aecxd03isi-3P9k,1075
21
+ hud_python-0.1.0b2.dist-info/RECORD,,