hud-python 0.1.0__tar.gz → 0.1.0b2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (54) hide show
  1. {hud_python-0.1.0 → hud_python-0.1.0b2}/PKG-INFO +32 -20
  2. hud_python-0.1.0b2/README.md +80 -0
  3. {hud_python-0.1.0 → hud_python-0.1.0b2}/agent/base.py +4 -1
  4. {hud_python-0.1.0 → hud_python-0.1.0b2}/agent/claude.py +13 -13
  5. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/installation.mdx +1 -1
  6. hud_python-0.1.0b2/examples/README.md +44 -0
  7. hud_python-0.1.0b2/examples/claude_osworld.ipynb +154 -0
  8. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/__init__.py +3 -3
  9. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/client.py +19 -4
  10. hud_python-0.1.0/hud/env.py → hud_python-0.1.0b2/hud/environment.py +41 -2
  11. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/run.py +62 -9
  12. hud_python-0.1.0b2/hud/server/requests.py +166 -0
  13. {hud_python-0.1.0 → hud_python-0.1.0b2}/pyproject.toml +2 -1
  14. {hud_python-0.1.0 → hud_python-0.1.0b2}/tests/test_import.py +1 -1
  15. hud_python-0.1.0/README.md +0 -69
  16. hud_python-0.1.0/examples/README.md +0 -22
  17. hud_python-0.1.0/examples/basic_usage.py +0 -81
  18. hud_python-0.1.0/examples/claude_agent_example.py +0 -134
  19. hud_python-0.1.0/examples/simple_agent_example.py +0 -162
  20. hud_python-0.1.0/hud/server/requests.py +0 -79
  21. {hud_python-0.1.0 → hud_python-0.1.0b2}/.env.example +0 -0
  22. {hud_python-0.1.0 → hud_python-0.1.0b2}/.github/workflows/ci.yml +0 -0
  23. {hud_python-0.1.0 → hud_python-0.1.0b2}/.github/workflows/release.yml +0 -0
  24. {hud_python-0.1.0 → hud_python-0.1.0b2}/.gitignore +0 -0
  25. {hud_python-0.1.0 → hud_python-0.1.0b2}/LICENSE +0 -0
  26. {hud_python-0.1.0 → hud_python-0.1.0b2}/MANIFEST.in +0 -0
  27. {hud_python-0.1.0 → hud_python-0.1.0b2}/agent/response_agent.py +0 -0
  28. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/api-reference/adapters.mdx +0 -0
  29. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/api-reference/client.mdx +0 -0
  30. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/api-reference/env.mdx +0 -0
  31. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/concepts/adapter.mdx +0 -0
  32. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/concepts/client.mdx +0 -0
  33. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/concepts/environment.mdx +0 -0
  34. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/concepts/gym.mdx +0 -0
  35. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/examples/basic.mdx +0 -0
  36. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/examples/claude-agent.mdx +0 -0
  37. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/examples/custom-agent.mdx +0 -0
  38. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/introduction.mdx +0 -0
  39. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/logo/HUD.svg +0 -0
  40. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/mint.json +0 -0
  41. {hud_python-0.1.0 → hud_python-0.1.0b2}/docs/quickstart.mdx +0 -0
  42. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/adapters/__init__.py +0 -0
  43. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/adapters/claude/__init__.py +0 -0
  44. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/adapters/claude/adapter.py +0 -0
  45. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/adapters/common/__init__.py +0 -0
  46. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/adapters/common/adapter.py +0 -0
  47. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/adapters/common/types.py +0 -0
  48. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/gym.py +0 -0
  49. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/py.typed +0 -0
  50. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/server/__init__.py +0 -0
  51. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/settings.py +0 -0
  52. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/utils/__init__.py +0 -0
  53. {hud_python-0.1.0 → hud_python-0.1.0b2}/hud/utils/config.py +0 -0
  54. {hud_python-0.1.0 → hud_python-0.1.0b2}/tests/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.1.0
3
+ Version: 0.1.0b2
4
4
  Summary: SDK for the HUD evaluation platform.
5
5
  Project-URL: Homepage, https://github.com/Human-Data/hud-sdk
6
6
  Project-URL: Bug Tracker, https://github.com/Human-Data/hud-sdk/issues
@@ -44,6 +44,7 @@ Requires-Dist: pydantic-settings<3,>=2
44
44
  Requires-Dist: pydantic<3,>=2
45
45
  Provides-Extra: dev
46
46
  Requires-Dist: anthropic; extra == 'dev'
47
+ Requires-Dist: dotenv; extra == 'dev'
47
48
  Requires-Dist: ipykernel; extra == 'dev'
48
49
  Requires-Dist: ipython<9; extra == 'dev'
49
50
  Requires-Dist: jupyter-client; extra == 'dev'
@@ -54,38 +55,40 @@ Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
54
55
  Requires-Dist: ruff==0.9.8; extra == 'dev'
55
56
  Description-Content-Type: text/markdown
56
57
 
57
- # HUD SDK (Alpha Release)
58
+ # HUD
58
59
 
59
- A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models.
60
+ A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models. Visit [hud.so](https://hud.so).
60
61
 
61
- Visit [hud.so](https://hud.so) for more information about HUD.
62
-
63
- > **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is still evolving and may change in future releases as we gather feedback and improve functionality.
62
+ > **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is evolving and may change in future releases as we gather feedback and improve functionality.
64
63
 
65
64
  [![PyPI version](https://img.shields.io/pypi/v/hud-python)](https://pypi.org/project/hud-python/)
66
65
 
67
- [📚 Documentation](https://docs.hud.so) | [🏠 Homepage](https://hud.so)
66
+ [📚 Documentation](https://documentation.hud.so) | [🏠 Homepage](https://hud.so)
67
+
68
+
69
+ ## Quick start
68
70
 
69
- ## Quick Start
71
+ [RECOMMENDED] To get started with an agent, see the [Claude Computer use example](https://github.com/Human-Data/hud-sdk/tree/main/examples).
70
72
 
73
+
74
+ Otherwise, install the package with Python>=3.9:
71
75
  ```bash
72
- # Install the latest stable release
73
76
  pip install hud-python
77
+ ```
74
78
 
75
- # Install the latest alpha release (may include breaking changes)
76
- pip install --pre hud-python
77
-
78
- # Install a specific alpha version
79
- pip install hud-python==0.1.0-alpha
79
+ Make sure to set up your account [here](https://hud.so/settings) and add your API key to the environment variables:
80
+ ```bash
81
+ HUD_API_KEY=<your-api-key>
80
82
  ```
81
83
 
84
+ Load in your agent and create a run! Go to the [examples](https://github.com/Human-Data/hud-sdk/tree/main/examples) folder for more examples.
82
85
  ```python
83
86
  import asyncio
84
87
  from hud import HUDClient
85
88
 
86
89
  async def main():
87
90
  # Initialize client with API key
88
- client = HUDClient(api_key="your-api-key")
91
+ client = HUDClient(api_key=os.getenv("HUD_API_KEY"))
89
92
 
90
93
  # Load a gym and evaluation set
91
94
  gym = await client.load_gym(id="OSWorld-Ubuntu")
@@ -93,24 +96,33 @@ async def main():
93
96
 
94
97
  # Create a run and environment
95
98
  run = client.create_run(name="example-run", gym=gym, evalset=evalset)
96
- env = await run.make(metadata={"agent_id": "example"})
99
+ env = await run.make(metadata={"agent_id": "OSWORLD-1"})
100
+ await env.wait_for_ready()
101
+
102
+ ###
103
+ ### Agent loop goes here, see example in /examples
104
+ ###
97
105
 
98
- # Agent loop goes here
99
- # For complete examples and usage guides, see our documentation
106
+ # Evaluate the environment
107
+ result = await env.evaluate()
100
108
 
101
109
  # Close the environment when done
102
110
  await env.close()
103
111
 
112
+ # Get analytics for the run such as rewards, task completions, etc.
113
+ analytics = await run.get_analytics()
114
+ print(analytics)
115
+
104
116
  if __name__ == "__main__":
105
117
  asyncio.run(main())
106
118
  ```
107
119
 
108
- ## Key Features
120
+ ## Features
109
121
 
110
122
  - Connect to HUD evaluation environments
111
123
  - Run benchmarks across various tasks
112
124
  - Support for different agent adapters
113
- - Asynchronous API for efficient interaction
125
+ - Asynchronous API
114
126
 
115
127
  ## Documentation
116
128
 
@@ -0,0 +1,80 @@
1
+ # HUD
2
+
3
+ A Python SDK for interacting with HUD environments and evaluation benchmarks for browser use and computer use models. Visit [hud.so](https://hud.so).
4
+
5
+ > **Alpha Release Notice**: This SDK is currently in alpha status (v0.1.0-alpha). The API is evolving and may change in future releases as we gather feedback and improve functionality.
6
+
7
+ [![PyPI version](https://img.shields.io/pypi/v/hud-python)](https://pypi.org/project/hud-python/)
8
+
9
+ [📚 Documentation](https://documentation.hud.so) | [🏠 Homepage](https://hud.so)
10
+
11
+
12
+ ## Quick start
13
+
14
+ [RECOMMENDED] To get started with an agent, see the [Claude Computer use example](https://github.com/Human-Data/hud-sdk/tree/main/examples).
15
+
16
+
17
+ Otherwise, install the package with Python>=3.9:
18
+ ```bash
19
+ pip install hud-python
20
+ ```
21
+
22
+ Make sure to set up your account [here](https://hud.so/settings) and add your API key to the environment variables:
23
+ ```bash
24
+ HUD_API_KEY=<your-api-key>
25
+ ```
26
+
27
+ Load in your agent and create a run! Go to the [examples](https://github.com/Human-Data/hud-sdk/tree/main/examples) folder for more examples.
28
+ ```python
29
+ import asyncio
30
+ from hud import HUDClient
31
+
32
+ async def main():
33
+ # Initialize client with API key
34
+ client = HUDClient(api_key=os.getenv("HUD_API_KEY"))
35
+
36
+ # Load a gym and evaluation set
37
+ gym = await client.load_gym(id="OSWorld-Ubuntu")
38
+ evalset = await client.load_evalset(id="OSWorld-Ubuntu")
39
+
40
+ # Create a run and environment
41
+ run = client.create_run(name="example-run", gym=gym, evalset=evalset)
42
+ env = await run.make(metadata={"agent_id": "OSWORLD-1"})
43
+ await env.wait_for_ready()
44
+
45
+ ###
46
+ ### Agent loop goes here, see example in /examples
47
+ ###
48
+
49
+ # Evaluate the environment
50
+ result = await env.evaluate()
51
+
52
+ # Close the environment when done
53
+ await env.close()
54
+
55
+ # Get analytics for the run such as rewards, task completions, etc.
56
+ analytics = await run.get_analytics()
57
+ print(analytics)
58
+
59
+ if __name__ == "__main__":
60
+ asyncio.run(main())
61
+ ```
62
+
63
+ ## Features
64
+
65
+ - Connect to HUD evaluation environments
66
+ - Run benchmarks across various tasks
67
+ - Support for different agent adapters
68
+ - Asynchronous API
69
+
70
+ ## Documentation
71
+
72
+ For comprehensive guides, examples, and API reference, visit:
73
+ - [Getting Started](https://docs.hud.so/introduction)
74
+ - [Installation](https://docs.hud.so/installation)
75
+ - [API Reference](https://docs.hud.so/api-reference)
76
+ - [Examples](https://docs.hud.so/examples)
77
+
78
+ ## License
79
+
80
+ [MIT License](LICENSE)
@@ -1,5 +1,8 @@
1
+ from typing import Any
2
+
1
3
  class Agent:
2
- def __init__(self):
4
+ def __init__(self, client: Any):
5
+ self.client = client
3
6
  self.messages = []
4
7
  self.responses = []
5
8
 
@@ -2,19 +2,18 @@ import os
2
2
  import json
3
3
  from agent.base import Agent
4
4
  from anthropic import Anthropic
5
+ from anthropic.types import Message
5
6
 
6
-
7
- class Claude(Agent):
8
- def __init__(self):
9
- super().__init__()
10
- self.client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
7
+ class ClaudeAgent(Agent):
8
+ def __init__(self, client: Anthropic):
9
+ super().__init__(client)
11
10
  self.model = "claude-3-7-sonnet-20250219"
12
11
  self.max_tokens = 4096
13
12
  self.tool_version = "20250124"
14
13
  self.thinking_budget = 1024
15
14
  self.conversation = [] # Store the full conversation history including Claude's responses
16
15
 
17
- async def predict(self, base64_image: str | None = None, input_text: str | None = None):
16
+ async def predict(self, base64_image: str | None = None, input_text: str | None = None) -> tuple[bool, str | object | None]:
18
17
  message = self._create_message(base64_image, input_text)
19
18
 
20
19
  # Only append the message if it's not empty
@@ -33,7 +32,10 @@ class Claude(Agent):
33
32
  self.conversation.append(assistant_message)
34
33
 
35
34
  self.responses.append(response)
36
- return response
35
+
36
+ done, processed = await self.process_response(response)
37
+
38
+ return done, processed
37
39
 
38
40
  def _create_message(self, base64_image: str | None = None, input_text: str | None = None):
39
41
  """Create appropriate message based on context and inputs"""
@@ -120,19 +122,17 @@ class Claude(Agent):
120
122
  except Exception as e:
121
123
  raise
122
124
 
123
- def process_response(self, response: dict) -> tuple[bool, str | None]:
125
+ async def process_response(self, response: Message) -> tuple[bool, str | object | None]:
124
126
  # Check if response contains a computer tool use
125
- has_computer_tool_use = False
126
127
  computer_action = None
127
- for block in response["content"]:
128
+ for block in response.content:
128
129
  if block.type == "tool_use" and block.name == "computer":
129
- has_computer_tool_use = True
130
130
  computer_action = block.input
131
131
  break
132
132
 
133
- if not has_computer_tool_use:
133
+ if response.content[-1].type == "text":
134
134
  # No computer tool use, treat as final response
135
- return True, str(response["content"][-1].text)
135
+ return True, str(response.content[-1].text)
136
136
 
137
137
  # If we have a computer action, adapt it to environment actions
138
138
  if computer_action:
@@ -15,7 +15,7 @@ pip install hud-python
15
15
  pip install --pre hud-python
16
16
 
17
17
  # Install a specific alpha version
18
- pip install hud-python==0.1.0-alpha
18
+ pip install hud-python==0.1.0
19
19
  ```
20
20
 
21
21
  ## Alpha Release Status
@@ -0,0 +1,44 @@
1
+ ## Claude Computer Use evaluation on OSWorld
2
+
3
+ ### 1. Setup
4
+
5
+ Step 1: Install from the source repository:
6
+
7
+ ```bash
8
+ # Clone the repository
9
+ git clone https://github.com/Human-Data/hud-sdk.git
10
+ cd hud-sdk
11
+ ```
12
+
13
+ Step 2: Create a virtual environment:
14
+ ```bash
15
+ # Option 1: using venv
16
+ python -m venv .venv
17
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
18
+
19
+ # Option 2: using uv (recommended)
20
+ uv venv
21
+ # Then activate according to your shell
22
+ ```
23
+
24
+ Step 3: Install in development mode with all dependencies:
25
+ ```bash
26
+ # Option 1: using pip
27
+ pip install -e ".[dev]"
28
+
29
+ # Option 2: using uv (recommended)
30
+ uv pip install -e ".[dev]"
31
+ ```
32
+
33
+ ### 2. Set up environment variables
34
+
35
+ ```bash
36
+ HUD_API_KEY=<your-api-key>
37
+ ANTHROPIC_API_KEY=<your-api-key>
38
+ ```
39
+
40
+ ### 3. Run the OSWorld example
41
+
42
+ Explore the [claude_osworld.ipynb](https://github.com/Human-Data/hud-sdk/blob/main/examples/claude_osworld.ipynb) notebook from this folder in Jupyter Notebook.
43
+
44
+
@@ -0,0 +1,154 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "from dotenv import load_dotenv\n",
11
+ "load_dotenv()\n",
12
+ "\n",
13
+ "from hud import HUDClient\n",
14
+ "from hud.adapters.claude.adapter import ClaudeAdapter\n",
15
+ "from agent.claude import ClaudeAgent\n",
16
+ "\n",
17
+ "from anthropic import Anthropic"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 2,
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "# initialize HUD client\n",
27
+ "client = HUDClient(api_key=os.getenv(\"HUD_API_KEY\"))\n",
28
+ "\n",
29
+ "# initialize Claude Computer Use agent\n",
30
+ "anthropic = Anthropic(api_key=os.getenv(\"ANTHROPIC_API_KEY\"))\n",
31
+ "agent = ClaudeAgent(anthropic)\n",
32
+ "\n",
33
+ "# initialize adapter to interact with the environment\n",
34
+ "cua_adapter = ClaudeAdapter()"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": null,
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "# load OSWorld environment\n",
44
+ "gym = await client.load_gym(id=\"OSWorld-Ubuntu\")\n",
45
+ "\n",
46
+ "# load OSWorld evalset\n",
47
+ "evalset = await client.load_evalset(id=\"OSWorld-Ubuntu\")\n",
48
+ "\n",
49
+ "# create a run that will host all evaluations\n",
50
+ "run = client.create_run(name=\"Claude-test-OSWorld\", gym=gym, evalset=evalset)\n",
51
+ "\n",
52
+ "# fetch all task ids from the run\n",
53
+ "tasks = await run.fetch_task_ids()\n",
54
+ "print(f\"Total tasks in OSWorld: {len(tasks)}\")"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": null,
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "# it may take around 3 minutes to initialize the OSWorld platform and reset to a task\n",
64
+ "\n",
65
+ "# make a HUD environment\n",
66
+ "env = await run.make()\n",
67
+ "await env.wait_for_ready()\n",
68
+ "\n",
69
+ "# reset to a task with an observation (screenshot and text)\n",
70
+ "obs = await env.reset(task_id=tasks[1])\n",
71
+ "print(f\"Task description: {obs.text}\")\n",
72
+ "\n",
73
+ "# watch the agent live\n",
74
+ "live_url = await env.get_vnc_url()\n",
75
+ "client.display_stream(live_url)"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": null,
81
+ "metadata": {},
82
+ "outputs": [],
83
+ "source": [
84
+ "# agent loop\n",
85
+ "for i in range(8):\n",
86
+ " # rescale screenshot to Claude's resolution\n",
87
+ " screenshot = cua_adapter.rescale(obs.screenshot)\n",
88
+ "\n",
89
+ " # agent's next action\n",
90
+ " done, response = await agent.predict(screenshot, obs.text)\n",
91
+ " if done:\n",
92
+ " env.final_response = str(response)\n",
93
+ " break\n",
94
+ "\n",
95
+ " # convert to HUD action space\n",
96
+ " actions = cua_adapter.adapt_list([response])\n",
97
+ " print(f\"Agent's action: {response}\")\n",
98
+ "\n",
99
+ " # step the environment forward\n",
100
+ " obs, reward, terminated, info = await env.step(actions)\n",
101
+ "\n",
102
+ " # drop out if terminated\n",
103
+ " if terminated:\n",
104
+ " break\n",
105
+ " print(f\"Step {i+1} completed\")\n"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": null,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "# evaluate environment state\n",
115
+ "result = await env.evaluate()\n",
116
+ "print(f\"Evaluation result: {result}\")\n",
117
+ "\n",
118
+ "# close environment\n",
119
+ "await env.close()"
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "code",
124
+ "execution_count": null,
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "analytics = await run.get_analytics()\n",
129
+ "print(analytics)"
130
+ ]
131
+ }
132
+ ],
133
+ "metadata": {
134
+ "kernelspec": {
135
+ "display_name": ".venv",
136
+ "language": "python",
137
+ "name": "python3"
138
+ },
139
+ "language_info": {
140
+ "codemirror_mode": {
141
+ "name": "ipython",
142
+ "version": 3
143
+ },
144
+ "file_extension": ".py",
145
+ "mimetype": "text/x-python",
146
+ "name": "python",
147
+ "nbconvert_exporter": "python",
148
+ "pygments_lexer": "ipython3",
149
+ "version": "3.12.9"
150
+ }
151
+ },
152
+ "nbformat": 4,
153
+ "nbformat_minor": 2
154
+ }
@@ -5,14 +5,14 @@ HUD Gym SDK - A Python SDK for interacting with HUD environments.
5
5
  from __future__ import annotations
6
6
 
7
7
  from hud.client import HUDClient
8
- from hud.env import Env, EvalSet, Observation, TaskResult
8
+ from hud.environment import Environment, EvalSet, Observation, TaskResult
9
9
  from hud.gym import Gym
10
10
  from hud.run import Run
11
11
 
12
- __version__ = "0.1.0"
12
+ __version__ = "0.1.0b2"
13
13
 
14
14
  __all__ = [
15
- "Env",
15
+ "Environment",
16
16
  "EvalSet",
17
17
  "Gym",
18
18
  "HUDClient",
@@ -8,7 +8,7 @@ import json
8
8
  from typing import Any
9
9
 
10
10
  from .adapters.common import Adapter
11
- from .env import EvalSet
11
+ from .environment import EvalSet
12
12
  from .gym import Gym
13
13
  from .run import Run, RunResponse
14
14
  from .server import make_request, make_sync_request
@@ -23,15 +23,15 @@ class HUDClient:
23
23
  evalsets, and create runs.
24
24
  """
25
25
 
26
- def __init__(self, api_key: str) -> None:
26
+ def __init__(self, api_key: str | None = None) -> None:
27
27
  """
28
28
  Initialize the HUD client with an API key.
29
29
 
30
30
  Args:
31
31
  api_key: API key for authentication with the HUD API
32
32
  """
33
- self.api_key = api_key
34
- settings.api_key = api_key # Set global config
33
+ self.api_key = api_key or settings.api_key
34
+ settings.api_key = self.api_key
35
35
 
36
36
  async def load_gym(self, id: str) -> Gym:
37
37
  """
@@ -182,3 +182,18 @@ class HUDClient:
182
182
  config=config,
183
183
  metadata=metadata,
184
184
  )
185
+
186
+ def display_stream(self, live_url: str) -> None:
187
+ """
188
+ Display a stream in the HUD system.
189
+ """
190
+ from IPython.display import HTML, display
191
+ html_content = f"""
192
+ <div style="width: 960px; height: 540px; overflow: hidden;">
193
+ <div style="transform: scale(0.5); transform-origin: top left;">
194
+ <iframe src="{live_url}" width="1920" height="1080" style="border: 1px solid #ddd;">
195
+ </iframe>
196
+ </div>
197
+ </div>
198
+ """
199
+ display(HTML(html_content))
@@ -1,5 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import asyncio
4
+ import enum
5
+ import logging
3
6
  from typing import TYPE_CHECKING, Any
4
7
 
5
8
  from pydantic import BaseModel
@@ -10,6 +13,7 @@ from hud.settings import settings
10
13
  if TYPE_CHECKING:
11
14
  from .adapters.common import Adapter
12
15
 
16
+ logger = logging.getLogger("hud.environment")
13
17
 
14
18
  class Observation(BaseModel):
15
19
  """
@@ -38,8 +42,29 @@ class TaskResult(BaseModel):
38
42
  terminated: bool
39
43
  info: dict[str, Any]
40
44
 
45
+ class EnvironmentStatus(str, enum.Enum):
46
+ """
47
+ Status of the environment.
48
+
49
+ Attributes:
50
+ INITIALIZING: The environment is initializing
51
+ RUNNING: The environment is running
52
+ COMPLETED: The environment is completed
53
+ ERROR: The environment is in an error state
54
+ """
55
+ INITIALIZING = "initializing"
56
+ RUNNING = "running"
57
+ COMPLETED = "completed"
58
+ ERROR = "error"
59
+
60
+
61
+ status_messages = {
62
+ EnvironmentStatus.RUNNING.value: "is running",
63
+ EnvironmentStatus.ERROR.value: "had an error initializing",
64
+ EnvironmentStatus.COMPLETED.value: "completed",
65
+ }
41
66
 
42
- class Env:
67
+ class Environment:
43
68
  """
44
69
  Environment interface for agent interactions.
45
70
 
@@ -192,7 +217,9 @@ class Env:
192
217
  api_key=settings.api_key,
193
218
  )
194
219
 
195
- async def reset(self, task_id: str, metadata: dict[str, Any] | None = None) -> Observation:
220
+ async def reset(
221
+ self, task_id: str, metadata: dict[str, Any] | None = None
222
+ ) -> Observation:
196
223
  """
197
224
  Reset the environment to the task.
198
225
 
@@ -213,6 +240,18 @@ class Env:
213
240
  )
214
241
  return Observation(**data["observation"])
215
242
 
243
+ async def wait_for_ready(self) -> None:
244
+ """Wait for the environment to be ready"""
245
+ while True:
246
+ state = await self.get_env_state()
247
+ if state in (
248
+ EnvironmentStatus.RUNNING.value,
249
+ EnvironmentStatus.ERROR.value,
250
+ EnvironmentStatus.COMPLETED.value,
251
+ ):
252
+ logger.info("Environment %s %s", self.id, status_messages.get(state))
253
+ break
254
+ await asyncio.sleep(10)
216
255
 
217
256
  class EvalSet:
218
257
  """