hud-python 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (50) hide show
  1. hud/__init__.py +22 -2
  2. hud/adapters/claude/adapter.py +9 -2
  3. hud/adapters/claude/tests/__init__.py +1 -0
  4. hud/adapters/claude/tests/test_adapter.py +519 -0
  5. hud/adapters/common/types.py +5 -1
  6. hud/adapters/operator/adapter.py +4 -0
  7. hud/adapters/operator/tests/__init__.py +1 -0
  8. hud/adapters/operator/tests/test_adapter.py +370 -0
  9. hud/agent/__init__.py +4 -0
  10. hud/agent/base.py +18 -2
  11. hud/agent/claude.py +20 -17
  12. hud/agent/claude_plays_pokemon.py +282 -0
  13. hud/agent/langchain.py +12 -7
  14. hud/agent/misc/__init__.py +3 -0
  15. hud/agent/misc/response_agent.py +80 -0
  16. hud/agent/operator.py +27 -19
  17. hud/agent/tests/__init__.py +1 -0
  18. hud/agent/tests/test_base.py +202 -0
  19. hud/env/docker_client.py +28 -18
  20. hud/env/environment.py +32 -16
  21. hud/env/local_docker_client.py +83 -42
  22. hud/env/remote_client.py +1 -3
  23. hud/env/remote_docker_client.py +72 -15
  24. hud/exceptions.py +12 -0
  25. hud/gym.py +71 -53
  26. hud/job.py +52 -7
  27. hud/settings.py +6 -0
  28. hud/task.py +45 -33
  29. hud/taskset.py +44 -4
  30. hud/telemetry/__init__.py +21 -0
  31. hud/telemetry/_trace.py +173 -0
  32. hud/telemetry/context.py +193 -0
  33. hud/telemetry/exporter.py +417 -0
  34. hud/telemetry/instrumentation/__init__.py +3 -0
  35. hud/telemetry/instrumentation/mcp.py +498 -0
  36. hud/telemetry/instrumentation/registry.py +59 -0
  37. hud/telemetry/mcp_models.py +331 -0
  38. hud/telemetry/tests/__init__.py +1 -0
  39. hud/telemetry/tests/test_context.py +203 -0
  40. hud/telemetry/tests/test_trace.py +270 -0
  41. hud/types.py +10 -26
  42. hud/utils/common.py +22 -2
  43. hud/utils/misc.py +53 -0
  44. hud/utils/tests/test_version.py +1 -1
  45. hud/version.py +7 -0
  46. {hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/METADATA +90 -22
  47. hud_python-0.2.5.dist-info/RECORD +84 -0
  48. hud_python-0.2.4.dist-info/RECORD +0 -62
  49. {hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/WHEEL +0 -0
  50. {hud_python-0.2.4.dist-info → hud_python-0.2.5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,202 @@
1
+ from __future__ import annotations
2
+
3
+ from unittest.mock import MagicMock, patch
4
+ from typing import Any
5
+
6
+ import pytest
7
+
8
+ from hud.agent.base import Agent
9
+ from hud.adapters import Adapter
10
+ from hud.adapters.common.types import ClickAction, Point
11
+ from hud.utils.common import Observation
12
+
13
+
14
class ConcreteAgent(Agent[Any, dict[str, Any]]):
    """Minimal Agent subclass whose model responses are scripted by the test."""

    def __init__(self, client: Any = None, adapter: Adapter | None = None):
        super().__init__(client, adapter)
        # Scripted (actions, done) tuples, consumed in order by fetch_response.
        self.mock_responses = []
        # Index of the next scripted response to hand out.
        self.call_count = 0

    async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
        """Return the next scripted response, or ``([], True)`` once none are left."""
        position = self.call_count
        if position >= len(self.mock_responses):
            return [], True

        scripted = self.mock_responses[position]
        self.call_count = position + 1
        return scripted
29
+
30
+
31
class TestAgentBase:
    """Exercise preprocess, postprocess and predict on the Agent base class."""

    @pytest.fixture
    def mock_client(self):
        """Provide a stand-in model client."""
        return MagicMock()

    @pytest.fixture
    def mock_adapter(self):
        """Provide an Adapter double with canned rescale/adapt_list results."""
        fake_adapter = MagicMock(spec=Adapter)
        fake_adapter.adapt_list.return_value = [ClickAction(point=Point(x=100, y=200))]
        fake_adapter.rescale.return_value = "rescaled_screenshot"
        return fake_adapter

    @pytest.fixture
    def agent_with_adapter(self, mock_client, mock_adapter):
        """Provide an agent wired to both the client and the adapter doubles."""
        return ConcreteAgent(client=mock_client, adapter=mock_adapter)

    @pytest.fixture
    def agent_without_adapter(self, mock_client):
        """Provide an agent that has a client but no adapter."""
        return ConcreteAgent(client=mock_client, adapter=None)

    def test_init_with_client_and_adapter(self, mock_client, mock_adapter):
        """The constructor stores the client and adapter it was given."""
        built = ConcreteAgent(client=mock_client, adapter=mock_adapter)
        assert built.client == mock_client
        assert built.adapter == mock_adapter

    def test_init_with_none_values(self):
        """The constructor accepts None for both client and adapter."""
        built = ConcreteAgent(client=None, adapter=None)
        assert built.client is None
        assert built.adapter is None

    def test_preprocess_without_adapter(self, agent_without_adapter):
        """Without an adapter, preprocess hands back an equal observation."""
        obs = Observation(text="test", screenshot="screenshot_data")
        processed = agent_without_adapter.preprocess(obs)

        assert processed == obs
        assert processed.text == "test"
        assert processed.screenshot == "screenshot_data"

    def test_preprocess_without_screenshot(self, agent_with_adapter):
        """Without a screenshot, preprocess hands back an equal observation."""
        obs = Observation(text="test", screenshot=None)
        processed = agent_with_adapter.preprocess(obs)

        assert processed == obs
        assert processed.text == "test"
        assert processed.screenshot is None

    def test_preprocess_with_adapter_and_screenshot(self, agent_with_adapter, mock_adapter):
        """With adapter and screenshot, preprocess rescales into a new observation."""
        obs = Observation(text="test", screenshot="original_screenshot")
        processed = agent_with_adapter.preprocess(obs)

        # The adapter is asked to rescale exactly the original screenshot.
        mock_adapter.rescale.assert_called_once_with("original_screenshot")
        assert processed.text == "test"
        assert processed.screenshot == "rescaled_screenshot"
        # The caller's observation object is not reused for the result.
        assert processed is not obs

    def test_postprocess_without_adapter(self, agent_without_adapter):
        """Postprocess raises ValueError when the agent has no adapter."""
        raw = [{"type": "click", "x": 100, "y": 200}]

        with pytest.raises(ValueError, match="Cannot postprocess actions without an adapter"):
            agent_without_adapter.postprocess(raw)

    def test_postprocess_with_adapter(self, agent_with_adapter, mock_adapter):
        """Postprocess delegates to adapter.adapt_list and returns its output."""
        raw = [{"type": "click", "x": 100, "y": 200}]
        adapted = agent_with_adapter.postprocess(raw)

        mock_adapter.adapt_list.assert_called_once_with(raw)
        assert len(adapted) == 1
        assert isinstance(adapted[0], ClickAction)

    @pytest.mark.asyncio
    async def test_predict_without_verbose(self, agent_with_adapter):
        """Predict with verbose off yields the adapted action and the done flag."""
        obs = Observation(text="test", screenshot="screenshot")
        scripted = ([{"type": "click", "x": 100, "y": 200}], False)
        agent_with_adapter.mock_responses = [scripted]

        actions, done = await agent_with_adapter.predict(obs, verbose=False)

        assert len(actions) == 1
        assert isinstance(actions[0], ClickAction)
        assert done is False

    @pytest.mark.asyncio
    @patch("hud.agent.base.logger")
    async def test_predict_with_verbose_logging(self, mock_logger, agent_with_adapter):
        """Predict with verbose on emits the expected info log calls."""
        obs = Observation(text="test", screenshot="screenshot")
        scripted = ([{"type": "click", "x": 100, "y": 200}], True)
        agent_with_adapter.mock_responses = [scripted]

        actions, done = await agent_with_adapter.predict(obs, verbose=True)

        # Both log calls must have happened, in any order and among any others.
        mock_logger.info.assert_any_call("Predicting action...")
        mock_logger.info.assert_any_call("Raw action: %s", [{"type": "click", "x": 100, "y": 200}])

        assert len(actions) == 1
        assert isinstance(actions[0], ClickAction)
        assert done is True

    @pytest.mark.asyncio
    async def test_predict_without_adapter_returns_raw_actions(self, agent_without_adapter):
        """Without an adapter, predict returns the raw actions unchanged."""
        obs = Observation(text="test", screenshot=None)
        raw_actions = [{"type": "click", "x": 100, "y": 200}]
        agent_without_adapter.mock_responses = [(raw_actions, True)]

        actions, done = await agent_without_adapter.predict(obs, verbose=False)

        assert actions == raw_actions
        assert done is True

    @pytest.mark.asyncio
    async def test_predict_with_empty_actions(self, agent_with_adapter):
        """An empty action list from fetch_response comes back as an empty list."""
        obs = Observation(text="test", screenshot="screenshot")
        agent_with_adapter.mock_responses = [([], True)]

        actions, done = await agent_with_adapter.predict(obs, verbose=False)

        assert actions == []
        assert done is True

    @pytest.mark.asyncio
    async def test_predict_full_pipeline(self, agent_with_adapter, mock_adapter):
        """Predict rescales the screenshot and adapts the scripted raw actions."""
        obs = Observation(text="test input", screenshot="original_screenshot")
        raw_actions = [{"type": "click", "x": 150, "y": 250}]
        agent_with_adapter.mock_responses = [(raw_actions, False)]

        actions, done = await agent_with_adapter.predict(obs, verbose=True)

        # Preprocessing: the screenshot went through the adapter's rescale.
        mock_adapter.rescale.assert_called_once_with("original_screenshot")

        # Postprocessing: the scripted raw actions went through adapt_list.
        mock_adapter.adapt_list.assert_called_once_with(raw_actions)

        assert len(actions) == 1
        assert isinstance(actions[0], ClickAction)
        assert done is False

    @pytest.mark.asyncio
    async def test_predict_integration_without_screenshot(self, agent_with_adapter):
        """Predict still yields one action when the observation has no screenshot."""
        obs = Observation(text="test input", screenshot=None)
        raw_actions = [{"type": "response", "text": "Task completed"}]
        agent_with_adapter.mock_responses = [(raw_actions, True)]

        actions, done = await agent_with_adapter.predict(obs, verbose=False)

        assert len(actions) == 1
        assert done is True
hud/env/docker_client.py CHANGED
@@ -26,6 +26,8 @@ STATUS_MESSAGES = {
26
26
  EnvironmentStatus.COMPLETED.value: "completed",
27
27
  }
28
28
 
29
+ PACKAGE_NAME = "hud_controller"
30
+
29
31
 
30
32
  class InvokeError(Exception):
31
33
  """
@@ -63,20 +65,12 @@ class DockerClient(Client):
63
65
  _last_update_time: int = 0
64
66
  _last_file_mtimes: dict[str, float] = {} # noqa: RUF012 - Not recognized as Pydantic model
65
67
  _source_path: Path | None = None
66
- _package_name: str | None = None
67
68
 
68
69
  @property
69
70
  def source_path(self) -> Path | None:
70
71
  """Get the source path."""
71
72
  return self._source_path
72
73
 
73
- @property
74
- def package_name(self) -> str:
75
- """Get the package name."""
76
- if not self._package_name:
77
- raise ValueError("Package name not set")
78
- return self._package_name
79
-
80
74
  def set_source_path(self, source_path: Path) -> None:
81
75
  """
82
76
  Set the source path for this environment controller.
@@ -102,21 +96,37 @@ class DockerClient(Client):
102
96
  if not pyproject_path.exists():
103
97
  raise FileNotFoundError(f"pyproject.toml not found in {source_path}")
104
98
 
99
+ # validate package name
105
100
  pyproject_data = toml.load(pyproject_path)
106
- self._package_name = pyproject_data.get("project", {}).get("name")
107
- if not self._package_name:
101
+ package_name = pyproject_data.get("project", {}).get("name")
102
+ if not package_name:
108
103
  raise ValueError("Could not find package name in pyproject.toml")
104
+ if package_name != PACKAGE_NAME:
105
+ raise ValueError(f"Package name in pyproject.toml must be {PACKAGE_NAME}")
109
106
 
110
107
  self._source_path = source_path
111
108
 
109
+ # set current mtimes
110
+ self._last_file_mtimes = self._get_all_file_mtimes()
111
+
112
@classmethod
@abc.abstractmethod
async def build_image(cls, build_context: Path) -> tuple[str, dict[str, Any]]:
    """
    Build an image from a build context.

    Abstract declaration: it has no body here, so concrete clients supply
    the actual build.

    Args:
        build_context: Path to the build context the image is built from

    Returns:
        tuple[str, dict[str, Any]]: The image tag and build output
    """
121
+
112
122
  @classmethod
113
123
  @abc.abstractmethod
114
- async def create(cls, dockerfile: str) -> DockerClient:
124
+ async def create(cls, image: str) -> DockerClient:
115
125
  """
116
- Creates an environment client from a dockerfile.
126
+ Creates an environment client from an image.
117
127
 
118
128
  Args:
119
- dockerfile: The dockerfile content to build the environment
129
+ image: The image to build the environment from
120
130
 
121
131
  Returns:
122
132
  EnvClient: An instance of the environment client
@@ -194,8 +204,8 @@ class DockerClient(Client):
194
204
 
195
205
  # Create tar archive of the source code and send it to the container
196
206
  tar_bytes = directory_to_tar_bytes(self._source_path)
197
- await self.execute(["mkdir", "-p", "/root/controller"], timeout=5)
198
- await self.put_archive("/root/controller", tar_bytes)
207
+ await self.execute(["mkdir", "-p", "/controller"], timeout=5)
208
+ await self.put_archive("/controller", tar_bytes)
199
209
 
200
210
  # Check if pyproject.toml exists and parse it
201
211
  pyproject_path = self._source_path / "pyproject.toml"
@@ -213,9 +223,9 @@ class DockerClient(Client):
213
223
  self._package_name = pyproject_data.get("project", {}).get("name")
214
224
  if not self._package_name:
215
225
  raise ValueError("Could not find package name in pyproject.toml")
216
- logger.info("Installing %s in /root/controller", self._package_name)
226
+ logger.info("Installing %s in /controller", self._package_name)
217
227
  result = await self.execute(
218
- ["bash", "-c", "cd /root/controller && pip install -e . --break-system-packages"],
228
+ ["bash", "-c", "cd /controller && pip install -e . --break-system-packages"],
219
229
  timeout=60,
220
230
  )
221
231
  if result["stdout"]:
@@ -262,7 +272,7 @@ class DockerClient(Client):
262
272
  # generate a random uuid as a divider
263
273
  divider = str(uuid.uuid4())
264
274
 
265
- template = invoke_template(config, self.package_name, divider)
275
+ template = invoke_template(config, PACKAGE_NAME, divider)
266
276
  logger.debug("Invoking template: %s", template)
267
277
 
268
278
  result = await self.execute(["python3", "-c", template])
hud/env/environment.py CHANGED
@@ -74,14 +74,17 @@ class Environment(BaseModel):
74
74
  config: The configuration to use for the setup
75
75
  """
76
76
  if isinstance(self.client, RemoteClient):
77
+ await self.get_urls()
77
78
  await self._invoke_all(create_remote_config(self, config, REMOTE_SETUP))
78
79
  else:
79
80
  if config is not None:
80
81
  await self._invoke_all(config)
81
- elif self.task and self.task.config is not None:
82
- await self._invoke_all(self.task.config)
82
+ elif self.task and self.task.setup is not None:
83
+ await self._invoke_all(self.task.setup)
83
84
  else:
84
- raise ValueError("No config or task provided for local environment")
85
+ raise ValueError(
86
+ "No config, task or task setup function provided for local environment"
87
+ )
85
88
 
86
89
  async def evaluate(self, config: FunctionConfigs | None = None) -> Any:
87
90
  """
@@ -98,8 +101,8 @@ class Environment(BaseModel):
98
101
  else:
99
102
  if config is not None:
100
103
  results = await self._invoke_all(config)
101
- elif self.task and self.task.config is not None:
102
- results = await self._invoke_all(self.task.config)
104
+ elif self.task and self.task.evaluate is not None:
105
+ results = await self._invoke_all(self.task.evaluate)
103
106
  else:
104
107
  raise ValueError("No config or task provided for local environment")
105
108
  if len(results) == 1:
@@ -144,8 +147,7 @@ class Environment(BaseModel):
144
147
  args = [[action.model_dump() for action in actions]]
145
148
 
146
149
  # TODO: Move this into the server side
147
- if self._maybe_store_response(actions):
148
- return Observation(text=self.final_response), 0, False, {}
150
+ self._maybe_store_response(actions)
149
151
 
150
152
  result, stdout, stderr = await self.client.invoke(
151
153
  FunctionConfig(function="step", args=args)
@@ -211,20 +213,20 @@ class Environment(BaseModel):
211
213
  agent: The agent to run
212
214
  """
213
215
  if verbose:
214
- logger.info("[HUD] Running agent in environment...")
216
+ logger.info("Running agent in environment...")
215
217
  obs, _ = await self.reset()
216
218
  for i in range(max_steps):
217
- action, done = await agent.predict(obs)
219
+ action, done = await agent.predict(obs, verbose=verbose)
218
220
  if verbose:
219
- logger.info("[HUD] Step %d: Action: %s", i, action)
221
+ logger.info("Step %d: Action: %s", i, action)
220
222
  obs, reward, terminated, info = await self.step(action)
221
223
  if verbose:
222
- logger.info("[HUD] Step %d: Observation: %s", i, obs)
224
+ logger.info("Step %d: Observation: %s", i, obs)
223
225
  if done or terminated:
224
226
  break
225
227
  result = await self.evaluate()
226
228
  if verbose:
227
- logger.info("[HUD] Evaluation result: %s", result)
229
+ logger.info("Evaluation result: %s", result)
228
230
  return result
229
231
 
230
232
 
@@ -348,7 +350,11 @@ def create_remote_config(
348
350
  if not isinstance(expanded_configs[0].args, list):
349
351
  expanded_configs[0].args = [expanded_configs[0].args]
350
352
  expanded_configs[0].args.append(env.final_response) # for remote responses
351
- return [FunctionConfig(function=function, args=expanded_configs)]
353
+ return [
354
+ FunctionConfig(
355
+ function=function, args=expanded_configs, metadata={"task": task.model_dump()}
356
+ )
357
+ ]
352
358
 
353
359
  # Case 3: Check for task.config
354
360
  if hasattr(task, "config") and task.config:
@@ -363,17 +369,27 @@ def create_remote_config(
363
369
  if not isinstance(final_args["args"], list):
364
370
  final_args["args"] = [final_args["args"]]
365
371
  final_args["args"].append(env.final_response)
366
- return [FunctionConfig(function=function, args=[final_args])]
372
+ return [
373
+ FunctionConfig(
374
+ function=function, args=[final_args], metadata={"task": task.model_dump()}
375
+ )
376
+ ]
367
377
 
368
378
  # Case 4: Use task.id
369
379
  if task.id:
370
380
  args_list = [task.id]
371
381
  if env and env.final_response:
372
382
  args_list.append(env.final_response) # Append final response
373
- return [FunctionConfig(function=f"{REMOTE_FUNCTION_PREFIX}{function}", args=args_list)]
383
+ return [
384
+ FunctionConfig(
385
+ function=f"{REMOTE_FUNCTION_PREFIX}{function}",
386
+ args=args_list,
387
+ metadata={"task": task.model_dump()},
388
+ )
389
+ ]
374
390
 
375
391
  # Case 5: No valid configuration found
376
392
  args_list = []
377
393
  if env and env.final_response:
378
394
  args_list.append(env.final_response)
379
- return [FunctionConfig(function=function, args=args_list)]
395
+ return [FunctionConfig(function=function, args=args_list, metadata={"task": task.model_dump()})]
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import asyncio
3
4
  import io
4
5
  import logging
5
- import tarfile
6
- import tempfile
6
+ import textwrap
7
+ import time
7
8
  import uuid
8
9
  from typing import TYPE_CHECKING, Any
9
10
 
@@ -12,12 +13,15 @@ from aiohttp import ClientTimeout
12
13
 
13
14
  from hud.env.docker_client import DockerClient, EnvironmentStatus
14
15
  from hud.utils import ExecuteResult
16
+ from hud.utils.common import directory_to_tar_bytes
15
17
 
16
18
  if TYPE_CHECKING:
19
+ from pathlib import Path
20
+
17
21
  from aiodocker.containers import DockerContainer
18
22
  from aiodocker.stream import Stream
19
23
 
20
- logger = logging.getLogger("hud.env.docker_env_client")
24
+ logger = logging.getLogger(__name__)
21
25
 
22
26
 
23
27
  class LocalDockerClient(DockerClient):
@@ -26,17 +30,9 @@ class LocalDockerClient(DockerClient):
26
30
  """
27
31
 
28
32
  @classmethod
29
- async def create(
30
- cls, dockerfile: str, ports: list[int] | None = None
31
- ) -> tuple[LocalDockerClient, dict[str, Any]]:
33
+ async def build_image(cls, build_context: Path) -> tuple[str, dict[str, Any]]:
32
34
  """
33
- Creates a Docker environment client from a dockerfile.
34
-
35
- Args:
36
- dockerfile: The dockerfile content to build the Docker image
37
-
38
- Returns:
39
- DockerClient: An instance of the Docker environment client
35
+ Build an image from a build context.
40
36
  """
41
37
  # Create a unique image tag
42
38
  image_tag = f"hud-env-{uuid.uuid4().hex[:8]}"
@@ -44,32 +40,19 @@ class LocalDockerClient(DockerClient):
44
40
  # Initialize Docker client
45
41
  docker_client = aiodocker.Docker()
46
42
 
47
- # Create fileobj for the Dockerfile
48
- dockerfile_fileobj = io.BytesIO(dockerfile.encode("utf-8"))
49
-
50
- if ports is None:
51
- ports = []
52
-
53
- # Create a tar file from the dockerfile
54
- with tempfile.NamedTemporaryFile() as f:
55
- with tarfile.open(mode="w:gz", fileobj=f) as t:
56
- dfinfo = tarfile.TarInfo("Dockerfile")
57
- dfinfo.size = len(dockerfile_fileobj.getvalue())
58
- dockerfile_fileobj.seek(0)
59
- t.addfile(dfinfo, dockerfile_fileobj)
60
-
61
- # Reset the file pointer to the beginning of the file
62
- f.seek(0)
63
-
64
- # Build the image
65
- build_stream = await docker_client.images.build(
66
- fileobj=f,
67
- encoding="gzip",
68
- tag=image_tag,
69
- rm=True,
70
- pull=True,
71
- forcerm=True,
72
- )
43
+ # Create a tar file from the path
44
+ tar_bytes = directory_to_tar_bytes(build_context)
45
+ logger.info("generated tar file with size: %d KB", len(tar_bytes) // 1024)
46
+
47
+ # Build the image
48
+ build_stream = await docker_client.images.build(
49
+ fileobj=io.BytesIO(tar_bytes),
50
+ encoding="gzip",
51
+ tag=image_tag,
52
+ rm=True,
53
+ pull=True,
54
+ forcerm=True,
55
+ )
73
56
 
74
57
  # Print build output
75
58
  output = ""
@@ -78,23 +61,63 @@ class LocalDockerClient(DockerClient):
78
61
  logger.info(chunk["stream"])
79
62
  output += chunk["stream"]
80
63
 
64
+ return image_tag, {"build_output": output}
65
+
66
@classmethod
async def create(
    cls,
    image: str,
) -> LocalDockerClient:
    """
    Creates a Docker environment client from an image.

    Starts a container from ``image`` and, when the image declares a
    healthcheck, polls once per second until the container reports
    ``healthy``.

    Args:
        image: The name or tag of the image to start the container from

    Returns:
        LocalDockerClient: A client bound to the started container

    Raises:
        RuntimeError: If the container exits or dies before becoming healthy
        TimeoutError: If the container is not healthy within the spin-up window
    """

    # Initialize Docker client
    docker_client = aiodocker.Docker()

    # Create and start the container
    container_config = {
        "Image": image,
        "Tty": True,
        "OpenStdin": True,
        "Cmd": None,
        "HostConfig": {
            "PublishAllPorts": True,
        },
    }

    container = await docker_client.containers.create(config=container_config)
    await container.start()

    inspection = await container.show()
    if health_check_config := inspection["Config"].get("Healthcheck"):
        # Using the interval as spinup deadline is a bit implicit - could
        # consider adding explicitly to API if there's demand
        #
        # Docker reports Interval in *nanoseconds*; a missing key or 0 means
        # "inherit the default", which is 30 seconds.
        window_nsecs = health_check_config.get("Interval") or 30 * 1_000_000_000
        # Sub-second intervals would floor to 0 and time out on the first
        # poll, so always allow at least one second.
        window_secs = max(1, window_nsecs // 1_000_000_000)

        deadline = time.monotonic() + window_secs
        logger.debug("Waiting for container %s to become healthy", container.id)
        while True:
            state = (await container.show())["State"]
            if state.get("Health", {}).get("Status") == "healthy":
                break
            if state.get("Status") in {"exited", "dead"}:
                raise RuntimeError("Container crashed before becoming healthy")
            now = time.monotonic()
            if now > deadline:
                raise TimeoutError(f"{container.id} not healthy after {window_secs}s")
            await asyncio.sleep(1)
        logger.debug("Container %s is healthy", container.id)

    # Return the controller instance
    return cls(docker_client, container.id)
98
121
 
99
122
  def __init__(self, docker_conn: aiodocker.Docker, container_id: str) -> None:
100
123
  """
@@ -190,6 +213,24 @@ class LocalDockerClient(DockerClient):
190
213
  elif message.stream == 2: # stderr
191
214
  stderr_data.extend(message.data)
192
215
 
216
+ if "No module named 'hud_controller'" in stderr_data.decode():
217
+ if self._source_path is None:
218
+ message = textwrap.dedent("""\
219
+ Your environment is not set up correctly.
220
+ You are using a prebuilt image, so please ensure the following:
221
+ 1. Your image cannot be a generic python image, it must contain a python package
222
+ called hud_controller.
223
+ """)
224
+ else:
225
+ message = textwrap.dedent("""\
226
+ Your environment is not set up correctly.
227
+ You are using a local controller, so please ensure the following:
228
+ 1. Your package name is hud_controller
229
+ 2. You installed the package in the Dockerfile.
230
+ 3. The package is visible from the global python environment (no venv, conda, or uv)
231
+ """)
232
+ logger.error(message)
233
+
193
234
  return ExecuteResult(
194
235
  stdout=bytes(stdout_data),
195
236
  stderr=bytes(stderr_data),
hud/env/remote_client.py CHANGED
@@ -83,9 +83,7 @@ class RemoteClient(Client):
83
83
  build_data = response.get("metadata", {})
84
84
 
85
85
  if response.get("readme"):
86
- logger.info(
87
- "[HUD] %s gym created, see how to use it at %s", gym_id, response.get("readme")
88
- )
86
+ logger.info("Gym created, see how to use it at %s", response.get("readme"))
89
87
 
90
88
  return controller, build_data
91
89