hud-python 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -2
- hud/adapters/claude/adapter.py +9 -2
- hud/adapters/claude/tests/__init__.py +1 -0
- hud/adapters/claude/tests/test_adapter.py +519 -0
- hud/adapters/common/types.py +5 -1
- hud/adapters/operator/adapter.py +4 -0
- hud/adapters/operator/tests/__init__.py +1 -0
- hud/adapters/operator/tests/test_adapter.py +370 -0
- hud/agent/__init__.py +4 -0
- hud/agent/base.py +18 -2
- hud/agent/claude.py +20 -17
- hud/agent/claude_plays_pokemon.py +283 -0
- hud/agent/langchain.py +12 -7
- hud/agent/misc/__init__.py +3 -0
- hud/agent/misc/response_agent.py +80 -0
- hud/agent/operator.py +27 -19
- hud/agent/tests/__init__.py +1 -0
- hud/agent/tests/test_base.py +202 -0
- hud/env/docker_client.py +28 -18
- hud/env/environment.py +32 -16
- hud/env/local_docker_client.py +83 -42
- hud/env/remote_client.py +1 -3
- hud/env/remote_docker_client.py +71 -14
- hud/exceptions.py +12 -0
- hud/gym.py +71 -53
- hud/job.py +59 -14
- hud/server/requests.py +26 -4
- hud/settings.py +7 -1
- hud/task.py +45 -33
- hud/taskset.py +56 -4
- hud/telemetry/__init__.py +21 -0
- hud/telemetry/_trace.py +173 -0
- hud/telemetry/context.py +169 -0
- hud/telemetry/exporter.py +417 -0
- hud/telemetry/instrumentation/__init__.py +3 -0
- hud/telemetry/instrumentation/mcp.py +495 -0
- hud/telemetry/instrumentation/registry.py +59 -0
- hud/telemetry/mcp_models.py +331 -0
- hud/telemetry/tests/__init__.py +1 -0
- hud/telemetry/tests/test_context.py +207 -0
- hud/telemetry/tests/test_trace.py +270 -0
- hud/types.py +11 -27
- hud/utils/common.py +22 -2
- hud/utils/misc.py +53 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +7 -0
- {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/METADATA +98 -30
- hud_python-0.2.6.dist-info/RECORD +84 -0
- hud_python-0.2.4.dist-info/RECORD +0 -62
- {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/WHEEL +0 -0
- {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from unittest.mock import MagicMock, patch
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
from hud.agent.base import Agent
|
|
9
|
+
from hud.adapters import Adapter
|
|
10
|
+
from hud.adapters.common.types import ClickAction, Point
|
|
11
|
+
from hud.utils.common import Observation
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ConcreteAgent(Agent[Any, dict[str, Any]]):
    """Minimal Agent subclass used as a test double.

    Responses are scripted: each call to ``fetch_response`` hands back the
    next entry of ``mock_responses`` and advances ``call_count``.
    """

    def __init__(self, client: Any = None, adapter: Adapter | None = None):
        super().__init__(client, adapter)
        # Scripted (actions, done) tuples, consumed in order.
        self.mock_responses = []
        # Number of scripted responses already handed out.
        self.call_count = 0

    async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
        """Return the next scripted response, or ``([], True)`` once exhausted."""
        position = self.call_count
        if position >= len(self.mock_responses):
            # Nothing left in the script: report "no actions, done".
            return [], True

        scripted = self.mock_responses[position]
        self.call_count = position + 1
        return scripted
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TestAgentBase:
    """Tests for the behaviour implemented by the base ``Agent`` class.

    The tests drive ``preprocess``, ``postprocess`` and ``predict`` through
    ``ConcreteAgent`` with a mocked client and a mocked ``Adapter``.
    """

    @pytest.fixture
    def mock_client(self):
        """Mock client for testing."""
        return MagicMock()

    @pytest.fixture
    def mock_adapter(self):
        """Mock adapter with canned ``rescale`` and ``adapt_list`` results."""
        adapter = MagicMock(spec=Adapter)
        adapter.rescale.return_value = "rescaled_screenshot"
        adapter.adapt_list.return_value = [ClickAction(point=Point(x=100, y=200))]
        return adapter

    @pytest.fixture
    def agent_with_adapter(self, mock_client, mock_adapter):
        """Agent with both client and adapter."""
        return ConcreteAgent(client=mock_client, adapter=mock_adapter)

    @pytest.fixture
    def agent_without_adapter(self, mock_client):
        """Agent with client but no adapter."""
        return ConcreteAgent(client=mock_client, adapter=None)

    def test_init_with_client_and_adapter(self, mock_client, mock_adapter):
        """Test agent initialization with client and adapter."""
        agent = ConcreteAgent(client=mock_client, adapter=mock_adapter)
        assert agent.client == mock_client
        assert agent.adapter == mock_adapter

    def test_init_with_none_values(self):
        """Test agent initialization with None values."""
        agent = ConcreteAgent(client=None, adapter=None)
        assert agent.client is None
        assert agent.adapter is None

    def test_preprocess_without_adapter(self, agent_without_adapter):
        """Test preprocess when no adapter is available."""
        observation = Observation(text="test", screenshot="screenshot_data")
        result = agent_without_adapter.preprocess(observation)

        # Should return original observation unchanged
        assert result == observation
        assert result.text == "test"
        assert result.screenshot == "screenshot_data"

    def test_preprocess_without_screenshot(self, agent_with_adapter):
        """Test preprocess when no screenshot is available."""
        observation = Observation(text="test", screenshot=None)
        result = agent_with_adapter.preprocess(observation)

        # Should return original observation unchanged
        assert result == observation
        assert result.text == "test"
        assert result.screenshot is None

    def test_preprocess_with_adapter_and_screenshot(self, agent_with_adapter, mock_adapter):
        """Test preprocess with adapter and screenshot."""
        observation = Observation(text="test", screenshot="original_screenshot")
        result = agent_with_adapter.preprocess(observation)

        # Should create new observation with rescaled screenshot
        mock_adapter.rescale.assert_called_once_with("original_screenshot")
        assert result.text == "test"
        assert result.screenshot == "rescaled_screenshot"
        # Should be a new object, not the original
        assert result is not observation

    def test_postprocess_without_adapter(self, agent_without_adapter):
        """Test postprocess raises when no adapter is available."""
        actions = [{"type": "click", "x": 100, "y": 200}]

        with pytest.raises(ValueError, match="Cannot postprocess actions without an adapter"):
            agent_without_adapter.postprocess(actions)

    def test_postprocess_with_adapter(self, agent_with_adapter, mock_adapter):
        """Test postprocess with adapter."""
        actions = [{"type": "click", "x": 100, "y": 200}]
        result = agent_with_adapter.postprocess(actions)

        mock_adapter.adapt_list.assert_called_once_with(actions)
        assert len(result) == 1
        assert isinstance(result[0], ClickAction)

    @pytest.mark.asyncio
    async def test_predict_without_verbose(self, agent_with_adapter):
        """Test predict method without verbose logging."""
        observation = Observation(text="test", screenshot="screenshot")
        agent_with_adapter.mock_responses = [([{"type": "click", "x": 100, "y": 200}], False)]

        actions, done = await agent_with_adapter.predict(observation, verbose=False)

        assert len(actions) == 1
        assert isinstance(actions[0], ClickAction)
        assert done is False

    @pytest.mark.asyncio
    @patch("hud.agent.base.logger")
    async def test_predict_with_verbose_logging(self, mock_logger, agent_with_adapter):
        """Test predict method with verbose logging."""
        observation = Observation(text="test", screenshot="screenshot")
        agent_with_adapter.mock_responses = [([{"type": "click", "x": 100, "y": 200}], True)]

        actions, done = await agent_with_adapter.predict(observation, verbose=True)

        # Verify verbose logging was called
        mock_logger.info.assert_any_call("Predicting action...")
        mock_logger.info.assert_any_call("Raw action: %s", [{"type": "click", "x": 100, "y": 200}])

        assert len(actions) == 1
        assert isinstance(actions[0], ClickAction)
        assert done is True

    @pytest.mark.asyncio
    async def test_predict_without_adapter_returns_raw_actions(self, agent_without_adapter):
        """Test predict without adapter returns raw actions."""
        observation = Observation(text="test", screenshot=None)
        raw_actions = [{"type": "click", "x": 100, "y": 200}]
        agent_without_adapter.mock_responses = [(raw_actions, True)]

        actions, done = await agent_without_adapter.predict(observation, verbose=False)

        # Should return raw actions, not processed ones
        assert actions == raw_actions
        assert done is True

    @pytest.mark.asyncio
    async def test_predict_with_empty_actions(self, agent_with_adapter, mock_adapter):
        """Test predict when fetch_response returns empty actions."""
        observation = Observation(text="test", screenshot="screenshot")
        agent_with_adapter.mock_responses = [([], True)]

        actions, done = await agent_with_adapter.predict(observation, verbose=False)

        # Should return empty actions without calling adapter
        assert actions == []
        assert done is True
        # Pin the "without calling adapter" half of the contract explicitly;
        # the mocked adapt_list would otherwise have produced a ClickAction.
        mock_adapter.adapt_list.assert_not_called()

    @pytest.mark.asyncio
    async def test_predict_full_pipeline(self, agent_with_adapter, mock_adapter):
        """Test the complete predict pipeline with all stages."""
        # Set up observation with screenshot that will be rescaled
        observation = Observation(text="test input", screenshot="original_screenshot")
        raw_actions = [{"type": "click", "x": 150, "y": 250}]
        agent_with_adapter.mock_responses = [(raw_actions, False)]

        actions, done = await agent_with_adapter.predict(observation, verbose=True)

        # Verify all stages were called
        # Stage 1: Preprocessing
        mock_adapter.rescale.assert_called_once_with("original_screenshot")

        # Stage 3: Postprocessing
        mock_adapter.adapt_list.assert_called_once_with(raw_actions)

        assert len(actions) == 1
        assert isinstance(actions[0], ClickAction)
        assert done is False

    @pytest.mark.asyncio
    async def test_predict_integration_without_screenshot(self, agent_with_adapter):
        """Test predict integration when observation has no screenshot."""
        observation = Observation(text="test input", screenshot=None)
        raw_actions = [{"type": "response", "text": "Task completed"}]
        agent_with_adapter.mock_responses = [(raw_actions, True)]

        actions, done = await agent_with_adapter.predict(observation, verbose=False)

        assert len(actions) == 1
        assert done is True
|
hud/env/docker_client.py
CHANGED
|
@@ -26,6 +26,8 @@ STATUS_MESSAGES = {
|
|
|
26
26
|
EnvironmentStatus.COMPLETED.value: "completed",
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
PACKAGE_NAME = "hud_controller"
|
|
30
|
+
|
|
29
31
|
|
|
30
32
|
class InvokeError(Exception):
|
|
31
33
|
"""
|
|
@@ -63,20 +65,12 @@ class DockerClient(Client):
|
|
|
63
65
|
_last_update_time: int = 0
|
|
64
66
|
_last_file_mtimes: dict[str, float] = {} # noqa: RUF012 - Not recognized as Pydantic model
|
|
65
67
|
_source_path: Path | None = None
|
|
66
|
-
_package_name: str | None = None
|
|
67
68
|
|
|
68
69
|
@property
|
|
69
70
|
def source_path(self) -> Path | None:
|
|
70
71
|
"""Get the source path."""
|
|
71
72
|
return self._source_path
|
|
72
73
|
|
|
73
|
-
@property
|
|
74
|
-
def package_name(self) -> str:
|
|
75
|
-
"""Get the package name."""
|
|
76
|
-
if not self._package_name:
|
|
77
|
-
raise ValueError("Package name not set")
|
|
78
|
-
return self._package_name
|
|
79
|
-
|
|
80
74
|
def set_source_path(self, source_path: Path) -> None:
|
|
81
75
|
"""
|
|
82
76
|
Set the source path for this environment controller.
|
|
@@ -102,21 +96,37 @@ class DockerClient(Client):
|
|
|
102
96
|
if not pyproject_path.exists():
|
|
103
97
|
raise FileNotFoundError(f"pyproject.toml not found in {source_path}")
|
|
104
98
|
|
|
99
|
+
# validate package name
|
|
105
100
|
pyproject_data = toml.load(pyproject_path)
|
|
106
|
-
|
|
107
|
-
if not
|
|
101
|
+
package_name = pyproject_data.get("project", {}).get("name")
|
|
102
|
+
if not package_name:
|
|
108
103
|
raise ValueError("Could not find package name in pyproject.toml")
|
|
104
|
+
if package_name != PACKAGE_NAME:
|
|
105
|
+
raise ValueError(f"Package name in pyproject.toml must be {PACKAGE_NAME}")
|
|
109
106
|
|
|
110
107
|
self._source_path = source_path
|
|
111
108
|
|
|
109
|
+
# set current mtimes
|
|
110
|
+
self._last_file_mtimes = self._get_all_file_mtimes()
|
|
111
|
+
|
|
112
|
+
@classmethod
@abc.abstractmethod
async def build_image(cls, build_context: Path) -> tuple[str, dict[str, Any]]:
    """
    Build an image from a build context.

    Abstract: concrete clients provide the actual build.

    Args:
        build_context: Path of the build context to build the image from

    Returns:
        tuple[str, dict[str, Any]]: The image tag and build output
    """
|
|
121
|
+
|
|
112
122
|
@classmethod
|
|
113
123
|
@abc.abstractmethod
|
|
114
|
-
async def create(cls,
|
|
124
|
+
async def create(cls, image: str) -> DockerClient:
|
|
115
125
|
"""
|
|
116
|
-
Creates an environment client from
|
|
126
|
+
Creates an environment client from an image.
|
|
117
127
|
|
|
118
128
|
Args:
|
|
119
|
-
|
|
129
|
+
image: The image to build the environment from
|
|
120
130
|
|
|
121
131
|
Returns:
|
|
122
132
|
EnvClient: An instance of the environment client
|
|
@@ -194,8 +204,8 @@ class DockerClient(Client):
|
|
|
194
204
|
|
|
195
205
|
# Create tar archive of the source code and send it to the container
|
|
196
206
|
tar_bytes = directory_to_tar_bytes(self._source_path)
|
|
197
|
-
await self.execute(["mkdir", "-p", "/
|
|
198
|
-
await self.put_archive("/
|
|
207
|
+
await self.execute(["mkdir", "-p", "/controller"], timeout=5)
|
|
208
|
+
await self.put_archive("/controller", tar_bytes)
|
|
199
209
|
|
|
200
210
|
# Check if pyproject.toml exists and parse it
|
|
201
211
|
pyproject_path = self._source_path / "pyproject.toml"
|
|
@@ -213,9 +223,9 @@ class DockerClient(Client):
|
|
|
213
223
|
self._package_name = pyproject_data.get("project", {}).get("name")
|
|
214
224
|
if not self._package_name:
|
|
215
225
|
raise ValueError("Could not find package name in pyproject.toml")
|
|
216
|
-
logger.info("Installing %s in /
|
|
226
|
+
logger.info("Installing %s in /controller", self._package_name)
|
|
217
227
|
result = await self.execute(
|
|
218
|
-
["bash", "-c", "cd /
|
|
228
|
+
["bash", "-c", "cd /controller && pip install -e . --break-system-packages"],
|
|
219
229
|
timeout=60,
|
|
220
230
|
)
|
|
221
231
|
if result["stdout"]:
|
|
@@ -262,7 +272,7 @@ class DockerClient(Client):
|
|
|
262
272
|
# generate a random uuid as a divider
|
|
263
273
|
divider = str(uuid.uuid4())
|
|
264
274
|
|
|
265
|
-
template = invoke_template(config,
|
|
275
|
+
template = invoke_template(config, PACKAGE_NAME, divider)
|
|
266
276
|
logger.debug("Invoking template: %s", template)
|
|
267
277
|
|
|
268
278
|
result = await self.execute(["python3", "-c", template])
|
hud/env/environment.py
CHANGED
|
@@ -74,14 +74,17 @@ class Environment(BaseModel):
|
|
|
74
74
|
config: The configuration to use for the setup
|
|
75
75
|
"""
|
|
76
76
|
if isinstance(self.client, RemoteClient):
|
|
77
|
+
await self.get_urls()
|
|
77
78
|
await self._invoke_all(create_remote_config(self, config, REMOTE_SETUP))
|
|
78
79
|
else:
|
|
79
80
|
if config is not None:
|
|
80
81
|
await self._invoke_all(config)
|
|
81
|
-
elif self.task and self.task.
|
|
82
|
-
await self._invoke_all(self.task.
|
|
82
|
+
elif self.task and self.task.setup is not None:
|
|
83
|
+
await self._invoke_all(self.task.setup)
|
|
83
84
|
else:
|
|
84
|
-
raise ValueError(
|
|
85
|
+
raise ValueError(
|
|
86
|
+
"No config, task or task setup function provided for local environment"
|
|
87
|
+
)
|
|
85
88
|
|
|
86
89
|
async def evaluate(self, config: FunctionConfigs | None = None) -> Any:
|
|
87
90
|
"""
|
|
@@ -98,8 +101,8 @@ class Environment(BaseModel):
|
|
|
98
101
|
else:
|
|
99
102
|
if config is not None:
|
|
100
103
|
results = await self._invoke_all(config)
|
|
101
|
-
elif self.task and self.task.
|
|
102
|
-
results = await self._invoke_all(self.task.
|
|
104
|
+
elif self.task and self.task.evaluate is not None:
|
|
105
|
+
results = await self._invoke_all(self.task.evaluate)
|
|
103
106
|
else:
|
|
104
107
|
raise ValueError("No config or task provided for local environment")
|
|
105
108
|
if len(results) == 1:
|
|
@@ -144,8 +147,7 @@ class Environment(BaseModel):
|
|
|
144
147
|
args = [[action.model_dump() for action in actions]]
|
|
145
148
|
|
|
146
149
|
# TODO: Move this into the server side
|
|
147
|
-
|
|
148
|
-
return Observation(text=self.final_response), 0, False, {}
|
|
150
|
+
self._maybe_store_response(actions)
|
|
149
151
|
|
|
150
152
|
result, stdout, stderr = await self.client.invoke(
|
|
151
153
|
FunctionConfig(function="step", args=args)
|
|
@@ -211,20 +213,20 @@ class Environment(BaseModel):
|
|
|
211
213
|
agent: The agent to run
|
|
212
214
|
"""
|
|
213
215
|
if verbose:
|
|
214
|
-
logger.info("
|
|
216
|
+
logger.info("Running agent in environment...")
|
|
215
217
|
obs, _ = await self.reset()
|
|
216
218
|
for i in range(max_steps):
|
|
217
|
-
action, done = await agent.predict(obs)
|
|
219
|
+
action, done = await agent.predict(obs, verbose=verbose)
|
|
218
220
|
if verbose:
|
|
219
|
-
logger.info("
|
|
221
|
+
logger.info("Step %d: Action: %s", i, action)
|
|
220
222
|
obs, reward, terminated, info = await self.step(action)
|
|
221
223
|
if verbose:
|
|
222
|
-
logger.info("
|
|
224
|
+
logger.info("Step %d: Observation: %s", i, obs)
|
|
223
225
|
if done or terminated:
|
|
224
226
|
break
|
|
225
227
|
result = await self.evaluate()
|
|
226
228
|
if verbose:
|
|
227
|
-
logger.info("
|
|
229
|
+
logger.info("Evaluation result: %s", result)
|
|
228
230
|
return result
|
|
229
231
|
|
|
230
232
|
|
|
@@ -348,7 +350,11 @@ def create_remote_config(
|
|
|
348
350
|
if not isinstance(expanded_configs[0].args, list):
|
|
349
351
|
expanded_configs[0].args = [expanded_configs[0].args]
|
|
350
352
|
expanded_configs[0].args.append(env.final_response) # for remote responses
|
|
351
|
-
return [
|
|
353
|
+
return [
|
|
354
|
+
FunctionConfig(
|
|
355
|
+
function=function, args=expanded_configs, metadata={"task": task.model_dump()}
|
|
356
|
+
)
|
|
357
|
+
]
|
|
352
358
|
|
|
353
359
|
# Case 3: Check for task.config
|
|
354
360
|
if hasattr(task, "config") and task.config:
|
|
@@ -363,17 +369,27 @@ def create_remote_config(
|
|
|
363
369
|
if not isinstance(final_args["args"], list):
|
|
364
370
|
final_args["args"] = [final_args["args"]]
|
|
365
371
|
final_args["args"].append(env.final_response)
|
|
366
|
-
return [
|
|
372
|
+
return [
|
|
373
|
+
FunctionConfig(
|
|
374
|
+
function=function, args=[final_args], metadata={"task": task.model_dump()}
|
|
375
|
+
)
|
|
376
|
+
]
|
|
367
377
|
|
|
368
378
|
# Case 4: Use task.id
|
|
369
379
|
if task.id:
|
|
370
380
|
args_list = [task.id]
|
|
371
381
|
if env and env.final_response:
|
|
372
382
|
args_list.append(env.final_response) # Append final response
|
|
373
|
-
return [
|
|
383
|
+
return [
|
|
384
|
+
FunctionConfig(
|
|
385
|
+
function=f"{REMOTE_FUNCTION_PREFIX}{function}",
|
|
386
|
+
args=args_list,
|
|
387
|
+
metadata={"task": task.model_dump()},
|
|
388
|
+
)
|
|
389
|
+
]
|
|
374
390
|
|
|
375
391
|
# Case 5: No valid configuration found
|
|
376
392
|
args_list = []
|
|
377
393
|
if env and env.final_response:
|
|
378
394
|
args_list.append(env.final_response)
|
|
379
|
-
return [FunctionConfig(function=function, args=args_list)]
|
|
395
|
+
return [FunctionConfig(function=function, args=args_list, metadata={"task": task.model_dump()})]
|
hud/env/local_docker_client.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import io
|
|
4
5
|
import logging
|
|
5
|
-
import
|
|
6
|
-
import
|
|
6
|
+
import textwrap
|
|
7
|
+
import time
|
|
7
8
|
import uuid
|
|
8
9
|
from typing import TYPE_CHECKING, Any
|
|
9
10
|
|
|
@@ -12,12 +13,15 @@ from aiohttp import ClientTimeout
|
|
|
12
13
|
|
|
13
14
|
from hud.env.docker_client import DockerClient, EnvironmentStatus
|
|
14
15
|
from hud.utils import ExecuteResult
|
|
16
|
+
from hud.utils.common import directory_to_tar_bytes
|
|
15
17
|
|
|
16
18
|
if TYPE_CHECKING:
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
17
21
|
from aiodocker.containers import DockerContainer
|
|
18
22
|
from aiodocker.stream import Stream
|
|
19
23
|
|
|
20
|
-
logger = logging.getLogger(
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
21
25
|
|
|
22
26
|
|
|
23
27
|
class LocalDockerClient(DockerClient):
|
|
@@ -26,17 +30,9 @@ class LocalDockerClient(DockerClient):
|
|
|
26
30
|
"""
|
|
27
31
|
|
|
28
32
|
@classmethod
|
|
29
|
-
async def
|
|
30
|
-
cls, dockerfile: str, ports: list[int] | None = None
|
|
31
|
-
) -> tuple[LocalDockerClient, dict[str, Any]]:
|
|
33
|
+
async def build_image(cls, build_context: Path) -> tuple[str, dict[str, Any]]:
|
|
32
34
|
"""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
Args:
|
|
36
|
-
dockerfile: The dockerfile content to build the Docker image
|
|
37
|
-
|
|
38
|
-
Returns:
|
|
39
|
-
DockerClient: An instance of the Docker environment client
|
|
35
|
+
Build an image from a build context.
|
|
40
36
|
"""
|
|
41
37
|
# Create a unique image tag
|
|
42
38
|
image_tag = f"hud-env-{uuid.uuid4().hex[:8]}"
|
|
@@ -44,32 +40,19 @@ class LocalDockerClient(DockerClient):
|
|
|
44
40
|
# Initialize Docker client
|
|
45
41
|
docker_client = aiodocker.Docker()
|
|
46
42
|
|
|
47
|
-
# Create
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
# Reset the file pointer to the beginning of the file
|
|
62
|
-
f.seek(0)
|
|
63
|
-
|
|
64
|
-
# Build the image
|
|
65
|
-
build_stream = await docker_client.images.build(
|
|
66
|
-
fileobj=f,
|
|
67
|
-
encoding="gzip",
|
|
68
|
-
tag=image_tag,
|
|
69
|
-
rm=True,
|
|
70
|
-
pull=True,
|
|
71
|
-
forcerm=True,
|
|
72
|
-
)
|
|
43
|
+
# Create a tar file from the path
|
|
44
|
+
tar_bytes = directory_to_tar_bytes(build_context)
|
|
45
|
+
logger.info("generated tar file with size: %d KB", len(tar_bytes) // 1024)
|
|
46
|
+
|
|
47
|
+
# Build the image
|
|
48
|
+
build_stream = await docker_client.images.build(
|
|
49
|
+
fileobj=io.BytesIO(tar_bytes),
|
|
50
|
+
encoding="gzip",
|
|
51
|
+
tag=image_tag,
|
|
52
|
+
rm=True,
|
|
53
|
+
pull=True,
|
|
54
|
+
forcerm=True,
|
|
55
|
+
)
|
|
73
56
|
|
|
74
57
|
# Print build output
|
|
75
58
|
output = ""
|
|
@@ -78,23 +61,63 @@ class LocalDockerClient(DockerClient):
|
|
|
78
61
|
logger.info(chunk["stream"])
|
|
79
62
|
output += chunk["stream"]
|
|
80
63
|
|
|
64
|
+
return image_tag, {"build_output": output}
|
|
65
|
+
|
|
66
|
+
@classmethod
async def create(
    cls,
    image: str,
) -> LocalDockerClient:
    """
    Creates a Docker environment client from an image.

    A container is created from ``image`` and started. If the image
    declares a healthcheck, this waits until the container reports
    ``healthy`` before returning.

    Args:
        image: The image to create and start the container from

    Returns:
        LocalDockerClient: A client bound to the started container

    Raises:
        RuntimeError: If the container exits or dies before becoming healthy
        TimeoutError: If the container is not healthy within the spin-up window
    """

    # Initialize Docker client
    docker_client = aiodocker.Docker()

    # Create and start the container
    container_config = {
        "Image": image,
        "Tty": True,
        "OpenStdin": True,
        "Cmd": None,
        "HostConfig": {
            "PublishAllPorts": True,
        },
    }

    container = await docker_client.containers.create(config=container_config)
    await container.start()

    inspection = await container.show()
    if health_check_config := inspection["Config"].get("Healthcheck"):
        # Using the interval as spinup deadline is a bit implicit - could
        # consider adding explicitly to API if there's demand
        #
        # Docker reports Interval in nanoseconds. A missing value or 0
        # ("inherit") falls back to a 30s window.
        default_interval_ns = 30 * 1_000_000_000
        interval_ns = health_check_config.get("Interval") or default_interval_ns
        window_secs = interval_ns // 1_000_000_000

        deadline = time.monotonic() + window_secs
        logger.debug("Waiting for container %s to become healthy", container.id)
        while True:
            state = (await container.show())["State"]
            if state.get("Health", {}).get("Status") == "healthy":
                break
            if state.get("Status") in {"exited", "dead"}:
                raise RuntimeError("Container crashed before becoming healthy")
            now = time.monotonic()
            if now > deadline:
                raise TimeoutError(f"{container.id} not healthy after {window_secs}s")
            # Poll once per second until healthy, crashed, or timed out.
            await asyncio.sleep(1)
        logger.debug("Container %s is healthy", container.id)

    # Return the controller instance
    return cls(docker_client, container.id)
|
|
98
121
|
|
|
99
122
|
def __init__(self, docker_conn: aiodocker.Docker, container_id: str) -> None:
|
|
100
123
|
"""
|
|
@@ -190,6 +213,24 @@ class LocalDockerClient(DockerClient):
|
|
|
190
213
|
elif message.stream == 2: # stderr
|
|
191
214
|
stderr_data.extend(message.data)
|
|
192
215
|
|
|
216
|
+
if "No module named 'hud_controller'" in stderr_data.decode():
|
|
217
|
+
if self._source_path is None:
|
|
218
|
+
message = textwrap.dedent("""\
|
|
219
|
+
Your environment is not set up correctly.
|
|
220
|
+
You are using a prebuilt image, so please ensure the following:
|
|
221
|
+
1. Your image cannot be a generic python image, it must contain a python package
|
|
222
|
+
called hud_controller.
|
|
223
|
+
""")
|
|
224
|
+
else:
|
|
225
|
+
message = textwrap.dedent("""\
|
|
226
|
+
Your environment is not set up correctly.
|
|
227
|
+
You are using a local controller, so please ensure the following:
|
|
228
|
+
1. Your package name is hud_controller
|
|
229
|
+
2. You installed the package in the Dockerfile.
|
|
230
|
+
3. The package is visible from the global python environment (no venv, conda, or uv)
|
|
231
|
+
""")
|
|
232
|
+
logger.error(message)
|
|
233
|
+
|
|
193
234
|
return ExecuteResult(
|
|
194
235
|
stdout=bytes(stdout_data),
|
|
195
236
|
stderr=bytes(stderr_data),
|
hud/env/remote_client.py
CHANGED
|
@@ -83,9 +83,7 @@ class RemoteClient(Client):
|
|
|
83
83
|
build_data = response.get("metadata", {})
|
|
84
84
|
|
|
85
85
|
if response.get("readme"):
|
|
86
|
-
logger.info(
|
|
87
|
-
"[HUD] %s gym created, see how to use it at %s", gym_id, response.get("readme")
|
|
88
|
-
)
|
|
86
|
+
logger.info("Gym created, see how to use it at %s", response.get("readme"))
|
|
89
87
|
|
|
90
88
|
return controller, build_data
|
|
91
89
|
|