hud-python 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (51)
  1. hud/__init__.py +22 -2
  2. hud/adapters/claude/adapter.py +9 -2
  3. hud/adapters/claude/tests/__init__.py +1 -0
  4. hud/adapters/claude/tests/test_adapter.py +519 -0
  5. hud/adapters/common/types.py +5 -1
  6. hud/adapters/operator/adapter.py +4 -0
  7. hud/adapters/operator/tests/__init__.py +1 -0
  8. hud/adapters/operator/tests/test_adapter.py +370 -0
  9. hud/agent/__init__.py +4 -0
  10. hud/agent/base.py +18 -2
  11. hud/agent/claude.py +20 -17
  12. hud/agent/claude_plays_pokemon.py +283 -0
  13. hud/agent/langchain.py +12 -7
  14. hud/agent/misc/__init__.py +3 -0
  15. hud/agent/misc/response_agent.py +80 -0
  16. hud/agent/operator.py +27 -19
  17. hud/agent/tests/__init__.py +1 -0
  18. hud/agent/tests/test_base.py +202 -0
  19. hud/env/docker_client.py +28 -18
  20. hud/env/environment.py +32 -16
  21. hud/env/local_docker_client.py +83 -42
  22. hud/env/remote_client.py +1 -3
  23. hud/env/remote_docker_client.py +71 -14
  24. hud/exceptions.py +12 -0
  25. hud/gym.py +71 -53
  26. hud/job.py +59 -14
  27. hud/server/requests.py +26 -4
  28. hud/settings.py +7 -1
  29. hud/task.py +45 -33
  30. hud/taskset.py +56 -4
  31. hud/telemetry/__init__.py +21 -0
  32. hud/telemetry/_trace.py +173 -0
  33. hud/telemetry/context.py +169 -0
  34. hud/telemetry/exporter.py +417 -0
  35. hud/telemetry/instrumentation/__init__.py +3 -0
  36. hud/telemetry/instrumentation/mcp.py +495 -0
  37. hud/telemetry/instrumentation/registry.py +59 -0
  38. hud/telemetry/mcp_models.py +331 -0
  39. hud/telemetry/tests/__init__.py +1 -0
  40. hud/telemetry/tests/test_context.py +207 -0
  41. hud/telemetry/tests/test_trace.py +270 -0
  42. hud/types.py +11 -27
  43. hud/utils/common.py +22 -2
  44. hud/utils/misc.py +53 -0
  45. hud/utils/tests/test_version.py +1 -1
  46. hud/version.py +7 -0
  47. {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/METADATA +98 -30
  48. hud_python-0.2.6.dist-info/RECORD +84 -0
  49. hud_python-0.2.4.dist-info/RECORD +0 -62
  50. {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/WHEEL +0 -0
  51. {hud_python-0.2.4.dist-info → hud_python-0.2.6.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/tests/test_trace.py ADDED
@@ -0,0 +1,270 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import uuid
5
+ from unittest.mock import MagicMock
6
+
7
+ import pytest
8
+
9
+ from hud.telemetry._trace import (
10
+ init_telemetry,
11
+ register_trace,
12
+ trace,
13
+ )
14
+ from hud.telemetry.context import get_current_task_run_id as actual_get_current_task_run_id
15
+ from hud.telemetry.context import is_root_trace as actual_is_root_trace
16
+ from hud.telemetry.context import reset_context
17
+ from hud.telemetry.context import set_current_task_run_id as actual_set_current_task_run_id
18
+
19
+
20
+ @pytest.fixture(autouse=True)
21
+ def reset_telemetry_context_fixture():
22
+ """Ensures telemetry context is reset before and after each test in this file."""
23
+ reset_context()
24
+ yield
25
+ reset_context()
26
+
27
+
28
+ class TestInitTelemetry:
29
+ """Test telemetry initialization."""
30
+
31
+ def test_init_telemetry(self, mocker):
32
+ """Test telemetry initialization calls registry.install_all."""
33
+ mock_registry = mocker.patch("hud.telemetry._trace.registry", autospec=True)
34
+ init_telemetry()
35
+ mock_registry.install_all.assert_called_once()
36
+
37
+
38
+ class TestTrace:
39
+ """Test the trace context manager."""
40
+
41
+ def test_trace_basic(self, mocker):
42
+ """Test basic trace functionality and context setting."""
43
+ mock_flush = mocker.patch(
44
+ "hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
45
+ )
46
+ mock_submit_loop = mocker.patch(
47
+ "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
48
+ )
49
+
50
+ initial_root_state = actual_is_root_trace.get()
51
+
52
+ with trace() as task_run_id:
53
+ assert isinstance(task_run_id, str)
54
+ uuid.UUID(task_run_id)
55
+ assert actual_get_current_task_run_id() == task_run_id
56
+ assert actual_is_root_trace.get() is True
57
+
58
+ assert actual_get_current_task_run_id() is None
59
+ assert actual_is_root_trace.get() == initial_root_state
60
+ mock_flush.assert_called_once()
61
+ mock_submit_loop.assert_not_called()
62
+
63
+ def test_trace_with_name_and_attributes(self, mocker):
64
+ """Test trace with name and attributes, checking they are passed on."""
65
+ mock_mcp_calls = [MagicMock()]
66
+ mock_flush = mocker.patch(
67
+ "hud.telemetry._trace.flush_buffer", return_value=mock_mcp_calls, autospec=True
68
+ )
69
+ mock_submit_loop = mocker.patch(
70
+ "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
71
+ )
72
+
73
+ trace_name = "test_trace_with_data"
74
+ attrs = {"key": "value", "number": 42}
75
+
76
+ with trace(name=trace_name, attributes=attrs) as task_run_id:
77
+ assert isinstance(task_run_id, str)
78
+
79
+ mock_flush.assert_called_once()
80
+ mock_submit_loop.assert_called_once()
81
+
82
+ @pytest.mark.asyncio
83
+ async def test_trace_with_mcp_calls_exports(self, mocker):
84
+ """Test trace with MCP calls exports telemetry with correct data."""
85
+ mock_mcp_calls = [MagicMock(), MagicMock()]
86
+ mock_flush = mocker.patch(
87
+ "hud.telemetry._trace.flush_buffer", return_value=mock_mcp_calls, autospec=True
88
+ )
89
+ mock_submit_loop = mocker.patch(
90
+ "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
91
+ )
92
+
93
+ async def mock_export(*args, **kwargs):
94
+ return None
95
+
96
+ mock_export_actual_coro = mocker.patch(
97
+ "hud.telemetry._trace.exporter.export_telemetry",
98
+ side_effect=mock_export,
99
+ )
100
+
101
+ test_attrs = {"custom_attr": "test_val"}
102
+ test_name = "mcp_export_test"
103
+
104
+ with trace(name=test_name, attributes=test_attrs) as task_run_id:
105
+ pass
106
+
107
+ mock_flush.assert_called_once()
108
+ mock_submit_loop.assert_called_once()
109
+
110
+ mock_export_actual_coro.assert_called_once()
111
+ args, kwargs = mock_export_actual_coro.call_args
112
+ assert kwargs["task_run_id"] == task_run_id
113
+ assert kwargs["mcp_calls"] == mock_mcp_calls
114
+ assert kwargs["trace_attributes"]["trace_name"] == test_name
115
+ assert kwargs["trace_attributes"]["custom_attr"] == "test_val"
116
+ assert "start_time" in kwargs["trace_attributes"]
117
+ assert "end_time" in kwargs["trace_attributes"]
118
+ assert "duration" in kwargs["trace_attributes"]
119
+ assert kwargs["trace_attributes"]["is_root"] is True
120
+
121
+ def test_trace_nested(self, mocker):
122
+ """Test nested traces, verifying context restoration and root trace logic."""
123
+ actual_set_current_task_run_id(None)
124
+ actual_is_root_trace.set(False)
125
+
126
+ mock_flush_internal = mocker.patch(
127
+ "hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
128
+ )
129
+ mock_submit_loop_internal = mocker.patch(
130
+ "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
131
+ )
132
+
133
+ assert actual_get_current_task_run_id() is None
134
+ assert actual_is_root_trace.get() is False
135
+
136
+ with trace(name="outer") as outer_id:
137
+ assert actual_get_current_task_run_id() == outer_id
138
+ assert actual_is_root_trace.get() is True
139
+ with trace(name="inner") as inner_id:
140
+ assert actual_get_current_task_run_id() == inner_id
141
+ assert actual_is_root_trace.get() is False
142
+ assert outer_id != inner_id
143
+ assert actual_get_current_task_run_id() == outer_id
144
+ assert actual_is_root_trace.get() is True
145
+
146
+ assert actual_get_current_task_run_id() is None
147
+ assert actual_is_root_trace.get() is False
148
+ assert mock_flush_internal.call_count == 2
149
+ mock_submit_loop_internal.assert_not_called()
150
+
151
+ def test_trace_exception_handling(self, mocker):
152
+ """Test trace handles exceptions properly and restores context."""
153
+ initial_task_id_before_trace = "pre_existing_id_123"
154
+ initial_root_state_before_trace = True
155
+ actual_set_current_task_run_id(initial_task_id_before_trace)
156
+ actual_is_root_trace.set(initial_root_state_before_trace)
157
+
158
+ mock_flush = mocker.patch(
159
+ "hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
160
+ )
161
+ mock_submit_loop = mocker.patch(
162
+ "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
163
+ )
164
+
165
+ with pytest.raises(ValueError, match="Test exception"), trace(name="trace_with_exception"):
166
+ assert actual_get_current_task_run_id() != initial_task_id_before_trace
167
+ assert actual_is_root_trace.get() is False
168
+ raise ValueError("Test exception")
169
+
170
+ mock_flush.assert_called_once()
171
+ assert actual_get_current_task_run_id() == initial_task_id_before_trace
172
+ assert actual_is_root_trace.get() == initial_root_state_before_trace
173
+ mock_submit_loop.assert_not_called()
174
+
175
+
176
+ class TestRegisterTrace:
177
+ """Test the register_trace decorator."""
178
+
179
+ def test_register_trace_sync_function(self, mocker):
180
+ mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
181
+ mock_trace_context_manager.return_value.__enter__.return_value = "mocked_task_id"
182
+ mock_trace_context_manager.return_value.__exit__.return_value = None
183
+
184
+ @register_trace(name="test_func_sync")
185
+ def sync_function(x, y):
186
+ return x + y
187
+
188
+ result = sync_function(1, 2)
189
+ assert result == 3
190
+ mock_trace_context_manager.assert_called_once_with(name="test_func_sync", attributes=None)
191
+
192
+ def test_register_trace_async_function(self, mocker):
193
+ mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
194
+ mock_trace_context_manager.return_value.__enter__.return_value = "mocked_task_id"
195
+ mock_trace_context_manager.return_value.__exit__.return_value = None
196
+
197
+ @register_trace(name="test_func_async")
198
+ async def async_function(x, y):
199
+ return x + y
200
+
201
+ async def run_test():
202
+ result = await async_function(1, 2)
203
+ assert result == 3
204
+ mock_trace_context_manager.assert_called_once_with(
205
+ name="test_func_async", attributes=None
206
+ )
207
+
208
+ asyncio.run(run_test())
209
+
210
+ def test_register_trace_with_attributes(self, mocker):
211
+ """Test register_trace with attributes."""
212
+ mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
213
+
214
+ class _MockTraceContextManager:
215
+ def __enter__(self):
216
+ return "task_id"
217
+
218
+ def __exit__(self, exc_type, exc_value, traceback):
219
+ return None
220
+
221
+ mock_trace_context_manager.return_value = _MockTraceContextManager()
222
+
223
+ attrs = {"operation": "add"}
224
+
225
+ @register_trace(name="test_func", attributes=attrs)
226
+ def func_with_attrs(x):
227
+ return x * 2
228
+
229
+ result = func_with_attrs(5)
230
+ assert result == 10
231
+ mock_trace_context_manager.assert_called_once_with(name="test_func", attributes=attrs)
232
+
233
+ def test_register_trace_without_name(self, mocker):
234
+ """Test register_trace uses function name when name not provided."""
235
+ mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
236
+ mock_trace_context_manager.return_value.__enter__.return_value = "task_id"
237
+ mock_trace_context_manager.return_value.__exit__.return_value = None
238
+
239
+ @register_trace()
240
+ def my_function():
241
+ return "result"
242
+
243
+ result = my_function()
244
+ assert result == "result"
245
+ mock_trace_context_manager.assert_called_once_with(name="my_function", attributes=None)
246
+
247
+ def test_register_trace_preserves_function_metadata(self):
248
+ """Test register_trace preserves original function metadata."""
249
+
250
+ @register_trace(name="test")
251
+ def original_function():
252
+ """Original docstring."""
253
+
254
+ assert original_function.__name__ == "original_function"
255
+ assert original_function.__doc__ == "Original docstring."
256
+
257
+ def test_register_trace_exception_propagation(self, mocker):
258
+ """Test register_trace propagates exceptions."""
259
+ mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
260
+ mock_trace_context_manager.return_value.__enter__.return_value = "task_id"
261
+ mock_trace_context_manager.return_value.__exit__.return_value = None
262
+
263
+ @register_trace()
264
+ def failing_function():
265
+ raise RuntimeError("Test error")
266
+
267
+ with pytest.raises(RuntimeError, match="Test error"):
268
+ failing_function()
269
+
270
+ mock_trace_context_manager.assert_called_once()
hud/types.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import enum
4
4
  from pathlib import Path
5
- from typing import Any, Literal, TypeAlias
5
+ from typing import Literal, TypeAlias
6
6
 
7
7
  from pydantic import BaseModel
8
8
 
@@ -12,38 +12,22 @@ class CustomGym(BaseModel):
12
12
  Public environment specification with a dockerfile and controller.
13
13
 
14
14
  If the location is remote, the env will be created on the server.
15
- If the location is dev, the env will be created locally via docker.
15
+ If the location is local, the env will be created locally via docker.
16
16
 
17
17
  The dockerfile can be specified directly or automatically found in the controller_source_dir.
18
18
  If neither is provided, an error will be raised during validation.
19
19
  """
20
20
 
21
21
  type: Literal["public"] = "public"
22
- dockerfile: str | None = None
23
22
  location: Literal["local", "remote"]
24
- ports: list[int] | None = None
25
- # If path, then it is a development environment on the local computer
26
- # If none, then the controller must be installed in the environment through the dockerfile
27
- # Can be provided as a string or Path object
28
- controller_source_dir: str | Path | None = None
29
-
30
- def model_post_init(self, __context: Any, /) -> None:
31
- """Validate and set up dockerfile if not explicitly provided."""
32
- # Convert string path to Path object if needed
33
- if isinstance(self.controller_source_dir, str):
34
- self.controller_source_dir = Path(self.controller_source_dir)
35
-
36
- if self.dockerfile is None:
37
- if self.controller_source_dir is None:
38
- raise ValueError("Either dockerfile or controller_source_dir must be provided")
39
-
40
- # Look for Dockerfile in the controller_source_dir
41
- dockerfile_path = self.controller_source_dir / "Dockerfile"
42
- if not dockerfile_path.exists():
43
- raise ValueError(f"Dockerfile not found in {self.controller_source_dir}")
44
-
45
- # Read the Dockerfile content
46
- self.dockerfile = dockerfile_path.read_text()
23
+ # A. If path, then it is a docker build context on the local computer.
24
+ # If the location is local, docker build will be used to create the image.
25
+ # If the location is remote, we will build the image remotely.
26
+ # The controller will be automatically installed and kept in sync with local changes
27
+ # as long as a pyproject.toml is present at the root of the folder.
28
+ # B. If string, then it is the uri of the docker image to use.
29
+ # The controller must already be installed in the image.
30
+ image_or_build_context: str | Path
47
31
 
48
32
 
49
33
  class EnvironmentStatus(str, enum.Enum):
@@ -64,7 +48,7 @@ class EnvironmentStatus(str, enum.Enum):
64
48
 
65
49
 
66
50
  # Available HUD gyms
67
- ServerGym: TypeAlias = Literal["qa", "hud-browser", "hud-ubuntu", "OSWorld-Ubuntu"]
51
+ ServerGym: TypeAlias = Literal["qa", "hud-browser", "OSWorld-Ubuntu"]
68
52
 
69
53
  # Gyms can be either custom or server-side
70
54
  Gym: TypeAlias = CustomGym | ServerGym
hud/utils/common.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import io
4
4
  import logging
5
5
  import tarfile
6
+ import zipfile
6
7
  from typing import TYPE_CHECKING, Any, TypedDict
7
8
 
8
9
  from pydantic import BaseModel
@@ -22,6 +23,7 @@ class FunctionConfig(BaseModel):
22
23
  args: list[Any] # Must be json serializable
23
24
 
24
25
  id: str | None = None # Optional id for remote execution
26
+ metadata: dict[str, Any] | None = None # Optional metadata for telemetry
25
27
 
26
28
  def __len__(self) -> int:
27
29
  return len(self.args)
@@ -33,11 +35,12 @@ class FunctionConfig(BaseModel):
33
35
  return iter(self.args)
34
36
 
35
37
  def __str__(self) -> str:
36
- return f"{self.function}: {', '.join(str(arg) for arg in self.args)}"
38
+ return f"FC: {self.function}: {', '.join(str(arg) for arg in self.args)} ({self.metadata})"
37
39
 
38
40
 
39
41
  # Type alias for the shorthand config, which just converts to function name and args
40
- ShorthandConfig = tuple[str | dict[str, Any] | list[str] | list[dict[str, Any]], ...]
42
+ BasicType = str | int | float | bool | None
43
+ ShorthandConfig = tuple[BasicType | dict[str, Any] | list[BasicType] | list[dict[str, Any]], ...]
41
44
 
42
45
  # Type alias for multiple config formats
43
46
  FunctionConfigs = (
@@ -62,6 +65,11 @@ class Observation(BaseModel):
62
65
  screenshot: str | None = None # base64 string png
63
66
  text: str | None = None
64
67
 
68
+ def __str__(self) -> str:
69
+ return f"""Observation(screenshot={
70
+ self.screenshot[:100] if self.screenshot else "None"
71
+ }..., text={self.text}...)"""
72
+
65
73
 
66
74
  class ExecuteResult(TypedDict):
67
75
  """
@@ -107,6 +115,18 @@ def directory_to_tar_bytes(directory_path: Path) -> bytes:
107
115
  return output.getvalue()
108
116
 
109
117
 
118
+ def directory_to_zip_bytes(context_dir: Path) -> bytes:
119
+ """Zip a directory and return the zip archive as bytes."""
120
+ output = io.BytesIO()
121
+ with zipfile.ZipFile(output, "w", zipfile.ZIP_DEFLATED) as zipf:
122
+ for file_path in context_dir.rglob("*"):
123
+ if file_path.is_file():
124
+ rel_path = file_path.relative_to(context_dir)
125
+ logger.debug("Adding %s to zip archive", rel_path)
126
+ zipf.write(str(file_path), arcname=str(rel_path))
127
+ return output.getvalue()
128
+
129
+
110
130
  async def get_gym_id(gym_name_or_id: str) -> str:
111
131
  """
112
132
  Get the gym ID for a given gym name or ID.
hud/utils/misc.py ADDED
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import TYPE_CHECKING, Any
5
+
6
+ from hud.server import make_request
7
+ from hud.settings import settings
8
+
9
+ if TYPE_CHECKING:
10
+ from hud.env.environment import Environment # Import Environment for type hinting
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ async def upload_env_telemetry(
16
+ environment: Environment,
17
+ results: Any,
18
+ api_key: str | None = None,
19
+ ) -> None:
20
+ """
21
+ Sends telemetry data (results from a cloud runner) to the HUD telemetry upload endpoint.
22
+ """
23
+ environment_id = environment.client.env_id # type: ignore
24
+
25
+ if not api_key:
26
+ api_key = settings.api_key
27
+
28
+ if not api_key:
29
+ raise ValueError("API key must be provided either as an argument or set in hud.settings.")
30
+
31
+ endpoint_url = f"{settings.base_url}/v2/environments/{environment_id}/telemetry-upload"
32
+
33
+ request_payload = {
34
+ "results": {
35
+ "steps": results,
36
+ }
37
+ }
38
+
39
+ logger.debug("Sending telemetry to %s for env_id: %s", endpoint_url, environment_id)
40
+
41
+ try:
42
+ await make_request(
43
+ method="POST",
44
+ url=endpoint_url,
45
+ json=request_payload,
46
+ api_key=api_key,
47
+ )
48
+ logger.info("Successfully uploaded telemetry for environment_id: %s", environment_id)
49
+ except Exception as e:
50
+ logger.error(
51
+ "Failed to upload telemetry for environment_id: %s. Error: %s", environment_id, e
52
+ )
53
+ raise
hud/utils/tests/test_version.py CHANGED
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.2.4"
8
+ assert hud.__version__ == "0.2.6"
hud/version.py ADDED
@@ -0,0 +1,7 @@
1
+ """
2
+ Version information for the HUD SDK.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ __version__ = "0.2.6"