hud-python 0.2.9__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +14 -5
- hud/env/docker_client.py +1 -1
- hud/env/environment.py +13 -8
- hud/env/local_docker_client.py +1 -1
- hud/env/remote_client.py +1 -1
- hud/env/remote_docker_client.py +2 -2
- hud/exceptions.py +2 -1
- hud/mcp_agent/__init__.py +15 -0
- hud/mcp_agent/base.py +723 -0
- hud/mcp_agent/claude.py +316 -0
- hud/mcp_agent/langchain.py +231 -0
- hud/mcp_agent/openai.py +318 -0
- hud/mcp_agent/tests/__init__.py +1 -0
- hud/mcp_agent/tests/test_base.py +437 -0
- hud/settings.py +14 -2
- hud/task.py +4 -0
- hud/telemetry/__init__.py +11 -7
- hud/telemetry/_trace.py +82 -71
- hud/telemetry/context.py +9 -27
- hud/telemetry/exporter.py +6 -5
- hud/telemetry/instrumentation/mcp.py +174 -410
- hud/telemetry/mcp_models.py +13 -74
- hud/telemetry/tests/test_context.py +9 -6
- hud/telemetry/tests/test_trace.py +92 -61
- hud/tools/__init__.py +21 -0
- hud/tools/base.py +65 -0
- hud/tools/bash.py +137 -0
- hud/tools/computer/__init__.py +13 -0
- hud/tools/computer/anthropic.py +411 -0
- hud/tools/computer/hud.py +315 -0
- hud/tools/computer/openai.py +283 -0
- hud/tools/edit.py +290 -0
- hud/tools/executors/__init__.py +13 -0
- hud/tools/executors/base.py +331 -0
- hud/tools/executors/pyautogui.py +585 -0
- hud/tools/executors/tests/__init__.py +1 -0
- hud/tools/executors/tests/test_base_executor.py +338 -0
- hud/tools/executors/tests/test_pyautogui_executor.py +162 -0
- hud/tools/executors/xdo.py +503 -0
- hud/tools/helper/README.md +56 -0
- hud/tools/helper/__init__.py +9 -0
- hud/tools/helper/mcp_server.py +78 -0
- hud/tools/helper/server_initialization.py +115 -0
- hud/tools/helper/utils.py +58 -0
- hud/tools/playwright_tool.py +373 -0
- hud/tools/tests/__init__.py +3 -0
- hud/tools/tests/test_bash.py +152 -0
- hud/tools/tests/test_computer.py +52 -0
- hud/tools/tests/test_computer_actions.py +34 -0
- hud/tools/tests/test_edit.py +233 -0
- hud/tools/tests/test_init.py +27 -0
- hud/tools/tests/test_playwright_tool.py +183 -0
- hud/tools/tests/test_tools.py +154 -0
- hud/tools/tests/test_utils.py +156 -0
- hud/tools/utils.py +50 -0
- hud/types.py +10 -1
- hud/utils/tests/test_init.py +21 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.2.9.dist-info → hud_python-0.3.0.dist-info}/METADATA +9 -6
- hud_python-0.3.0.dist-info/RECORD +124 -0
- hud_python-0.2.9.dist-info/RECORD +0 -85
- {hud_python-0.2.9.dist-info → hud_python-0.3.0.dist-info}/WHEEL +0 -0
- {hud_python-0.2.9.dist-info → hud_python-0.3.0.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/mcp_models.py
CHANGED
|
@@ -28,16 +28,14 @@ class StatusType(str, Enum):
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class MCPCallType(str, Enum):
|
|
31
|
-
"""
|
|
31
|
+
"""Enum for different types of MCP calls in telemetry."""
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
HANDLE_INCOMING = "mcp.handle_incoming"
|
|
40
|
-
MANUAL_TEST = "manual.test"
|
|
33
|
+
# Requests and Notifications
|
|
34
|
+
SEND_REQUEST = "mcp.send_request"
|
|
35
|
+
SEND_NOTIFICATION = "mcp.send_notification"
|
|
36
|
+
|
|
37
|
+
# Responses
|
|
38
|
+
RECEIVE_RESPONSE = "mcp.receive_response"
|
|
41
39
|
|
|
42
40
|
|
|
43
41
|
class BaseMCPCall(BaseModel):
|
|
@@ -87,6 +85,8 @@ class MCPRequestCall(BaseMCPCall):
|
|
|
87
85
|
duration: float | None = None
|
|
88
86
|
request_id: str | int | None = None
|
|
89
87
|
request_data: dict[str, Any] | None = None
|
|
88
|
+
error: str | None = None
|
|
89
|
+
error_type: str | None = None
|
|
90
90
|
|
|
91
91
|
@classmethod
|
|
92
92
|
def from_jsonrpc_request(
|
|
@@ -191,6 +191,8 @@ class MCPNotificationCall(BaseMCPCall):
|
|
|
191
191
|
end_time: float | None = None
|
|
192
192
|
duration: float | None = None
|
|
193
193
|
notification_data: dict[str, Any] | None = None
|
|
194
|
+
error: str | None = None
|
|
195
|
+
error_type: str | None = None
|
|
194
196
|
|
|
195
197
|
@classmethod
|
|
196
198
|
def from_jsonrpc_notification(
|
|
@@ -230,69 +232,6 @@ class MCPNotificationCall(BaseMCPCall):
|
|
|
230
232
|
return None
|
|
231
233
|
|
|
232
234
|
|
|
233
|
-
class MCPStreamEvent(BaseMCPCall):
|
|
234
|
-
"""Record for an MCP stream event (read or write)"""
|
|
235
|
-
|
|
236
|
-
stream_event: bool = True
|
|
237
|
-
event_type: str = Field(..., description="Type of stream event: read or write")
|
|
238
|
-
item_type: str | None = None
|
|
239
|
-
is_response_or_error: bool = False
|
|
240
|
-
message_data: dict[str, Any] | None = None
|
|
241
|
-
|
|
242
|
-
@classmethod
|
|
243
|
-
def from_session_message(
|
|
244
|
-
cls, message: SessionMessage, task_run_id: str, event_type: str, **kwargs: Any
|
|
245
|
-
) -> MCPStreamEvent:
|
|
246
|
-
"""Create telemetry record for a stream event"""
|
|
247
|
-
method_name = "unknown_stream_operation"
|
|
248
|
-
is_response = False
|
|
249
|
-
item_type = "unknown"
|
|
250
|
-
message_data = None
|
|
251
|
-
|
|
252
|
-
if hasattr(message, "message") and hasattr(message.message, "root"):
|
|
253
|
-
msg_root = message.message.root
|
|
254
|
-
item_type = type(msg_root).__name__
|
|
255
|
-
message_data = msg_root.model_dump(exclude_none=True)
|
|
256
|
-
|
|
257
|
-
# Check type first before accessing attributes
|
|
258
|
-
if isinstance(msg_root, JSONRPCRequest | JSONRPCNotification) and hasattr(
|
|
259
|
-
msg_root, "method"
|
|
260
|
-
):
|
|
261
|
-
method_name = msg_root.method
|
|
262
|
-
elif isinstance(msg_root, JSONRPCResponse | JSONRPCError) and hasattr(msg_root, "id"):
|
|
263
|
-
method_name = f"response_to_id_{msg_root.id}"
|
|
264
|
-
is_response = True
|
|
265
|
-
|
|
266
|
-
return cls(
|
|
267
|
-
task_run_id=task_run_id,
|
|
268
|
-
status=StatusType.COMPLETED,
|
|
269
|
-
method=method_name,
|
|
270
|
-
event_type=event_type,
|
|
271
|
-
item_type=item_type,
|
|
272
|
-
is_response_or_error=is_response,
|
|
273
|
-
message_data=message_data,
|
|
274
|
-
timestamp=datetime.now().timestamp(),
|
|
275
|
-
**kwargs,
|
|
276
|
-
)
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
class MCPManualTestCall(BaseMCPCall):
|
|
280
|
-
"""Record for a manual test record"""
|
|
281
|
-
|
|
282
|
-
call_type: str = MCPCallType.MANUAL_TEST
|
|
283
|
-
custom_data: dict[str, Any] = Field(default_factory=dict)
|
|
284
|
-
|
|
285
|
-
@classmethod
|
|
286
|
-
def create(cls, task_run_id: str, **custom_data: Any) -> MCPManualTestCall:
|
|
287
|
-
"""Create a manual test record with custom data"""
|
|
288
|
-
return cls(
|
|
289
|
-
task_run_id=task_run_id,
|
|
290
|
-
status=StatusType.COMPLETED,
|
|
291
|
-
custom_data=custom_data,
|
|
292
|
-
timestamp=datetime.now().timestamp(),
|
|
293
|
-
)
|
|
294
|
-
|
|
295
|
-
|
|
296
235
|
class MCPTelemetryRecord(BaseModel):
|
|
297
236
|
"""Container for a set of related MCP telemetry records"""
|
|
298
237
|
|
|
@@ -320,9 +259,9 @@ class MCPTelemetryRecord(BaseModel):
|
|
|
320
259
|
|
|
321
260
|
|
|
322
261
|
class TrajectoryStep(BaseModel):
|
|
323
|
-
"""Model
|
|
262
|
+
"""Model for telemetry export format."""
|
|
324
263
|
|
|
325
|
-
type: str = Field(default="mcp-step")
|
|
264
|
+
type: str = Field(default="mcp-step")
|
|
326
265
|
observation_url: str | None = None
|
|
327
266
|
observation_text: str | None = None
|
|
328
267
|
actions: list[dict[str, Any]] = Field(default_factory=list)
|
|
@@ -64,21 +64,24 @@ class TestRootTraceContext:
|
|
|
64
64
|
class TestMCPCallBuffer:
|
|
65
65
|
"""Test MCP call buffer management."""
|
|
66
66
|
|
|
67
|
-
def
|
|
67
|
+
def reset_context(self):
|
|
68
68
|
"""Clear buffer before each test."""
|
|
69
69
|
# Flush any existing calls and reset context
|
|
70
|
+
set_current_task_run_id(None)
|
|
71
|
+
# Clear any existing buffers by setting a temporary task ID and flushing
|
|
72
|
+
set_current_task_run_id("temp-cleanup")
|
|
70
73
|
flush_buffer()
|
|
71
74
|
set_current_task_run_id(None)
|
|
72
75
|
|
|
73
76
|
def test_flush_buffer_empty(self):
|
|
74
77
|
"""Test flushing empty buffer."""
|
|
75
|
-
self.
|
|
78
|
+
self.reset_context()
|
|
76
79
|
result = flush_buffer()
|
|
77
80
|
assert result == []
|
|
78
81
|
|
|
79
82
|
def test_add_and_flush_mcp_call(self):
|
|
80
83
|
"""Test adding and flushing MCP calls."""
|
|
81
|
-
self.
|
|
84
|
+
self.reset_context()
|
|
82
85
|
|
|
83
86
|
# Set active task run ID
|
|
84
87
|
set_current_task_run_id("test-task")
|
|
@@ -101,7 +104,7 @@ class TestMCPCallBuffer:
|
|
|
101
104
|
|
|
102
105
|
def test_add_multiple_mcp_calls(self):
|
|
103
106
|
"""Test adding multiple MCP calls."""
|
|
104
|
-
self.
|
|
107
|
+
self.reset_context()
|
|
105
108
|
|
|
106
109
|
# Set active task run ID
|
|
107
110
|
set_current_task_run_id("test-task")
|
|
@@ -122,7 +125,7 @@ class TestMCPCallBuffer:
|
|
|
122
125
|
|
|
123
126
|
def test_buffer_isolation_per_task(self):
|
|
124
127
|
"""Test that MCP call buffers contain all calls regardless of task ID."""
|
|
125
|
-
self.
|
|
128
|
+
self.reset_context()
|
|
126
129
|
|
|
127
130
|
# Set task run ID 1
|
|
128
131
|
set_current_task_run_id("task-1")
|
|
@@ -150,7 +153,7 @@ class TestMCPCallBuffer:
|
|
|
150
153
|
|
|
151
154
|
def test_buffer_mcp_call_without_task_id(self):
|
|
152
155
|
"""Test adding MCP call when no task run ID is set."""
|
|
153
|
-
self.
|
|
156
|
+
self.reset_context()
|
|
154
157
|
set_current_task_run_id(None)
|
|
155
158
|
|
|
156
159
|
mock_call = MagicMock(spec=BaseMCPCall)
|
|
@@ -8,21 +8,25 @@ import pytest
|
|
|
8
8
|
|
|
9
9
|
from hud.telemetry._trace import (
|
|
10
10
|
init_telemetry,
|
|
11
|
-
register_trace,
|
|
12
11
|
trace,
|
|
12
|
+
trace_decorator,
|
|
13
|
+
trace_open,
|
|
13
14
|
)
|
|
14
15
|
from hud.telemetry.context import get_current_task_run_id as actual_get_current_task_run_id
|
|
15
16
|
from hud.telemetry.context import is_root_trace as actual_is_root_trace
|
|
16
|
-
from hud.telemetry.context import reset_context
|
|
17
17
|
from hud.telemetry.context import set_current_task_run_id as actual_set_current_task_run_id
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
@pytest.fixture(autouse=True)
|
|
21
21
|
def reset_telemetry_context_fixture():
|
|
22
22
|
"""Ensures telemetry context is reset before and after each test in this file."""
|
|
23
|
-
|
|
23
|
+
# Reset context before test
|
|
24
|
+
actual_set_current_task_run_id(None)
|
|
25
|
+
actual_is_root_trace.set(False)
|
|
24
26
|
yield
|
|
25
|
-
|
|
27
|
+
# Reset context after test
|
|
28
|
+
actual_set_current_task_run_id(None)
|
|
29
|
+
actual_is_root_trace.set(False)
|
|
26
30
|
|
|
27
31
|
|
|
28
32
|
class TestInitTelemetry:
|
|
@@ -49,7 +53,7 @@ class TestTrace:
|
|
|
49
53
|
|
|
50
54
|
initial_root_state = actual_is_root_trace.get()
|
|
51
55
|
|
|
52
|
-
with
|
|
56
|
+
with trace_open() as task_run_id:
|
|
53
57
|
assert isinstance(task_run_id, str)
|
|
54
58
|
uuid.UUID(task_run_id)
|
|
55
59
|
assert actual_get_current_task_run_id() == task_run_id
|
|
@@ -73,7 +77,7 @@ class TestTrace:
|
|
|
73
77
|
trace_name = "test_trace_with_data"
|
|
74
78
|
attrs = {"key": "value", "number": 42}
|
|
75
79
|
|
|
76
|
-
with
|
|
80
|
+
with trace_open(name=trace_name, attributes=attrs) as task_run_id:
|
|
77
81
|
assert isinstance(task_run_id, str)
|
|
78
82
|
|
|
79
83
|
mock_flush.assert_called_once()
|
|
@@ -101,7 +105,7 @@ class TestTrace:
|
|
|
101
105
|
test_attrs = {"custom_attr": "test_val"}
|
|
102
106
|
test_name = "mcp_export_test"
|
|
103
107
|
|
|
104
|
-
with
|
|
108
|
+
with trace_open(name=test_name, attributes=test_attrs) as task_run_id:
|
|
105
109
|
pass
|
|
106
110
|
|
|
107
111
|
mock_flush.assert_called_once()
|
|
@@ -113,10 +117,8 @@ class TestTrace:
|
|
|
113
117
|
assert kwargs["mcp_calls"] == mock_mcp_calls
|
|
114
118
|
assert kwargs["trace_attributes"]["trace_name"] == test_name
|
|
115
119
|
assert kwargs["trace_attributes"]["custom_attr"] == "test_val"
|
|
116
|
-
assert "
|
|
117
|
-
assert
|
|
118
|
-
assert "duration" in kwargs["trace_attributes"]
|
|
119
|
-
assert kwargs["trace_attributes"]["is_root"] is True
|
|
120
|
+
assert "duration_seconds" in kwargs["trace_attributes"]
|
|
121
|
+
assert kwargs["trace_attributes"]["is_root_trace"] is True
|
|
120
122
|
|
|
121
123
|
def test_trace_nested(self, mocker):
|
|
122
124
|
"""Test nested traces, verifying context restoration and root trace logic."""
|
|
@@ -133,7 +135,7 @@ class TestTrace:
|
|
|
133
135
|
assert actual_get_current_task_run_id() is None
|
|
134
136
|
assert actual_is_root_trace.get() is False
|
|
135
137
|
|
|
136
|
-
with
|
|
138
|
+
with trace_open(name="outer") as outer_id:
|
|
137
139
|
assert actual_get_current_task_run_id() == outer_id
|
|
138
140
|
assert actual_is_root_trace.get() is True
|
|
139
141
|
with trace(name="inner") as inner_id:
|
|
@@ -162,7 +164,10 @@ class TestTrace:
|
|
|
162
164
|
"hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
163
165
|
)
|
|
164
166
|
|
|
165
|
-
with
|
|
167
|
+
with (
|
|
168
|
+
pytest.raises(ValueError, match="Test exception"),
|
|
169
|
+
trace_open(name="trace_with_exception"),
|
|
170
|
+
):
|
|
166
171
|
assert actual_get_current_task_run_id() != initial_task_id_before_trace
|
|
167
172
|
assert actual_is_root_trace.get() is False
|
|
168
173
|
raise ValueError("Test exception")
|
|
@@ -173,98 +178,124 @@ class TestTrace:
|
|
|
173
178
|
mock_submit_loop.assert_not_called()
|
|
174
179
|
|
|
175
180
|
|
|
176
|
-
class
|
|
177
|
-
"""Test the
|
|
181
|
+
class TestTraceSync:
|
|
182
|
+
"""Test the trace_sync context manager."""
|
|
178
183
|
|
|
179
|
-
def
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
184
|
+
def test_trace_sync_basic(self, mocker):
|
|
185
|
+
"""Test trace calls trace_open and flush."""
|
|
186
|
+
mock_flush = mocker.patch("hud.flush", autospec=True)
|
|
187
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open")
|
|
188
|
+
mock_trace_open.return_value.__enter__.return_value = "test-task-id"
|
|
189
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
183
190
|
|
|
184
|
-
|
|
191
|
+
with trace(name="test_sync") as task_run_id:
|
|
192
|
+
assert task_run_id == "test-task-id"
|
|
193
|
+
|
|
194
|
+
mock_trace_open.assert_called_once_with(name="test_sync", attributes=None)
|
|
195
|
+
mock_flush.assert_called_once()
|
|
196
|
+
|
|
197
|
+
def test_trace_sync_with_attributes(self, mocker):
|
|
198
|
+
"""Test trace passes attributes correctly."""
|
|
199
|
+
mock_flush = mocker.patch("hud.flush", autospec=True)
|
|
200
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open")
|
|
201
|
+
mock_trace_open.return_value.__enter__.return_value = "test-task-id"
|
|
202
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
203
|
+
attrs = {"key": "value"}
|
|
204
|
+
|
|
205
|
+
with trace(name="test_sync", attributes=attrs):
|
|
206
|
+
pass
|
|
207
|
+
|
|
208
|
+
mock_trace_open.assert_called_once_with(name="test_sync", attributes=attrs)
|
|
209
|
+
mock_flush.assert_called_once()
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class TestTraceDecorator:
|
|
213
|
+
"""Test the trace_decorator function decorator."""
|
|
214
|
+
|
|
215
|
+
def test_trace_decorator_sync_function(self, mocker):
|
|
216
|
+
"""Test trace_decorator on synchronous functions."""
|
|
217
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
218
|
+
mock_trace_open.return_value.__enter__.return_value = "mocked_task_id"
|
|
219
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
220
|
+
|
|
221
|
+
@trace_decorator(name="test_func_sync")
|
|
185
222
|
def sync_function(x, y):
|
|
186
223
|
return x + y
|
|
187
224
|
|
|
188
225
|
result = sync_function(1, 2)
|
|
189
226
|
assert result == 3
|
|
190
|
-
|
|
227
|
+
mock_trace_open.assert_called_once_with(name="test_func_sync", attributes=None)
|
|
191
228
|
|
|
192
|
-
def
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
229
|
+
def test_trace_decorator_async_function(self, mocker):
|
|
230
|
+
"""Test trace_decorator on asynchronous functions."""
|
|
231
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
232
|
+
mock_trace_open.return_value.__enter__.return_value = "mocked_task_id"
|
|
233
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
196
234
|
|
|
197
|
-
@
|
|
235
|
+
@trace_decorator(name="test_func_async")
|
|
198
236
|
async def async_function(x, y):
|
|
199
237
|
return x + y
|
|
200
238
|
|
|
201
239
|
async def run_test():
|
|
202
240
|
result = await async_function(1, 2)
|
|
203
241
|
assert result == 3
|
|
204
|
-
|
|
205
|
-
name="test_func_async", attributes=None
|
|
206
|
-
)
|
|
242
|
+
mock_trace_open.assert_called_once_with(name="test_func_async", attributes=None)
|
|
207
243
|
|
|
208
244
|
asyncio.run(run_test())
|
|
209
245
|
|
|
210
|
-
def
|
|
211
|
-
"""Test
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
def __enter__(self):
|
|
216
|
-
return "task_id"
|
|
217
|
-
|
|
218
|
-
def __exit__(self, exc_type, exc_value, traceback):
|
|
219
|
-
return None
|
|
220
|
-
|
|
221
|
-
mock_trace_context_manager.return_value = _MockTraceContextManager()
|
|
246
|
+
def test_trace_decorator_with_attributes(self, mocker):
|
|
247
|
+
"""Test trace_decorator with attributes."""
|
|
248
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
249
|
+
mock_trace_open.return_value.__enter__.return_value = "task_id"
|
|
250
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
222
251
|
|
|
223
|
-
attrs = {"operation": "
|
|
252
|
+
attrs = {"operation": "multiply"}
|
|
224
253
|
|
|
225
|
-
@
|
|
254
|
+
@trace_decorator(name="test_func", attributes=attrs)
|
|
226
255
|
def func_with_attrs(x):
|
|
227
256
|
return x * 2
|
|
228
257
|
|
|
229
258
|
result = func_with_attrs(5)
|
|
230
259
|
assert result == 10
|
|
231
|
-
|
|
260
|
+
mock_trace_open.assert_called_once_with(name="test_func", attributes=attrs)
|
|
232
261
|
|
|
233
|
-
def
|
|
234
|
-
"""Test
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
262
|
+
def test_trace_decorator_without_name(self, mocker):
|
|
263
|
+
"""Test trace_decorator uses module.function name when name not provided."""
|
|
264
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
265
|
+
mock_trace_open.return_value.__enter__.return_value = "task_id"
|
|
266
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
238
267
|
|
|
239
|
-
@
|
|
268
|
+
@trace_decorator()
|
|
240
269
|
def my_function():
|
|
241
270
|
return "result"
|
|
242
271
|
|
|
243
272
|
result = my_function()
|
|
244
273
|
assert result == "result"
|
|
245
|
-
|
|
274
|
+
# Should use module.function name
|
|
275
|
+
expected_name = f"{my_function.__module__}.my_function"
|
|
276
|
+
mock_trace_open.assert_called_once_with(name=expected_name, attributes=None)
|
|
246
277
|
|
|
247
|
-
def
|
|
248
|
-
"""Test
|
|
278
|
+
def test_trace_decorator_preserves_function_metadata(self):
|
|
279
|
+
"""Test trace_decorator preserves original function metadata."""
|
|
249
280
|
|
|
250
|
-
@
|
|
281
|
+
@trace_decorator(name="test")
|
|
251
282
|
def original_function():
|
|
252
283
|
"""Original docstring."""
|
|
253
284
|
|
|
254
285
|
assert original_function.__name__ == "original_function"
|
|
255
286
|
assert original_function.__doc__ == "Original docstring."
|
|
256
287
|
|
|
257
|
-
def
|
|
258
|
-
"""Test
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
288
|
+
def test_trace_decorator_exception_propagation(self, mocker):
|
|
289
|
+
"""Test trace_decorator propagates exceptions."""
|
|
290
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
291
|
+
mock_trace_open.return_value.__enter__.return_value = "task_id"
|
|
292
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
262
293
|
|
|
263
|
-
@
|
|
294
|
+
@trace_decorator()
|
|
264
295
|
def failing_function():
|
|
265
296
|
raise RuntimeError("Test error")
|
|
266
297
|
|
|
267
298
|
with pytest.raises(RuntimeError, match="Test error"):
|
|
268
299
|
failing_function()
|
|
269
300
|
|
|
270
|
-
|
|
301
|
+
mock_trace_open.assert_called_once()
|
hud/tools/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""HUD tools for computer control, file editing, and bash commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .base import ToolError, ToolResult, tool_result_to_content_blocks
|
|
6
|
+
from .bash import BashTool
|
|
7
|
+
from .computer import AnthropicComputerTool, HudComputerTool, OpenAIComputerTool
|
|
8
|
+
from .edit import EditTool
|
|
9
|
+
from .playwright_tool import PlaywrightTool
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"AnthropicComputerTool",
|
|
13
|
+
"BashTool",
|
|
14
|
+
"EditTool",
|
|
15
|
+
"HudComputerTool",
|
|
16
|
+
"OpenAIComputerTool",
|
|
17
|
+
"PlaywrightTool",
|
|
18
|
+
"ToolError",
|
|
19
|
+
"ToolResult",
|
|
20
|
+
"tool_result_to_content_blocks",
|
|
21
|
+
]
|
hud/tools/base.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, fields, replace
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from mcp.types import ImageContent, TextContent
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(kw_only=True, frozen=True)
|
|
10
|
+
class ToolResult:
|
|
11
|
+
"""Represents the result of a tool execution."""
|
|
12
|
+
|
|
13
|
+
output: str | None = None
|
|
14
|
+
error: str | None = None
|
|
15
|
+
base64_image: str | None = None
|
|
16
|
+
system: str | None = None
|
|
17
|
+
|
|
18
|
+
def __bool__(self) -> bool:
|
|
19
|
+
return any(getattr(self, field.name) for field in fields(self))
|
|
20
|
+
|
|
21
|
+
def __add__(self, other: ToolResult) -> ToolResult:
|
|
22
|
+
def combine_fields(
|
|
23
|
+
field: str | None, other_field: str | None, concatenate: bool = True
|
|
24
|
+
) -> str | None:
|
|
25
|
+
if field and other_field:
|
|
26
|
+
if concatenate:
|
|
27
|
+
return field + other_field
|
|
28
|
+
raise ValueError("Cannot combine tool results")
|
|
29
|
+
return field or other_field
|
|
30
|
+
|
|
31
|
+
return ToolResult(
|
|
32
|
+
output=combine_fields(self.output, other.output),
|
|
33
|
+
error=combine_fields(self.error, other.error),
|
|
34
|
+
base64_image=combine_fields(self.base64_image, other.base64_image, False),
|
|
35
|
+
system=combine_fields(self.system, other.system),
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
def replace(self, **kwargs: Any) -> ToolResult:
|
|
39
|
+
"""Returns a new ToolResult with the given fields replaced."""
|
|
40
|
+
return replace(self, **kwargs)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Legacy alias for backward compatibility
|
|
44
|
+
CLIResult = ToolResult
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class ToolError(Exception):
|
|
48
|
+
"""An error raised by a tool."""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Legacy alias for backward compatibility
|
|
52
|
+
CLIError = ToolError
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def tool_result_to_content_blocks(result: ToolResult) -> list[ImageContent | TextContent]:
|
|
56
|
+
"""Convert a ToolResult to MCP content blocks."""
|
|
57
|
+
blocks = []
|
|
58
|
+
|
|
59
|
+
if result.output:
|
|
60
|
+
blocks.append(TextContent(text=result.output, type="text"))
|
|
61
|
+
if result.error:
|
|
62
|
+
blocks.append(TextContent(text=result.error, type="text"))
|
|
63
|
+
if result.base64_image:
|
|
64
|
+
blocks.append(ImageContent(data=result.base64_image, mimeType="image/png", type="image"))
|
|
65
|
+
return blocks
|
hud/tools/bash.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from .base import CLIResult, ToolError, ToolResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class _BashSession:
|
|
12
|
+
"""A session of a bash shell."""
|
|
13
|
+
|
|
14
|
+
_started: bool
|
|
15
|
+
_process: asyncio.subprocess.Process
|
|
16
|
+
|
|
17
|
+
command: str = "/bin/bash"
|
|
18
|
+
_output_delay: float = 0.2 # seconds
|
|
19
|
+
_timeout: float = 120.0 # seconds
|
|
20
|
+
_sentinel: str = "<<exit>>"
|
|
21
|
+
|
|
22
|
+
def __init__(self) -> None:
|
|
23
|
+
self._started = False
|
|
24
|
+
self._timed_out = False
|
|
25
|
+
|
|
26
|
+
async def start(self) -> None:
|
|
27
|
+
if self._started:
|
|
28
|
+
await asyncio.sleep(0)
|
|
29
|
+
return
|
|
30
|
+
|
|
31
|
+
# Platform-specific subprocess creation
|
|
32
|
+
kwargs = {
|
|
33
|
+
"shell": True,
|
|
34
|
+
"bufsize": 0,
|
|
35
|
+
"stdin": asyncio.subprocess.PIPE,
|
|
36
|
+
"stdout": asyncio.subprocess.PIPE,
|
|
37
|
+
"stderr": asyncio.subprocess.PIPE,
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
# Only use setsid on Unix-like systems
|
|
41
|
+
if sys.platform != "win32":
|
|
42
|
+
kwargs["preexec_fn"] = os.setsid
|
|
43
|
+
|
|
44
|
+
self._process = await asyncio.create_subprocess_shell(self.command, **kwargs)
|
|
45
|
+
|
|
46
|
+
self._started = True
|
|
47
|
+
|
|
48
|
+
def stop(self) -> None:
|
|
49
|
+
"""Terminate the bash shell."""
|
|
50
|
+
if not self._started:
|
|
51
|
+
raise ToolError("Session has not started.")
|
|
52
|
+
if self._process.returncode is not None:
|
|
53
|
+
return
|
|
54
|
+
self._process.terminate()
|
|
55
|
+
|
|
56
|
+
async def run(self, command: str) -> CLIResult:
|
|
57
|
+
"""Execute a command in the bash shell."""
|
|
58
|
+
if not self._started:
|
|
59
|
+
raise ToolError("Session has not started.")
|
|
60
|
+
if self._process.returncode is not None:
|
|
61
|
+
await asyncio.sleep(0)
|
|
62
|
+
return ToolResult(
|
|
63
|
+
system="tool must be restarted",
|
|
64
|
+
error=f"bash has exited with returncode {self._process.returncode}",
|
|
65
|
+
)
|
|
66
|
+
if self._timed_out:
|
|
67
|
+
raise ToolError(
|
|
68
|
+
f"timed out: bash did not return in {self._timeout} seconds and must be restarted",
|
|
69
|
+
) from None
|
|
70
|
+
|
|
71
|
+
if self._process.stdin is None:
|
|
72
|
+
raise ToolError("stdin is None")
|
|
73
|
+
if self._process.stdout is None:
|
|
74
|
+
raise ToolError("stdout is None")
|
|
75
|
+
if self._process.stderr is None:
|
|
76
|
+
raise ToolError("stderr is None")
|
|
77
|
+
|
|
78
|
+
# Send command to the process
|
|
79
|
+
self._process.stdin.write(command.encode() + f"; echo '{self._sentinel}'\n".encode())
|
|
80
|
+
await self._process.stdin.drain()
|
|
81
|
+
|
|
82
|
+
# Read output from the process, until the sentinel is found
|
|
83
|
+
sentinel_line = f"{self._sentinel}\n"
|
|
84
|
+
sentinel_bytes = sentinel_line.encode()
|
|
85
|
+
|
|
86
|
+
try:
|
|
87
|
+
raw_out: bytes = await asyncio.wait_for(
|
|
88
|
+
self._process.stdout.readuntil(sentinel_bytes),
|
|
89
|
+
timeout=self._timeout,
|
|
90
|
+
)
|
|
91
|
+
output = raw_out.decode()[: -len(sentinel_line)]
|
|
92
|
+
except (TimeoutError, asyncio.LimitOverrunError):
|
|
93
|
+
self._timed_out = True
|
|
94
|
+
raise ToolError(
|
|
95
|
+
f"timed out: bash did not return in {self._timeout} seconds and must be restarted",
|
|
96
|
+
) from None
|
|
97
|
+
|
|
98
|
+
# Attempt non-blocking stderr fetch (may return empty)
|
|
99
|
+
try:
|
|
100
|
+
error_bytes = await asyncio.wait_for(self._process.stderr.read(), timeout=0.01)
|
|
101
|
+
error = error_bytes.decode().rstrip("\n")
|
|
102
|
+
except TimeoutError:
|
|
103
|
+
error = ""
|
|
104
|
+
|
|
105
|
+
return CLIResult(output=output, error=error)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class BashTool:
|
|
109
|
+
"""
|
|
110
|
+
A tool that allows the agent to run bash commands.
|
|
111
|
+
The tool parameters are defined by Anthropic and are not editable.
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
_session: _BashSession | None
|
|
115
|
+
|
|
116
|
+
def __init__(self) -> None:
|
|
117
|
+
self._session = None
|
|
118
|
+
|
|
119
|
+
async def __call__(
|
|
120
|
+
self, command: str | None = None, restart: bool = False, **kwargs: Any
|
|
121
|
+
) -> ToolResult:
|
|
122
|
+
if restart:
|
|
123
|
+
if self._session:
|
|
124
|
+
self._session.stop()
|
|
125
|
+
self._session = _BashSession()
|
|
126
|
+
await self._session.start()
|
|
127
|
+
|
|
128
|
+
return ToolResult(system="tool has been restarted.")
|
|
129
|
+
|
|
130
|
+
if self._session is None:
|
|
131
|
+
self._session = _BashSession()
|
|
132
|
+
await self._session.start()
|
|
133
|
+
|
|
134
|
+
if command is not None:
|
|
135
|
+
return await self._session.run(command)
|
|
136
|
+
|
|
137
|
+
raise ToolError("no command provided.")
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Computer control tools for different agent APIs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .anthropic import AnthropicComputerTool
|
|
6
|
+
from .hud import HudComputerTool
|
|
7
|
+
from .openai import OpenAIComputerTool
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"AnthropicComputerTool",
|
|
11
|
+
"HudComputerTool",
|
|
12
|
+
"OpenAIComputerTool",
|
|
13
|
+
]
|