hud-python 0.2.9__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (64) hide show
  1. hud/__init__.py +14 -5
  2. hud/env/docker_client.py +1 -1
  3. hud/env/environment.py +13 -8
  4. hud/env/local_docker_client.py +1 -1
  5. hud/env/remote_client.py +1 -1
  6. hud/env/remote_docker_client.py +2 -2
  7. hud/exceptions.py +2 -1
  8. hud/mcp_agent/__init__.py +15 -0
  9. hud/mcp_agent/base.py +723 -0
  10. hud/mcp_agent/claude.py +316 -0
  11. hud/mcp_agent/langchain.py +231 -0
  12. hud/mcp_agent/openai.py +318 -0
  13. hud/mcp_agent/tests/__init__.py +1 -0
  14. hud/mcp_agent/tests/test_base.py +437 -0
  15. hud/settings.py +14 -2
  16. hud/task.py +4 -0
  17. hud/telemetry/__init__.py +11 -7
  18. hud/telemetry/_trace.py +82 -71
  19. hud/telemetry/context.py +9 -27
  20. hud/telemetry/exporter.py +6 -5
  21. hud/telemetry/instrumentation/mcp.py +174 -410
  22. hud/telemetry/mcp_models.py +13 -74
  23. hud/telemetry/tests/test_context.py +9 -6
  24. hud/telemetry/tests/test_trace.py +92 -61
  25. hud/tools/__init__.py +21 -0
  26. hud/tools/base.py +65 -0
  27. hud/tools/bash.py +137 -0
  28. hud/tools/computer/__init__.py +13 -0
  29. hud/tools/computer/anthropic.py +411 -0
  30. hud/tools/computer/hud.py +315 -0
  31. hud/tools/computer/openai.py +283 -0
  32. hud/tools/edit.py +290 -0
  33. hud/tools/executors/__init__.py +13 -0
  34. hud/tools/executors/base.py +331 -0
  35. hud/tools/executors/pyautogui.py +585 -0
  36. hud/tools/executors/tests/__init__.py +1 -0
  37. hud/tools/executors/tests/test_base_executor.py +338 -0
  38. hud/tools/executors/tests/test_pyautogui_executor.py +162 -0
  39. hud/tools/executors/xdo.py +503 -0
  40. hud/tools/helper/README.md +56 -0
  41. hud/tools/helper/__init__.py +9 -0
  42. hud/tools/helper/mcp_server.py +78 -0
  43. hud/tools/helper/server_initialization.py +115 -0
  44. hud/tools/helper/utils.py +58 -0
  45. hud/tools/playwright_tool.py +373 -0
  46. hud/tools/tests/__init__.py +3 -0
  47. hud/tools/tests/test_bash.py +152 -0
  48. hud/tools/tests/test_computer.py +52 -0
  49. hud/tools/tests/test_computer_actions.py +34 -0
  50. hud/tools/tests/test_edit.py +233 -0
  51. hud/tools/tests/test_init.py +27 -0
  52. hud/tools/tests/test_playwright_tool.py +183 -0
  53. hud/tools/tests/test_tools.py +154 -0
  54. hud/tools/tests/test_utils.py +156 -0
  55. hud/tools/utils.py +50 -0
  56. hud/types.py +10 -1
  57. hud/utils/tests/test_init.py +21 -0
  58. hud/utils/tests/test_version.py +1 -1
  59. hud/version.py +1 -1
  60. {hud_python-0.2.9.dist-info → hud_python-0.3.0.dist-info}/METADATA +9 -6
  61. hud_python-0.3.0.dist-info/RECORD +124 -0
  62. hud_python-0.2.9.dist-info/RECORD +0 -85
  63. {hud_python-0.2.9.dist-info → hud_python-0.3.0.dist-info}/WHEEL +0 -0
  64. {hud_python-0.2.9.dist-info → hud_python-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -28,16 +28,14 @@ class StatusType(str, Enum):
28
28
 
29
29
 
30
30
  class MCPCallType(str, Enum):
31
- """Known MCP call types"""
31
+ """Enum for different types of MCP calls in telemetry."""
32
32
 
33
- SEND_REQUEST = "mcp.shared.session.send_request"
34
- SEND_NOTIFICATION = "mcp.shared.session.send_notification"
35
- RECEIVE_RESPONSE = "mcp.shared.session.receive_response"
36
- RECEIVE_REQUEST = "mcp.shared.session.receive_request"
37
- STREAM_READ = "mcp.stream.read"
38
- STREAM_WRITE = "mcp.stream.write"
39
- HANDLE_INCOMING = "mcp.handle_incoming"
40
- MANUAL_TEST = "manual.test"
33
+ # Requests and Notifications
34
+ SEND_REQUEST = "mcp.send_request"
35
+ SEND_NOTIFICATION = "mcp.send_notification"
36
+
37
+ # Responses
38
+ RECEIVE_RESPONSE = "mcp.receive_response"
41
39
 
42
40
 
43
41
  class BaseMCPCall(BaseModel):
@@ -87,6 +85,8 @@ class MCPRequestCall(BaseMCPCall):
87
85
  duration: float | None = None
88
86
  request_id: str | int | None = None
89
87
  request_data: dict[str, Any] | None = None
88
+ error: str | None = None
89
+ error_type: str | None = None
90
90
 
91
91
  @classmethod
92
92
  def from_jsonrpc_request(
@@ -191,6 +191,8 @@ class MCPNotificationCall(BaseMCPCall):
191
191
  end_time: float | None = None
192
192
  duration: float | None = None
193
193
  notification_data: dict[str, Any] | None = None
194
+ error: str | None = None
195
+ error_type: str | None = None
194
196
 
195
197
  @classmethod
196
198
  def from_jsonrpc_notification(
@@ -230,69 +232,6 @@ class MCPNotificationCall(BaseMCPCall):
230
232
  return None
231
233
 
232
234
 
233
- class MCPStreamEvent(BaseMCPCall):
234
- """Record for an MCP stream event (read or write)"""
235
-
236
- stream_event: bool = True
237
- event_type: str = Field(..., description="Type of stream event: read or write")
238
- item_type: str | None = None
239
- is_response_or_error: bool = False
240
- message_data: dict[str, Any] | None = None
241
-
242
- @classmethod
243
- def from_session_message(
244
- cls, message: SessionMessage, task_run_id: str, event_type: str, **kwargs: Any
245
- ) -> MCPStreamEvent:
246
- """Create telemetry record for a stream event"""
247
- method_name = "unknown_stream_operation"
248
- is_response = False
249
- item_type = "unknown"
250
- message_data = None
251
-
252
- if hasattr(message, "message") and hasattr(message.message, "root"):
253
- msg_root = message.message.root
254
- item_type = type(msg_root).__name__
255
- message_data = msg_root.model_dump(exclude_none=True)
256
-
257
- # Check type first before accessing attributes
258
- if isinstance(msg_root, JSONRPCRequest | JSONRPCNotification) and hasattr(
259
- msg_root, "method"
260
- ):
261
- method_name = msg_root.method
262
- elif isinstance(msg_root, JSONRPCResponse | JSONRPCError) and hasattr(msg_root, "id"):
263
- method_name = f"response_to_id_{msg_root.id}"
264
- is_response = True
265
-
266
- return cls(
267
- task_run_id=task_run_id,
268
- status=StatusType.COMPLETED,
269
- method=method_name,
270
- event_type=event_type,
271
- item_type=item_type,
272
- is_response_or_error=is_response,
273
- message_data=message_data,
274
- timestamp=datetime.now().timestamp(),
275
- **kwargs,
276
- )
277
-
278
-
279
- class MCPManualTestCall(BaseMCPCall):
280
- """Record for a manual test record"""
281
-
282
- call_type: str = MCPCallType.MANUAL_TEST
283
- custom_data: dict[str, Any] = Field(default_factory=dict)
284
-
285
- @classmethod
286
- def create(cls, task_run_id: str, **custom_data: Any) -> MCPManualTestCall:
287
- """Create a manual test record with custom data"""
288
- return cls(
289
- task_run_id=task_run_id,
290
- status=StatusType.COMPLETED,
291
- custom_data=custom_data,
292
- timestamp=datetime.now().timestamp(),
293
- )
294
-
295
-
296
235
  class MCPTelemetryRecord(BaseModel):
297
236
  """Container for a set of related MCP telemetry records"""
298
237
 
@@ -320,9 +259,9 @@ class MCPTelemetryRecord(BaseModel):
320
259
 
321
260
 
322
261
  class TrajectoryStep(BaseModel):
323
- """Model representing a single step in a trajectory, for export."""
262
+ """Model for telemetry export format."""
324
263
 
325
- type: str = Field(default="mcp-step") # Default for MCP calls
264
+ type: str = Field(default="mcp-step")
326
265
  observation_url: str | None = None
327
266
  observation_text: str | None = None
328
267
  actions: list[dict[str, Any]] = Field(default_factory=list)
@@ -64,21 +64,24 @@ class TestRootTraceContext:
64
64
  class TestMCPCallBuffer:
65
65
  """Test MCP call buffer management."""
66
66
 
67
- def setUp(self):
67
+ def reset_context(self):
68
68
  """Clear buffer before each test."""
69
69
  # Flush any existing calls and reset context
70
+ set_current_task_run_id(None)
71
+ # Clear any existing buffers by setting a temporary task ID and flushing
72
+ set_current_task_run_id("temp-cleanup")
70
73
  flush_buffer()
71
74
  set_current_task_run_id(None)
72
75
 
73
76
  def test_flush_buffer_empty(self):
74
77
  """Test flushing empty buffer."""
75
- self.setUp()
78
+ self.reset_context()
76
79
  result = flush_buffer()
77
80
  assert result == []
78
81
 
79
82
  def test_add_and_flush_mcp_call(self):
80
83
  """Test adding and flushing MCP calls."""
81
- self.setUp()
84
+ self.reset_context()
82
85
 
83
86
  # Set active task run ID
84
87
  set_current_task_run_id("test-task")
@@ -101,7 +104,7 @@ class TestMCPCallBuffer:
101
104
 
102
105
  def test_add_multiple_mcp_calls(self):
103
106
  """Test adding multiple MCP calls."""
104
- self.setUp()
107
+ self.reset_context()
105
108
 
106
109
  # Set active task run ID
107
110
  set_current_task_run_id("test-task")
@@ -122,7 +125,7 @@ class TestMCPCallBuffer:
122
125
 
123
126
  def test_buffer_isolation_per_task(self):
124
127
  """Test that MCP call buffers contain all calls regardless of task ID."""
125
- self.setUp()
128
+ self.reset_context()
126
129
 
127
130
  # Set task run ID 1
128
131
  set_current_task_run_id("task-1")
@@ -150,7 +153,7 @@ class TestMCPCallBuffer:
150
153
 
151
154
  def test_buffer_mcp_call_without_task_id(self):
152
155
  """Test adding MCP call when no task run ID is set."""
153
- self.setUp()
156
+ self.reset_context()
154
157
  set_current_task_run_id(None)
155
158
 
156
159
  mock_call = MagicMock(spec=BaseMCPCall)
@@ -8,21 +8,25 @@ import pytest
8
8
 
9
9
  from hud.telemetry._trace import (
10
10
  init_telemetry,
11
- register_trace,
12
11
  trace,
12
+ trace_decorator,
13
+ trace_open,
13
14
  )
14
15
  from hud.telemetry.context import get_current_task_run_id as actual_get_current_task_run_id
15
16
  from hud.telemetry.context import is_root_trace as actual_is_root_trace
16
- from hud.telemetry.context import reset_context
17
17
  from hud.telemetry.context import set_current_task_run_id as actual_set_current_task_run_id
18
18
 
19
19
 
20
20
  @pytest.fixture(autouse=True)
21
21
  def reset_telemetry_context_fixture():
22
22
  """Ensures telemetry context is reset before and after each test in this file."""
23
- reset_context()
23
+ # Reset context before test
24
+ actual_set_current_task_run_id(None)
25
+ actual_is_root_trace.set(False)
24
26
  yield
25
- reset_context()
27
+ # Reset context after test
28
+ actual_set_current_task_run_id(None)
29
+ actual_is_root_trace.set(False)
26
30
 
27
31
 
28
32
  class TestInitTelemetry:
@@ -49,7 +53,7 @@ class TestTrace:
49
53
 
50
54
  initial_root_state = actual_is_root_trace.get()
51
55
 
52
- with trace() as task_run_id:
56
+ with trace_open() as task_run_id:
53
57
  assert isinstance(task_run_id, str)
54
58
  uuid.UUID(task_run_id)
55
59
  assert actual_get_current_task_run_id() == task_run_id
@@ -73,7 +77,7 @@ class TestTrace:
73
77
  trace_name = "test_trace_with_data"
74
78
  attrs = {"key": "value", "number": 42}
75
79
 
76
- with trace(name=trace_name, attributes=attrs) as task_run_id:
80
+ with trace_open(name=trace_name, attributes=attrs) as task_run_id:
77
81
  assert isinstance(task_run_id, str)
78
82
 
79
83
  mock_flush.assert_called_once()
@@ -101,7 +105,7 @@ class TestTrace:
101
105
  test_attrs = {"custom_attr": "test_val"}
102
106
  test_name = "mcp_export_test"
103
107
 
104
- with trace(name=test_name, attributes=test_attrs) as task_run_id:
108
+ with trace_open(name=test_name, attributes=test_attrs) as task_run_id:
105
109
  pass
106
110
 
107
111
  mock_flush.assert_called_once()
@@ -113,10 +117,8 @@ class TestTrace:
113
117
  assert kwargs["mcp_calls"] == mock_mcp_calls
114
118
  assert kwargs["trace_attributes"]["trace_name"] == test_name
115
119
  assert kwargs["trace_attributes"]["custom_attr"] == "test_val"
116
- assert "start_time" in kwargs["trace_attributes"]
117
- assert "end_time" in kwargs["trace_attributes"]
118
- assert "duration" in kwargs["trace_attributes"]
119
- assert kwargs["trace_attributes"]["is_root"] is True
120
+ assert "duration_seconds" in kwargs["trace_attributes"]
121
+ assert kwargs["trace_attributes"]["is_root_trace"] is True
120
122
 
121
123
  def test_trace_nested(self, mocker):
122
124
  """Test nested traces, verifying context restoration and root trace logic."""
@@ -133,7 +135,7 @@ class TestTrace:
133
135
  assert actual_get_current_task_run_id() is None
134
136
  assert actual_is_root_trace.get() is False
135
137
 
136
- with trace(name="outer") as outer_id:
138
+ with trace_open(name="outer") as outer_id:
137
139
  assert actual_get_current_task_run_id() == outer_id
138
140
  assert actual_is_root_trace.get() is True
139
141
  with trace(name="inner") as inner_id:
@@ -162,7 +164,10 @@ class TestTrace:
162
164
  "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
163
165
  )
164
166
 
165
- with pytest.raises(ValueError, match="Test exception"), trace(name="trace_with_exception"):
167
+ with (
168
+ pytest.raises(ValueError, match="Test exception"),
169
+ trace_open(name="trace_with_exception"),
170
+ ):
166
171
  assert actual_get_current_task_run_id() != initial_task_id_before_trace
167
172
  assert actual_is_root_trace.get() is False
168
173
  raise ValueError("Test exception")
@@ -173,98 +178,124 @@ class TestTrace:
173
178
  mock_submit_loop.assert_not_called()
174
179
 
175
180
 
176
- class TestRegisterTrace:
177
- """Test the register_trace decorator."""
181
+ class TestTraceSync:
182
+ """Test the trace_sync context manager."""
178
183
 
179
- def test_register_trace_sync_function(self, mocker):
180
- mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
181
- mock_trace_context_manager.return_value.__enter__.return_value = "mocked_task_id"
182
- mock_trace_context_manager.return_value.__exit__.return_value = None
184
+ def test_trace_sync_basic(self, mocker):
185
+ """Test trace calls trace_open and flush."""
186
+ mock_flush = mocker.patch("hud.flush", autospec=True)
187
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open")
188
+ mock_trace_open.return_value.__enter__.return_value = "test-task-id"
189
+ mock_trace_open.return_value.__exit__.return_value = None
183
190
 
184
- @register_trace(name="test_func_sync")
191
+ with trace(name="test_sync") as task_run_id:
192
+ assert task_run_id == "test-task-id"
193
+
194
+ mock_trace_open.assert_called_once_with(name="test_sync", attributes=None)
195
+ mock_flush.assert_called_once()
196
+
197
+ def test_trace_sync_with_attributes(self, mocker):
198
+ """Test trace passes attributes correctly."""
199
+ mock_flush = mocker.patch("hud.flush", autospec=True)
200
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open")
201
+ mock_trace_open.return_value.__enter__.return_value = "test-task-id"
202
+ mock_trace_open.return_value.__exit__.return_value = None
203
+ attrs = {"key": "value"}
204
+
205
+ with trace(name="test_sync", attributes=attrs):
206
+ pass
207
+
208
+ mock_trace_open.assert_called_once_with(name="test_sync", attributes=attrs)
209
+ mock_flush.assert_called_once()
210
+
211
+
212
+ class TestTraceDecorator:
213
+ """Test the trace_decorator function decorator."""
214
+
215
+ def test_trace_decorator_sync_function(self, mocker):
216
+ """Test trace_decorator on synchronous functions."""
217
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
218
+ mock_trace_open.return_value.__enter__.return_value = "mocked_task_id"
219
+ mock_trace_open.return_value.__exit__.return_value = None
220
+
221
+ @trace_decorator(name="test_func_sync")
185
222
  def sync_function(x, y):
186
223
  return x + y
187
224
 
188
225
  result = sync_function(1, 2)
189
226
  assert result == 3
190
- mock_trace_context_manager.assert_called_once_with(name="test_func_sync", attributes=None)
227
+ mock_trace_open.assert_called_once_with(name="test_func_sync", attributes=None)
191
228
 
192
- def test_register_trace_async_function(self, mocker):
193
- mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
194
- mock_trace_context_manager.return_value.__enter__.return_value = "mocked_task_id"
195
- mock_trace_context_manager.return_value.__exit__.return_value = None
229
+ def test_trace_decorator_async_function(self, mocker):
230
+ """Test trace_decorator on asynchronous functions."""
231
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
232
+ mock_trace_open.return_value.__enter__.return_value = "mocked_task_id"
233
+ mock_trace_open.return_value.__exit__.return_value = None
196
234
 
197
- @register_trace(name="test_func_async")
235
+ @trace_decorator(name="test_func_async")
198
236
  async def async_function(x, y):
199
237
  return x + y
200
238
 
201
239
  async def run_test():
202
240
  result = await async_function(1, 2)
203
241
  assert result == 3
204
- mock_trace_context_manager.assert_called_once_with(
205
- name="test_func_async", attributes=None
206
- )
242
+ mock_trace_open.assert_called_once_with(name="test_func_async", attributes=None)
207
243
 
208
244
  asyncio.run(run_test())
209
245
 
210
- def test_register_trace_with_attributes(self, mocker):
211
- """Test register_trace with attributes."""
212
- mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
213
-
214
- class _MockTraceContextManager:
215
- def __enter__(self):
216
- return "task_id"
217
-
218
- def __exit__(self, exc_type, exc_value, traceback):
219
- return None
220
-
221
- mock_trace_context_manager.return_value = _MockTraceContextManager()
246
+ def test_trace_decorator_with_attributes(self, mocker):
247
+ """Test trace_decorator with attributes."""
248
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
249
+ mock_trace_open.return_value.__enter__.return_value = "task_id"
250
+ mock_trace_open.return_value.__exit__.return_value = None
222
251
 
223
- attrs = {"operation": "add"}
252
+ attrs = {"operation": "multiply"}
224
253
 
225
- @register_trace(name="test_func", attributes=attrs)
254
+ @trace_decorator(name="test_func", attributes=attrs)
226
255
  def func_with_attrs(x):
227
256
  return x * 2
228
257
 
229
258
  result = func_with_attrs(5)
230
259
  assert result == 10
231
- mock_trace_context_manager.assert_called_once_with(name="test_func", attributes=attrs)
260
+ mock_trace_open.assert_called_once_with(name="test_func", attributes=attrs)
232
261
 
233
- def test_register_trace_without_name(self, mocker):
234
- """Test register_trace uses function name when name not provided."""
235
- mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
236
- mock_trace_context_manager.return_value.__enter__.return_value = "task_id"
237
- mock_trace_context_manager.return_value.__exit__.return_value = None
262
+ def test_trace_decorator_without_name(self, mocker):
263
+ """Test trace_decorator uses module.function name when name not provided."""
264
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
265
+ mock_trace_open.return_value.__enter__.return_value = "task_id"
266
+ mock_trace_open.return_value.__exit__.return_value = None
238
267
 
239
- @register_trace()
268
+ @trace_decorator()
240
269
  def my_function():
241
270
  return "result"
242
271
 
243
272
  result = my_function()
244
273
  assert result == "result"
245
- mock_trace_context_manager.assert_called_once_with(name="my_function", attributes=None)
274
+ # Should use module.function name
275
+ expected_name = f"{my_function.__module__}.my_function"
276
+ mock_trace_open.assert_called_once_with(name=expected_name, attributes=None)
246
277
 
247
- def test_register_trace_preserves_function_metadata(self):
248
- """Test register_trace preserves original function metadata."""
278
+ def test_trace_decorator_preserves_function_metadata(self):
279
+ """Test trace_decorator preserves original function metadata."""
249
280
 
250
- @register_trace(name="test")
281
+ @trace_decorator(name="test")
251
282
  def original_function():
252
283
  """Original docstring."""
253
284
 
254
285
  assert original_function.__name__ == "original_function"
255
286
  assert original_function.__doc__ == "Original docstring."
256
287
 
257
- def test_register_trace_exception_propagation(self, mocker):
258
- """Test register_trace propagates exceptions."""
259
- mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
260
- mock_trace_context_manager.return_value.__enter__.return_value = "task_id"
261
- mock_trace_context_manager.return_value.__exit__.return_value = None
288
+ def test_trace_decorator_exception_propagation(self, mocker):
289
+ """Test trace_decorator propagates exceptions."""
290
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
291
+ mock_trace_open.return_value.__enter__.return_value = "task_id"
292
+ mock_trace_open.return_value.__exit__.return_value = None
262
293
 
263
- @register_trace()
294
+ @trace_decorator()
264
295
  def failing_function():
265
296
  raise RuntimeError("Test error")
266
297
 
267
298
  with pytest.raises(RuntimeError, match="Test error"):
268
299
  failing_function()
269
300
 
270
- mock_trace_context_manager.assert_called_once()
301
+ mock_trace_open.assert_called_once()
hud/tools/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """HUD tools for computer control, file editing, and bash commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .base import ToolError, ToolResult, tool_result_to_content_blocks
6
+ from .bash import BashTool
7
+ from .computer import AnthropicComputerTool, HudComputerTool, OpenAIComputerTool
8
+ from .edit import EditTool
9
+ from .playwright_tool import PlaywrightTool
10
+
11
+ __all__ = [
12
+ "AnthropicComputerTool",
13
+ "BashTool",
14
+ "EditTool",
15
+ "HudComputerTool",
16
+ "OpenAIComputerTool",
17
+ "PlaywrightTool",
18
+ "ToolError",
19
+ "ToolResult",
20
+ "tool_result_to_content_blocks",
21
+ ]
hud/tools/base.py ADDED
@@ -0,0 +1,65 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, fields, replace
4
+ from typing import Any
5
+
6
+ from mcp.types import ImageContent, TextContent
7
+
8
+
9
+ @dataclass(kw_only=True, frozen=True)
10
+ class ToolResult:
11
+ """Represents the result of a tool execution."""
12
+
13
+ output: str | None = None
14
+ error: str | None = None
15
+ base64_image: str | None = None
16
+ system: str | None = None
17
+
18
+ def __bool__(self) -> bool:
19
+ return any(getattr(self, field.name) for field in fields(self))
20
+
21
+ def __add__(self, other: ToolResult) -> ToolResult:
22
+ def combine_fields(
23
+ field: str | None, other_field: str | None, concatenate: bool = True
24
+ ) -> str | None:
25
+ if field and other_field:
26
+ if concatenate:
27
+ return field + other_field
28
+ raise ValueError("Cannot combine tool results")
29
+ return field or other_field
30
+
31
+ return ToolResult(
32
+ output=combine_fields(self.output, other.output),
33
+ error=combine_fields(self.error, other.error),
34
+ base64_image=combine_fields(self.base64_image, other.base64_image, False),
35
+ system=combine_fields(self.system, other.system),
36
+ )
37
+
38
+ def replace(self, **kwargs: Any) -> ToolResult:
39
+ """Returns a new ToolResult with the given fields replaced."""
40
+ return replace(self, **kwargs)
41
+
42
+
43
+ # Legacy alias for backward compatibility
44
+ CLIResult = ToolResult
45
+
46
+
47
class ToolError(Exception):
    """Exception type that tools raise to report a failure."""
49
+
50
+
51
+ # Legacy alias for backward compatibility
52
+ CLIError = ToolError
53
+
54
+
55
def tool_result_to_content_blocks(result: ToolResult) -> list[ImageContent | TextContent]:
    """Translate a ToolResult into a list of MCP content blocks.

    Truthy ``output`` and ``error`` each become a text block, in that order,
    followed by a PNG image block when ``base64_image`` is truthy. The
    ``system`` field is not emitted.
    """
    content: list[ImageContent | TextContent] = [
        TextContent(text=text, type="text") for text in (result.output, result.error) if text
    ]
    if not result.base64_image:
        return content

    image = ImageContent(data=result.base64_image, mimeType="image/png", type="image")
    content.append(image)
    return content
hud/tools/bash.py ADDED
@@ -0,0 +1,137 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import os
5
+ import sys
6
+ from typing import Any
7
+
8
+ from .base import CLIResult, ToolError, ToolResult
9
+
10
+
11
class _BashSession:
    """A session of a bash shell.

    Wraps one long-lived shell subprocess. Commands are written to its stdin,
    and stdout is read back up to a sentinel line that is echoed after each
    command.
    """

    _started: bool
    _timed_out: bool
    _process: asyncio.subprocess.Process

    command: str = "/bin/bash"
    _output_delay: float = 0.2  # seconds; not referenced by the code in this class
    _timeout: float = 120.0  # seconds allowed for a command to reach the sentinel
    _sentinel: str = "<<exit>>"
    _stderr_read_size: int = 65536  # max bytes taken from stderr per command

    def __init__(self) -> None:
        self._started = False
        self._timed_out = False

    async def start(self) -> None:
        """Spawn the shell subprocess; a no-op if the session already started."""
        if self._started:
            await asyncio.sleep(0)
            return

        # Platform-specific subprocess creation
        kwargs: dict[str, Any] = {
            "shell": True,
            "bufsize": 0,
            "stdin": asyncio.subprocess.PIPE,
            "stdout": asyncio.subprocess.PIPE,
            "stderr": asyncio.subprocess.PIPE,
        }

        # Only use setsid on Unix-like systems
        if sys.platform != "win32":
            kwargs["preexec_fn"] = os.setsid

        self._process = await asyncio.create_subprocess_shell(self.command, **kwargs)

        self._started = True

    def stop(self) -> None:
        """Terminate the bash shell.

        Raises:
            ToolError: if the session was never started.
        """
        if not self._started:
            raise ToolError("Session has not started.")
        if self._process.returncode is not None:
            # Already exited; nothing to terminate.
            return
        self._process.terminate()

    async def run(self, command: str) -> CLIResult:
        """Execute a command in the bash shell.

        Returns:
            A result carrying the command's stdout (up to the sentinel) and any
            stderr text that was already available. If the shell has exited, a
            result whose ``system``/``error`` fields say it must be restarted.

        Raises:
            ToolError: if the session has not started, a previous command timed
                out, a pipe is missing, or the sentinel was not read within
                ``_timeout`` seconds (or the stream limit was overrun).
        """
        if not self._started:
            raise ToolError("Session has not started.")
        if self._process.returncode is not None:
            await asyncio.sleep(0)
            return ToolResult(
                system="tool must be restarted",
                error=f"bash has exited with returncode {self._process.returncode}",
            )
        if self._timed_out:
            raise ToolError(
                f"timed out: bash did not return in {self._timeout} seconds and must be restarted",
            ) from None

        if self._process.stdin is None:
            raise ToolError("stdin is None")
        if self._process.stdout is None:
            raise ToolError("stdout is None")
        if self._process.stderr is None:
            raise ToolError("stderr is None")

        # Send command to the process, followed by an echo of the sentinel so
        # the end of its output can be detected.
        self._process.stdin.write(command.encode() + f"; echo '{self._sentinel}'\n".encode())
        await self._process.stdin.drain()

        # Read output from the process, until the sentinel is found
        sentinel_line = f"{self._sentinel}\n"
        sentinel_bytes = sentinel_line.encode()

        try:
            raw_out: bytes = await asyncio.wait_for(
                self._process.stdout.readuntil(sentinel_bytes),
                timeout=self._timeout,
            )
            output = raw_out.decode()[: -len(sentinel_line)]
        # asyncio.TimeoutError is the builtin TimeoutError on Python 3.11+, but a
        # separate class on 3.10; naming the asyncio one catches both.
        # NOTE(review): readuntil() can also raise asyncio.IncompleteReadError
        # if the shell exits mid-command; that is not handled here — confirm
        # whether callers expect it to propagate.
        except (asyncio.TimeoutError, asyncio.LimitOverrunError):
            self._timed_out = True
            raise ToolError(
                f"timed out: bash did not return in {self._timeout} seconds and must be restarted",
            ) from None

        # Attempt non-blocking stderr fetch (may return empty).
        # A bounded read returns as soon as any data is buffered. An unbounded
        # read() waits for EOF, which a live shell never sends, so it would
        # always time out and drop the bytes it had already collected.
        try:
            error_bytes = await asyncio.wait_for(
                self._process.stderr.read(self._stderr_read_size), timeout=0.01
            )
        except asyncio.TimeoutError:
            error = ""
        else:
            error = error_bytes.decode().rstrip("\n")

        return CLIResult(output=output, error=error)
106
+
107
+
108
class BashTool:
    """
    Lets an agent run commands in a persistent bash session.
    The tool parameters are defined by Anthropic and are not editable.
    """

    _session: _BashSession | None

    def __init__(self) -> None:
        # The shell is created lazily on the first call (or on restart).
        self._session = None

    async def __call__(
        self, command: str | None = None, restart: bool = False, **kwargs: Any
    ) -> ToolResult:
        """Run ``command`` in the session, or replace the session when ``restart`` is set.

        A restart takes precedence over ``command``: the old session (if any)
        is stopped, a fresh one is started, and a system message is returned.

        Raises:
            ToolError: if no restart was requested and ``command`` is None.
        """
        if restart:
            previous = self._session
            if previous:
                previous.stop()
            fresh = _BashSession()
            self._session = fresh
            await fresh.start()

            return ToolResult(system="tool has been restarted.")

        active = self._session
        if active is None:
            active = _BashSession()
            self._session = active
            await active.start()

        if command is None:
            raise ToolError("no command provided.")

        return await active.run(command)
hud/tools/computer/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ """Computer control tools for different agent APIs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .anthropic import AnthropicComputerTool
6
+ from .hud import HudComputerTool
7
+ from .openai import OpenAIComputerTool
8
+
9
+ __all__ = [
10
+ "AnthropicComputerTool",
11
+ "HudComputerTool",
12
+ "OpenAIComputerTool",
13
+ ]