hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -89
- hud/agents/__init__.py +17 -0
- hud/agents/art.py +101 -0
- hud/agents/base.py +599 -0
- hud/{mcp → agents}/claude.py +373 -321
- hud/{mcp → agents}/langchain.py +250 -250
- hud/agents/misc/__init__.py +7 -0
- hud/{agent → agents}/misc/response_agent.py +80 -80
- hud/{mcp → agents}/openai.py +352 -334
- hud/agents/openai_chat_generic.py +154 -0
- hud/{mcp → agents}/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -0
- hud/agents/tests/test_claude.py +324 -0
- hud/{mcp → agents}/tests/test_client.py +363 -324
- hud/{mcp → agents}/tests/test_openai.py +237 -238
- hud/cli/__init__.py +617 -0
- hud/cli/__main__.py +8 -0
- hud/cli/analyze.py +371 -0
- hud/cli/analyze_metadata.py +230 -0
- hud/cli/build.py +427 -0
- hud/cli/clone.py +185 -0
- hud/cli/cursor.py +92 -0
- hud/cli/debug.py +392 -0
- hud/cli/docker_utils.py +83 -0
- hud/cli/init.py +281 -0
- hud/cli/interactive.py +353 -0
- hud/cli/mcp_server.py +756 -0
- hud/cli/pull.py +336 -0
- hud/cli/push.py +379 -0
- hud/cli/remote_runner.py +311 -0
- hud/cli/runner.py +160 -0
- hud/cli/tests/__init__.py +3 -0
- hud/cli/tests/test_analyze.py +284 -0
- hud/cli/tests/test_cli_init.py +265 -0
- hud/cli/tests/test_cli_main.py +27 -0
- hud/cli/tests/test_clone.py +142 -0
- hud/cli/tests/test_cursor.py +253 -0
- hud/cli/tests/test_debug.py +453 -0
- hud/cli/tests/test_mcp_server.py +139 -0
- hud/cli/tests/test_utils.py +388 -0
- hud/cli/utils.py +263 -0
- hud/clients/README.md +143 -0
- hud/clients/__init__.py +16 -0
- hud/clients/base.py +354 -0
- hud/clients/fastmcp.py +202 -0
- hud/clients/mcp_use.py +278 -0
- hud/clients/tests/__init__.py +1 -0
- hud/clients/tests/test_client_integration.py +111 -0
- hud/clients/tests/test_fastmcp.py +342 -0
- hud/clients/tests/test_protocol.py +188 -0
- hud/clients/utils/__init__.py +1 -0
- hud/clients/utils/retry_transport.py +160 -0
- hud/datasets.py +322 -192
- hud/misc/__init__.py +1 -0
- hud/{agent → misc}/claude_plays_pokemon.py +292 -283
- hud/otel/__init__.py +35 -0
- hud/otel/collector.py +142 -0
- hud/otel/config.py +164 -0
- hud/otel/context.py +536 -0
- hud/otel/exporters.py +366 -0
- hud/otel/instrumentation.py +97 -0
- hud/otel/processors.py +118 -0
- hud/otel/tests/__init__.py +1 -0
- hud/otel/tests/test_processors.py +197 -0
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -0
- hud/server/helper/__init__.py +5 -0
- hud/server/low_level.py +132 -0
- hud/server/server.py +166 -0
- hud/server/tests/__init__.py +3 -0
- hud/settings.py +73 -79
- hud/shared/__init__.py +5 -0
- hud/{exceptions.py → shared/exceptions.py} +180 -180
- hud/{server → shared}/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -0
- hud/{server → shared}/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -30
- hud/telemetry/instrument.py +379 -0
- hud/telemetry/job.py +309 -141
- hud/telemetry/replay.py +74 -0
- hud/telemetry/trace.py +83 -0
- hud/tools/__init__.py +33 -34
- hud/tools/base.py +365 -65
- hud/tools/bash.py +161 -137
- hud/tools/computer/__init__.py +15 -13
- hud/tools/computer/anthropic.py +437 -420
- hud/tools/computer/hud.py +376 -334
- hud/tools/computer/openai.py +295 -292
- hud/tools/computer/settings.py +82 -0
- hud/tools/edit.py +314 -290
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -532
- hud/tools/executors/pyautogui.py +621 -619
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -503
- hud/tools/{playwright_tool.py → playwright.py} +412 -379
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -0
- hud/tools/tests/test_bash.py +158 -152
- hud/tools/tests/test_bash_extended.py +197 -0
- hud/tools/tests/test_computer.py +425 -52
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -240
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -157
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -0
- hud/tools/utils.py +50 -50
- hud/types.py +136 -89
- hud/utils/__init__.py +10 -16
- hud/utils/async_utils.py +65 -0
- hud/utils/design.py +168 -0
- hud/utils/mcp.py +55 -0
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -0
- hud/utils/tests/test_init.py +17 -21
- hud/utils/tests/test_progress.py +261 -225
- hud/utils/tests/test_telemetry.py +82 -37
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- hud_python-0.4.0.dist-info/METADATA +474 -0
- hud_python-0.4.0.dist-info/RECORD +132 -0
- hud_python-0.4.0.dist-info/entry_points.txt +3 -0
- {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
- hud/adapters/__init__.py +0 -8
- hud/adapters/claude/__init__.py +0 -5
- hud/adapters/claude/adapter.py +0 -180
- hud/adapters/claude/tests/__init__.py +0 -1
- hud/adapters/claude/tests/test_adapter.py +0 -519
- hud/adapters/common/__init__.py +0 -6
- hud/adapters/common/adapter.py +0 -178
- hud/adapters/common/tests/test_adapter.py +0 -289
- hud/adapters/common/types.py +0 -446
- hud/adapters/operator/__init__.py +0 -5
- hud/adapters/operator/adapter.py +0 -108
- hud/adapters/operator/tests/__init__.py +0 -1
- hud/adapters/operator/tests/test_adapter.py +0 -370
- hud/agent/__init__.py +0 -19
- hud/agent/base.py +0 -126
- hud/agent/claude.py +0 -271
- hud/agent/langchain.py +0 -215
- hud/agent/misc/__init__.py +0 -3
- hud/agent/operator.py +0 -268
- hud/agent/tests/__init__.py +0 -1
- hud/agent/tests/test_base.py +0 -202
- hud/env/__init__.py +0 -11
- hud/env/client.py +0 -35
- hud/env/docker_client.py +0 -349
- hud/env/environment.py +0 -446
- hud/env/local_docker_client.py +0 -358
- hud/env/remote_client.py +0 -212
- hud/env/remote_docker_client.py +0 -292
- hud/gym.py +0 -130
- hud/job.py +0 -773
- hud/mcp/__init__.py +0 -17
- hud/mcp/base.py +0 -631
- hud/mcp/client.py +0 -312
- hud/mcp/tests/test_base.py +0 -512
- hud/mcp/tests/test_claude.py +0 -294
- hud/task.py +0 -149
- hud/taskset.py +0 -237
- hud/telemetry/_trace.py +0 -347
- hud/telemetry/context.py +0 -230
- hud/telemetry/exporter.py +0 -575
- hud/telemetry/instrumentation/__init__.py +0 -3
- hud/telemetry/instrumentation/mcp.py +0 -259
- hud/telemetry/instrumentation/registry.py +0 -59
- hud/telemetry/mcp_models.py +0 -270
- hud/telemetry/tests/__init__.py +0 -1
- hud/telemetry/tests/test_context.py +0 -210
- hud/telemetry/tests/test_trace.py +0 -312
- hud/tools/helper/README.md +0 -56
- hud/tools/helper/__init__.py +0 -9
- hud/tools/helper/mcp_server.py +0 -78
- hud/tools/helper/server_initialization.py +0 -115
- hud/tools/helper/utils.py +0 -58
- hud/trajectory.py +0 -94
- hud/utils/agent.py +0 -37
- hud/utils/common.py +0 -256
- hud/utils/config.py +0 -120
- hud/utils/deprecation.py +0 -115
- hud/utils/misc.py +0 -53
- hud/utils/tests/test_common.py +0 -277
- hud/utils/tests/test_config.py +0 -129
- hud_python-0.3.5.dist-info/METADATA +0 -284
- hud_python-0.3.5.dist-info/RECORD +0 -120
- /hud/{adapters/common → shared}/tests/__init__.py +0 -0
- {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from unittest.mock import MagicMock
|
|
4
|
-
|
|
5
|
-
from hud.telemetry.context import (
|
|
6
|
-
buffer_mcp_call,
|
|
7
|
-
flush_buffer,
|
|
8
|
-
get_current_task_run_id,
|
|
9
|
-
is_root_trace,
|
|
10
|
-
set_current_task_run_id,
|
|
11
|
-
)
|
|
12
|
-
from hud.telemetry.mcp_models import BaseMCPCall
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class TestTaskRunIdContext:
|
|
16
|
-
"""Test task run ID context management."""
|
|
17
|
-
|
|
18
|
-
def test_get_current_task_run_id_initial(self):
|
|
19
|
-
"""Test getting task run ID when none is set."""
|
|
20
|
-
# Reset context for clean test
|
|
21
|
-
set_current_task_run_id(None)
|
|
22
|
-
result = get_current_task_run_id()
|
|
23
|
-
assert result is None
|
|
24
|
-
|
|
25
|
-
def test_set_and_get_task_run_id(self):
|
|
26
|
-
"""Test setting and getting task run ID."""
|
|
27
|
-
test_id = "test-task-run-id"
|
|
28
|
-
set_current_task_run_id(test_id)
|
|
29
|
-
result = get_current_task_run_id()
|
|
30
|
-
assert result == test_id
|
|
31
|
-
|
|
32
|
-
def test_task_run_id_isolation(self):
|
|
33
|
-
"""Test that task run IDs are isolated per context."""
|
|
34
|
-
# This test simulates what would happen in different contexts
|
|
35
|
-
set_current_task_run_id("context-1")
|
|
36
|
-
assert get_current_task_run_id() == "context-1"
|
|
37
|
-
|
|
38
|
-
set_current_task_run_id("context-2")
|
|
39
|
-
assert get_current_task_run_id() == "context-2"
|
|
40
|
-
|
|
41
|
-
# Reset to None
|
|
42
|
-
set_current_task_run_id(None)
|
|
43
|
-
assert get_current_task_run_id() is None
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class TestRootTraceContext:
|
|
47
|
-
"""Test root trace context management."""
|
|
48
|
-
|
|
49
|
-
def test_is_root_trace_initial(self):
|
|
50
|
-
"""Test is_root_trace initial state."""
|
|
51
|
-
# The initial state may vary, so we just test that it returns a boolean
|
|
52
|
-
result = is_root_trace.get()
|
|
53
|
-
assert isinstance(result, bool)
|
|
54
|
-
|
|
55
|
-
def test_set_root_trace(self):
|
|
56
|
-
"""Test setting root trace state."""
|
|
57
|
-
is_root_trace.set(True)
|
|
58
|
-
assert is_root_trace.get() is True
|
|
59
|
-
|
|
60
|
-
is_root_trace.set(False)
|
|
61
|
-
assert is_root_trace.get() is False
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
class TestMCPCallBuffer:
|
|
65
|
-
"""Test MCP call buffer management."""
|
|
66
|
-
|
|
67
|
-
def reset_context(self):
|
|
68
|
-
"""Clear buffer before each test."""
|
|
69
|
-
# Flush any existing calls and reset context
|
|
70
|
-
set_current_task_run_id(None)
|
|
71
|
-
# Clear any existing buffers by setting a temporary task ID and flushing
|
|
72
|
-
set_current_task_run_id("temp-cleanup")
|
|
73
|
-
flush_buffer()
|
|
74
|
-
set_current_task_run_id(None)
|
|
75
|
-
|
|
76
|
-
def test_flush_buffer_empty(self):
|
|
77
|
-
"""Test flushing empty buffer."""
|
|
78
|
-
self.reset_context()
|
|
79
|
-
result = flush_buffer()
|
|
80
|
-
assert result == []
|
|
81
|
-
|
|
82
|
-
def test_add_and_flush_mcp_call(self):
|
|
83
|
-
"""Test adding and flushing MCP calls."""
|
|
84
|
-
self.reset_context()
|
|
85
|
-
|
|
86
|
-
# Set active task run ID
|
|
87
|
-
set_current_task_run_id("test-task")
|
|
88
|
-
|
|
89
|
-
# Create mock MCP call with required attributes
|
|
90
|
-
mock_call = MagicMock(spec=BaseMCPCall)
|
|
91
|
-
mock_call.model_dump.return_value = {"type": "test", "task_run_id": "test-task"}
|
|
92
|
-
mock_call.task_run_id = "test-task"
|
|
93
|
-
|
|
94
|
-
buffer_mcp_call(mock_call)
|
|
95
|
-
|
|
96
|
-
# Flush should return the call and clear buffer
|
|
97
|
-
result = flush_buffer()
|
|
98
|
-
assert len(result) == 1
|
|
99
|
-
assert result[0] == mock_call
|
|
100
|
-
|
|
101
|
-
# Buffer should be empty after flush
|
|
102
|
-
result2 = flush_buffer()
|
|
103
|
-
assert result2 == []
|
|
104
|
-
|
|
105
|
-
def test_add_multiple_mcp_calls(self):
|
|
106
|
-
"""Test adding multiple MCP calls."""
|
|
107
|
-
self.reset_context()
|
|
108
|
-
|
|
109
|
-
# Set active task run ID
|
|
110
|
-
set_current_task_run_id("test-task")
|
|
111
|
-
|
|
112
|
-
# Create multiple mock calls
|
|
113
|
-
mock_calls = []
|
|
114
|
-
for i in range(3):
|
|
115
|
-
mock_call = MagicMock(spec=BaseMCPCall)
|
|
116
|
-
mock_call.model_dump.return_value = {"type": f"test_{i}", "task_run_id": "test-task"}
|
|
117
|
-
mock_call.task_run_id = "test-task"
|
|
118
|
-
mock_calls.append(mock_call)
|
|
119
|
-
buffer_mcp_call(mock_call)
|
|
120
|
-
|
|
121
|
-
# Flush should return all calls
|
|
122
|
-
result = flush_buffer()
|
|
123
|
-
assert len(result) == 3
|
|
124
|
-
assert result == mock_calls
|
|
125
|
-
|
|
126
|
-
def test_buffer_isolation_per_task(self):
|
|
127
|
-
"""Test that MCP call buffers contain all calls regardless of task ID."""
|
|
128
|
-
self.reset_context()
|
|
129
|
-
|
|
130
|
-
# Set task run ID 1
|
|
131
|
-
set_current_task_run_id("task-1")
|
|
132
|
-
mock_call_1 = MagicMock(spec=BaseMCPCall)
|
|
133
|
-
mock_call_1.task_run_id = "task-1"
|
|
134
|
-
mock_call_1.model_dump.return_value = {"type": "test", "task_run_id": "task-1"}
|
|
135
|
-
buffer_mcp_call(mock_call_1)
|
|
136
|
-
|
|
137
|
-
# Set task run ID 2
|
|
138
|
-
set_current_task_run_id("task-2")
|
|
139
|
-
mock_call_2 = MagicMock(spec=BaseMCPCall)
|
|
140
|
-
mock_call_2.task_run_id = "task-2"
|
|
141
|
-
mock_call_2.model_dump.return_value = {"type": "test", "task_run_id": "task-2"}
|
|
142
|
-
buffer_mcp_call(mock_call_2)
|
|
143
|
-
|
|
144
|
-
# Flush should return all calls from both tasks
|
|
145
|
-
result = flush_buffer()
|
|
146
|
-
assert len(result) == 1
|
|
147
|
-
assert result[0] == mock_call_2
|
|
148
|
-
|
|
149
|
-
set_current_task_run_id("task-1")
|
|
150
|
-
result2 = flush_buffer()
|
|
151
|
-
assert len(result2) == 1
|
|
152
|
-
assert result2[0] == mock_call_1
|
|
153
|
-
|
|
154
|
-
def test_buffer_mcp_call_without_task_id(self):
|
|
155
|
-
"""Test adding MCP call when no task run ID is set."""
|
|
156
|
-
self.reset_context()
|
|
157
|
-
set_current_task_run_id(None)
|
|
158
|
-
|
|
159
|
-
mock_call = MagicMock(spec=BaseMCPCall)
|
|
160
|
-
mock_call.task_run_id = None
|
|
161
|
-
buffer_mcp_call(mock_call)
|
|
162
|
-
|
|
163
|
-
# Should not buffer anything when no task ID is set
|
|
164
|
-
result = flush_buffer()
|
|
165
|
-
assert len(result) == 0
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
class TestContextIntegration:
|
|
169
|
-
"""Integration tests for context management."""
|
|
170
|
-
|
|
171
|
-
def test_context_lifecycle(self):
|
|
172
|
-
"""Test complete context lifecycle."""
|
|
173
|
-
# Start with clean state
|
|
174
|
-
set_current_task_run_id(None)
|
|
175
|
-
flush_buffer()
|
|
176
|
-
is_root_trace.set(False)
|
|
177
|
-
|
|
178
|
-
# Set up trace context
|
|
179
|
-
task_id = "integration-test-task"
|
|
180
|
-
set_current_task_run_id(task_id)
|
|
181
|
-
is_root_trace.set(True)
|
|
182
|
-
|
|
183
|
-
# Add some MCP calls
|
|
184
|
-
mock_calls = []
|
|
185
|
-
for i in range(2):
|
|
186
|
-
mock_call = MagicMock(spec=BaseMCPCall)
|
|
187
|
-
mock_call.model_dump.return_value = {
|
|
188
|
-
"type": f"integration_test_{i}",
|
|
189
|
-
"task_run_id": task_id,
|
|
190
|
-
}
|
|
191
|
-
mock_call.task_run_id = task_id
|
|
192
|
-
mock_calls.append(mock_call)
|
|
193
|
-
buffer_mcp_call(mock_call)
|
|
194
|
-
|
|
195
|
-
# Verify context state
|
|
196
|
-
assert get_current_task_run_id() == task_id
|
|
197
|
-
assert is_root_trace.get() is True
|
|
198
|
-
|
|
199
|
-
# Flush and verify
|
|
200
|
-
result = flush_buffer()
|
|
201
|
-
assert len(result) == 2
|
|
202
|
-
assert result == mock_calls
|
|
203
|
-
|
|
204
|
-
# Clean up
|
|
205
|
-
set_current_task_run_id(None)
|
|
206
|
-
is_root_trace.set(False)
|
|
207
|
-
|
|
208
|
-
# Verify cleanup
|
|
209
|
-
assert get_current_task_run_id() is None
|
|
210
|
-
assert flush_buffer() == []
|
|
@@ -1,312 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import uuid
|
|
5
|
-
from unittest.mock import MagicMock
|
|
6
|
-
|
|
7
|
-
import pytest
|
|
8
|
-
|
|
9
|
-
from hud.telemetry._trace import (
|
|
10
|
-
init_telemetry,
|
|
11
|
-
trace,
|
|
12
|
-
trace_decorator,
|
|
13
|
-
trace_open,
|
|
14
|
-
)
|
|
15
|
-
from hud.telemetry.context import get_current_task_run_id as actual_get_current_task_run_id
|
|
16
|
-
from hud.telemetry.context import is_root_trace as actual_is_root_trace
|
|
17
|
-
from hud.telemetry.context import set_current_task_run_id as actual_set_current_task_run_id
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
@pytest.fixture(autouse=True)
|
|
21
|
-
def reset_telemetry_context_fixture():
|
|
22
|
-
"""Ensures telemetry context is reset before and after each test in this file."""
|
|
23
|
-
# Reset context before test
|
|
24
|
-
actual_set_current_task_run_id(None)
|
|
25
|
-
actual_is_root_trace.set(False)
|
|
26
|
-
yield
|
|
27
|
-
# Reset context after test
|
|
28
|
-
actual_set_current_task_run_id(None)
|
|
29
|
-
actual_is_root_trace.set(False)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class TestInitTelemetry:
|
|
33
|
-
"""Test telemetry initialization."""
|
|
34
|
-
|
|
35
|
-
def test_init_telemetry(self, mocker):
|
|
36
|
-
"""Test telemetry initialization calls registry.install_all."""
|
|
37
|
-
mock_registry = mocker.patch("hud.telemetry._trace.registry", autospec=True)
|
|
38
|
-
init_telemetry()
|
|
39
|
-
mock_registry.install_all.assert_called_once()
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class TestTrace:
|
|
43
|
-
"""Test the trace context manager."""
|
|
44
|
-
|
|
45
|
-
def test_trace_basic(self, mocker):
|
|
46
|
-
"""Test basic trace functionality and context setting."""
|
|
47
|
-
mock_flush = mocker.patch(
|
|
48
|
-
"hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
|
|
49
|
-
)
|
|
50
|
-
mock_submit_loop = mocker.patch(
|
|
51
|
-
"hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
initial_root_state = actual_is_root_trace.get()
|
|
55
|
-
|
|
56
|
-
with trace_open() as task_run_id:
|
|
57
|
-
assert isinstance(task_run_id, str)
|
|
58
|
-
uuid.UUID(task_run_id)
|
|
59
|
-
assert actual_get_current_task_run_id() == task_run_id
|
|
60
|
-
assert actual_is_root_trace.get() is True
|
|
61
|
-
|
|
62
|
-
assert actual_get_current_task_run_id() is None
|
|
63
|
-
assert actual_is_root_trace.get() == initial_root_state
|
|
64
|
-
mock_flush.assert_called_once()
|
|
65
|
-
# submit_to_worker_loop is now called for status updates
|
|
66
|
-
assert mock_submit_loop.call_count == 2 # INITIALIZING and COMPLETED
|
|
67
|
-
|
|
68
|
-
def test_trace_with_name_and_attributes(self, mocker):
|
|
69
|
-
"""Test trace with name and attributes, checking they are passed on."""
|
|
70
|
-
mock_mcp_calls = [MagicMock()]
|
|
71
|
-
mock_flush = mocker.patch(
|
|
72
|
-
"hud.telemetry._trace.flush_buffer", return_value=mock_mcp_calls, autospec=True
|
|
73
|
-
)
|
|
74
|
-
mock_submit_loop = mocker.patch(
|
|
75
|
-
"hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
76
|
-
)
|
|
77
|
-
|
|
78
|
-
trace_name = "test_trace_with_data"
|
|
79
|
-
attrs = {"key": "value", "number": 42}
|
|
80
|
-
|
|
81
|
-
with trace_open(name=trace_name, attributes=attrs) as task_run_id:
|
|
82
|
-
assert isinstance(task_run_id, str)
|
|
83
|
-
|
|
84
|
-
mock_flush.assert_called_once()
|
|
85
|
-
# submit_to_worker_loop is now called for status updates
|
|
86
|
-
assert mock_submit_loop.call_count == 2 # INITIALIZING and COMPLETED
|
|
87
|
-
|
|
88
|
-
@pytest.mark.asyncio
|
|
89
|
-
async def test_trace_with_mcp_calls_exports(self, mocker):
|
|
90
|
-
"""Test trace with MCP calls exports telemetry with correct data."""
|
|
91
|
-
mock_mcp_calls = [MagicMock(), MagicMock()]
|
|
92
|
-
mock_flush = mocker.patch(
|
|
93
|
-
"hud.telemetry._trace.flush_buffer", return_value=mock_mcp_calls, autospec=True
|
|
94
|
-
)
|
|
95
|
-
mock_submit_loop = mocker.patch(
|
|
96
|
-
"hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
97
|
-
)
|
|
98
|
-
|
|
99
|
-
async def mock_export(*args, **kwargs):
|
|
100
|
-
return None
|
|
101
|
-
|
|
102
|
-
mocker.patch(
|
|
103
|
-
"hud.telemetry.exporter.export_telemetry",
|
|
104
|
-
side_effect=mock_export,
|
|
105
|
-
)
|
|
106
|
-
|
|
107
|
-
test_attrs = {"custom_attr": "test_val"}
|
|
108
|
-
test_name = "mcp_export_test"
|
|
109
|
-
|
|
110
|
-
with trace_open(name=test_name, attributes=test_attrs) as task_run_id:
|
|
111
|
-
pass
|
|
112
|
-
|
|
113
|
-
mock_flush.assert_called_once()
|
|
114
|
-
# submit_to_worker_loop is now called for status updates and export
|
|
115
|
-
# The exact count may vary depending on whether export_incremental is called
|
|
116
|
-
assert mock_submit_loop.call_count >= 2 # At least INITIALIZING and COMPLETED
|
|
117
|
-
|
|
118
|
-
# With the new export flow, export_telemetry is submitted to worker loop
|
|
119
|
-
# so we can't directly assert on it being called synchronously
|
|
120
|
-
# Instead, verify that the trace completed successfully
|
|
121
|
-
assert task_run_id is not None
|
|
122
|
-
|
|
123
|
-
def test_trace_nested(self, mocker):
|
|
124
|
-
"""Test nested traces, verifying context restoration and root trace logic."""
|
|
125
|
-
actual_set_current_task_run_id(None)
|
|
126
|
-
actual_is_root_trace.set(False)
|
|
127
|
-
|
|
128
|
-
mock_flush_internal = mocker.patch(
|
|
129
|
-
"hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
|
|
130
|
-
)
|
|
131
|
-
mock_submit_loop_internal = mocker.patch(
|
|
132
|
-
"hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
133
|
-
)
|
|
134
|
-
|
|
135
|
-
assert actual_get_current_task_run_id() is None
|
|
136
|
-
assert actual_is_root_trace.get() is False
|
|
137
|
-
|
|
138
|
-
with trace_open(name="outer") as outer_id:
|
|
139
|
-
assert actual_get_current_task_run_id() == outer_id
|
|
140
|
-
assert actual_is_root_trace.get() is True
|
|
141
|
-
with trace(name="inner") as inner_id:
|
|
142
|
-
assert actual_get_current_task_run_id() == inner_id
|
|
143
|
-
assert actual_is_root_trace.get() is False
|
|
144
|
-
assert outer_id != inner_id
|
|
145
|
-
assert actual_get_current_task_run_id() == outer_id
|
|
146
|
-
assert actual_is_root_trace.get() is True
|
|
147
|
-
|
|
148
|
-
assert actual_get_current_task_run_id() is None
|
|
149
|
-
assert actual_is_root_trace.get() is False
|
|
150
|
-
assert mock_flush_internal.call_count == 2
|
|
151
|
-
# submit_to_worker_loop is now called for status updates
|
|
152
|
-
assert mock_submit_loop_internal.call_count == 2 # Only outer trace sends status updates
|
|
153
|
-
|
|
154
|
-
def test_trace_exception_handling(self, mocker):
|
|
155
|
-
"""Test trace handles exceptions properly and restores context."""
|
|
156
|
-
initial_task_id_before_trace = "pre_existing_id_123"
|
|
157
|
-
initial_root_state_before_trace = True
|
|
158
|
-
actual_set_current_task_run_id(initial_task_id_before_trace)
|
|
159
|
-
actual_is_root_trace.set(initial_root_state_before_trace)
|
|
160
|
-
|
|
161
|
-
mock_flush = mocker.patch(
|
|
162
|
-
"hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
|
|
163
|
-
)
|
|
164
|
-
mock_submit_loop = mocker.patch(
|
|
165
|
-
"hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
166
|
-
)
|
|
167
|
-
|
|
168
|
-
with (
|
|
169
|
-
pytest.raises(ValueError, match="Test exception"),
|
|
170
|
-
trace_open(name="trace_with_exception"),
|
|
171
|
-
):
|
|
172
|
-
assert actual_get_current_task_run_id() != initial_task_id_before_trace
|
|
173
|
-
assert actual_is_root_trace.get() is False
|
|
174
|
-
raise ValueError("Test exception")
|
|
175
|
-
|
|
176
|
-
mock_flush.assert_called_once()
|
|
177
|
-
assert actual_get_current_task_run_id() == initial_task_id_before_trace
|
|
178
|
-
assert actual_is_root_trace.get() == initial_root_state_before_trace
|
|
179
|
-
mock_submit_loop.assert_not_called()
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
class TestTraceSync:
|
|
183
|
-
"""Test the trace_sync context manager."""
|
|
184
|
-
|
|
185
|
-
def test_trace_sync_basic(self, mocker):
|
|
186
|
-
"""Test trace calls trace_open and flush."""
|
|
187
|
-
mock_flush = mocker.patch("hud.flush", autospec=True)
|
|
188
|
-
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open")
|
|
189
|
-
mock_trace_open.return_value.__enter__.return_value = "test-task-id"
|
|
190
|
-
mock_trace_open.return_value.__exit__.return_value = None
|
|
191
|
-
|
|
192
|
-
with trace(name="test_sync") as task_run_id:
|
|
193
|
-
assert task_run_id == "test-task-id"
|
|
194
|
-
|
|
195
|
-
mock_trace_open.assert_called_once_with(name="test_sync", agent_model=None, attributes=None)
|
|
196
|
-
mock_flush.assert_called_once()
|
|
197
|
-
|
|
198
|
-
def test_trace_sync_with_attributes(self, mocker):
|
|
199
|
-
"""Test trace passes attributes correctly."""
|
|
200
|
-
mock_flush = mocker.patch("hud.flush", autospec=True)
|
|
201
|
-
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open")
|
|
202
|
-
mock_trace_open.return_value.__enter__.return_value = "test-task-id"
|
|
203
|
-
mock_trace_open.return_value.__exit__.return_value = None
|
|
204
|
-
attrs = {"key": "value"}
|
|
205
|
-
|
|
206
|
-
with trace(name="test_sync", attributes=attrs):
|
|
207
|
-
pass
|
|
208
|
-
|
|
209
|
-
mock_trace_open.assert_called_once_with(
|
|
210
|
-
name="test_sync", agent_model=None, attributes=attrs
|
|
211
|
-
)
|
|
212
|
-
mock_flush.assert_called_once()
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
class TestTraceDecorator:
|
|
216
|
-
"""Test the trace_decorator function decorator."""
|
|
217
|
-
|
|
218
|
-
def test_trace_decorator_sync_function(self, mocker):
|
|
219
|
-
"""Test trace_decorator on synchronous functions."""
|
|
220
|
-
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
221
|
-
mock_trace_open.return_value.__enter__.return_value = "mocked_task_id"
|
|
222
|
-
mock_trace_open.return_value.__exit__.return_value = None
|
|
223
|
-
|
|
224
|
-
@trace_decorator(name="test_func_sync")
|
|
225
|
-
def sync_function(x, y):
|
|
226
|
-
return x + y
|
|
227
|
-
|
|
228
|
-
result = sync_function(1, 2)
|
|
229
|
-
assert result == 3
|
|
230
|
-
mock_trace_open.assert_called_once_with(
|
|
231
|
-
name="test_func_sync", agent_model=None, attributes=None
|
|
232
|
-
)
|
|
233
|
-
|
|
234
|
-
def test_trace_decorator_async_function(self, mocker):
|
|
235
|
-
"""Test trace_decorator on asynchronous functions."""
|
|
236
|
-
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
237
|
-
mock_trace_open.return_value.__enter__.return_value = "mocked_task_id"
|
|
238
|
-
mock_trace_open.return_value.__exit__.return_value = None
|
|
239
|
-
|
|
240
|
-
@trace_decorator(name="test_func_async")
|
|
241
|
-
async def async_function(x, y):
|
|
242
|
-
return x + y
|
|
243
|
-
|
|
244
|
-
async def run_test():
|
|
245
|
-
result = await async_function(1, 2)
|
|
246
|
-
assert result == 3
|
|
247
|
-
mock_trace_open.assert_called_once_with(
|
|
248
|
-
name="test_func_async", agent_model=None, attributes=None
|
|
249
|
-
)
|
|
250
|
-
|
|
251
|
-
asyncio.run(run_test())
|
|
252
|
-
|
|
253
|
-
def test_trace_decorator_with_attributes(self, mocker):
|
|
254
|
-
"""Test trace_decorator with attributes."""
|
|
255
|
-
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
256
|
-
mock_trace_open.return_value.__enter__.return_value = "task_id"
|
|
257
|
-
mock_trace_open.return_value.__exit__.return_value = None
|
|
258
|
-
|
|
259
|
-
attrs = {"operation": "multiply"}
|
|
260
|
-
|
|
261
|
-
@trace_decorator(name="test_func", attributes=attrs)
|
|
262
|
-
def func_with_attrs(x):
|
|
263
|
-
return x * 2
|
|
264
|
-
|
|
265
|
-
result = func_with_attrs(5)
|
|
266
|
-
assert result == 10
|
|
267
|
-
mock_trace_open.assert_called_once_with(
|
|
268
|
-
name="test_func", agent_model=None, attributes=attrs
|
|
269
|
-
)
|
|
270
|
-
|
|
271
|
-
def test_trace_decorator_without_name(self, mocker):
|
|
272
|
-
"""Test trace_decorator uses module.function name when name not provided."""
|
|
273
|
-
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
274
|
-
mock_trace_open.return_value.__enter__.return_value = "task_id"
|
|
275
|
-
mock_trace_open.return_value.__exit__.return_value = None
|
|
276
|
-
|
|
277
|
-
@trace_decorator()
|
|
278
|
-
def my_function():
|
|
279
|
-
return "result"
|
|
280
|
-
|
|
281
|
-
result = my_function()
|
|
282
|
-
assert result == "result"
|
|
283
|
-
# Should use module.function name
|
|
284
|
-
expected_name = f"{my_function.__module__}.my_function"
|
|
285
|
-
mock_trace_open.assert_called_once_with(
|
|
286
|
-
name=expected_name, agent_model=None, attributes=None
|
|
287
|
-
)
|
|
288
|
-
|
|
289
|
-
def test_trace_decorator_preserves_function_metadata(self):
|
|
290
|
-
"""Test trace_decorator preserves original function metadata."""
|
|
291
|
-
|
|
292
|
-
@trace_decorator(name="test")
|
|
293
|
-
def original_function():
|
|
294
|
-
"""Original docstring."""
|
|
295
|
-
|
|
296
|
-
assert original_function.__name__ == "original_function"
|
|
297
|
-
assert original_function.__doc__ == "Original docstring."
|
|
298
|
-
|
|
299
|
-
def test_trace_decorator_exception_propagation(self, mocker):
|
|
300
|
-
"""Test trace_decorator propagates exceptions."""
|
|
301
|
-
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
302
|
-
mock_trace_open.return_value.__enter__.return_value = "task_id"
|
|
303
|
-
mock_trace_open.return_value.__exit__.return_value = None
|
|
304
|
-
|
|
305
|
-
@trace_decorator()
|
|
306
|
-
def failing_function():
|
|
307
|
-
raise RuntimeError("Test error")
|
|
308
|
-
|
|
309
|
-
with pytest.raises(RuntimeError, match="Test error"):
|
|
310
|
-
failing_function()
|
|
311
|
-
|
|
312
|
-
mock_trace_open.assert_called_once()
|
hud/tools/helper/README.md
DELETED
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
# HUD Helper Package
|
|
2
|
-
|
|
3
|
-
This sub-package bundles utilities that make it trivial to expose HUD
|
|
4
|
-
Python tool classes as **Model Context Protocol (MCP)** tools.
|
|
5
|
-
|
|
6
|
-
## Contents
|
|
7
|
-
|
|
8
|
-
| File | Purpose |
|
|
9
|
-
|------|---------|
|
|
10
|
-
| `utils.py` | `register_instance_tool` – wrap a class instance into a FastMCP tool with auto-generated JSON schema |
|
|
11
|
-
| `mcp_server.py` | CLI server (stdio/HTTP). Tool names: `computer`, `computer_anthropic`, `computer_openai`, `bash`, `edit_file` |
|
|
12
|
-
|
|
13
|
-
## Quick start
|
|
14
|
-
|
|
15
|
-
### 1 — Run a server (stdio)
|
|
16
|
-
```bash
|
|
17
|
-
python -m hud.tools.helper.mcp_server # exposes all tools on stdio
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
### 2 — Run a server (HTTP)
|
|
21
|
-
```bash
|
|
22
|
-
python -m hud.tools.helper.mcp_server http --port 8040 \
|
|
23
|
-
--tools computer bash # expose only two tools
|
|
24
|
-
```
|
|
25
|
-
This starts a Streamable-HTTP MCP server at `http://localhost:8040/mcp`.
|
|
26
|
-
|
|
27
|
-
### 3 — From a client
|
|
28
|
-
```python
|
|
29
|
-
from mcp import ClientSession
|
|
30
|
-
from mcp.client.streamable_http import streamablehttp_client
|
|
31
|
-
|
|
32
|
-
async with streamablehttp_client("http://localhost:8040/mcp") as (r, w, _):
|
|
33
|
-
async with ClientSession(r, w) as sess:
|
|
34
|
-
await sess.initialize()
|
|
35
|
-
res = await sess.call_tool("bash", {"command": "echo hi"})
|
|
36
|
-
print(res.content[0].text)
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
## Advanced: registering custom tools
|
|
40
|
-
|
|
41
|
-
```python
|
|
42
|
-
from mcp.server.fastmcp import FastMCP
|
|
43
|
-
from hud.tools.helper import register_instance_tool
|
|
44
|
-
|
|
45
|
-
class MyTool:
|
|
46
|
-
async def __call__(self, name: str) -> str: # type-hints generate schema!
|
|
47
|
-
return f"Hello {name}!"
|
|
48
|
-
|
|
49
|
-
mcp = FastMCP("Custom")
|
|
50
|
-
register_instance_tool(mcp, "my_tool", MyTool())
|
|
51
|
-
|
|
52
|
-
mcp.run(transport="stdio")
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
The helper inspects `MyTool.__call__`, removes `*args/**kwargs`, and FastMCP
|
|
56
|
-
automatically derives an input schema and registers the tool.
|
hud/tools/helper/__init__.py
DELETED
hud/tools/helper/mcp_server.py
DELETED
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Parameterised FastMCP server for HUD tools.
|
|
3
|
-
|
|
4
|
-
Usage
|
|
5
|
-
-----
|
|
6
|
-
Run with default (stdio, all tools):
|
|
7
|
-
|
|
8
|
-
python -m hud.tools.helper.mcp_server
|
|
9
|
-
|
|
10
|
-
Streamable HTTP on :8040 exposing computer + bash only:
|
|
11
|
-
|
|
12
|
-
python -m hud.tools.helper.mcp_server http --tools computer bash
|
|
13
|
-
|
|
14
|
-
Arguments
|
|
15
|
-
~~~~~~~~~
|
|
16
|
-
transport stdio (default) | http
|
|
17
|
-
--tools list of tool names to expose (default = all)
|
|
18
|
-
--port HTTP port (default 8040)
|
|
19
|
-
"""
|
|
20
|
-
|
|
21
|
-
from __future__ import annotations
|
|
22
|
-
|
|
23
|
-
import argparse
|
|
24
|
-
|
|
25
|
-
from mcp.server.fastmcp import FastMCP
|
|
26
|
-
|
|
27
|
-
from hud.tools.bash import BashTool
|
|
28
|
-
from hud.tools.computer.anthropic import AnthropicComputerTool
|
|
29
|
-
from hud.tools.computer.hud import HudComputerTool
|
|
30
|
-
from hud.tools.computer.openai import OpenAIComputerTool
|
|
31
|
-
from hud.tools.edit import EditTool
|
|
32
|
-
|
|
33
|
-
from .utils import register_instance_tool
|
|
34
|
-
|
|
35
|
-
TOOL_MAP = {
|
|
36
|
-
"computer": HudComputerTool,
|
|
37
|
-
"computer_anthropic": AnthropicComputerTool,
|
|
38
|
-
"computer_openai": OpenAIComputerTool,
|
|
39
|
-
"bash": BashTool,
|
|
40
|
-
"edit_file": EditTool,
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def build_server(
|
|
45
|
-
names: list[str] | None = None,
|
|
46
|
-
*,
|
|
47
|
-
port: int = 8040,
|
|
48
|
-
host: str = "0.0.0.0", # noqa: S104
|
|
49
|
-
) -> FastMCP:
|
|
50
|
-
server = FastMCP("HUD", port=port, host=host)
|
|
51
|
-
selected = names or list(TOOL_MAP.keys())
|
|
52
|
-
|
|
53
|
-
for name in selected:
|
|
54
|
-
cls = TOOL_MAP.get(name)
|
|
55
|
-
if cls is None:
|
|
56
|
-
raise SystemExit(f"Unknown tool '{name}'. Choices: {list(TOOL_MAP)}")
|
|
57
|
-
register_instance_tool(server, name, cls())
|
|
58
|
-
return server
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def main() -> None:
|
|
62
|
-
parser = argparse.ArgumentParser(prog="hud-mcp", description="Run HUD FastMCP server")
|
|
63
|
-
parser.add_argument("transport", nargs="?", choices=["stdio", "http"], default="stdio")
|
|
64
|
-
parser.add_argument("--tools", nargs="*", help="Tool names to expose (default: all)")
|
|
65
|
-
parser.add_argument("--port", type=int, default=8040, help="HTTP port (default 8040)")
|
|
66
|
-
parser.add_argument("--host", type=str, default="0.0.0.0", help="HTTP host (default 0.0.0.0)") # noqa: S104
|
|
67
|
-
args = parser.parse_args()
|
|
68
|
-
|
|
69
|
-
mcp = build_server(args.tools, port=args.port, host=args.host)
|
|
70
|
-
|
|
71
|
-
if args.transport == "http":
|
|
72
|
-
mcp.run(transport="streamable-http")
|
|
73
|
-
else:
|
|
74
|
-
mcp.run(transport="stdio")
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
if __name__ == "__main__":
|
|
78
|
-
main()
|