hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +20 -8
- hud/adapters/common/adapter.py +14 -3
- hud/adapters/common/tests/test_adapter.py +16 -4
- hud/datasets.py +188 -0
- hud/env/docker_client.py +15 -3
- hud/env/environment.py +10 -7
- hud/env/local_docker_client.py +29 -7
- hud/env/remote_client.py +1 -1
- hud/env/remote_docker_client.py +2 -2
- hud/exceptions.py +2 -1
- hud/gym.py +0 -9
- hud/mcp/__init__.py +17 -0
- hud/mcp/base.py +631 -0
- hud/mcp/claude.py +321 -0
- hud/mcp/client.py +312 -0
- hud/mcp/langchain.py +250 -0
- hud/mcp/openai.py +334 -0
- hud/mcp/tests/__init__.py +1 -0
- hud/mcp/tests/test_base.py +512 -0
- hud/mcp/tests/test_claude.py +294 -0
- hud/mcp/tests/test_client.py +324 -0
- hud/mcp/tests/test_openai.py +238 -0
- hud/settings.py +20 -2
- hud/task.py +5 -88
- hud/taskset.py +2 -23
- hud/telemetry/__init__.py +16 -7
- hud/telemetry/_trace.py +246 -72
- hud/telemetry/context.py +88 -27
- hud/telemetry/exporter.py +171 -11
- hud/telemetry/instrumentation/mcp.py +174 -410
- hud/telemetry/job.py +141 -0
- hud/telemetry/mcp_models.py +13 -74
- hud/telemetry/tests/test_context.py +9 -6
- hud/telemetry/tests/test_trace.py +120 -78
- hud/tools/__init__.py +34 -0
- hud/tools/base.py +65 -0
- hud/tools/bash.py +137 -0
- hud/tools/computer/__init__.py +13 -0
- hud/tools/computer/anthropic.py +411 -0
- hud/tools/computer/hud.py +315 -0
- hud/tools/computer/openai.py +283 -0
- hud/tools/edit.py +290 -0
- hud/tools/executors/__init__.py +30 -0
- hud/tools/executors/base.py +331 -0
- hud/tools/executors/pyautogui.py +619 -0
- hud/tools/executors/tests/__init__.py +1 -0
- hud/tools/executors/tests/test_base_executor.py +338 -0
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -0
- hud/tools/executors/xdo.py +503 -0
- hud/tools/helper/README.md +56 -0
- hud/tools/helper/__init__.py +9 -0
- hud/tools/helper/mcp_server.py +78 -0
- hud/tools/helper/server_initialization.py +115 -0
- hud/tools/helper/utils.py +58 -0
- hud/tools/playwright_tool.py +379 -0
- hud/tools/tests/__init__.py +3 -0
- hud/tools/tests/test_bash.py +152 -0
- hud/tools/tests/test_computer.py +52 -0
- hud/tools/tests/test_computer_actions.py +34 -0
- hud/tools/tests/test_edit.py +240 -0
- hud/tools/tests/test_init.py +27 -0
- hud/tools/tests/test_playwright_tool.py +183 -0
- hud/tools/tests/test_tools.py +157 -0
- hud/tools/tests/test_utils.py +156 -0
- hud/tools/utils.py +50 -0
- hud/trajectory.py +5 -1
- hud/types.py +10 -1
- hud/utils/tests/test_init.py +21 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/METADATA +27 -18
- hud_python-0.3.1.dist-info/RECORD +119 -0
- hud/evaluators/__init__.py +0 -9
- hud/evaluators/base.py +0 -32
- hud/evaluators/inspect.py +0 -24
- hud/evaluators/judge.py +0 -189
- hud/evaluators/match.py +0 -156
- hud/evaluators/remote.py +0 -65
- hud/evaluators/tests/__init__.py +0 -0
- hud/evaluators/tests/test_inspect.py +0 -12
- hud/evaluators/tests/test_judge.py +0 -231
- hud/evaluators/tests/test_match.py +0 -115
- hud/evaluators/tests/test_remote.py +0 -98
- hud_python-0.2.10.dist-info/RECORD +0 -85
- {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
- {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/job.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Job context manager for grouping related traces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
import uuid
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from contextvars import ContextVar
|
|
10
|
+
from typing import TYPE_CHECKING, Any
|
|
11
|
+
|
|
12
|
+
from hud.telemetry.exporter import JobStatus, submit_to_worker_loop, update_job_status
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from collections.abc import Generator
|
|
16
|
+
from typing import Self
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger("hud.telemetry")
|
|
19
|
+
|
|
20
|
+
# Context variables for current job
|
|
21
|
+
current_job_id: ContextVar[str | None] = ContextVar("current_job_id", default=None)
|
|
22
|
+
current_job_name: ContextVar[str | None] = ContextVar("current_job_name", default=None)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class JobContext:
|
|
26
|
+
"""Context manager for grouping traces under a job."""
|
|
27
|
+
|
|
28
|
+
def __init__(
|
|
29
|
+
self, name: str, taskset_name: str | None = None, metadata: dict[str, Any] | None = None
|
|
30
|
+
) -> None:
|
|
31
|
+
self.id = str(uuid.uuid4())
|
|
32
|
+
self.name = name
|
|
33
|
+
self.metadata = metadata or {}
|
|
34
|
+
self.taskset_name: str | None = taskset_name
|
|
35
|
+
|
|
36
|
+
def __enter__(self) -> Self:
|
|
37
|
+
# Auto-detect dataset
|
|
38
|
+
if self.taskset_name is None:
|
|
39
|
+
self._detect_dataset()
|
|
40
|
+
|
|
41
|
+
# Set context variables
|
|
42
|
+
current_job_id.set(self.id)
|
|
43
|
+
current_job_name.set(self.name)
|
|
44
|
+
|
|
45
|
+
# Send initial status
|
|
46
|
+
job_metadata = {**self.metadata}
|
|
47
|
+
coro = update_job_status(
|
|
48
|
+
self.id, JobStatus.RUNNING, metadata=job_metadata, taskset_name=self.taskset_name
|
|
49
|
+
)
|
|
50
|
+
submit_to_worker_loop(coro)
|
|
51
|
+
|
|
52
|
+
logger.info("Started job %s (ID: %s)", self.name, self.id)
|
|
53
|
+
return self
|
|
54
|
+
|
|
55
|
+
def __exit__(
|
|
56
|
+
self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: object
|
|
57
|
+
) -> None:
|
|
58
|
+
# Determine final status
|
|
59
|
+
if exc_type is not None:
|
|
60
|
+
# Job failed with exception
|
|
61
|
+
error_msg = f"{exc_type.__name__}: {exc_val}"
|
|
62
|
+
coro = update_job_status(
|
|
63
|
+
self.id, JobStatus.ERROR, error_message=error_msg, taskset_name=self.taskset_name
|
|
64
|
+
)
|
|
65
|
+
else:
|
|
66
|
+
# Job completed successfully
|
|
67
|
+
coro = update_job_status(self.id, JobStatus.COMPLETED, taskset_name=self.taskset_name)
|
|
68
|
+
|
|
69
|
+
submit_to_worker_loop(coro)
|
|
70
|
+
|
|
71
|
+
# Clear context
|
|
72
|
+
current_job_id.set(None)
|
|
73
|
+
current_job_name.set(None)
|
|
74
|
+
|
|
75
|
+
status = "failed" if exc_type else "completed"
|
|
76
|
+
logger.info("Job %s %s", self.name, status)
|
|
77
|
+
|
|
78
|
+
def _detect_dataset(self) -> None:
|
|
79
|
+
"""Auto-detect HuggingFace dataset in parent scope."""
|
|
80
|
+
try:
|
|
81
|
+
# Check frames 2 and 3 (with statement and parent scope)
|
|
82
|
+
for frame_depth in [2, 3]:
|
|
83
|
+
try:
|
|
84
|
+
frame = sys._getframe(frame_depth)
|
|
85
|
+
|
|
86
|
+
# Search for Dataset objects
|
|
87
|
+
for var_value in frame.f_locals.values():
|
|
88
|
+
if hasattr(var_value, "info") and hasattr(var_value.info, "builder_name"):
|
|
89
|
+
self.taskset_name = var_value.info.builder_name
|
|
90
|
+
logger.debug(
|
|
91
|
+
"Auto-detected dataset at frame %d: %s",
|
|
92
|
+
frame_depth,
|
|
93
|
+
self.taskset_name,
|
|
94
|
+
)
|
|
95
|
+
return
|
|
96
|
+
elif hasattr(var_value, "builder_name"):
|
|
97
|
+
# Older dataset format
|
|
98
|
+
self.taskset_name = var_value.builder_name
|
|
99
|
+
logger.debug(
|
|
100
|
+
"Auto-detected dataset at frame %d: %s",
|
|
101
|
+
frame_depth,
|
|
102
|
+
self.taskset_name,
|
|
103
|
+
)
|
|
104
|
+
return
|
|
105
|
+
except ValueError:
|
|
106
|
+
# Frame doesn't exist
|
|
107
|
+
continue
|
|
108
|
+
except Exception as e:
|
|
109
|
+
logger.debug("Dataset auto-detection failed: %s", e)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@contextmanager
|
|
113
|
+
def job(
|
|
114
|
+
name: str, taskset_name: str | None = None, metadata: dict[str, Any] | None = None
|
|
115
|
+
) -> Generator[JobContext, None, None]:
|
|
116
|
+
"""
|
|
117
|
+
Create a job context for grouping related traces.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
name: Name for the job
|
|
121
|
+
metadata: Optional metadata to include with the job
|
|
122
|
+
|
|
123
|
+
Example:
|
|
124
|
+
with hud.job("evaluation_run") as job:
|
|
125
|
+
for task in tasks:
|
|
126
|
+
with hud.trace(f"task_{task.id}"):
|
|
127
|
+
# Trace automatically includes job_id
|
|
128
|
+
result = await agent.run(task)
|
|
129
|
+
"""
|
|
130
|
+
with JobContext(name, taskset_name, metadata) as ctx:
|
|
131
|
+
yield ctx
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def get_current_job_id() -> str | None:
|
|
135
|
+
"""Get the current job ID if inside a job context."""
|
|
136
|
+
return current_job_id.get()
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def get_current_job_name() -> str | None:
|
|
140
|
+
"""Get the current job name if inside a job context."""
|
|
141
|
+
return current_job_name.get()
|
hud/telemetry/mcp_models.py
CHANGED
|
@@ -28,16 +28,14 @@ class StatusType(str, Enum):
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class MCPCallType(str, Enum):
|
|
31
|
-
"""
|
|
31
|
+
"""Enum for different types of MCP calls in telemetry."""
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
HANDLE_INCOMING = "mcp.handle_incoming"
|
|
40
|
-
MANUAL_TEST = "manual.test"
|
|
33
|
+
# Requests and Notifications
|
|
34
|
+
SEND_REQUEST = "mcp.send_request"
|
|
35
|
+
SEND_NOTIFICATION = "mcp.send_notification"
|
|
36
|
+
|
|
37
|
+
# Responses
|
|
38
|
+
RECEIVE_RESPONSE = "mcp.receive_response"
|
|
41
39
|
|
|
42
40
|
|
|
43
41
|
class BaseMCPCall(BaseModel):
|
|
@@ -87,6 +85,8 @@ class MCPRequestCall(BaseMCPCall):
|
|
|
87
85
|
duration: float | None = None
|
|
88
86
|
request_id: str | int | None = None
|
|
89
87
|
request_data: dict[str, Any] | None = None
|
|
88
|
+
error: str | None = None
|
|
89
|
+
error_type: str | None = None
|
|
90
90
|
|
|
91
91
|
@classmethod
|
|
92
92
|
def from_jsonrpc_request(
|
|
@@ -191,6 +191,8 @@ class MCPNotificationCall(BaseMCPCall):
|
|
|
191
191
|
end_time: float | None = None
|
|
192
192
|
duration: float | None = None
|
|
193
193
|
notification_data: dict[str, Any] | None = None
|
|
194
|
+
error: str | None = None
|
|
195
|
+
error_type: str | None = None
|
|
194
196
|
|
|
195
197
|
@classmethod
|
|
196
198
|
def from_jsonrpc_notification(
|
|
@@ -230,69 +232,6 @@ class MCPNotificationCall(BaseMCPCall):
|
|
|
230
232
|
return None
|
|
231
233
|
|
|
232
234
|
|
|
233
|
-
class MCPStreamEvent(BaseMCPCall):
|
|
234
|
-
"""Record for an MCP stream event (read or write)"""
|
|
235
|
-
|
|
236
|
-
stream_event: bool = True
|
|
237
|
-
event_type: str = Field(..., description="Type of stream event: read or write")
|
|
238
|
-
item_type: str | None = None
|
|
239
|
-
is_response_or_error: bool = False
|
|
240
|
-
message_data: dict[str, Any] | None = None
|
|
241
|
-
|
|
242
|
-
@classmethod
|
|
243
|
-
def from_session_message(
|
|
244
|
-
cls, message: SessionMessage, task_run_id: str, event_type: str, **kwargs: Any
|
|
245
|
-
) -> MCPStreamEvent:
|
|
246
|
-
"""Create telemetry record for a stream event"""
|
|
247
|
-
method_name = "unknown_stream_operation"
|
|
248
|
-
is_response = False
|
|
249
|
-
item_type = "unknown"
|
|
250
|
-
message_data = None
|
|
251
|
-
|
|
252
|
-
if hasattr(message, "message") and hasattr(message.message, "root"):
|
|
253
|
-
msg_root = message.message.root
|
|
254
|
-
item_type = type(msg_root).__name__
|
|
255
|
-
message_data = msg_root.model_dump(exclude_none=True)
|
|
256
|
-
|
|
257
|
-
# Check type first before accessing attributes
|
|
258
|
-
if isinstance(msg_root, JSONRPCRequest | JSONRPCNotification) and hasattr(
|
|
259
|
-
msg_root, "method"
|
|
260
|
-
):
|
|
261
|
-
method_name = msg_root.method
|
|
262
|
-
elif isinstance(msg_root, JSONRPCResponse | JSONRPCError) and hasattr(msg_root, "id"):
|
|
263
|
-
method_name = f"response_to_id_{msg_root.id}"
|
|
264
|
-
is_response = True
|
|
265
|
-
|
|
266
|
-
return cls(
|
|
267
|
-
task_run_id=task_run_id,
|
|
268
|
-
status=StatusType.COMPLETED,
|
|
269
|
-
method=method_name,
|
|
270
|
-
event_type=event_type,
|
|
271
|
-
item_type=item_type,
|
|
272
|
-
is_response_or_error=is_response,
|
|
273
|
-
message_data=message_data,
|
|
274
|
-
timestamp=datetime.now().timestamp(),
|
|
275
|
-
**kwargs,
|
|
276
|
-
)
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
class MCPManualTestCall(BaseMCPCall):
|
|
280
|
-
"""Record for a manual test record"""
|
|
281
|
-
|
|
282
|
-
call_type: str = MCPCallType.MANUAL_TEST
|
|
283
|
-
custom_data: dict[str, Any] = Field(default_factory=dict)
|
|
284
|
-
|
|
285
|
-
@classmethod
|
|
286
|
-
def create(cls, task_run_id: str, **custom_data: Any) -> MCPManualTestCall:
|
|
287
|
-
"""Create a manual test record with custom data"""
|
|
288
|
-
return cls(
|
|
289
|
-
task_run_id=task_run_id,
|
|
290
|
-
status=StatusType.COMPLETED,
|
|
291
|
-
custom_data=custom_data,
|
|
292
|
-
timestamp=datetime.now().timestamp(),
|
|
293
|
-
)
|
|
294
|
-
|
|
295
|
-
|
|
296
235
|
class MCPTelemetryRecord(BaseModel):
|
|
297
236
|
"""Container for a set of related MCP telemetry records"""
|
|
298
237
|
|
|
@@ -320,9 +259,9 @@ class MCPTelemetryRecord(BaseModel):
|
|
|
320
259
|
|
|
321
260
|
|
|
322
261
|
class TrajectoryStep(BaseModel):
|
|
323
|
-
"""Model
|
|
262
|
+
"""Model for telemetry export format."""
|
|
324
263
|
|
|
325
|
-
type: str = Field(default="mcp-step")
|
|
264
|
+
type: str = Field(default="mcp-step")
|
|
326
265
|
observation_url: str | None = None
|
|
327
266
|
observation_text: str | None = None
|
|
328
267
|
actions: list[dict[str, Any]] = Field(default_factory=list)
|
|
@@ -64,21 +64,24 @@ class TestRootTraceContext:
|
|
|
64
64
|
class TestMCPCallBuffer:
|
|
65
65
|
"""Test MCP call buffer management."""
|
|
66
66
|
|
|
67
|
-
def
|
|
67
|
+
def reset_context(self):
|
|
68
68
|
"""Clear buffer before each test."""
|
|
69
69
|
# Flush any existing calls and reset context
|
|
70
|
+
set_current_task_run_id(None)
|
|
71
|
+
# Clear any existing buffers by setting a temporary task ID and flushing
|
|
72
|
+
set_current_task_run_id("temp-cleanup")
|
|
70
73
|
flush_buffer()
|
|
71
74
|
set_current_task_run_id(None)
|
|
72
75
|
|
|
73
76
|
def test_flush_buffer_empty(self):
|
|
74
77
|
"""Test flushing empty buffer."""
|
|
75
|
-
self.
|
|
78
|
+
self.reset_context()
|
|
76
79
|
result = flush_buffer()
|
|
77
80
|
assert result == []
|
|
78
81
|
|
|
79
82
|
def test_add_and_flush_mcp_call(self):
|
|
80
83
|
"""Test adding and flushing MCP calls."""
|
|
81
|
-
self.
|
|
84
|
+
self.reset_context()
|
|
82
85
|
|
|
83
86
|
# Set active task run ID
|
|
84
87
|
set_current_task_run_id("test-task")
|
|
@@ -101,7 +104,7 @@ class TestMCPCallBuffer:
|
|
|
101
104
|
|
|
102
105
|
def test_add_multiple_mcp_calls(self):
|
|
103
106
|
"""Test adding multiple MCP calls."""
|
|
104
|
-
self.
|
|
107
|
+
self.reset_context()
|
|
105
108
|
|
|
106
109
|
# Set active task run ID
|
|
107
110
|
set_current_task_run_id("test-task")
|
|
@@ -122,7 +125,7 @@ class TestMCPCallBuffer:
|
|
|
122
125
|
|
|
123
126
|
def test_buffer_isolation_per_task(self):
|
|
124
127
|
"""Test that MCP call buffers contain all calls regardless of task ID."""
|
|
125
|
-
self.
|
|
128
|
+
self.reset_context()
|
|
126
129
|
|
|
127
130
|
# Set task run ID 1
|
|
128
131
|
set_current_task_run_id("task-1")
|
|
@@ -150,7 +153,7 @@ class TestMCPCallBuffer:
|
|
|
150
153
|
|
|
151
154
|
def test_buffer_mcp_call_without_task_id(self):
|
|
152
155
|
"""Test adding MCP call when no task run ID is set."""
|
|
153
|
-
self.
|
|
156
|
+
self.reset_context()
|
|
154
157
|
set_current_task_run_id(None)
|
|
155
158
|
|
|
156
159
|
mock_call = MagicMock(spec=BaseMCPCall)
|
|
@@ -8,21 +8,25 @@ import pytest
|
|
|
8
8
|
|
|
9
9
|
from hud.telemetry._trace import (
|
|
10
10
|
init_telemetry,
|
|
11
|
-
register_trace,
|
|
12
11
|
trace,
|
|
12
|
+
trace_decorator,
|
|
13
|
+
trace_open,
|
|
13
14
|
)
|
|
14
15
|
from hud.telemetry.context import get_current_task_run_id as actual_get_current_task_run_id
|
|
15
16
|
from hud.telemetry.context import is_root_trace as actual_is_root_trace
|
|
16
|
-
from hud.telemetry.context import reset_context
|
|
17
17
|
from hud.telemetry.context import set_current_task_run_id as actual_set_current_task_run_id
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
@pytest.fixture(autouse=True)
|
|
21
21
|
def reset_telemetry_context_fixture():
|
|
22
22
|
"""Ensures telemetry context is reset before and after each test in this file."""
|
|
23
|
-
|
|
23
|
+
# Reset context before test
|
|
24
|
+
actual_set_current_task_run_id(None)
|
|
25
|
+
actual_is_root_trace.set(False)
|
|
24
26
|
yield
|
|
25
|
-
|
|
27
|
+
# Reset context after test
|
|
28
|
+
actual_set_current_task_run_id(None)
|
|
29
|
+
actual_is_root_trace.set(False)
|
|
26
30
|
|
|
27
31
|
|
|
28
32
|
class TestInitTelemetry:
|
|
@@ -44,12 +48,12 @@ class TestTrace:
|
|
|
44
48
|
"hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
|
|
45
49
|
)
|
|
46
50
|
mock_submit_loop = mocker.patch(
|
|
47
|
-
"hud.telemetry.
|
|
51
|
+
"hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
48
52
|
)
|
|
49
53
|
|
|
50
54
|
initial_root_state = actual_is_root_trace.get()
|
|
51
55
|
|
|
52
|
-
with
|
|
56
|
+
with trace_open() as task_run_id:
|
|
53
57
|
assert isinstance(task_run_id, str)
|
|
54
58
|
uuid.UUID(task_run_id)
|
|
55
59
|
assert actual_get_current_task_run_id() == task_run_id
|
|
@@ -58,7 +62,8 @@ class TestTrace:
|
|
|
58
62
|
assert actual_get_current_task_run_id() is None
|
|
59
63
|
assert actual_is_root_trace.get() == initial_root_state
|
|
60
64
|
mock_flush.assert_called_once()
|
|
61
|
-
|
|
65
|
+
# submit_to_worker_loop is now called for status updates
|
|
66
|
+
assert mock_submit_loop.call_count == 2 # INITIALIZING and COMPLETED
|
|
62
67
|
|
|
63
68
|
def test_trace_with_name_and_attributes(self, mocker):
|
|
64
69
|
"""Test trace with name and attributes, checking they are passed on."""
|
|
@@ -67,17 +72,18 @@ class TestTrace:
|
|
|
67
72
|
"hud.telemetry._trace.flush_buffer", return_value=mock_mcp_calls, autospec=True
|
|
68
73
|
)
|
|
69
74
|
mock_submit_loop = mocker.patch(
|
|
70
|
-
"hud.telemetry.
|
|
75
|
+
"hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
71
76
|
)
|
|
72
77
|
|
|
73
78
|
trace_name = "test_trace_with_data"
|
|
74
79
|
attrs = {"key": "value", "number": 42}
|
|
75
80
|
|
|
76
|
-
with
|
|
81
|
+
with trace_open(name=trace_name, attributes=attrs) as task_run_id:
|
|
77
82
|
assert isinstance(task_run_id, str)
|
|
78
83
|
|
|
79
84
|
mock_flush.assert_called_once()
|
|
80
|
-
|
|
85
|
+
# submit_to_worker_loop is now called for status updates
|
|
86
|
+
assert mock_submit_loop.call_count == 2 # INITIALIZING and COMPLETED
|
|
81
87
|
|
|
82
88
|
@pytest.mark.asyncio
|
|
83
89
|
async def test_trace_with_mcp_calls_exports(self, mocker):
|
|
@@ -87,36 +93,32 @@ class TestTrace:
|
|
|
87
93
|
"hud.telemetry._trace.flush_buffer", return_value=mock_mcp_calls, autospec=True
|
|
88
94
|
)
|
|
89
95
|
mock_submit_loop = mocker.patch(
|
|
90
|
-
"hud.telemetry.
|
|
96
|
+
"hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
91
97
|
)
|
|
92
98
|
|
|
93
99
|
async def mock_export(*args, **kwargs):
|
|
94
100
|
return None
|
|
95
101
|
|
|
96
|
-
|
|
97
|
-
"hud.telemetry.
|
|
102
|
+
mocker.patch(
|
|
103
|
+
"hud.telemetry.exporter.export_telemetry",
|
|
98
104
|
side_effect=mock_export,
|
|
99
105
|
)
|
|
100
106
|
|
|
101
107
|
test_attrs = {"custom_attr": "test_val"}
|
|
102
108
|
test_name = "mcp_export_test"
|
|
103
109
|
|
|
104
|
-
with
|
|
110
|
+
with trace_open(name=test_name, attributes=test_attrs) as task_run_id:
|
|
105
111
|
pass
|
|
106
112
|
|
|
107
113
|
mock_flush.assert_called_once()
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
assert
|
|
114
|
-
|
|
115
|
-
assert
|
|
116
|
-
assert "start_time" in kwargs["trace_attributes"]
|
|
117
|
-
assert "end_time" in kwargs["trace_attributes"]
|
|
118
|
-
assert "duration" in kwargs["trace_attributes"]
|
|
119
|
-
assert kwargs["trace_attributes"]["is_root"] is True
|
|
114
|
+
# submit_to_worker_loop is now called for status updates and export
|
|
115
|
+
# The exact count may vary depending on whether export_incremental is called
|
|
116
|
+
assert mock_submit_loop.call_count >= 2 # At least INITIALIZING and COMPLETED
|
|
117
|
+
|
|
118
|
+
# With the new export flow, export_telemetry is submitted to worker loop
|
|
119
|
+
# so we can't directly assert on it being called synchronously
|
|
120
|
+
# Instead, verify that the trace completed successfully
|
|
121
|
+
assert task_run_id is not None
|
|
120
122
|
|
|
121
123
|
def test_trace_nested(self, mocker):
|
|
122
124
|
"""Test nested traces, verifying context restoration and root trace logic."""
|
|
@@ -127,13 +129,13 @@ class TestTrace:
|
|
|
127
129
|
"hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
|
|
128
130
|
)
|
|
129
131
|
mock_submit_loop_internal = mocker.patch(
|
|
130
|
-
"hud.telemetry.
|
|
132
|
+
"hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
131
133
|
)
|
|
132
134
|
|
|
133
135
|
assert actual_get_current_task_run_id() is None
|
|
134
136
|
assert actual_is_root_trace.get() is False
|
|
135
137
|
|
|
136
|
-
with
|
|
138
|
+
with trace_open(name="outer") as outer_id:
|
|
137
139
|
assert actual_get_current_task_run_id() == outer_id
|
|
138
140
|
assert actual_is_root_trace.get() is True
|
|
139
141
|
with trace(name="inner") as inner_id:
|
|
@@ -146,7 +148,8 @@ class TestTrace:
|
|
|
146
148
|
assert actual_get_current_task_run_id() is None
|
|
147
149
|
assert actual_is_root_trace.get() is False
|
|
148
150
|
assert mock_flush_internal.call_count == 2
|
|
149
|
-
|
|
151
|
+
# submit_to_worker_loop is now called for status updates
|
|
152
|
+
assert mock_submit_loop_internal.call_count == 2 # Only outer trace sends status updates
|
|
150
153
|
|
|
151
154
|
def test_trace_exception_handling(self, mocker):
|
|
152
155
|
"""Test trace handles exceptions properly and restores context."""
|
|
@@ -159,10 +162,13 @@ class TestTrace:
|
|
|
159
162
|
"hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
|
|
160
163
|
)
|
|
161
164
|
mock_submit_loop = mocker.patch(
|
|
162
|
-
"hud.telemetry.
|
|
165
|
+
"hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
|
|
163
166
|
)
|
|
164
167
|
|
|
165
|
-
with
|
|
168
|
+
with (
|
|
169
|
+
pytest.raises(ValueError, match="Test exception"),
|
|
170
|
+
trace_open(name="trace_with_exception"),
|
|
171
|
+
):
|
|
166
172
|
assert actual_get_current_task_run_id() != initial_task_id_before_trace
|
|
167
173
|
assert actual_is_root_trace.get() is False
|
|
168
174
|
raise ValueError("Test exception")
|
|
@@ -173,98 +179,134 @@ class TestTrace:
|
|
|
173
179
|
mock_submit_loop.assert_not_called()
|
|
174
180
|
|
|
175
181
|
|
|
176
|
-
class
|
|
177
|
-
"""Test the
|
|
182
|
+
class TestTraceSync:
|
|
183
|
+
"""Test the trace_sync context manager."""
|
|
184
|
+
|
|
185
|
+
def test_trace_sync_basic(self, mocker):
|
|
186
|
+
"""Test trace calls trace_open and flush."""
|
|
187
|
+
mock_flush = mocker.patch("hud.flush", autospec=True)
|
|
188
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open")
|
|
189
|
+
mock_trace_open.return_value.__enter__.return_value = "test-task-id"
|
|
190
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
191
|
+
|
|
192
|
+
with trace(name="test_sync") as task_run_id:
|
|
193
|
+
assert task_run_id == "test-task-id"
|
|
194
|
+
|
|
195
|
+
mock_trace_open.assert_called_once_with(name="test_sync", agent_model=None, attributes=None)
|
|
196
|
+
mock_flush.assert_called_once()
|
|
197
|
+
|
|
198
|
+
def test_trace_sync_with_attributes(self, mocker):
|
|
199
|
+
"""Test trace passes attributes correctly."""
|
|
200
|
+
mock_flush = mocker.patch("hud.flush", autospec=True)
|
|
201
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open")
|
|
202
|
+
mock_trace_open.return_value.__enter__.return_value = "test-task-id"
|
|
203
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
204
|
+
attrs = {"key": "value"}
|
|
205
|
+
|
|
206
|
+
with trace(name="test_sync", attributes=attrs):
|
|
207
|
+
pass
|
|
208
|
+
|
|
209
|
+
mock_trace_open.assert_called_once_with(
|
|
210
|
+
name="test_sync", agent_model=None, attributes=attrs
|
|
211
|
+
)
|
|
212
|
+
mock_flush.assert_called_once()
|
|
213
|
+
|
|
178
214
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
mock_trace_context_manager.return_value.__enter__.return_value = "mocked_task_id"
|
|
182
|
-
mock_trace_context_manager.return_value.__exit__.return_value = None
|
|
215
|
+
class TestTraceDecorator:
|
|
216
|
+
"""Test the trace_decorator function decorator."""
|
|
183
217
|
|
|
184
|
-
|
|
218
|
+
def test_trace_decorator_sync_function(self, mocker):
|
|
219
|
+
"""Test trace_decorator on synchronous functions."""
|
|
220
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
221
|
+
mock_trace_open.return_value.__enter__.return_value = "mocked_task_id"
|
|
222
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
223
|
+
|
|
224
|
+
@trace_decorator(name="test_func_sync")
|
|
185
225
|
def sync_function(x, y):
|
|
186
226
|
return x + y
|
|
187
227
|
|
|
188
228
|
result = sync_function(1, 2)
|
|
189
229
|
assert result == 3
|
|
190
|
-
|
|
230
|
+
mock_trace_open.assert_called_once_with(
|
|
231
|
+
name="test_func_sync", agent_model=None, attributes=None
|
|
232
|
+
)
|
|
191
233
|
|
|
192
|
-
def
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
234
|
+
def test_trace_decorator_async_function(self, mocker):
|
|
235
|
+
"""Test trace_decorator on asynchronous functions."""
|
|
236
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
237
|
+
mock_trace_open.return_value.__enter__.return_value = "mocked_task_id"
|
|
238
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
196
239
|
|
|
197
|
-
@
|
|
240
|
+
@trace_decorator(name="test_func_async")
|
|
198
241
|
async def async_function(x, y):
|
|
199
242
|
return x + y
|
|
200
243
|
|
|
201
244
|
async def run_test():
|
|
202
245
|
result = await async_function(1, 2)
|
|
203
246
|
assert result == 3
|
|
204
|
-
|
|
205
|
-
name="test_func_async", attributes=None
|
|
247
|
+
mock_trace_open.assert_called_once_with(
|
|
248
|
+
name="test_func_async", agent_model=None, attributes=None
|
|
206
249
|
)
|
|
207
250
|
|
|
208
251
|
asyncio.run(run_test())
|
|
209
252
|
|
|
210
|
-
def
|
|
211
|
-
"""Test
|
|
212
|
-
|
|
253
|
+
def test_trace_decorator_with_attributes(self, mocker):
|
|
254
|
+
"""Test trace_decorator with attributes."""
|
|
255
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
256
|
+
mock_trace_open.return_value.__enter__.return_value = "task_id"
|
|
257
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
213
258
|
|
|
214
|
-
|
|
215
|
-
def __enter__(self):
|
|
216
|
-
return "task_id"
|
|
259
|
+
attrs = {"operation": "multiply"}
|
|
217
260
|
|
|
218
|
-
|
|
219
|
-
return None
|
|
220
|
-
|
|
221
|
-
mock_trace_context_manager.return_value = _MockTraceContextManager()
|
|
222
|
-
|
|
223
|
-
attrs = {"operation": "add"}
|
|
224
|
-
|
|
225
|
-
@register_trace(name="test_func", attributes=attrs)
|
|
261
|
+
@trace_decorator(name="test_func", attributes=attrs)
|
|
226
262
|
def func_with_attrs(x):
|
|
227
263
|
return x * 2
|
|
228
264
|
|
|
229
265
|
result = func_with_attrs(5)
|
|
230
266
|
assert result == 10
|
|
231
|
-
|
|
267
|
+
mock_trace_open.assert_called_once_with(
|
|
268
|
+
name="test_func", agent_model=None, attributes=attrs
|
|
269
|
+
)
|
|
232
270
|
|
|
233
|
-
def
|
|
234
|
-
"""Test
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
271
|
+
def test_trace_decorator_without_name(self, mocker):
|
|
272
|
+
"""Test trace_decorator uses module.function name when name not provided."""
|
|
273
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
274
|
+
mock_trace_open.return_value.__enter__.return_value = "task_id"
|
|
275
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
238
276
|
|
|
239
|
-
@
|
|
277
|
+
@trace_decorator()
|
|
240
278
|
def my_function():
|
|
241
279
|
return "result"
|
|
242
280
|
|
|
243
281
|
result = my_function()
|
|
244
282
|
assert result == "result"
|
|
245
|
-
|
|
283
|
+
# Should use module.function name
|
|
284
|
+
expected_name = f"{my_function.__module__}.my_function"
|
|
285
|
+
mock_trace_open.assert_called_once_with(
|
|
286
|
+
name=expected_name, agent_model=None, attributes=None
|
|
287
|
+
)
|
|
246
288
|
|
|
247
|
-
def
|
|
248
|
-
"""Test
|
|
289
|
+
def test_trace_decorator_preserves_function_metadata(self):
|
|
290
|
+
"""Test trace_decorator preserves original function metadata."""
|
|
249
291
|
|
|
250
|
-
@
|
|
292
|
+
@trace_decorator(name="test")
|
|
251
293
|
def original_function():
|
|
252
294
|
"""Original docstring."""
|
|
253
295
|
|
|
254
296
|
assert original_function.__name__ == "original_function"
|
|
255
297
|
assert original_function.__doc__ == "Original docstring."
|
|
256
298
|
|
|
257
|
-
def
|
|
258
|
-
"""Test
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
299
|
+
def test_trace_decorator_exception_propagation(self, mocker):
|
|
300
|
+
"""Test trace_decorator propagates exceptions."""
|
|
301
|
+
mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
|
|
302
|
+
mock_trace_open.return_value.__enter__.return_value = "task_id"
|
|
303
|
+
mock_trace_open.return_value.__exit__.return_value = None
|
|
262
304
|
|
|
263
|
-
@
|
|
305
|
+
@trace_decorator()
|
|
264
306
|
def failing_function():
|
|
265
307
|
raise RuntimeError("Test error")
|
|
266
308
|
|
|
267
309
|
with pytest.raises(RuntimeError, match="Test error"):
|
|
268
310
|
failing_function()
|
|
269
311
|
|
|
270
|
-
|
|
312
|
+
mock_trace_open.assert_called_once()
|