hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (86) hide show
  1. hud/__init__.py +20 -8
  2. hud/adapters/common/adapter.py +14 -3
  3. hud/adapters/common/tests/test_adapter.py +16 -4
  4. hud/datasets.py +188 -0
  5. hud/env/docker_client.py +15 -3
  6. hud/env/environment.py +10 -7
  7. hud/env/local_docker_client.py +29 -7
  8. hud/env/remote_client.py +1 -1
  9. hud/env/remote_docker_client.py +2 -2
  10. hud/exceptions.py +2 -1
  11. hud/gym.py +0 -9
  12. hud/mcp/__init__.py +17 -0
  13. hud/mcp/base.py +631 -0
  14. hud/mcp/claude.py +321 -0
  15. hud/mcp/client.py +312 -0
  16. hud/mcp/langchain.py +250 -0
  17. hud/mcp/openai.py +334 -0
  18. hud/mcp/tests/__init__.py +1 -0
  19. hud/mcp/tests/test_base.py +512 -0
  20. hud/mcp/tests/test_claude.py +294 -0
  21. hud/mcp/tests/test_client.py +324 -0
  22. hud/mcp/tests/test_openai.py +238 -0
  23. hud/settings.py +20 -2
  24. hud/task.py +5 -88
  25. hud/taskset.py +2 -23
  26. hud/telemetry/__init__.py +16 -7
  27. hud/telemetry/_trace.py +246 -72
  28. hud/telemetry/context.py +88 -27
  29. hud/telemetry/exporter.py +171 -11
  30. hud/telemetry/instrumentation/mcp.py +174 -410
  31. hud/telemetry/job.py +141 -0
  32. hud/telemetry/mcp_models.py +13 -74
  33. hud/telemetry/tests/test_context.py +9 -6
  34. hud/telemetry/tests/test_trace.py +120 -78
  35. hud/tools/__init__.py +34 -0
  36. hud/tools/base.py +65 -0
  37. hud/tools/bash.py +137 -0
  38. hud/tools/computer/__init__.py +13 -0
  39. hud/tools/computer/anthropic.py +411 -0
  40. hud/tools/computer/hud.py +315 -0
  41. hud/tools/computer/openai.py +283 -0
  42. hud/tools/edit.py +290 -0
  43. hud/tools/executors/__init__.py +30 -0
  44. hud/tools/executors/base.py +331 -0
  45. hud/tools/executors/pyautogui.py +619 -0
  46. hud/tools/executors/tests/__init__.py +1 -0
  47. hud/tools/executors/tests/test_base_executor.py +338 -0
  48. hud/tools/executors/tests/test_pyautogui_executor.py +165 -0
  49. hud/tools/executors/xdo.py +503 -0
  50. hud/tools/helper/README.md +56 -0
  51. hud/tools/helper/__init__.py +9 -0
  52. hud/tools/helper/mcp_server.py +78 -0
  53. hud/tools/helper/server_initialization.py +115 -0
  54. hud/tools/helper/utils.py +58 -0
  55. hud/tools/playwright_tool.py +379 -0
  56. hud/tools/tests/__init__.py +3 -0
  57. hud/tools/tests/test_bash.py +152 -0
  58. hud/tools/tests/test_computer.py +52 -0
  59. hud/tools/tests/test_computer_actions.py +34 -0
  60. hud/tools/tests/test_edit.py +240 -0
  61. hud/tools/tests/test_init.py +27 -0
  62. hud/tools/tests/test_playwright_tool.py +183 -0
  63. hud/tools/tests/test_tools.py +157 -0
  64. hud/tools/tests/test_utils.py +156 -0
  65. hud/tools/utils.py +50 -0
  66. hud/trajectory.py +5 -1
  67. hud/types.py +10 -1
  68. hud/utils/tests/test_init.py +21 -0
  69. hud/utils/tests/test_version.py +1 -1
  70. hud/version.py +1 -1
  71. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/METADATA +27 -18
  72. hud_python-0.3.1.dist-info/RECORD +119 -0
  73. hud/evaluators/__init__.py +0 -9
  74. hud/evaluators/base.py +0 -32
  75. hud/evaluators/inspect.py +0 -24
  76. hud/evaluators/judge.py +0 -189
  77. hud/evaluators/match.py +0 -156
  78. hud/evaluators/remote.py +0 -65
  79. hud/evaluators/tests/__init__.py +0 -0
  80. hud/evaluators/tests/test_inspect.py +0 -12
  81. hud/evaluators/tests/test_judge.py +0 -231
  82. hud/evaluators/tests/test_match.py +0 -115
  83. hud/evaluators/tests/test_remote.py +0 -98
  84. hud_python-0.2.10.dist-info/RECORD +0 -85
  85. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
  86. {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/job.py ADDED
@@ -0,0 +1,141 @@
1
+ """Job context manager for grouping related traces."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import sys
7
+ import uuid
8
+ from contextlib import contextmanager
9
+ from contextvars import ContextVar
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ from hud.telemetry.exporter import JobStatus, submit_to_worker_loop, update_job_status
13
+
14
+ if TYPE_CHECKING:
15
+ from collections.abc import Generator
16
+ from typing import Self
17
+
18
+ logger = logging.getLogger("hud.telemetry")
19
+
20
+ # Context variables for current job
21
+ current_job_id: ContextVar[str | None] = ContextVar("current_job_id", default=None)
22
+ current_job_name: ContextVar[str | None] = ContextVar("current_job_name", default=None)
23
+
24
+
25
class JobContext:
    """Context manager for grouping traces under a job.

    Entering the context publishes this job's id and name through the
    ``current_job_id`` / ``current_job_name`` context variables and submits a
    ``JobStatus.RUNNING`` update. Leaving it submits ``JobStatus.COMPLETED``
    (or ``JobStatus.ERROR`` when an exception escaped the block) and restores
    whatever job, if any, was active before this one was entered.

    Args:
        name: Name for the job.
        taskset_name: Name of the taskset/dataset the job runs against. When
            ``None``, ``__enter__`` tries to auto-detect it from the caller's
            local variables (see ``_detect_dataset``).
        metadata: Optional metadata sent along with the initial status update.
    """

    def __init__(
        self, name: str, taskset_name: str | None = None, metadata: dict[str, Any] | None = None
    ) -> None:
        self.id = str(uuid.uuid4())
        self.name = name
        self.metadata = metadata or {}
        self.taskset_name: str | None = taskset_name
        # Tokens returned by ContextVar.set(); kept so that leaving the job
        # restores the previously active job instead of blanking the context.
        self._id_token: Any = None
        self._name_token: Any = None

    def __enter__(self) -> Self:
        """Activate the job and submit its RUNNING status; returns ``self``."""
        # Auto-detect dataset
        if self.taskset_name is None:
            self._detect_dataset()

        # Set context variables, remembering the previous values
        self._id_token = current_job_id.set(self.id)
        self._name_token = current_job_name.set(self.name)

        try:
            # Send initial status (shallow copy so later mutations of
            # self.metadata do not affect the submitted payload)
            job_metadata = {**self.metadata}
            coro = update_job_status(
                self.id, JobStatus.RUNNING, metadata=job_metadata, taskset_name=self.taskset_name
            )
            submit_to_worker_loop(coro)
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so undo the
            # context changes here before propagating the error.
            self._restore_context()
            raise

        logger.info("Started job %s (ID: %s)", self.name, self.id)
        return self

    def __exit__(
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: object
    ) -> None:
        """Submit the final job status and restore the previous job context.

        Returns ``None``, so an exception raised inside the block propagates.
        """
        try:
            # Determine final status
            if exc_type is not None:
                # Job failed with exception
                error_msg = f"{exc_type.__name__}: {exc_val}"
                coro = update_job_status(
                    self.id,
                    JobStatus.ERROR,
                    error_message=error_msg,
                    taskset_name=self.taskset_name,
                )
            else:
                # Job completed successfully
                coro = update_job_status(
                    self.id, JobStatus.COMPLETED, taskset_name=self.taskset_name
                )

            submit_to_worker_loop(coro)
        finally:
            # Always leave the context consistent, even if the status
            # submission itself failed.
            self._restore_context()

        status = "failed" if exc_type else "completed"
        logger.info("Job %s %s", self.name, status)

    def _restore_context(self) -> None:
        """Restore the job context variables to their values before ``__enter__``."""
        for var, token_attr in (
            (current_job_id, "_id_token"),
            (current_job_name, "_name_token"),
        ):
            token = getattr(self, token_attr, None)
            setattr(self, token_attr, None)
            if token is None:
                var.set(None)
                continue
            try:
                var.reset(token)
            except (ValueError, RuntimeError):
                # Token belongs to another Context or was already used;
                # fall back to clearing the variable.
                var.set(None)

    @staticmethod
    def _dataset_name(candidate: object) -> str | None:
        """Return the dataset builder name exposed by ``candidate``, if any.

        Looks at ``candidate.info.builder_name`` first and then at
        ``candidate.builder_name``. Only non-empty strings count as a name.
        """
        try:
            info = getattr(candidate, "info", None)
            found = getattr(info, "builder_name", None) if info is not None else None
            if found is None:
                found = getattr(candidate, "builder_name", None)
        except Exception:
            # Attribute access on arbitrary caller objects may raise anything;
            # detection is best-effort, so such objects are simply skipped.
            return None
        if isinstance(found, str) and found:
            return found
        return None

    def _detect_dataset(self) -> None:
        """Auto-detect a dataset in the calling scope and set ``taskset_name``.

        Walks up the call stack, ignoring frames that belong to this module or
        to ``contextlib`` (present when the job is entered through the
        ``@contextmanager`` wrapper), and searches the locals of the first two
        remaining frames - the scope containing the ``with`` statement and its
        parent - for an object exposing a builder name. Any failure is logged
        at debug level and otherwise ignored.
        """
        try:
            skipped_modules = {__name__, "contextlib"}
            frame = sys._getframe(1)
            frame_depth = 1
            inspected = 0
            while frame is not None and inspected < 2:
                if frame.f_globals.get("__name__") not in skipped_modules:
                    inspected += 1
                    # Search for Dataset objects
                    for var_value in list(frame.f_locals.values()):
                        detected = self._dataset_name(var_value)
                        if detected is not None:
                            self.taskset_name = detected
                            logger.debug(
                                "Auto-detected dataset at frame %d: %s",
                                frame_depth,
                                self.taskset_name,
                            )
                            return
                frame = frame.f_back
                frame_depth += 1
        except Exception as e:
            logger.debug("Dataset auto-detection failed: %s", e)
110
+
111
+
112
@contextmanager
def job(
    name: str, taskset_name: str | None = None, metadata: dict[str, Any] | None = None
) -> Generator[JobContext, None, None]:
    """
    Open a job scope under which related traces are grouped.

    This is a thin generator wrapper that builds a ``JobContext`` from the
    given arguments, enters it, and yields the entered context.

    Args:
        name: Name for the job
        taskset_name: Optional taskset name forwarded to ``JobContext``
        metadata: Optional metadata to include with the job

    Yields:
        The active ``JobContext``.

    Example:
        with hud.job("evaluation_run") as job:
            for task in tasks:
                with hud.trace(f"task_{task.id}"):
                    # Trace automatically includes job_id
                    result = await agent.run(task)
    """
    job_ctx = JobContext(name, taskset_name, metadata)
    with job_ctx as active:
        yield active
132
+
133
+
134
def get_current_job_id() -> str | None:
    """Return the value of the ``current_job_id`` context variable.

    The variable defaults to ``None``, which is what is returned when no job
    has set it.
    """
    active_id = current_job_id.get()
    return active_id
137
+
138
+
139
def get_current_job_name() -> str | None:
    """Return the value of the ``current_job_name`` context variable.

    The variable defaults to ``None``, which is what is returned when no job
    has set it.
    """
    active_name = current_job_name.get()
    return active_name
@@ -28,16 +28,14 @@ class StatusType(str, Enum):
28
28
 
29
29
 
30
30
  class MCPCallType(str, Enum):
31
- """Known MCP call types"""
31
+ """Enum for different types of MCP calls in telemetry."""
32
32
 
33
- SEND_REQUEST = "mcp.shared.session.send_request"
34
- SEND_NOTIFICATION = "mcp.shared.session.send_notification"
35
- RECEIVE_RESPONSE = "mcp.shared.session.receive_response"
36
- RECEIVE_REQUEST = "mcp.shared.session.receive_request"
37
- STREAM_READ = "mcp.stream.read"
38
- STREAM_WRITE = "mcp.stream.write"
39
- HANDLE_INCOMING = "mcp.handle_incoming"
40
- MANUAL_TEST = "manual.test"
33
+ # Requests and Notifications
34
+ SEND_REQUEST = "mcp.send_request"
35
+ SEND_NOTIFICATION = "mcp.send_notification"
36
+
37
+ # Responses
38
+ RECEIVE_RESPONSE = "mcp.receive_response"
41
39
 
42
40
 
43
41
  class BaseMCPCall(BaseModel):
@@ -87,6 +85,8 @@ class MCPRequestCall(BaseMCPCall):
87
85
  duration: float | None = None
88
86
  request_id: str | int | None = None
89
87
  request_data: dict[str, Any] | None = None
88
+ error: str | None = None
89
+ error_type: str | None = None
90
90
 
91
91
  @classmethod
92
92
  def from_jsonrpc_request(
@@ -191,6 +191,8 @@ class MCPNotificationCall(BaseMCPCall):
191
191
  end_time: float | None = None
192
192
  duration: float | None = None
193
193
  notification_data: dict[str, Any] | None = None
194
+ error: str | None = None
195
+ error_type: str | None = None
194
196
 
195
197
  @classmethod
196
198
  def from_jsonrpc_notification(
@@ -230,69 +232,6 @@ class MCPNotificationCall(BaseMCPCall):
230
232
  return None
231
233
 
232
234
 
233
- class MCPStreamEvent(BaseMCPCall):
234
- """Record for an MCP stream event (read or write)"""
235
-
236
- stream_event: bool = True
237
- event_type: str = Field(..., description="Type of stream event: read or write")
238
- item_type: str | None = None
239
- is_response_or_error: bool = False
240
- message_data: dict[str, Any] | None = None
241
-
242
- @classmethod
243
- def from_session_message(
244
- cls, message: SessionMessage, task_run_id: str, event_type: str, **kwargs: Any
245
- ) -> MCPStreamEvent:
246
- """Create telemetry record for a stream event"""
247
- method_name = "unknown_stream_operation"
248
- is_response = False
249
- item_type = "unknown"
250
- message_data = None
251
-
252
- if hasattr(message, "message") and hasattr(message.message, "root"):
253
- msg_root = message.message.root
254
- item_type = type(msg_root).__name__
255
- message_data = msg_root.model_dump(exclude_none=True)
256
-
257
- # Check type first before accessing attributes
258
- if isinstance(msg_root, JSONRPCRequest | JSONRPCNotification) and hasattr(
259
- msg_root, "method"
260
- ):
261
- method_name = msg_root.method
262
- elif isinstance(msg_root, JSONRPCResponse | JSONRPCError) and hasattr(msg_root, "id"):
263
- method_name = f"response_to_id_{msg_root.id}"
264
- is_response = True
265
-
266
- return cls(
267
- task_run_id=task_run_id,
268
- status=StatusType.COMPLETED,
269
- method=method_name,
270
- event_type=event_type,
271
- item_type=item_type,
272
- is_response_or_error=is_response,
273
- message_data=message_data,
274
- timestamp=datetime.now().timestamp(),
275
- **kwargs,
276
- )
277
-
278
-
279
- class MCPManualTestCall(BaseMCPCall):
280
- """Record for a manual test record"""
281
-
282
- call_type: str = MCPCallType.MANUAL_TEST
283
- custom_data: dict[str, Any] = Field(default_factory=dict)
284
-
285
- @classmethod
286
- def create(cls, task_run_id: str, **custom_data: Any) -> MCPManualTestCall:
287
- """Create a manual test record with custom data"""
288
- return cls(
289
- task_run_id=task_run_id,
290
- status=StatusType.COMPLETED,
291
- custom_data=custom_data,
292
- timestamp=datetime.now().timestamp(),
293
- )
294
-
295
-
296
235
  class MCPTelemetryRecord(BaseModel):
297
236
  """Container for a set of related MCP telemetry records"""
298
237
 
@@ -320,9 +259,9 @@ class MCPTelemetryRecord(BaseModel):
320
259
 
321
260
 
322
261
  class TrajectoryStep(BaseModel):
323
- """Model representing a single step in a trajectory, for export."""
262
+ """Model for telemetry export format."""
324
263
 
325
- type: str = Field(default="mcp-step") # Default for MCP calls
264
+ type: str = Field(default="mcp-step")
326
265
  observation_url: str | None = None
327
266
  observation_text: str | None = None
328
267
  actions: list[dict[str, Any]] = Field(default_factory=list)
@@ -64,21 +64,24 @@ class TestRootTraceContext:
64
64
  class TestMCPCallBuffer:
65
65
  """Test MCP call buffer management."""
66
66
 
67
- def setUp(self):
67
+ def reset_context(self):
68
68
  """Clear buffer before each test."""
69
69
  # Flush any existing calls and reset context
70
+ set_current_task_run_id(None)
71
+ # Clear any existing buffers by setting a temporary task ID and flushing
72
+ set_current_task_run_id("temp-cleanup")
70
73
  flush_buffer()
71
74
  set_current_task_run_id(None)
72
75
 
73
76
  def test_flush_buffer_empty(self):
74
77
  """Test flushing empty buffer."""
75
- self.setUp()
78
+ self.reset_context()
76
79
  result = flush_buffer()
77
80
  assert result == []
78
81
 
79
82
  def test_add_and_flush_mcp_call(self):
80
83
  """Test adding and flushing MCP calls."""
81
- self.setUp()
84
+ self.reset_context()
82
85
 
83
86
  # Set active task run ID
84
87
  set_current_task_run_id("test-task")
@@ -101,7 +104,7 @@ class TestMCPCallBuffer:
101
104
 
102
105
  def test_add_multiple_mcp_calls(self):
103
106
  """Test adding multiple MCP calls."""
104
- self.setUp()
107
+ self.reset_context()
105
108
 
106
109
  # Set active task run ID
107
110
  set_current_task_run_id("test-task")
@@ -122,7 +125,7 @@ class TestMCPCallBuffer:
122
125
 
123
126
  def test_buffer_isolation_per_task(self):
124
127
  """Test that MCP call buffers contain all calls regardless of task ID."""
125
- self.setUp()
128
+ self.reset_context()
126
129
 
127
130
  # Set task run ID 1
128
131
  set_current_task_run_id("task-1")
@@ -150,7 +153,7 @@ class TestMCPCallBuffer:
150
153
 
151
154
  def test_buffer_mcp_call_without_task_id(self):
152
155
  """Test adding MCP call when no task run ID is set."""
153
- self.setUp()
156
+ self.reset_context()
154
157
  set_current_task_run_id(None)
155
158
 
156
159
  mock_call = MagicMock(spec=BaseMCPCall)
@@ -8,21 +8,25 @@ import pytest
8
8
 
9
9
  from hud.telemetry._trace import (
10
10
  init_telemetry,
11
- register_trace,
12
11
  trace,
12
+ trace_decorator,
13
+ trace_open,
13
14
  )
14
15
  from hud.telemetry.context import get_current_task_run_id as actual_get_current_task_run_id
15
16
  from hud.telemetry.context import is_root_trace as actual_is_root_trace
16
- from hud.telemetry.context import reset_context
17
17
  from hud.telemetry.context import set_current_task_run_id as actual_set_current_task_run_id
18
18
 
19
19
 
20
20
  @pytest.fixture(autouse=True)
21
21
  def reset_telemetry_context_fixture():
22
22
  """Ensures telemetry context is reset before and after each test in this file."""
23
- reset_context()
23
+ # Reset context before test
24
+ actual_set_current_task_run_id(None)
25
+ actual_is_root_trace.set(False)
24
26
  yield
25
- reset_context()
27
+ # Reset context after test
28
+ actual_set_current_task_run_id(None)
29
+ actual_is_root_trace.set(False)
26
30
 
27
31
 
28
32
  class TestInitTelemetry:
@@ -44,12 +48,12 @@ class TestTrace:
44
48
  "hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
45
49
  )
46
50
  mock_submit_loop = mocker.patch(
47
- "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
51
+ "hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
48
52
  )
49
53
 
50
54
  initial_root_state = actual_is_root_trace.get()
51
55
 
52
- with trace() as task_run_id:
56
+ with trace_open() as task_run_id:
53
57
  assert isinstance(task_run_id, str)
54
58
  uuid.UUID(task_run_id)
55
59
  assert actual_get_current_task_run_id() == task_run_id
@@ -58,7 +62,8 @@ class TestTrace:
58
62
  assert actual_get_current_task_run_id() is None
59
63
  assert actual_is_root_trace.get() == initial_root_state
60
64
  mock_flush.assert_called_once()
61
- mock_submit_loop.assert_not_called()
65
+ # submit_to_worker_loop is now called for status updates
66
+ assert mock_submit_loop.call_count == 2 # INITIALIZING and COMPLETED
62
67
 
63
68
  def test_trace_with_name_and_attributes(self, mocker):
64
69
  """Test trace with name and attributes, checking they are passed on."""
@@ -67,17 +72,18 @@ class TestTrace:
67
72
  "hud.telemetry._trace.flush_buffer", return_value=mock_mcp_calls, autospec=True
68
73
  )
69
74
  mock_submit_loop = mocker.patch(
70
- "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
75
+ "hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
71
76
  )
72
77
 
73
78
  trace_name = "test_trace_with_data"
74
79
  attrs = {"key": "value", "number": 42}
75
80
 
76
- with trace(name=trace_name, attributes=attrs) as task_run_id:
81
+ with trace_open(name=trace_name, attributes=attrs) as task_run_id:
77
82
  assert isinstance(task_run_id, str)
78
83
 
79
84
  mock_flush.assert_called_once()
80
- mock_submit_loop.assert_called_once()
85
+ # submit_to_worker_loop is now called for status updates
86
+ assert mock_submit_loop.call_count == 2 # INITIALIZING and COMPLETED
81
87
 
82
88
  @pytest.mark.asyncio
83
89
  async def test_trace_with_mcp_calls_exports(self, mocker):
@@ -87,36 +93,32 @@ class TestTrace:
87
93
  "hud.telemetry._trace.flush_buffer", return_value=mock_mcp_calls, autospec=True
88
94
  )
89
95
  mock_submit_loop = mocker.patch(
90
- "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
96
+ "hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
91
97
  )
92
98
 
93
99
  async def mock_export(*args, **kwargs):
94
100
  return None
95
101
 
96
- mock_export_actual_coro = mocker.patch(
97
- "hud.telemetry._trace.exporter.export_telemetry",
102
+ mocker.patch(
103
+ "hud.telemetry.exporter.export_telemetry",
98
104
  side_effect=mock_export,
99
105
  )
100
106
 
101
107
  test_attrs = {"custom_attr": "test_val"}
102
108
  test_name = "mcp_export_test"
103
109
 
104
- with trace(name=test_name, attributes=test_attrs) as task_run_id:
110
+ with trace_open(name=test_name, attributes=test_attrs) as task_run_id:
105
111
  pass
106
112
 
107
113
  mock_flush.assert_called_once()
108
- mock_submit_loop.assert_called_once()
109
-
110
- mock_export_actual_coro.assert_called_once()
111
- args, kwargs = mock_export_actual_coro.call_args
112
- assert kwargs["task_run_id"] == task_run_id
113
- assert kwargs["mcp_calls"] == mock_mcp_calls
114
- assert kwargs["trace_attributes"]["trace_name"] == test_name
115
- assert kwargs["trace_attributes"]["custom_attr"] == "test_val"
116
- assert "start_time" in kwargs["trace_attributes"]
117
- assert "end_time" in kwargs["trace_attributes"]
118
- assert "duration" in kwargs["trace_attributes"]
119
- assert kwargs["trace_attributes"]["is_root"] is True
114
+ # submit_to_worker_loop is now called for status updates and export
115
+ # The exact count may vary depending on whether export_incremental is called
116
+ assert mock_submit_loop.call_count >= 2 # At least INITIALIZING and COMPLETED
117
+
118
+ # With the new export flow, export_telemetry is submitted to worker loop
119
+ # so we can't directly assert on it being called synchronously
120
+ # Instead, verify that the trace completed successfully
121
+ assert task_run_id is not None
120
122
 
121
123
  def test_trace_nested(self, mocker):
122
124
  """Test nested traces, verifying context restoration and root trace logic."""
@@ -127,13 +129,13 @@ class TestTrace:
127
129
  "hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
128
130
  )
129
131
  mock_submit_loop_internal = mocker.patch(
130
- "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
132
+ "hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
131
133
  )
132
134
 
133
135
  assert actual_get_current_task_run_id() is None
134
136
  assert actual_is_root_trace.get() is False
135
137
 
136
- with trace(name="outer") as outer_id:
138
+ with trace_open(name="outer") as outer_id:
137
139
  assert actual_get_current_task_run_id() == outer_id
138
140
  assert actual_is_root_trace.get() is True
139
141
  with trace(name="inner") as inner_id:
@@ -146,7 +148,8 @@ class TestTrace:
146
148
  assert actual_get_current_task_run_id() is None
147
149
  assert actual_is_root_trace.get() is False
148
150
  assert mock_flush_internal.call_count == 2
149
- mock_submit_loop_internal.assert_not_called()
151
+ # submit_to_worker_loop is now called for status updates
152
+ assert mock_submit_loop_internal.call_count == 2 # Only outer trace sends status updates
150
153
 
151
154
  def test_trace_exception_handling(self, mocker):
152
155
  """Test trace handles exceptions properly and restores context."""
@@ -159,10 +162,13 @@ class TestTrace:
159
162
  "hud.telemetry._trace.flush_buffer", return_value=[], autospec=True
160
163
  )
161
164
  mock_submit_loop = mocker.patch(
162
- "hud.telemetry._trace.submit_to_worker_loop", return_value=MagicMock(), autospec=True
165
+ "hud.telemetry.exporter.submit_to_worker_loop", return_value=MagicMock(), autospec=True
163
166
  )
164
167
 
165
- with pytest.raises(ValueError, match="Test exception"), trace(name="trace_with_exception"):
168
+ with (
169
+ pytest.raises(ValueError, match="Test exception"),
170
+ trace_open(name="trace_with_exception"),
171
+ ):
166
172
  assert actual_get_current_task_run_id() != initial_task_id_before_trace
167
173
  assert actual_is_root_trace.get() is False
168
174
  raise ValueError("Test exception")
@@ -173,98 +179,134 @@ class TestTrace:
173
179
  mock_submit_loop.assert_not_called()
174
180
 
175
181
 
176
- class TestRegisterTrace:
177
- """Test the register_trace decorator."""
182
+ class TestTraceSync:
183
+ """Test the trace_sync context manager."""
184
+
185
+ def test_trace_sync_basic(self, mocker):
186
+ """Test trace calls trace_open and flush."""
187
+ mock_flush = mocker.patch("hud.flush", autospec=True)
188
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open")
189
+ mock_trace_open.return_value.__enter__.return_value = "test-task-id"
190
+ mock_trace_open.return_value.__exit__.return_value = None
191
+
192
+ with trace(name="test_sync") as task_run_id:
193
+ assert task_run_id == "test-task-id"
194
+
195
+ mock_trace_open.assert_called_once_with(name="test_sync", agent_model=None, attributes=None)
196
+ mock_flush.assert_called_once()
197
+
198
+ def test_trace_sync_with_attributes(self, mocker):
199
+ """Test trace passes attributes correctly."""
200
+ mock_flush = mocker.patch("hud.flush", autospec=True)
201
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open")
202
+ mock_trace_open.return_value.__enter__.return_value = "test-task-id"
203
+ mock_trace_open.return_value.__exit__.return_value = None
204
+ attrs = {"key": "value"}
205
+
206
+ with trace(name="test_sync", attributes=attrs):
207
+ pass
208
+
209
+ mock_trace_open.assert_called_once_with(
210
+ name="test_sync", agent_model=None, attributes=attrs
211
+ )
212
+ mock_flush.assert_called_once()
213
+
178
214
 
179
- def test_register_trace_sync_function(self, mocker):
180
- mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
181
- mock_trace_context_manager.return_value.__enter__.return_value = "mocked_task_id"
182
- mock_trace_context_manager.return_value.__exit__.return_value = None
215
+ class TestTraceDecorator:
216
+ """Test the trace_decorator function decorator."""
183
217
 
184
- @register_trace(name="test_func_sync")
218
+ def test_trace_decorator_sync_function(self, mocker):
219
+ """Test trace_decorator on synchronous functions."""
220
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
221
+ mock_trace_open.return_value.__enter__.return_value = "mocked_task_id"
222
+ mock_trace_open.return_value.__exit__.return_value = None
223
+
224
+ @trace_decorator(name="test_func_sync")
185
225
  def sync_function(x, y):
186
226
  return x + y
187
227
 
188
228
  result = sync_function(1, 2)
189
229
  assert result == 3
190
- mock_trace_context_manager.assert_called_once_with(name="test_func_sync", attributes=None)
230
+ mock_trace_open.assert_called_once_with(
231
+ name="test_func_sync", agent_model=None, attributes=None
232
+ )
191
233
 
192
- def test_register_trace_async_function(self, mocker):
193
- mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
194
- mock_trace_context_manager.return_value.__enter__.return_value = "mocked_task_id"
195
- mock_trace_context_manager.return_value.__exit__.return_value = None
234
+ def test_trace_decorator_async_function(self, mocker):
235
+ """Test trace_decorator on asynchronous functions."""
236
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
237
+ mock_trace_open.return_value.__enter__.return_value = "mocked_task_id"
238
+ mock_trace_open.return_value.__exit__.return_value = None
196
239
 
197
- @register_trace(name="test_func_async")
240
+ @trace_decorator(name="test_func_async")
198
241
  async def async_function(x, y):
199
242
  return x + y
200
243
 
201
244
  async def run_test():
202
245
  result = await async_function(1, 2)
203
246
  assert result == 3
204
- mock_trace_context_manager.assert_called_once_with(
205
- name="test_func_async", attributes=None
247
+ mock_trace_open.assert_called_once_with(
248
+ name="test_func_async", agent_model=None, attributes=None
206
249
  )
207
250
 
208
251
  asyncio.run(run_test())
209
252
 
210
- def test_register_trace_with_attributes(self, mocker):
211
- """Test register_trace with attributes."""
212
- mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
253
+ def test_trace_decorator_with_attributes(self, mocker):
254
+ """Test trace_decorator with attributes."""
255
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
256
+ mock_trace_open.return_value.__enter__.return_value = "task_id"
257
+ mock_trace_open.return_value.__exit__.return_value = None
213
258
 
214
- class _MockTraceContextManager:
215
- def __enter__(self):
216
- return "task_id"
259
+ attrs = {"operation": "multiply"}
217
260
 
218
- def __exit__(self, exc_type, exc_value, traceback):
219
- return None
220
-
221
- mock_trace_context_manager.return_value = _MockTraceContextManager()
222
-
223
- attrs = {"operation": "add"}
224
-
225
- @register_trace(name="test_func", attributes=attrs)
261
+ @trace_decorator(name="test_func", attributes=attrs)
226
262
  def func_with_attrs(x):
227
263
  return x * 2
228
264
 
229
265
  result = func_with_attrs(5)
230
266
  assert result == 10
231
- mock_trace_context_manager.assert_called_once_with(name="test_func", attributes=attrs)
267
+ mock_trace_open.assert_called_once_with(
268
+ name="test_func", agent_model=None, attributes=attrs
269
+ )
232
270
 
233
- def test_register_trace_without_name(self, mocker):
234
- """Test register_trace uses function name when name not provided."""
235
- mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
236
- mock_trace_context_manager.return_value.__enter__.return_value = "task_id"
237
- mock_trace_context_manager.return_value.__exit__.return_value = None
271
+ def test_trace_decorator_without_name(self, mocker):
272
+ """Test trace_decorator uses module.function name when name not provided."""
273
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
274
+ mock_trace_open.return_value.__enter__.return_value = "task_id"
275
+ mock_trace_open.return_value.__exit__.return_value = None
238
276
 
239
- @register_trace()
277
+ @trace_decorator()
240
278
  def my_function():
241
279
  return "result"
242
280
 
243
281
  result = my_function()
244
282
  assert result == "result"
245
- mock_trace_context_manager.assert_called_once_with(name="my_function", attributes=None)
283
+ # Should use module.function name
284
+ expected_name = f"{my_function.__module__}.my_function"
285
+ mock_trace_open.assert_called_once_with(
286
+ name=expected_name, agent_model=None, attributes=None
287
+ )
246
288
 
247
- def test_register_trace_preserves_function_metadata(self):
248
- """Test register_trace preserves original function metadata."""
289
+ def test_trace_decorator_preserves_function_metadata(self):
290
+ """Test trace_decorator preserves original function metadata."""
249
291
 
250
- @register_trace(name="test")
292
+ @trace_decorator(name="test")
251
293
  def original_function():
252
294
  """Original docstring."""
253
295
 
254
296
  assert original_function.__name__ == "original_function"
255
297
  assert original_function.__doc__ == "Original docstring."
256
298
 
257
- def test_register_trace_exception_propagation(self, mocker):
258
- """Test register_trace propagates exceptions."""
259
- mock_trace_context_manager = mocker.patch("hud.telemetry._trace.trace", autospec=True)
260
- mock_trace_context_manager.return_value.__enter__.return_value = "task_id"
261
- mock_trace_context_manager.return_value.__exit__.return_value = None
299
+ def test_trace_decorator_exception_propagation(self, mocker):
300
+ """Test trace_decorator propagates exceptions."""
301
+ mock_trace_open = mocker.patch("hud.telemetry._trace.trace_open", autospec=True)
302
+ mock_trace_open.return_value.__enter__.return_value = "task_id"
303
+ mock_trace_open.return_value.__exit__.return_value = None
262
304
 
263
- @register_trace()
305
+ @trace_decorator()
264
306
  def failing_function():
265
307
  raise RuntimeError("Test error")
266
308
 
267
309
  with pytest.raises(RuntimeError, match="Test error"):
268
310
  failing_function()
269
311
 
270
- mock_trace_context_manager.assert_called_once()
312
+ mock_trace_open.assert_called_once()