hud-python 0.2.10__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- hud/__init__.py +20 -8
- hud/adapters/common/adapter.py +14 -3
- hud/adapters/common/tests/test_adapter.py +16 -4
- hud/datasets.py +188 -0
- hud/env/docker_client.py +15 -3
- hud/env/environment.py +10 -7
- hud/env/local_docker_client.py +29 -7
- hud/env/remote_client.py +1 -1
- hud/env/remote_docker_client.py +2 -2
- hud/exceptions.py +2 -1
- hud/gym.py +0 -9
- hud/mcp/__init__.py +17 -0
- hud/mcp/base.py +631 -0
- hud/mcp/claude.py +321 -0
- hud/mcp/client.py +312 -0
- hud/mcp/langchain.py +250 -0
- hud/mcp/openai.py +334 -0
- hud/mcp/tests/__init__.py +1 -0
- hud/mcp/tests/test_base.py +512 -0
- hud/mcp/tests/test_claude.py +294 -0
- hud/mcp/tests/test_client.py +324 -0
- hud/mcp/tests/test_openai.py +238 -0
- hud/settings.py +20 -2
- hud/task.py +5 -88
- hud/taskset.py +2 -23
- hud/telemetry/__init__.py +16 -7
- hud/telemetry/_trace.py +246 -72
- hud/telemetry/context.py +88 -27
- hud/telemetry/exporter.py +171 -11
- hud/telemetry/instrumentation/mcp.py +174 -410
- hud/telemetry/job.py +141 -0
- hud/telemetry/mcp_models.py +13 -74
- hud/telemetry/tests/test_context.py +9 -6
- hud/telemetry/tests/test_trace.py +120 -78
- hud/tools/__init__.py +34 -0
- hud/tools/base.py +65 -0
- hud/tools/bash.py +137 -0
- hud/tools/computer/__init__.py +13 -0
- hud/tools/computer/anthropic.py +411 -0
- hud/tools/computer/hud.py +315 -0
- hud/tools/computer/openai.py +283 -0
- hud/tools/edit.py +290 -0
- hud/tools/executors/__init__.py +30 -0
- hud/tools/executors/base.py +331 -0
- hud/tools/executors/pyautogui.py +619 -0
- hud/tools/executors/tests/__init__.py +1 -0
- hud/tools/executors/tests/test_base_executor.py +338 -0
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -0
- hud/tools/executors/xdo.py +503 -0
- hud/tools/helper/README.md +56 -0
- hud/tools/helper/__init__.py +9 -0
- hud/tools/helper/mcp_server.py +78 -0
- hud/tools/helper/server_initialization.py +115 -0
- hud/tools/helper/utils.py +58 -0
- hud/tools/playwright_tool.py +379 -0
- hud/tools/tests/__init__.py +3 -0
- hud/tools/tests/test_bash.py +152 -0
- hud/tools/tests/test_computer.py +52 -0
- hud/tools/tests/test_computer_actions.py +34 -0
- hud/tools/tests/test_edit.py +240 -0
- hud/tools/tests/test_init.py +27 -0
- hud/tools/tests/test_playwright_tool.py +183 -0
- hud/tools/tests/test_tools.py +157 -0
- hud/tools/tests/test_utils.py +156 -0
- hud/tools/utils.py +50 -0
- hud/trajectory.py +5 -1
- hud/types.py +10 -1
- hud/utils/tests/test_init.py +21 -0
- hud/utils/tests/test_version.py +1 -1
- hud/version.py +1 -1
- {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/METADATA +27 -18
- hud_python-0.3.1.dist-info/RECORD +119 -0
- hud/evaluators/__init__.py +0 -9
- hud/evaluators/base.py +0 -32
- hud/evaluators/inspect.py +0 -24
- hud/evaluators/judge.py +0 -189
- hud/evaluators/match.py +0 -156
- hud/evaluators/remote.py +0 -65
- hud/evaluators/tests/__init__.py +0 -0
- hud/evaluators/tests/test_inspect.py +0 -12
- hud/evaluators/tests/test_judge.py +0 -231
- hud/evaluators/tests/test_match.py +0 -115
- hud/evaluators/tests/test_remote.py +0 -98
- hud_python-0.2.10.dist-info/RECORD +0 -85
- {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/WHEEL +0 -0
- {hud_python-0.2.10.dist-info → hud_python-0.3.1.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/_trace.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+# ruff: noqa: T201
 import asyncio
 import logging
 import time
@@ -11,61 +12,173 @@ from typing import (
     Any,
     ParamSpec,
     TypeVar,
-    overload,
 )
 
-from hud.telemetry import exporter
 from hud.telemetry.context import (
     flush_buffer,
     get_current_task_run_id,
     is_root_trace,
     set_current_task_run_id,
 )
-from hud.telemetry.exporter import submit_to_worker_loop
 from hud.telemetry.instrumentation.registry import registry
 
 if TYPE_CHECKING:
-    from collections.abc import (
-        Callable,
-        Coroutine,
-        Generator,
-    )
+    from collections.abc import Generator
 
-    from hud.telemetry.mcp_models import BaseMCPCall
 
 logger = logging.getLogger("hud.telemetry")
 T = TypeVar("T")
+P = ParamSpec("P")
+
+# Track whether telemetry has been initialized
+_telemetry_initialized = False
 
 
 def init_telemetry() -> None:
     """Initialize telemetry instrumentors and ensure worker is started if telemetry is active."""
+    global _telemetry_initialized
+    if _telemetry_initialized:
+        return
+
     registry.install_all()
     logger.info("Telemetry initialized.")
+    _telemetry_initialized = True
+
+
+def _ensure_telemetry_initialized() -> None:
+    """Ensure telemetry is initialized - called lazily by trace functions."""
+    from hud.settings import settings
+
+    if settings.telemetry_enabled and not _telemetry_initialized:
+        init_telemetry()
+
+
+def _detect_agent_model() -> str | None:
+    """
+    Try to auto-detect agent model from parent frames.
+    This is a best-effort approach and may not work in all cases.
+    """
+    import sys
+
+    try:
+        # Try different frame depths (2-3 typically covers most cases)
+        for depth in range(2, 3):
+            try:
+                frame = sys._getframe(depth)
+                # Check local variables for agent objects
+                for var_value in frame.f_locals.values():
+                    # Look for objects with model_name attribute
+                    if hasattr(var_value, "model_name") and hasattr(var_value, "run"):
+                        # Likely an agent object
+                        model_name = getattr(var_value, "model_name", None)
+                        if model_name:
+                            logger.debug(
+                                "Found agent with model_name in frame %d: %s", depth, model_name
+                            )
+                            return str(model_name)
+
+                # Also check self in case we're in a method
+                if "self" in frame.f_locals:
+                    self_obj = frame.f_locals["self"]
+                    if hasattr(self_obj, "model_name"):
+                        model_name = getattr(self_obj, "model_name", None)
+                        if model_name:
+                            logger.debug(
+                                "Found agent model_name in self at frame %d: %s", depth, model_name
+                            )
+                            return str(model_name)
+
+            except (ValueError, AttributeError):
+                # Frame doesn't exist at this depth or other issues
+                continue
+
+    except Exception as e:
+        logger.debug("Agent model detection failed: %s", e)
+
+    return None
+
+
+def _print_trace_url(task_run_id: str) -> None:
+    """Print the trace URL in a colorful box."""
+    url = f"https://app.hud.so/trace/{task_run_id}"
+    header = "🚀 See your agent live at:"
+
+    # ANSI color codes
+    DIM = "\033[90m"  # Dim/Gray for border (visible on both light and dark terminals)
+    GOLD = "\033[33m"  # Gold/Yellow for URL
+    RESET = "\033[0m"
+    BOLD = "\033[1m"
+
+    # Calculate box width based on the longest line
+    box_width = max(len(url), len(header)) + 6
+
+    # Box drawing characters
+    top_border = "╔" + "═" * (box_width - 2) + "╗"
+    bottom_border = "╚" + "═" * (box_width - 2) + "╝"
+    divider = "╟" + "─" * (box_width - 2) + "╢"
+
+    # Center the content
+    header_padding = (box_width - len(header) - 2) // 2
+    url_padding = (box_width - len(url) - 2) // 2
+
+    # Print the box
+    print(f"\n{DIM}{top_border}{RESET}")
+    print(
+        f"{DIM}║{RESET}{' ' * header_padding}{header}{' ' * (box_width - len(header) - header_padding - 3)}{DIM}║{RESET}"  # noqa: E501
+    )
+    print(f"{DIM}{divider}{RESET}")
+    print(
+        f"{DIM}║{RESET}{' ' * url_padding}{BOLD}{GOLD}{url}{RESET}{' ' * (box_width - len(url) - url_padding - 2)}{DIM}║{RESET}"  # noqa: E501
+    )
+    print(f"{DIM}{bottom_border}{RESET}\n")
+
+
+def _print_trace_complete_url(task_run_id: str) -> None:
+    """Print the trace completion URL in a simple colorful format."""
+    url = f"https://app.hud.so/trace/{task_run_id}"
+
+    # ANSI color codes
+    GREEN = "\033[92m"
+    GOLD = "\033[33m"
+    RESET = "\033[0m"
+    DIM = "\033[2m"
+    BOLD = "\033[1m"
+
+    print(f"\n{GREEN}✓ Trace complete!{RESET} {DIM}View at:{RESET} {BOLD}{GOLD}{url}{RESET}\n")
 
 
 @contextmanager
-def trace(
+def trace_open(
     name: str | None = None,
+    agent_model: str | None = None,
+    run_id: str | None = None,
     attributes: dict[str, Any] | None = None,
 ) -> Generator[str, None, None]:
     """
     Context manager for tracing a block of code.
-    The task_run_id is always generated internally as a UUID.
-    Telemetry export is handled by a background worker thread.
 
     Args:
-        attributes: Optional dictionary of attributes to associate with this trace
         name: Optional name for this trace, will be added to attributes.
+        attributes: Optional dictionary of attributes to associate with this trace
 
     Returns:
         The generated task run ID (UUID string) used for this trace
     """
-
+    # Lazy initialization - only initialize telemetry when trace() is actually called
+    _ensure_telemetry_initialized()
+
+    task_run_id = run_id or str(uuid.uuid4())
+
+    _print_trace_url(task_run_id)
 
     local_attributes = attributes.copy() if attributes is not None else {}
     if name is not None:
         local_attributes["trace_name"] = name
 
+    # Auto-detect agent if not explicitly provided
+    if agent_model is None:
+        agent_model = _detect_agent_model()
+
     start_time = time.time()
     logger.debug("Starting trace %s (Name: %s)", task_run_id, name if name else "Unnamed")
 
@@ -76,96 +189,157 @@ def trace(
     is_root = previous_task_id is None
     is_root_trace.set(is_root)
 
+    # Update status to initializing for root traces
+    if is_root:
+        from hud.telemetry.exporter import (
+            TaskRunStatus,
+            submit_to_worker_loop,
+            update_task_run_status,
+        )
+        from hud.telemetry.job import get_current_job_id
+
+        # Include metadata in the initial status update
+        initial_metadata = local_attributes.copy()
+        initial_metadata["is_root_trace"] = is_root
+        if agent_model:
+            initial_metadata["agent_model"] = agent_model
+
+        # Get job_id if we're in a job context
+        job_id = get_current_job_id()
+
+        coro = update_task_run_status(
+            task_run_id, TaskRunStatus.INITIALIZING, metadata=initial_metadata, job_id=job_id
+        )
+        submit_to_worker_loop(coro)
+        logger.debug("Updated task run %s status to INITIALIZING with metadata", task_run_id)
+
+    error_occurred = False
+    error_message = None
+
     try:
         yield task_run_id
+    except Exception as e:
+        error_occurred = True
+        error_message = str(e)
+        raise
     finally:
         end_time = time.time()
         duration = end_time - start_time
+        local_attributes["duration_seconds"] = duration
+        local_attributes["is_root_trace"] = is_root
 
-
+        logger.debug("Finishing trace %s after %.2f seconds", task_run_id, duration)
 
-
-
-
-
-
-
-
+        # Update status for root traces
+        if is_root:
+            from hud.telemetry.exporter import (
+                TaskRunStatus,
+                submit_to_worker_loop,
+                update_task_run_status,
+            )
 
-
-
-
-
-
-
+            # Include final metadata with duration
+            final_metadata = local_attributes.copy()
+
+            if error_occurred:
+                coro = update_task_run_status(
+                    task_run_id, TaskRunStatus.ERROR, error_message, metadata=final_metadata
                )
-
-
-
-
-
-
-                logger.warning(
-                    "Failed to submit telemetry for trace %s to"
-                    "background worker (loop not available).",
-                    task_run_id,
-                )
-        except Exception as e:
-            logger.warning("Failed to submit telemetry for trace %s: %s", task_run_id, e)
+                logger.debug("Updated task run %s status to ERROR: %s", task_run_id, error_message)
+            else:
+                coro = update_task_run_status(
+                    task_run_id, TaskRunStatus.COMPLETED, metadata=final_metadata
+                )
+                logger.debug("Updated task run %s status to COMPLETED with metadata", task_run_id)
 
-
-
+            # Wait for the status update to complete
+            future = submit_to_worker_loop(coro)
+            if future:
+                try:
+                    # Wait up to 5 seconds for the status update
+                    import concurrent.futures
 
-
-
-
-
-
-
+                    future.result(timeout=5.0)
+                    logger.debug("Status update completed successfully")
+                except concurrent.futures.TimeoutError:
+                    logger.warning("Timeout waiting for status update to complete")
+                except Exception as e:
+                    logger.error("Error waiting for status update: %s", e)
 
-
+        # Export any remaining records before flushing
+        if is_root:
+            from hud.telemetry.context import export_incremental
 
+            export_incremental()
 
-
-
+        # Always flush the buffer for the current task
+        mcp_calls = flush_buffer(export=True)
+        logger.debug("Flushed %d MCP calls for trace %s", len(mcp_calls), task_run_id)
+
+        # Restore previous context
+        set_current_task_run_id(previous_task_id)
+        is_root_trace.set(was_root)
 
+        # Log at the end
+        if is_root:
+            _print_trace_complete_url(task_run_id)
 
-
-
-
+
+@contextmanager
+def trace(
+    name: str | None = None,
+    agent_model: str | None = None,
+    attributes: dict[str, Any] | None = None,
+) -> Generator[str, None, None]:
     """
-
-
+    Synchronous context manager that traces and blocks until telemetry is sent.
+
+    This is the "worry-free" option when you want to ensure telemetry is
+    sent immediately before continuing, rather than relying on background workers.
 
     Args:
-        name: Optional name for
-        attributes: Optional
+        name: Optional name for this trace
+        attributes: Optional attributes for the trace
+
+    Returns:
+        The generated task run ID (UUID string) used for this trace
     """
+    with trace_open(name=name, agent_model=agent_model, attributes=attributes) as task_run_id:
+        yield task_run_id
 
-
-
-    func: Callable[P, Coroutine[Any, Any, R]],
-) -> Callable[P, Coroutine[Any, Any, R]]: ...
+    # Ensure telemetry is flushed synchronously
+    from hud import flush
 
-
-
+    flush()
+
+
+def trace_decorator(
+    name: str | None = None,
+    agent_model: str | None = None,
+    attributes: dict[str, Any] | None = None,
+) -> Any:
+    """
+    Decorator for tracing functions.
+
+    Can be used on both sync and async functions.
+    """
 
-    def decorator(func:
+    def decorator(func: Any) -> Any:
         if asyncio.iscoroutinefunction(func):
 
             @wraps(func)
-            async def async_wrapper(*args:
-
-                with
+            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+                func_name = name or f"{func.__module__}.{func.__name__}"
+                with trace_open(name=func_name, agent_model=agent_model, attributes=attributes):
                    return await func(*args, **kwargs)

            return async_wrapper
        else:

            @wraps(func)
-            def sync_wrapper(*args:
-
-                with
+            def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+                func_name = name or f"{func.__module__}.{func.__name__}"
+                with trace_open(name=func_name, agent_model=agent_model, attributes=attributes):
                    return func(*args, **kwargs)

            return sync_wrapper
hud/telemetry/context.py
CHANGED
@@ -8,7 +8,6 @@ from typing import Any, TypeVar
 
 from hud.telemetry.mcp_models import (
     BaseMCPCall,
-    MCPManualTestCall,
     MCPNotificationCall,
     MCPRequestCall,
     MCPResponseCall,
@@ -21,8 +20,12 @@ logger = logging.getLogger("hud.telemetry")
 current_task_run_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
     "current_task_run_id", default=None
 )
-#
+# Global dictionary for buffering, keyed by task_run_id
 _GLOBAL_MCP_CALL_BUFFERS: defaultdict[str, list[BaseMCPCall]] = defaultdict(list)
+# Track the last exported index for each task_run_id
+_GLOBAL_EXPORT_INDICES: defaultdict[str, int] = defaultdict(int)
+# Track whether we've seen a non-init request for each task_run_id
+_GLOBAL_HAS_NON_INIT_REQUEST: defaultdict[str, bool] = defaultdict(bool)
 is_root_trace: contextvars.ContextVar[bool] = contextvars.ContextVar("is_root_trace", default=False)
 
 # Maximum buffer size before automatic flush
@@ -43,6 +46,7 @@ def set_current_task_run_id(task_run_id: str | None) -> None:
 
 
 def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
+    """Buffer an MCP call record for the current trace."""
     task_run_id = get_current_task_run_id()
 
     if not task_run_id:
@@ -51,7 +55,7 @@ def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
         )
         return
 
-    # Ensure 'record' is a Pydantic model instance
+    # Ensure 'record' is a Pydantic model instance
     if isinstance(record, dict):
         try:
             record_model = BaseMCPCall.from_dict(record)
@@ -67,6 +71,48 @@ def buffer_mcp_call(record: BaseMCPCall | dict[str, Any]) -> None:
         flush_buffer(export=True)
 
 
+def export_incremental() -> list[BaseMCPCall]:
+    """
+    Export only new MCP calls since last export without clearing the buffer.
+
+    Returns:
+        The list of newly exported MCP calls
+    """
+    task_run_id = get_current_task_run_id()
+    if not task_run_id or not is_root_trace.get():
+        return []
+
+    buffer = _GLOBAL_MCP_CALL_BUFFERS.get(task_run_id, [])
+    last_exported_idx = _GLOBAL_EXPORT_INDICES.get(task_run_id, 0)
+
+    # Get only the new records since last export
+    new_records = buffer[last_exported_idx:]
+
+    if new_records:
+        # Update the export index
+        _GLOBAL_EXPORT_INDICES[task_run_id] = len(buffer)
+
+        # Trigger export
+        from hud.telemetry import exporter
+        from hud.telemetry.exporter import submit_to_worker_loop
+
+        # Get current trace attributes if available
+        attributes = {"incremental": True}
+
+        coro = exporter.export_telemetry(
+            task_run_id=task_run_id,
+            trace_attributes=attributes,
+            mcp_calls=new_records.copy(),  # Copy to avoid modification during export
+        )
+        submit_to_worker_loop(coro)
+
+        logger.debug(
+            "Incremental export: %d new MCP calls for trace %s", len(new_records), task_run_id
+        )
+
+    return new_records
+
+
 def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
     """
     Clear the MCP calls buffer and return its contents.
@@ -82,11 +128,12 @@ def flush_buffer(export: bool = False) -> list[BaseMCPCall]:
         logger.warning("FLUSH_BUFFER: No current task_run_id. Cannot flush.")
         return []
 
-    buffer_for_task = _GLOBAL_MCP_CALL_BUFFERS.pop(
-
-    )
-
-
+    buffer_for_task = _GLOBAL_MCP_CALL_BUFFERS.pop(task_run_id, [])
+    # Clean up export index when buffer is flushed
+    _GLOBAL_EXPORT_INDICES.pop(task_run_id, None)
+    # Clean up non-init request tracking
+    _GLOBAL_HAS_NON_INIT_REQUEST.pop(task_run_id, None)
+    return buffer_for_task
 
 
 def create_request_record(
@@ -98,6 +145,31 @@ def create_request_record(
         logger.warning("No active task_run_id, request record will not be created")
         raise ValueError("No active task_run_id")
 
+    # Check if this is the first non-init request and update status
+    if is_root_trace.get() and not _GLOBAL_HAS_NON_INIT_REQUEST[task_run_id]:
+        # Common initialization method patterns
+        init_methods = {"initialize", "session/new", "init", "setup", "connect"}
+        method_lower = method.lower()
+
+        # Check if this is NOT an initialization method
+        if not any(init_pattern in method_lower for init_pattern in init_methods):
+            _GLOBAL_HAS_NON_INIT_REQUEST[task_run_id] = True
+
+            # Update status to running
+            from hud.telemetry.exporter import (
+                TaskRunStatus,
+                submit_to_worker_loop,
+                update_task_run_status,
+            )
+
+            coro = update_task_run_status(task_run_id, TaskRunStatus.RUNNING)
+            submit_to_worker_loop(coro)
+            logger.debug(
+                "Updated task run %s status to RUNNING on first non-init request: %s",
+                task_run_id,
+                method,
+            )
+
     record = MCPRequestCall(
         task_run_id=task_run_id,
         method=method,
@@ -118,16 +190,23 @@ def create_response_record(
         logger.warning("No active task_run_id, response record will not be created")
         raise ValueError("No active task_run_id")
 
+    # Default to COMPLETED status if not provided
+    if "status" not in kwargs:
+        kwargs["status"] = StatusType.COMPLETED
+
     record = MCPResponseCall(
         task_run_id=task_run_id,
         method=method,
-        status=StatusType.COMPLETED,
         related_request_id=related_request_id,
         is_error=is_error,
         **kwargs,
     )
 
     buffer_mcp_call(record)
+
+    # Trigger incremental export when we receive a response
+    export_incremental()
+
     return record
 
 
@@ -149,21 +228,3 @@ def create_notification_record(
     )
     buffer_mcp_call(record)
     return record
-
-
-def create_manual_test_record(**custom_data: Any) -> MCPManualTestCall | None:
-    """Create and buffer a manual test record"""
-    task_run_id = get_current_task_run_id()
-    if not task_run_id:
-        logger.warning("No active task_run_id, manual test record will not be created")
-        return None
-
-    record = MCPManualTestCall.create(task_run_id=task_run_id, **custom_data)
-    buffer_mcp_call(record)
-    return record
-
-
-def reset_context() -> None:
-    """Reset all telemetry context variables. Useful for test isolation."""
-    set_current_task_run_id(None)
-    is_root_trace.set(False)