hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -22
- hud/agents/__init__.py +13 -15
- hud/agents/base.py +599 -599
- hud/agents/claude.py +373 -373
- hud/agents/langchain.py +261 -250
- hud/agents/misc/__init__.py +7 -7
- hud/agents/misc/response_agent.py +82 -80
- hud/agents/openai.py +352 -352
- hud/agents/openai_chat_generic.py +154 -154
- hud/agents/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -742
- hud/agents/tests/test_claude.py +324 -324
- hud/agents/tests/test_client.py +363 -363
- hud/agents/tests/test_openai.py +237 -237
- hud/cli/__init__.py +617 -617
- hud/cli/__main__.py +8 -8
- hud/cli/analyze.py +371 -371
- hud/cli/analyze_metadata.py +230 -230
- hud/cli/build.py +498 -427
- hud/cli/clone.py +185 -185
- hud/cli/cursor.py +92 -92
- hud/cli/debug.py +392 -392
- hud/cli/docker_utils.py +83 -83
- hud/cli/init.py +280 -281
- hud/cli/interactive.py +353 -353
- hud/cli/mcp_server.py +764 -756
- hud/cli/pull.py +330 -336
- hud/cli/push.py +404 -370
- hud/cli/remote_runner.py +311 -311
- hud/cli/runner.py +160 -160
- hud/cli/tests/__init__.py +3 -3
- hud/cli/tests/test_analyze.py +284 -284
- hud/cli/tests/test_cli_init.py +265 -265
- hud/cli/tests/test_cli_main.py +27 -27
- hud/cli/tests/test_clone.py +142 -142
- hud/cli/tests/test_cursor.py +253 -253
- hud/cli/tests/test_debug.py +453 -453
- hud/cli/tests/test_mcp_server.py +139 -139
- hud/cli/tests/test_utils.py +388 -388
- hud/cli/utils.py +263 -263
- hud/clients/README.md +143 -143
- hud/clients/__init__.py +16 -16
- hud/clients/base.py +378 -379
- hud/clients/fastmcp.py +222 -222
- hud/clients/mcp_use.py +298 -278
- hud/clients/tests/__init__.py +1 -1
- hud/clients/tests/test_client_integration.py +111 -111
- hud/clients/tests/test_fastmcp.py +342 -342
- hud/clients/tests/test_protocol.py +188 -188
- hud/clients/utils/__init__.py +1 -1
- hud/clients/utils/retry_transport.py +160 -160
- hud/datasets.py +327 -322
- hud/misc/__init__.py +1 -1
- hud/misc/claude_plays_pokemon.py +292 -292
- hud/otel/__init__.py +35 -35
- hud/otel/collector.py +142 -142
- hud/otel/config.py +164 -164
- hud/otel/context.py +536 -536
- hud/otel/exporters.py +366 -366
- hud/otel/instrumentation.py +97 -97
- hud/otel/processors.py +118 -118
- hud/otel/tests/__init__.py +1 -1
- hud/otel/tests/test_processors.py +197 -197
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -114
- hud/server/helper/__init__.py +5 -5
- hud/server/low_level.py +132 -132
- hud/server/server.py +170 -166
- hud/server/tests/__init__.py +3 -3
- hud/settings.py +73 -73
- hud/shared/__init__.py +5 -5
- hud/shared/exceptions.py +180 -180
- hud/shared/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -157
- hud/shared/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -25
- hud/telemetry/instrument.py +379 -379
- hud/telemetry/job.py +309 -309
- hud/telemetry/replay.py +74 -74
- hud/telemetry/trace.py +83 -83
- hud/tools/__init__.py +33 -33
- hud/tools/base.py +365 -365
- hud/tools/bash.py +161 -161
- hud/tools/computer/__init__.py +15 -15
- hud/tools/computer/anthropic.py +437 -437
- hud/tools/computer/hud.py +376 -376
- hud/tools/computer/openai.py +295 -295
- hud/tools/computer/settings.py +82 -82
- hud/tools/edit.py +314 -314
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -539
- hud/tools/executors/pyautogui.py +621 -621
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -511
- hud/tools/playwright.py +412 -412
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -282
- hud/tools/tests/test_bash.py +158 -158
- hud/tools/tests/test_bash_extended.py +197 -197
- hud/tools/tests/test_computer.py +425 -425
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -259
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -145
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -72
- hud/tools/utils.py +50 -50
- hud/types.py +136 -136
- hud/utils/__init__.py +10 -10
- hud/utils/async_utils.py +65 -65
- hud/utils/design.py +236 -168
- hud/utils/mcp.py +55 -55
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -173
- hud/utils/tests/test_init.py +17 -17
- hud/utils/tests/test_progress.py +261 -261
- hud/utils/tests/test_telemetry.py +82 -82
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
- hud_python-0.4.3.dist-info/RECORD +131 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
- hud/agents/art.py +0 -101
- hud_python-0.4.1.dist-info/RECORD +0 -132
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/otel/exporters.py
CHANGED
|
@@ -1,366 +1,366 @@
|
|
|
1
|
-
"""Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
|
|
2
|
-
HTTP endpoint (/trace/<id>/telemetry-upload).
|
|
3
|
-
|
|
4
|
-
The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
|
|
5
|
-
exactly the same semantics the old async worker in ``hud.telemetry.exporter``
|
|
6
|
-
implemented.
|
|
7
|
-
|
|
8
|
-
This exporter is *synchronous* (derives from :class:`SpanExporter`). We rely on
|
|
9
|
-
``hud.shared.make_request_sync`` which already contains retry & auth logic.
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
from __future__ import annotations
|
|
13
|
-
|
|
14
|
-
import contextlib
|
|
15
|
-
import json
|
|
16
|
-
import logging
|
|
17
|
-
from collections import defaultdict
|
|
18
|
-
from datetime import UTC, datetime
|
|
19
|
-
from typing import TYPE_CHECKING, Any
|
|
20
|
-
|
|
21
|
-
from mcp.types import ClientRequest, ServerResult
|
|
22
|
-
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
|
|
23
|
-
from pydantic import BaseModel, ConfigDict, Field
|
|
24
|
-
|
|
25
|
-
from hud.shared import make_request_sync
|
|
26
|
-
from hud.types import TraceStep as HudSpanAttributes
|
|
27
|
-
|
|
28
|
-
if TYPE_CHECKING:
|
|
29
|
-
from opentelemetry.sdk.trace import ReadableSpan
|
|
30
|
-
|
|
31
|
-
logger = logging.getLogger(__name__)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# ---------------------------------------------------------------------------
|
|
35
|
-
# Models
|
|
36
|
-
# ---------------------------------------------------------------------------
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class HudSpan(BaseModel):
|
|
40
|
-
"""A telemetry span ready for export."""
|
|
41
|
-
|
|
42
|
-
name: str
|
|
43
|
-
trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
|
|
44
|
-
span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
|
|
45
|
-
parent_span_id: str | None = Field(None, pattern=r"^[0-9a-fA-F]{16}$")
|
|
46
|
-
|
|
47
|
-
start_time: str # ISO format
|
|
48
|
-
end_time: str # ISO format
|
|
49
|
-
|
|
50
|
-
status_code: str # "UNSET", "OK", "ERROR"
|
|
51
|
-
status_message: str | None = None
|
|
52
|
-
|
|
53
|
-
attributes: HudSpanAttributes
|
|
54
|
-
exceptions: list[dict[str, Any]] | None = None
|
|
55
|
-
|
|
56
|
-
model_config = ConfigDict(extra="forbid")
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def extract_span_attributes(
|
|
60
|
-
attrs: dict[str, Any], method_name: str | None = None, span_name: str | None = None
|
|
61
|
-
) -> HudSpanAttributes:
|
|
62
|
-
"""Extract and parse span attributes into typed model.
|
|
63
|
-
|
|
64
|
-
This handles:
|
|
65
|
-
- Detecting span type (MCP vs Agent)
|
|
66
|
-
- Renaming verbose OpenTelemetry semantic conventions
|
|
67
|
-
- Parsing JSON strings to MCP types
|
|
68
|
-
"""
|
|
69
|
-
# Start with core attributes - map to TraceStep field names
|
|
70
|
-
result_attrs = {
|
|
71
|
-
"task_run_id": attrs.get(
|
|
72
|
-
"hud.task_run_id"
|
|
73
|
-
), # TraceStep expects task_run_id, not hud.task_run_id
|
|
74
|
-
"job_id": attrs.get("hud.job_id"), # TraceStep expects job_id, not hud.job_id
|
|
75
|
-
"type": attrs.get("span.kind", "CLIENT"), # TraceStep expects type, not span.kind
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
# Determine span type based on presence of agent or MCP attributes
|
|
79
|
-
# Note: The input attrs might already have "category" set
|
|
80
|
-
existing_category = attrs.get("category")
|
|
81
|
-
|
|
82
|
-
if existing_category:
|
|
83
|
-
# Use the explicit category if provided
|
|
84
|
-
result_attrs["category"] = existing_category
|
|
85
|
-
elif span_name and span_name.startswith("agent."):
|
|
86
|
-
# Legacy support for spans named "agent.*"
|
|
87
|
-
result_attrs["category"] = "agent"
|
|
88
|
-
else:
|
|
89
|
-
result_attrs["category"] = "mcp" # Default to MCP
|
|
90
|
-
|
|
91
|
-
# No special processing needed for different categories
|
|
92
|
-
# The backend will handle them based on the category field
|
|
93
|
-
|
|
94
|
-
# Add method_name and request_id for MCP spans
|
|
95
|
-
if result_attrs["category"] == "mcp":
|
|
96
|
-
if method_name:
|
|
97
|
-
result_attrs["method_name"] = method_name
|
|
98
|
-
# Check for request_id with and without semconv_ai prefix
|
|
99
|
-
request_id = attrs.get("semconv_ai.mcp.request_id") or attrs.get("mcp.request.id")
|
|
100
|
-
if request_id:
|
|
101
|
-
result_attrs["request_id"] = request_id
|
|
102
|
-
|
|
103
|
-
# Parse input/output - check both with and without semconv_ai prefix
|
|
104
|
-
input_str = attrs.get("semconv_ai.traceloop.entity.input") or attrs.get(
|
|
105
|
-
"traceloop.entity.input"
|
|
106
|
-
)
|
|
107
|
-
output_str = attrs.get("semconv_ai.traceloop.entity.output") or attrs.get(
|
|
108
|
-
"traceloop.entity.output"
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
logger.debug(
|
|
112
|
-
"Category: %s, has input: %s, has output: %s",
|
|
113
|
-
result_attrs.get("category"),
|
|
114
|
-
bool(input_str),
|
|
115
|
-
bool(output_str),
|
|
116
|
-
)
|
|
117
|
-
|
|
118
|
-
# Check for direct request/result attributes first
|
|
119
|
-
if "request" in attrs and not result_attrs.get("request"):
|
|
120
|
-
req = attrs["request"]
|
|
121
|
-
if isinstance(req, str):
|
|
122
|
-
with contextlib.suppress(json.JSONDecodeError):
|
|
123
|
-
req = json.loads(req)
|
|
124
|
-
result_attrs["request"] = req
|
|
125
|
-
|
|
126
|
-
if "result" in attrs and not result_attrs.get("result"):
|
|
127
|
-
res = attrs["result"]
|
|
128
|
-
if isinstance(res, str):
|
|
129
|
-
with contextlib.suppress(json.JSONDecodeError):
|
|
130
|
-
res = json.loads(res)
|
|
131
|
-
result_attrs["result"] = res
|
|
132
|
-
|
|
133
|
-
# Process input/output from MCP instrumentation
|
|
134
|
-
if input_str and not result_attrs.get("request"):
|
|
135
|
-
try:
|
|
136
|
-
input_data = json.loads(input_str) if isinstance(input_str, str) else input_str
|
|
137
|
-
|
|
138
|
-
# For MCP category, try to parse as ClientRequest to extract the root
|
|
139
|
-
if result_attrs["category"] == "mcp" and isinstance(input_data, dict):
|
|
140
|
-
try:
|
|
141
|
-
if "method" in input_data and "params" in input_data:
|
|
142
|
-
client_request = ClientRequest.model_validate(input_data)
|
|
143
|
-
result_attrs["request"] = client_request.root
|
|
144
|
-
else:
|
|
145
|
-
result_attrs["request"] = input_data
|
|
146
|
-
except Exception:
|
|
147
|
-
result_attrs["request"] = input_data
|
|
148
|
-
else:
|
|
149
|
-
# For all other categories, just store the data
|
|
150
|
-
result_attrs["request"] = input_data
|
|
151
|
-
except Exception as e:
|
|
152
|
-
logger.debug("Failed to parse request JSON: %s", e)
|
|
153
|
-
|
|
154
|
-
if output_str and not result_attrs.get("result"):
|
|
155
|
-
try:
|
|
156
|
-
output_data = json.loads(output_str) if isinstance(output_str, str) else output_str
|
|
157
|
-
|
|
158
|
-
# For MCP category, try to parse as ServerResult to extract the root
|
|
159
|
-
if result_attrs["category"] == "mcp" and isinstance(output_data, dict):
|
|
160
|
-
# Check for error
|
|
161
|
-
if "error" in output_data:
|
|
162
|
-
result_attrs["mcp_error"] = True
|
|
163
|
-
try:
|
|
164
|
-
server_result = ServerResult.model_validate(output_data)
|
|
165
|
-
result_attrs["result"] = server_result.root
|
|
166
|
-
# Check for isError in the result
|
|
167
|
-
if getattr(server_result.root, "isError", False):
|
|
168
|
-
result_attrs["mcp_error"] = True
|
|
169
|
-
except Exception:
|
|
170
|
-
result_attrs["result"] = output_data
|
|
171
|
-
else:
|
|
172
|
-
# For all other categories, just store the data
|
|
173
|
-
result_attrs["result"] = output_data
|
|
174
|
-
except Exception as e:
|
|
175
|
-
logger.debug("Failed to parse result JSON: %s", e)
|
|
176
|
-
|
|
177
|
-
# Don't include the verbose attributes or ones we've already processed
|
|
178
|
-
exclude_keys = {
|
|
179
|
-
"hud.task_run_id",
|
|
180
|
-
"hud.job_id",
|
|
181
|
-
"span.kind",
|
|
182
|
-
"semconv_ai.mcp.method_name",
|
|
183
|
-
"mcp.method.name", # Also exclude non-prefixed version
|
|
184
|
-
"semconv_ai.mcp.request_id",
|
|
185
|
-
"mcp.request.id", # Also exclude non-prefixed version
|
|
186
|
-
"semconv_ai.traceloop.entity.input",
|
|
187
|
-
"semconv_ai.traceloop.entity.output",
|
|
188
|
-
"traceloop.entity.input", # Also exclude non-prefixed versions
|
|
189
|
-
"traceloop.entity.output",
|
|
190
|
-
"mcp_request", # Exclude to prevent overwriting parsed values
|
|
191
|
-
"mcp_result", # Exclude to prevent overwriting parsed values
|
|
192
|
-
"request", # Exclude to prevent overwriting parsed values
|
|
193
|
-
"result", # Exclude to prevent overwriting parsed values
|
|
194
|
-
"category", # Already handled above
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
# Add any extra attributes
|
|
198
|
-
for key, value in attrs.items():
|
|
199
|
-
if key not in exclude_keys:
|
|
200
|
-
result_attrs[key] = value # noqa: PERF403
|
|
201
|
-
|
|
202
|
-
logger.debug(
|
|
203
|
-
"""Final result_attrs before creating HudSpanAttributes:
|
|
204
|
-
request=%s,
|
|
205
|
-
result=%s""",
|
|
206
|
-
result_attrs.get("request"),
|
|
207
|
-
result_attrs.get("result"),
|
|
208
|
-
)
|
|
209
|
-
return HudSpanAttributes(**result_attrs)
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
# ---------------------------------------------------------------------------
|
|
213
|
-
# Helpers
|
|
214
|
-
# ---------------------------------------------------------------------------
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
def _ts_ns_to_iso(ts_ns: int) -> str:
|
|
218
|
-
"""Convert a ``Span`` timestamp (nanoseconds) to ISO-8601 string."""
|
|
219
|
-
# OpenTelemetry times are epoch nanoseconds
|
|
220
|
-
dt = datetime.fromtimestamp(ts_ns / 1_000_000_000, tz=UTC)
|
|
221
|
-
return dt.isoformat().replace("+00:00", "Z")
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
|
|
225
|
-
"""Convert an OpenTelemetry span to a dict using typed models."""
|
|
226
|
-
|
|
227
|
-
attrs = dict(span.attributes or {})
|
|
228
|
-
|
|
229
|
-
# Extract method name from span name if not in attributes
|
|
230
|
-
# Check both with and without semconv_ai prefix
|
|
231
|
-
raw_method = attrs.get("semconv_ai.mcp.method_name") or attrs.get("mcp.method.name")
|
|
232
|
-
method_name: str | None = None
|
|
233
|
-
if isinstance(raw_method, str):
|
|
234
|
-
method_name = raw_method
|
|
235
|
-
if method_name is None and isinstance(span.name, str) and span.name.endswith(".mcp"):
|
|
236
|
-
method_name = span.name[:-4] # Remove .mcp suffix
|
|
237
|
-
|
|
238
|
-
# Create typed attributes
|
|
239
|
-
typed_attrs = extract_span_attributes(attrs, method_name, str(span.name))
|
|
240
|
-
|
|
241
|
-
# Record span kind as extra attribute (TraceStep allows extras)
|
|
242
|
-
try:
|
|
243
|
-
typed_attrs.span_kind = span.kind.name # type: ignore[attr-defined]
|
|
244
|
-
except Exception:
|
|
245
|
-
logger.warning("Failed to set span kind attribute")
|
|
246
|
-
|
|
247
|
-
# Build typed span
|
|
248
|
-
# Guard context/parent/timestamps
|
|
249
|
-
context = getattr(span, "context", None)
|
|
250
|
-
trace_id_hex = (
|
|
251
|
-
format(context.trace_id, "032x") if context and hasattr(context, "trace_id") else "0" * 32
|
|
252
|
-
)
|
|
253
|
-
span_id_hex = (
|
|
254
|
-
format(context.span_id, "016x") if context and hasattr(context, "span_id") else "0" * 16
|
|
255
|
-
)
|
|
256
|
-
parent = getattr(span, "parent", None)
|
|
257
|
-
parent_id_hex = (
|
|
258
|
-
format(parent.span_id, "016x") if parent and hasattr(parent, "span_id") else None
|
|
259
|
-
)
|
|
260
|
-
start_ns = span.start_time or 0
|
|
261
|
-
end_ns = span.end_time or start_ns
|
|
262
|
-
|
|
263
|
-
typed_span = HudSpan(
|
|
264
|
-
name=span.name,
|
|
265
|
-
trace_id=trace_id_hex,
|
|
266
|
-
span_id=span_id_hex,
|
|
267
|
-
parent_span_id=parent_id_hex,
|
|
268
|
-
start_time=_ts_ns_to_iso(int(start_ns)),
|
|
269
|
-
end_time=_ts_ns_to_iso(int(end_ns)),
|
|
270
|
-
status_code=span.status.status_code.name if span.status else "UNSET",
|
|
271
|
-
status_message=span.status.description if span.status else None,
|
|
272
|
-
attributes=typed_attrs,
|
|
273
|
-
exceptions=None,
|
|
274
|
-
)
|
|
275
|
-
|
|
276
|
-
# Add error information if present
|
|
277
|
-
if span.events:
|
|
278
|
-
exceptions = []
|
|
279
|
-
exceptions = [
|
|
280
|
-
{
|
|
281
|
-
"timestamp": _ts_ns_to_iso(event.timestamp),
|
|
282
|
-
"attributes": dict(event.attributes or {}),
|
|
283
|
-
}
|
|
284
|
-
for event in span.events
|
|
285
|
-
]
|
|
286
|
-
if exceptions:
|
|
287
|
-
typed_span.exceptions = exceptions
|
|
288
|
-
|
|
289
|
-
# Convert to dict for export
|
|
290
|
-
return typed_span.model_dump(mode="json", by_alias=True, exclude_none=True)
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
# ---------------------------------------------------------------------------
|
|
294
|
-
# Exporter
|
|
295
|
-
# ---------------------------------------------------------------------------
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
class HudSpanExporter(SpanExporter):
|
|
299
|
-
"""Exporter that forwards spans to HUD backend using existing endpoint."""
|
|
300
|
-
|
|
301
|
-
def __init__(self, *, telemetry_url: str, api_key: str) -> None:
|
|
302
|
-
super().__init__()
|
|
303
|
-
self._telemetry_url = telemetry_url.rstrip("/")
|
|
304
|
-
self._api_key = api_key
|
|
305
|
-
|
|
306
|
-
# ------------------------------------------------------------------
|
|
307
|
-
# Core API
|
|
308
|
-
# ------------------------------------------------------------------
|
|
309
|
-
def export(self, spans: list[ReadableSpan]) -> SpanExportResult: # type: ignore[override]
|
|
310
|
-
if not spans:
|
|
311
|
-
return SpanExportResult.SUCCESS
|
|
312
|
-
|
|
313
|
-
# Group spans by hud.task_run_id attribute
|
|
314
|
-
grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
|
|
315
|
-
for span in spans:
|
|
316
|
-
run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
|
|
317
|
-
if not run_id:
|
|
318
|
-
# Skip spans that are outside HUD traces
|
|
319
|
-
continue
|
|
320
|
-
grouped[str(run_id)].append(span)
|
|
321
|
-
|
|
322
|
-
# Send each group synchronously (retry inside make_request_sync)
|
|
323
|
-
for run_id, span_batch in grouped.items():
|
|
324
|
-
try:
|
|
325
|
-
url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
|
|
326
|
-
telemetry_spans = [_span_to_dict(s) for s in span_batch]
|
|
327
|
-
# Include current step count in metadata
|
|
328
|
-
metadata = {}
|
|
329
|
-
# Get the HIGHEST step count from the batch (most recent)
|
|
330
|
-
step_count = 0
|
|
331
|
-
for span in span_batch:
|
|
332
|
-
if span.attributes and "hud.step_count" in span.attributes:
|
|
333
|
-
current_step = span.attributes["hud.step_count"]
|
|
334
|
-
if isinstance(current_step, int) and current_step > step_count:
|
|
335
|
-
step_count = current_step
|
|
336
|
-
|
|
337
|
-
payload = {
|
|
338
|
-
"metadata": metadata,
|
|
339
|
-
"telemetry": telemetry_spans,
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
# Only include step_count if we found any steps
|
|
343
|
-
if step_count > 0:
|
|
344
|
-
payload["step_count"] = step_count
|
|
345
|
-
|
|
346
|
-
logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
|
|
347
|
-
make_request_sync(
|
|
348
|
-
method="POST",
|
|
349
|
-
url=url,
|
|
350
|
-
json=payload,
|
|
351
|
-
api_key=self._api_key,
|
|
352
|
-
)
|
|
353
|
-
except Exception as exc:
|
|
354
|
-
logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
|
|
355
|
-
# If *any* group fails we return FAILURE so the OTEL SDK can retry
|
|
356
|
-
return SpanExportResult.FAILURE
|
|
357
|
-
|
|
358
|
-
return SpanExportResult.SUCCESS
|
|
359
|
-
|
|
360
|
-
def shutdown(self) -> None: # type: ignore[override]
|
|
361
|
-
# Nothing to cleanup, httpx handled inside make_request_sync
|
|
362
|
-
pass
|
|
363
|
-
|
|
364
|
-
def force_flush(self, timeout_millis: int | None = None) -> bool: # type: ignore[override]
|
|
365
|
-
# Synchronous export, nothing buffered here
|
|
366
|
-
return True
|
|
1
|
+
"""Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
|
|
2
|
+
HTTP endpoint (/trace/<id>/telemetry-upload).
|
|
3
|
+
|
|
4
|
+
The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
|
|
5
|
+
exactly the same semantics the old async worker in ``hud.telemetry.exporter``
|
|
6
|
+
implemented.
|
|
7
|
+
|
|
8
|
+
This exporter is *synchronous* (derives from :class:`SpanExporter`). We rely on
|
|
9
|
+
``hud.shared.make_request_sync`` which already contains retry & auth logic.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import contextlib
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
from collections import defaultdict
|
|
18
|
+
from datetime import UTC, datetime
|
|
19
|
+
from typing import TYPE_CHECKING, Any
|
|
20
|
+
|
|
21
|
+
from mcp.types import ClientRequest, ServerResult
|
|
22
|
+
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
|
|
23
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
24
|
+
|
|
25
|
+
from hud.shared import make_request_sync
|
|
26
|
+
from hud.types import TraceStep as HudSpanAttributes
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from opentelemetry.sdk.trace import ReadableSpan
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# Models
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class HudSpan(BaseModel):
|
|
40
|
+
"""A telemetry span ready for export."""
|
|
41
|
+
|
|
42
|
+
name: str
|
|
43
|
+
trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
|
|
44
|
+
span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
|
|
45
|
+
parent_span_id: str | None = Field(None, pattern=r"^[0-9a-fA-F]{16}$")
|
|
46
|
+
|
|
47
|
+
start_time: str # ISO format
|
|
48
|
+
end_time: str # ISO format
|
|
49
|
+
|
|
50
|
+
status_code: str # "UNSET", "OK", "ERROR"
|
|
51
|
+
status_message: str | None = None
|
|
52
|
+
|
|
53
|
+
attributes: HudSpanAttributes
|
|
54
|
+
exceptions: list[dict[str, Any]] | None = None
|
|
55
|
+
|
|
56
|
+
model_config = ConfigDict(extra="forbid")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def extract_span_attributes(
|
|
60
|
+
attrs: dict[str, Any], method_name: str | None = None, span_name: str | None = None
|
|
61
|
+
) -> HudSpanAttributes:
|
|
62
|
+
"""Extract and parse span attributes into typed model.
|
|
63
|
+
|
|
64
|
+
This handles:
|
|
65
|
+
- Detecting span type (MCP vs Agent)
|
|
66
|
+
- Renaming verbose OpenTelemetry semantic conventions
|
|
67
|
+
- Parsing JSON strings to MCP types
|
|
68
|
+
"""
|
|
69
|
+
# Start with core attributes - map to TraceStep field names
|
|
70
|
+
result_attrs = {
|
|
71
|
+
"task_run_id": attrs.get(
|
|
72
|
+
"hud.task_run_id"
|
|
73
|
+
), # TraceStep expects task_run_id, not hud.task_run_id
|
|
74
|
+
"job_id": attrs.get("hud.job_id"), # TraceStep expects job_id, not hud.job_id
|
|
75
|
+
"type": attrs.get("span.kind", "CLIENT"), # TraceStep expects type, not span.kind
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
# Determine span type based on presence of agent or MCP attributes
|
|
79
|
+
# Note: The input attrs might already have "category" set
|
|
80
|
+
existing_category = attrs.get("category")
|
|
81
|
+
|
|
82
|
+
if existing_category:
|
|
83
|
+
# Use the explicit category if provided
|
|
84
|
+
result_attrs["category"] = existing_category
|
|
85
|
+
elif span_name and span_name.startswith("agent."):
|
|
86
|
+
# Legacy support for spans named "agent.*"
|
|
87
|
+
result_attrs["category"] = "agent"
|
|
88
|
+
else:
|
|
89
|
+
result_attrs["category"] = "mcp" # Default to MCP
|
|
90
|
+
|
|
91
|
+
# No special processing needed for different categories
|
|
92
|
+
# The backend will handle them based on the category field
|
|
93
|
+
|
|
94
|
+
# Add method_name and request_id for MCP spans
|
|
95
|
+
if result_attrs["category"] == "mcp":
|
|
96
|
+
if method_name:
|
|
97
|
+
result_attrs["method_name"] = method_name
|
|
98
|
+
# Check for request_id with and without semconv_ai prefix
|
|
99
|
+
request_id = attrs.get("semconv_ai.mcp.request_id") or attrs.get("mcp.request.id")
|
|
100
|
+
if request_id:
|
|
101
|
+
result_attrs["request_id"] = request_id
|
|
102
|
+
|
|
103
|
+
# Parse input/output - check both with and without semconv_ai prefix
|
|
104
|
+
input_str = attrs.get("semconv_ai.traceloop.entity.input") or attrs.get(
|
|
105
|
+
"traceloop.entity.input"
|
|
106
|
+
)
|
|
107
|
+
output_str = attrs.get("semconv_ai.traceloop.entity.output") or attrs.get(
|
|
108
|
+
"traceloop.entity.output"
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
logger.debug(
|
|
112
|
+
"Category: %s, has input: %s, has output: %s",
|
|
113
|
+
result_attrs.get("category"),
|
|
114
|
+
bool(input_str),
|
|
115
|
+
bool(output_str),
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
# Check for direct request/result attributes first
|
|
119
|
+
if "request" in attrs and not result_attrs.get("request"):
|
|
120
|
+
req = attrs["request"]
|
|
121
|
+
if isinstance(req, str):
|
|
122
|
+
with contextlib.suppress(json.JSONDecodeError):
|
|
123
|
+
req = json.loads(req)
|
|
124
|
+
result_attrs["request"] = req
|
|
125
|
+
|
|
126
|
+
if "result" in attrs and not result_attrs.get("result"):
|
|
127
|
+
res = attrs["result"]
|
|
128
|
+
if isinstance(res, str):
|
|
129
|
+
with contextlib.suppress(json.JSONDecodeError):
|
|
130
|
+
res = json.loads(res)
|
|
131
|
+
result_attrs["result"] = res
|
|
132
|
+
|
|
133
|
+
# Process input/output from MCP instrumentation
|
|
134
|
+
if input_str and not result_attrs.get("request"):
|
|
135
|
+
try:
|
|
136
|
+
input_data = json.loads(input_str) if isinstance(input_str, str) else input_str
|
|
137
|
+
|
|
138
|
+
# For MCP category, try to parse as ClientRequest to extract the root
|
|
139
|
+
if result_attrs["category"] == "mcp" and isinstance(input_data, dict):
|
|
140
|
+
try:
|
|
141
|
+
if "method" in input_data and "params" in input_data:
|
|
142
|
+
client_request = ClientRequest.model_validate(input_data)
|
|
143
|
+
result_attrs["request"] = client_request.root
|
|
144
|
+
else:
|
|
145
|
+
result_attrs["request"] = input_data
|
|
146
|
+
except Exception:
|
|
147
|
+
result_attrs["request"] = input_data
|
|
148
|
+
else:
|
|
149
|
+
# For all other categories, just store the data
|
|
150
|
+
result_attrs["request"] = input_data
|
|
151
|
+
except Exception as e:
|
|
152
|
+
logger.debug("Failed to parse request JSON: %s", e)
|
|
153
|
+
|
|
154
|
+
if output_str and not result_attrs.get("result"):
|
|
155
|
+
try:
|
|
156
|
+
output_data = json.loads(output_str) if isinstance(output_str, str) else output_str
|
|
157
|
+
|
|
158
|
+
# For MCP category, try to parse as ServerResult to extract the root
|
|
159
|
+
if result_attrs["category"] == "mcp" and isinstance(output_data, dict):
|
|
160
|
+
# Check for error
|
|
161
|
+
if "error" in output_data:
|
|
162
|
+
result_attrs["mcp_error"] = True
|
|
163
|
+
try:
|
|
164
|
+
server_result = ServerResult.model_validate(output_data)
|
|
165
|
+
result_attrs["result"] = server_result.root
|
|
166
|
+
# Check for isError in the result
|
|
167
|
+
if getattr(server_result.root, "isError", False):
|
|
168
|
+
result_attrs["mcp_error"] = True
|
|
169
|
+
except Exception:
|
|
170
|
+
result_attrs["result"] = output_data
|
|
171
|
+
else:
|
|
172
|
+
# For all other categories, just store the data
|
|
173
|
+
result_attrs["result"] = output_data
|
|
174
|
+
except Exception as e:
|
|
175
|
+
logger.debug("Failed to parse result JSON: %s", e)
|
|
176
|
+
|
|
177
|
+
# Don't include the verbose attributes or ones we've already processed
|
|
178
|
+
exclude_keys = {
|
|
179
|
+
"hud.task_run_id",
|
|
180
|
+
"hud.job_id",
|
|
181
|
+
"span.kind",
|
|
182
|
+
"semconv_ai.mcp.method_name",
|
|
183
|
+
"mcp.method.name", # Also exclude non-prefixed version
|
|
184
|
+
"semconv_ai.mcp.request_id",
|
|
185
|
+
"mcp.request.id", # Also exclude non-prefixed version
|
|
186
|
+
"semconv_ai.traceloop.entity.input",
|
|
187
|
+
"semconv_ai.traceloop.entity.output",
|
|
188
|
+
"traceloop.entity.input", # Also exclude non-prefixed versions
|
|
189
|
+
"traceloop.entity.output",
|
|
190
|
+
"mcp_request", # Exclude to prevent overwriting parsed values
|
|
191
|
+
"mcp_result", # Exclude to prevent overwriting parsed values
|
|
192
|
+
"request", # Exclude to prevent overwriting parsed values
|
|
193
|
+
"result", # Exclude to prevent overwriting parsed values
|
|
194
|
+
"category", # Already handled above
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
# Add any extra attributes
|
|
198
|
+
for key, value in attrs.items():
|
|
199
|
+
if key not in exclude_keys:
|
|
200
|
+
result_attrs[key] = value # noqa: PERF403
|
|
201
|
+
|
|
202
|
+
logger.debug(
|
|
203
|
+
"""Final result_attrs before creating HudSpanAttributes:
|
|
204
|
+
request=%s,
|
|
205
|
+
result=%s""",
|
|
206
|
+
result_attrs.get("request"),
|
|
207
|
+
result_attrs.get("result"),
|
|
208
|
+
)
|
|
209
|
+
return HudSpanAttributes(**result_attrs)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
# ---------------------------------------------------------------------------
|
|
213
|
+
# Helpers
|
|
214
|
+
# ---------------------------------------------------------------------------
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _ts_ns_to_iso(ts_ns: int) -> str:
|
|
218
|
+
"""Convert a ``Span`` timestamp (nanoseconds) to ISO-8601 string."""
|
|
219
|
+
# OpenTelemetry times are epoch nanoseconds
|
|
220
|
+
dt = datetime.fromtimestamp(ts_ns / 1_000_000_000, tz=UTC)
|
|
221
|
+
return dt.isoformat().replace("+00:00", "Z")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
    """Convert an OpenTelemetry span to a dict using typed models.

    The span's attributes are passed through ``extract_span_attributes`` and
    wrapped, together with ids, timestamps and status, in a ``HudSpan`` that is
    then dumped for export.

    Args:
        span: The span to serialise.

    Returns:
        The ``HudSpan`` dumped with ``mode="json"``, ``by_alias=True`` and
        ``exclude_none=True``.
    """
    attrs = dict(span.attributes or {})

    # Resolve the method name: prefer the attribute (with or without the
    # semconv_ai prefix), otherwise derive it from a "<method>.mcp" span name.
    raw_method = attrs.get("semconv_ai.mcp.method_name") or attrs.get("mcp.method.name")
    method_name: str | None = raw_method if isinstance(raw_method, str) else None
    if method_name is None and isinstance(span.name, str) and span.name.endswith(".mcp"):
        method_name = span.name[:-4]  # Remove .mcp suffix

    # Create typed attributes
    typed_attrs = extract_span_attributes(attrs, method_name, str(span.name))

    # Record span kind as extra attribute (TraceStep allows extras).
    # Best-effort: a failure here must not prevent the span from being exported.
    try:
        typed_attrs.span_kind = span.kind.name  # type: ignore[attr-defined]
    except Exception:
        logger.warning("Failed to set span kind attribute")

    # Guard context/parent/timestamps: fall back to all-zero ids, no parent,
    # and a zero-length span when the corresponding value is missing.
    context = getattr(span, "context", None)
    trace_id_hex = (
        format(context.trace_id, "032x") if context and hasattr(context, "trace_id") else "0" * 32
    )
    span_id_hex = (
        format(context.span_id, "016x") if context and hasattr(context, "span_id") else "0" * 16
    )
    parent = getattr(span, "parent", None)
    parent_id_hex = (
        format(parent.span_id, "016x") if parent and hasattr(parent, "span_id") else None
    )
    start_ns = span.start_time or 0
    end_ns = span.end_time or start_ns

    status = span.status
    typed_span = HudSpan(
        name=span.name,
        trace_id=trace_id_hex,
        span_id=span_id_hex,
        parent_span_id=parent_id_hex,
        start_time=_ts_ns_to_iso(int(start_ns)),
        end_time=_ts_ns_to_iso(int(end_ns)),
        status_code=status.status_code.name if status else "UNSET",
        status_message=status.description if status else None,
        attributes=typed_attrs,
        exceptions=None,
    )

    # Every span event is exported under "exceptions" as timestamp + attributes;
    # the field stays None (and is dropped by exclude_none) when there are none.
    event_records = [
        {
            "timestamp": _ts_ns_to_iso(event.timestamp),
            "attributes": dict(event.attributes or {}),
        }
        for event in span.events or ()
    ]
    if event_records:
        typed_span.exceptions = event_records

    # Convert to dict for export
    return typed_span.model_dump(mode="json", by_alias=True, exclude_none=True)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
# ---------------------------------------------------------------------------
|
|
294
|
+
# Exporter
|
|
295
|
+
# ---------------------------------------------------------------------------
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
class HudSpanExporter(SpanExporter):
    """Exporter that forwards spans to HUD backend using existing endpoint.

    Spans are grouped by their ``hud.task_run_id`` attribute and each group is
    POSTed to ``<telemetry_url>/trace/<run_id>/telemetry-upload``. Spans without
    that attribute are ignored.
    """

    def __init__(self, *, telemetry_url: str, api_key: str) -> None:
        """Store the upload endpoint and credentials.

        Args:
            telemetry_url: Base URL of the telemetry service; a trailing ``/``
                is stripped so paths can be appended directly.
            api_key: API key passed to ``make_request_sync`` on every upload.
        """
        super().__init__()
        self._telemetry_url = telemetry_url.rstrip("/")
        self._api_key = api_key

    # ------------------------------------------------------------------
    # Core API
    # ------------------------------------------------------------------
    def export(self, spans: list[ReadableSpan]) -> SpanExportResult:  # type: ignore[override]
        """Upload ``spans`` to the HUD backend, one request per task run.

        Args:
            spans: Finished spans handed over by the span processor.

        Returns:
            ``SpanExportResult.SUCCESS`` when every group was sent (or there was
            nothing to send), ``SpanExportResult.FAILURE`` if any group failed.
        """
        if not spans:
            return SpanExportResult.SUCCESS

        # Group spans by hud.task_run_id attribute
        grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
        for span in spans:
            run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
            if not run_id:
                # Skip spans that are outside HUD traces
                continue
            grouped[str(run_id)].append(span)

        # Send each group synchronously (retry inside make_request_sync).
        # A failure for one run must not prevent the other runs in this batch
        # from being uploaded, so remember it and keep going.
        any_failed = False
        for run_id, span_batch in grouped.items():
            try:
                url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
                telemetry_spans = [_span_to_dict(s) for s in span_batch]

                # Highest integer hud.step_count in the batch (most recent step);
                # 0 when no span carries one.
                step_count = max(
                    (
                        value
                        for value in (
                            s.attributes.get("hud.step_count")
                            for s in span_batch
                            if s.attributes
                        )
                        if isinstance(value, int)
                    ),
                    default=0,
                )

                # "metadata" is currently sent empty; the step count travels as
                # a top-level key of the payload instead.
                payload = {
                    "metadata": {},
                    "telemetry": telemetry_spans,
                }

                # Only include step_count if we found any steps
                if step_count > 0:
                    payload["step_count"] = step_count

                logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
                make_request_sync(
                    method="POST",
                    url=url,
                    json=payload,
                    api_key=self._api_key,
                )
            except Exception as exc:
                logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
                any_failed = True

        # If *any* group failed, report FAILURE for the whole batch.
        return SpanExportResult.FAILURE if any_failed else SpanExportResult.SUCCESS

    def shutdown(self) -> None:  # type: ignore[override]
        """Release exporter resources (none are held)."""
        # Nothing to cleanup, httpx handled inside make_request_sync

    def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
        """Report that there is nothing pending to flush.

        Args:
            timeout_millis: Accepted for interface compatibility; unused.

        Returns:
            Always ``True``.
        """
        # Synchronous export, nothing buffered here
        return True
|