hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (130) hide show
  1. hud/__init__.py +22 -22
  2. hud/agents/__init__.py +13 -15
  3. hud/agents/base.py +599 -599
  4. hud/agents/claude.py +373 -373
  5. hud/agents/langchain.py +261 -250
  6. hud/agents/misc/__init__.py +7 -7
  7. hud/agents/misc/response_agent.py +82 -80
  8. hud/agents/openai.py +352 -352
  9. hud/agents/openai_chat_generic.py +154 -154
  10. hud/agents/tests/__init__.py +1 -1
  11. hud/agents/tests/test_base.py +742 -742
  12. hud/agents/tests/test_claude.py +324 -324
  13. hud/agents/tests/test_client.py +363 -363
  14. hud/agents/tests/test_openai.py +237 -237
  15. hud/cli/__init__.py +617 -617
  16. hud/cli/__main__.py +8 -8
  17. hud/cli/analyze.py +371 -371
  18. hud/cli/analyze_metadata.py +230 -230
  19. hud/cli/build.py +498 -427
  20. hud/cli/clone.py +185 -185
  21. hud/cli/cursor.py +92 -92
  22. hud/cli/debug.py +392 -392
  23. hud/cli/docker_utils.py +83 -83
  24. hud/cli/init.py +280 -281
  25. hud/cli/interactive.py +353 -353
  26. hud/cli/mcp_server.py +764 -756
  27. hud/cli/pull.py +330 -336
  28. hud/cli/push.py +404 -370
  29. hud/cli/remote_runner.py +311 -311
  30. hud/cli/runner.py +160 -160
  31. hud/cli/tests/__init__.py +3 -3
  32. hud/cli/tests/test_analyze.py +284 -284
  33. hud/cli/tests/test_cli_init.py +265 -265
  34. hud/cli/tests/test_cli_main.py +27 -27
  35. hud/cli/tests/test_clone.py +142 -142
  36. hud/cli/tests/test_cursor.py +253 -253
  37. hud/cli/tests/test_debug.py +453 -453
  38. hud/cli/tests/test_mcp_server.py +139 -139
  39. hud/cli/tests/test_utils.py +388 -388
  40. hud/cli/utils.py +263 -263
  41. hud/clients/README.md +143 -143
  42. hud/clients/__init__.py +16 -16
  43. hud/clients/base.py +378 -379
  44. hud/clients/fastmcp.py +222 -222
  45. hud/clients/mcp_use.py +298 -278
  46. hud/clients/tests/__init__.py +1 -1
  47. hud/clients/tests/test_client_integration.py +111 -111
  48. hud/clients/tests/test_fastmcp.py +342 -342
  49. hud/clients/tests/test_protocol.py +188 -188
  50. hud/clients/utils/__init__.py +1 -1
  51. hud/clients/utils/retry_transport.py +160 -160
  52. hud/datasets.py +327 -322
  53. hud/misc/__init__.py +1 -1
  54. hud/misc/claude_plays_pokemon.py +292 -292
  55. hud/otel/__init__.py +35 -35
  56. hud/otel/collector.py +142 -142
  57. hud/otel/config.py +164 -164
  58. hud/otel/context.py +536 -536
  59. hud/otel/exporters.py +366 -366
  60. hud/otel/instrumentation.py +97 -97
  61. hud/otel/processors.py +118 -118
  62. hud/otel/tests/__init__.py +1 -1
  63. hud/otel/tests/test_processors.py +197 -197
  64. hud/server/__init__.py +5 -5
  65. hud/server/context.py +114 -114
  66. hud/server/helper/__init__.py +5 -5
  67. hud/server/low_level.py +132 -132
  68. hud/server/server.py +170 -166
  69. hud/server/tests/__init__.py +3 -3
  70. hud/settings.py +73 -73
  71. hud/shared/__init__.py +5 -5
  72. hud/shared/exceptions.py +180 -180
  73. hud/shared/requests.py +264 -264
  74. hud/shared/tests/test_exceptions.py +157 -157
  75. hud/shared/tests/test_requests.py +275 -275
  76. hud/telemetry/__init__.py +25 -25
  77. hud/telemetry/instrument.py +379 -379
  78. hud/telemetry/job.py +309 -309
  79. hud/telemetry/replay.py +74 -74
  80. hud/telemetry/trace.py +83 -83
  81. hud/tools/__init__.py +33 -33
  82. hud/tools/base.py +365 -365
  83. hud/tools/bash.py +161 -161
  84. hud/tools/computer/__init__.py +15 -15
  85. hud/tools/computer/anthropic.py +437 -437
  86. hud/tools/computer/hud.py +376 -376
  87. hud/tools/computer/openai.py +295 -295
  88. hud/tools/computer/settings.py +82 -82
  89. hud/tools/edit.py +314 -314
  90. hud/tools/executors/__init__.py +30 -30
  91. hud/tools/executors/base.py +539 -539
  92. hud/tools/executors/pyautogui.py +621 -621
  93. hud/tools/executors/tests/__init__.py +1 -1
  94. hud/tools/executors/tests/test_base_executor.py +338 -338
  95. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  96. hud/tools/executors/xdo.py +511 -511
  97. hud/tools/playwright.py +412 -412
  98. hud/tools/tests/__init__.py +3 -3
  99. hud/tools/tests/test_base.py +282 -282
  100. hud/tools/tests/test_bash.py +158 -158
  101. hud/tools/tests/test_bash_extended.py +197 -197
  102. hud/tools/tests/test_computer.py +425 -425
  103. hud/tools/tests/test_computer_actions.py +34 -34
  104. hud/tools/tests/test_edit.py +259 -259
  105. hud/tools/tests/test_init.py +27 -27
  106. hud/tools/tests/test_playwright_tool.py +183 -183
  107. hud/tools/tests/test_tools.py +145 -145
  108. hud/tools/tests/test_utils.py +156 -156
  109. hud/tools/types.py +72 -72
  110. hud/tools/utils.py +50 -50
  111. hud/types.py +136 -136
  112. hud/utils/__init__.py +10 -10
  113. hud/utils/async_utils.py +65 -65
  114. hud/utils/design.py +236 -168
  115. hud/utils/mcp.py +55 -55
  116. hud/utils/progress.py +149 -149
  117. hud/utils/telemetry.py +66 -66
  118. hud/utils/tests/test_async_utils.py +173 -173
  119. hud/utils/tests/test_init.py +17 -17
  120. hud/utils/tests/test_progress.py +261 -261
  121. hud/utils/tests/test_telemetry.py +82 -82
  122. hud/utils/tests/test_version.py +8 -8
  123. hud/version.py +7 -7
  124. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
  125. hud_python-0.4.3.dist-info/RECORD +131 -0
  126. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
  127. hud/agents/art.py +0 -101
  128. hud_python-0.4.1.dist-info/RECORD +0 -132
  129. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
  130. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/otel/exporters.py CHANGED
@@ -1,366 +1,366 @@
1
- """Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
2
- HTTP endpoint (/trace/<id>/telemetry-upload).
3
-
4
- The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
5
- exactly the same semantics the old async worker in ``hud.telemetry.exporter``
6
- implemented.
7
-
8
- This exporter is *synchronous* (derives from :class:`SpanExporter`). We rely on
9
- ``hud.shared.make_request_sync`` which already contains retry & auth logic.
10
- """
11
-
12
- from __future__ import annotations
13
-
14
- import contextlib
15
- import json
16
- import logging
17
- from collections import defaultdict
18
- from datetime import UTC, datetime
19
- from typing import TYPE_CHECKING, Any
20
-
21
- from mcp.types import ClientRequest, ServerResult
22
- from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
23
- from pydantic import BaseModel, ConfigDict, Field
24
-
25
- from hud.shared import make_request_sync
26
- from hud.types import TraceStep as HudSpanAttributes
27
-
28
- if TYPE_CHECKING:
29
- from opentelemetry.sdk.trace import ReadableSpan
30
-
31
- logger = logging.getLogger(__name__)
32
-
33
-
34
- # ---------------------------------------------------------------------------
35
- # Models
36
- # ---------------------------------------------------------------------------
37
-
38
-
39
- class HudSpan(BaseModel):
40
- """A telemetry span ready for export."""
41
-
42
- name: str
43
- trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
44
- span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
45
- parent_span_id: str | None = Field(None, pattern=r"^[0-9a-fA-F]{16}$")
46
-
47
- start_time: str # ISO format
48
- end_time: str # ISO format
49
-
50
- status_code: str # "UNSET", "OK", "ERROR"
51
- status_message: str | None = None
52
-
53
- attributes: HudSpanAttributes
54
- exceptions: list[dict[str, Any]] | None = None
55
-
56
- model_config = ConfigDict(extra="forbid")
57
-
58
-
59
- def extract_span_attributes(
60
- attrs: dict[str, Any], method_name: str | None = None, span_name: str | None = None
61
- ) -> HudSpanAttributes:
62
- """Extract and parse span attributes into typed model.
63
-
64
- This handles:
65
- - Detecting span type (MCP vs Agent)
66
- - Renaming verbose OpenTelemetry semantic conventions
67
- - Parsing JSON strings to MCP types
68
- """
69
- # Start with core attributes - map to TraceStep field names
70
- result_attrs = {
71
- "task_run_id": attrs.get(
72
- "hud.task_run_id"
73
- ), # TraceStep expects task_run_id, not hud.task_run_id
74
- "job_id": attrs.get("hud.job_id"), # TraceStep expects job_id, not hud.job_id
75
- "type": attrs.get("span.kind", "CLIENT"), # TraceStep expects type, not span.kind
76
- }
77
-
78
- # Determine span type based on presence of agent or MCP attributes
79
- # Note: The input attrs might already have "category" set
80
- existing_category = attrs.get("category")
81
-
82
- if existing_category:
83
- # Use the explicit category if provided
84
- result_attrs["category"] = existing_category
85
- elif span_name and span_name.startswith("agent."):
86
- # Legacy support for spans named "agent.*"
87
- result_attrs["category"] = "agent"
88
- else:
89
- result_attrs["category"] = "mcp" # Default to MCP
90
-
91
- # No special processing needed for different categories
92
- # The backend will handle them based on the category field
93
-
94
- # Add method_name and request_id for MCP spans
95
- if result_attrs["category"] == "mcp":
96
- if method_name:
97
- result_attrs["method_name"] = method_name
98
- # Check for request_id with and without semconv_ai prefix
99
- request_id = attrs.get("semconv_ai.mcp.request_id") or attrs.get("mcp.request.id")
100
- if request_id:
101
- result_attrs["request_id"] = request_id
102
-
103
- # Parse input/output - check both with and without semconv_ai prefix
104
- input_str = attrs.get("semconv_ai.traceloop.entity.input") or attrs.get(
105
- "traceloop.entity.input"
106
- )
107
- output_str = attrs.get("semconv_ai.traceloop.entity.output") or attrs.get(
108
- "traceloop.entity.output"
109
- )
110
-
111
- logger.debug(
112
- "Category: %s, has input: %s, has output: %s",
113
- result_attrs.get("category"),
114
- bool(input_str),
115
- bool(output_str),
116
- )
117
-
118
- # Check for direct request/result attributes first
119
- if "request" in attrs and not result_attrs.get("request"):
120
- req = attrs["request"]
121
- if isinstance(req, str):
122
- with contextlib.suppress(json.JSONDecodeError):
123
- req = json.loads(req)
124
- result_attrs["request"] = req
125
-
126
- if "result" in attrs and not result_attrs.get("result"):
127
- res = attrs["result"]
128
- if isinstance(res, str):
129
- with contextlib.suppress(json.JSONDecodeError):
130
- res = json.loads(res)
131
- result_attrs["result"] = res
132
-
133
- # Process input/output from MCP instrumentation
134
- if input_str and not result_attrs.get("request"):
135
- try:
136
- input_data = json.loads(input_str) if isinstance(input_str, str) else input_str
137
-
138
- # For MCP category, try to parse as ClientRequest to extract the root
139
- if result_attrs["category"] == "mcp" and isinstance(input_data, dict):
140
- try:
141
- if "method" in input_data and "params" in input_data:
142
- client_request = ClientRequest.model_validate(input_data)
143
- result_attrs["request"] = client_request.root
144
- else:
145
- result_attrs["request"] = input_data
146
- except Exception:
147
- result_attrs["request"] = input_data
148
- else:
149
- # For all other categories, just store the data
150
- result_attrs["request"] = input_data
151
- except Exception as e:
152
- logger.debug("Failed to parse request JSON: %s", e)
153
-
154
- if output_str and not result_attrs.get("result"):
155
- try:
156
- output_data = json.loads(output_str) if isinstance(output_str, str) else output_str
157
-
158
- # For MCP category, try to parse as ServerResult to extract the root
159
- if result_attrs["category"] == "mcp" and isinstance(output_data, dict):
160
- # Check for error
161
- if "error" in output_data:
162
- result_attrs["mcp_error"] = True
163
- try:
164
- server_result = ServerResult.model_validate(output_data)
165
- result_attrs["result"] = server_result.root
166
- # Check for isError in the result
167
- if getattr(server_result.root, "isError", False):
168
- result_attrs["mcp_error"] = True
169
- except Exception:
170
- result_attrs["result"] = output_data
171
- else:
172
- # For all other categories, just store the data
173
- result_attrs["result"] = output_data
174
- except Exception as e:
175
- logger.debug("Failed to parse result JSON: %s", e)
176
-
177
- # Don't include the verbose attributes or ones we've already processed
178
- exclude_keys = {
179
- "hud.task_run_id",
180
- "hud.job_id",
181
- "span.kind",
182
- "semconv_ai.mcp.method_name",
183
- "mcp.method.name", # Also exclude non-prefixed version
184
- "semconv_ai.mcp.request_id",
185
- "mcp.request.id", # Also exclude non-prefixed version
186
- "semconv_ai.traceloop.entity.input",
187
- "semconv_ai.traceloop.entity.output",
188
- "traceloop.entity.input", # Also exclude non-prefixed versions
189
- "traceloop.entity.output",
190
- "mcp_request", # Exclude to prevent overwriting parsed values
191
- "mcp_result", # Exclude to prevent overwriting parsed values
192
- "request", # Exclude to prevent overwriting parsed values
193
- "result", # Exclude to prevent overwriting parsed values
194
- "category", # Already handled above
195
- }
196
-
197
- # Add any extra attributes
198
- for key, value in attrs.items():
199
- if key not in exclude_keys:
200
- result_attrs[key] = value # noqa: PERF403
201
-
202
- logger.debug(
203
- """Final result_attrs before creating HudSpanAttributes:
204
- request=%s,
205
- result=%s""",
206
- result_attrs.get("request"),
207
- result_attrs.get("result"),
208
- )
209
- return HudSpanAttributes(**result_attrs)
210
-
211
-
212
- # ---------------------------------------------------------------------------
213
- # Helpers
214
- # ---------------------------------------------------------------------------
215
-
216
-
217
- def _ts_ns_to_iso(ts_ns: int) -> str:
218
- """Convert a ``Span`` timestamp (nanoseconds) to ISO-8601 string."""
219
- # OpenTelemetry times are epoch nanoseconds
220
- dt = datetime.fromtimestamp(ts_ns / 1_000_000_000, tz=UTC)
221
- return dt.isoformat().replace("+00:00", "Z")
222
-
223
-
224
- def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
225
- """Convert an OpenTelemetry span to a dict using typed models."""
226
-
227
- attrs = dict(span.attributes or {})
228
-
229
- # Extract method name from span name if not in attributes
230
- # Check both with and without semconv_ai prefix
231
- raw_method = attrs.get("semconv_ai.mcp.method_name") or attrs.get("mcp.method.name")
232
- method_name: str | None = None
233
- if isinstance(raw_method, str):
234
- method_name = raw_method
235
- if method_name is None and isinstance(span.name, str) and span.name.endswith(".mcp"):
236
- method_name = span.name[:-4] # Remove .mcp suffix
237
-
238
- # Create typed attributes
239
- typed_attrs = extract_span_attributes(attrs, method_name, str(span.name))
240
-
241
- # Record span kind as extra attribute (TraceStep allows extras)
242
- try:
243
- typed_attrs.span_kind = span.kind.name # type: ignore[attr-defined]
244
- except Exception:
245
- logger.warning("Failed to set span kind attribute")
246
-
247
- # Build typed span
248
- # Guard context/parent/timestamps
249
- context = getattr(span, "context", None)
250
- trace_id_hex = (
251
- format(context.trace_id, "032x") if context and hasattr(context, "trace_id") else "0" * 32
252
- )
253
- span_id_hex = (
254
- format(context.span_id, "016x") if context and hasattr(context, "span_id") else "0" * 16
255
- )
256
- parent = getattr(span, "parent", None)
257
- parent_id_hex = (
258
- format(parent.span_id, "016x") if parent and hasattr(parent, "span_id") else None
259
- )
260
- start_ns = span.start_time or 0
261
- end_ns = span.end_time or start_ns
262
-
263
- typed_span = HudSpan(
264
- name=span.name,
265
- trace_id=trace_id_hex,
266
- span_id=span_id_hex,
267
- parent_span_id=parent_id_hex,
268
- start_time=_ts_ns_to_iso(int(start_ns)),
269
- end_time=_ts_ns_to_iso(int(end_ns)),
270
- status_code=span.status.status_code.name if span.status else "UNSET",
271
- status_message=span.status.description if span.status else None,
272
- attributes=typed_attrs,
273
- exceptions=None,
274
- )
275
-
276
- # Add error information if present
277
- if span.events:
278
- exceptions = []
279
- exceptions = [
280
- {
281
- "timestamp": _ts_ns_to_iso(event.timestamp),
282
- "attributes": dict(event.attributes or {}),
283
- }
284
- for event in span.events
285
- ]
286
- if exceptions:
287
- typed_span.exceptions = exceptions
288
-
289
- # Convert to dict for export
290
- return typed_span.model_dump(mode="json", by_alias=True, exclude_none=True)
291
-
292
-
293
- # ---------------------------------------------------------------------------
294
- # Exporter
295
- # ---------------------------------------------------------------------------
296
-
297
-
298
- class HudSpanExporter(SpanExporter):
299
- """Exporter that forwards spans to HUD backend using existing endpoint."""
300
-
301
- def __init__(self, *, telemetry_url: str, api_key: str) -> None:
302
- super().__init__()
303
- self._telemetry_url = telemetry_url.rstrip("/")
304
- self._api_key = api_key
305
-
306
- # ------------------------------------------------------------------
307
- # Core API
308
- # ------------------------------------------------------------------
309
- def export(self, spans: list[ReadableSpan]) -> SpanExportResult: # type: ignore[override]
310
- if not spans:
311
- return SpanExportResult.SUCCESS
312
-
313
- # Group spans by hud.task_run_id attribute
314
- grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
315
- for span in spans:
316
- run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
317
- if not run_id:
318
- # Skip spans that are outside HUD traces
319
- continue
320
- grouped[str(run_id)].append(span)
321
-
322
- # Send each group synchronously (retry inside make_request_sync)
323
- for run_id, span_batch in grouped.items():
324
- try:
325
- url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
326
- telemetry_spans = [_span_to_dict(s) for s in span_batch]
327
- # Include current step count in metadata
328
- metadata = {}
329
- # Get the HIGHEST step count from the batch (most recent)
330
- step_count = 0
331
- for span in span_batch:
332
- if span.attributes and "hud.step_count" in span.attributes:
333
- current_step = span.attributes["hud.step_count"]
334
- if isinstance(current_step, int) and current_step > step_count:
335
- step_count = current_step
336
-
337
- payload = {
338
- "metadata": metadata,
339
- "telemetry": telemetry_spans,
340
- }
341
-
342
- # Only include step_count if we found any steps
343
- if step_count > 0:
344
- payload["step_count"] = step_count
345
-
346
- logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
347
- make_request_sync(
348
- method="POST",
349
- url=url,
350
- json=payload,
351
- api_key=self._api_key,
352
- )
353
- except Exception as exc:
354
- logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
355
- # If *any* group fails we return FAILURE so the OTEL SDK can retry
356
- return SpanExportResult.FAILURE
357
-
358
- return SpanExportResult.SUCCESS
359
-
360
- def shutdown(self) -> None: # type: ignore[override]
361
- # Nothing to cleanup, httpx handled inside make_request_sync
362
- pass
363
-
364
- def force_flush(self, timeout_millis: int | None = None) -> bool: # type: ignore[override]
365
- # Synchronous export, nothing buffered here
366
- return True
1
+ """Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
2
+ HTTP endpoint (/trace/<id>/telemetry-upload).
3
+
4
+ The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
5
+ exactly the same semantics the old async worker in ``hud.telemetry.exporter``
6
+ implemented.
7
+
8
+ This exporter is *synchronous* (derives from :class:`SpanExporter`). We rely on
9
+ ``hud.shared.make_request_sync`` which already contains retry & auth logic.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import contextlib
15
+ import json
16
+ import logging
17
+ from collections import defaultdict
18
+ from datetime import UTC, datetime
19
+ from typing import TYPE_CHECKING, Any
20
+
21
+ from mcp.types import ClientRequest, ServerResult
22
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
23
+ from pydantic import BaseModel, ConfigDict, Field
24
+
25
+ from hud.shared import make_request_sync
26
+ from hud.types import TraceStep as HudSpanAttributes
27
+
28
+ if TYPE_CHECKING:
29
+ from opentelemetry.sdk.trace import ReadableSpan
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Models
36
+ # ---------------------------------------------------------------------------
37
+
38
+
39
+ class HudSpan(BaseModel):
40
+ """A telemetry span ready for export."""
41
+
42
+ name: str
43
+ trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
44
+ span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
45
+ parent_span_id: str | None = Field(None, pattern=r"^[0-9a-fA-F]{16}$")
46
+
47
+ start_time: str # ISO format
48
+ end_time: str # ISO format
49
+
50
+ status_code: str # "UNSET", "OK", "ERROR"
51
+ status_message: str | None = None
52
+
53
+ attributes: HudSpanAttributes
54
+ exceptions: list[dict[str, Any]] | None = None
55
+
56
+ model_config = ConfigDict(extra="forbid")
57
+
58
+
59
+ def extract_span_attributes(
60
+ attrs: dict[str, Any], method_name: str | None = None, span_name: str | None = None
61
+ ) -> HudSpanAttributes:
62
+ """Extract and parse span attributes into typed model.
63
+
64
+ This handles:
65
+ - Detecting span type (MCP vs Agent)
66
+ - Renaming verbose OpenTelemetry semantic conventions
67
+ - Parsing JSON strings to MCP types
68
+ """
69
+ # Start with core attributes - map to TraceStep field names
70
+ result_attrs = {
71
+ "task_run_id": attrs.get(
72
+ "hud.task_run_id"
73
+ ), # TraceStep expects task_run_id, not hud.task_run_id
74
+ "job_id": attrs.get("hud.job_id"), # TraceStep expects job_id, not hud.job_id
75
+ "type": attrs.get("span.kind", "CLIENT"), # TraceStep expects type, not span.kind
76
+ }
77
+
78
+ # Determine span type based on presence of agent or MCP attributes
79
+ # Note: The input attrs might already have "category" set
80
+ existing_category = attrs.get("category")
81
+
82
+ if existing_category:
83
+ # Use the explicit category if provided
84
+ result_attrs["category"] = existing_category
85
+ elif span_name and span_name.startswith("agent."):
86
+ # Legacy support for spans named "agent.*"
87
+ result_attrs["category"] = "agent"
88
+ else:
89
+ result_attrs["category"] = "mcp" # Default to MCP
90
+
91
+ # No special processing needed for different categories
92
+ # The backend will handle them based on the category field
93
+
94
+ # Add method_name and request_id for MCP spans
95
+ if result_attrs["category"] == "mcp":
96
+ if method_name:
97
+ result_attrs["method_name"] = method_name
98
+ # Check for request_id with and without semconv_ai prefix
99
+ request_id = attrs.get("semconv_ai.mcp.request_id") or attrs.get("mcp.request.id")
100
+ if request_id:
101
+ result_attrs["request_id"] = request_id
102
+
103
+ # Parse input/output - check both with and without semconv_ai prefix
104
+ input_str = attrs.get("semconv_ai.traceloop.entity.input") or attrs.get(
105
+ "traceloop.entity.input"
106
+ )
107
+ output_str = attrs.get("semconv_ai.traceloop.entity.output") or attrs.get(
108
+ "traceloop.entity.output"
109
+ )
110
+
111
+ logger.debug(
112
+ "Category: %s, has input: %s, has output: %s",
113
+ result_attrs.get("category"),
114
+ bool(input_str),
115
+ bool(output_str),
116
+ )
117
+
118
+ # Check for direct request/result attributes first
119
+ if "request" in attrs and not result_attrs.get("request"):
120
+ req = attrs["request"]
121
+ if isinstance(req, str):
122
+ with contextlib.suppress(json.JSONDecodeError):
123
+ req = json.loads(req)
124
+ result_attrs["request"] = req
125
+
126
+ if "result" in attrs and not result_attrs.get("result"):
127
+ res = attrs["result"]
128
+ if isinstance(res, str):
129
+ with contextlib.suppress(json.JSONDecodeError):
130
+ res = json.loads(res)
131
+ result_attrs["result"] = res
132
+
133
+ # Process input/output from MCP instrumentation
134
+ if input_str and not result_attrs.get("request"):
135
+ try:
136
+ input_data = json.loads(input_str) if isinstance(input_str, str) else input_str
137
+
138
+ # For MCP category, try to parse as ClientRequest to extract the root
139
+ if result_attrs["category"] == "mcp" and isinstance(input_data, dict):
140
+ try:
141
+ if "method" in input_data and "params" in input_data:
142
+ client_request = ClientRequest.model_validate(input_data)
143
+ result_attrs["request"] = client_request.root
144
+ else:
145
+ result_attrs["request"] = input_data
146
+ except Exception:
147
+ result_attrs["request"] = input_data
148
+ else:
149
+ # For all other categories, just store the data
150
+ result_attrs["request"] = input_data
151
+ except Exception as e:
152
+ logger.debug("Failed to parse request JSON: %s", e)
153
+
154
+ if output_str and not result_attrs.get("result"):
155
+ try:
156
+ output_data = json.loads(output_str) if isinstance(output_str, str) else output_str
157
+
158
+ # For MCP category, try to parse as ServerResult to extract the root
159
+ if result_attrs["category"] == "mcp" and isinstance(output_data, dict):
160
+ # Check for error
161
+ if "error" in output_data:
162
+ result_attrs["mcp_error"] = True
163
+ try:
164
+ server_result = ServerResult.model_validate(output_data)
165
+ result_attrs["result"] = server_result.root
166
+ # Check for isError in the result
167
+ if getattr(server_result.root, "isError", False):
168
+ result_attrs["mcp_error"] = True
169
+ except Exception:
170
+ result_attrs["result"] = output_data
171
+ else:
172
+ # For all other categories, just store the data
173
+ result_attrs["result"] = output_data
174
+ except Exception as e:
175
+ logger.debug("Failed to parse result JSON: %s", e)
176
+
177
+ # Don't include the verbose attributes or ones we've already processed
178
+ exclude_keys = {
179
+ "hud.task_run_id",
180
+ "hud.job_id",
181
+ "span.kind",
182
+ "semconv_ai.mcp.method_name",
183
+ "mcp.method.name", # Also exclude non-prefixed version
184
+ "semconv_ai.mcp.request_id",
185
+ "mcp.request.id", # Also exclude non-prefixed version
186
+ "semconv_ai.traceloop.entity.input",
187
+ "semconv_ai.traceloop.entity.output",
188
+ "traceloop.entity.input", # Also exclude non-prefixed versions
189
+ "traceloop.entity.output",
190
+ "mcp_request", # Exclude to prevent overwriting parsed values
191
+ "mcp_result", # Exclude to prevent overwriting parsed values
192
+ "request", # Exclude to prevent overwriting parsed values
193
+ "result", # Exclude to prevent overwriting parsed values
194
+ "category", # Already handled above
195
+ }
196
+
197
+ # Add any extra attributes
198
+ for key, value in attrs.items():
199
+ if key not in exclude_keys:
200
+ result_attrs[key] = value # noqa: PERF403
201
+
202
+ logger.debug(
203
+ """Final result_attrs before creating HudSpanAttributes:
204
+ request=%s,
205
+ result=%s""",
206
+ result_attrs.get("request"),
207
+ result_attrs.get("result"),
208
+ )
209
+ return HudSpanAttributes(**result_attrs)
210
+
211
+
212
+ # ---------------------------------------------------------------------------
213
+ # Helpers
214
+ # ---------------------------------------------------------------------------
215
+
216
+
217
+ def _ts_ns_to_iso(ts_ns: int) -> str:
218
+ """Convert a ``Span`` timestamp (nanoseconds) to ISO-8601 string."""
219
+ # OpenTelemetry times are epoch nanoseconds
220
+ dt = datetime.fromtimestamp(ts_ns / 1_000_000_000, tz=UTC)
221
+ return dt.isoformat().replace("+00:00", "Z")
222
+
223
+
224
+ def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
225
+ """Convert an OpenTelemetry span to a dict using typed models."""
226
+
227
+ attrs = dict(span.attributes or {})
228
+
229
+ # Extract method name from span name if not in attributes
230
+ # Check both with and without semconv_ai prefix
231
+ raw_method = attrs.get("semconv_ai.mcp.method_name") or attrs.get("mcp.method.name")
232
+ method_name: str | None = None
233
+ if isinstance(raw_method, str):
234
+ method_name = raw_method
235
+ if method_name is None and isinstance(span.name, str) and span.name.endswith(".mcp"):
236
+ method_name = span.name[:-4] # Remove .mcp suffix
237
+
238
+ # Create typed attributes
239
+ typed_attrs = extract_span_attributes(attrs, method_name, str(span.name))
240
+
241
+ # Record span kind as extra attribute (TraceStep allows extras)
242
+ try:
243
+ typed_attrs.span_kind = span.kind.name # type: ignore[attr-defined]
244
+ except Exception:
245
+ logger.warning("Failed to set span kind attribute")
246
+
247
+ # Build typed span
248
+ # Guard context/parent/timestamps
249
+ context = getattr(span, "context", None)
250
+ trace_id_hex = (
251
+ format(context.trace_id, "032x") if context and hasattr(context, "trace_id") else "0" * 32
252
+ )
253
+ span_id_hex = (
254
+ format(context.span_id, "016x") if context and hasattr(context, "span_id") else "0" * 16
255
+ )
256
+ parent = getattr(span, "parent", None)
257
+ parent_id_hex = (
258
+ format(parent.span_id, "016x") if parent and hasattr(parent, "span_id") else None
259
+ )
260
+ start_ns = span.start_time or 0
261
+ end_ns = span.end_time or start_ns
262
+
263
+ typed_span = HudSpan(
264
+ name=span.name,
265
+ trace_id=trace_id_hex,
266
+ span_id=span_id_hex,
267
+ parent_span_id=parent_id_hex,
268
+ start_time=_ts_ns_to_iso(int(start_ns)),
269
+ end_time=_ts_ns_to_iso(int(end_ns)),
270
+ status_code=span.status.status_code.name if span.status else "UNSET",
271
+ status_message=span.status.description if span.status else None,
272
+ attributes=typed_attrs,
273
+ exceptions=None,
274
+ )
275
+
276
+ # Add error information if present
277
+ if span.events:
278
+ exceptions = []
279
+ exceptions = [
280
+ {
281
+ "timestamp": _ts_ns_to_iso(event.timestamp),
282
+ "attributes": dict(event.attributes or {}),
283
+ }
284
+ for event in span.events
285
+ ]
286
+ if exceptions:
287
+ typed_span.exceptions = exceptions
288
+
289
+ # Convert to dict for export
290
+ return typed_span.model_dump(mode="json", by_alias=True, exclude_none=True)
291
+
292
+
293
+ # ---------------------------------------------------------------------------
294
+ # Exporter
295
+ # ---------------------------------------------------------------------------
296
+
297
+
298
+ class HudSpanExporter(SpanExporter):
299
+ """Exporter that forwards spans to HUD backend using existing endpoint."""
300
+
301
+ def __init__(self, *, telemetry_url: str, api_key: str) -> None:
302
+ super().__init__()
303
+ self._telemetry_url = telemetry_url.rstrip("/")
304
+ self._api_key = api_key
305
+
306
+ # ------------------------------------------------------------------
307
+ # Core API
308
+ # ------------------------------------------------------------------
309
+ def export(self, spans: list[ReadableSpan]) -> SpanExportResult: # type: ignore[override]
310
+ if not spans:
311
+ return SpanExportResult.SUCCESS
312
+
313
+ # Group spans by hud.task_run_id attribute
314
+ grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
315
+ for span in spans:
316
+ run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
317
+ if not run_id:
318
+ # Skip spans that are outside HUD traces
319
+ continue
320
+ grouped[str(run_id)].append(span)
321
+
322
+ # Send each group synchronously (retry inside make_request_sync)
323
+ for run_id, span_batch in grouped.items():
324
+ try:
325
+ url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
326
+ telemetry_spans = [_span_to_dict(s) for s in span_batch]
327
+ # Include current step count in metadata
328
+ metadata = {}
329
+ # Get the HIGHEST step count from the batch (most recent)
330
+ step_count = 0
331
+ for span in span_batch:
332
+ if span.attributes and "hud.step_count" in span.attributes:
333
+ current_step = span.attributes["hud.step_count"]
334
+ if isinstance(current_step, int) and current_step > step_count:
335
+ step_count = current_step
336
+
337
+ payload = {
338
+ "metadata": metadata,
339
+ "telemetry": telemetry_spans,
340
+ }
341
+
342
+ # Only include step_count if we found any steps
343
+ if step_count > 0:
344
+ payload["step_count"] = step_count
345
+
346
+ logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
347
+ make_request_sync(
348
+ method="POST",
349
+ url=url,
350
+ json=payload,
351
+ api_key=self._api_key,
352
+ )
353
+ except Exception as exc:
354
+ logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
355
+ # If *any* group fails we return FAILURE so the OTEL SDK can retry
356
+ return SpanExportResult.FAILURE
357
+
358
+ return SpanExportResult.SUCCESS
359
+
360
+ def shutdown(self) -> None: # type: ignore[override]
361
+ # Nothing to cleanup, httpx handled inside make_request_sync
362
+ pass
363
+
364
+ def force_flush(self, timeout_millis: int | None = None) -> bool: # type: ignore[override]
365
+ # Synchronous export, nothing buffered here
366
+ return True