hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (192) hide show
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -414
  87. hud/tools/computer/hud.py +376 -328
  88. hud/tools/computer/openai.py +295 -286
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.4.dist-info/METADATA +0 -284
  190. hud_python-0.3.4.dist-info/RECORD +0 -120
  191. /hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
hud/otel/exporters.py ADDED
@@ -0,0 +1,366 @@
1
+ """Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
2
+ HTTP endpoint (/trace/<id>/telemetry-upload).
3
+
4
+ The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
5
+ exactly the same semantics the old async worker in ``hud.telemetry.exporter``
6
+ implemented.
7
+
8
+ This exporter is *synchronous* (derives from :class:`SpanExporter`). We rely on
9
+ ``hud.shared.make_request_sync`` which already contains retry & auth logic.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import contextlib
15
+ import json
16
+ import logging
17
+ from collections import defaultdict
18
+ from datetime import UTC, datetime
19
+ from typing import TYPE_CHECKING, Any
20
+
21
+ from mcp.types import ClientRequest, ServerResult
22
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
23
+ from pydantic import BaseModel, ConfigDict, Field
24
+
25
+ from hud.shared import make_request_sync
26
+ from hud.types import TraceStep as HudSpanAttributes
27
+
28
+ if TYPE_CHECKING:
29
+ from opentelemetry.sdk.trace import ReadableSpan
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Models
36
+ # ---------------------------------------------------------------------------
37
+
38
+
39
class HudSpan(BaseModel):
    """Serializable form of a single span as it is uploaded to HUD.

    Identifiers are fixed-width hexadecimal strings, timestamps are ISO-8601
    strings, and fields that are not declared here are rejected.
    """

    # Reject unknown fields instead of silently carrying them along.
    model_config = ConfigDict(extra="forbid")

    # --- identity ---
    name: str
    trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
    span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
    parent_span_id: str | None = Field(default=None, pattern=r"^[0-9a-fA-F]{16}$")

    # --- timing (ISO format) ---
    start_time: str
    end_time: str

    # --- outcome ---
    status_code: str  # "UNSET", "OK", "ERROR"
    status_message: str | None = None

    # --- payload ---
    attributes: HudSpanAttributes
    exceptions: list[dict[str, Any]] | None = None
57
+
58
+
59
def extract_span_attributes(
    attrs: dict[str, Any], method_name: str | None = None, span_name: str | None = None
) -> HudSpanAttributes:
    """Translate raw span attributes into the typed attribute model.

    The translation performs these steps:

    - ``hud.task_run_id``, ``hud.job_id`` and ``span.kind`` are mapped onto the
      ``task_run_id``, ``job_id`` and ``type`` fields (``type`` falls back to
      ``"CLIENT"``).
    - ``category`` is taken from ``attrs`` when it is set; otherwise a span
      whose name starts with ``agent.`` is categorised as ``"agent"`` and
      anything else as ``"mcp"``.
    - For ``"mcp"`` spans, ``method_name`` and the request id are carried over
      when they are available.
    - ``request`` / ``result`` are read from the attributes of the same name
      (JSON strings are decoded when possible). If that leaves them missing or
      falsy, the traceloop entity input / output attributes are used instead;
      for ``"mcp"`` spans these are validated as ``ClientRequest`` /
      ``ServerResult`` with the decoded payload kept as a fallback.
    - ``mcp_error`` is set when an MCP output contains an ``error`` key or the
      validated result reports ``isError``.
    - Every attribute not consumed above is copied through unchanged.

    Args:
        attrs: Raw attribute mapping of the span.
        method_name: MCP method name, if the caller resolved one.
        span_name: Name of the span, used for the legacy ``agent.*`` detection.

    Returns:
        The populated ``HudSpanAttributes`` instance.
    """
    # Resolve the category first; several later decisions depend on it.
    category = attrs.get("category")
    if not category:
        # Legacy spans named "agent.*" count as agent spans; MCP is the default.
        category = "agent" if span_name and span_name.startswith("agent.") else "mcp"
    is_mcp = category == "mcp"

    # Core attributes, keyed by the field names the typed model expects.
    fields: dict[str, Any] = {
        "task_run_id": attrs.get("hud.task_run_id"),
        "job_id": attrs.get("hud.job_id"),
        "type": attrs.get("span.kind", "CLIENT"),
        "category": category,
    }

    if is_mcp:
        if method_name:
            fields["method_name"] = method_name
        # The request id may be stored with or without the semconv_ai prefix.
        request_id = attrs.get("semconv_ai.mcp.request_id") or attrs.get("mcp.request.id")
        if request_id:
            fields["request_id"] = request_id

    # Instrumentation payloads, again with or without the semconv_ai prefix.
    raw_input = attrs.get("semconv_ai.traceloop.entity.input") or attrs.get(
        "traceloop.entity.input"
    )
    raw_output = attrs.get("semconv_ai.traceloop.entity.output") or attrs.get(
        "traceloop.entity.output"
    )

    logger.debug(
        "Category: %s, has input: %s, has output: %s",
        category,
        bool(raw_input),
        bool(raw_output),
    )

    # Direct "request" / "result" attributes take precedence over the
    # instrumentation payloads handled below.
    for direct_key in ("request", "result"):
        if direct_key not in attrs:
            continue
        direct_value = attrs[direct_key]
        if isinstance(direct_value, str):
            # Strings that are not valid JSON are kept verbatim.
            with contextlib.suppress(json.JSONDecodeError):
                direct_value = json.loads(direct_value)
        fields[direct_key] = direct_value

    # Request from the instrumentation input, unless one was already found.
    if raw_input and not fields.get("request"):
        try:
            decoded_input = json.loads(raw_input) if isinstance(raw_input, str) else raw_input
            request_value = decoded_input
            looks_like_client_request = (
                is_mcp
                and isinstance(decoded_input, dict)
                and "method" in decoded_input
                and "params" in decoded_input
            )
            if looks_like_client_request:
                try:
                    request_value = ClientRequest.model_validate(decoded_input).root
                except Exception:
                    # Validation failed: keep the decoded payload as-is.
                    request_value = decoded_input
            fields["request"] = request_value
        except Exception as e:
            logger.debug("Failed to parse request JSON: %s", e)

    # Result from the instrumentation output, unless one was already found.
    if raw_output and not fields.get("result"):
        try:
            decoded_output = json.loads(raw_output) if isinstance(raw_output, str) else raw_output
            if is_mcp and isinstance(decoded_output, dict):
                if "error" in decoded_output:
                    fields["mcp_error"] = True
                try:
                    server_result = ServerResult.model_validate(decoded_output)
                    fields["result"] = server_result.root
                    # A validated result can still flag a tool-level error.
                    if getattr(server_result.root, "isError", False):
                        fields["mcp_error"] = True
                except Exception:
                    # Validation failed: keep the decoded payload as-is.
                    fields["result"] = decoded_output
            else:
                fields["result"] = decoded_output
        except Exception as e:
            logger.debug("Failed to parse result JSON: %s", e)

    # Attributes that were renamed or parsed above and must not be copied
    # through again (copying them would overwrite the parsed values).
    consumed_keys = {
        "hud.task_run_id",
        "hud.job_id",
        "span.kind",
        "semconv_ai.mcp.method_name",
        "mcp.method.name",
        "semconv_ai.mcp.request_id",
        "mcp.request.id",
        "semconv_ai.traceloop.entity.input",
        "semconv_ai.traceloop.entity.output",
        "traceloop.entity.input",
        "traceloop.entity.output",
        "mcp_request",
        "mcp_result",
        "request",
        "result",
        "category",
    }

    # Everything else is passed through unchanged.
    fields.update({key: value for key, value in attrs.items() if key not in consumed_keys})

    logger.debug(
        "Final result_attrs before creating HudSpanAttributes:\n"
        "request=%s,\n"
        "result=%s",
        fields.get("request"),
        fields.get("result"),
    )
    return HudSpanAttributes(**fields)
210
+
211
+
212
+ # ---------------------------------------------------------------------------
213
+ # Helpers
214
+ # ---------------------------------------------------------------------------
215
+
216
+
217
+ def _ts_ns_to_iso(ts_ns: int) -> str:
218
+ """Convert a ``Span`` timestamp (nanoseconds) to ISO-8601 string."""
219
+ # OpenTelemetry times are epoch nanoseconds
220
+ dt = datetime.fromtimestamp(ts_ns / 1_000_000_000, tz=UTC)
221
+ return dt.isoformat().replace("+00:00", "Z")
222
+
223
+
224
def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
    """Serialize an OpenTelemetry span into the JSON-ready HUD span dict.

    The span is first converted into a ``HudSpan`` model and then dumped with
    ``None`` values omitted.

    Args:
        span: The finished span to convert.

    Returns:
        A JSON-compatible dictionary describing the span.
    """
    raw_attributes = dict(span.attributes or {})

    # Resolve the MCP method name: prefer the attribute (with or without the
    # semconv_ai prefix), otherwise derive it from a "<method>.mcp" span name.
    method_name: str | None = None
    method_attr = raw_attributes.get("semconv_ai.mcp.method_name") or raw_attributes.get(
        "mcp.method.name"
    )
    if isinstance(method_attr, str):
        method_name = method_attr
    elif isinstance(span.name, str) and span.name.endswith(".mcp"):
        method_name = span.name[:-4]  # drop the ".mcp" suffix

    typed_attrs = extract_span_attributes(raw_attributes, method_name, str(span.name))

    # The span kind is stored as an extra attribute on the typed model.
    try:
        typed_attrs.span_kind = span.kind.name  # type: ignore[attr-defined]
    except Exception:
        logger.warning("Failed to set span kind attribute")

    # Identifiers: fall back to all-zero ids when the span has no usable context.
    span_context = getattr(span, "context", None)
    if span_context and hasattr(span_context, "trace_id"):
        trace_id_hex = f"{span_context.trace_id:032x}"
    else:
        trace_id_hex = "0" * 32
    if span_context and hasattr(span_context, "span_id"):
        span_id_hex = f"{span_context.span_id:016x}"
    else:
        span_id_hex = "0" * 16

    parent_context = getattr(span, "parent", None)
    parent_id_hex = None
    if parent_context and hasattr(parent_context, "span_id"):
        parent_id_hex = f"{parent_context.span_id:016x}"

    # Timestamps: a missing start becomes 0, a missing end mirrors the start.
    start_ns = span.start_time or 0
    end_ns = span.end_time or start_ns

    status = span.status
    hud_span = HudSpan(
        name=span.name,
        trace_id=trace_id_hex,
        span_id=span_id_hex,
        parent_span_id=parent_id_hex,
        start_time=_ts_ns_to_iso(int(start_ns)),
        end_time=_ts_ns_to_iso(int(end_ns)),
        status_code=status.status_code.name if status else "UNSET",
        status_message=status.description if status else None,
        attributes=typed_attrs,
        exceptions=None,
    )

    # Span events are reported under "exceptions" (timestamp + attributes only).
    events = span.events
    if events:
        hud_span.exceptions = [
            {
                "timestamp": _ts_ns_to_iso(event.timestamp),
                "attributes": dict(event.attributes or {}),
            }
            for event in events
        ]

    return hud_span.model_dump(mode="json", by_alias=True, exclude_none=True)
291
+
292
+
293
+ # ---------------------------------------------------------------------------
294
+ # Exporter
295
+ # ---------------------------------------------------------------------------
296
+
297
+
298
class HudSpanExporter(SpanExporter):
    """Exporter that forwards spans to HUD backend using existing endpoint.

    Spans are grouped by their ``hud.task_run_id`` attribute and each group is
    POSTed to ``<telemetry_url>/trace/<task_run_id>/telemetry-upload`` via
    ``make_request_sync``.
    """

    def __init__(self, *, telemetry_url: str, api_key: str) -> None:
        """Store the endpoint and credentials used for uploads.

        Args:
            telemetry_url: Base URL of the telemetry service; trailing slashes
                are stripped.
            api_key: API key passed to ``make_request_sync`` on every upload.
        """
        super().__init__()
        self._telemetry_url = telemetry_url.rstrip("/")
        self._api_key = api_key

    # ------------------------------------------------------------------
    # Core API
    # ------------------------------------------------------------------
    def export(self, spans: list[ReadableSpan]) -> SpanExportResult:  # type: ignore[override]
        """Upload ``spans`` to the HUD backend, one request per task run.

        Spans without a ``hud.task_run_id`` attribute are skipped. Every group
        is attempted even if an earlier one fails, so an upload error for one
        task run does not drop the telemetry of the other task runs in the
        same batch.

        Args:
            spans: Finished spans handed over by the span processor.

        Returns:
            ``SpanExportResult.SUCCESS`` if every group was uploaded (or there
            was nothing to upload), ``SpanExportResult.FAILURE`` if at least
            one group failed.
        """
        if not spans:
            return SpanExportResult.SUCCESS

        # Group spans by hud.task_run_id attribute
        grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
        for span in spans:
            run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
            if not run_id:
                # Skip spans that are outside HUD traces
                continue
            grouped[str(run_id)].append(span)

        outcome = SpanExportResult.SUCCESS

        # Send each group synchronously
        for run_id, span_batch in grouped.items():
            try:
                url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
                payload: dict[str, Any] = {
                    "metadata": {},
                    "telemetry": [_span_to_dict(s) for s in span_batch],
                }

                # Only include step_count if we found any steps
                step_count = self._highest_step_count(span_batch)
                if step_count > 0:
                    payload["step_count"] = step_count

                logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
                make_request_sync(
                    method="POST",
                    url=url,
                    json=payload,
                    api_key=self._api_key,
                )
            except Exception as exc:
                logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
                # Remember the failure but keep going: the remaining groups
                # belong to other task runs and should still be attempted.
                outcome = SpanExportResult.FAILURE

        return outcome

    @staticmethod
    def _highest_step_count(span_batch: list[ReadableSpan]) -> int:
        """Return the largest integer ``hud.step_count`` in the batch, or 0 if none."""
        highest = 0
        for span in span_batch:
            if span.attributes and "hud.step_count" in span.attributes:
                candidate = span.attributes["hud.step_count"]
                if isinstance(candidate, int) and candidate > highest:
                    highest = candidate
        return highest

    def shutdown(self) -> None:  # type: ignore[override]
        """Release exporter resources (nothing is held by this exporter)."""
        # Nothing to cleanup, httpx handled inside make_request_sync
        pass

    def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
        """Report a successful flush; ``export`` is synchronous and buffers nothing."""
        return True
hud/otel/instrumentation.py ADDED
@@ -0,0 +1,97 @@
1
+ """MCP instrumentation support for HUD.
2
+
3
+ This module provides functions to enable MCP OpenTelemetry instrumentation
4
+ for automatic tracing of MCP protocol communication.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import AsyncGenerator, Callable
14
+
15
+ from opentelemetry.trace import TracerProvider
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ LIFECYCLE_TOOLS = {"setup", "evaluate"}
20
+
21
+
22
def install_mcp_instrumentation(provider: TracerProvider) -> None:
    """Turn on the community MCP OpenTelemetry instrumentation when installed.

    The transport-wrapper patch is applied before the instrumentor is
    activated. A missing instrumentation package is logged at debug level and
    any other failure at warning level; nothing is raised to the caller.

    Args:
        provider: The TracerProvider handed to the instrumentor.
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        from opentelemetry.instrumentation.mcp.instrumentation import McpInstrumentor

        # Patch first so 3-value transports are handled once instrumented.
        _patch_mcp_instrumentation()
        instrumentor = McpInstrumentor()
        instrumentor.instrument(tracer_provider=provider)
    except ImportError:
        log.debug("opentelemetry-instrumentation-mcp not available, skipping")
    except Exception as exc:
        log.warning("Failed to install MCP instrumentation: %s", exc)
    else:
        log.debug("MCP instrumentation installed with fastmcp compatibility patch")
46
+
47
+
48
+ def _patch_mcp_instrumentation() -> None:
49
+ """Patch MCP instrumentation to handle 3-value transport yields correctly."""
50
+ from contextlib import asynccontextmanager
51
+
52
+ try:
53
+ from opentelemetry.instrumentation.mcp.instrumentation import McpInstrumentor
54
+
55
+ def patched_transport_wrapper(self: Any, tracer: Any) -> Callable[..., Any]:
56
+ @asynccontextmanager
57
+ async def traced_method(
58
+ wrapped: Callable[..., Any], instance: Any, args: Any, kwargs: Any
59
+ ) -> AsyncGenerator[Any, None]:
60
+ async with wrapped(*args, **kwargs) as result:
61
+ # Check if we got a tuple with 3 values
62
+ if isinstance(result, tuple) and len(result) == 3:
63
+ read_stream, write_stream, third_value = result
64
+ # Import here to avoid circular imports
65
+ from opentelemetry.instrumentation.mcp.instrumentation import (
66
+ InstrumentedStreamReader,
67
+ InstrumentedStreamWriter,
68
+ )
69
+
70
+ yield (
71
+ InstrumentedStreamReader(read_stream, tracer),
72
+ InstrumentedStreamWriter(write_stream, tracer),
73
+ third_value,
74
+ )
75
+ else:
76
+ # Fall back to 2-value case
77
+ read_stream, write_stream = result
78
+ from opentelemetry.instrumentation.mcp.instrumentation import (
79
+ InstrumentedStreamReader,
80
+ InstrumentedStreamWriter,
81
+ )
82
+
83
+ yield (
84
+ InstrumentedStreamReader(read_stream, tracer),
85
+ InstrumentedStreamWriter(write_stream, tracer),
86
+ )
87
+
88
+ return traced_method
89
+
90
+ # Apply the patch
91
+ McpInstrumentor._transport_wrapper = patched_transport_wrapper
92
+
93
+ except Exception as e:
94
+ import logging
95
+
96
+ logger = logging.getLogger(__name__)
97
+ logger.warning("Failed to patch MCP instrumentation: %s", e)
hud/otel/processors.py ADDED
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any
5
+
6
+ from opentelemetry import baggage
7
+ from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
8
+
9
+ from .context import (
10
+ get_agent_steps,
11
+ get_base_mcp_steps,
12
+ get_mcp_tool_steps,
13
+ increment_agent_steps,
14
+ increment_base_mcp_steps,
15
+ increment_mcp_tool_steps,
16
+ )
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class HudEnrichmentProcessor(SpanProcessor):
    """Span processor that stamps HUD context onto spans when they start.

    For a recording span, ``on_start``:

    • copies ``hud.task_run_id`` and ``hud.job_id`` from the parent context's
      baggage onto the span when they are set;
    • increments the agent / base-MCP / MCP-tool step counters matching the span;
    • records the current counter values as ``hud.agent_steps``,
      ``hud.base_mcp_steps`` and ``hud.mcp_tool_steps``.
    """

    def __init__(self) -> None:
        # Stateless: values come from baggage and the step-counter helpers.
        super().__init__()

    # --- callback hooks -------------------------------------------------
    def on_start(self, span: Span, parent_context: Any) -> None:  # type: ignore[override]
        """Enrich ``span`` with HUD ids and step counters; never raises."""
        try:
            # Copy the HUD identifiers found in baggage onto the span.
            for baggage_key in ("hud.task_run_id", "hud.job_id"):
                baggage_value = baggage.get_baggage(baggage_key, context=parent_context)
                if baggage_value and span.is_recording():
                    span.set_attribute(baggage_key, str(baggage_value))

            if not span.is_recording():
                return

            # Bump the counters that correspond to this kind of step.
            kind = self._get_step_type(span)
            if kind == "agent":
                agent_total = increment_agent_steps()
                span.set_attribute("hud.agent_steps", agent_total)
                logger.debug("Incremented agent steps to %d", agent_total)
            elif kind == "base_mcp":
                base_total = increment_base_mcp_steps()
                span.set_attribute("hud.base_mcp_steps", base_total)
                logger.debug("Incremented base MCP steps to %d", base_total)
            elif kind == "mcp_tool":
                # A tool call counts as both a base MCP step and a tool step.
                base_total = increment_base_mcp_steps()
                tool_total = increment_mcp_tool_steps()
                span.set_attribute("hud.base_mcp_steps", base_total)
                span.set_attribute("hud.mcp_tool_steps", tool_total)
                logger.debug("Incremented MCP steps to base=%d, tool=%d", base_total, tool_total)

            # Every recording span carries the full set of current counters.
            span.set_attribute("hud.base_mcp_steps", get_base_mcp_steps())
            span.set_attribute("hud.mcp_tool_steps", get_mcp_tool_steps())
            span.set_attribute("hud.agent_steps", get_agent_steps())

        except Exception as exc:  # defensive; never fail the tracer
            logger.debug("HudEnrichmentProcessor.on_start error: %s", exc, exc_info=False)

    def _get_step_type(self, span: Span) -> str | None:
        """Classify the span as a kind of step.

        Returns:
            'agent' when the span's ``category`` attribute is ``"agent"``
            'mcp_tool' for the span named ``tools/call.mcp``
            'base_mcp' for any other span whose name ends in ``.mcp``
            None if the span is not a step
        """
        attributes = span.attributes or {}
        name = span.name

        # An explicit agent category wins over any name-based detection.
        if attributes.get("category") == "agent":
            return "agent"

        if not name:
            return None
        if name == "tools/call.mcp":
            return "mcp_tool"
        if name.endswith(".mcp"):
            return "base_mcp"
        return None

    def on_end(self, span: ReadableSpan) -> None:
        """Do nothing; enrichment happens in ``on_start`` only."""
        pass

    # Required to fully implement abstract base, but we don't batch spans
    def shutdown(self) -> None:  # type: ignore[override]
        """Do nothing; the processor holds no resources."""
        pass

    def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
        """Report success; nothing is buffered by this processor."""
        return True
hud/otel/tests/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Tests for OpenTelemetry integration."""