PyPI - hud-python - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend

hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (130)

hud/__init__.py +22 -22
hud/agents/__init__.py +13 -15
hud/agents/base.py +599 -599
hud/agents/claude.py +373 -373
hud/agents/langchain.py +261 -250
hud/agents/misc/__init__.py +7 -7
hud/agents/misc/response_agent.py +82 -80
hud/agents/openai.py +352 -352
hud/agents/openai_chat_generic.py +154 -154
hud/agents/tests/__init__.py +1 -1
hud/agents/tests/test_base.py +742 -742
hud/agents/tests/test_claude.py +324 -324
hud/agents/tests/test_client.py +363 -363
hud/agents/tests/test_openai.py +237 -237
hud/cli/__init__.py +617 -617
hud/cli/__main__.py +8 -8
hud/cli/analyze.py +371 -371
hud/cli/analyze_metadata.py +230 -230
hud/cli/build.py +498 -427
hud/cli/clone.py +185 -185
hud/cli/cursor.py +92 -92
hud/cli/debug.py +392 -392
hud/cli/docker_utils.py +83 -83
hud/cli/init.py +280 -281
hud/cli/interactive.py +353 -353
hud/cli/mcp_server.py +764 -756
hud/cli/pull.py +330 -336
hud/cli/push.py +404 -370
hud/cli/remote_runner.py +311 -311
hud/cli/runner.py +160 -160
hud/cli/tests/__init__.py +3 -3
hud/cli/tests/test_analyze.py +284 -284
hud/cli/tests/test_cli_init.py +265 -265
hud/cli/tests/test_cli_main.py +27 -27
hud/cli/tests/test_clone.py +142 -142
hud/cli/tests/test_cursor.py +253 -253
hud/cli/tests/test_debug.py +453 -453
hud/cli/tests/test_mcp_server.py +139 -139
hud/cli/tests/test_utils.py +388 -388
hud/cli/utils.py +263 -263
hud/clients/README.md +143 -143
hud/clients/__init__.py +16 -16
hud/clients/base.py +378 -379
hud/clients/fastmcp.py +222 -222
hud/clients/mcp_use.py +298 -278
hud/clients/tests/__init__.py +1 -1
hud/clients/tests/test_client_integration.py +111 -111
hud/clients/tests/test_fastmcp.py +342 -342
hud/clients/tests/test_protocol.py +188 -188
hud/clients/utils/__init__.py +1 -1
hud/clients/utils/retry_transport.py +160 -160
hud/datasets.py +327 -322
hud/misc/__init__.py +1 -1
hud/misc/claude_plays_pokemon.py +292 -292
hud/otel/__init__.py +35 -35
hud/otel/collector.py +142 -142
hud/otel/config.py +164 -164
hud/otel/context.py +536 -536
hud/otel/exporters.py +366 -366
hud/otel/instrumentation.py +97 -97
hud/otel/processors.py +118 -118
hud/otel/tests/__init__.py +1 -1
hud/otel/tests/test_processors.py +197 -197
hud/server/__init__.py +5 -5
hud/server/context.py +114 -114
hud/server/helper/__init__.py +5 -5
hud/server/low_level.py +132 -132
hud/server/server.py +170 -166
hud/server/tests/__init__.py +3 -3
hud/settings.py +73 -73
hud/shared/__init__.py +5 -5
hud/shared/exceptions.py +180 -180
hud/shared/requests.py +264 -264
hud/shared/tests/test_exceptions.py +157 -157
hud/shared/tests/test_requests.py +275 -275
hud/telemetry/__init__.py +25 -25
hud/telemetry/instrument.py +379 -379
hud/telemetry/job.py +309 -309
hud/telemetry/replay.py +74 -74
hud/telemetry/trace.py +83 -83
hud/tools/__init__.py +33 -33
hud/tools/base.py +365 -365
hud/tools/bash.py +161 -161
hud/tools/computer/__init__.py +15 -15
hud/tools/computer/anthropic.py +437 -437
hud/tools/computer/hud.py +376 -376
hud/tools/computer/openai.py +295 -295
hud/tools/computer/settings.py +82 -82
hud/tools/edit.py +314 -314
hud/tools/executors/__init__.py +30 -30
hud/tools/executors/base.py +539 -539
hud/tools/executors/pyautogui.py +621 -621
hud/tools/executors/tests/__init__.py +1 -1
hud/tools/executors/tests/test_base_executor.py +338 -338
hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
hud/tools/executors/xdo.py +511 -511
hud/tools/playwright.py +412 -412
hud/tools/tests/__init__.py +3 -3
hud/tools/tests/test_base.py +282 -282
hud/tools/tests/test_bash.py +158 -158
hud/tools/tests/test_bash_extended.py +197 -197
hud/tools/tests/test_computer.py +425 -425
hud/tools/tests/test_computer_actions.py +34 -34
hud/tools/tests/test_edit.py +259 -259
hud/tools/tests/test_init.py +27 -27
hud/tools/tests/test_playwright_tool.py +183 -183
hud/tools/tests/test_tools.py +145 -145
hud/tools/tests/test_utils.py +156 -156
hud/tools/types.py +72 -72
hud/tools/utils.py +50 -50
hud/types.py +136 -136
hud/utils/__init__.py +10 -10
hud/utils/async_utils.py +65 -65
hud/utils/design.py +236 -168
hud/utils/mcp.py +55 -55
hud/utils/progress.py +149 -149
hud/utils/telemetry.py +66 -66
hud/utils/tests/test_async_utils.py +173 -173
hud/utils/tests/test_init.py +17 -17
hud/utils/tests/test_progress.py +261 -261
hud/utils/tests/test_telemetry.py +82 -82
hud/utils/tests/test_version.py +8 -8
hud/version.py +7 -7
{hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
hud_python-0.4.3.dist-info/RECORD +131 -0
{hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
hud/agents/art.py +0 -101
hud_python-0.4.1.dist-info/RECORD +0 -132
{hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
{hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0

hud/otel/exporters.py CHANGED Viewed

@@ -1,366 +1,366 @@
-"""Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
-HTTP endpoint (/trace/<id>/telemetry-upload).
-The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
-exactly the same semantics the old async worker in ``hud.telemetry.exporter``
-implemented.
-This exporter is *synchronous* (derives from :class:`SpanExporter`).  We rely on
-``hud.shared.make_request_sync`` which already contains retry & auth logic.
-"""
-from __future__ import annotations
-import contextlib
-import json
-import logging
-from collections import defaultdict
-from datetime import UTC, datetime
-from typing import TYPE_CHECKING, Any
-from mcp.types import ClientRequest, ServerResult
-from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
-from pydantic import BaseModel, ConfigDict, Field
-from hud.shared import make_request_sync
-from hud.types import TraceStep as HudSpanAttributes
-if TYPE_CHECKING:
-    from opentelemetry.sdk.trace import ReadableSpan
-logger = logging.getLogger(__name__)
-# ---------------------------------------------------------------------------
-# Models
-# ---------------------------------------------------------------------------
-class HudSpan(BaseModel):
-    """A telemetry span ready for export."""
-    name: str
-    trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
-    span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
-    parent_span_id: str | None = Field(None, pattern=r"^[0-9a-fA-F]{16}$")
-    start_time: str  # ISO format
-    end_time: str  # ISO format
-    status_code: str  # "UNSET", "OK", "ERROR"
-    status_message: str | None = None
-    attributes: HudSpanAttributes
-    exceptions: list[dict[str, Any]] | None = None
-    model_config = ConfigDict(extra="forbid")
-def extract_span_attributes(
-    attrs: dict[str, Any], method_name: str | None = None, span_name: str | None = None
-) -> HudSpanAttributes:
-    """Extract and parse span attributes into typed model.
-    This handles:
-    - Detecting span type (MCP vs Agent)
-    - Renaming verbose OpenTelemetry semantic conventions
-    - Parsing JSON strings to MCP types
-    """
-    # Start with core attributes - map to TraceStep field names
-    result_attrs = {
-        "task_run_id": attrs.get(
-            "hud.task_run_id"
-        ),  # TraceStep expects task_run_id, not hud.task_run_id
-        "job_id": attrs.get("hud.job_id"),  # TraceStep expects job_id, not hud.job_id
-        "type": attrs.get("span.kind", "CLIENT"),  # TraceStep expects type, not span.kind
-    }
-    # Determine span type based on presence of agent or MCP attributes
-    # Note: The input attrs might already have "category" set
-    existing_category = attrs.get("category")
-    if existing_category:
-        # Use the explicit category if provided
-        result_attrs["category"] = existing_category
-    elif span_name and span_name.startswith("agent."):
-        # Legacy support for spans named "agent.*"
-        result_attrs["category"] = "agent"
-    else:
-        result_attrs["category"] = "mcp"  # Default to MCP
-    # No special processing needed for different categories
-    # The backend will handle them based on the category field
-    # Add method_name and request_id for MCP spans
-    if result_attrs["category"] == "mcp":
-        if method_name:
-            result_attrs["method_name"] = method_name
-        # Check for request_id with and without semconv_ai prefix
-        request_id = attrs.get("semconv_ai.mcp.request_id") or attrs.get("mcp.request.id")
-        if request_id:
-            result_attrs["request_id"] = request_id
-    # Parse input/output - check both with and without semconv_ai prefix
-    input_str = attrs.get("semconv_ai.traceloop.entity.input") or attrs.get(
-        "traceloop.entity.input"
-    )
-    output_str = attrs.get("semconv_ai.traceloop.entity.output") or attrs.get(
-        "traceloop.entity.output"
-    )
-    logger.debug(
-        "Category: %s, has input: %s, has output: %s",
-        result_attrs.get("category"),
-        bool(input_str),
-        bool(output_str),
-    )
-    # Check for direct request/result attributes first
-    if "request" in attrs and not result_attrs.get("request"):
-        req = attrs["request"]
-        if isinstance(req, str):
-            with contextlib.suppress(json.JSONDecodeError):
-                req = json.loads(req)
-        result_attrs["request"] = req
-    if "result" in attrs and not result_attrs.get("result"):
-        res = attrs["result"]
-        if isinstance(res, str):
-            with contextlib.suppress(json.JSONDecodeError):
-                res = json.loads(res)
-        result_attrs["result"] = res
-    # Process input/output from MCP instrumentation
-    if input_str and not result_attrs.get("request"):
-        try:
-            input_data = json.loads(input_str) if isinstance(input_str, str) else input_str
-            # For MCP category, try to parse as ClientRequest to extract the root
-            if result_attrs["category"] == "mcp" and isinstance(input_data, dict):
-                try:
-                    if "method" in input_data and "params" in input_data:
-                        client_request = ClientRequest.model_validate(input_data)
-                        result_attrs["request"] = client_request.root
-                    else:
-                        result_attrs["request"] = input_data
-                except Exception:
-                    result_attrs["request"] = input_data
-            else:
-                # For all other categories, just store the data
-                result_attrs["request"] = input_data
-        except Exception as e:
-            logger.debug("Failed to parse request JSON: %s", e)
-    if output_str and not result_attrs.get("result"):
-        try:
-            output_data = json.loads(output_str) if isinstance(output_str, str) else output_str
-            # For MCP category, try to parse as ServerResult to extract the root
-            if result_attrs["category"] == "mcp" and isinstance(output_data, dict):
-                # Check for error
-                if "error" in output_data:
-                    result_attrs["mcp_error"] = True
-                try:
-                    server_result = ServerResult.model_validate(output_data)
-                    result_attrs["result"] = server_result.root
-                    # Check for isError in the result
-                    if getattr(server_result.root, "isError", False):
-                        result_attrs["mcp_error"] = True
-                except Exception:
-                    result_attrs["result"] = output_data
-            else:
-                # For all other categories, just store the data
-                result_attrs["result"] = output_data
-        except Exception as e:
-            logger.debug("Failed to parse result JSON: %s", e)
-    # Don't include the verbose attributes or ones we've already processed
-    exclude_keys = {
-        "hud.task_run_id",
-        "hud.job_id",
-        "span.kind",
-        "semconv_ai.mcp.method_name",
-        "mcp.method.name",  # Also exclude non-prefixed version
-        "semconv_ai.mcp.request_id",
-        "mcp.request.id",  # Also exclude non-prefixed version
-        "semconv_ai.traceloop.entity.input",
-        "semconv_ai.traceloop.entity.output",
-        "traceloop.entity.input",  # Also exclude non-prefixed versions
-        "traceloop.entity.output",
-        "mcp_request",  # Exclude to prevent overwriting parsed values
-        "mcp_result",  # Exclude to prevent overwriting parsed values
-        "request",  # Exclude to prevent overwriting parsed values
-        "result",  # Exclude to prevent overwriting parsed values
-        "category",  # Already handled above
-    }
-    # Add any extra attributes
-    for key, value in attrs.items():
-        if key not in exclude_keys:
-            result_attrs[key] = value  # noqa: PERF403
-    logger.debug(
-        """Final result_attrs before creating HudSpanAttributes:
-        request=%s,
-        result=%s""",
-        result_attrs.get("request"),
-        result_attrs.get("result"),
-    )
-    return HudSpanAttributes(**result_attrs)
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-def _ts_ns_to_iso(ts_ns: int) -> str:
-    """Convert a ``Span`` timestamp (nanoseconds) to ISO-8601 string."""
-    # OpenTelemetry times are epoch nanoseconds
-    dt = datetime.fromtimestamp(ts_ns / 1_000_000_000, tz=UTC)
-    return dt.isoformat().replace("+00:00", "Z")
-def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
-    """Convert an OpenTelemetry span to a dict using typed models."""
-    attrs = dict(span.attributes or {})
-    # Extract method name from span name if not in attributes
-    # Check both with and without semconv_ai prefix
-    raw_method = attrs.get("semconv_ai.mcp.method_name") or attrs.get("mcp.method.name")
-    method_name: str | None = None
-    if isinstance(raw_method, str):
-        method_name = raw_method
-    if method_name is None and isinstance(span.name, str) and span.name.endswith(".mcp"):
-        method_name = span.name[:-4]  # Remove .mcp suffix
-    # Create typed attributes
-    typed_attrs = extract_span_attributes(attrs, method_name, str(span.name))
-    # Record span kind as extra attribute (TraceStep allows extras)
-    try:
-        typed_attrs.span_kind = span.kind.name  # type: ignore[attr-defined]
-    except Exception:
-        logger.warning("Failed to set span kind attribute")
-    # Build typed span
-    # Guard context/parent/timestamps
-    context = getattr(span, "context", None)
-    trace_id_hex = (
-        format(context.trace_id, "032x") if context and hasattr(context, "trace_id") else "0" * 32
-    )
-    span_id_hex = (
-        format(context.span_id, "016x") if context and hasattr(context, "span_id") else "0" * 16
-    )
-    parent = getattr(span, "parent", None)
-    parent_id_hex = (
-        format(parent.span_id, "016x") if parent and hasattr(parent, "span_id") else None
-    )
-    start_ns = span.start_time or 0
-    end_ns = span.end_time or start_ns
-    typed_span = HudSpan(
-        name=span.name,
-        trace_id=trace_id_hex,
-        span_id=span_id_hex,
-        parent_span_id=parent_id_hex,
-        start_time=_ts_ns_to_iso(int(start_ns)),
-        end_time=_ts_ns_to_iso(int(end_ns)),
-        status_code=span.status.status_code.name if span.status else "UNSET",
-        status_message=span.status.description if span.status else None,
-        attributes=typed_attrs,
-        exceptions=None,
-    )
-    # Add error information if present
-    if span.events:
-        exceptions = []
-        exceptions = [
-            {
-                "timestamp": _ts_ns_to_iso(event.timestamp),
-                "attributes": dict(event.attributes or {}),
-            }
-            for event in span.events
-        ]
-        if exceptions:
-            typed_span.exceptions = exceptions
-    # Convert to dict for export
-    return typed_span.model_dump(mode="json", by_alias=True, exclude_none=True)
-# ---------------------------------------------------------------------------
-# Exporter
-# ---------------------------------------------------------------------------
-class HudSpanExporter(SpanExporter):
-    """Exporter that forwards spans to HUD backend using existing endpoint."""
-    def __init__(self, *, telemetry_url: str, api_key: str) -> None:
-        super().__init__()
-        self._telemetry_url = telemetry_url.rstrip("/")
-        self._api_key = api_key
-    # ------------------------------------------------------------------
-    # Core API
-    # ------------------------------------------------------------------
-    def export(self, spans: list[ReadableSpan]) -> SpanExportResult:  # type: ignore[override]
-        if not spans:
-            return SpanExportResult.SUCCESS
-        # Group spans by hud.task_run_id attribute
-        grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
-        for span in spans:
-            run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
-            if not run_id:
-                # Skip spans that are outside HUD traces
-                continue
-            grouped[str(run_id)].append(span)
-        # Send each group synchronously (retry inside make_request_sync)
-        for run_id, span_batch in grouped.items():
-            try:
-                url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
-                telemetry_spans = [_span_to_dict(s) for s in span_batch]
-                # Include current step count in metadata
-                metadata = {}
-                # Get the HIGHEST step count from the batch (most recent)
-                step_count = 0
-                for span in span_batch:
-                    if span.attributes and "hud.step_count" in span.attributes:
-                        current_step = span.attributes["hud.step_count"]
-                        if isinstance(current_step, int) and current_step > step_count:
-                            step_count = current_step
-                payload = {
-                    "metadata": metadata,
-                    "telemetry": telemetry_spans,
-                }
-                # Only include step_count if we found any steps
-                if step_count > 0:
-                    payload["step_count"] = step_count
-                logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
-                make_request_sync(
-                    method="POST",
-                    url=url,
-                    json=payload,
-                    api_key=self._api_key,
-                )
-            except Exception as exc:
-                logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
-                # If *any* group fails we return FAILURE so the OTEL SDK can retry
-                return SpanExportResult.FAILURE
-        return SpanExportResult.SUCCESS
-    def shutdown(self) -> None:  # type: ignore[override]
-        # Nothing to cleanup, httpx handled inside make_request_sync
-        pass
-    def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
-        # Synchronous export, nothing buffered here
-        return True
+"""Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
+HTTP endpoint (/trace/<id>/telemetry-upload).
+The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
+exactly the same semantics the old async worker in ``hud.telemetry.exporter``
+implemented.
+This exporter is *synchronous* (derives from :class:`SpanExporter`).  We rely on
+``hud.shared.make_request_sync`` which already contains retry & auth logic.
+"""
+from __future__ import annotations
+import contextlib
+import json
+import logging
+from collections import defaultdict
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING, Any
+from mcp.types import ClientRequest, ServerResult
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from pydantic import BaseModel, ConfigDict, Field
+from hud.shared import make_request_sync
+from hud.types import TraceStep as HudSpanAttributes
+if TYPE_CHECKING:
+    from opentelemetry.sdk.trace import ReadableSpan
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Models
+# ---------------------------------------------------------------------------
+class HudSpan(BaseModel):
+    """A telemetry span ready for export.
+    Instances are built in ``_span_to_dict`` from an OpenTelemetry span and
+    then dumped to a JSON-compatible dict. Fields not declared here are
+    rejected (``extra="forbid"``).
+    """
+    name: str
+    # IDs are validated as fixed-width hex strings: 32 digits for the trace ID,
+    # 16 digits for the span ID and the (optional) parent span ID.
+    trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
+    span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
+    parent_span_id: str | None = Field(None, pattern=r"^[0-9a-fA-F]{16}$")
+    start_time: str  # ISO format
+    end_time: str  # ISO format
+    status_code: str  # "UNSET", "OK", "ERROR"
+    status_message: str | None = None
+    # Typed attribute payload; HudSpanAttributes is hud.types.TraceStep under an alias.
+    attributes: HudSpanAttributes
+    # One dict per span event (timestamp + attributes); None when the span has no events.
+    exceptions: list[dict[str, Any]] | None = None
+    model_config = ConfigDict(extra="forbid")
+def extract_span_attributes(
+    attrs: dict[str, Any], method_name: str | None = None, span_name: str | None = None
+) -> HudSpanAttributes:
+    """Extract and parse span attributes into typed model.
+    This handles:
+    - Detecting span type (MCP vs Agent)
+    - Renaming verbose OpenTelemetry semantic conventions
+    - Parsing JSON strings to MCP types
+    Args:
+        attrs: Raw span attributes keyed by attribute name.
+        method_name: MCP method name; only stored when the category resolves
+            to "mcp".
+        span_name: Span name; a name starting with "agent." selects the
+            "agent" category when ``attrs`` carries no explicit "category".
+    Returns:
+        A ``HudSpanAttributes`` built from the mapped core fields, the parsed
+        request/result, and every remaining attribute whose key is not in
+        ``exclude_keys``.
+    """
+    # Start with core attributes - map to TraceStep field names
+    result_attrs = {
+        "task_run_id": attrs.get(
+            "hud.task_run_id"
+        ),  # TraceStep expects task_run_id, not hud.task_run_id
+        "job_id": attrs.get("hud.job_id"),  # TraceStep expects job_id, not hud.job_id
+        "type": attrs.get("span.kind", "CLIENT"),  # TraceStep expects type, not span.kind
+    }
+    # Determine the category. Precedence: explicit "category" attribute,
+    # then the legacy "agent." span-name prefix, then the "mcp" default.
+    # Note: The input attrs might already have "category" set
+    existing_category = attrs.get("category")
+    if existing_category:
+        # Use the explicit category if provided
+        result_attrs["category"] = existing_category
+    elif span_name and span_name.startswith("agent."):
+        # Legacy support for spans named "agent.*"
+        result_attrs["category"] = "agent"
+    else:
+        result_attrs["category"] = "mcp"  # Default to MCP
+    # No special processing needed for different categories
+    # The backend will handle them based on the category field
+    # Add method_name and request_id for MCP spans
+    if result_attrs["category"] == "mcp":
+        if method_name:
+            result_attrs["method_name"] = method_name
+        # Check for request_id with and without semconv_ai prefix
+        request_id = attrs.get("semconv_ai.mcp.request_id") or attrs.get("mcp.request.id")
+        if request_id:
+            result_attrs["request_id"] = request_id
+    # Parse input/output - check both with and without semconv_ai prefix
+    # (the prefixed key wins when both are present and truthy)
+    input_str = attrs.get("semconv_ai.traceloop.entity.input") or attrs.get(
+        "traceloop.entity.input"
+    )
+    output_str = attrs.get("semconv_ai.traceloop.entity.output") or attrs.get(
+        "traceloop.entity.output"
+    )
+    logger.debug(
+        "Category: %s, has input: %s, has output: %s",
+        result_attrs.get("category"),
+        bool(input_str),
+        bool(output_str),
+    )
+    # Check for direct request/result attributes first; these take priority
+    # over the traceloop input/output handled further down.
+    # NOTE(review): result_attrs has no "request"/"result" key at this point,
+    # so the `not result_attrs.get(...)` half of each condition is always true.
+    if "request" in attrs and not result_attrs.get("request"):
+        req = attrs["request"]
+        if isinstance(req, str):
+            # Best effort: a string that is not valid JSON is stored unchanged.
+            with contextlib.suppress(json.JSONDecodeError):
+                req = json.loads(req)
+        result_attrs["request"] = req
+    if "result" in attrs and not result_attrs.get("result"):
+        res = attrs["result"]
+        if isinstance(res, str):
+            # Best effort: a string that is not valid JSON is stored unchanged.
+            with contextlib.suppress(json.JSONDecodeError):
+                res = json.loads(res)
+        result_attrs["result"] = res
+    # Process input/output from MCP instrumentation
+    # (skipped when a truthy direct "request" was stored above)
+    if input_str and not result_attrs.get("request"):
+        try:
+            input_data = json.loads(input_str) if isinstance(input_str, str) else input_str
+            # For MCP category, try to parse as ClientRequest to extract the root
+            if result_attrs["category"] == "mcp" and isinstance(input_data, dict):
+                try:
+                    # Only dicts carrying both "method" and "params" are validated;
+                    # anything else is stored as the plain dict.
+                    if "method" in input_data and "params" in input_data:
+                        client_request = ClientRequest.model_validate(input_data)
+                        result_attrs["request"] = client_request.root
+                    else:
+                        result_attrs["request"] = input_data
+                except Exception:
+                    # Validation failed: fall back to the unvalidated dict.
+                    result_attrs["request"] = input_data
+            else:
+                # For all other categories, just store the data
+                result_attrs["request"] = input_data
+        except Exception as e:
+            # Unparseable input is dropped; "request" stays unset.
+            logger.debug("Failed to parse request JSON: %s", e)
+    # Same flow for the output side (skipped when a truthy direct "result" exists).
+    if output_str and not result_attrs.get("result"):
+        try:
+            output_data = json.loads(output_str) if isinstance(output_str, str) else output_str
+            # For MCP category, try to parse as ServerResult to extract the root
+            if result_attrs["category"] == "mcp" and isinstance(output_data, dict):
+                # Check for error
+                if "error" in output_data:
+                    result_attrs["mcp_error"] = True
+                try:
+                    server_result = ServerResult.model_validate(output_data)
+                    result_attrs["result"] = server_result.root
+                    # Check for isError in the result
+                    if getattr(server_result.root, "isError", False):
+                        result_attrs["mcp_error"] = True
+                except Exception:
+                    # Validation failed: fall back to the unvalidated dict.
+                    result_attrs["result"] = output_data
+            else:
+                # For all other categories, just store the data
+                result_attrs["result"] = output_data
+        except Exception as e:
+            # Unparseable output is dropped; "result" stays unset.
+            logger.debug("Failed to parse result JSON: %s", e)
+    # Don't include the verbose attributes or ones we've already processed
+    exclude_keys = {
+        "hud.task_run_id",
+        "hud.job_id",
+        "span.kind",
+        "semconv_ai.mcp.method_name",
+        "mcp.method.name",  # Also exclude non-prefixed version
+        "semconv_ai.mcp.request_id",
+        "mcp.request.id",  # Also exclude non-prefixed version
+        "semconv_ai.traceloop.entity.input",
+        "semconv_ai.traceloop.entity.output",
+        "traceloop.entity.input",  # Also exclude non-prefixed versions
+        "traceloop.entity.output",
+        # NOTE(review): "mcp_request"/"mcp_result" are never read in this
+        # function, so attributes with those names are simply discarded.
+        "mcp_request",  # Exclude to prevent overwriting parsed values
+        "mcp_result",  # Exclude to prevent overwriting parsed values
+        "request",  # Exclude to prevent overwriting parsed values
+        "result",  # Exclude to prevent overwriting parsed values
+        "category",  # Already handled above
+    }
+    # Add any extra attributes
+    # NOTE(review): a key that is not excluded replaces any entry already in
+    # result_attrs under the same name (e.g. an attribute literally named
+    # "type", "task_run_id" or "method_name").
+    for key, value in attrs.items():
+        if key not in exclude_keys:
+            result_attrs[key] = value  # noqa: PERF403
+    logger.debug(
+        """Final result_attrs before creating HudSpanAttributes:
+        request=%s,
+        result=%s""",
+        result_attrs.get("request"),
+        result_attrs.get("result"),
+    )
+    # Construction errors from HudSpanAttributes are not caught here and
+    # propagate to the caller.
+    return HudSpanAttributes(**result_attrs)
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _ts_ns_to_iso(ts_ns: int) -> str:
+    """Convert a ``Span`` timestamp (nanoseconds) to ISO-8601 string.
+    The result is expressed in UTC and uses a trailing "Z" in place of the
+    "+00:00" offset.
+    """
+    # OpenTelemetry times are epoch nanoseconds
+    # The value is divided down to float seconds, so the output resolution is
+    # limited to what float/datetime can represent, not full nanoseconds.
+    dt = datetime.fromtimestamp(ts_ns / 1_000_000_000, tz=UTC)
+    return dt.isoformat().replace("+00:00", "Z")
+def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
+    """Convert an OpenTelemetry span to a dict using typed models.
+    The span is turned into a ``HudSpan`` (with attributes produced by
+    ``extract_span_attributes``) and returned as a JSON-mode dump with
+    ``None`` fields omitted.
+    """
+    # Copy into a plain dict; a span without attributes yields an empty dict.
+    attrs = dict(span.attributes or {})
+    # Extract method name from span name if not in attributes
+    # Check both with and without semconv_ai prefix
+    raw_method = attrs.get("semconv_ai.mcp.method_name") or attrs.get("mcp.method.name")
+    method_name: str | None = None
+    if isinstance(raw_method, str):
+        method_name = raw_method
+    if method_name is None and isinstance(span.name, str) and span.name.endswith(".mcp"):
+        method_name = span.name[:-4]  # Remove .mcp suffix
+    # Create typed attributes
+    typed_attrs = extract_span_attributes(attrs, method_name, str(span.name))
+    # Record span kind as extra attribute (TraceStep allows extras)
+    # Best effort: any failure here is logged and the span is still exported.
+    try:
+        typed_attrs.span_kind = span.kind.name  # type: ignore[attr-defined]
+    except Exception:
+        logger.warning("Failed to set span kind attribute")
+    # Build typed span
+    # Guard context/parent/timestamps
+    # A missing context falls back to all-zero IDs, which still satisfy the
+    # hex patterns declared on HudSpan.
+    context = getattr(span, "context", None)
+    trace_id_hex = (
+        format(context.trace_id, "032x") if context and hasattr(context, "trace_id") else "0" * 32
+    )
+    span_id_hex = (
+        format(context.span_id, "016x") if context and hasattr(context, "span_id") else "0" * 16
+    )
+    parent = getattr(span, "parent", None)
+    parent_id_hex = (
+        format(parent.span_id, "016x") if parent and hasattr(parent, "span_id") else None
+    )
+    # Missing start time becomes 0 (the epoch); missing end time reuses the start.
+    start_ns = span.start_time or 0
+    end_ns = span.end_time or start_ns
+    typed_span = HudSpan(
+        name=span.name,
+        trace_id=trace_id_hex,
+        span_id=span_id_hex,
+        parent_span_id=parent_id_hex,
+        start_time=_ts_ns_to_iso(int(start_ns)),
+        end_time=_ts_ns_to_iso(int(end_ns)),
+        status_code=span.status.status_code.name if span.status else "UNSET",
+        status_message=span.status.description if span.status else None,
+        attributes=typed_attrs,
+        exceptions=None,
+    )
+    # Add error information if present
+    # NOTE(review): every event on the span is exported under "exceptions";
+    # there is no filtering by event name or type.
+    if span.events:
+        # NOTE(review): this empty-list assignment is overwritten by the next statement.
+        exceptions = []
+        exceptions = [
+            {
+                "timestamp": _ts_ns_to_iso(event.timestamp),
+                "attributes": dict(event.attributes or {}),
+            }
+            for event in span.events
+        ]
+        if exceptions:
+            typed_span.exceptions = exceptions
+    # Convert to dict for export
+    return typed_span.model_dump(mode="json", by_alias=True, exclude_none=True)
+# ---------------------------------------------------------------------------
+# Exporter
+# ---------------------------------------------------------------------------
+class HudSpanExporter(SpanExporter):
+    """Exporter that forwards spans to HUD backend using existing endpoint.
+    Spans are grouped by their ``hud.task_run_id`` attribute and each group is
+    POSTed to ``<telemetry_url>/trace/<run_id>/telemetry-upload``.
+    """
+    def __init__(self, *, telemetry_url: str, api_key: str) -> None:
+        """Store the base URL (trailing slashes removed) and the API key."""
+        super().__init__()
+        self._telemetry_url = telemetry_url.rstrip("/")
+        self._api_key = api_key
+    # ------------------------------------------------------------------
+    # Core API
+    # ------------------------------------------------------------------
+    def export(self, spans: list[ReadableSpan]) -> SpanExportResult:  # type: ignore[override]
+        """Send ``spans`` to the HUD backend, one request per task run.
+        Returns ``SUCCESS`` when there is nothing to send or every group was
+        sent without raising, and ``FAILURE`` as soon as one group raises.
+        """
+        if not spans:
+            return SpanExportResult.SUCCESS
+        # Group spans by hud.task_run_id attribute
+        grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
+        for span in spans:
+            run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
+            if not run_id:
+                # Skip spans that are outside HUD traces
+                continue
+            grouped[str(run_id)].append(span)
+        # Send each group synchronously (retry inside make_request_sync)
+        for run_id, span_batch in grouped.items():
+            try:
+                url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
+                telemetry_spans = [_span_to_dict(s) for s in span_batch]
+                # metadata is always sent as an empty dict; the step count is
+                # attached to the payload top level further down.
+                metadata = {}
+                # Get the HIGHEST step count from the batch (most recent)
+                # Only int values of "hud.step_count" are considered.
+                step_count = 0
+                for span in span_batch:
+                    if span.attributes and "hud.step_count" in span.attributes:
+                        current_step = span.attributes["hud.step_count"]
+                        if isinstance(current_step, int) and current_step > step_count:
+                            step_count = current_step
+                payload = {
+                    "metadata": metadata,
+                    "telemetry": telemetry_spans,
+                }
+                # Only include step_count if we found any steps
+                if step_count > 0:
+                    payload["step_count"] = step_count
+                logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
+                make_request_sync(
+                    method="POST",
+                    url=url,
+                    json=payload,
+                    api_key=self._api_key,
+                )
+            except Exception as exc:
+                # This also covers failures while converting spans, not just the HTTP call.
+                logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
+                # If *any* group fails we return FAILURE so the OTEL SDK can retry
+                # NOTE(review): returning here skips the remaining groups, while
+                # groups already sent in this call stay sent.
+                return SpanExportResult.FAILURE
+        return SpanExportResult.SUCCESS
+    def shutdown(self) -> None:  # type: ignore[override]
+        """No-op: this exporter holds no resources of its own."""
+        # Nothing to cleanup, httpx handled inside make_request_sync
+        pass
+    def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
+        """Always return True; ``timeout_millis`` is ignored."""
+        # Synchronous export, nothing buffered here
+        return True

hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl