PyPI - hud-python - Versions diffs - 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (192) hide show

hud/__init__.py +22 -89
hud/agents/__init__.py +17 -0
hud/agents/art.py +101 -0
hud/agents/base.py +599 -0
hud/{mcp → agents}/claude.py +373 -321
hud/{mcp → agents}/langchain.py +250 -250
hud/agents/misc/__init__.py +7 -0
hud/{agent → agents}/misc/response_agent.py +80 -80
hud/{mcp → agents}/openai.py +352 -334
hud/agents/openai_chat_generic.py +154 -0
hud/{mcp → agents}/tests/__init__.py +1 -1
hud/agents/tests/test_base.py +742 -0
hud/agents/tests/test_claude.py +324 -0
hud/{mcp → agents}/tests/test_client.py +363 -324
hud/{mcp → agents}/tests/test_openai.py +237 -238
hud/cli/__init__.py +617 -0
hud/cli/__main__.py +8 -0
hud/cli/analyze.py +371 -0
hud/cli/analyze_metadata.py +230 -0
hud/cli/build.py +427 -0
hud/cli/clone.py +185 -0
hud/cli/cursor.py +92 -0
hud/cli/debug.py +392 -0
hud/cli/docker_utils.py +83 -0
hud/cli/init.py +281 -0
hud/cli/interactive.py +353 -0
hud/cli/mcp_server.py +756 -0
hud/cli/pull.py +336 -0
hud/cli/push.py +379 -0
hud/cli/remote_runner.py +311 -0
hud/cli/runner.py +160 -0
hud/cli/tests/__init__.py +3 -0
hud/cli/tests/test_analyze.py +284 -0
hud/cli/tests/test_cli_init.py +265 -0
hud/cli/tests/test_cli_main.py +27 -0
hud/cli/tests/test_clone.py +142 -0
hud/cli/tests/test_cursor.py +253 -0
hud/cli/tests/test_debug.py +453 -0
hud/cli/tests/test_mcp_server.py +139 -0
hud/cli/tests/test_utils.py +388 -0
hud/cli/utils.py +263 -0
hud/clients/README.md +143 -0
hud/clients/__init__.py +16 -0
hud/clients/base.py +354 -0
hud/clients/fastmcp.py +202 -0
hud/clients/mcp_use.py +278 -0
hud/clients/tests/__init__.py +1 -0
hud/clients/tests/test_client_integration.py +111 -0
hud/clients/tests/test_fastmcp.py +342 -0
hud/clients/tests/test_protocol.py +188 -0
hud/clients/utils/__init__.py +1 -0
hud/clients/utils/retry_transport.py +160 -0
hud/datasets.py +322 -192
hud/misc/__init__.py +1 -0
hud/{agent → misc}/claude_plays_pokemon.py +292 -283
hud/otel/__init__.py +35 -0
hud/otel/collector.py +142 -0
hud/otel/config.py +164 -0
hud/otel/context.py +536 -0
hud/otel/exporters.py +366 -0
hud/otel/instrumentation.py +97 -0
hud/otel/processors.py +118 -0
hud/otel/tests/__init__.py +1 -0
hud/otel/tests/test_processors.py +197 -0
hud/server/__init__.py +5 -5
hud/server/context.py +114 -0
hud/server/helper/__init__.py +5 -0
hud/server/low_level.py +132 -0
hud/server/server.py +166 -0
hud/server/tests/__init__.py +3 -0
hud/settings.py +73 -79
hud/shared/__init__.py +5 -0
hud/{exceptions.py → shared/exceptions.py} +180 -180
hud/{server → shared}/requests.py +264 -264
hud/shared/tests/test_exceptions.py +157 -0
hud/{server → shared}/tests/test_requests.py +275 -275
hud/telemetry/__init__.py +25 -30
hud/telemetry/instrument.py +379 -0
hud/telemetry/job.py +309 -141
hud/telemetry/replay.py +74 -0
hud/telemetry/trace.py +83 -0
hud/tools/__init__.py +33 -34
hud/tools/base.py +365 -65
hud/tools/bash.py +161 -137
hud/tools/computer/__init__.py +15 -13
hud/tools/computer/anthropic.py +437 -414
hud/tools/computer/hud.py +376 -328
hud/tools/computer/openai.py +295 -286
hud/tools/computer/settings.py +82 -0
hud/tools/edit.py +314 -290
hud/tools/executors/__init__.py +30 -30
hud/tools/executors/base.py +539 -532
hud/tools/executors/pyautogui.py +621 -619
hud/tools/executors/tests/__init__.py +1 -1
hud/tools/executors/tests/test_base_executor.py +338 -338
hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
hud/tools/executors/xdo.py +511 -503
hud/tools/{playwright_tool.py → playwright.py} +412 -379
hud/tools/tests/__init__.py +3 -3
hud/tools/tests/test_base.py +282 -0
hud/tools/tests/test_bash.py +158 -152
hud/tools/tests/test_bash_extended.py +197 -0
hud/tools/tests/test_computer.py +425 -52
hud/tools/tests/test_computer_actions.py +34 -34
hud/tools/tests/test_edit.py +259 -240
hud/tools/tests/test_init.py +27 -27
hud/tools/tests/test_playwright_tool.py +183 -183
hud/tools/tests/test_tools.py +145 -157
hud/tools/tests/test_utils.py +156 -156
hud/tools/types.py +72 -0
hud/tools/utils.py +50 -50
hud/types.py +136 -89
hud/utils/__init__.py +10 -16
hud/utils/async_utils.py +65 -0
hud/utils/design.py +168 -0
hud/utils/mcp.py +55 -0
hud/utils/progress.py +149 -149
hud/utils/telemetry.py +66 -66
hud/utils/tests/test_async_utils.py +173 -0
hud/utils/tests/test_init.py +17 -21
hud/utils/tests/test_progress.py +261 -225
hud/utils/tests/test_telemetry.py +82 -37
hud/utils/tests/test_version.py +8 -8
hud/version.py +7 -7
hud_python-0.4.0.dist-info/METADATA +474 -0
hud_python-0.4.0.dist-info/RECORD +132 -0
hud_python-0.4.0.dist-info/entry_points.txt +3 -0
{hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
hud/adapters/__init__.py +0 -8
hud/adapters/claude/__init__.py +0 -5
hud/adapters/claude/adapter.py +0 -180
hud/adapters/claude/tests/__init__.py +0 -1
hud/adapters/claude/tests/test_adapter.py +0 -519
hud/adapters/common/__init__.py +0 -6
hud/adapters/common/adapter.py +0 -178
hud/adapters/common/tests/test_adapter.py +0 -289
hud/adapters/common/types.py +0 -446
hud/adapters/operator/__init__.py +0 -5
hud/adapters/operator/adapter.py +0 -108
hud/adapters/operator/tests/__init__.py +0 -1
hud/adapters/operator/tests/test_adapter.py +0 -370
hud/agent/__init__.py +0 -19
hud/agent/base.py +0 -126
hud/agent/claude.py +0 -271
hud/agent/langchain.py +0 -215
hud/agent/misc/__init__.py +0 -3
hud/agent/operator.py +0 -268
hud/agent/tests/__init__.py +0 -1
hud/agent/tests/test_base.py +0 -202
hud/env/__init__.py +0 -11
hud/env/client.py +0 -35
hud/env/docker_client.py +0 -349
hud/env/environment.py +0 -446
hud/env/local_docker_client.py +0 -358
hud/env/remote_client.py +0 -212
hud/env/remote_docker_client.py +0 -292
hud/gym.py +0 -130
hud/job.py +0 -773
hud/mcp/__init__.py +0 -17
hud/mcp/base.py +0 -631
hud/mcp/client.py +0 -312
hud/mcp/tests/test_base.py +0 -512
hud/mcp/tests/test_claude.py +0 -294
hud/task.py +0 -149
hud/taskset.py +0 -237
hud/telemetry/_trace.py +0 -347
hud/telemetry/context.py +0 -230
hud/telemetry/exporter.py +0 -575
hud/telemetry/instrumentation/__init__.py +0 -3
hud/telemetry/instrumentation/mcp.py +0 -259
hud/telemetry/instrumentation/registry.py +0 -59
hud/telemetry/mcp_models.py +0 -270
hud/telemetry/tests/__init__.py +0 -1
hud/telemetry/tests/test_context.py +0 -210
hud/telemetry/tests/test_trace.py +0 -312
hud/tools/helper/README.md +0 -56
hud/tools/helper/__init__.py +0 -9
hud/tools/helper/mcp_server.py +0 -78
hud/tools/helper/server_initialization.py +0 -115
hud/tools/helper/utils.py +0 -58
hud/trajectory.py +0 -94
hud/utils/agent.py +0 -37
hud/utils/common.py +0 -256
hud/utils/config.py +0 -120
hud/utils/deprecation.py +0 -115
hud/utils/misc.py +0 -53
hud/utils/tests/test_common.py +0 -277
hud/utils/tests/test_config.py +0 -129
hud_python-0.3.4.dist-info/METADATA +0 -284
hud_python-0.3.4.dist-info/RECORD +0 -120
/hud/{adapters/common → shared}/tests/__init__.py +0 -0
{hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0

hud/otel/exporters.py ADDED Viewed

@@ -0,0 +1,366 @@
+"""Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
+HTTP endpoint (/trace/<id>/telemetry-upload).
+The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
+exactly the same semantics the old async worker in ``hud.telemetry.exporter``
+implemented.
+This exporter is *synchronous* (derives from :class:`SpanExporter`).  We rely on
+``hud.shared.make_request_sync`` which already contains retry & auth logic.
+"""
+from __future__ import annotations
+import contextlib
+import json
+import logging
+from collections import defaultdict
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING, Any
+from mcp.types import ClientRequest, ServerResult
+from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
+from pydantic import BaseModel, ConfigDict, Field
+from hud.shared import make_request_sync
+from hud.types import TraceStep as HudSpanAttributes
+if TYPE_CHECKING:
+    from opentelemetry.sdk.trace import ReadableSpan
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Models
+# ---------------------------------------------------------------------------
+class HudSpan(BaseModel):
+    """Span record in the shape that is serialized and uploaded by the exporter."""
+    # Unknown keyword arguments are rejected instead of being silently stored.
+    model_config = ConfigDict(extra="forbid")
+    name: str
+    # Identifiers are fixed-width hex strings: 32 characters for a trace,
+    # 16 characters for a span.
+    trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
+    span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
+    parent_span_id: str | None = Field(default=None, pattern=r"^[0-9a-fA-F]{16}$")
+    # Both timestamps are ISO format strings.
+    start_time: str
+    end_time: str
+    # One of "UNSET", "OK", "ERROR".
+    status_code: str
+    status_message: str | None = None
+    attributes: HudSpanAttributes
+    exceptions: list[dict[str, Any]] | None = None
+# Attribute keys that are consumed (or deliberately dropped) while building the
+# typed model; they must not be copied through again as extra attributes.
+_CONSUMED_ATTR_KEYS = frozenset(
+    {
+        "hud.task_run_id",
+        "hud.job_id",
+        "span.kind",
+        "semconv_ai.mcp.method_name",
+        "mcp.method.name",  # Also exclude non-prefixed version
+        "semconv_ai.mcp.request_id",
+        "mcp.request.id",  # Also exclude non-prefixed version
+        "semconv_ai.traceloop.entity.input",
+        "semconv_ai.traceloop.entity.output",
+        "traceloop.entity.input",  # Also exclude non-prefixed versions
+        "traceloop.entity.output",
+        "mcp_request",  # Exclude to prevent overwriting parsed values
+        "mcp_result",  # Exclude to prevent overwriting parsed values
+        "request",  # Exclude to prevent overwriting parsed values
+        "result",  # Exclude to prevent overwriting parsed values
+        "category",  # Already handled above
+    }
+)
+def _decode_json_if_str(value: Any) -> Any:
+    """Return ``json.loads(value)`` for strings and ``value`` itself otherwise."""
+    return json.loads(value) if isinstance(value, str) else value
+def _direct_payload(raw: Any) -> Any:
+    """Decode a direct ``request``/``result`` attribute, keeping the raw string if not JSON."""
+    if isinstance(raw, str):
+        with contextlib.suppress(json.JSONDecodeError):
+            return json.loads(raw)
+    return raw
+def _mcp_request_payload(data: dict[str, Any]) -> Any:
+    """Return the ``ClientRequest`` root for input with method and params, else ``data``."""
+    if "method" not in data or "params" not in data:
+        return data
+    try:
+        return ClientRequest.model_validate(data).root
+    except Exception:
+        # Validation failed: keep the raw dict rather than lose the payload.
+        return data
+def _mcp_result_payload(data: dict[str, Any]) -> tuple[Any, bool]:
+    """Return ``(payload, is_error)`` for MCP output, falling back to the raw dict."""
+    is_error = "error" in data
+    try:
+        root = ServerResult.model_validate(data).root
+        # A successfully parsed result can still flag an error via ``isError``.
+        is_error = is_error or bool(getattr(root, "isError", False))
+    except Exception:
+        return data, is_error
+    return root, is_error
+def extract_span_attributes(
+    attrs: dict[str, Any], method_name: str | None = None, span_name: str | None = None
+) -> HudSpanAttributes:
+    """Extract and parse span attributes into the typed attribute model.
+    This handles:
+    - Detecting the span category (explicit ``category``, legacy ``agent.*`` name, else MCP)
+    - Renaming verbose OpenTelemetry semantic-convention keys to TraceStep field names
+    - Parsing JSON strings to MCP types where they validate
+    Args:
+        attrs: Raw span attributes.
+        method_name: MCP method name, stored only for spans in the ``mcp`` category.
+        span_name: Span name, used for the legacy ``agent.`` prefix check.
+    Returns:
+        The typed attributes built from the mapped keys plus every remaining
+        attribute that was not consumed during parsing.
+    """
+    # Core attributes - map to TraceStep field names
+    result_attrs: dict[str, Any] = {
+        "task_run_id": attrs.get("hud.task_run_id"),
+        "job_id": attrs.get("hud.job_id"),
+        "type": attrs.get("span.kind", "CLIENT"),
+    }
+    # An explicit category wins; otherwise fall back to the legacy "agent.*"
+    # span-name convention and finally default to MCP.
+    category = attrs.get("category")
+    if not category:
+        category = "agent" if span_name and span_name.startswith("agent.") else "mcp"
+    result_attrs["category"] = category
+    is_mcp = category == "mcp"
+    # method_name and request_id only apply to MCP spans
+    if is_mcp:
+        if method_name:
+            result_attrs["method_name"] = method_name
+        # Check for request_id with and without semconv_ai prefix
+        request_id = attrs.get("semconv_ai.mcp.request_id") or attrs.get("mcp.request.id")
+        if request_id:
+            result_attrs["request_id"] = request_id
+    # Input/output may be present with or without the semconv_ai prefix
+    input_str = attrs.get("semconv_ai.traceloop.entity.input") or attrs.get(
+        "traceloop.entity.input"
+    )
+    output_str = attrs.get("semconv_ai.traceloop.entity.output") or attrs.get(
+        "traceloop.entity.output"
+    )
+    logger.debug(
+        "Category: %s, has input: %s, has output: %s",
+        category,
+        bool(input_str),
+        bool(output_str),
+    )
+    # Direct request/result attributes take precedence over instrumented input/output
+    if "request" in attrs:
+        result_attrs["request"] = _direct_payload(attrs["request"])
+    if "result" in attrs:
+        result_attrs["result"] = _direct_payload(attrs["result"])
+    # Fall back to input/output from MCP instrumentation when nothing usable was set
+    if input_str and not result_attrs.get("request"):
+        try:
+            input_data = _decode_json_if_str(input_str)
+        except Exception as e:
+            logger.debug("Failed to parse request JSON: %s", e)
+        else:
+            if is_mcp and isinstance(input_data, dict):
+                result_attrs["request"] = _mcp_request_payload(input_data)
+            else:
+                # For all other categories, just store the data
+                result_attrs["request"] = input_data
+    if output_str and not result_attrs.get("result"):
+        try:
+            output_data = _decode_json_if_str(output_str)
+        except Exception as e:
+            logger.debug("Failed to parse result JSON: %s", e)
+        else:
+            if is_mcp and isinstance(output_data, dict):
+                payload, is_error = _mcp_result_payload(output_data)
+                result_attrs["result"] = payload
+                if is_error:
+                    result_attrs["mcp_error"] = True
+            else:
+                # For all other categories, just store the data
+                result_attrs["result"] = output_data
+    # Pass through every attribute that was not consumed above
+    result_attrs.update(
+        (key, value) for key, value in attrs.items() if key not in _CONSUMED_ATTR_KEYS
+    )
+    logger.debug(
+        """Final result_attrs before creating HudSpanAttributes:
+        request=%s,
+        result=%s""",
+        result_attrs.get("request"),
+        result_attrs.get("result"),
+    )
+    return HudSpanAttributes(**result_attrs)
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _ts_ns_to_iso(ts_ns: int) -> str:
+    """Render an epoch-nanosecond timestamp as a UTC ISO-8601 string ending in ``Z``."""
+    nanos_per_second = 1_000_000_000
+    moment = datetime.fromtimestamp(ts_ns / nanos_per_second, tz=UTC)
+    iso_text = moment.isoformat()
+    # isoformat() spells UTC as "+00:00"; emit the shorter "Z" designator instead.
+    return iso_text.replace("+00:00", "Z")
+def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
+    """Convert an OpenTelemetry span into a JSON-ready dict via the typed models.
+    Missing span context falls back to all-zero trace/span ids, a missing start
+    time to 0, and a missing end time to the start time.  Every event recorded on
+    the span is exported under the ``exceptions`` key.
+    Args:
+        span: The finished span to convert.
+    Returns:
+        The ``HudSpan`` dumped in JSON mode with ``None`` fields omitted.
+    """
+    attrs = dict(span.attributes or {})
+    # Prefer the method name from attributes (with or without semconv_ai prefix)
+    raw_method = attrs.get("semconv_ai.mcp.method_name") or attrs.get("mcp.method.name")
+    method_name: str | None = raw_method if isinstance(raw_method, str) else None
+    # Otherwise derive it from a span name of the form "<method>.mcp"
+    if method_name is None and isinstance(span.name, str) and span.name.endswith(".mcp"):
+        method_name = span.name[:-4]  # Remove .mcp suffix
+    # Create typed attributes
+    typed_attrs = extract_span_attributes(attrs, method_name, str(span.name))
+    # Record span kind as extra attribute (TraceStep allows extras)
+    try:
+        typed_attrs.span_kind = span.kind.name  # type: ignore[attr-defined]
+    except Exception:
+        logger.warning("Failed to set span kind attribute")
+    # Guard context/parent/timestamps: any of them may be absent on the span
+    context = getattr(span, "context", None)
+    trace_id_hex = (
+        format(context.trace_id, "032x") if context and hasattr(context, "trace_id") else "0" * 32
+    )
+    span_id_hex = (
+        format(context.span_id, "016x") if context and hasattr(context, "span_id") else "0" * 16
+    )
+    parent = getattr(span, "parent", None)
+    parent_id_hex = (
+        format(parent.span_id, "016x") if parent and hasattr(parent, "span_id") else None
+    )
+    start_ns = span.start_time or 0
+    end_ns = span.end_time or start_ns
+    typed_span = HudSpan(
+        name=span.name,
+        trace_id=trace_id_hex,
+        span_id=span_id_hex,
+        parent_span_id=parent_id_hex,
+        start_time=_ts_ns_to_iso(int(start_ns)),
+        end_time=_ts_ns_to_iso(int(end_ns)),
+        status_code=span.status.status_code.name if span.status else "UNSET",
+        status_message=span.status.description if span.status else None,
+        attributes=typed_attrs,
+        exceptions=None,
+    )
+    # Attach span events; the field stays None (and is omitted) when there are none
+    if span.events:
+        typed_span.exceptions = [
+            {
+                "timestamp": _ts_ns_to_iso(event.timestamp),
+                "attributes": dict(event.attributes or {}),
+            }
+            for event in span.events
+        ]
+    # Convert to dict for export
+    return typed_span.model_dump(mode="json", by_alias=True, exclude_none=True)
+# ---------------------------------------------------------------------------
+# Exporter
+# ---------------------------------------------------------------------------
+class HudSpanExporter(SpanExporter):
+    """Exporter that forwards spans to the HUD backend using the existing endpoint.
+    Spans are grouped by their ``hud.task_run_id`` attribute and each group is
+    POSTed to ``<telemetry_url>/trace/<run_id>/telemetry-upload``.  Spans without
+    that attribute are skipped.
+    """
+    def __init__(self, *, telemetry_url: str, api_key: str) -> None:
+        super().__init__()
+        # Trailing slashes are stripped so the URL built in export() has no "//"
+        self._telemetry_url = telemetry_url.rstrip("/")
+        self._api_key = api_key
+    @staticmethod
+    def _max_step_count(span_batch: list[ReadableSpan]) -> int:
+        """Return the highest integer ``hud.step_count`` in the batch, or 0 if none."""
+        highest = 0
+        for span in span_batch:
+            value = span.attributes.get("hud.step_count") if span.attributes else None
+            if isinstance(value, int) and value > highest:
+                highest = value
+        return highest
+    # ------------------------------------------------------------------
+    # Core API
+    # ------------------------------------------------------------------
+    def export(self, spans: list[ReadableSpan]) -> SpanExportResult:  # type: ignore[override]
+        """Upload ``spans`` grouped by task run.
+        Every group is attempted even when an earlier one fails, so one bad run
+        does not prevent delivery for the other runs in the same batch.
+        Returns:
+            ``SUCCESS`` when every group was sent (or there was nothing to send),
+            ``FAILURE`` when at least one group raised.
+        """
+        if not spans:
+            return SpanExportResult.SUCCESS
+        # Group spans by hud.task_run_id attribute
+        grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
+        for span in spans:
+            run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
+            if not run_id:
+                # Skip spans that are outside HUD traces
+                continue
+            grouped[str(run_id)].append(span)
+        outcome = SpanExportResult.SUCCESS
+        # Send each group synchronously (retry inside make_request_sync)
+        for run_id, span_batch in grouped.items():
+            url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
+            try:
+                payload: dict[str, Any] = {
+                    "metadata": {},
+                    "telemetry": [_span_to_dict(s) for s in span_batch],
+                }
+                # Only include step_count (top-level key) if we found any steps
+                step_count = self._max_step_count(span_batch)
+                if step_count > 0:
+                    payload["step_count"] = step_count
+                logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
+                make_request_sync(
+                    method="POST",
+                    url=url,
+                    json=payload,
+                    api_key=self._api_key,
+                )
+            except Exception as exc:
+                logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
+                # Remember the failure but keep going with the remaining groups
+                outcome = SpanExportResult.FAILURE
+        return outcome
+    def shutdown(self) -> None:  # type: ignore[override]
+        # Nothing to cleanup, httpx handled inside make_request_sync
+        pass
+    def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
+        # Synchronous export, nothing buffered here
+        return True

hud/otel/instrumentation.py ADDED Viewed

@@ -0,0 +1,97 @@
+"""MCP instrumentation support for HUD.
+This module provides functions to enable MCP OpenTelemetry instrumentation
+for automatic tracing of MCP protocol communication.
+"""
+from __future__ import annotations
+import logging
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    from collections.abc import AsyncGenerator, Callable
+    from opentelemetry.trace import TracerProvider
+logger = logging.getLogger(__name__)
+LIFECYCLE_TOOLS = {"setup", "evaluate"}
+def install_mcp_instrumentation(provider: TracerProvider) -> None:
+    """Enable community MCP OpenTelemetry instrumentation if present.
+    The compatibility patch is applied first and the instrumentor is then
+    attached to ``provider``.  A missing package is logged at debug level and any
+    other failure at warning level; nothing is raised to the caller.
+    Args:
+        provider: The TracerProvider to use for instrumentation
+    """
+    try:
+        # Imported lazily because the instrumentation package is optional
+        from opentelemetry.instrumentation.mcp.instrumentation import (
+            McpInstrumentor,
+        )
+        # First, patch the instrumentation to handle 3-value transports correctly
+        _patch_mcp_instrumentation()
+        McpInstrumentor().instrument(tracer_provider=provider)
+        logger.debug("MCP instrumentation installed with fastmcp compatibility patch")
+    except ImportError:
+        logger.debug("opentelemetry-instrumentation-mcp not available, skipping")
+    except Exception as exc:
+        logger.warning("Failed to install MCP instrumentation: %s", exc)
+def _patch_mcp_instrumentation() -> None:
+    """Patch MCP instrumentation to handle 3-value transport yields correctly.
+    ``McpInstrumentor._transport_wrapper`` is replaced with a wrapper that
+    instruments the read and write streams and forwards a third yielded value
+    unchanged when the transport yields three values.  Any failure while patching
+    is logged as a warning and the instrumentor is left untouched.
+    """
+    from contextlib import asynccontextmanager
+    try:
+        # Resolve every name the wrapper needs now, so a missing symbol skips the
+        # patch here instead of failing later each time a transport is opened.
+        from opentelemetry.instrumentation.mcp.instrumentation import (
+            InstrumentedStreamReader,
+            InstrumentedStreamWriter,
+            McpInstrumentor,
+        )
+        def patched_transport_wrapper(self: Any, tracer: Any) -> Callable[..., Any]:
+            @asynccontextmanager
+            async def traced_method(
+                wrapped: Callable[..., Any], instance: Any, args: Any, kwargs: Any
+            ) -> AsyncGenerator[Any, None]:
+                async with wrapped(*args, **kwargs) as result:
+                    if isinstance(result, tuple) and len(result) == 3:
+                        read_stream, write_stream, third_value = result
+                        passthrough: tuple[Any, ...] = (third_value,)
+                    else:
+                        # Fall back to 2-value case
+                        read_stream, write_stream = result
+                        passthrough = ()
+                    yield (
+                        InstrumentedStreamReader(read_stream, tracer),
+                        InstrumentedStreamWriter(write_stream, tracer),
+                        *passthrough,
+                    )
+            return traced_method
+        # Apply the patch
+        McpInstrumentor._transport_wrapper = patched_transport_wrapper
+    except Exception as e:
+        logger.warning("Failed to patch MCP instrumentation: %s", e)

hud/otel/processors.py ADDED Viewed

@@ -0,0 +1,118 @@
+from __future__ import annotations
+import logging
+from typing import Any
+from opentelemetry import baggage
+from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
+from .context import (
+    get_agent_steps,
+    get_base_mcp_steps,
+    get_mcp_tool_steps,
+    increment_agent_steps,
+    increment_base_mcp_steps,
+    increment_mcp_tool_steps,
+)
+logger = logging.getLogger(__name__)
+class HudEnrichmentProcessor(SpanProcessor):
+    """Span processor that stamps HUD context onto each recording span at start.
+    • Copies ``hud.task_run_id`` and ``hud.job_id`` from baggage when present.
+    • Increments the step counter(s) matching the span's step type.
+    • Sets ``hud.base_mcp_steps``, ``hud.mcp_tool_steps`` and ``hud.agent_steps``.
+    """
+    def __init__(self) -> None:
+        # Stateless: all values are read from baggage and the context helpers
+        super().__init__()
+    # --- callback hooks -------------------------------------------------
+    def on_start(self, span: Span, parent_context: Any) -> None:  # type: ignore[override]
+        try:
+            self._copy_baggage(span, parent_context)
+            if span.is_recording():
+                self._record_step(span)
+        except Exception as exc:  # defensive; never fail the tracer
+            logger.debug("HudEnrichmentProcessor.on_start error: %s", exc, exc_info=False)
+    def _copy_baggage(self, span: Span, parent_context: Any) -> None:
+        """Copy the run id and job id from the parent context's baggage onto the span."""
+        for key in ("hud.task_run_id", "hud.job_id"):
+            value = baggage.get_baggage(key, context=parent_context)
+            if value and span.is_recording():
+                span.set_attribute(key, str(value))
+    def _record_step(self, span: Span) -> None:
+        """Bump the counters for this span's step type, then stamp all current counts."""
+        kind = self._get_step_type(span)
+        if kind == "agent":
+            agent_total = increment_agent_steps()
+            span.set_attribute("hud.agent_steps", agent_total)
+            logger.debug("Incremented agent steps to %d", agent_total)
+        elif kind in ("base_mcp", "mcp_tool"):
+            # A tool call counts as a base MCP step as well as a tool step
+            base_total = increment_base_mcp_steps()
+            if kind == "mcp_tool":
+                tool_total = increment_mcp_tool_steps()
+                span.set_attribute("hud.base_mcp_steps", base_total)
+                span.set_attribute("hud.mcp_tool_steps", tool_total)
+                logger.debug("Incremented MCP steps to base=%d, tool=%d", base_total, tool_total)
+            else:
+                span.set_attribute("hud.base_mcp_steps", base_total)
+                logger.debug("Incremented base MCP steps to %d", base_total)
+        # Every recording span carries all three counters, step or not
+        span.set_attribute("hud.base_mcp_steps", get_base_mcp_steps())
+        span.set_attribute("hud.mcp_tool_steps", get_mcp_tool_steps())
+        span.set_attribute("hud.agent_steps", get_agent_steps())
+    def _get_step_type(self, span: Span) -> str | None:
+        """Classify the span as a step.
+        Returns:
+            'agent' when the ``category`` attribute is "agent"
+            'mcp_tool' when the span is named ``tools/call.mcp``
+            'base_mcp' for any other span whose name ends in ``.mcp``
+            None if not a step
+        """
+        attributes = span.attributes or {}
+        if attributes.get("category") == "agent":
+            return "agent"
+        name = span.name
+        if not name:
+            return None
+        if name == "tools/call.mcp":
+            return "mcp_tool"
+        return "base_mcp" if name.endswith(".mcp") else None
+    def on_end(self, span: ReadableSpan) -> None:
+        # Nothing to do: enrichment happens in on_start only
+        pass
+    # Required to fully implement abstract base, but we don't batch spans
+    def shutdown(self) -> None:  # type: ignore[override]
+        pass
+    def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
+        return True

hud/otel/tests/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Tests for OpenTelemetry integration."""

hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

Potentially problematic release.

hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl