PyPI - hud-python - Versions diffs - 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (274) hide show

hud/__init__.py +27 -7
hud/agents/__init__.py +11 -5
hud/agents/base.py +220 -500
hud/agents/claude.py +200 -240
hud/agents/gemini.py +275 -0
hud/agents/gemini_cua.py +335 -0
hud/agents/grounded_openai.py +98 -100
hud/agents/misc/integration_test_agent.py +51 -20
hud/agents/misc/response_agent.py +41 -36
hud/agents/openai.py +291 -292
hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
hud/agents/operator.py +211 -0
hud/agents/tests/conftest.py +133 -0
hud/agents/tests/test_base.py +300 -622
hud/agents/tests/test_base_runtime.py +233 -0
hud/agents/tests/test_claude.py +379 -210
hud/agents/tests/test_client.py +9 -10
hud/agents/tests/test_gemini.py +369 -0
hud/agents/tests/test_grounded_openai_agent.py +65 -50
hud/agents/tests/test_openai.py +376 -140
hud/agents/tests/test_operator.py +362 -0
hud/agents/tests/test_run_eval.py +179 -0
hud/cli/__init__.py +461 -545
hud/cli/analyze.py +43 -5
hud/cli/build.py +664 -110
hud/cli/debug.py +8 -5
hud/cli/dev.py +882 -734
hud/cli/eval.py +782 -668
hud/cli/flows/dev.py +167 -0
hud/cli/flows/init.py +191 -0
hud/cli/flows/tasks.py +153 -56
hud/cli/flows/templates.py +151 -0
hud/cli/flows/tests/__init__.py +1 -0
hud/cli/flows/tests/test_dev.py +126 -0
hud/cli/init.py +60 -58
hud/cli/push.py +29 -11
hud/cli/rft.py +311 -0
hud/cli/rft_status.py +145 -0
hud/cli/tests/test_analyze.py +5 -5
hud/cli/tests/test_analyze_metadata.py +3 -2
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +108 -6
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_init.py +6 -1
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +140 -0
hud/cli/tests/test_convert.py +361 -0
hud/cli/tests/test_debug.py +12 -10
hud/cli/tests/test_dev.py +197 -0
hud/cli/tests/test_eval.py +251 -0
hud/cli/tests/test_eval_bedrock.py +51 -0
hud/cli/tests/test_init.py +124 -0
hud/cli/tests/test_main_module.py +11 -5
hud/cli/tests/test_mcp_server.py +12 -100
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/tests/test_registry.py +1 -1
hud/cli/tests/test_utils.py +1 -1
hud/cli/{rl → utils}/celebrate.py +14 -12
hud/cli/utils/config.py +18 -1
hud/cli/utils/docker.py +130 -4
hud/cli/utils/env_check.py +9 -9
hud/cli/utils/git.py +136 -0
hud/cli/utils/interactive.py +39 -5
hud/cli/utils/metadata.py +69 -0
hud/cli/utils/runner.py +1 -1
hud/cli/utils/server.py +2 -2
hud/cli/utils/source_hash.py +3 -3
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_git.py +142 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +258 -0
hud/cli/{rl → utils}/viewer.py +2 -2
hud/clients/README.md +12 -11
hud/clients/__init__.py +4 -3
hud/clients/base.py +166 -26
hud/clients/environment.py +51 -0
hud/clients/fastmcp.py +13 -6
hud/clients/mcp_use.py +40 -15
hud/clients/tests/test_analyze_scenarios.py +206 -0
hud/clients/tests/test_protocol.py +9 -3
hud/datasets/__init__.py +23 -20
hud/datasets/loader.py +327 -0
hud/datasets/runner.py +192 -105
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_loader.py +221 -0
hud/datasets/tests/test_utils.py +315 -0
hud/datasets/utils.py +270 -90
hud/environment/__init__.py +50 -0
hud/environment/connection.py +206 -0
hud/environment/connectors/__init__.py +33 -0
hud/environment/connectors/base.py +68 -0
hud/environment/connectors/local.py +177 -0
hud/environment/connectors/mcp_config.py +109 -0
hud/environment/connectors/openai.py +101 -0
hud/environment/connectors/remote.py +172 -0
hud/environment/environment.py +694 -0
hud/environment/integrations/__init__.py +45 -0
hud/environment/integrations/adk.py +67 -0
hud/environment/integrations/anthropic.py +196 -0
hud/environment/integrations/gemini.py +92 -0
hud/environment/integrations/langchain.py +82 -0
hud/environment/integrations/llamaindex.py +68 -0
hud/environment/integrations/openai.py +238 -0
hud/environment/mock.py +306 -0
hud/environment/router.py +112 -0
hud/environment/scenarios.py +493 -0
hud/environment/tests/__init__.py +1 -0
hud/environment/tests/test_connection.py +317 -0
hud/environment/tests/test_connectors.py +218 -0
hud/environment/tests/test_environment.py +161 -0
hud/environment/tests/test_integrations.py +257 -0
hud/environment/tests/test_local_connectors.py +201 -0
hud/environment/tests/test_scenarios.py +280 -0
hud/environment/tests/test_tools.py +208 -0
hud/environment/types.py +23 -0
hud/environment/utils/__init__.py +35 -0
hud/environment/utils/formats.py +215 -0
hud/environment/utils/schema.py +171 -0
hud/environment/utils/tool_wrappers.py +113 -0
hud/eval/__init__.py +67 -0
hud/eval/context.py +674 -0
hud/eval/display.py +299 -0
hud/eval/instrument.py +185 -0
hud/eval/manager.py +466 -0
hud/eval/parallel.py +268 -0
hud/eval/task.py +340 -0
hud/eval/tests/__init__.py +1 -0
hud/eval/tests/test_context.py +178 -0
hud/eval/tests/test_eval.py +210 -0
hud/eval/tests/test_manager.py +152 -0
hud/eval/tests/test_parallel.py +168 -0
hud/eval/tests/test_task.py +145 -0
hud/eval/types.py +63 -0
hud/eval/utils.py +183 -0
hud/patches/__init__.py +19 -0
hud/patches/mcp_patches.py +151 -0
hud/patches/warnings.py +54 -0
hud/samples/browser.py +4 -4
hud/server/__init__.py +2 -1
hud/server/low_level.py +2 -1
hud/server/router.py +164 -0
hud/server/server.py +567 -80
hud/server/tests/test_mcp_server_integration.py +11 -11
hud/server/tests/test_mcp_server_more.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/settings.py +45 -3
hud/shared/exceptions.py +36 -10
hud/shared/hints.py +26 -1
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +40 -31
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +20 -19
hud/telemetry/exporter.py +201 -0
hud/telemetry/instrument.py +158 -253
hud/telemetry/tests/test_eval_telemetry.py +356 -0
hud/telemetry/tests/test_exporter.py +258 -0
hud/telemetry/tests/test_instrument.py +401 -0
hud/tools/__init__.py +16 -2
hud/tools/apply_patch.py +639 -0
hud/tools/base.py +54 -4
hud/tools/bash.py +2 -2
hud/tools/computer/__init__.py +4 -0
hud/tools/computer/anthropic.py +2 -2
hud/tools/computer/gemini.py +385 -0
hud/tools/computer/hud.py +23 -6
hud/tools/computer/openai.py +20 -21
hud/tools/computer/qwen.py +434 -0
hud/tools/computer/settings.py +37 -0
hud/tools/edit.py +3 -7
hud/tools/executors/base.py +4 -2
hud/tools/executors/pyautogui.py +1 -1
hud/tools/grounding/grounded_tool.py +13 -18
hud/tools/grounding/grounder.py +10 -31
hud/tools/grounding/tests/test_grounded_tool.py +26 -44
hud/tools/jupyter.py +330 -0
hud/tools/playwright.py +18 -3
hud/tools/shell.py +308 -0
hud/tools/tests/test_apply_patch.py +718 -0
hud/tools/tests/test_computer.py +4 -9
hud/tools/tests/test_computer_actions.py +24 -2
hud/tools/tests/test_jupyter_tool.py +181 -0
hud/tools/tests/test_shell.py +596 -0
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/tools/types.py +21 -1
hud/types.py +167 -57
hud/utils/__init__.py +2 -0
hud/utils/env.py +67 -0
hud/utils/hud_console.py +61 -3
hud/utils/mcp.py +15 -58
hud/utils/strict_schema.py +162 -0
hud/utils/tests/test_init.py +1 -2
hud/utils/tests/test_mcp.py +1 -28
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/utils/types.py +20 -0
hud/version.py +1 -1
hud_python-0.5.1.dist-info/METADATA +264 -0
hud_python-0.5.1.dist-info/RECORD +299 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
hud/agents/langchain.py +0 -261
hud/agents/lite_llm.py +0 -72
hud/cli/rl/__init__.py +0 -180
hud/cli/rl/config.py +0 -101
hud/cli/rl/display.py +0 -133
hud/cli/rl/gpu.py +0 -63
hud/cli/rl/gpu_utils.py +0 -321
hud/cli/rl/local_runner.py +0 -595
hud/cli/rl/presets.py +0 -96
hud/cli/rl/remote_runner.py +0 -463
hud/cli/rl/rl_api.py +0 -150
hud/cli/rl/vllm.py +0 -177
hud/cli/rl/wait_utils.py +0 -89
hud/datasets/parallel.py +0 -687
hud/misc/__init__.py +0 -1
hud/misc/claude_plays_pokemon.py +0 -292
hud/otel/__init__.py +0 -35
hud/otel/collector.py +0 -142
hud/otel/config.py +0 -181
hud/otel/context.py +0 -570
hud/otel/exporters.py +0 -369
hud/otel/instrumentation.py +0 -135
hud/otel/processors.py +0 -121
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_processors.py +0 -197
hud/rl/README.md +0 -30
hud/rl/__init__.py +0 -1
hud/rl/actor.py +0 -176
hud/rl/buffer.py +0 -405
hud/rl/chat_template.jinja +0 -101
hud/rl/config.py +0 -192
hud/rl/distributed.py +0 -132
hud/rl/learner.py +0 -637
hud/rl/tests/__init__.py +0 -1
hud/rl/tests/test_learner.py +0 -186
hud/rl/train.py +0 -382
hud/rl/types.py +0 -101
hud/rl/utils/start_vllm_server.sh +0 -30
hud/rl/utils.py +0 -524
hud/rl/vllm_adapter.py +0 -143
hud/telemetry/job.py +0 -352
hud/telemetry/replay.py +0 -74
hud/telemetry/tests/test_replay.py +0 -40
hud/telemetry/tests/test_trace.py +0 -63
hud/telemetry/trace.py +0 -158
hud/utils/agent_factories.py +0 -86
hud/utils/async_utils.py +0 -65
hud/utils/group_eval.py +0 -223
hud/utils/progress.py +0 -149
hud/utils/tasks.py +0 -127
hud/utils/tests/test_async_utils.py +0 -173
hud/utils/tests/test_progress.py +0 -261
hud_python-0.4.45.dist-info/METADATA +0 -552
hud_python-0.4.45.dist-info/RECORD +0 -228
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0

hud/otel/exporters.py DELETED Viewed

@@ -1,369 +0,0 @@
-"""Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
-HTTP endpoint (/trace/<id>/telemetry-upload).
-The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
-exactly the same semantics the old async worker in ``hud.telemetry.exporter``
-implemented.
-This exporter is *synchronous* (derives from :class:`SpanExporter`).  We rely on
-``hud.shared.make_request_sync`` which already contains retry & auth logic.
-"""
-from __future__ import annotations
-import contextlib
-import json
-import logging
-import time
-from collections import defaultdict
-from datetime import UTC, datetime
-from typing import TYPE_CHECKING, Any
-from mcp.types import ClientRequest, ServerResult
-from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
-from pydantic import BaseModel, ConfigDict, Field
-from hud.shared import make_request_sync
-from hud.types import TraceStep as HudSpanAttributes
-if TYPE_CHECKING:
-    from opentelemetry.sdk.trace import ReadableSpan
-logger = logging.getLogger(__name__)
-# ---------------------------------------------------------------------------
-# Models
-# ---------------------------------------------------------------------------
-class HudSpan(BaseModel):
-    """A telemetry span ready for export."""
-    name: str
-    trace_id: str = Field(pattern=r"^[0-9a-fA-F]{32}$")
-    span_id: str = Field(pattern=r"^[0-9a-fA-F]{16}$")
-    parent_span_id: str | None = Field(None, pattern=r"^[0-9a-fA-F]{16}$")
-    start_time: str  # ISO format
-    end_time: str  # ISO format
-    status_code: str  # "UNSET", "OK", "ERROR"
-    status_message: str | None = None
-    attributes: HudSpanAttributes
-    exceptions: list[dict[str, Any]] | None = None
-    model_config = ConfigDict(extra="forbid")
-def extract_span_attributes(
-    attrs: dict[str, Any], method_name: str | None = None, span_name: str | None = None
-) -> HudSpanAttributes:
-    """Extract and parse span attributes into typed model.
-    This handles:
-    - Detecting span type (MCP vs Agent)
-    - Renaming verbose OpenTelemetry semantic conventions
-    - Parsing JSON strings to MCP types
-    """
-    # Start with core attributes - map to TraceStep field names
-    result_attrs = {
-        "task_run_id": attrs.get(
-            "hud.task_run_id"
-        ),  # TraceStep expects task_run_id, not hud.task_run_id
-        "job_id": attrs.get("hud.job_id"),  # TraceStep expects job_id, not hud.job_id
-        "type": attrs.get("span.kind", "CLIENT"),  # TraceStep expects type, not span.kind
-    }
-    # Determine span type based on presence of agent or MCP attributes
-    # Note: The input attrs might already have "category" set
-    existing_category = attrs.get("category")
-    if existing_category:
-        # Use the explicit category if provided
-        result_attrs["category"] = existing_category
-    elif span_name and span_name.startswith("agent."):
-        # Legacy support for spans named "agent.*"
-        result_attrs["category"] = "agent"
-    else:
-        result_attrs["category"] = "mcp"  # Default to MCP
-    # No special processing needed for different categories
-    # The backend will handle them based on the category field
-    # Add method_name and request_id for MCP spans
-    if result_attrs["category"] == "mcp":
-        if method_name:
-            result_attrs["method_name"] = method_name
-        # Check for request_id with and without semconv_ai prefix
-        request_id = attrs.get("semconv_ai.mcp.request_id") or attrs.get("mcp.request.id")
-        if request_id:
-            result_attrs["request_id"] = request_id
-    # Parse input/output - check both with and without semconv_ai prefix
-    input_str = attrs.get("semconv_ai.traceloop.entity.input") or attrs.get(
-        "traceloop.entity.input"
-    )
-    output_str = attrs.get("semconv_ai.traceloop.entity.output") or attrs.get(
-        "traceloop.entity.output"
-    )
-    logger.debug(
-        "Category: %s, has input: %s, has output: %s",
-        result_attrs.get("category"),
-        bool(input_str),
-        bool(output_str),
-    )
-    # Check for direct request/result attributes first
-    if "request" in attrs and not result_attrs.get("request"):
-        req = attrs["request"]
-        if isinstance(req, str):
-            with contextlib.suppress(json.JSONDecodeError):
-                req = json.loads(req)
-        result_attrs["request"] = req
-    if "result" in attrs and not result_attrs.get("result"):
-        res = attrs["result"]
-        if isinstance(res, str):
-            with contextlib.suppress(json.JSONDecodeError):
-                res = json.loads(res)
-        result_attrs["result"] = res
-    # Process input/output from MCP instrumentation
-    if input_str and not result_attrs.get("request"):
-        try:
-            input_data = json.loads(input_str) if isinstance(input_str, str) else input_str
-            # For MCP category, try to parse as ClientRequest to extract the root
-            if result_attrs["category"] == "mcp" and isinstance(input_data, dict):
-                try:
-                    if "method" in input_data and "params" in input_data:
-                        client_request = ClientRequest.model_validate(input_data)
-                        result_attrs["request"] = client_request.root
-                    else:
-                        result_attrs["request"] = input_data
-                except Exception:
-                    result_attrs["request"] = input_data
-            else:
-                # For all other categories, just store the data
-                result_attrs["request"] = input_data
-        except Exception as e:
-            logger.debug("Failed to parse request JSON: %s", e)
-    if output_str and not result_attrs.get("result"):
-        try:
-            output_data = json.loads(output_str) if isinstance(output_str, str) else output_str
-            # For MCP category, try to parse as ServerResult to extract the root
-            if result_attrs["category"] == "mcp" and isinstance(output_data, dict):
-                # Check for error
-                if "error" in output_data:
-                    result_attrs["mcp_error"] = True
-                try:
-                    server_result = ServerResult.model_validate(output_data)
-                    result_attrs["result"] = server_result.root
-                    # Check for isError in the result
-                    if getattr(server_result.root, "isError", False):
-                        result_attrs["mcp_error"] = True
-                except Exception:
-                    result_attrs["result"] = output_data
-            else:
-                # For all other categories, just store the data
-                result_attrs["result"] = output_data
-        except Exception as e:
-            logger.debug("Failed to parse result JSON: %s", e)
-    # Don't include the verbose attributes or ones we've already processed
-    exclude_keys = {
-        "hud.task_run_id",
-        "hud.job_id",
-        "span.kind",
-        "semconv_ai.mcp.method_name",
-        "mcp.method.name",  # Also exclude non-prefixed version
-        "semconv_ai.mcp.request_id",
-        "mcp.request.id",  # Also exclude non-prefixed version
-        "semconv_ai.traceloop.entity.input",
-        "semconv_ai.traceloop.entity.output",
-        "traceloop.entity.input",  # Also exclude non-prefixed versions
-        "traceloop.entity.output",
-        "mcp_request",  # Exclude to prevent overwriting parsed values
-        "mcp_result",  # Exclude to prevent overwriting parsed values
-        "request",  # Exclude to prevent overwriting parsed values
-        "result",  # Exclude to prevent overwriting parsed values
-        "category",  # Already handled above
-    }
-    # Add any extra attributes
-    for key, value in attrs.items():
-        if key not in exclude_keys:
-            result_attrs[key] = value  # noqa: PERF403
-    logger.debug(
-        """Final result_attrs before creating HudSpanAttributes:
-        request=%s,
-        result=%s""",
-        result_attrs.get("request"),
-        result_attrs.get("result"),
-    )
-    return HudSpanAttributes(**result_attrs)
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-def _ts_ns_to_iso(ts_ns: int) -> str:
-    """Convert a ``Span`` timestamp (nanoseconds) to ISO-8601 string."""
-    # OpenTelemetry times are epoch nanoseconds
-    dt = datetime.fromtimestamp(ts_ns / 1_000_000_000, tz=UTC)
-    return dt.isoformat().replace("+00:00", "Z")
-def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
-    """Convert an OpenTelemetry span to a dict using typed models."""
-    attrs = dict(span.attributes or {})
-    # Extract method name from span name if not in attributes
-    # Check both with and without semconv_ai prefix
-    raw_method = attrs.get("semconv_ai.mcp.method_name") or attrs.get("mcp.method.name")
-    method_name: str | None = None
-    if isinstance(raw_method, str):
-        method_name = raw_method
-    if method_name is None and isinstance(span.name, str) and span.name.endswith(".mcp"):
-        method_name = span.name[:-4]  # Remove .mcp suffix
-    # Create typed attributes
-    typed_attrs = extract_span_attributes(attrs, method_name, str(span.name))
-    # Record span kind as extra attribute (TraceStep allows extras)
-    try:
-        typed_attrs.span_kind = span.kind.name  # type: ignore[attr-defined]
-    except Exception:
-        logger.warning("Failed to set span kind attribute")
-    # Build typed span
-    # Guard context/parent/timestamps
-    context = getattr(span, "context", None)
-    trace_id_hex = (
-        format(context.trace_id, "032x") if context and hasattr(context, "trace_id") else "0" * 32
-    )
-    span_id_hex = (
-        format(context.span_id, "016x") if context and hasattr(context, "span_id") else "0" * 16
-    )
-    parent = getattr(span, "parent", None)
-    parent_id_hex = (
-        format(parent.span_id, "016x") if parent and hasattr(parent, "span_id") else None
-    )
-    start_ns = span.start_time or 0
-    end_ns = span.end_time or start_ns
-    typed_span = HudSpan(
-        name=span.name,
-        trace_id=trace_id_hex,
-        span_id=span_id_hex,
-        parent_span_id=parent_id_hex,
-        start_time=_ts_ns_to_iso(int(start_ns)),
-        end_time=_ts_ns_to_iso(int(end_ns)),
-        status_code=span.status.status_code.name if span.status else "UNSET",
-        status_message=span.status.description if span.status else None,
-        attributes=typed_attrs,
-        exceptions=None,
-    )
-    # Add error information if present
-    if span.events:
-        exceptions = []
-        exceptions = [
-            {
-                "timestamp": _ts_ns_to_iso(event.timestamp),
-                "attributes": dict(event.attributes or {}),
-            }
-            for event in span.events
-        ]
-        if exceptions:
-            typed_span.exceptions = exceptions
-    # Convert to dict for export
-    return typed_span.model_dump(mode="json", by_alias=True, exclude_none=True)
-# ---------------------------------------------------------------------------
-# Exporter
-# ---------------------------------------------------------------------------
-class HudSpanExporter(SpanExporter):
-    """Exporter that forwards spans to HUD backend using existing endpoint."""
-    def __init__(self, *, telemetry_url: str, api_key: str) -> None:
-        super().__init__()
-        self._telemetry_url = telemetry_url.rstrip("/")
-        self._api_key = api_key
-    # ------------------------------------------------------------------
-    # Core API
-    # ------------------------------------------------------------------
-    def export(self, spans: list[ReadableSpan]) -> SpanExportResult:  # type: ignore[override]
-        if not spans:
-            return SpanExportResult.SUCCESS
-        # Group spans by hud.task_run_id attribute
-        grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
-        for span in spans:
-            run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
-            if not run_id:
-                # Skip spans that are outside HUD traces
-                continue
-            grouped[str(run_id)].append(span)
-        # Send each group synchronously (retry inside make_request_sync)
-        for run_id, span_batch in grouped.items():
-            try:
-                url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
-                telemetry_spans = [_span_to_dict(s) for s in span_batch]
-                # Include current step count in metadata
-                metadata = {}
-                # Get the HIGHEST step count from the batch (most recent)
-                step_count = 0
-                for span in span_batch:
-                    if span.attributes and "hud.step_count" in span.attributes:
-                        current_step = span.attributes["hud.step_count"]
-                        if isinstance(current_step, int) and current_step > step_count:
-                            step_count = current_step
-                payload = {
-                    "metadata": metadata,
-                    "telemetry": telemetry_spans,
-                }
-                # Only include step_count if we found any steps
-                if step_count > 0:
-                    payload["step_count"] = step_count
-                logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
-                make_request_sync(
-                    method="POST",
-                    url=url,
-                    json=payload,
-                    api_key=self._api_key,
-                )
-            except Exception as exc:
-                logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
-                # If *any* group fails we return FAILURE so the OTEL SDK can retry
-                return SpanExportResult.FAILURE
-        return SpanExportResult.SUCCESS
-    def shutdown(self) -> None:  # type: ignore[override]
-        # Nothing to cleanup, httpx handled inside make_request_sync
-        pass
-    def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
-        if timeout_millis:
-            time.sleep(timeout_millis / 1000)
-        # Synchronous export, nothing buffered here
-        return True

hud/otel/instrumentation.py DELETED Viewed

@@ -1,135 +0,0 @@
-"""MCP instrumentation support for HUD.
-This module provides functions to enable MCP OpenTelemetry instrumentation
-for automatic tracing of MCP protocol communication.
-"""
-from __future__ import annotations
-import logging
-from typing import TYPE_CHECKING, Any
-if TYPE_CHECKING:
-    from collections.abc import AsyncGenerator, Callable
-    from opentelemetry.trace import TracerProvider
-logger = logging.getLogger(__name__)
-def install_mcp_instrumentation(provider: TracerProvider) -> None:
-    """Enable community MCP OpenTelemetry instrumentation if present.
-    Args:
-        provider: The TracerProvider to use for instrumentation
-    """
-    import logging
-    logger = logging.getLogger(__name__)
-    try:
-        # First, patch the _instruments to use our fork
-        import opentelemetry.instrumentation.mcp.instrumentation as mcp_inst
-        mcp_inst._instruments = ("hud-mcp-python-sdk >= 3.13.1",)
-        from opentelemetry.instrumentation.mcp.instrumentation import (
-            McpInstrumentor,
-        )
-        # Then, patch the instrumentation to handle 3-value transports correctly
-        _patch_mcp_instrumentation()
-        McpInstrumentor().instrument(tracer_provider=provider)
-        logger.debug("MCP instrumentation installed with fastmcp compatibility patch")
-    except ImportError:
-        logger.debug("opentelemetry-instrumentation-mcp not available, skipping")
-    except Exception as exc:
-        logger.warning("Failed to install MCP instrumentation: %s", exc)
-def _patch_mcp_instrumentation() -> None:
-    """Patch MCP instrumentation to handle 3-value transport yields correctly."""
-    from contextlib import asynccontextmanager
-    try:
-        from opentelemetry.instrumentation.mcp.instrumentation import McpInstrumentor
-        # First, patch the get_error_type function to handle invalid HTTP status codes
-        _patch_get_error_type()
-        def patched_transport_wrapper(self: Any, tracer: Any) -> Callable[..., Any]:
-            @asynccontextmanager
-            async def traced_method(
-                wrapped: Callable[..., Any], instance: Any, args: Any, kwargs: Any
-            ) -> AsyncGenerator[Any, None]:
-                async with wrapped(*args, **kwargs) as result:
-                    # Check if we got a tuple with 3 values
-                    if isinstance(result, tuple) and len(result) == 3:
-                        read_stream, write_stream, third_value = result
-                        # Import here to avoid circular imports
-                        from opentelemetry.instrumentation.mcp.instrumentation import (
-                            InstrumentedStreamReader,
-                            InstrumentedStreamWriter,
-                        )
-                        yield (
-                            InstrumentedStreamReader(read_stream, tracer),
-                            InstrumentedStreamWriter(write_stream, tracer),
-                            third_value,
-                        )
-                    else:
-                        # Fall back to 2-value case
-                        read_stream, write_stream = result
-                        from opentelemetry.instrumentation.mcp.instrumentation import (
-                            InstrumentedStreamReader,
-                            InstrumentedStreamWriter,
-                        )
-                        yield (
-                            InstrumentedStreamReader(read_stream, tracer),
-                            InstrumentedStreamWriter(write_stream, tracer),
-                        )
-            return traced_method
-        # Apply the patch
-        McpInstrumentor._transport_wrapper = patched_transport_wrapper
-    except Exception as e:
-        import logging
-        logger = logging.getLogger(__name__)
-        logger.warning("Failed to patch MCP instrumentation: %s", e)
-def _patch_get_error_type() -> None:
-    """Patch get_error_type to handle invalid HTTP status codes gracefully."""
-    import re
-    from http import HTTPStatus
-    try:
-        import opentelemetry.instrumentation.mcp.instrumentation as mcp_inst
-        def patched_get_error_type(error_message: str) -> str | None:
-            """Extract HTTP status from error message, handling invalid codes."""
-            if not isinstance(error_message, str):
-                return None
-            match = re.search(r"\b(4\d{2}|5\d{2})\b", error_message)
-            if match:
-                num = int(match.group())
-                try:
-                    # Only return if it's a valid HTTPStatus
-                    if 400 <= num <= 599:
-                        return HTTPStatus(num).name
-                except ValueError:
-                    # Not a valid HTTP status code
-                    logger.debug("Ignoring invalid HTTP status code: %s", num)
-            return None
-        # Apply the patch
-        mcp_inst.get_error_type = patched_get_error_type
-        logger.debug("Patched get_error_type to handle invalid HTTP status codes")
-    except Exception as e:
-        logger.warning("Failed to patch get_error_type: %s", e)

hud/otel/processors.py DELETED Viewed

@@ -1,121 +0,0 @@
-from __future__ import annotations
-import logging
-import time
-from typing import Any
-from opentelemetry import baggage
-from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor
-from .context import (
-    get_agent_steps,
-    get_base_mcp_steps,
-    get_mcp_tool_steps,
-    increment_agent_steps,
-    increment_base_mcp_steps,
-    increment_mcp_tool_steps,
-)
-logger = logging.getLogger(__name__)
-class HudEnrichmentProcessor(SpanProcessor):
-    """Span processor that enriches every span with HUD-specific context.
-    • Adds ``hud.task_run_id`` attribute if available.
-    • Adds ``hud.job_id`` attribute if available in baggage.
-    • Adds ``hud.step_count`` attribute if available in baggage.
-    """
-    def __init__(self) -> None:
-        # No state, everything comes from context vars
-        super().__init__()
-    # --- callback hooks -------------------------------------------------
-    def on_start(self, span: Span, parent_context: Any) -> None:  # type: ignore[override]
-        try:
-            # Get task_run_id from baggage in parent context
-            run_id = baggage.get_baggage("hud.task_run_id", context=parent_context)
-            if run_id and span.is_recording():
-                span.set_attribute("hud.task_run_id", str(run_id))
-            # Get job_id from baggage if available
-            job_id = baggage.get_baggage("hud.job_id", context=parent_context)
-            if job_id and span.is_recording():
-                span.set_attribute("hud.job_id", str(job_id))
-            # Check what type of step this is and increment appropriate counters
-            if span.is_recording():
-                step_type = self._get_step_type(span)
-                if step_type == "agent":
-                    # Increment agent steps
-                    new_agent_count = increment_agent_steps()
-                    span.set_attribute("hud.agent_steps", new_agent_count)
-                    logger.debug("Incremented agent steps to %d", new_agent_count)
-                elif step_type == "base_mcp":
-                    # Increment base MCP steps
-                    new_base_count = increment_base_mcp_steps()
-                    span.set_attribute("hud.base_mcp_steps", new_base_count)
-                    logger.debug("Incremented base MCP steps to %d", new_base_count)
-                elif step_type == "mcp_tool":
-                    # Increment both base MCP and MCP tool steps
-                    new_base_count = increment_base_mcp_steps()
-                    new_tool_count = increment_mcp_tool_steps()
-                    span.set_attribute("hud.base_mcp_steps", new_base_count)
-                    span.set_attribute("hud.mcp_tool_steps", new_tool_count)
-                    logger.debug(
-                        "Incremented MCP steps to base=%d, tool=%d", new_base_count, new_tool_count
-                    )
-                # Always set all current step counts on the span
-                span.set_attribute("hud.base_mcp_steps", get_base_mcp_steps())
-                span.set_attribute("hud.mcp_tool_steps", get_mcp_tool_steps())
-                span.set_attribute("hud.agent_steps", get_agent_steps())
-        except Exception as exc:  # defensive; never fail the tracer
-            logger.debug("HudEnrichmentProcessor.on_start error: %s", exc, exc_info=False)
-    def _get_step_type(self, span: Span) -> str | None:
-        """Determine what type of step this span represents.
-        Returns:
-            'base_mcp' for any MCP span
-            'mcp_tool' for MCP tool calls (tools/call.mcp)
-            'agent' for agent spans
-            None if not a step
-        """
-        # Check span attributes
-        attrs = span.attributes or {}
-        span_name = span.name
-        # Check for agent steps (instrumented with span_type="agent")
-        if attrs.get("category") == "agent":
-            return "agent"
-        # Check span name pattern for MCP calls
-        if span_name:
-            # tools/call.mcp is an mcp_tool step
-            if span_name == "tools/call.mcp":
-                return "mcp_tool"
-            # Any other .mcp suffixed span is a base MCP step
-            elif span_name.endswith(".mcp"):
-                return "base_mcp"
-        return None
-    def on_end(self, span: ReadableSpan) -> None:
-        # Nothing to do enrichment is on_start only
-        pass
-    # Required to fully implement abstract base, but we don't batch spans
-    def shutdown(self) -> None:  # type: ignore[override]
-        pass
-    def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
-        if timeout_millis:
-            time.sleep(timeout_millis / 1000)
-        return True

hud/otel/tests/__init__.py DELETED Viewed

@@ -1 +0,0 @@
-"""Tests for OpenTelemetry integration."""

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl

hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl