hud-python 0.4.45__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hud/__init__.py +27 -7
- hud/agents/__init__.py +11 -5
- hud/agents/base.py +220 -500
- hud/agents/claude.py +200 -240
- hud/agents/gemini.py +275 -0
- hud/agents/gemini_cua.py +335 -0
- hud/agents/grounded_openai.py +98 -100
- hud/agents/misc/integration_test_agent.py +51 -20
- hud/agents/misc/response_agent.py +41 -36
- hud/agents/openai.py +291 -292
- hud/agents/{openai_chat_generic.py → openai_chat.py} +80 -34
- hud/agents/operator.py +211 -0
- hud/agents/tests/conftest.py +133 -0
- hud/agents/tests/test_base.py +300 -622
- hud/agents/tests/test_base_runtime.py +233 -0
- hud/agents/tests/test_claude.py +379 -210
- hud/agents/tests/test_client.py +9 -10
- hud/agents/tests/test_gemini.py +369 -0
- hud/agents/tests/test_grounded_openai_agent.py +65 -50
- hud/agents/tests/test_openai.py +376 -140
- hud/agents/tests/test_operator.py +362 -0
- hud/agents/tests/test_run_eval.py +179 -0
- hud/cli/__init__.py +461 -545
- hud/cli/analyze.py +43 -5
- hud/cli/build.py +664 -110
- hud/cli/debug.py +8 -5
- hud/cli/dev.py +882 -734
- hud/cli/eval.py +782 -668
- hud/cli/flows/dev.py +167 -0
- hud/cli/flows/init.py +191 -0
- hud/cli/flows/tasks.py +153 -56
- hud/cli/flows/templates.py +151 -0
- hud/cli/flows/tests/__init__.py +1 -0
- hud/cli/flows/tests/test_dev.py +126 -0
- hud/cli/init.py +60 -58
- hud/cli/push.py +29 -11
- hud/cli/rft.py +311 -0
- hud/cli/rft_status.py +145 -0
- hud/cli/tests/test_analyze.py +5 -5
- hud/cli/tests/test_analyze_metadata.py +3 -2
- hud/cli/tests/test_analyze_module.py +120 -0
- hud/cli/tests/test_build.py +108 -6
- hud/cli/tests/test_build_failure.py +41 -0
- hud/cli/tests/test_build_module.py +50 -0
- hud/cli/tests/test_cli_init.py +6 -1
- hud/cli/tests/test_cli_more_wrappers.py +30 -0
- hud/cli/tests/test_cli_root.py +140 -0
- hud/cli/tests/test_convert.py +361 -0
- hud/cli/tests/test_debug.py +12 -10
- hud/cli/tests/test_dev.py +197 -0
- hud/cli/tests/test_eval.py +251 -0
- hud/cli/tests/test_eval_bedrock.py +51 -0
- hud/cli/tests/test_init.py +124 -0
- hud/cli/tests/test_main_module.py +11 -5
- hud/cli/tests/test_mcp_server.py +12 -100
- hud/cli/tests/test_push_happy.py +74 -0
- hud/cli/tests/test_push_wrapper.py +23 -0
- hud/cli/tests/test_registry.py +1 -1
- hud/cli/tests/test_utils.py +1 -1
- hud/cli/{rl → utils}/celebrate.py +14 -12
- hud/cli/utils/config.py +18 -1
- hud/cli/utils/docker.py +130 -4
- hud/cli/utils/env_check.py +9 -9
- hud/cli/utils/git.py +136 -0
- hud/cli/utils/interactive.py +39 -5
- hud/cli/utils/metadata.py +69 -0
- hud/cli/utils/runner.py +1 -1
- hud/cli/utils/server.py +2 -2
- hud/cli/utils/source_hash.py +3 -3
- hud/cli/utils/tasks.py +4 -1
- hud/cli/utils/tests/__init__.py +0 -0
- hud/cli/utils/tests/test_config.py +58 -0
- hud/cli/utils/tests/test_docker.py +93 -0
- hud/cli/utils/tests/test_docker_hints.py +71 -0
- hud/cli/utils/tests/test_env_check.py +74 -0
- hud/cli/utils/tests/test_environment.py +42 -0
- hud/cli/utils/tests/test_git.py +142 -0
- hud/cli/utils/tests/test_interactive_module.py +60 -0
- hud/cli/utils/tests/test_local_runner.py +50 -0
- hud/cli/utils/tests/test_logging_utils.py +23 -0
- hud/cli/utils/tests/test_metadata.py +49 -0
- hud/cli/utils/tests/test_package_runner.py +35 -0
- hud/cli/utils/tests/test_registry_utils.py +49 -0
- hud/cli/utils/tests/test_remote_runner.py +25 -0
- hud/cli/utils/tests/test_runner_modules.py +52 -0
- hud/cli/utils/tests/test_source_hash.py +36 -0
- hud/cli/utils/tests/test_tasks.py +80 -0
- hud/cli/utils/version_check.py +258 -0
- hud/cli/{rl → utils}/viewer.py +2 -2
- hud/clients/README.md +12 -11
- hud/clients/__init__.py +4 -3
- hud/clients/base.py +166 -26
- hud/clients/environment.py +51 -0
- hud/clients/fastmcp.py +13 -6
- hud/clients/mcp_use.py +40 -15
- hud/clients/tests/test_analyze_scenarios.py +206 -0
- hud/clients/tests/test_protocol.py +9 -3
- hud/datasets/__init__.py +23 -20
- hud/datasets/loader.py +327 -0
- hud/datasets/runner.py +192 -105
- hud/datasets/tests/__init__.py +0 -0
- hud/datasets/tests/test_loader.py +221 -0
- hud/datasets/tests/test_utils.py +315 -0
- hud/datasets/utils.py +270 -90
- hud/environment/__init__.py +50 -0
- hud/environment/connection.py +206 -0
- hud/environment/connectors/__init__.py +33 -0
- hud/environment/connectors/base.py +68 -0
- hud/environment/connectors/local.py +177 -0
- hud/environment/connectors/mcp_config.py +109 -0
- hud/environment/connectors/openai.py +101 -0
- hud/environment/connectors/remote.py +172 -0
- hud/environment/environment.py +694 -0
- hud/environment/integrations/__init__.py +45 -0
- hud/environment/integrations/adk.py +67 -0
- hud/environment/integrations/anthropic.py +196 -0
- hud/environment/integrations/gemini.py +92 -0
- hud/environment/integrations/langchain.py +82 -0
- hud/environment/integrations/llamaindex.py +68 -0
- hud/environment/integrations/openai.py +238 -0
- hud/environment/mock.py +306 -0
- hud/environment/router.py +112 -0
- hud/environment/scenarios.py +493 -0
- hud/environment/tests/__init__.py +1 -0
- hud/environment/tests/test_connection.py +317 -0
- hud/environment/tests/test_connectors.py +218 -0
- hud/environment/tests/test_environment.py +161 -0
- hud/environment/tests/test_integrations.py +257 -0
- hud/environment/tests/test_local_connectors.py +201 -0
- hud/environment/tests/test_scenarios.py +280 -0
- hud/environment/tests/test_tools.py +208 -0
- hud/environment/types.py +23 -0
- hud/environment/utils/__init__.py +35 -0
- hud/environment/utils/formats.py +215 -0
- hud/environment/utils/schema.py +171 -0
- hud/environment/utils/tool_wrappers.py +113 -0
- hud/eval/__init__.py +67 -0
- hud/eval/context.py +674 -0
- hud/eval/display.py +299 -0
- hud/eval/instrument.py +185 -0
- hud/eval/manager.py +466 -0
- hud/eval/parallel.py +268 -0
- hud/eval/task.py +340 -0
- hud/eval/tests/__init__.py +1 -0
- hud/eval/tests/test_context.py +178 -0
- hud/eval/tests/test_eval.py +210 -0
- hud/eval/tests/test_manager.py +152 -0
- hud/eval/tests/test_parallel.py +168 -0
- hud/eval/tests/test_task.py +145 -0
- hud/eval/types.py +63 -0
- hud/eval/utils.py +183 -0
- hud/patches/__init__.py +19 -0
- hud/patches/mcp_patches.py +151 -0
- hud/patches/warnings.py +54 -0
- hud/samples/browser.py +4 -4
- hud/server/__init__.py +2 -1
- hud/server/low_level.py +2 -1
- hud/server/router.py +164 -0
- hud/server/server.py +567 -80
- hud/server/tests/test_mcp_server_integration.py +11 -11
- hud/server/tests/test_mcp_server_more.py +1 -1
- hud/server/tests/test_server_extra.py +2 -0
- hud/settings.py +45 -3
- hud/shared/exceptions.py +36 -10
- hud/shared/hints.py +26 -1
- hud/shared/requests.py +15 -3
- hud/shared/tests/test_exceptions.py +40 -31
- hud/shared/tests/test_hints.py +167 -0
- hud/telemetry/__init__.py +20 -19
- hud/telemetry/exporter.py +201 -0
- hud/telemetry/instrument.py +158 -253
- hud/telemetry/tests/test_eval_telemetry.py +356 -0
- hud/telemetry/tests/test_exporter.py +258 -0
- hud/telemetry/tests/test_instrument.py +401 -0
- hud/tools/__init__.py +16 -2
- hud/tools/apply_patch.py +639 -0
- hud/tools/base.py +54 -4
- hud/tools/bash.py +2 -2
- hud/tools/computer/__init__.py +4 -0
- hud/tools/computer/anthropic.py +2 -2
- hud/tools/computer/gemini.py +385 -0
- hud/tools/computer/hud.py +23 -6
- hud/tools/computer/openai.py +20 -21
- hud/tools/computer/qwen.py +434 -0
- hud/tools/computer/settings.py +37 -0
- hud/tools/edit.py +3 -7
- hud/tools/executors/base.py +4 -2
- hud/tools/executors/pyautogui.py +1 -1
- hud/tools/grounding/grounded_tool.py +13 -18
- hud/tools/grounding/grounder.py +10 -31
- hud/tools/grounding/tests/test_grounded_tool.py +26 -44
- hud/tools/jupyter.py +330 -0
- hud/tools/playwright.py +18 -3
- hud/tools/shell.py +308 -0
- hud/tools/tests/test_apply_patch.py +718 -0
- hud/tools/tests/test_computer.py +4 -9
- hud/tools/tests/test_computer_actions.py +24 -2
- hud/tools/tests/test_jupyter_tool.py +181 -0
- hud/tools/tests/test_shell.py +596 -0
- hud/tools/tests/test_submit.py +85 -0
- hud/tools/tests/test_types.py +193 -0
- hud/tools/types.py +21 -1
- hud/types.py +167 -57
- hud/utils/__init__.py +2 -0
- hud/utils/env.py +67 -0
- hud/utils/hud_console.py +61 -3
- hud/utils/mcp.py +15 -58
- hud/utils/strict_schema.py +162 -0
- hud/utils/tests/test_init.py +1 -2
- hud/utils/tests/test_mcp.py +1 -28
- hud/utils/tests/test_pretty_errors.py +186 -0
- hud/utils/tests/test_tool_shorthand.py +154 -0
- hud/utils/tests/test_version.py +1 -1
- hud/utils/types.py +20 -0
- hud/version.py +1 -1
- hud_python-0.5.1.dist-info/METADATA +264 -0
- hud_python-0.5.1.dist-info/RECORD +299 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/WHEEL +1 -1
- hud/agents/langchain.py +0 -261
- hud/agents/lite_llm.py +0 -72
- hud/cli/rl/__init__.py +0 -180
- hud/cli/rl/config.py +0 -101
- hud/cli/rl/display.py +0 -133
- hud/cli/rl/gpu.py +0 -63
- hud/cli/rl/gpu_utils.py +0 -321
- hud/cli/rl/local_runner.py +0 -595
- hud/cli/rl/presets.py +0 -96
- hud/cli/rl/remote_runner.py +0 -463
- hud/cli/rl/rl_api.py +0 -150
- hud/cli/rl/vllm.py +0 -177
- hud/cli/rl/wait_utils.py +0 -89
- hud/datasets/parallel.py +0 -687
- hud/misc/__init__.py +0 -1
- hud/misc/claude_plays_pokemon.py +0 -292
- hud/otel/__init__.py +0 -35
- hud/otel/collector.py +0 -142
- hud/otel/config.py +0 -181
- hud/otel/context.py +0 -570
- hud/otel/exporters.py +0 -369
- hud/otel/instrumentation.py +0 -135
- hud/otel/processors.py +0 -121
- hud/otel/tests/__init__.py +0 -1
- hud/otel/tests/test_processors.py +0 -197
- hud/rl/README.md +0 -30
- hud/rl/__init__.py +0 -1
- hud/rl/actor.py +0 -176
- hud/rl/buffer.py +0 -405
- hud/rl/chat_template.jinja +0 -101
- hud/rl/config.py +0 -192
- hud/rl/distributed.py +0 -132
- hud/rl/learner.py +0 -637
- hud/rl/tests/__init__.py +0 -1
- hud/rl/tests/test_learner.py +0 -186
- hud/rl/train.py +0 -382
- hud/rl/types.py +0 -101
- hud/rl/utils/start_vllm_server.sh +0 -30
- hud/rl/utils.py +0 -524
- hud/rl/vllm_adapter.py +0 -143
- hud/telemetry/job.py +0 -352
- hud/telemetry/replay.py +0 -74
- hud/telemetry/tests/test_replay.py +0 -40
- hud/telemetry/tests/test_trace.py +0 -63
- hud/telemetry/trace.py +0 -158
- hud/utils/agent_factories.py +0 -86
- hud/utils/async_utils.py +0 -65
- hud/utils/group_eval.py +0 -223
- hud/utils/progress.py +0 -149
- hud/utils/tasks.py +0 -127
- hud/utils/tests/test_async_utils.py +0 -173
- hud/utils/tests/test_progress.py +0 -261
- hud_python-0.4.45.dist-info/METADATA +0 -552
- hud_python-0.4.45.dist-info/RECORD +0 -228
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/entry_points.txt +0 -0
- {hud_python-0.4.45.dist-info → hud_python-0.5.1.dist-info}/licenses/LICENSE +0 -0
hud/telemetry/instrument.py
CHANGED
|
@@ -1,7 +1,16 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Instrumentation decorator for HUD telemetry.
|
|
2
2
|
|
|
3
|
-
This module provides
|
|
4
|
-
|
|
3
|
+
This module provides a lightweight @instrument decorator that records
|
|
4
|
+
function calls and sends them to the HUD telemetry backend.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
@hud.instrument
|
|
8
|
+
async def my_function(arg1, arg2):
|
|
9
|
+
...
|
|
10
|
+
|
|
11
|
+
# Within an eval context, calls are recorded and sent to HUD
|
|
12
|
+
async with env.eval("task") as ctx:
|
|
13
|
+
result = await my_function("a", "b")
|
|
5
14
|
"""
|
|
6
15
|
|
|
7
16
|
from __future__ import annotations
|
|
@@ -11,14 +20,23 @@ import functools
|
|
|
11
20
|
import inspect
|
|
12
21
|
import json
|
|
13
22
|
import logging
|
|
23
|
+
import time
|
|
24
|
+
import uuid
|
|
25
|
+
from datetime import UTC, datetime
|
|
14
26
|
from typing import TYPE_CHECKING, Any, TypeVar, overload
|
|
15
27
|
|
|
16
28
|
import pydantic_core
|
|
17
|
-
from opentelemetry import trace
|
|
18
|
-
from opentelemetry.trace import SpanKind, Status, StatusCode
|
|
19
29
|
|
|
20
|
-
from hud.
|
|
21
|
-
from hud.
|
|
30
|
+
from hud.telemetry.exporter import queue_span
|
|
31
|
+
from hud.types import TraceStep
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _get_trace_id() -> str | None:
|
|
35
|
+
"""Lazy import to avoid circular dependency with eval.context."""
|
|
36
|
+
from hud.eval.context import get_current_trace_id
|
|
37
|
+
|
|
38
|
+
return get_current_trace_id()
|
|
39
|
+
|
|
22
40
|
|
|
23
41
|
if TYPE_CHECKING:
|
|
24
42
|
from collections.abc import Awaitable, Callable
|
|
@@ -31,53 +49,42 @@ logger = logging.getLogger(__name__)
|
|
|
31
49
|
|
|
32
50
|
|
|
33
51
|
def _serialize_value(value: Any, max_items: int = 10) -> Any:
|
|
34
|
-
"""Serialize a value for
|
|
35
|
-
|
|
36
|
-
Uses pydantic_core.to_json for robust serialization of complex objects.
|
|
37
|
-
|
|
38
|
-
Args:
|
|
39
|
-
value: The value to serialize
|
|
40
|
-
max_items: Maximum number of items for collections
|
|
41
|
-
|
|
42
|
-
Returns:
|
|
43
|
-
JSON-serializable version of the value
|
|
44
|
-
"""
|
|
45
|
-
# Simple types pass through
|
|
52
|
+
"""Serialize a value for recording."""
|
|
46
53
|
if isinstance(value, str | int | float | bool | type(None)):
|
|
47
54
|
return value
|
|
48
55
|
|
|
49
|
-
# For collections, we need to limit size first
|
|
50
56
|
if isinstance(value, list | tuple):
|
|
51
57
|
value = value[:max_items] if len(value) > max_items else value
|
|
52
58
|
elif isinstance(value, dict) and len(value) > max_items:
|
|
53
59
|
value = dict(list(value.items())[:max_items])
|
|
54
60
|
|
|
55
|
-
# Use pydantic_core for serialization - it handles:
|
|
56
|
-
# - Pydantic models (via model_dump)
|
|
57
|
-
# - Dataclasses (via asdict)
|
|
58
|
-
# - Bytes (encodes to string)
|
|
59
|
-
# - Custom objects (via __dict__ or repr)
|
|
60
|
-
# - Complex nested structures
|
|
61
61
|
try:
|
|
62
|
-
# Convert to JSON bytes then back to Python objects
|
|
63
|
-
# This ensures we get JSON-serializable types
|
|
64
62
|
json_bytes = pydantic_core.to_json(value, fallback=str)
|
|
65
63
|
return json.loads(json_bytes)
|
|
66
64
|
except Exception:
|
|
67
|
-
# Fallback if pydantic_core fails somehow
|
|
68
65
|
return f"<{type(value).__name__}>"
|
|
69
66
|
|
|
70
67
|
|
|
68
|
+
def _now_iso() -> str:
|
|
69
|
+
"""Get current time as ISO-8601 string."""
|
|
70
|
+
return datetime.now(UTC).isoformat().replace("+00:00", "Z")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _normalize_trace_id(trace_id: str) -> str:
|
|
74
|
+
"""Normalize trace_id to 32-character hex string."""
|
|
75
|
+
clean = trace_id.replace("-", "")
|
|
76
|
+
return clean[:32].ljust(32, "0")
|
|
77
|
+
|
|
78
|
+
|
|
71
79
|
@overload
|
|
72
80
|
def instrument(
|
|
73
81
|
func: None = None,
|
|
74
82
|
*,
|
|
75
83
|
name: str | None = None,
|
|
76
|
-
|
|
77
|
-
|
|
84
|
+
category: str = "function",
|
|
85
|
+
span_type: str | None = None,
|
|
78
86
|
record_args: bool = True,
|
|
79
87
|
record_result: bool = True,
|
|
80
|
-
span_kind: SpanKind = SpanKind.INTERNAL,
|
|
81
88
|
) -> Callable[[Callable[..., Any]], Callable[..., Any]]: ...
|
|
82
89
|
|
|
83
90
|
|
|
@@ -86,11 +93,10 @@ def instrument(
|
|
|
86
93
|
func: Callable[P, R],
|
|
87
94
|
*,
|
|
88
95
|
name: str | None = None,
|
|
89
|
-
|
|
90
|
-
|
|
96
|
+
category: str = "function",
|
|
97
|
+
span_type: str | None = None,
|
|
91
98
|
record_args: bool = True,
|
|
92
99
|
record_result: bool = True,
|
|
93
|
-
span_kind: SpanKind = SpanKind.INTERNAL,
|
|
94
100
|
) -> Callable[P, R]: ...
|
|
95
101
|
|
|
96
102
|
|
|
@@ -99,11 +105,10 @@ def instrument(
|
|
|
99
105
|
func: Callable[P, Awaitable[R]],
|
|
100
106
|
*,
|
|
101
107
|
name: str | None = None,
|
|
102
|
-
|
|
103
|
-
|
|
108
|
+
category: str = "function",
|
|
109
|
+
span_type: str | None = None,
|
|
104
110
|
record_args: bool = True,
|
|
105
111
|
record_result: bool = True,
|
|
106
|
-
span_kind: SpanKind = SpanKind.INTERNAL,
|
|
107
112
|
) -> Callable[P, Awaitable[R]]: ...
|
|
108
113
|
|
|
109
114
|
|
|
@@ -111,269 +116,169 @@ def instrument(
|
|
|
111
116
|
func: Callable[..., Any] | None = None,
|
|
112
117
|
*,
|
|
113
118
|
name: str | None = None,
|
|
114
|
-
|
|
115
|
-
|
|
119
|
+
category: str = "function",
|
|
120
|
+
span_type: str | None = None,
|
|
116
121
|
record_args: bool = True,
|
|
117
122
|
record_result: bool = True,
|
|
118
|
-
span_kind: SpanKind = SpanKind.INTERNAL,
|
|
119
123
|
) -> Callable[..., Any]:
|
|
120
|
-
"""Instrument a function to
|
|
124
|
+
"""Instrument a function to record spans within eval context.
|
|
121
125
|
|
|
122
|
-
This decorator
|
|
123
|
-
observability. It works with both sync and async functions.
|
|
126
|
+
This decorator records function calls as spans and sends them to the HUD API.
|
|
124
127
|
|
|
125
128
|
Args:
|
|
126
|
-
func: The function to instrument
|
|
127
|
-
name: Custom span name (defaults to
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
record_args: Whether to record function arguments
|
|
131
|
-
record_result: Whether to record function result
|
|
132
|
-
span_kind: OpenTelemetry span kind (INTERNAL, CLIENT, SERVER, etc.)
|
|
129
|
+
func: The function to instrument
|
|
130
|
+
name: Custom span name (defaults to module.function)
|
|
131
|
+
category: Span category (e.g., "agent", "tool", "function", "mcp")
|
|
132
|
+
span_type: Alias for category (deprecated, use category instead)
|
|
133
|
+
record_args: Whether to record function arguments
|
|
134
|
+
record_result: Whether to record function result
|
|
133
135
|
|
|
134
136
|
Returns:
|
|
135
|
-
The instrumented function
|
|
137
|
+
The instrumented function
|
|
136
138
|
|
|
137
139
|
Examples:
|
|
138
|
-
# Basic usage - defaults to category="function"
|
|
139
140
|
@hud.instrument
|
|
140
141
|
async def process_data(items: list[str]) -> dict:
|
|
141
142
|
return {"count": len(items)}
|
|
142
143
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
record_args=True,
|
|
147
|
-
record_result=True
|
|
148
|
-
)
|
|
149
|
-
async def query_users(filter: dict) -> list[User]:
|
|
150
|
-
return await db.find(filter)
|
|
151
|
-
|
|
152
|
-
# Agent instrumentation
|
|
153
|
-
@hud.instrument(
|
|
154
|
-
span_type="agent", # category="agent" gets special handling
|
|
155
|
-
record_args=False, # Don't record large message arrays
|
|
156
|
-
record_result=True
|
|
157
|
-
)
|
|
158
|
-
async def get_model_response(self, messages: list) -> Response:
|
|
159
|
-
return await self.model.complete(messages)
|
|
160
|
-
|
|
161
|
-
# Instrument third-party functions
|
|
162
|
-
import requests
|
|
163
|
-
requests.get = hud.instrument(
|
|
164
|
-
span_type="http", # category="http"
|
|
165
|
-
span_kind=SpanKind.CLIENT
|
|
166
|
-
)(requests.get)
|
|
167
|
-
|
|
168
|
-
# Conditional instrumentation
|
|
169
|
-
if settings.enable_db_tracing:
|
|
170
|
-
db.query = hud.instrument(db.query)
|
|
144
|
+
@hud.instrument(category="agent")
|
|
145
|
+
async def call_model(messages: list) -> str:
|
|
146
|
+
return await model.generate(messages)
|
|
171
147
|
"""
|
|
172
|
-
|
|
173
|
-
# This allows users to configure alternative backends before importing agents
|
|
148
|
+
effective_category = span_type if span_type is not None else category
|
|
174
149
|
|
|
175
150
|
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
176
|
-
# Check if already instrumented
|
|
177
151
|
if hasattr(func, "_hud_instrumented"):
|
|
178
|
-
logger.debug("Function %s already instrumented, skipping", func.__name__)
|
|
179
152
|
return func
|
|
180
153
|
|
|
181
|
-
# Get function metadata
|
|
182
154
|
func_module = getattr(func, "__module__", "unknown")
|
|
183
155
|
func_name = getattr(func, "__name__", "unknown")
|
|
184
156
|
func_qualname = getattr(func, "__qualname__", func_name)
|
|
185
|
-
|
|
186
|
-
# Determine span name
|
|
187
157
|
span_name = name or f"{func_module}.{func_qualname}"
|
|
188
158
|
|
|
189
|
-
# Get function signature for argument parsing
|
|
190
159
|
try:
|
|
191
160
|
sig = inspect.signature(func)
|
|
192
161
|
except (ValueError, TypeError):
|
|
193
162
|
sig = None
|
|
194
163
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
"
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
# Add current task_run_id if available
|
|
217
|
-
task_run_id = get_current_task_run_id()
|
|
218
|
-
if task_run_id:
|
|
219
|
-
span_attrs["hud.task_run_id"] = task_run_id
|
|
220
|
-
|
|
221
|
-
# Record function arguments if requested
|
|
164
|
+
def _build_span(
|
|
165
|
+
task_run_id: str,
|
|
166
|
+
args: tuple[Any, ...],
|
|
167
|
+
kwargs: dict[str, Any],
|
|
168
|
+
start_time: str,
|
|
169
|
+
end_time: str,
|
|
170
|
+
result: Any = None,
|
|
171
|
+
error: str | None = None,
|
|
172
|
+
) -> dict[str, Any]:
|
|
173
|
+
"""Build a HudSpan-compatible span record."""
|
|
174
|
+
# Build attributes using TraceStep
|
|
175
|
+
attributes = TraceStep(
|
|
176
|
+
task_run_id=task_run_id,
|
|
177
|
+
category=effective_category,
|
|
178
|
+
type="CLIENT",
|
|
179
|
+
start_timestamp=start_time,
|
|
180
|
+
end_timestamp=end_time,
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Record arguments as request
|
|
222
184
|
if record_args and sig:
|
|
223
185
|
try:
|
|
224
186
|
bound_args = sig.bind(*args, **kwargs)
|
|
225
187
|
bound_args.apply_defaults()
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
# Skip 'self' and 'cls' parameters
|
|
232
|
-
if param_name in ("self", "cls"):
|
|
233
|
-
continue
|
|
234
|
-
|
|
235
|
-
args_dict[param_name] = _serialize_value(value)
|
|
236
|
-
except Exception:
|
|
237
|
-
args_dict[param_name] = "<serialization_error>"
|
|
238
|
-
|
|
188
|
+
args_dict = {
|
|
189
|
+
k: _serialize_value(v)
|
|
190
|
+
for k, v in bound_args.arguments.items()
|
|
191
|
+
if k not in ("self", "cls")
|
|
192
|
+
}
|
|
239
193
|
if args_dict:
|
|
240
|
-
|
|
241
|
-
span_attrs["function.arguments"] = args_json
|
|
242
|
-
# Always set generic request field for consistency
|
|
243
|
-
span_attrs["request"] = args_json
|
|
194
|
+
attributes.request = args_dict
|
|
244
195
|
except Exception as e:
|
|
245
|
-
logger.debug("Failed to
|
|
196
|
+
logger.debug("Failed to serialize args: %s", e)
|
|
246
197
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
kind=span_kind,
|
|
250
|
-
attributes=span_attrs,
|
|
251
|
-
) as span:
|
|
198
|
+
# Record result
|
|
199
|
+
if record_result and result is not None and error is None:
|
|
252
200
|
try:
|
|
253
|
-
|
|
254
|
-
result = await func(*args, **kwargs)
|
|
255
|
-
|
|
256
|
-
# Record result if requested
|
|
257
|
-
if record_result:
|
|
258
|
-
try:
|
|
259
|
-
serialized = _serialize_value(result)
|
|
260
|
-
result_json = json.dumps(serialized)
|
|
261
|
-
span.set_attribute("function.result", result_json)
|
|
262
|
-
# Always set generic result field for consistency
|
|
263
|
-
span.set_attribute("result", result_json)
|
|
264
|
-
|
|
265
|
-
# Also set result type for complex objects
|
|
266
|
-
if not isinstance(
|
|
267
|
-
result, str | int | float | bool | type(None) | list | tuple | dict
|
|
268
|
-
):
|
|
269
|
-
span.set_attribute("function.result_type", type(result).__name__)
|
|
270
|
-
except Exception as e:
|
|
271
|
-
logger.debug("Failed to record function result: %s", e)
|
|
272
|
-
|
|
273
|
-
span.set_status(Status(StatusCode.OK))
|
|
274
|
-
return result
|
|
275
|
-
|
|
201
|
+
attributes.result = _serialize_value(result)
|
|
276
202
|
except Exception as e:
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
span_attrs = {
|
|
293
|
-
"category": span_type, # span_type IS the category
|
|
294
|
-
"function.module": func_module,
|
|
295
|
-
"function.name": func_name,
|
|
296
|
-
"function.qualname": func_qualname,
|
|
203
|
+
logger.debug("Failed to serialize result: %s", e)
|
|
204
|
+
|
|
205
|
+
# Build span
|
|
206
|
+
span_id = uuid.uuid4().hex[:16]
|
|
207
|
+
span = {
|
|
208
|
+
"name": span_name,
|
|
209
|
+
"trace_id": _normalize_trace_id(task_run_id),
|
|
210
|
+
"span_id": span_id,
|
|
211
|
+
"parent_span_id": None,
|
|
212
|
+
"start_time": start_time,
|
|
213
|
+
"end_time": end_time,
|
|
214
|
+
"status_code": "ERROR" if error else "OK",
|
|
215
|
+
"status_message": error,
|
|
216
|
+
"attributes": attributes.model_dump(mode="json", exclude_none=True),
|
|
217
|
+
"exceptions": [{"message": error}] if error else None,
|
|
297
218
|
}
|
|
219
|
+
return span
|
|
298
220
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
args_json = json.dumps(args_dict)
|
|
324
|
-
span_attrs["function.arguments"] = args_json
|
|
325
|
-
# Always set generic request field for consistency
|
|
326
|
-
span_attrs["request"] = args_json
|
|
327
|
-
except Exception as e:
|
|
328
|
-
logger.debug("Failed to record function arguments: %s", e)
|
|
329
|
-
|
|
330
|
-
with tracer.start_as_current_span(
|
|
331
|
-
span_name,
|
|
332
|
-
kind=span_kind,
|
|
333
|
-
attributes=span_attrs,
|
|
334
|
-
) as span:
|
|
335
|
-
try:
|
|
336
|
-
# Execute the function
|
|
337
|
-
result = func(*args, **kwargs)
|
|
338
|
-
|
|
339
|
-
# Record result if requested
|
|
340
|
-
if record_result:
|
|
341
|
-
try:
|
|
342
|
-
serialized = _serialize_value(result)
|
|
343
|
-
result_json = json.dumps(serialized)
|
|
344
|
-
span.set_attribute("function.result", result_json)
|
|
345
|
-
# Always set generic result field for consistency
|
|
346
|
-
span.set_attribute("result", result_json)
|
|
347
|
-
|
|
348
|
-
# Also set result type for complex objects
|
|
349
|
-
if not isinstance(
|
|
350
|
-
result, str | int | float | bool | type(None) | list | tuple | dict
|
|
351
|
-
):
|
|
352
|
-
span.set_attribute("function.result_type", type(result).__name__)
|
|
353
|
-
except Exception as e:
|
|
354
|
-
logger.debug("Failed to record function result: %s", e)
|
|
355
|
-
|
|
356
|
-
span.set_status(Status(StatusCode.OK))
|
|
357
|
-
return result
|
|
221
|
+
@functools.wraps(func)
|
|
222
|
+
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
223
|
+
task_run_id = _get_trace_id()
|
|
224
|
+
start_time = _now_iso()
|
|
225
|
+
start_perf = time.perf_counter()
|
|
226
|
+
error: str | None = None
|
|
227
|
+
result: Any = None
|
|
228
|
+
|
|
229
|
+
try:
|
|
230
|
+
result = await func(*args, **kwargs)
|
|
231
|
+
return result
|
|
232
|
+
except Exception as e:
|
|
233
|
+
error = f"{type(e).__name__}: {e}"
|
|
234
|
+
raise
|
|
235
|
+
finally:
|
|
236
|
+
end_time = _now_iso()
|
|
237
|
+
duration_ms = (time.perf_counter() - start_perf) * 1000
|
|
238
|
+
|
|
239
|
+
if task_run_id:
|
|
240
|
+
span = _build_span(
|
|
241
|
+
task_run_id, args, kwargs, start_time, end_time, result, error
|
|
242
|
+
)
|
|
243
|
+
queue_span(span)
|
|
244
|
+
logger.debug("Span: %s (%.2fms)", span_name, duration_ms)
|
|
358
245
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
246
|
+
@functools.wraps(func)
|
|
247
|
+
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
248
|
+
task_run_id = _get_trace_id()
|
|
249
|
+
start_time = _now_iso()
|
|
250
|
+
start_perf = time.perf_counter()
|
|
251
|
+
error: str | None = None
|
|
252
|
+
result: Any = None
|
|
253
|
+
|
|
254
|
+
try:
|
|
255
|
+
result = func(*args, **kwargs)
|
|
256
|
+
return result
|
|
257
|
+
except Exception as e:
|
|
258
|
+
error = f"{type(e).__name__}: {e}"
|
|
259
|
+
raise
|
|
260
|
+
finally:
|
|
261
|
+
end_time = _now_iso()
|
|
262
|
+
duration_ms = (time.perf_counter() - start_perf) * 1000
|
|
263
|
+
|
|
264
|
+
if task_run_id:
|
|
265
|
+
span = _build_span(
|
|
266
|
+
task_run_id, args, kwargs, start_time, end_time, result, error
|
|
267
|
+
)
|
|
268
|
+
queue_span(span)
|
|
269
|
+
logger.debug("Span: %s (%.2fms)", span_name, duration_ms)
|
|
363
270
|
|
|
364
|
-
# Choose wrapper based on function type
|
|
365
271
|
wrapper = async_wrapper if asyncio.iscoroutinefunction(func) else sync_wrapper
|
|
366
|
-
|
|
367
|
-
# Mark as instrumented
|
|
368
272
|
wrapper._hud_instrumented = True # type: ignore[attr-defined]
|
|
369
273
|
wrapper._hud_original = func # type: ignore[attr-defined]
|
|
370
274
|
|
|
371
275
|
return wrapper
|
|
372
276
|
|
|
373
|
-
# Handle usage with or without parentheses
|
|
374
277
|
if func is None:
|
|
375
|
-
# Called with arguments: @instrument(name="foo")
|
|
376
278
|
return decorator
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
279
|
+
return decorator(func)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
__all__ = [
|
|
283
|
+
"instrument",
|
|
284
|
+
]
|