hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -89
- hud/agents/__init__.py +17 -0
- hud/agents/art.py +101 -0
- hud/agents/base.py +599 -0
- hud/{mcp → agents}/claude.py +373 -321
- hud/{mcp → agents}/langchain.py +250 -250
- hud/agents/misc/__init__.py +7 -0
- hud/{agent → agents}/misc/response_agent.py +80 -80
- hud/{mcp → agents}/openai.py +352 -334
- hud/agents/openai_chat_generic.py +154 -0
- hud/{mcp → agents}/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -0
- hud/agents/tests/test_claude.py +324 -0
- hud/{mcp → agents}/tests/test_client.py +363 -324
- hud/{mcp → agents}/tests/test_openai.py +237 -238
- hud/cli/__init__.py +617 -0
- hud/cli/__main__.py +8 -0
- hud/cli/analyze.py +371 -0
- hud/cli/analyze_metadata.py +230 -0
- hud/cli/build.py +427 -0
- hud/cli/clone.py +185 -0
- hud/cli/cursor.py +92 -0
- hud/cli/debug.py +392 -0
- hud/cli/docker_utils.py +83 -0
- hud/cli/init.py +281 -0
- hud/cli/interactive.py +353 -0
- hud/cli/mcp_server.py +756 -0
- hud/cli/pull.py +336 -0
- hud/cli/push.py +379 -0
- hud/cli/remote_runner.py +311 -0
- hud/cli/runner.py +160 -0
- hud/cli/tests/__init__.py +3 -0
- hud/cli/tests/test_analyze.py +284 -0
- hud/cli/tests/test_cli_init.py +265 -0
- hud/cli/tests/test_cli_main.py +27 -0
- hud/cli/tests/test_clone.py +142 -0
- hud/cli/tests/test_cursor.py +253 -0
- hud/cli/tests/test_debug.py +453 -0
- hud/cli/tests/test_mcp_server.py +139 -0
- hud/cli/tests/test_utils.py +388 -0
- hud/cli/utils.py +263 -0
- hud/clients/README.md +143 -0
- hud/clients/__init__.py +16 -0
- hud/clients/base.py +354 -0
- hud/clients/fastmcp.py +202 -0
- hud/clients/mcp_use.py +278 -0
- hud/clients/tests/__init__.py +1 -0
- hud/clients/tests/test_client_integration.py +111 -0
- hud/clients/tests/test_fastmcp.py +342 -0
- hud/clients/tests/test_protocol.py +188 -0
- hud/clients/utils/__init__.py +1 -0
- hud/clients/utils/retry_transport.py +160 -0
- hud/datasets.py +322 -192
- hud/misc/__init__.py +1 -0
- hud/{agent → misc}/claude_plays_pokemon.py +292 -283
- hud/otel/__init__.py +35 -0
- hud/otel/collector.py +142 -0
- hud/otel/config.py +164 -0
- hud/otel/context.py +536 -0
- hud/otel/exporters.py +366 -0
- hud/otel/instrumentation.py +97 -0
- hud/otel/processors.py +118 -0
- hud/otel/tests/__init__.py +1 -0
- hud/otel/tests/test_processors.py +197 -0
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -0
- hud/server/helper/__init__.py +5 -0
- hud/server/low_level.py +132 -0
- hud/server/server.py +166 -0
- hud/server/tests/__init__.py +3 -0
- hud/settings.py +73 -79
- hud/shared/__init__.py +5 -0
- hud/{exceptions.py → shared/exceptions.py} +180 -180
- hud/{server → shared}/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -0
- hud/{server → shared}/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -30
- hud/telemetry/instrument.py +379 -0
- hud/telemetry/job.py +309 -141
- hud/telemetry/replay.py +74 -0
- hud/telemetry/trace.py +83 -0
- hud/tools/__init__.py +33 -34
- hud/tools/base.py +365 -65
- hud/tools/bash.py +161 -137
- hud/tools/computer/__init__.py +15 -13
- hud/tools/computer/anthropic.py +437 -414
- hud/tools/computer/hud.py +376 -328
- hud/tools/computer/openai.py +295 -286
- hud/tools/computer/settings.py +82 -0
- hud/tools/edit.py +314 -290
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -532
- hud/tools/executors/pyautogui.py +621 -619
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -503
- hud/tools/{playwright_tool.py → playwright.py} +412 -379
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -0
- hud/tools/tests/test_bash.py +158 -152
- hud/tools/tests/test_bash_extended.py +197 -0
- hud/tools/tests/test_computer.py +425 -52
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -240
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -157
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -0
- hud/tools/utils.py +50 -50
- hud/types.py +136 -89
- hud/utils/__init__.py +10 -16
- hud/utils/async_utils.py +65 -0
- hud/utils/design.py +168 -0
- hud/utils/mcp.py +55 -0
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -0
- hud/utils/tests/test_init.py +17 -21
- hud/utils/tests/test_progress.py +261 -225
- hud/utils/tests/test_telemetry.py +82 -37
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- hud_python-0.4.0.dist-info/METADATA +474 -0
- hud_python-0.4.0.dist-info/RECORD +132 -0
- hud_python-0.4.0.dist-info/entry_points.txt +3 -0
- {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
- hud/adapters/__init__.py +0 -8
- hud/adapters/claude/__init__.py +0 -5
- hud/adapters/claude/adapter.py +0 -180
- hud/adapters/claude/tests/__init__.py +0 -1
- hud/adapters/claude/tests/test_adapter.py +0 -519
- hud/adapters/common/__init__.py +0 -6
- hud/adapters/common/adapter.py +0 -178
- hud/adapters/common/tests/test_adapter.py +0 -289
- hud/adapters/common/types.py +0 -446
- hud/adapters/operator/__init__.py +0 -5
- hud/adapters/operator/adapter.py +0 -108
- hud/adapters/operator/tests/__init__.py +0 -1
- hud/adapters/operator/tests/test_adapter.py +0 -370
- hud/agent/__init__.py +0 -19
- hud/agent/base.py +0 -126
- hud/agent/claude.py +0 -271
- hud/agent/langchain.py +0 -215
- hud/agent/misc/__init__.py +0 -3
- hud/agent/operator.py +0 -268
- hud/agent/tests/__init__.py +0 -1
- hud/agent/tests/test_base.py +0 -202
- hud/env/__init__.py +0 -11
- hud/env/client.py +0 -35
- hud/env/docker_client.py +0 -349
- hud/env/environment.py +0 -446
- hud/env/local_docker_client.py +0 -358
- hud/env/remote_client.py +0 -212
- hud/env/remote_docker_client.py +0 -292
- hud/gym.py +0 -130
- hud/job.py +0 -773
- hud/mcp/__init__.py +0 -17
- hud/mcp/base.py +0 -631
- hud/mcp/client.py +0 -312
- hud/mcp/tests/test_base.py +0 -512
- hud/mcp/tests/test_claude.py +0 -294
- hud/task.py +0 -149
- hud/taskset.py +0 -237
- hud/telemetry/_trace.py +0 -347
- hud/telemetry/context.py +0 -230
- hud/telemetry/exporter.py +0 -575
- hud/telemetry/instrumentation/__init__.py +0 -3
- hud/telemetry/instrumentation/mcp.py +0 -259
- hud/telemetry/instrumentation/registry.py +0 -59
- hud/telemetry/mcp_models.py +0 -270
- hud/telemetry/tests/__init__.py +0 -1
- hud/telemetry/tests/test_context.py +0 -210
- hud/telemetry/tests/test_trace.py +0 -312
- hud/tools/helper/README.md +0 -56
- hud/tools/helper/__init__.py +0 -9
- hud/tools/helper/mcp_server.py +0 -78
- hud/tools/helper/server_initialization.py +0 -115
- hud/tools/helper/utils.py +0 -58
- hud/trajectory.py +0 -94
- hud/utils/agent.py +0 -37
- hud/utils/common.py +0 -256
- hud/utils/config.py +0 -120
- hud/utils/deprecation.py +0 -115
- hud/utils/misc.py +0 -53
- hud/utils/tests/test_common.py +0 -277
- hud/utils/tests/test_config.py +0 -129
- hud_python-0.3.4.dist-info/METADATA +0 -284
- hud_python-0.3.4.dist-info/RECORD +0 -120
- /hud/{adapters/common → shared}/tests/__init__.py +0 -0
- {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
hud/telemetry/job.py
CHANGED
|
@@ -1,141 +1,309 @@
|
|
|
1
|
-
"""Job
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
import
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
from
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
def
|
|
140
|
-
"""
|
|
141
|
-
|
|
1
|
+
"""Job management for HUD SDK.
|
|
2
|
+
|
|
3
|
+
This module provides APIs for managing jobs - logical groupings of related tasks.
|
|
4
|
+
Jobs can be used to track experiments, batch processing, training runs, etc.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import logging
|
|
11
|
+
import uuid
|
|
12
|
+
from contextlib import contextmanager
|
|
13
|
+
from datetime import UTC, datetime
|
|
14
|
+
from functools import wraps
|
|
15
|
+
from typing import TYPE_CHECKING, Any
|
|
16
|
+
|
|
17
|
+
from hud.settings import settings
|
|
18
|
+
from hud.shared import make_request, make_request_sync
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from collections.abc import Callable, Generator
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Job:
|
|
27
|
+
"""A job represents a collection of related tasks."""
|
|
28
|
+
|
|
29
|
+
def __init__(
|
|
30
|
+
self,
|
|
31
|
+
job_id: str,
|
|
32
|
+
name: str,
|
|
33
|
+
metadata: dict[str, Any] | None = None,
|
|
34
|
+
dataset_link: str | None = None,
|
|
35
|
+
) -> None:
|
|
36
|
+
self.id = job_id
|
|
37
|
+
self.name = name
|
|
38
|
+
self.metadata = metadata or {}
|
|
39
|
+
self.dataset_link = dataset_link
|
|
40
|
+
self.status = "created"
|
|
41
|
+
self.created_at = datetime.now(UTC)
|
|
42
|
+
self.tasks: list[str] = []
|
|
43
|
+
|
|
44
|
+
def add_task(self, task_id: str) -> None:
|
|
45
|
+
"""Associate a task with this job."""
|
|
46
|
+
self.tasks.append(task_id)
|
|
47
|
+
|
|
48
|
+
async def update_status(self, status: str) -> None:
|
|
49
|
+
"""Update job status on the server."""
|
|
50
|
+
self.status = status
|
|
51
|
+
if settings.telemetry_enabled:
|
|
52
|
+
try:
|
|
53
|
+
payload = {
|
|
54
|
+
"name": self.name,
|
|
55
|
+
"status": status,
|
|
56
|
+
"metadata": self.metadata,
|
|
57
|
+
}
|
|
58
|
+
if self.dataset_link:
|
|
59
|
+
payload["dataset_link"] = self.dataset_link
|
|
60
|
+
|
|
61
|
+
await make_request(
|
|
62
|
+
method="POST",
|
|
63
|
+
url=f"{settings.hud_telemetry_url}/jobs/{self.id}/status",
|
|
64
|
+
json=payload,
|
|
65
|
+
api_key=settings.api_key,
|
|
66
|
+
)
|
|
67
|
+
except Exception as e:
|
|
68
|
+
logger.warning("Failed to update job status: %s", e)
|
|
69
|
+
|
|
70
|
+
def update_status_sync(self, status: str) -> None:
|
|
71
|
+
"""Synchronously update job status on the server."""
|
|
72
|
+
self.status = status
|
|
73
|
+
if settings.telemetry_enabled:
|
|
74
|
+
try:
|
|
75
|
+
payload = {
|
|
76
|
+
"name": self.name,
|
|
77
|
+
"status": status,
|
|
78
|
+
"metadata": self.metadata,
|
|
79
|
+
}
|
|
80
|
+
if self.dataset_link:
|
|
81
|
+
payload["dataset_link"] = self.dataset_link
|
|
82
|
+
|
|
83
|
+
make_request_sync(
|
|
84
|
+
method="POST",
|
|
85
|
+
url=f"{settings.hud_telemetry_url}/jobs/{self.id}/status",
|
|
86
|
+
json=payload,
|
|
87
|
+
api_key=settings.api_key,
|
|
88
|
+
)
|
|
89
|
+
except Exception as e:
|
|
90
|
+
logger.warning("Failed to update job status: %s", e)
|
|
91
|
+
|
|
92
|
+
def __repr__(self) -> str:
|
|
93
|
+
return f"Job(id={self.id!r}, name={self.name!r}, status={self.status!r})"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Global job registry for the decorator pattern
|
|
97
|
+
_current_job: Job | None = None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _print_job_url(job_id: str, job_name: str) -> None:
    """Print a boxed banner announcing the job and its app.hud.so URL.

    Nothing is printed unless both ``settings.telemetry_enabled`` and
    ``settings.api_key`` are truthy.
    """
    if not settings.telemetry_enabled or not settings.api_key:
        return

    # ANSI escape sequences used for the banner.
    dim, gold, bold, reset = "\033[90m", "\033[33m", "\033[1m", "\033[0m"

    link = f"https://app.hud.so/jobs/{job_id}"
    title = f"🚀 Job '{job_name}' started:"

    # Inner width is the longest line plus four cells of padding; the two
    # border characters bring the full box to "longest + 6".
    inner = max(len(link), len(title)) + 4
    edge = f"{dim}║{reset}"

    title_left = (inner - len(title)) // 2
    # One cell less than the naive remainder — presumably because the emoji
    # renders two cells wide while len() counts it once (TODO confirm).
    title_right = inner - len(title) - title_left - 1

    link_left = (inner - len(link)) // 2
    link_right = inner - len(link) - link_left

    rows = [
        f"\n{dim}╔{'═' * inner}╗{reset}",
        f"{edge}{' ' * title_left}{title}{' ' * title_right}{edge}",
        f"{dim}╟{'─' * inner}╢{reset}",
        f"{edge}{' ' * link_left}{bold}{gold}{link}{reset}{' ' * link_right}{edge}",
        f"{dim}╚{'═' * inner}╝{reset}\n",
    ]
    for row in rows:
        print(row)  # noqa: T201
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _print_job_complete_url(job_id: str, job_name: str, error_occurred: bool = False) -> None:
    """Print a one-line success or failure message with the job URL.

    Nothing is printed unless both ``settings.telemetry_enabled`` and
    ``settings.api_key`` are truthy.

    Args:
        job_id: Job identifier used to build the app.hud.so link.
        job_name: Name shown in the message.
        error_occurred: Selects the red "failed" wording instead of the green
            "complete" wording.
    """
    if not settings.telemetry_enabled or not settings.api_key:
        return

    # ANSI escape sequences.
    green, red, gold = "\033[92m", "\033[91m", "\033[33m"
    dim, bold, reset = "\033[2m", "\033[1m", "\033[0m"

    link = f"https://app.hud.so/jobs/{job_id}"

    if error_occurred:
        colour = red
        verdict = f"✗ Job '{job_name}' failed!"
        hint = "View details at:"
    else:
        colour = green
        verdict = f"✓ Job '{job_name}' complete!"
        hint = "View all results at:"

    print(  # noqa: T201
        f"\n{colour}{verdict}{reset} {dim}{hint}{reset} {bold}{gold}{link}{reset}\n"
    )
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def get_current_job() -> Job | None:
    """Return the module-level current job, or ``None`` when none is set."""
    active_job = _current_job
    return active_job
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@contextmanager
def job(
    name: str,
    metadata: dict[str, Any] | None = None,
    job_id: str | None = None,
    dataset_link: str | None = None,
) -> Generator[Job, None, None]:
    """Context manager for job tracking.

    Groups related tasks together under a single job for tracking and organization.
    The job is installed as the module-level current job for the duration of the
    block and the previous one is restored on exit.

    Status transitions reported (synchronously) to the server:
    ``running`` on entry, then ``completed`` on a clean exit or ``failed`` when
    the block raises. Cancellation (``asyncio.CancelledError``) and
    ``KeyboardInterrupt`` are also reported as ``failed`` so an interrupted job
    is not left in the ``running`` state; the original exception is always
    re-raised.

    Args:
        name: Human-readable job name
        metadata: Optional metadata dictionary
        job_id: Optional job ID (auto-generated if not provided)
        dataset_link: Optional HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50")

    Yields:
        Job: The job object

    Example:
        with hud.job("training_run", {"model": "gpt-4"}) as job:
            for epoch in range(10):
                with hud.trace(f"epoch_{epoch}", job_id=job.id):
                    train_epoch()
    """
    global _current_job

    if not job_id:
        job_id = str(uuid.uuid4())

    job_obj = Job(job_id, name, metadata, dataset_link)

    # Set as current job, remembering the previous one so nesting unwinds correctly
    old_job = _current_job
    _current_job = job_obj

    try:
        # Update status to running synchronously to ensure job is registered before tasks start
        job_obj.update_status_sync("running")
        # Print the nice job URL box
        _print_job_url(job_obj.id, job_obj.name)
        yield job_obj
        # Update status to completed synchronously to ensure it completes before process exit
        job_obj.update_status_sync("completed")
        # Print job completion message
        _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=False)
    except (Exception, KeyboardInterrupt, asyncio.CancelledError):
        # CancelledError and KeyboardInterrupt derive from BaseException, so a
        # plain ``except Exception`` would skip them and never report the failure.
        # Update status to failed synchronously to ensure it completes before process exit
        job_obj.update_status_sync("failed")
        # Print job failure message
        _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=True)
        raise
    finally:
        _current_job = old_job
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def create_job(
    name: str, metadata: dict[str, Any] | None = None, dataset_link: str | None = None
) -> Job:
    """Build a ``Job`` with a freshly generated UUID, without a context manager.

    Use this when the caller wants to drive the job lifecycle explicitly. No
    status is reported here; call ``update_status`` / ``update_status_sync`` on
    the returned object.

    Args:
        name: Human-readable job name
        metadata: Optional metadata dictionary
        dataset_link: Optional HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50")

    Returns:
        Job: The created job object

    Example:
        job = hud.create_job("data_processing")
        try:
            for item in items:
                with hud.trace(f"process_{item.id}", job_id=job.id):
                    process(item)
        finally:
            job.update_status_sync("completed")
    """
    return Job(str(uuid.uuid4()), name, metadata, dataset_link)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def job_decorator(name: str | None = None, **metadata: Any) -> Callable:
|
|
256
|
+
"""Decorator for functions that should be tracked as jobs.
|
|
257
|
+
|
|
258
|
+
Args:
|
|
259
|
+
name: Job name (defaults to function name)
|
|
260
|
+
**metadata: Additional metadata for the job
|
|
261
|
+
|
|
262
|
+
Example:
|
|
263
|
+
@hud.job_decorator("model_training", model="gpt-4", dataset="v2")
|
|
264
|
+
async def train_model(config):
|
|
265
|
+
# This entire function execution is tracked as a job
|
|
266
|
+
await model.train(config)
|
|
267
|
+
return model.evaluate()
|
|
268
|
+
"""
|
|
269
|
+
|
|
270
|
+
def decorator(func: Callable) -> Callable:
|
|
271
|
+
job_name = name or func.__name__
|
|
272
|
+
|
|
273
|
+
@wraps(func)
|
|
274
|
+
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
275
|
+
with job(job_name, metadata) as job_obj:
|
|
276
|
+
# Store job ID in function for access
|
|
277
|
+
func._current_job_id = job_obj.id
|
|
278
|
+
try:
|
|
279
|
+
return await func(*args, **kwargs)
|
|
280
|
+
finally:
|
|
281
|
+
delattr(func, "_current_job_id")
|
|
282
|
+
|
|
283
|
+
@wraps(func)
|
|
284
|
+
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
285
|
+
with job(job_name, metadata) as job_obj:
|
|
286
|
+
# Store job ID in function for access
|
|
287
|
+
func._current_job_id = job_obj.id
|
|
288
|
+
try:
|
|
289
|
+
return func(*args, **kwargs)
|
|
290
|
+
finally:
|
|
291
|
+
delattr(func, "_current_job_id")
|
|
292
|
+
|
|
293
|
+
# Return appropriate wrapper based on function type
|
|
294
|
+
if asyncio.iscoroutinefunction(func):
|
|
295
|
+
return async_wrapper
|
|
296
|
+
else:
|
|
297
|
+
return sync_wrapper
|
|
298
|
+
|
|
299
|
+
return decorator
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
# Convenience exports
|
|
303
|
+
__all__ = [
|
|
304
|
+
"Job",
|
|
305
|
+
"create_job",
|
|
306
|
+
"get_current_job",
|
|
307
|
+
"job",
|
|
308
|
+
"job_decorator",
|
|
309
|
+
]
|
hud/telemetry/replay.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Trace retrieval and replay functionality.
|
|
2
|
+
|
|
3
|
+
This module provides APIs to retrieve collected traces for analysis,
|
|
4
|
+
debugging, and replay purposes.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
from hud.otel.collector import clear_trace as _clear_trace
|
|
12
|
+
from hud.otel.collector import get_trace as _get_trace
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from hud.types import Trace
|
|
16
|
+
|
|
17
|
+
__all__ = ["clear_trace", "get_trace"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_trace(task_run_id: str) -> Trace | None:
    """Look up the trace collected for a task run.

    This is a thin public wrapper that delegates to the collector in
    ``hud.otel.collector``.

    Args:
        task_run_id: The task run ID to retrieve the trace for

    Returns:
        Trace object containing all collected steps, or None if trace
        collection was disabled or no trace exists for that ID.

    Usage:
        import hud

        # Run agent with tracing
        with hud.trace() as task_run_id:
            agent = MyAgent()
            result = await agent.run("solve task")

        # Get the trace for analysis
        trace = hud.get_trace(task_run_id)
        if trace:
            print(f"Collected {len(trace.trace)} steps")

            # Split steps by category
            agent_steps = [s for s in trace.trace if s.category == "agent"]
            mcp_steps = [s for s in trace.trace if s.category == "mcp"]

            print(f"Agent steps: {len(agent_steps)}")
            print(f"MCP steps: {len(mcp_steps)}")

            # Inspect individual steps
            for step in trace.trace:
                if step.category == "agent" and step.result:
                    print(f"Agent: {step.result}")
                if step.category == "mcp" and step.request:
                    print(f"MCP: {step.request}")
    """
    collected = _get_trace(task_run_id)
    return collected
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def clear_trace(task_run_id: str) -> None:
    """Discard the collected trace stored under ``task_run_id``.

    Thin public wrapper over the collector's clear function; handy for
    releasing memory once a large trace has been processed.

    Args:
        task_run_id: The task run ID to clear the trace for

    Usage:
        trace = hud.get_trace(task_run_id)
        # Process trace...
        hud.clear_trace(task_run_id)  # Free memory
    """
    _clear_trace(task_run_id)
    return None
|
hud/telemetry/trace.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""User-facing trace context manager for HUD telemetry.
|
|
2
|
+
|
|
3
|
+
This module provides the simple trace() API that users interact with.
|
|
4
|
+
The actual OpenTelemetry implementation is in hud.otel.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import uuid
|
|
10
|
+
from contextlib import contextmanager
|
|
11
|
+
from typing import TYPE_CHECKING, Any
|
|
12
|
+
|
|
13
|
+
from hud.otel import configure_telemetry
|
|
14
|
+
from hud.otel import trace as OtelTrace
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from collections.abc import Generator
|
|
18
|
+
|
|
19
|
+
__all__ = ["trace"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@contextmanager
def trace(
    name: str = "Test task from hud",
    *,
    root: bool = True,
    attrs: dict[str, Any] | None = None,
    job_id: str | None = None,
    task_id: str | None = None,
) -> Generator[str, None, None]:
    """Open a HUD trace context around a block of code.

    Telemetry is configured first, then the call is delegated to the
    OpenTelemetry-backed implementation in ``hud.otel``.

    A fresh UUID is used as the task run ID when telemetry is enabled and an
    API key is set; otherwise the fixed placeholder ``"custom-otlp-trace"`` is
    passed instead.

    Args:
        name: Descriptive name for this trace/task (used as the span name)
        root: Whether this is a root trace (updates task status)
        attrs: Additional attributes to attach to the trace
        job_id: Optional job ID to associate with this trace
        task_id: Optional task ID, forwarded unchanged to the underlying trace

    Yields:
        str: The run ID produced by the underlying trace context

    Usage:
        import hud

        with hud.trace("My Task") as task_run_id:
            # Your code here
            print(f"Running task: {task_run_id}")

        # Or with default name:
        with hud.trace() as task_run_id:
            pass

        # Or with job_id:
        with hud.trace("My Task", job_id="550e8400-e29b-41d4-a716-446655440000") as task_run_id:
            pass
    """
    # Telemetry must be configured before settings are read and the span opens.
    configure_telemetry()

    from hud.settings import get_settings

    cfg = get_settings()

    # A real run ID is only generated for the HUD backend; custom OTLP
    # backends get a placeholder.
    using_hud_backend = cfg.telemetry_enabled and cfg.api_key
    task_run_id = str(uuid.uuid4()) if using_hud_backend else "custom-otlp-trace"

    span_attributes = attrs or {}

    with OtelTrace(
        task_run_id,
        is_root=root,
        span_name=name,
        attributes=span_attributes,
        job_id=job_id,
        task_id=task_id,
    ) as run_id:
        yield run_id
|