hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (192)
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -420
  87. hud/tools/computer/hud.py +376 -334
  88. hud/tools/computer/openai.py +295 -292
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.5.dist-info/METADATA +0 -284
  190. hud_python-0.3.5.dist-info/RECORD +0 -120
  191. hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
hud/telemetry/job.py CHANGED
@@ -1,141 +1,309 @@
1
- """Job context manager for grouping related traces."""
2
-
3
- from __future__ import annotations
4
-
5
- import logging
6
- import sys
7
- import uuid
8
- from contextlib import contextmanager
9
- from contextvars import ContextVar
10
- from typing import TYPE_CHECKING, Any
11
-
12
- from hud.telemetry.exporter import JobStatus, submit_to_worker_loop, update_job_status
13
-
14
- if TYPE_CHECKING:
15
- from collections.abc import Generator
16
- from typing import Self
17
-
18
- logger = logging.getLogger("hud.telemetry")
19
-
20
- # Context variables for current job
21
- current_job_id: ContextVar[str | None] = ContextVar("current_job_id", default=None)
22
- current_job_name: ContextVar[str | None] = ContextVar("current_job_name", default=None)
23
-
24
-
25
- class JobContext:
26
- """Context manager for grouping traces under a job."""
27
-
28
- def __init__(
29
- self, name: str, taskset_name: str | None = None, metadata: dict[str, Any] | None = None
30
- ) -> None:
31
- self.id = str(uuid.uuid4())
32
- self.name = name
33
- self.metadata = metadata or {}
34
- self.taskset_name: str | None = taskset_name
35
-
36
- def __enter__(self) -> Self:
37
- # Auto-detect dataset
38
- if self.taskset_name is None:
39
- self._detect_dataset()
40
-
41
- # Set context variables
42
- current_job_id.set(self.id)
43
- current_job_name.set(self.name)
44
-
45
- # Send initial status
46
- job_metadata = {**self.metadata}
47
- coro = update_job_status(
48
- self.id, JobStatus.RUNNING, metadata=job_metadata, taskset_name=self.taskset_name
49
- )
50
- submit_to_worker_loop(coro)
51
-
52
- logger.info("Started job %s (ID: %s)", self.name, self.id)
53
- return self
54
-
55
- def __exit__(
56
- self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: object
57
- ) -> None:
58
- # Determine final status
59
- if exc_type is not None:
60
- # Job failed with exception
61
- error_msg = f"{exc_type.__name__}: {exc_val}"
62
- coro = update_job_status(
63
- self.id, JobStatus.ERROR, error_message=error_msg, taskset_name=self.taskset_name
64
- )
65
- else:
66
- # Job completed successfully
67
- coro = update_job_status(self.id, JobStatus.COMPLETED, taskset_name=self.taskset_name)
68
-
69
- submit_to_worker_loop(coro)
70
-
71
- # Clear context
72
- current_job_id.set(None)
73
- current_job_name.set(None)
74
-
75
- status = "failed" if exc_type else "completed"
76
- logger.info("Job %s %s", self.name, status)
77
-
78
- def _detect_dataset(self) -> None:
79
- """Auto-detect HuggingFace dataset in parent scope."""
80
- try:
81
- # Check frames 2 and 3 (with statement and parent scope)
82
- for frame_depth in [2, 3]:
83
- try:
84
- frame = sys._getframe(frame_depth)
85
-
86
- # Search for Dataset objects
87
- for var_value in frame.f_locals.values():
88
- if hasattr(var_value, "info") and hasattr(var_value.info, "builder_name"):
89
- self.taskset_name = var_value.info.builder_name
90
- logger.debug(
91
- "Auto-detected dataset at frame %d: %s",
92
- frame_depth,
93
- self.taskset_name,
94
- )
95
- return
96
- elif hasattr(var_value, "builder_name"):
97
- # Older dataset format
98
- self.taskset_name = var_value.builder_name
99
- logger.debug(
100
- "Auto-detected dataset at frame %d: %s",
101
- frame_depth,
102
- self.taskset_name,
103
- )
104
- return
105
- except ValueError:
106
- # Frame doesn't exist
107
- continue
108
- except Exception as e:
109
- logger.debug("Dataset auto-detection failed: %s", e)
110
-
111
-
112
- @contextmanager
113
- def job(
114
- name: str, taskset_name: str | None = None, metadata: dict[str, Any] | None = None
115
- ) -> Generator[JobContext, None, None]:
116
- """
117
- Create a job context for grouping related traces.
118
-
119
- Args:
120
- name: Name for the job
121
- metadata: Optional metadata to include with the job
122
-
123
- Example:
124
- with hud.job("evaluation_run") as job:
125
- for task in tasks:
126
- with hud.trace(f"task_{task.id}"):
127
- # Trace automatically includes job_id
128
- result = await agent.run(task)
129
- """
130
- with JobContext(name, taskset_name, metadata) as ctx:
131
- yield ctx
132
-
133
-
134
- def get_current_job_id() -> str | None:
135
- """Get the current job ID if inside a job context."""
136
- return current_job_id.get()
137
-
138
-
139
- def get_current_job_name() -> str | None:
140
- """Get the current job name if inside a job context."""
141
- return current_job_name.get()
1
+ """Job management for HUD SDK.
2
+
3
+ This module provides APIs for managing jobs - logical groupings of related tasks.
4
+ Jobs can be used to track experiments, batch processing, training runs, etc.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import logging
11
+ import uuid
12
+ from contextlib import contextmanager
13
+ from datetime import UTC, datetime
14
+ from functools import wraps
15
+ from typing import TYPE_CHECKING, Any
16
+
17
+ from hud.settings import settings
18
+ from hud.shared import make_request, make_request_sync
19
+
20
+ if TYPE_CHECKING:
21
+ from collections.abc import Callable, Generator
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ class Job:
27
+ """A job represents a collection of related tasks."""
28
+
29
+ def __init__(
30
+ self,
31
+ job_id: str,
32
+ name: str,
33
+ metadata: dict[str, Any] | None = None,
34
+ dataset_link: str | None = None,
35
+ ) -> None:
36
+ self.id = job_id
37
+ self.name = name
38
+ self.metadata = metadata or {}
39
+ self.dataset_link = dataset_link
40
+ self.status = "created"
41
+ self.created_at = datetime.now(UTC)
42
+ self.tasks: list[str] = []
43
+
44
+ def add_task(self, task_id: str) -> None:
45
+ """Associate a task with this job."""
46
+ self.tasks.append(task_id)
47
+
48
+ async def update_status(self, status: str) -> None:
49
+ """Update job status on the server."""
50
+ self.status = status
51
+ if settings.telemetry_enabled:
52
+ try:
53
+ payload = {
54
+ "name": self.name,
55
+ "status": status,
56
+ "metadata": self.metadata,
57
+ }
58
+ if self.dataset_link:
59
+ payload["dataset_link"] = self.dataset_link
60
+
61
+ await make_request(
62
+ method="POST",
63
+ url=f"{settings.hud_telemetry_url}/jobs/{self.id}/status",
64
+ json=payload,
65
+ api_key=settings.api_key,
66
+ )
67
+ except Exception as e:
68
+ logger.warning("Failed to update job status: %s", e)
69
+
70
+ def update_status_sync(self, status: str) -> None:
71
+ """Synchronously update job status on the server."""
72
+ self.status = status
73
+ if settings.telemetry_enabled:
74
+ try:
75
+ payload = {
76
+ "name": self.name,
77
+ "status": status,
78
+ "metadata": self.metadata,
79
+ }
80
+ if self.dataset_link:
81
+ payload["dataset_link"] = self.dataset_link
82
+
83
+ make_request_sync(
84
+ method="POST",
85
+ url=f"{settings.hud_telemetry_url}/jobs/{self.id}/status",
86
+ json=payload,
87
+ api_key=settings.api_key,
88
+ )
89
+ except Exception as e:
90
+ logger.warning("Failed to update job status: %s", e)
91
+
92
+ def __repr__(self) -> str:
93
+ return f"Job(id={self.id!r}, name={self.name!r}, status={self.status!r})"
94
+
95
+
96
+ # Global job registry for the decorator pattern
97
+ _current_job: Job | None = None
98
+
99
+
100
+ def _print_job_url(job_id: str, job_name: str) -> None:
101
+ """Print the job URL in a colorful box."""
102
+ # Only print HUD URL if HUD telemetry is enabled and has API key
103
+ if not (settings.telemetry_enabled and settings.api_key):
104
+ return
105
+
106
+ url = f"https://app.hud.so/jobs/{job_id}"
107
+ header = f"🚀 Job '{job_name}' started:"
108
+
109
+ # ANSI color codes
110
+ DIM = "\033[90m" # Dim/Gray for border
111
+ GOLD = "\033[33m" # Gold/Yellow for URL
112
+ RESET = "\033[0m"
113
+ BOLD = "\033[1m"
114
+
115
+ # Calculate box width based on the longest line
116
+ box_width = max(len(url), len(header)) + 6
117
+
118
+ # Box drawing characters
119
+ top_border = "╔" + "═" * (box_width - 2) + "╗"
120
+ bottom_border = "╚" + "═" * (box_width - 2) + "╝"
121
+ divider = "╟" + "─" * (box_width - 2) + "╢"
122
+
123
+ # Center the content
124
+ header_padding = (box_width - len(header) - 2) // 2
125
+ url_padding = (box_width - len(url) - 2) // 2
126
+
127
+ # Print the box
128
+ print(f"\n{DIM}{top_border}{RESET}") # noqa: T201
129
+ print( # noqa: T201
130
+ f"{DIM}║{RESET}{' ' * header_padding}{header}{' ' * (box_width - len(header) - header_padding - 3)}{DIM}║{RESET}" # noqa: E501
131
+ )
132
+ print(f"{DIM}{divider}{RESET}") # noqa: T201
133
+ print( # noqa: T201
134
+ f"{DIM}║{RESET}{' ' * url_padding}{BOLD}{GOLD}{url}{RESET}{' ' * (box_width - len(url) - url_padding - 2)}{DIM}║{RESET}" # noqa: E501
135
+ )
136
+ print(f"{DIM}{bottom_border}{RESET}\n") # noqa: T201
137
+
138
+
139
+ def _print_job_complete_url(job_id: str, job_name: str, error_occurred: bool = False) -> None:
140
+ """Print the job completion URL with appropriate messaging."""
141
+ # Only print HUD URL if HUD telemetry is enabled and has API key
142
+ if not (settings.telemetry_enabled and settings.api_key):
143
+ return
144
+
145
+ url = f"https://app.hud.so/jobs/{job_id}"
146
+
147
+ # ANSI color codes
148
+ GREEN = "\033[92m"
149
+ RED = "\033[91m"
150
+ GOLD = "\033[33m"
151
+ RESET = "\033[0m"
152
+ DIM = "\033[2m"
153
+ BOLD = "\033[1m"
154
+
155
+ if error_occurred:
156
+ print( # noqa: T201
157
+ f"\n{RED}✗ Job '{job_name}' failed!{RESET} {DIM}View details at:{RESET} {BOLD}{GOLD}{url}{RESET}\n" # noqa: E501
158
+ )
159
+ else:
160
+ print( # noqa: T201
161
+ f"\n{GREEN}✓ Job '{job_name}' complete!{RESET} {DIM}View all results at:{RESET} {BOLD}{GOLD}{url}{RESET}\n" # noqa: E501
162
+ )
163
+
164
+
165
+ def get_current_job() -> Job | None:
166
+ """Get the currently active job, if any."""
167
+ return _current_job
168
+
169
+
170
+ @contextmanager
171
+ def job(
172
+ name: str,
173
+ metadata: dict[str, Any] | None = None,
174
+ job_id: str | None = None,
175
+ dataset_link: str | None = None,
176
+ ) -> Generator[Job, None, None]:
177
+ """Context manager for job tracking.
178
+
179
+ Groups related tasks together under a single job for tracking and organization.
180
+
181
+ Args:
182
+ name: Human-readable job name
183
+ metadata: Optional metadata dictionary
184
+ job_id: Optional job ID (auto-generated if not provided)
185
+ dataset_link: Optional HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50")
186
+
187
+ Yields:
188
+ Job: The job object
189
+
190
+ Example:
191
+ with hud.job("training_run", {"model": "gpt-4"}) as job:
192
+ for epoch in range(10):
193
+ with hud.trace(f"epoch_{epoch}", job_id=job.id):
194
+ train_epoch()
195
+ """
196
+ global _current_job
197
+
198
+ if not job_id:
199
+ job_id = str(uuid.uuid4())
200
+
201
+ job_obj = Job(job_id, name, metadata, dataset_link)
202
+
203
+ # Set as current job
204
+ old_job = _current_job
205
+ _current_job = job_obj
206
+
207
+ try:
208
+ # Update status to running synchronously to ensure job is registered before tasks start
209
+ job_obj.update_status_sync("running")
210
+ # Print the nice job URL box
211
+ _print_job_url(job_obj.id, job_obj.name)
212
+ yield job_obj
213
+ # Update status to completed synchronously to ensure it completes before process exit
214
+ job_obj.update_status_sync("completed")
215
+ # Print job completion message
216
+ _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=False)
217
+ except Exception:
218
+ # Update status to failed synchronously to ensure it completes before process exit
219
+ job_obj.update_status_sync("failed")
220
+ # Print job failure message
221
+ _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=True)
222
+ raise
223
+ finally:
224
+ _current_job = old_job
225
+
226
+
227
+ def create_job(
228
+ name: str, metadata: dict[str, Any] | None = None, dataset_link: str | None = None
229
+ ) -> Job:
230
+ """Create a job without using context manager.
231
+
232
+ Useful when you need explicit control over job lifecycle.
233
+
234
+ Args:
235
+ name: Human-readable job name
236
+ metadata: Optional metadata dictionary
237
+ dataset_link: Optional HuggingFace dataset identifier (e.g. "hud-evals/SheetBench-50")
238
+
239
+ Returns:
240
+ Job: The created job object
241
+
242
+ Example:
243
+ job = hud.create_job("data_processing")
244
+ try:
245
+ for item in items:
246
+ with hud.trace(f"process_{item.id}", job_id=job.id):
247
+ process(item)
248
+ finally:
249
+ await job.update_status("completed")
250
+ """
251
+ job_id = str(uuid.uuid4())
252
+ return Job(job_id, name, metadata, dataset_link)
253
+
254
+
255
+ def job_decorator(name: str | None = None, **metadata: Any) -> Callable:
256
+ """Decorator for functions that should be tracked as jobs.
257
+
258
+ Args:
259
+ name: Job name (defaults to function name)
260
+ **metadata: Additional metadata for the job
261
+
262
+ Example:
263
+ @hud.job_decorator("model_training", model="gpt-4", dataset="v2")
264
+ async def train_model(config):
265
+ # This entire function execution is tracked as a job
266
+ await model.train(config)
267
+ return model.evaluate()
268
+ """
269
+
270
+ def decorator(func: Callable) -> Callable:
271
+ job_name = name or func.__name__
272
+
273
+ @wraps(func)
274
+ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
275
+ with job(job_name, metadata) as job_obj:
276
+ # Store job ID in function for access
277
+ func._current_job_id = job_obj.id
278
+ try:
279
+ return await func(*args, **kwargs)
280
+ finally:
281
+ delattr(func, "_current_job_id")
282
+
283
+ @wraps(func)
284
+ def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
285
+ with job(job_name, metadata) as job_obj:
286
+ # Store job ID in function for access
287
+ func._current_job_id = job_obj.id
288
+ try:
289
+ return func(*args, **kwargs)
290
+ finally:
291
+ delattr(func, "_current_job_id")
292
+
293
+ # Return appropriate wrapper based on function type
294
+ if asyncio.iscoroutinefunction(func):
295
+ return async_wrapper
296
+ else:
297
+ return sync_wrapper
298
+
299
+ return decorator
300
+
301
+
302
+ # Convenience exports
303
+ __all__ = [
304
+ "Job",
305
+ "create_job",
306
+ "get_current_job",
307
+ "job",
308
+ "job_decorator",
309
+ ]
hud/telemetry/replay.py ADDED
@@ -0,0 +1,74 @@
1
+ """Trace retrieval and replay functionality.
2
+
3
+ This module provides APIs to retrieve collected traces for analysis,
4
+ debugging, and replay purposes.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ from hud.otel.collector import clear_trace as _clear_trace
12
+ from hud.otel.collector import get_trace as _get_trace
13
+
14
+ if TYPE_CHECKING:
15
+ from hud.types import Trace
16
+
17
+ __all__ = ["clear_trace", "get_trace"]
18
+
19
+
20
+ def get_trace(task_run_id: str) -> Trace | None:
21
+ """Retrieve the collected trace for a task run.
22
+
23
+ Returns None if trace collection was disabled or the trace doesn't exist.
24
+
25
+ Args:
26
+ task_run_id: The task run ID to retrieve the trace for
27
+
28
+ Returns:
29
+ Trace object containing all collected steps, or None if not found
30
+
31
+ Usage:
32
+ import hud
33
+
34
+ # Run agent with tracing
35
+ with hud.trace() as task_run_id:
36
+ agent = MyAgent()
37
+ result = await agent.run("solve task")
38
+
39
+ # Get the trace for analysis
40
+ trace = hud.get_trace(task_run_id)
41
+ if trace:
42
+ print(f"Collected {len(trace.trace)} steps")
43
+
44
+ # Analyze agent vs MCP steps
45
+ agent_steps = [s for s in trace.trace if s.category == "agent"]
46
+ mcp_steps = [s for s in trace.trace if s.category == "mcp"]
47
+
48
+ print(f"Agent steps: {len(agent_steps)}")
49
+ print(f"MCP steps: {len(mcp_steps)}")
50
+
51
+ # Replay or analyze individual steps
52
+ for step in trace.trace:
53
+ if step.category == "agent" and step.result:
54
+ print(f"Agent: {step.result.get('content') if isinstance(step.result, dict) else step.result}")
55
+ if step.category == "mcp" and step.request:
56
+ print(f"MCP: {step.request.method if hasattr(step.request, 'method') else step.request}")
57
+ """ # noqa: E501
58
+ return _get_trace(task_run_id)
59
+
60
+
61
+ def clear_trace(task_run_id: str) -> None:
62
+ """Clear the collected trace for a task run ID.
63
+
64
+ Useful for cleaning up memory after processing large traces.
65
+
66
+ Args:
67
+ task_run_id: The task run ID to clear the trace for
68
+
69
+ Usage:
70
+ trace = hud.get_trace(task_run_id)
71
+ # Process trace...
72
+ hud.clear_trace(task_run_id) # Free memory
73
+ """
74
+ _clear_trace(task_run_id)
hud/telemetry/trace.py ADDED
@@ -0,0 +1,83 @@
1
+ """User-facing trace context manager for HUD telemetry.
2
+
3
+ This module provides the simple trace() API that users interact with.
4
+ The actual OpenTelemetry implementation is in hud.otel.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import uuid
10
+ from contextlib import contextmanager
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ from hud.otel import configure_telemetry
14
+ from hud.otel import trace as OtelTrace
15
+
16
+ if TYPE_CHECKING:
17
+ from collections.abc import Generator
18
+
19
+ __all__ = ["trace"]
20
+
21
+
22
+ @contextmanager
23
+ def trace(
24
+ name: str = "Test task from hud",
25
+ *,
26
+ root: bool = True,
27
+ attrs: dict[str, Any] | None = None,
28
+ job_id: str | None = None,
29
+ task_id: str | None = None,
30
+ ) -> Generator[str, None, None]:
31
+ """Start a HUD trace context.
32
+
33
+ A unique task_run_id is automatically generated for each trace.
34
+
35
+ Args:
36
+ name: Descriptive name for this trace/task
37
+ root: Whether this is a root trace (updates task status)
38
+ attrs: Additional attributes to attach to the trace
39
+ job_id: Optional job ID to associate with this trace
40
+
41
+ Yields:
42
+ str: The auto-generated task run ID
43
+
44
+ Usage:
45
+ import hud
46
+
47
+ with hud.trace("My Task") as task_run_id:
48
+ # Your code here
49
+ print(f"Running task: {task_run_id}")
50
+
51
+ # Or with default name:
52
+ with hud.trace() as task_run_id:
53
+ pass
54
+
55
+ # Or with job_id:
56
+ with hud.trace("My Task", job_id="550e8400-e29b-41d4-a716-446655440000") as task_run_id:
57
+ pass
58
+ """
59
+ # Ensure telemetry is configured
60
+ configure_telemetry()
61
+
62
+ # Only generate task_run_id if using HUD backend
63
+ # For custom OTLP backends, we don't need it
64
+ from hud.settings import get_settings
65
+
66
+ settings = get_settings()
67
+
68
+ if settings.telemetry_enabled and settings.api_key:
69
+ task_run_id = str(uuid.uuid4())
70
+ else:
71
+ # Use a placeholder for custom backends
72
+ task_run_id = "custom-otlp-trace"
73
+
74
+ # Delegate to OpenTelemetry implementation
75
+ with OtelTrace(
76
+ task_run_id,
77
+ is_root=root,
78
+ span_name=name,
79
+ attributes=attrs or {},
80
+ job_id=job_id,
81
+ task_id=task_id,
82
+ ) as run_id:
83
+ yield run_id