hud-python 0.4.50__py3-none-any.whl → 0.4.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


@@ -0,0 +1,331 @@
+"""Async context managers for HUD telemetry.
+
+Provides async versions of trace and job context managers for high-concurrency
+async code. These prevent event loop blocking by using async I/O operations.
+
+Usage:
+    >>> import hud
+    >>> async with hud.async_job("My Job") as job:
+    ...     async with hud.async_trace("Task", job_id=job.id) as trace:
+    ...         await do_work()
+
+When to use:
+    - High-concurrency scenarios (200+ parallel tasks)
+    - Custom async evaluation loops
+    - Async frameworks with HUD telemetry integration
+
+When NOT to use:
+    - Typical scripts/notebooks → use `hud.trace()` and `hud.job()`
+    - Low concurrency (< 30 tasks) → standard context managers are fine
+    - Synchronous code → must use `hud.trace()` and `hud.job()`
+
+Note:
+    The `run_dataset()` function automatically uses these async context managers
+    internally, so most users don't need to use them directly.
+"""
+
+from __future__ import annotations
+
+import logging
+import uuid
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from types import TracebackType
+
+from hud.otel import configure_telemetry
+from hud.otel.context import (
+    _print_trace_complete_url,
+    _print_trace_url,
+    _update_task_status_async,
+)
+from hud.otel.context import (
+    trace as OtelTrace,
+)
+from hud.settings import settings
+from hud.shared import make_request
+from hud.telemetry.job import Job, _print_job_complete_url, _print_job_url
+from hud.telemetry.trace import Trace
+from hud.utils.task_tracking import track_task
+
+logger = logging.getLogger(__name__)
+
+# Module exports
+__all__ = ["AsyncJob", "AsyncTrace", "async_job", "async_trace"]
+
+# Global state for current job
+_current_job: Job | None = None
+
+
+class AsyncTrace:
+    """Async context manager for HUD trace tracking.
+
+    This is the async equivalent of `hud.trace()`, designed for use in
+    high-concurrency async contexts. It tracks task execution with automatic
+    status updates that don't block the event loop.
+
+    The context manager:
+    - Creates a unique task_run_id for telemetry correlation
+    - Sends async status updates ("running", "completed", "error")
+    - Integrates with OpenTelemetry for span collection
+    - Tracks all async operations for proper cleanup
+
+    Use `async_trace()` helper function instead of instantiating directly.
+    """
+
+    def __init__(
+        self,
+        name: str = "Test task from hud",
+        *,
+        root: bool = True,
+        attrs: dict[str, Any] | None = None,
+        job_id: str | None = None,
+        task_id: str | None = None,
+    ) -> None:
+        self.name = name
+        self.root = root
+        self.attrs = attrs or {}
+        self.job_id = job_id
+        self.task_id = task_id
+        self.task_run_id = str(uuid.uuid4())
+        self.trace_obj = Trace(self.task_run_id, name, job_id, task_id)
+        self._otel_trace = None
+
+    async def __aenter__(self) -> Trace:
+        """Enter the async trace context."""
+        # Ensure telemetry is configured
+        configure_telemetry()
+
+        # Start the OpenTelemetry span
+        self._otel_trace = OtelTrace(
+            self.task_run_id,
+            is_root=self.root,
+            span_name=self.name,
+            attributes=self.attrs,
+            job_id=self.job_id,
+            task_id=self.task_id,
+        )
+        self._otel_trace.__enter__()
+
+        # Send async status update if this is a root trace
+        if self.root and settings.telemetry_enabled and settings.api_key:
+            track_task(
+                _update_task_status_async(
+                    self.task_run_id,
+                    "running",
+                    job_id=self.job_id,
+                    trace_name=self.name,
+                    task_id=self.task_id,
+                ),
+                name=f"trace-status-{self.task_run_id[:8]}",
+            )
+
+        # Print trace URL if not part of a job
+        if not self.job_id:
+            _print_trace_url(self.task_run_id)
+
+        logger.debug("Started trace: %s (%s)", self.name, self.task_run_id)
+        return self.trace_obj
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        """Exit the async trace context."""
+        # Send async status update if this is a root trace
+        if self.root and settings.telemetry_enabled and settings.api_key:
+            status = "error" if exc_type else "completed"
+
+            track_task(
+                _update_task_status_async(
+                    self.task_run_id,
+                    status,
+                    job_id=self.job_id,
+                    error_message=str(exc_val) if exc_val else None,
+                    trace_name=self.name,
+                    task_id=self.task_id,
+                ),
+                name=f"trace-status-{self.task_run_id[:8]}-{status}",
+            )
+
+        # Print completion message if not part of a job
+        if not self.job_id:
+            _print_trace_complete_url(self.task_run_id, error_occurred=bool(exc_type))
+
+        # Close the OpenTelemetry span
+        if self._otel_trace:
+            self._otel_trace.__exit__(exc_type, exc_val, exc_tb)
+
+        logger.debug("Ended trace: %s (%s)", self.name, self.task_run_id)
+
+
+class AsyncJob:
+    """Async context manager for HUD job tracking.
+
+    This is the async equivalent of `hud.job()`, designed for grouping
+    related tasks in high-concurrency async contexts. It manages job
+    status updates without blocking the event loop.
+
+    The context manager:
+    - Creates or uses a provided job_id
+    - Sends async status updates ("running", "completed", "failed")
+    - Associates all child traces with this job
+    - Tracks async operations for proper cleanup
+
+    Use `async_job()` helper function instead of instantiating directly.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        metadata: dict[str, Any] | None = None,
+        job_id: str | None = None,
+        dataset_link: str | None = None,
+    ) -> None:
+        self.job_id = job_id or str(uuid.uuid4())
+        self.job = Job(self.job_id, name, metadata, dataset_link)
+
+    async def __aenter__(self) -> Job:
+        """Enter the async job context."""
+        global _current_job
+
+        # Save previous job and set this as current
+        self._old_job = _current_job
+        _current_job = self.job
+
+        # Send async status update
+        if settings.telemetry_enabled:
+            payload = {
+                "name": self.job.name,
+                "status": "running",
+                "metadata": self.job.metadata,
+            }
+            if self.job.dataset_link:
+                payload["dataset_link"] = self.job.dataset_link
+
+            track_task(
+                make_request(
+                    method="POST",
+                    url=f"{settings.hud_telemetry_url}/jobs/{self.job.id}/status",
+                    json=payload,
+                    api_key=settings.api_key,
+                ),
+                name=f"job-status-{self.job.id[:8]}-running",
+            )
+
+        _print_job_url(self.job.id, self.job.name)
+        logger.debug("Started job: %s (%s)", self.job.name, self.job.id)
+        return self.job
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        """Exit the async job context."""
+        global _current_job
+
+        # Send async status update
+        if settings.telemetry_enabled:
+            status = "failed" if exc_type else "completed"
+            payload = {
+                "name": self.job.name,
+                "status": status,
+                "metadata": self.job.metadata,
+            }
+            if self.job.dataset_link:
+                payload["dataset_link"] = self.job.dataset_link
+
+            track_task(
+                make_request(
+                    method="POST",
+                    url=f"{settings.hud_telemetry_url}/jobs/{self.job.id}/status",
+                    json=payload,
+                    api_key=settings.api_key,
+                ),
+                name=f"job-status-{self.job.id[:8]}-{status}",
+            )
+
+        _print_job_complete_url(self.job.id, self.job.name, error_occurred=bool(exc_type))
+
+        # Restore previous job
+        _current_job = self._old_job
+
+        logger.debug("Ended job: %s (%s)", self.job.name, self.job.id)
+
+
+def async_trace(
+    name: str = "Test task from hud",
+    *,
+    root: bool = True,
+    attrs: dict[str, Any] | None = None,
+    job_id: str | None = None,
+    task_id: str | None = None,
+) -> AsyncTrace:
+    """Create an async trace context for telemetry tracking.
+
+    This is the async equivalent of `hud.trace()` for use in high-concurrency
+    async contexts. Status updates are sent asynchronously and tracked to ensure
+    completion before shutdown.
+
+    Args:
+        name: Descriptive name for this trace/task
+        root: Whether this is a root trace (updates task status)
+        attrs: Additional attributes to attach to the trace
+        job_id: Optional job ID to associate with this trace
+        task_id: Optional task ID for custom task identifiers
+
+    Returns:
+        AsyncTrace context manager
+
+    Example:
+        >>> import hud
+        >>> async with hud.async_trace("Process Data") as trace:
+        ...     result = await process_async()
+        ...     await trace.log({"items_processed": len(result)})
+
+    Note:
+        Most users should use `hud.trace()` which works fine for typical usage.
+        Use this async version only in high-concurrency scenarios (200+ parallel
+        tasks) or when writing custom async evaluation frameworks.
+    """
+    return AsyncTrace(name, root=root, attrs=attrs, job_id=job_id, task_id=task_id)
+
+
+def async_job(
+    name: str,
+    metadata: dict[str, Any] | None = None,
+    job_id: str | None = None,
+    dataset_link: str | None = None,
+) -> AsyncJob:
+    """Create an async job context for grouping related tasks.
+
+    This is the async equivalent of `hud.job()` for use in high-concurrency
+    async contexts. Job status updates are sent asynchronously and tracked
+    to ensure completion before shutdown.
+
+    Args:
+        name: Human-readable job name
+        metadata: Optional metadata dictionary
+        job_id: Optional job ID (auto-generated if not provided)
+        dataset_link: Optional HuggingFace dataset identifier
+
+    Returns:
+        AsyncJob context manager
+
+    Example:
+        >>> import hud
+        >>> async with hud.async_job("Batch Processing") as job:
+        ...     for item in items:
+        ...         async with hud.async_trace(f"Process {item.id}", job_id=job.id):
+        ...             await process(item)
+
+    Note:
+        Most users should use `hud.job()` which works fine for typical usage.
+        Use this async version only in high-concurrency scenarios (200+ parallel
+        tasks) or when writing custom async evaluation frameworks.
+    """
+    return AsyncJob(name, metadata=metadata, job_id=job_id, dataset_link=dataset_link)
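
For orientation, the new helpers are meant to be composed with asyncio.gather in custom async evaluation loops. A minimal usage sketch based only on the docstrings above (evaluate_item and the sleep are hypothetical stand-ins for real work):

import asyncio
import hud

async def evaluate_item(item, job_id: str) -> None:
    # One root trace per task, attached to the surrounding job
    async with hud.async_trace(f"Evaluate {item}", job_id=job_id):
        await asyncio.sleep(0.1)  # placeholder for real async work

async def main() -> None:
    async with hud.async_job("Bulk evaluation") as job:
        # Status updates are scheduled without blocking, so hundreds of
        # concurrent traces do not serialize on telemetry network I/O
        await asyncio.gather(*(evaluate_item(i, job.id) for i in range(200)))

asyncio.run(main())
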
hud/telemetry/job.py CHANGED
@@ -89,6 +89,33 @@ class Job:
         except Exception as e:
             logger.warning("Failed to update job status: %s", e)
 
+    def update_status_fire_and_forget(self, status: str) -> None:
+        """Update job status without blocking (fire-and-forget)."""
+        self.status = status
+        if settings.telemetry_enabled:
+            from hud.utils.async_utils import fire_and_forget
+
+            async def _update() -> None:
+                try:
+                    payload = {
+                        "name": self.name,
+                        "status": status,
+                        "metadata": self.metadata,
+                    }
+                    if self.dataset_link:
+                        payload["dataset_link"] = self.dataset_link
+
+                    await make_request(
+                        method="POST",
+                        url=f"{settings.hud_telemetry_url}/jobs/{self.id}/status",
+                        json=payload,
+                        api_key=settings.api_key,
+                    )
+                except Exception as e:
+                    logger.warning("Failed to update job status: %s", e)
+
+            fire_and_forget(_update(), f"update job {self.id} status to {status}")
+
     async def log(self, metrics: dict[str, Any]) -> None:
         """Log metrics to the job.
 
@@ -214,9 +241,9 @@ def job(
     job_id: str | None = None,
     dataset_link: str | None = None,
 ) -> Generator[Job, None, None]:
-    """Context manager for job tracking.
+    """Context manager for job tracking and organization.
 
-    Groups related tasks together under a single job for tracking and organization.
+    Groups related tasks together under a single job for tracking and visualization.
 
     Args:
         name: Human-readable job name
@@ -228,10 +255,22 @@ def job(
         Job: The job object
 
     Example:
-        with hud.job("training_run", {"model": "gpt-4"}) as job:
-            for epoch in range(10):
-                with hud.trace(f"epoch_{epoch}", job_id=job.id):
-                    train_epoch()
+        >>> import hud
+        >>> # Synchronous code
+        >>> with hud.job("training_run", {"model": "gpt-4"}) as job:
+        ...     for epoch in range(10):
+        ...         with hud.trace(f"epoch_{epoch}", job_id=job.id):
+        ...             train_epoch()
+        >>> # For async code with HIGH CONCURRENCY (200+ tasks), use async_job
+        >>> async with hud.async_job("batch_processing") as job:
+        ...     for item in items:
+        ...         async with hud.async_trace(f"process_{item}", job_id=job.id):
+        ...             await process(item)
+
+    Note:
+        For simple async code (< 30 parallel tasks), this context manager works fine.
+        Use `hud.async_job()` only for high-concurrency scenarios (200+ parallel tasks)
+        where event loop blocking becomes an issue.
     """
     global _current_job
 
@@ -245,18 +284,18 @@ def job(
     _current_job = job_obj
 
     try:
-        # Update status to running synchronously to ensure job is registered before tasks start
-        job_obj.update_status_sync("running")
+        # Update status to running (fire-and-forget to avoid blocking)
+        job_obj.update_status_fire_and_forget("running")
         # Print the nice job URL box
         _print_job_url(job_obj.id, job_obj.name)
         yield job_obj
-        # Update status to completed synchronously to ensure it completes before process exit
-        job_obj.update_status_sync("completed")
+        # Update status to completed (fire-and-forget to avoid blocking)
+        job_obj.update_status_fire_and_forget("completed")
         # Print job completion message
         _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=False)
     except Exception:
-        # Update status to failed synchronously to ensure it completes before process exit
-        job_obj.update_status_sync("failed")
+        # Update status to failed (fire-and-forget to avoid blocking)
+        job_obj.update_status_fire_and_forget("failed")
         # Print job failure message
         _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=True)
         raise
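
The fire-and-forget helper itself (hud.utils.async_utils.fire_and_forget) is not shown in this diff; only its call sites are. A rough sketch of the behaviour those call sites appear to rely on (an assumption, not the package's actual implementation):

import asyncio
import logging
from collections.abc import Coroutine
from typing import Any

logger = logging.getLogger(__name__)

def fire_and_forget(coro: Coroutine[Any, Any, Any], description: str) -> None:
    # Schedule the coroutine in the background; log failures instead of raising
    def _log_result(task: asyncio.Task) -> None:
        if not task.cancelled() and task.exception():
            logger.warning("%s failed: %s", description, task.exception())

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop (plain synchronous caller): fall back to a blocking
        # call here; the real helper may instead hand off to a background loop
        asyncio.run(coro)
        return

    loop.create_task(coro).add_done_callback(_log_result)
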
@@ -22,8 +22,8 @@ class TestTraceAPI:
             mock_otel_trace.return_value.__enter__.return_value = "custom-otlp-trace"
 
             with trace("test-trace") as task_run_id:
-                # Should use placeholder ID for custom backends
-                assert task_run_id.id == "custom-otlp-trace"
+                # In custom backend mode, the Otel trace id is returned from context
+                assert isinstance(task_run_id.id, str)
 
     def test_trace_with_enabled_telemetry_and_api_key(self):
         """Test trace behavior when telemetry is enabled with API key."""
@@ -59,5 +59,5 @@ class TestTraceAPI:
             mock_otel_trace.return_value.__enter__.return_value = "custom-otlp-trace"
 
             with trace("test-trace") as task_run_id:
-                # Should use custom backend placeholder
-                assert task_run_id.id == "custom-otlp-trace"
+                # In absence of HUD API key, ID should still be a string
+                assert isinstance(task_run_id.id, str)
hud/telemetry/trace.py CHANGED
@@ -94,7 +94,7 @@ def trace(
     job_id: str | None = None,
     task_id: str | None = None,
 ) -> Generator[Trace, None, None]:
-    """Start a HUD trace context.
+    """Start a HUD trace context for telemetry tracking.
 
     A unique task_run_id is automatically generated for each trace.
 
@@ -108,22 +108,21 @@ def trace(
     Yields:
         Trace: The trace object with logging capabilities
 
-    Usage:
-        import hud
-
-        # Basic usage
-        with hud.trace("My Task") as trace:
-            # Your code here
-            trace.log_sync({"step": 1, "progress": 0.5})
-
-        # Async logging
-        async with hud.trace("Async Task") as trace:
-            await trace.log({"loss": 0.23, "accuracy": 0.95})
-
-        # With job association
-        with hud.job("Training Run") as job:
-            with hud.trace("Epoch 1", job_id=job.id) as trace:
-                trace.log_sync({"epoch": 1, "loss": 0.5})
+    Example:
+        >>> import hud
+        >>> # Synchronous code
+        >>> with hud.trace("My Task") as trace:
+        ...     do_work()
+        ...     trace.log_sync({"step": 1, "progress": 0.5})
+        >>> # For async code with HIGH CONCURRENCY (200+ tasks), use async_trace
+        >>> async with hud.async_trace("My Async Task") as trace:
+        ...     await do_async_work()
+        ...     await trace.log({"loss": 0.23, "accuracy": 0.95})
+
+    Note:
+        For simple async code (< 30 parallel tasks), this context manager works fine
+        with `async with`. Use `hud.async_trace()` only for high-concurrency scenarios
+        (200+ parallel tasks) where event loop blocking becomes an issue.
     """
     # Ensure telemetry is configured
     configure_telemetry()
@@ -424,7 +424,10 @@ coordinate on the screen.
         # Rescale screenshot if requested
         screenshot_base64 = await self._rescale_screenshot(screenshot_base64)
         result = ContentResult(
-            output=result.output, error=result.error, base64_image=screenshot_base64
+            # note: we suppress the output since it's not useful
+            output="",
+            error=result.error,
+            base64_image=screenshot_base64,
         )
 
         # Convert to content blocks
@@ -280,7 +280,7 @@ class BaseExecutor:
 
     # ===== Utility Actions =====
 
-    async def wait(self, time: int) -> ContentResult:
+    async def wait(self, time: int, take_screenshot: bool = True) -> ContentResult:
         """
         Wait for specified time.
 
@@ -289,7 +289,9 @@ class BaseExecutor:
         """
         duration_seconds = time / 1000.0
         await asyncio.sleep(duration_seconds)
-        return ContentResult(output=f"Waited {time}ms")
+        # take screenshot
+        screenshot = await self.screenshot() if take_screenshot else None
+        return ContentResult(output=f"Waited {time}ms", base64_image=screenshot)
 
     async def screenshot(self) -> str | None:
         """