hud-python 0.4.50__py3-none-any.whl → 0.4.52__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of hud-python has been flagged as possibly problematic.

hud/otel/exporters.py CHANGED
@@ -1,21 +1,27 @@
-"""Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
-HTTP endpoint (/trace/<id>/telemetry-upload).
+"""Custom OpenTelemetry exporter for HUD telemetry backend.
 
-The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
-exactly the same semantics the old async worker in ``hud.telemetry.exporter``
-implemented.
+This exporter sends spans to the HUD telemetry HTTP endpoint, grouping them
+by task_run_id for efficient batch uploads.
 
-This exporter is *synchronous* (derives from :class:`SpanExporter`). We rely on
-``hud.shared.make_request_sync`` which already contains retry & auth logic.
+Performance optimizations:
+- Detects async contexts and runs exports in a thread pool to avoid blocking
+- Uses persistent HTTP client with connection pooling for reduced overhead
+- Tracks pending export futures to ensure completion during shutdown
+
+The exporter derives from SpanExporter (synchronous interface) but handles
+async contexts intelligently to prevent event loop blocking during high-concurrency
+workloads.
 """
 
 from __future__ import annotations
 
+import atexit
+import concurrent.futures as cf
 import contextlib
 import json
 import logging
-import time
 from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor
 from datetime import UTC, datetime
 from typing import TYPE_CHECKING, Any
 
@@ -31,6 +37,34 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
+# Global singleton thread pool for span exports
+_export_executor: ThreadPoolExecutor | None = None
+
+
+def get_export_executor() -> ThreadPoolExecutor:
+    """Get or create the global thread pool for span exports.
+
+    Returns a singleton ThreadPoolExecutor used for running span exports
+    in a thread pool when called from async contexts, preventing event
+    loop blocking during high-concurrency workloads.
+
+    The executor is automatically cleaned up on process exit via atexit.
+
+    Returns:
+        ThreadPoolExecutor with 8 workers for high-throughput parallel uploads
+    """
+    global _export_executor
+    if _export_executor is None:
+        # Use 8 workers to handle high-volume parallel uploads efficiently
+        _export_executor = ThreadPoolExecutor(max_workers=8, thread_name_prefix="span-export")
+
+        def cleanup() -> None:
+            if _export_executor is not None:
+                _export_executor.shutdown(wait=True)
+
+        atexit.register(cleanup)
+    return _export_executor
+
 
 # ---------------------------------------------------------------------------
 # Models
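
As an aside, the detect-and-offload pattern that get_export_executor supports (and that the export hunk below implements in full) boils down to the following minimal sketch. It is illustrative only, not package code; blocking_upload is a hypothetical stand-in for the per-batch HTTP upload.

import asyncio


def export_without_blocking(blocking_upload):
    """Run blocking_upload off the event loop if one is running (illustrative only)."""
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running event loop: it is safe to block the calling thread.
        return blocking_upload()
    # An event loop is running: hand the blocking work to the shared executor
    # and return immediately; the caller keeps the future if it needs the result.
    future = loop.run_in_executor(get_export_executor(), blocking_upload)
    future.add_done_callback(lambda f: f.exception())  # consume errors quietly
    return future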
@@ -297,73 +331,213 @@ def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
 
 
 class HudSpanExporter(SpanExporter):
-    """Exporter that forwards spans to HUD backend using existing endpoint."""
+    """OpenTelemetry span exporter for the HUD backend.
+
+    This exporter groups spans by task_run_id and sends them to the HUD
+    telemetry endpoint. Performance optimizations include:
+
+    - Auto-detects async contexts and runs exports in thread pool (non-blocking)
+    - Tracks pending export futures for proper shutdown coordination
+
+    Handles high-concurrency scenarios (200+ parallel tasks) by offloading
+    synchronous HTTP operations to a thread pool when called from async
+    contexts, preventing event loop blocking.
+    """
 
     def __init__(self, *, telemetry_url: str, api_key: str) -> None:
+        """Initialize the HUD span exporter.
+
+        Args:
+            telemetry_url: Base URL for the HUD telemetry backend
+            api_key: API key for authentication
+        """
         super().__init__()
         self._telemetry_url = telemetry_url.rstrip("/")
         self._api_key = api_key
 
-    # ------------------------------------------------------------------
-    # Core API
-    # ------------------------------------------------------------------
+        # Track pending export futures for shutdown coordination
+        self._pending_futures: list[cf.Future[SpanExportResult]] = []
+
     def export(self, spans: list[ReadableSpan]) -> SpanExportResult:  # type: ignore[override]
+        """Export spans to HUD backend.
+
+        Auto-detects async contexts: if called from an async event loop, runs
+        the export in a thread pool to avoid blocking. Otherwise runs synchronously.
+
+        Args:
+            spans: List of ReadableSpan objects to export
+
+        Returns:
+            SpanExportResult.SUCCESS (returns immediately in async contexts)
+        """
         if not spans:
             return SpanExportResult.SUCCESS
 
-        # Group spans by hud.task_run_id attribute
+        # Group spans by task_run_id for batched uploads
         grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
         for span in spans:
             run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
             if not run_id:
-                # Skip spans that are outside HUD traces
+                # Skip spans outside HUD traces
                 continue
             grouped[str(run_id)].append(span)
 
-        # Send each group synchronously (retry inside make_request_sync)
-        for run_id, span_batch in grouped.items():
-            try:
-                url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
-                telemetry_spans = [_span_to_dict(s) for s in span_batch]
-                # Include current step count in metadata
-                metadata = {}
-                # Get the HIGHEST step count from the batch (most recent)
-                step_count = 0
-                for span in span_batch:
-                    if span.attributes and "hud.step_count" in span.attributes:
-                        current_step = span.attributes["hud.step_count"]
-                        if isinstance(current_step, int) and current_step > step_count:
-                            step_count = current_step
-
-                payload = {
-                    "metadata": metadata,
-                    "telemetry": telemetry_spans,
-                }
-
-                # Only include step_count if we found any steps
-                if step_count > 0:
-                    payload["step_count"] = step_count
-
-                logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
-                make_request_sync(
-                    method="POST",
-                    url=url,
-                    json=payload,
-                    api_key=self._api_key,
-                )
-            except Exception as exc:
-                logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
-                # If *any* group fails we return FAILURE so the OTEL SDK can retry
-                return SpanExportResult.FAILURE
-
-        return SpanExportResult.SUCCESS
+        # Detect async context to avoid event loop blocking
+        import asyncio
+
+        try:
+            loop = asyncio.get_running_loop()
+            # In async context - offload to thread pool
+            executor = get_export_executor()
+
+            def _sync_export() -> SpanExportResult:
+                # Send each group synchronously (retry inside make_request_sync)
+                for run_id, span_batch in grouped.items():
+                    try:
+                        url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
+                        telemetry_spans = [_span_to_dict(s) for s in span_batch]
+                        # Include current step count in metadata
+                        metadata = {}
+                        # Get the HIGHEST step count from the batch (most recent)
+                        step_count = 0
+                        for span in span_batch:
+                            if span.attributes and "hud.step_count" in span.attributes:
+                                current_step = span.attributes["hud.step_count"]
+                                if isinstance(current_step, int) and current_step > step_count:
+                                    step_count = current_step
+
+                        payload = {
+                            "metadata": metadata,
+                            "telemetry": telemetry_spans,
+                        }
+
+                        # Only include step_count if we found any steps
+                        if step_count > 0:
+                            payload["step_count"] = step_count
+
+                        logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
+                        make_request_sync(
+                            method="POST",
+                            url=url,
+                            json=payload,
+                            api_key=self._api_key,
+                        )
+                    except Exception as exc:
+                        logger.exception(
+                            "HUD exporter failed to send spans for task %s: %s", run_id, exc
+                        )
+                        return SpanExportResult.FAILURE
+                return SpanExportResult.SUCCESS
+
+            # Run in thread to avoid blocking event loop
+            future = loop.run_in_executor(executor, _sync_export)
+            # Track and cleanup when done
+            self._pending_futures.append(future)  # type: ignore[list-item]
+
+            def _cleanup_done(f: cf.Future[SpanExportResult]) -> None:
+                with contextlib.suppress(Exception):
+                    # Consume exception to avoid "exception was never retrieved"
+                    _ = f.exception()
+                # Remove from pending list
+                with contextlib.suppress(ValueError):
+                    self._pending_futures.remove(f)
+
+            future.add_done_callback(_cleanup_done)  # type: ignore[arg-type]
+            # Don't wait for it - return immediately
+            return SpanExportResult.SUCCESS
+
+        except RuntimeError:
+            # No event loop - run synchronously
+            # Send each group synchronously (retry inside make_request_sync)
+            for run_id, span_batch in grouped.items():
+                try:
+                    url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
+                    telemetry_spans = [_span_to_dict(s) for s in span_batch]
+                    # Include current step count in metadata
+                    metadata = {}
+                    # Get the HIGHEST step count from the batch (most recent)
+                    step_count = 0
+                    for span in span_batch:
+                        if span.attributes and "hud.step_count" in span.attributes:
+                            current_step = span.attributes["hud.step_count"]
+                            if isinstance(current_step, int) and current_step > step_count:
+                                step_count = current_step
+
+                    payload = {
+                        "metadata": metadata,
+                        "telemetry": telemetry_spans,
+                    }
+
+                    # Only include step_count if we found any steps
+                    if step_count > 0:
+                        payload["step_count"] = step_count
+
+                    logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
+                    make_request_sync(
+                        method="POST",
+                        url=url,
+                        json=payload,
+                        api_key=self._api_key,
+                    )
+                except Exception as exc:
+                    logger.exception(
+                        "HUD exporter failed to send spans for task %s: %s", run_id, exc
+                    )
+                    # If *any* group fails we return FAILURE so the OTEL SDK can retry
+                    return SpanExportResult.FAILURE
+
+            return SpanExportResult.SUCCESS
 
     def shutdown(self) -> None:  # type: ignore[override]
-        # Nothing to cleanup, httpx handled inside make_request_sync
-        pass
+        """Shutdown the exporter and wait for pending exports.
+
+        Waits up to 10 seconds for any in-flight exports to complete.
+        """
+        try:
+            if self._pending_futures:
+                with contextlib.suppress(Exception):
+                    cf.wait(self._pending_futures, timeout=10.0)
+        finally:
+            self._pending_futures.clear()
 
     def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
-        if timeout_millis:
-            time.sleep(timeout_millis / 1000)
-        # Synchronous export, nothing buffered here
-        return True
+        """Force flush all pending span exports.
+
+        Waits for all pending export futures to complete before returning.
+        This is called by the OpenTelemetry SDK during shutdown to ensure
+        all telemetry is uploaded.
+
+        Args:
+            timeout_millis: Maximum time to wait in milliseconds
+
+        Returns:
+            True if all exports completed, False otherwise
+        """
+        try:
+            if not self._pending_futures:
+                return True
+
+            total_pending = len(self._pending_futures)
+            if total_pending > 10:
+                # Show progress for large batches
+                logger.info("Flushing %d pending telemetry uploads...", total_pending)
+
+            timeout = (timeout_millis or 30000) / 1000.0
+            done, not_done = cf.wait(self._pending_futures, timeout=timeout)
+
+            # Consume exceptions to avoid "exception was never retrieved" warnings
+            for f in list(done):
+                with contextlib.suppress(Exception):
+                    _ = f.exception()
+
+            # Remove completed futures
+            for f in list(done):
+                with contextlib.suppress(ValueError):
+                    self._pending_futures.remove(f)
+
+            if total_pending > 10:
+                logger.info("Completed %d/%d telemetry uploads", len(done), total_pending)
+
+            return len(not_done) == 0
+        except Exception:
+            return False
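
The diff does not show how the exporter gets registered. Assuming the standard OpenTelemetry SDK wiring (the hud package presumably does something equivalent internally), it would typically sit behind a BatchSpanProcessor, whose shutdown path is what ends up invoking the new shutdown()/force_flush() logic above. The endpoint and key below are placeholders.

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from hud.otel.exporters import HudSpanExporter

# Placeholder values; the real configuration lives inside the hud package.
exporter = HudSpanExporter(telemetry_url="https://telemetry.example.invalid", api_key="sk-...")

provider = TracerProvider()
provider.add_span_processor(BatchSpanProcessor(exporter))  # calls exporter.export() in batches
trace.set_tracer_provider(provider)

# ... application code creates spans ...

# Draining the processor calls exporter.shutdown(), which waits (up to 10s)
# for any uploads still running in the export thread pool.
provider.shutdown()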
hud/rl/learner.py CHANGED
@@ -187,7 +187,7 @@ class GRPOLearner:
         # Use 8-bit optimizer if configured
         if self.config.training.use_8bit_optimizer and BNB_AVAILABLE:
             hud_console.info("Using 8-bit AdamW optimizer from bitsandbytes")
-            optimizer = bnb.optim.AdamW8bit(
+            optimizer = bnb.optim.AdamW8bit(  # type: ignore
                 trainable_params,
                 lr=self.config.training.lr,
                 betas=self.config.training.adam_betas,
@@ -5,7 +5,7 @@ from __future__ import annotations
5
 import logging
 from typing import TYPE_CHECKING, Any
 
-from hud.server import MCPServer
+from hud.server.server import MCPServer
 
 if TYPE_CHECKING:
     from collections.abc import Callable
hud/shared/exceptions.py CHANGED
@@ -69,11 +69,6 @@ class HudException(Exception):
         elif isinstance(exc_value, Exception):
             # Try to convert to a specific HudException
             result = cls._analyze_exception(exc_value, message or str(exc_value))
-            # If we couldn't categorize it (still base HudException),
-            # just re-raise the original exception
-            if type(result) is HudException:
-                # Re-raise the original exception unchanged
-                raise exc_value from None
             return result
 
         # Normal creation
@@ -7,7 +7,7 @@ classification and helpful hints for users.
 from __future__ import annotations
 
 import json
-from unittest.mock import Mock, patch
+from unittest.mock import Mock
 
 import httpx
 import pytest
@@ -17,6 +17,7 @@ from hud.shared.exceptions import (
     HudClientError,
     HudConfigError,
     HudException,
+    HudMCPError,
     HudRateLimitError,
     HudRequestError,
     HudTimeoutError,
@@ -243,23 +244,23 @@ class TestMCPErrorHandling:
     @pytest.mark.asyncio
     async def test_mcp_error_handling(self):
         """Test that McpError is handled appropriately."""
-        # Since McpError is imported dynamically, we'll mock it
-        with patch("hud.clients.mcp_use.McpError") as MockMcpError:
-            MockMcpError.side_effect = Exception
 
-            # Create a mock MCP error
-            mcp_error = Exception("MCP protocol error: Unknown method")
-            mcp_error.__class__.__name__ = "McpError"
+        # Create a mock McpError class
+        class McpError(Exception):
+            pass
 
-            try:
-                raise mcp_error
-            except Exception as e:
-                # This would typically be caught in the client code
-                # and re-raised as HudException
-                with pytest.raises(HudException) as exc_info:
-                    raise HudException from e
+        # Create a mock MCP error
+        mcp_error = McpError("MCP protocol error: Unknown method")
+
+        try:
+            raise mcp_error
+        except Exception as e:
+            # This would typically be caught in the client code
+            # and re-raised as HudException
+            with pytest.raises(HudMCPError) as exc_info:
+                raise HudException from e
 
-            assert "MCP protocol error" in str(exc_info.value)
+        assert "MCP protocol error" in str(exc_info.value)
 
     def test_mcp_tool_error_result(self):
         """Test handling of MCP tool execution errors (isError: true)."""
@@ -352,7 +353,7 @@ class TestExceptionRendering:
         assert len(error.hints) == 1
         assert error.hints[0] == HUD_API_KEY_MISSING
         assert error.hints[0].title == "HUD API key required"
-        assert "Set HUD_API_KEY environment variable" in error.hints[0].tips[0]
+        assert "Set HUD_API_KEY" in error.hints[0].tips[0]
 
     def test_exception_type_preservation(self):
         """Test that exception types are preserved through conversion."""
hud/telemetry/__init__.py CHANGED
@@ -1,14 +1,36 @@
-"""HUD Telemetry - User-facing APIs for tracing and job management.
+"""HUD Telemetry - Tracing and job management for agent execution.
 
-This module provides the main telemetry APIs that users interact with:
-- trace: Context manager for tracing code execution
-- job: Context manager and utilities for job management
-- instrument: Decorator for instrumenting functions
-- get_trace: Retrieve collected traces for replay/analysis
+Provides telemetry APIs for tracking agent execution and experiments.
+
+Standard Usage:
+    >>> import hud
+    >>> with hud.trace("My Task"):
+    ...     do_work()
+
+    >>> with hud.job("My Job") as job:
+    ...     with hud.trace("Task", job_id=job.id):
+    ...         do_work()
+
+High-Concurrency Usage (200+ parallel tasks):
+    >>> import hud
+    >>> async with hud.async_job("Evaluation") as job:
+    ...     async with hud.async_trace("Task", job_id=job.id):
+    ...         await do_async_work()
+
+APIs:
+    - trace(), job() - Standard context managers (for typical usage)
+    - async_trace(), async_job() - Async context managers (for high concurrency)
+    - instrument() - Decorator for instrumenting functions
+    - get_trace() - Retrieve collected traces for replay
+
+Note:
+    Use async_trace/async_job only for high-concurrency scenarios (200+ tasks).
+    The run_dataset() function uses them automatically.
 """
 
 from __future__ import annotations
 
+from .async_context import async_job, async_trace
 from .instrument import instrument
 from .job import Job, create_job, job
 from .replay import clear_trace, get_trace
@@ -17,6 +39,8 @@ from .trace import Trace, trace
 __all__ = [
     "Job",
     "Trace",
+    "async_job",
+    "async_trace",
     "clear_trace",
     "create_job",
     "get_trace",