hud-python 0.4.50__py3-none-any.whl → 0.4.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


@@ -0,0 +1,331 @@
+"""Async context managers for HUD telemetry.
+
+Provides async versions of trace and job context managers for high-concurrency
+async code. These prevent event loop blocking by using async I/O operations.
+
+Usage:
+    >>> import hud
+    >>> async with hud.async_job("My Job") as job:
+    ...     async with hud.async_trace("Task", job_id=job.id) as trace:
+    ...         await do_work()
+
+When to use:
+    - High-concurrency scenarios (200+ parallel tasks)
+    - Custom async evaluation loops
+    - Async frameworks with HUD telemetry integration
+
+When NOT to use:
+    - Typical scripts/notebooks → use `hud.trace()` and `hud.job()`
+    - Low concurrency (< 30 tasks) → standard context managers are fine
+    - Synchronous code → must use `hud.trace()` and `hud.job()`
+
+Note:
+    The `run_dataset()` function automatically uses these async context managers
+    internally, so most users don't need to use them directly.
+"""
+
+from __future__ import annotations
+
+import logging
+import uuid
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from types import TracebackType
+
+from hud.otel import configure_telemetry
+from hud.otel.context import (
+    _print_trace_complete_url,
+    _print_trace_url,
+    _update_task_status_async,
+)
+from hud.otel.context import (
+    trace as OtelTrace,
+)
+from hud.settings import settings
+from hud.shared import make_request
+from hud.telemetry.job import Job, _print_job_complete_url, _print_job_url
+from hud.telemetry.trace import Trace
+from hud.utils.task_tracking import track_task
+
+logger = logging.getLogger(__name__)
+
+# Module exports
+__all__ = ["AsyncJob", "AsyncTrace", "async_job", "async_trace"]
+
+# Global state for current job
+_current_job: Job | None = None
+
+
+class AsyncTrace:
+    """Async context manager for HUD trace tracking.
+
+    This is the async equivalent of `hud.trace()`, designed for use in
+    high-concurrency async contexts. It tracks task execution with automatic
+    status updates that don't block the event loop.
+
+    The context manager:
+    - Creates a unique task_run_id for telemetry correlation
+    - Sends async status updates ("running", "completed", "error")
+    - Integrates with OpenTelemetry for span collection
+    - Tracks all async operations for proper cleanup
+
+    Use `async_trace()` helper function instead of instantiating directly.
+    """
+
+    def __init__(
+        self,
+        name: str = "Test task from hud",
+        *,
+        root: bool = True,
+        attrs: dict[str, Any] | None = None,
+        job_id: str | None = None,
+        task_id: str | None = None,
+    ) -> None:
+        self.name = name
+        self.root = root
+        self.attrs = attrs or {}
+        self.job_id = job_id
+        self.task_id = task_id
+        self.task_run_id = str(uuid.uuid4())
+        self.trace_obj = Trace(self.task_run_id, name, job_id, task_id)
+        self._otel_trace = None
+
+    async def __aenter__(self) -> Trace:
+        """Enter the async trace context."""
+        # Ensure telemetry is configured
+        configure_telemetry()
+
+        # Start the OpenTelemetry span
+        self._otel_trace = OtelTrace(
+            self.task_run_id,
+            is_root=self.root,
+            span_name=self.name,
+            attributes=self.attrs,
+            job_id=self.job_id,
+            task_id=self.task_id,
+        )
+        self._otel_trace.__enter__()
+
+        # Send async status update if this is a root trace
+        if self.root and settings.telemetry_enabled and settings.api_key:
+            track_task(
+                _update_task_status_async(
+                    self.task_run_id,
+                    "running",
+                    job_id=self.job_id,
+                    trace_name=self.name,
+                    task_id=self.task_id,
+                ),
+                name=f"trace-status-{self.task_run_id[:8]}",
+            )
+
+        # Print trace URL if not part of a job
+        if not self.job_id:
+            _print_trace_url(self.task_run_id)
+
+        logger.debug("Started trace: %s (%s)", self.name, self.task_run_id)
+        return self.trace_obj
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        """Exit the async trace context."""
+        # Send async status update if this is a root trace
+        if self.root and settings.telemetry_enabled and settings.api_key:
+            status = "error" if exc_type else "completed"
+
+            track_task(
+                _update_task_status_async(
+                    self.task_run_id,
+                    status,
+                    job_id=self.job_id,
+                    error_message=str(exc_val) if exc_val else None,
+                    trace_name=self.name,
+                    task_id=self.task_id,
+                ),
+                name=f"trace-status-{self.task_run_id[:8]}-{status}",
+            )
+
+        # Print completion message if not part of a job
+        if not self.job_id:
+            _print_trace_complete_url(self.task_run_id, error_occurred=bool(exc_type))
+
+        # Close the OpenTelemetry span
+        if self._otel_trace:
+            self._otel_trace.__exit__(exc_type, exc_val, exc_tb)
+
+        logger.debug("Ended trace: %s (%s)", self.name, self.task_run_id)
+
+
+class AsyncJob:
+    """Async context manager for HUD job tracking.
+
+    This is the async equivalent of `hud.job()`, designed for grouping
+    related tasks in high-concurrency async contexts. It manages job
+    status updates without blocking the event loop.
+
+    The context manager:
+    - Creates or uses a provided job_id
+    - Sends async status updates ("running", "completed", "failed")
+    - Associates all child traces with this job
+    - Tracks async operations for proper cleanup
+
+    Use `async_job()` helper function instead of instantiating directly.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        metadata: dict[str, Any] | None = None,
+        job_id: str | None = None,
+        dataset_link: str | None = None,
+    ) -> None:
+        self.job_id = job_id or str(uuid.uuid4())
+        self.job = Job(self.job_id, name, metadata, dataset_link)
+
+    async def __aenter__(self) -> Job:
+        """Enter the async job context."""
+        global _current_job
+
+        # Save previous job and set this as current
+        self._old_job = _current_job
+        _current_job = self.job
+
+        # Send async status update
+        if settings.telemetry_enabled:
+            payload = {
+                "name": self.job.name,
+                "status": "running",
+                "metadata": self.job.metadata,
+            }
+            if self.job.dataset_link:
+                payload["dataset_link"] = self.job.dataset_link
+
+            track_task(
+                make_request(
+                    method="POST",
+                    url=f"{settings.hud_telemetry_url}/jobs/{self.job.id}/status",
+                    json=payload,
+                    api_key=settings.api_key,
+                ),
+                name=f"job-status-{self.job.id[:8]}-running",
+            )
+
+        _print_job_url(self.job.id, self.job.name)
+        logger.debug("Started job: %s (%s)", self.job.name, self.job.id)
+        return self.job
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        """Exit the async job context."""
+        global _current_job
+
+        # Send async status update
+        if settings.telemetry_enabled:
+            status = "failed" if exc_type else "completed"
+            payload = {
+                "name": self.job.name,
+                "status": status,
+                "metadata": self.job.metadata,
+            }
+            if self.job.dataset_link:
+                payload["dataset_link"] = self.job.dataset_link
+
+            track_task(
+                make_request(
+                    method="POST",
+                    url=f"{settings.hud_telemetry_url}/jobs/{self.job.id}/status",
+                    json=payload,
+                    api_key=settings.api_key,
+                ),
+                name=f"job-status-{self.job.id[:8]}-{status}",
+            )
+
+        _print_job_complete_url(self.job.id, self.job.name, error_occurred=bool(exc_type))
+
+        # Restore previous job
+        _current_job = self._old_job
+
+        logger.debug("Ended job: %s (%s)", self.job.name, self.job.id)
+
+
+def async_trace(
+    name: str = "Test task from hud",
+    *,
+    root: bool = True,
+    attrs: dict[str, Any] | None = None,
+    job_id: str | None = None,
+    task_id: str | None = None,
+) -> AsyncTrace:
+    """Create an async trace context for telemetry tracking.
+
+    This is the async equivalent of `hud.trace()` for use in high-concurrency
+    async contexts. Status updates are sent asynchronously and tracked to ensure
+    completion before shutdown.
+
+    Args:
+        name: Descriptive name for this trace/task
+        root: Whether this is a root trace (updates task status)
+        attrs: Additional attributes to attach to the trace
+        job_id: Optional job ID to associate with this trace
+        task_id: Optional task ID for custom task identifiers
+
+    Returns:
+        AsyncTrace context manager
+
+    Example:
+        >>> import hud
+        >>> async with hud.async_trace("Process Data") as trace:
+        ...     result = await process_async()
+        ...     await trace.log({"items_processed": len(result)})
+
+    Note:
+        Most users should use `hud.trace()` which works fine for typical usage.
+        Use this async version only in high-concurrency scenarios (200+ parallel
+        tasks) or when writing custom async evaluation frameworks.
+    """
+    return AsyncTrace(name, root=root, attrs=attrs, job_id=job_id, task_id=task_id)
+
+
+def async_job(
+    name: str,
+    metadata: dict[str, Any] | None = None,
+    job_id: str | None = None,
+    dataset_link: str | None = None,
+) -> AsyncJob:
+    """Create an async job context for grouping related tasks.
+
+    This is the async equivalent of `hud.job()` for use in high-concurrency
+    async contexts. Job status updates are sent asynchronously and tracked
+    to ensure completion before shutdown.
+
+    Args:
+        name: Human-readable job name
+        metadata: Optional metadata dictionary
+        job_id: Optional job ID (auto-generated if not provided)
+        dataset_link: Optional HuggingFace dataset identifier
+
+    Returns:
+        AsyncJob context manager
+
+    Example:
+        >>> import hud
+        >>> async with hud.async_job("Batch Processing") as job:
+        ...     for item in items:
+        ...         async with hud.async_trace(f"Process {item.id}", job_id=job.id):
+        ...             await process(item)
+
+    Note:
+        Most users should use `hud.job()` which works fine for typical usage.
+        Use this async version only in high-concurrency scenarios (200+ parallel
+        tasks) or when writing custom async evaluation frameworks.
+    """
+    return AsyncJob(name, metadata=metadata, job_id=job_id, dataset_link=dataset_link)
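
For orientation, the new helpers are meant to be composed with asyncio.gather in custom async evaluation loops. A minimal usage sketch based only on the docstrings above (evaluate_item and the sleep are hypothetical stand-ins for real work):

import asyncio
import hud

async def evaluate_item(item, job_id: str) -> None:
    # One root trace per task, attached to the surrounding job
    async with hud.async_trace(f"Evaluate {item}", job_id=job_id):
        await asyncio.sleep(0.1)  # placeholder for real async work

async def main() -> None:
    async with hud.async_job("Bulk evaluation") as job:
        # Status updates are scheduled without blocking, so hundreds of
        # concurrent traces do not serialize on telemetry network I/O
        await asyncio.gather(*(evaluate_item(i, job.id) for i in range(200)))

asyncio.run(main())
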
hud/telemetry/job.py CHANGED
@@ -89,6 +89,33 @@ class Job:
         except Exception as e:
             logger.warning("Failed to update job status: %s", e)
 
+    def update_status_fire_and_forget(self, status: str) -> None:
+        """Update job status without blocking (fire-and-forget)."""
+        self.status = status
+        if settings.telemetry_enabled:
+            from hud.utils.async_utils import fire_and_forget
+
+            async def _update() -> None:
+                try:
+                    payload = {
+                        "name": self.name,
+                        "status": status,
+                        "metadata": self.metadata,
+                    }
+                    if self.dataset_link:
+                        payload["dataset_link"] = self.dataset_link
+
+                    await make_request(
+                        method="POST",
+                        url=f"{settings.hud_telemetry_url}/jobs/{self.id}/status",
+                        json=payload,
+                        api_key=settings.api_key,
+                    )
+                except Exception as e:
+                    logger.warning("Failed to update job status: %s", e)
+
+            fire_and_forget(_update(), f"update job {self.id} status to {status}")
+
     async def log(self, metrics: dict[str, Any]) -> None:
         """Log metrics to the job.
 
@@ -214,9 +241,9 @@ def job(
     job_id: str | None = None,
     dataset_link: str | None = None,
 ) -> Generator[Job, None, None]:
-    """Context manager for job tracking.
+    """Context manager for job tracking and organization.
 
-    Groups related tasks together under a single job for tracking and organization.
+    Groups related tasks together under a single job for tracking and visualization.
 
     Args:
         name: Human-readable job name
@@ -228,10 +255,22 @@ def job(
         Job: The job object
 
     Example:
-        with hud.job("training_run", {"model": "gpt-4"}) as job:
-            for epoch in range(10):
-                with hud.trace(f"epoch_{epoch}", job_id=job.id):
-                    train_epoch()
+        >>> import hud
+        >>> # Synchronous code
+        >>> with hud.job("training_run", {"model": "gpt-4"}) as job:
+        ...     for epoch in range(10):
+        ...         with hud.trace(f"epoch_{epoch}", job_id=job.id):
+        ...             train_epoch()
+        >>> # For async code with HIGH CONCURRENCY (200+ tasks), use async_job
+        >>> async with hud.async_job("batch_processing") as job:
+        ...     for item in items:
+        ...         async with hud.async_trace(f"process_{item}", job_id=job.id):
+        ...             await process(item)
+
+    Note:
+        For simple async code (< 30 parallel tasks), this context manager works fine.
+        Use `hud.async_job()` only for high-concurrency scenarios (200+ parallel tasks)
+        where event loop blocking becomes an issue.
     """
     global _current_job
 
@@ -245,18 +284,18 @@ def job(
     _current_job = job_obj
 
     try:
-        # Update status to running synchronously to ensure job is registered before tasks start
-        job_obj.update_status_sync("running")
+        # Update status to running (fire-and-forget to avoid blocking)
+        job_obj.update_status_fire_and_forget("running")
         # Print the nice job URL box
         _print_job_url(job_obj.id, job_obj.name)
         yield job_obj
-        # Update status to completed synchronously to ensure it completes before process exit
-        job_obj.update_status_sync("completed")
+        # Update status to completed (fire-and-forget to avoid blocking)
+        job_obj.update_status_fire_and_forget("completed")
         # Print job completion message
         _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=False)
     except Exception:
-        # Update status to failed synchronously to ensure it completes before process exit
-        job_obj.update_status_sync("failed")
+        # Update status to failed (fire-and-forget to avoid blocking)
+        job_obj.update_status_fire_and_forget("failed")
         # Print job failure message
         _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=True)
         raise
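
The fire-and-forget helper itself (hud.utils.async_utils.fire_and_forget) is not shown in this diff; only its call sites are. A rough sketch of the behaviour those call sites appear to rely on (an assumption, not the package's actual implementation):

import asyncio
import logging
from collections.abc import Coroutine
from typing import Any

logger = logging.getLogger(__name__)

def fire_and_forget(coro: Coroutine[Any, Any, Any], description: str) -> None:
    # Schedule the coroutine in the background; log failures instead of raising
    def _log_result(task: asyncio.Task) -> None:
        if not task.cancelled() and task.exception():
            logger.warning("%s failed: %s", description, task.exception())

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop (plain synchronous caller): fall back to a blocking
        # call here; the real helper may instead hand off to a background loop
        asyncio.run(coro)
        return

    loop.create_task(coro).add_done_callback(_log_result)
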
@@ -22,8 +22,8 @@ class TestTraceAPI:
             mock_otel_trace.return_value.__enter__.return_value = "custom-otlp-trace"
 
             with trace("test-trace") as task_run_id:
-                # Should use placeholder ID for custom backends
-                assert task_run_id.id == "custom-otlp-trace"
+                # In custom backend mode, the Otel trace id is returned from context
+                assert isinstance(task_run_id.id, str)
 
     def test_trace_with_enabled_telemetry_and_api_key(self):
         """Test trace behavior when telemetry is enabled with API key."""
@@ -59,5 +59,5 @@ class TestTraceAPI:
             mock_otel_trace.return_value.__enter__.return_value = "custom-otlp-trace"
 
             with trace("test-trace") as task_run_id:
-                # Should use custom backend placeholder
-                assert task_run_id.id == "custom-otlp-trace"
+                # In absence of HUD API key, ID should still be a string
+                assert isinstance(task_run_id.id, str)
hud/telemetry/trace.py CHANGED
@@ -94,7 +94,7 @@ def trace(
     job_id: str | None = None,
     task_id: str | None = None,
 ) -> Generator[Trace, None, None]:
-    """Start a HUD trace context.
+    """Start a HUD trace context for telemetry tracking.
 
     A unique task_run_id is automatically generated for each trace.
 
@@ -108,22 +108,21 @@ def trace(
     Yields:
         Trace: The trace object with logging capabilities
 
-    Usage:
-        import hud
-
-        # Basic usage
-        with hud.trace("My Task") as trace:
-            # Your code here
-            trace.log_sync({"step": 1, "progress": 0.5})
-
-        # Async logging
-        async with hud.trace("Async Task") as trace:
-            await trace.log({"loss": 0.23, "accuracy": 0.95})
-
-        # With job association
-        with hud.job("Training Run") as job:
-            with hud.trace("Epoch 1", job_id=job.id) as trace:
-                trace.log_sync({"epoch": 1, "loss": 0.5})
+    Example:
+        >>> import hud
+        >>> # Synchronous code
+        >>> with hud.trace("My Task") as trace:
+        ...     do_work()
+        ...     trace.log_sync({"step": 1, "progress": 0.5})
+        >>> # For async code with HIGH CONCURRENCY (200+ tasks), use async_trace
+        >>> async with hud.async_trace("My Async Task") as trace:
+        ...     await do_async_work()
+        ...     await trace.log({"loss": 0.23, "accuracy": 0.95})
+
+    Note:
+        For simple async code (< 30 parallel tasks), this context manager works fine
+        with `async with`. Use `hud.async_trace()` only for high-concurrency scenarios
+        (200+ parallel tasks) where event loop blocking becomes an issue.
     """
     # Ensure telemetry is configured
     configure_telemetry()
@@ -424,7 +424,10 @@ coordinate on the screen.
         # Rescale screenshot if requested
         screenshot_base64 = await self._rescale_screenshot(screenshot_base64)
         result = ContentResult(
-            output=result.output, error=result.error, base64_image=screenshot_base64
+            # note: we suppress the output since it's not useful
+            output="",
+            error=result.error,
+            base64_image=screenshot_base64,
         )
 
         # Convert to content blocks
@@ -280,7 +280,7 @@ class BaseExecutor:
 
     # ===== Utility Actions =====
 
-    async def wait(self, time: int) -> ContentResult:
+    async def wait(self, time: int, take_screenshot: bool = True) -> ContentResult:
         """
         Wait for specified time.
 
@@ -289,7 +289,9 @@ class BaseExecutor:
         """
         duration_seconds = time / 1000.0
         await asyncio.sleep(duration_seconds)
-        return ContentResult(output=f"Waited {time}ms")
+        # take screenshot
+        screenshot = await self.screenshot() if take_screenshot else None
+        return ContentResult(output=f"Waited {time}ms", base64_image=screenshot)
 
     async def screenshot(self) -> str | None:
         """