PyPI - hud-python - Versions diffs - 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl - Mend

hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (88)

hud/__init__.py +13 -1
hud/agents/base.py +14 -3
hud/agents/lite_llm.py +1 -1
hud/agents/openai_chat_generic.py +15 -3
hud/agents/tests/test_base.py +9 -2
hud/agents/tests/test_base_runtime.py +164 -0
hud/cli/__init__.py +18 -25
hud/cli/build.py +35 -27
hud/cli/dev.py +11 -29
hud/cli/eval.py +114 -145
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +26 -3
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +134 -0
hud/cli/tests/test_eval.py +4 -0
hud/cli/tests/test_mcp_server.py +8 -7
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/utils/docker.py +120 -1
hud/cli/utils/runner.py +1 -1
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +257 -0
hud/clients/base.py +1 -1
hud/clients/mcp_use.py +3 -1
hud/datasets/parallel.py +2 -2
hud/datasets/runner.py +85 -24
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_runner.py +106 -0
hud/datasets/tests/test_utils.py +228 -0
hud/otel/config.py +8 -6
hud/otel/context.py +4 -4
hud/otel/exporters.py +231 -57
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_instrumentation.py +207 -0
hud/rl/learner.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/shared/exceptions.py +35 -9
hud/shared/hints.py +25 -0
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +39 -30
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +30 -6
hud/telemetry/async_context.py +331 -0
hud/telemetry/job.py +51 -12
hud/telemetry/tests/test_async_context.py +242 -0
hud/telemetry/tests/test_instrument.py +414 -0
hud/telemetry/tests/test_job.py +609 -0
hud/telemetry/tests/test_trace.py +184 -6
hud/telemetry/trace.py +16 -17
hud/tools/computer/qwen.py +4 -1
hud/tools/computer/settings.py +2 -2
hud/tools/executors/base.py +4 -2
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/types.py +7 -1
hud/utils/agent_factories.py +1 -3
hud/utils/mcp.py +1 -1
hud/utils/task_tracking.py +223 -0
hud/utils/tests/test_agent_factories.py +60 -0
hud/utils/tests/test_mcp.py +4 -6
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tasks.py +187 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
{hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
{hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
{hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0

hud/telemetry/__init__.py CHANGED Viewed

@@ -1,14 +1,36 @@
-"""HUD Telemetry - User-facing APIs for tracing and job management.
+"""HUD Telemetry - Tracing and job management for agent execution.
-This module provides the main telemetry APIs that users interact with:
-- trace: Context manager for tracing code execution
-- job: Context manager and utilities for job management
-- instrument: Decorator for instrumenting functions
-- get_trace: Retrieve collected traces for replay/analysis
+Provides telemetry APIs for tracking agent execution and experiments.
+Standard Usage:
+    >>> import hud
+    >>> with hud.trace("My Task"):
+    ...     do_work()
+    >>> with hud.job("My Job") as job:
+    ...     with hud.trace("Task", job_id=job.id):
+    ...         do_work()
+High-Concurrency Usage (200+ parallel tasks):
+    >>> import hud
+    >>> async with hud.async_job("Evaluation") as job:
+    ...     async with hud.async_trace("Task", job_id=job.id):
+    ...         await do_async_work()
+APIs:
+    - trace(), job() - Standard context managers (for typical usage)
+    - async_trace(), async_job() - Async context managers (for high concurrency)
+    - instrument() - Decorator for instrumenting functions
+    - get_trace() - Retrieve collected traces for replay
+Note:
+    Use async_trace/async_job only for high-concurrency scenarios (200+ tasks).
+    The run_dataset() function uses them automatically.
 """
 from __future__ import annotations
+from .async_context import async_job, async_trace
 from .instrument import instrument
 from .job import Job, create_job, job
 from .replay import clear_trace, get_trace
@@ -17,6 +39,8 @@ from .trace import Trace, trace
 __all__ = [
     "Job",
     "Trace",
+    "async_job",
+    "async_trace",
     "clear_trace",
     "create_job",
     "get_trace",

hud/telemetry/async_context.py ADDED Viewed

@@ -0,0 +1,331 @@
+"""Async context managers for HUD telemetry.
+Provides async versions of trace and job context managers for high-concurrency
+async code. These prevent event loop blocking by using async I/O operations.
+Usage:
+    >>> import hud
+    >>> async with hud.async_job("My Job") as job:
+    ...     async with hud.async_trace("Task", job_id=job.id) as trace:
+    ...         await do_work()
+When to use:
+    - High-concurrency scenarios (200+ parallel tasks)
+    - Custom async evaluation loops
+    - Async frameworks with HUD telemetry integration
+When NOT to use:
+    - Typical scripts/notebooks → use `hud.trace()` and `hud.job()`
+    - Low concurrency (< 30 tasks) → standard context managers are fine
+    - Synchronous code → must use `hud.trace()` and `hud.job()`
+Note:
+    The `run_dataset()` function automatically uses these async context managers
+    internally, so most users don't need to use them directly.
+"""
+from __future__ import annotations
+import logging
+import uuid
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    from types import TracebackType
+from hud.otel import configure_telemetry
+from hud.otel.context import (
+    _print_trace_complete_url,
+    _print_trace_url,
+    _update_task_status_async,
+)
+from hud.otel.context import (
+    trace as OtelTrace,
+)
+from hud.settings import settings
+from hud.shared import make_request
+from hud.telemetry.job import Job, _print_job_complete_url, _print_job_url
+from hud.telemetry.trace import Trace
+from hud.utils.task_tracking import track_task
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+# Module exports
+__all__ = ["AsyncJob", "AsyncTrace", "async_job", "async_trace"]
+# Global state for current job: AsyncJob.__aenter__ installs its Job here and
+# AsyncJob.__aexit__ puts back whatever value was present before it entered.
+# NOTE(review): this is a plain module global (not a contextvar), so AsyncJob
+# contexts entered concurrently overwrite each other's value. It is also a
+# different variable from the `_current_job` global that `job()` in
+# hud/telemetry/job.py manages -- confirm nothing expects the two to agree.
+_current_job: Job | None = None
+class AsyncTrace:
+    """Async context manager for HUD trace tracking.
+    This is the async equivalent of `hud.trace()`. Entering the context opens
+    an OpenTelemetry span; leaving it closes that span. Status updates are
+    handed to `track_task` instead of being awaited inline.
+    The context manager:
+    - Creates a unique task_run_id (uuid4) for telemetry correlation
+    - Schedules status updates ("running", then "completed" or "error") for
+      root traces when telemetry is enabled and an API key is configured
+    - Opens/closes the OpenTelemetry span, closing it even if reporting fails
+    Use `async_trace()` helper function instead of instantiating directly.
+    Args:
+        name: Descriptive name for this trace/task
+        root: Whether this is a root trace (only root traces report status)
+        attrs: Additional attributes to attach to the span
+        job_id: Optional job ID to associate with this trace
+        task_id: Optional task ID for custom task identifiers
+    """
+    def __init__(
+        self,
+        name: str = "Test task from hud",
+        *,
+        root: bool = True,
+        attrs: dict[str, Any] | None = None,
+        job_id: str | None = None,
+        task_id: str | None = None,
+    ) -> None:
+        self.name = name
+        self.root = root
+        self.attrs = attrs or {}
+        self.job_id = job_id
+        self.task_id = task_id
+        self.task_run_id = str(uuid.uuid4())
+        self.trace_obj = Trace(self.task_run_id, name, job_id, task_id)
+        # Span context currently open for this trace; None while not entered.
+        self._otel_trace: Any | None = None
+    def _reports_status(self) -> bool:
+        """Return True when this trace should send status updates."""
+        return bool(self.root and settings.telemetry_enabled and settings.api_key)
+    async def __aenter__(self) -> Trace:
+        """Enter the async trace context.
+        Returns:
+            The `Trace` object created for this run.
+        """
+        # Ensure telemetry is configured
+        configure_telemetry()
+        # Start the OpenTelemetry span
+        otel_trace = OtelTrace(
+            self.task_run_id,
+            is_root=self.root,
+            span_name=self.name,
+            attributes=self.attrs,
+            job_id=self.job_id,
+            task_id=self.task_id,
+        )
+        otel_trace.__enter__()
+        self._otel_trace = otel_trace
+        try:
+            # Send async status update if this is a root trace
+            if self._reports_status():
+                track_task(
+                    _update_task_status_async(
+                        self.task_run_id,
+                        "running",
+                        job_id=self.job_id,
+                        trace_name=self.name,
+                        task_id=self.task_id,
+                    ),
+                    name=f"trace-status-{self.task_run_id[:8]}",
+                )
+                # Print trace URL if not part of a job
+                if not self.job_id:
+                    _print_trace_url(self.task_run_id)
+        except BaseException as exc:
+            # __aexit__ is not called when __aenter__ raises, so the span that
+            # was just opened has to be closed here or it would stay open.
+            self._otel_trace = None
+            otel_trace.__exit__(type(exc), exc, exc.__traceback__)
+            raise
+        logger.debug("Started trace: %s (%s)", self.name, self.task_run_id)
+        return self.trace_obj
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        """Exit the async trace context.
+        Schedules the final status update ("error" when an exception is
+        propagating, "completed" otherwise) and closes the span. Exceptions
+        from the body are never suppressed.
+        """
+        try:
+            # Send async status update if this is a root trace
+            if self._reports_status():
+                status = "error" if exc_type else "completed"
+                track_task(
+                    _update_task_status_async(
+                        self.task_run_id,
+                        status,
+                        job_id=self.job_id,
+                        error_message=str(exc_val) if exc_val else None,
+                        trace_name=self.name,
+                        task_id=self.task_id,
+                    ),
+                    name=f"trace-status-{self.task_run_id[:8]}-{status}",
+                )
+                # Print completion message if not part of a job
+                if not self.job_id:
+                    _print_trace_complete_url(self.task_run_id, error_occurred=bool(exc_type))
+        finally:
+            # Close the OpenTelemetry span even when reporting above failed,
+            # and drop the reference so it cannot be exited twice.
+            otel_trace, self._otel_trace = self._otel_trace, None
+            if otel_trace is not None:
+                otel_trace.__exit__(exc_type, exc_val, exc_tb)
+        logger.debug("Ended trace: %s (%s)", self.name, self.task_run_id)
+class AsyncJob:
+    """Async context manager for HUD job tracking.
+    This is the async equivalent of `hud.job()`, used to group related
+    traces under one job. Status updates are handed to `track_task` instead
+    of being awaited inline.
+    The context manager:
+    - Creates a job_id (uuid4) or uses the provided one
+    - Schedules status updates ("running", then "completed" or "failed")
+      when telemetry is enabled
+    - Installs its Job as the module-level current job while active and
+      restores the previous value on exit, including on error paths
+    Use `async_job()` helper function instead of instantiating directly.
+    Args:
+        name: Human-readable job name
+        metadata: Optional metadata dictionary
+        job_id: Optional job ID (auto-generated if not provided)
+        dataset_link: Optional dataset identifier sent with status updates
+    """
+    def __init__(
+        self,
+        name: str,
+        metadata: dict[str, Any] | None = None,
+        job_id: str | None = None,
+        dataset_link: str | None = None,
+    ) -> None:
+        self.job_id = job_id or str(uuid.uuid4())
+        self.job = Job(self.job_id, name, metadata, dataset_link)
+        # Job that was current before this context was entered; defined here
+        # so __aexit__ never hits a missing attribute.
+        self._old_job: Job | None = None
+    def _post_status(self, status: str) -> None:
+        """Schedule a POST of `status` for this job via track_task."""
+        payload: dict[str, Any] = {
+            "name": self.job.name,
+            "status": status,
+            "metadata": self.job.metadata,
+        }
+        if self.job.dataset_link:
+            payload["dataset_link"] = self.job.dataset_link
+        track_task(
+            make_request(
+                method="POST",
+                url=f"{settings.hud_telemetry_url}/jobs/{self.job.id}/status",
+                json=payload,
+                api_key=settings.api_key,
+            ),
+            name=f"job-status-{self.job.id[:8]}-{status}",
+        )
+    async def __aenter__(self) -> Job:
+        """Enter the async job context.
+        Returns:
+            The `Job` object managed by this context.
+        """
+        global _current_job
+        # Save previous job and set this as current
+        self._old_job = _current_job
+        _current_job = self.job
+        try:
+            # Send async status update
+            if settings.telemetry_enabled:
+                self._post_status("running")
+            _print_job_url(self.job.id, self.job.name)
+        except BaseException:
+            # __aexit__ is not called when __aenter__ raises; undo the swap so
+            # a failed enter does not leave this job installed as current.
+            _current_job = self._old_job
+            raise
+        logger.debug("Started job: %s (%s)", self.job.name, self.job.id)
+        return self.job
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        """Exit the async job context.
+        Schedules the final status update ("failed" when an exception is
+        propagating, "completed" otherwise). Exceptions from the body are
+        never suppressed.
+        """
+        global _current_job
+        try:
+            # Send async status update
+            if settings.telemetry_enabled:
+                self._post_status("failed" if exc_type else "completed")
+            _print_job_complete_url(self.job.id, self.job.name, error_occurred=bool(exc_type))
+        finally:
+            # Restore previous job even if reporting above raised
+            _current_job = self._old_job
+        logger.debug("Ended job: %s (%s)", self.job.name, self.job.id)
+def async_trace(
+    name: str = "Test task from hud",
+    *,
+    root: bool = True,
+    attrs: dict[str, Any] | None = None,
+    job_id: str | None = None,
+    task_id: str | None = None,
+) -> AsyncTrace:
+    """Build an `AsyncTrace` context manager for telemetry tracking.
+    Async counterpart of `hud.trace()`. The returned object is meant to be
+    used with `async with`; its status updates are scheduled through
+    `track_task` rather than awaited inline.
+    Args:
+        name: Descriptive name for this trace/task
+        root: Whether this is a root trace (root traces report task status)
+        attrs: Additional attributes to attach to the trace
+        job_id: Optional job ID to associate with this trace
+        task_id: Optional task ID for custom task identifiers
+    Returns:
+        AsyncTrace context manager
+    Example:
+        >>> import hud
+        >>> async with hud.async_trace("Process Data") as trace:
+        ...     result = await process_async()
+        ...     await trace.log({"items_processed": len(result)})
+    Note:
+        `hud.trace()` is the recommended choice for typical usage. Reach for
+        this async version in high-concurrency scenarios (200+ parallel
+        tasks) or when writing custom async evaluation frameworks.
+    """
+    return AsyncTrace(
+        name=name,
+        root=root,
+        attrs=attrs,
+        job_id=job_id,
+        task_id=task_id,
+    )
+def async_job(
+    name: str,
+    metadata: dict[str, Any] | None = None,
+    job_id: str | None = None,
+    dataset_link: str | None = None,
+) -> AsyncJob:
+    """Build an `AsyncJob` context manager for grouping related tasks.
+    Async counterpart of `hud.job()`. The returned object is meant to be used
+    with `async with`; its status updates are scheduled through `track_task`
+    rather than awaited inline.
+    Args:
+        name: Human-readable job name
+        metadata: Optional metadata dictionary
+        job_id: Optional job ID (auto-generated if not provided)
+        dataset_link: Optional HuggingFace dataset identifier
+    Returns:
+        AsyncJob context manager
+    Example:
+        >>> import hud
+        >>> async with hud.async_job("Batch Processing") as job:
+        ...     for item in items:
+        ...         async with hud.async_trace(f"Process {item.id}", job_id=job.id):
+        ...             await process(item)
+    Note:
+        `hud.job()` is the recommended choice for typical usage. Reach for
+        this async version in high-concurrency scenarios (200+ parallel
+        tasks) or when writing custom async evaluation frameworks.
+    """
+    return AsyncJob(
+        name=name,
+        metadata=metadata,
+        job_id=job_id,
+        dataset_link=dataset_link,
+    )

hud/telemetry/job.py CHANGED Viewed

@@ -89,6 +89,33 @@ class Job:
             except Exception as e:
                 logger.warning("Failed to update job status: %s", e)
+    def update_status_fire_and_forget(self, status: str) -> None:
+        """Record `status` locally and post it without waiting for the result.
+        The new status is always stored on the job. When telemetry is enabled,
+        a coroutine that POSTs the status is handed to `fire_and_forget`; any
+        exception raised inside that coroutine is logged as a warning.
+        Args:
+            status: Status string to store and report
+        """
+        self.status = status
+        if not settings.telemetry_enabled:
+            return
+        # Imported at call time, as in the original implementation.
+        from hud.utils.async_utils import fire_and_forget
+        async def _post_status() -> None:
+            try:
+                body = {
+                    "name": self.name,
+                    "status": status,
+                    "metadata": self.metadata,
+                }
+                if self.dataset_link:
+                    body["dataset_link"] = self.dataset_link
+                status_url = f"{settings.hud_telemetry_url}/jobs/{self.id}/status"
+                await make_request(
+                    method="POST",
+                    url=status_url,
+                    json=body,
+                    api_key=settings.api_key,
+                )
+            except Exception as exc:
+                logger.warning("Failed to update job status: %s", exc)
+        fire_and_forget(_post_status(), f"update job {self.id} status to {status}")
     async def log(self, metrics: dict[str, Any]) -> None:
         """Log metrics to the job.
@@ -214,9 +241,9 @@ def job(
     job_id: str | None = None,
     dataset_link: str | None = None,
 ) -> Generator[Job, None, None]:
-    """Context manager for job tracking.
+    """Context manager for job tracking and organization.
-    Groups related tasks together under a single job for tracking and organization.
+    Groups related tasks together under a single job for tracking and visualization.
     Args:
         name: Human-readable job name
@@ -228,10 +255,22 @@ def job(
         Job: The job object
     Example:
-        with hud.job("training_run", {"model": "gpt-4"}) as job:
-            for epoch in range(10):
-                with hud.trace(f"epoch_{epoch}", job_id=job.id):
-                    train_epoch()
+        >>> import hud
+        >>> # Synchronous code
+        >>> with hud.job("training_run", {"model": "gpt-4"}) as job:
+        ...     for epoch in range(10):
+        ...         with hud.trace(f"epoch_{epoch}", job_id=job.id):
+        ...             train_epoch()
+        >>> # For async code with HIGH CONCURRENCY (200+ tasks), use async_job
+        >>> async with hud.async_job("batch_processing") as job:
+        ...     for item in items:
+        ...         async with hud.async_trace(f"process_{item}", job_id=job.id):
+        ...             await process(item)
+    Note:
+        For simple async code (< 30 parallel tasks), this context manager works fine.
+        Use `hud.async_job()` only for high-concurrency scenarios (200+ parallel tasks)
+        where event loop blocking becomes an issue.
     """
     global _current_job
@@ -245,18 +284,18 @@ def job(
     _current_job = job_obj
     try:
-        # Update status to running synchronously to ensure job is registered before tasks start
-        job_obj.update_status_sync("running")
+        # Update status to running (fire-and-forget to avoid blocking)
+        job_obj.update_status_fire_and_forget("running")
         # Print the nice job URL box
         _print_job_url(job_obj.id, job_obj.name)
         yield job_obj
-        # Update status to completed synchronously to ensure it completes before process exit
-        job_obj.update_status_sync("completed")
+        # Update status to completed (fire-and-forget to avoid blocking)
+        job_obj.update_status_fire_and_forget("completed")
         # Print job completion message
         _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=False)
     except Exception:
-        # Update status to failed synchronously to ensure it completes before process exit
-        job_obj.update_status_sync("failed")
+        # Update status to failed (fire-and-forget to avoid blocking)
+        job_obj.update_status_fire_and_forget("failed")
         # Print job failure message
         _print_job_complete_url(job_obj.id, job_obj.name, error_occurred=True)
         raise

hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl