PyPI - hud-python - Versions diffs - 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl - Mend

hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hud-python might be problematic. Click here for more details.

Files changed (88) hide show

hud/__init__.py +13 -1
hud/agents/base.py +14 -3
hud/agents/lite_llm.py +1 -1
hud/agents/openai_chat_generic.py +15 -3
hud/agents/tests/test_base.py +9 -2
hud/agents/tests/test_base_runtime.py +164 -0
hud/cli/__init__.py +18 -25
hud/cli/build.py +35 -27
hud/cli/dev.py +11 -29
hud/cli/eval.py +114 -145
hud/cli/tests/test_analyze_module.py +120 -0
hud/cli/tests/test_build.py +26 -3
hud/cli/tests/test_build_failure.py +41 -0
hud/cli/tests/test_build_module.py +50 -0
hud/cli/tests/test_cli_more_wrappers.py +30 -0
hud/cli/tests/test_cli_root.py +134 -0
hud/cli/tests/test_eval.py +4 -0
hud/cli/tests/test_mcp_server.py +8 -7
hud/cli/tests/test_push_happy.py +74 -0
hud/cli/tests/test_push_wrapper.py +23 -0
hud/cli/utils/docker.py +120 -1
hud/cli/utils/runner.py +1 -1
hud/cli/utils/tasks.py +4 -1
hud/cli/utils/tests/__init__.py +0 -0
hud/cli/utils/tests/test_config.py +58 -0
hud/cli/utils/tests/test_docker.py +93 -0
hud/cli/utils/tests/test_docker_hints.py +71 -0
hud/cli/utils/tests/test_env_check.py +74 -0
hud/cli/utils/tests/test_environment.py +42 -0
hud/cli/utils/tests/test_interactive_module.py +60 -0
hud/cli/utils/tests/test_local_runner.py +50 -0
hud/cli/utils/tests/test_logging_utils.py +23 -0
hud/cli/utils/tests/test_metadata.py +49 -0
hud/cli/utils/tests/test_package_runner.py +35 -0
hud/cli/utils/tests/test_registry_utils.py +49 -0
hud/cli/utils/tests/test_remote_runner.py +25 -0
hud/cli/utils/tests/test_runner_modules.py +52 -0
hud/cli/utils/tests/test_source_hash.py +36 -0
hud/cli/utils/tests/test_tasks.py +80 -0
hud/cli/utils/version_check.py +257 -0
hud/clients/base.py +1 -1
hud/clients/mcp_use.py +3 -1
hud/datasets/parallel.py +2 -2
hud/datasets/runner.py +85 -24
hud/datasets/tests/__init__.py +0 -0
hud/datasets/tests/test_runner.py +106 -0
hud/datasets/tests/test_utils.py +228 -0
hud/otel/config.py +8 -6
hud/otel/context.py +4 -4
hud/otel/exporters.py +231 -57
hud/otel/tests/__init__.py +0 -1
hud/otel/tests/test_instrumentation.py +207 -0
hud/rl/learner.py +1 -1
hud/server/tests/test_server_extra.py +2 -0
hud/shared/exceptions.py +35 -9
hud/shared/hints.py +25 -0
hud/shared/requests.py +15 -3
hud/shared/tests/test_exceptions.py +39 -30
hud/shared/tests/test_hints.py +167 -0
hud/telemetry/__init__.py +30 -6
hud/telemetry/async_context.py +331 -0
hud/telemetry/job.py +51 -12
hud/telemetry/tests/test_async_context.py +242 -0
hud/telemetry/tests/test_instrument.py +414 -0
hud/telemetry/tests/test_job.py +609 -0
hud/telemetry/tests/test_trace.py +184 -6
hud/telemetry/trace.py +16 -17
hud/tools/computer/qwen.py +4 -1
hud/tools/computer/settings.py +2 -2
hud/tools/executors/base.py +4 -2
hud/tools/tests/test_submit.py +85 -0
hud/tools/tests/test_types.py +193 -0
hud/types.py +7 -1
hud/utils/agent_factories.py +1 -3
hud/utils/mcp.py +1 -1
hud/utils/task_tracking.py +223 -0
hud/utils/tests/test_agent_factories.py +60 -0
hud/utils/tests/test_mcp.py +4 -6
hud/utils/tests/test_pretty_errors.py +186 -0
hud/utils/tests/test_tasks.py +187 -0
hud/utils/tests/test_tool_shorthand.py +154 -0
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/METADATA +48 -48
{hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/RECORD +88 -47
{hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/WHEEL +0 -0
{hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/entry_points.txt +0 -0
{hud_python-0.4.51.dist-info → hud_python-0.4.53.dist-info}/licenses/LICENSE +0 -0

hud/datasets/tests/test_utils.py ADDED Viewed

@@ -0,0 +1,228 @@
+from __future__ import annotations
+from unittest.mock import MagicMock, mock_open, patch
+import pytest
+from hud.datasets.utils import fetch_system_prompt_from_dataset, save_tasks
+from hud.types import Task
+@pytest.mark.asyncio
+async def test_fetch_system_prompt_success():
+    """A downloaded prompt file with content is returned unchanged."""
+    download_patch = patch("huggingface_hub.hf_hub_download")
+    open_patch = patch("builtins.open", mock_open(read_data="Test system prompt"))
+    with download_patch as hub_download, open_patch:
+        hub_download.return_value = "/tmp/system_prompt.txt"
+        prompt = await fetch_system_prompt_from_dataset("test/dataset")
+    assert prompt == "Test system prompt"
+    hub_download.assert_called_once()
+@pytest.mark.asyncio
+async def test_fetch_system_prompt_empty_file():
+    """A prompt file containing only whitespace yields None."""
+    download_patch = patch("huggingface_hub.hf_hub_download")
+    open_patch = patch("builtins.open", mock_open(read_data="  \n  "))
+    with download_patch as hub_download, open_patch:
+        hub_download.return_value = "/tmp/system_prompt.txt"
+        prompt = await fetch_system_prompt_from_dataset("test/dataset")
+    assert prompt is None
+@pytest.mark.asyncio
+async def test_fetch_system_prompt_file_not_found():
+    """An EntryNotFoundError raised by the download yields None."""
+    with patch("huggingface_hub.hf_hub_download") as hub_download:
+        from huggingface_hub.errors import EntryNotFoundError
+        missing_entry = EntryNotFoundError("File not found")
+        hub_download.side_effect = missing_entry
+        outcome = await fetch_system_prompt_from_dataset("test/dataset")
+    assert outcome is None
+@pytest.mark.asyncio
+async def test_fetch_system_prompt_import_error():
+    """An unimportable huggingface_hub yields None."""
+    import sys
+    # A None entry in sys.modules makes "import huggingface_hub" raise ImportError
+    blocked_modules = {"huggingface_hub": None}
+    with patch.dict(sys.modules, blocked_modules):
+        outcome = await fetch_system_prompt_from_dataset("test/dataset")
+    assert outcome is None
+@pytest.mark.asyncio
+async def test_fetch_system_prompt_general_exception():
+    """Any other exception raised by the download yields None."""
+    network_failure = Exception("Network error")
+    with patch("huggingface_hub.hf_hub_download") as hub_download:
+        hub_download.side_effect = network_failure
+        outcome = await fetch_system_prompt_from_dataset("test/dataset")
+    assert outcome is None
+def test_save_tasks_basic():
+    """Dict tasks go through Dataset.from_list and are pushed to the repo."""
+    first_task = {"id": "1", "prompt": "test", "mcp_config": {"key": "value"}}
+    second_task = {"id": "2", "prompt": "test2", "mcp_config": {"key2": "value2"}}
+    with patch("hud.datasets.utils.Dataset") as dataset_cls:
+        hub_dataset = MagicMock()
+        dataset_cls.from_list.return_value = hub_dataset
+        save_tasks([first_task, second_task], "test/repo")
+    dataset_cls.from_list.assert_called_once()
+    rows = dataset_cls.from_list.call_args.args[0]
+    assert len(rows) == 2
+    # The nested mcp_config dict is expected to arrive serialized as a string
+    assert isinstance(rows[0]["mcp_config"], str)
+    hub_dataset.push_to_hub.assert_called_once_with("test/repo")
+def test_save_tasks_with_specific_fields():
+    """Only the requested fields are kept in the uploaded rows."""
+    task = {"id": "1", "prompt": "test", "mcp_config": {"key": "value"}, "extra": "data"}
+    with patch("hud.datasets.utils.Dataset") as dataset_cls:
+        dataset_cls.from_list.return_value = MagicMock()
+        save_tasks([task], "test/repo", fields=["id", "prompt"])
+    row = dataset_cls.from_list.call_args.args[0][0]
+    for kept in ("id", "prompt"):
+        assert kept in row
+    assert "extra" not in row
+def test_save_tasks_with_list_field():
+    """List values are stored as a string containing their JSON-quoted items."""
+    task = {"id": "1", "tags": ["tag1", "tag2"], "count": 5}
+    with patch("hud.datasets.utils.Dataset") as dataset_cls:
+        dataset_cls.from_list.return_value = MagicMock()
+        save_tasks([task], "test/repo")
+    stored_tags = dataset_cls.from_list.call_args.args[0][0]["tags"]
+    # The list should have been serialized to a string
+    assert isinstance(stored_tags, str)
+    assert '"tag1"' in stored_tags
+def test_save_tasks_with_primitive_types():
+    """Primitive values pass through unchanged; None is stored as ""."""
+    task = {
+        "string": "text",
+        "integer": 42,
+        "float": 3.14,
+        "boolean": True,
+        "none": None,
+    }
+    with patch("hud.datasets.utils.Dataset") as dataset_cls:
+        dataset_cls.from_list.return_value = MagicMock()
+        save_tasks([task], "test/repo")
+    row = dataset_cls.from_list.call_args.args[0][0]
+    assert row["string"] == "text"
+    assert row["integer"] == 42
+    assert row["float"] == 3.14
+    assert row["boolean"] is True
+    # None is expected to be replaced by an empty string
+    assert row["none"] == ""
+def test_save_tasks_with_other_type():
+    """Values of any other type are stored as their str() form."""
+    class Stringable:
+        def __str__(self):
+            return "custom_value"
+    task = {"id": "1", "custom": Stringable()}
+    with patch("hud.datasets.utils.Dataset") as dataset_cls:
+        dataset_cls.from_list.return_value = MagicMock()
+        save_tasks([task], "test/repo")
+    row = dataset_cls.from_list.call_args.args[0][0]
+    assert row["custom"] == "custom_value"
+def test_save_tasks_rejects_task_objects():
+    """Passing a Task instance instead of a dict raises ValueError."""
+    offending = [Task(prompt="test", mcp_config={})]
+    with pytest.raises(ValueError, match="expects dictionaries, not Task objects"):
+        save_tasks(offending, "test/repo")  # type: ignore
+def test_save_tasks_rejects_task_objects_in_list():
+    """A Task mixed in among dicts is reported by its list index."""
+    valid_dict = {"id": "1", "prompt": "test", "mcp_config": {}}
+    task_object = Task(prompt="test2", mcp_config={})
+    mixed = [valid_dict, task_object]  # the Task sits at index 1
+    with pytest.raises(ValueError, match="Item 1 is a Task object"):
+        save_tasks(mixed, "test/repo")  # type: ignore
+def test_save_tasks_with_kwargs():
+    """Extra keyword arguments are forwarded to push_to_hub."""
+    hub_options = {"private": True, "commit_message": "Test commit"}
+    with patch("hud.datasets.utils.Dataset") as dataset_cls:
+        hub_dataset = MagicMock()
+        dataset_cls.from_list.return_value = hub_dataset
+        save_tasks([{"id": "1", "prompt": "test"}], "test/repo", **hub_options)
+    hub_dataset.push_to_hub.assert_called_once_with("test/repo", **hub_options)
+def test_save_tasks_field_not_in_dict():
+    """Requested fields that a task lacks are left out of the row."""
+    task = {"id": "1", "prompt": "test"}
+    requested = ["id", "missing_field"]  # "missing_field" is not a key of the task
+    with patch("hud.datasets.utils.Dataset") as dataset_cls:
+        dataset_cls.from_list.return_value = MagicMock()
+        save_tasks([task], "test/repo", fields=requested)
+    row = dataset_cls.from_list.call_args.args[0][0]
+    assert "id" in row
+    assert "missing_field" not in row
+def test_save_tasks_empty_list():
+    """An empty task list still builds and pushes an (empty) dataset."""
+    no_tasks = []
+    with patch("hud.datasets.utils.Dataset") as dataset_cls:
+        hub_dataset = MagicMock()
+        dataset_cls.from_list.return_value = hub_dataset
+        save_tasks(no_tasks, "test/repo")
+    dataset_cls.from_list.assert_called_once_with([])
+    hub_dataset.push_to_hub.assert_called_once()

hud/otel/config.py CHANGED Viewed

@@ -94,16 +94,18 @@ def configure_telemetry(
     # HUD exporter (only if enabled and API key is available)
     if settings.telemetry_enabled and settings.api_key:
+        # Use the HudSpanExporter directly (it now handles async context internally)
         exporter = HudSpanExporter(
             telemetry_url=settings.hud_telemetry_url, api_key=settings.api_key
         )
-        # Export more continuously to avoid big end flushes
+        # Batch exports for efficiency while maintaining reasonable real-time visibility
         provider.add_span_processor(
             BatchSpanProcessor(
                 exporter,
-                schedule_delay_millis=1000,
-                max_queue_size=8192,
-                max_export_batch_size=256,
+                schedule_delay_millis=1000,  # Export every second (unchanged)
+                max_queue_size=16384,  # Larger queue for high-volume scenarios
+                max_export_batch_size=512,  # Larger batches (fewer uploads)
                 export_timeout_millis=30000,
             )
         )
@@ -140,8 +142,8 @@ def configure_telemetry(
                 BatchSpanProcessor(
                     otlp_exporter,
                     schedule_delay_millis=1000,
-                    max_queue_size=8192,
-                    max_export_batch_size=256,
+                    max_queue_size=16384,
+                    max_export_batch_size=512,
                     export_timeout_millis=30000,
                 )
             )

hud/otel/context.py CHANGED Viewed

@@ -520,8 +520,8 @@ class trace:
         # Update task status if root (only for HUD backend)
         if self.is_root and settings.telemetry_enabled and settings.api_key:
             if exc_type is not None:
-                # Use synchronous update to ensure it completes before process exit
-                _update_task_status_sync(
+                # Use fire-and-forget to avoid blocking the event loop
+                _fire_and_forget_status_update(
                     self.task_run_id,
                     "error",
                     job_id=self.job_id,
@@ -533,8 +533,8 @@ class trace:
                 if not self.job_id:
                     _print_trace_complete_url(self.task_run_id, error_occurred=True)
             else:
-                # Use synchronous update to ensure it completes before process exit
-                _update_task_status_sync(
+                # Use fire-and-forget to avoid blocking the event loop
+                _fire_and_forget_status_update(
                     self.task_run_id,
                     "completed",
                     job_id=self.job_id,

hud/otel/exporters.py CHANGED Viewed

@@ -1,21 +1,27 @@
-"""Custom OpenTelemetry exporter that sends spans to the existing HUD telemetry
-HTTP endpoint (/trace/<id>/telemetry-upload).
+"""Custom OpenTelemetry exporter for HUD telemetry backend.
-The exporter groups spans by ``hud.task_run_id`` baggage / attribute so we keep
-exactly the same semantics the old async worker in ``hud.telemetry.exporter``
-implemented.
+This exporter sends spans to the HUD telemetry HTTP endpoint, grouping them
+by task_run_id for efficient batch uploads.
-This exporter is *synchronous* (derives from :class:`SpanExporter`).  We rely on
-``hud.shared.make_request_sync`` which already contains retry & auth logic.
+Performance optimizations:
+- Detects async contexts and runs exports in a thread pool to avoid blocking
+- Uses persistent HTTP client with connection pooling for reduced overhead
+- Tracks pending export futures to ensure completion during shutdown
+The exporter derives from SpanExporter (synchronous interface) but handles
+async contexts intelligently to prevent event loop blocking during high-concurrency
+workloads.
 """
 from __future__ import annotations
+import atexit
+import concurrent.futures as cf
 import contextlib
 import json
 import logging
-import time
 from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor
 from datetime import UTC, datetime
 from typing import TYPE_CHECKING, Any
@@ -31,6 +37,34 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
+# Global singleton thread pool for span exports
+_export_executor: ThreadPoolExecutor | None = None
+def get_export_executor() -> ThreadPoolExecutor:
+    """Return the process-wide thread pool used for span exports.
+    The pool is created lazily on the first call and the same instance is
+    returned afterwards. On creation an atexit hook is registered that shuts
+    the pool down, waiting for queued work to finish.
+    Returns:
+        ThreadPoolExecutor with 8 workers named "span-export*"
+    """
+    global _export_executor
+    if _export_executor is not None:
+        return _export_executor
+    # First use: build the pool (8 workers) and arrange for its shutdown at exit
+    _export_executor = ThreadPoolExecutor(max_workers=8, thread_name_prefix="span-export")
+    def _shutdown_pool() -> None:
+        pool = _export_executor
+        if pool is not None:
+            pool.shutdown(wait=True)
+    atexit.register(_shutdown_pool)
+    return _export_executor
 # ---------------------------------------------------------------------------
 # Models
@@ -297,73 +331,213 @@ def _span_to_dict(span: ReadableSpan) -> dict[str, Any]:
 class HudSpanExporter(SpanExporter):
-    """Exporter that forwards spans to HUD backend using existing endpoint."""
+    """OpenTelemetry span exporter for the HUD backend.
+    Spans are grouped by their ``hud.task_run_id`` attribute and each group is
+    POSTed to ``<telemetry_url>/trace/<run_id>/telemetry-upload``.
+    When ``export`` is called on a thread with a running asyncio event loop,
+    the blocking HTTP uploads are submitted to the shared export thread pool
+    so the loop is not blocked; otherwise they run inline. Submitted uploads
+    are tracked as ``concurrent.futures.Future`` objects so that
+    ``force_flush`` and ``shutdown`` can wait for them.
+    """
     def __init__(self, *, telemetry_url: str, api_key: str) -> None:
+        """Initialize the HUD span exporter.
+        Args:
+            telemetry_url: Base URL for the HUD telemetry backend
+            api_key: API key for authentication
+        """
         super().__init__()
         self._telemetry_url = telemetry_url.rstrip("/")
         self._api_key = api_key
-    # ------------------------------------------------------------------
-    # Core API
-    # ------------------------------------------------------------------
+        # In-flight pool uploads. These must be concurrent.futures futures:
+        # cf.wait() (used by shutdown/force_flush) does not accept asyncio futures.
+        self._pending_futures: list[cf.Future[SpanExportResult]] = []
     def export(self, spans: list[ReadableSpan]) -> SpanExportResult:  # type: ignore[override]
+        """Export spans to HUD backend.
+        If the calling thread has a running asyncio event loop, the upload is
+        handed to the export thread pool and this method returns immediately.
+        Otherwise the upload runs synchronously in the calling thread.
+        Args:
+            spans: List of ReadableSpan objects to export
+        Returns:
+            SpanExportResult.SUCCESS when there is nothing to send or the upload
+            was handed to the pool; for a synchronous upload, SUCCESS or FAILURE
+            depending on whether every group was sent
+        """
         if not spans:
             return SpanExportResult.SUCCESS
-        # Group spans by hud.task_run_id attribute
+        # Group spans by task_run_id for batched uploads
         grouped: dict[str, list[ReadableSpan]] = defaultdict(list)
         for span in spans:
             run_id = span.attributes.get("hud.task_run_id") if span.attributes else None
             if not run_id:
-                # Skip spans that are outside HUD traces
+                # Skip spans outside HUD traces
                 continue
             grouped[str(run_id)].append(span)
-        # Send each group synchronously (retry inside make_request_sync)
-        for run_id, span_batch in grouped.items():
-            try:
-                url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
-                telemetry_spans = [_span_to_dict(s) for s in span_batch]
-                # Include current step count in metadata
-                metadata = {}
-                # Get the HIGHEST step count from the batch (most recent)
-                step_count = 0
-                for span in span_batch:
-                    if span.attributes and "hud.step_count" in span.attributes:
-                        current_step = span.attributes["hud.step_count"]
-                        if isinstance(current_step, int) and current_step > step_count:
-                            step_count = current_step
-                payload = {
-                    "metadata": metadata,
-                    "telemetry": telemetry_spans,
-                }
-                # Only include step_count if we found any steps
-                if step_count > 0:
-                    payload["step_count"] = step_count
-                logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
-                make_request_sync(
-                    method="POST",
-                    url=url,
-                    json=payload,
-                    api_key=self._api_key,
-                )
-            except Exception as exc:
-                logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
-                # If *any* group fails we return FAILURE so the OTEL SDK can retry
-                return SpanExportResult.FAILURE
-        return SpanExportResult.SUCCESS
+        if not grouped:
+            # Every span was outside a HUD trace - nothing to upload
+            return SpanExportResult.SUCCESS
+        # Detect async context to avoid event loop blocking
+        import asyncio
+        try:
+            asyncio.get_running_loop()
+            # Submit directly to the pool (not loop.run_in_executor) so the tracked
+            # object is a concurrent.futures.Future that cf.wait() can handle
+            future = get_export_executor().submit(self._upload_grouped, grouped)
+        except RuntimeError:
+            # Either no event loop is running in this thread, or the pool refused
+            # the job (e.g. it was already shut down) - upload synchronously
+            return self._upload_grouped(grouped)
+        # Track first, then register cleanup: the callback fires immediately if
+        # the upload has already finished
+        self._pending_futures.append(future)
+        future.add_done_callback(self._forget_future)
+        # Don't wait for it - return immediately
+        return SpanExportResult.SUCCESS
+    def _upload_grouped(self, grouped: dict[str, list[ReadableSpan]]) -> SpanExportResult:
+        """Upload each task run's spans with one blocking POST per run.
+        Args:
+            grouped: Spans keyed by task_run_id
+        Returns:
+            SpanExportResult.SUCCESS if every upload succeeded, otherwise
+            SpanExportResult.FAILURE as soon as one upload raises (the
+            remaining groups are not attempted)
+        """
+        for run_id, span_batch in grouped.items():
+            try:
+                url = f"{self._telemetry_url}/trace/{run_id}/telemetry-upload"
+                telemetry_spans = [_span_to_dict(s) for s in span_batch]
+                # Get the HIGHEST step count from the batch (most recent)
+                step_count = 0
+                for span in span_batch:
+                    if span.attributes and "hud.step_count" in span.attributes:
+                        current_step = span.attributes["hud.step_count"]
+                        if isinstance(current_step, int) and current_step > step_count:
+                            step_count = current_step
+                payload: dict[str, Any] = {
+                    "metadata": {},
+                    "telemetry": telemetry_spans,
+                }
+                # Only include step_count if we found any steps
+                if step_count > 0:
+                    payload["step_count"] = step_count
+                logger.debug("HUD exporter sending %d spans to %s", len(span_batch), url)
+                make_request_sync(
+                    method="POST",
+                    url=url,
+                    json=payload,
+                    api_key=self._api_key,
+                )
+            except Exception as exc:
+                logger.exception("HUD exporter failed to send spans for task %s: %s", run_id, exc)
+                # If *any* group fails we report FAILURE for the whole batch
+                return SpanExportResult.FAILURE
+        return SpanExportResult.SUCCESS
+    def _forget_future(self, future: cf.Future[SpanExportResult]) -> None:
+        """Done-callback: stop tracking a finished pool upload."""
+        # force_flush/shutdown may already have removed it
+        with contextlib.suppress(ValueError):
+            self._pending_futures.remove(future)
     def shutdown(self) -> None:  # type: ignore[override]
-        # Nothing to cleanup, httpx handled inside make_request_sync
-        pass
+        """Shutdown the exporter and wait for pending exports.
+        Waits up to 10 seconds for any in-flight pool uploads to complete,
+        then stops tracking them whether or not they finished.
+        """
+        # Snapshot: done-callbacks on pool threads remove entries concurrently
+        pending = list(self._pending_futures)
+        try:
+            if pending:
+                with contextlib.suppress(Exception):
+                    cf.wait(pending, timeout=10.0)
+        finally:
+            self._pending_futures.clear()
     def force_flush(self, timeout_millis: int | None = None) -> bool:  # type: ignore[override]
-        if timeout_millis:
-            time.sleep(timeout_millis / 1000)
-        # Synchronous export, nothing buffered here
-        return True
+        """Wait for all pending pool uploads to complete.
+        Args:
+            timeout_millis: Maximum time to wait in milliseconds; None or 0
+                falls back to 30000
+        Returns:
+            True if no tracked upload was still running when the wait ended,
+            False on timeout or if waiting itself failed
+        """
+        # Snapshot: done-callbacks on pool threads remove entries concurrently
+        pending = list(self._pending_futures)
+        if not pending:
+            return True
+        total_pending = len(pending)
+        show_progress = total_pending > 10
+        if show_progress:
+            # Show progress for large batches
+            logger.info("Flushing %d pending telemetry uploads...", total_pending)
+        timeout = (timeout_millis or 30000) / 1000.0
+        try:
+            done, not_done = cf.wait(pending, timeout=timeout)
+        except Exception:
+            # Best effort: report "not flushed" but leave a trace for debugging
+            logger.debug("Waiting for pending telemetry uploads failed", exc_info=True)
+            return False
+        # Remove completed futures
+        for finished in done:
+            with contextlib.suppress(ValueError):
+                self._pending_futures.remove(finished)
+        if show_progress:
+            logger.info("Completed %d/%d telemetry uploads", len(done), total_pending)
+        return not not_done

hud/otel/tests/__init__.py CHANGED Viewed

	@@ -1 +0,0 @@
1	- """Tests for OpenTelemetry integration."""

hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl

Potentially problematic release.

hud-python 0.4.51__py3-none-any.whl → 0.4.53__py3-none-any.whl