hud-python 0.4.22__py3-none-any.whl → 0.4.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of hud-python has been flagged as potentially problematic.

Files changed (53)
  1. hud/agents/base.py +85 -59
  2. hud/agents/claude.py +5 -1
  3. hud/agents/grounded_openai.py +3 -1
  4. hud/agents/misc/response_agent.py +3 -2
  5. hud/agents/openai.py +2 -2
  6. hud/agents/openai_chat_generic.py +3 -1
  7. hud/cli/__init__.py +34 -24
  8. hud/cli/analyze.py +27 -26
  9. hud/cli/build.py +50 -46
  10. hud/cli/debug.py +7 -7
  11. hud/cli/dev.py +107 -99
  12. hud/cli/eval.py +31 -29
  13. hud/cli/hf.py +53 -53
  14. hud/cli/init.py +28 -28
  15. hud/cli/list_func.py +22 -22
  16. hud/cli/pull.py +36 -36
  17. hud/cli/push.py +76 -74
  18. hud/cli/remove.py +42 -40
  19. hud/cli/rl/__init__.py +2 -2
  20. hud/cli/rl/init.py +41 -41
  21. hud/cli/rl/pod.py +97 -91
  22. hud/cli/rl/ssh.py +42 -40
  23. hud/cli/rl/train.py +75 -73
  24. hud/cli/rl/utils.py +10 -10
  25. hud/cli/tests/test_analyze.py +1 -1
  26. hud/cli/tests/test_analyze_metadata.py +2 -2
  27. hud/cli/tests/test_pull.py +45 -45
  28. hud/cli/tests/test_push.py +31 -29
  29. hud/cli/tests/test_registry.py +15 -15
  30. hud/cli/utils/environment.py +11 -11
  31. hud/cli/utils/interactive.py +17 -17
  32. hud/cli/utils/logging.py +12 -12
  33. hud/cli/utils/metadata.py +12 -12
  34. hud/cli/utils/registry.py +5 -5
  35. hud/cli/utils/runner.py +23 -23
  36. hud/cli/utils/server.py +16 -16
  37. hud/clients/mcp_use.py +19 -5
  38. hud/clients/utils/__init__.py +25 -0
  39. hud/clients/utils/retry.py +186 -0
  40. hud/datasets/execution/parallel.py +71 -46
  41. hud/shared/hints.py +7 -7
  42. hud/tools/grounding/grounder.py +2 -1
  43. hud/types.py +4 -4
  44. hud/utils/__init__.py +3 -3
  45. hud/utils/{design.py → hud_console.py} +39 -33
  46. hud/utils/pretty_errors.py +6 -6
  47. hud/utils/tests/test_version.py +1 -1
  48. hud/version.py +1 -1
  49. {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/METADATA +3 -1
  50. {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/RECORD +53 -52
  51. {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/WHEEL +0 -0
  52. {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/entry_points.txt +0 -0
  53. {hud_python-0.4.22.dist-info → hud_python-0.4.24.dist-info}/licenses/LICENSE +0 -0
hud/clients/mcp_use.py CHANGED
@@ -15,6 +15,7 @@ from hud.types import MCPToolCall, MCPToolResult
 from hud.version import __version__ as hud_version
 
 from .base import BaseHUDClient
+from .utils.retry import retry_with_backoff
 
 logger = logging.getLogger(__name__)
 
@@ -127,8 +128,11 @@ class MCPUseHUDClient(BaseHUDClient):
                 logger.warning("Client session not initialized for %s", server_name)
                 continue
 
-            # List tools
-            tools_result = await session.connector.client_session.list_tools()
+            # List tools with retry logic for HTTP errors
+            tools_result = await retry_with_backoff(
+                session.connector.client_session.list_tools,
+                operation_name=f"list_tools_{server_name}",
+            )
 
             logger.info(
                 "Discovered %d tools from '%s': %s",
@@ -202,9 +206,12 @@ class MCPUseHUDClient(BaseHUDClient):
         if session.connector.client_session is None:
             raise ValueError(f"Client session not initialized for {server_name}")
 
-        result = await session.connector.client_session.call_tool(
+        # Call tool with retry logic for HTTP errors (502, 503, 504)
+        result = await retry_with_backoff(
+            session.connector.client_session.call_tool,
            name=original_tool.name,  # Use original tool name, not prefixed
            arguments=tool_call.arguments or {},
+            operation_name=f"call_tool_{original_tool.name}",
        )
 
        if self.verbose:
@@ -232,7 +239,10 @@ class MCPUseHUDClient(BaseHUDClient):
                continue
            # Prefer standard method name if available
            if hasattr(session.connector.client_session, "list_resources"):
-                resources = await session.connector.client_session.list_resources()
+                resources = await retry_with_backoff(
+                    session.connector.client_session.list_resources,
+                    operation_name=f"list_resources_{server_name}",
+                )
            else:
                # If the client doesn't support resource listing, skip
                continue
@@ -262,7 +272,11 @@ class MCPUseHUDClient(BaseHUDClient):
            resource_uri = AnyUrl(uri) if isinstance(uri, str) else uri
            # Prefer read_resource; fall back to list_resources if needed
            if hasattr(session.connector.client_session, "read_resource"):
-                result = await session.connector.client_session.read_resource(resource_uri)
+                result = await retry_with_backoff(
+                    session.connector.client_session.read_resource,
+                    resource_uri,
+                    operation_name=f"read_resource_{server_name}",
+                )
            else:
                # Fallback path for older clients: not supported in strict typing
                raise AttributeError("read_resource not available")
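
The pattern in each hunk above is the same: a direct await on a client-session method becomes a call routed through retry_with_backoff, with positional arguments passed through unchanged and operation_name used only for logging. A minimal sketch of that calling convention, assuming the new hud.clients.utils.retry module imports as packaged; the flaky coroutine is a stand-in for a session call such as list_tools and is not part of hud:

import asyncio

from httpx import HTTPStatusError, Request, Response

from hud.clients.utils.retry import retry_with_backoff

attempts = 0


async def flaky_list_tools() -> str:
    """Stand-in for client_session.list_tools: fail twice with a 502, then succeed."""
    global attempts
    attempts += 1
    if attempts < 3:
        request = Request("POST", "https://example.invalid/mcp")
        raise HTTPStatusError(
            "Bad Gateway", request=request, response=Response(502, request=request)
        )
    return "tools"


async def main() -> None:
    result = await retry_with_backoff(
        flaky_list_tools,
        retry_delay=0.1,  # keep the demo fast; the packaged default is 2.0 seconds
        operation_name="list_tools_demo",
    )
    print(result)  # "tools", after two retried 502 responses


asyncio.run(main())
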
hud/clients/utils/__init__.py CHANGED
@@ -1 +1,26 @@
 """HUD MCP client utilities."""
+
+from __future__ import annotations
+
+from .retry import (
+    DEFAULT_BACKOFF_FACTOR,
+    DEFAULT_MAX_RETRIES,
+    DEFAULT_RETRY_DELAY,
+    DEFAULT_RETRY_STATUS_CODES,
+    is_retryable_error,
+    retry_with_backoff,
+    with_retry,
+)
+from .retry_transport import RetryTransport, create_retry_httpx_client
+
+__all__ = [
+    "DEFAULT_BACKOFF_FACTOR",
+    "DEFAULT_MAX_RETRIES",
+    "DEFAULT_RETRY_DELAY",
+    "DEFAULT_RETRY_STATUS_CODES",
+    "RetryTransport",
+    "create_retry_httpx_client",
+    "is_retryable_error",
+    "retry_with_backoff",
+    "with_retry",
+]
hud/clients/utils/retry.py ADDED
@@ -0,0 +1,186 @@
+"""Shared retry utilities for MCP client operations."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from functools import wraps
+from typing import TYPE_CHECKING, Any, TypeVar
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+from httpx import HTTPStatusError
+from mcp.shared.exceptions import McpError
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar("T")
+
+# Default retry configuration matching requests.py
+DEFAULT_MAX_RETRIES = 4
+DEFAULT_RETRY_DELAY = 2.0
+DEFAULT_RETRY_STATUS_CODES = {502, 503, 504}
+DEFAULT_BACKOFF_FACTOR = 2.0
+
+
+def is_retryable_error(error: Exception, retry_status_codes: set[int]) -> bool:
+    """
+    Check if an error is retryable based on status codes.
+
+    Args:
+        error: The exception to check
+        retry_status_codes: Set of HTTP status codes to retry on
+
+    Returns:
+        True if the error is retryable, False otherwise
+    """
+    # Check for HTTP status errors with retryable status codes
+    if isinstance(error, HTTPStatusError):
+        return error.response.status_code in retry_status_codes
+
+    # Check for MCP errors that might wrap HTTP errors
+    if isinstance(error, McpError):
+        error_msg = str(error).lower()
+        # Check for common gateway error patterns in the message
+        for code in retry_status_codes:
+            if str(code) in error_msg:
+                return True
+        # Check for gateway error keywords
+        if any(
+            keyword in error_msg
+            for keyword in ["bad gateway", "service unavailable", "gateway timeout"]
+        ):
+            return True
+
+    # Check for generic errors with status codes in the message
+    error_msg = str(error)
+    for code in retry_status_codes:
+        if f"{code}" in error_msg or f"status {code}" in error_msg.lower():
+            return True
+
+    return False
+
+
+async def retry_with_backoff(
+    func: Callable[..., Any],
+    *args: Any,
+    max_retries: int = DEFAULT_MAX_RETRIES,
+    retry_delay: float = DEFAULT_RETRY_DELAY,
+    retry_status_codes: set[int] | None = None,
+    backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
+    operation_name: str | None = None,
+    **kwargs: Any,
+) -> Any:
+    """
+    Execute an async function with retry logic and exponential backoff.
+
+    This matches the retry behavior in requests.py but can be applied
+    to any async function, particularly MCP client operations.
+
+    Args:
+        func: The async function to retry
+        *args: Positional arguments for the function
+        max_retries: Maximum number of retry attempts
+        retry_delay: Initial delay between retries in seconds
+        retry_status_codes: HTTP status codes to retry on
+        backoff_factor: Multiplier for exponential backoff
+        operation_name: Name of the operation for logging
+        **kwargs: Keyword arguments for the function
+
+    Returns:
+        The result of the function call
+
+    Raises:
+        The last exception if all retries are exhausted
+    """
+    if retry_status_codes is None:
+        retry_status_codes = DEFAULT_RETRY_STATUS_CODES
+
+    operation = operation_name or func.__name__
+    last_error = None
+
+    for attempt in range(max_retries + 1):
+        try:
+            result = await func(*args, **kwargs)
+            return result
+        except Exception as e:
+            last_error = e
+
+            # Check if this is a retryable error
+            if not is_retryable_error(e, retry_status_codes):
+                # Not retryable, raise immediately
+                raise
+
+            # Don't retry if we've exhausted attempts
+            if attempt >= max_retries:
+                logger.debug(
+                    "Operation '%s' failed after %d retries: %s",
+                    operation,
+                    max_retries,
+                    e,
+                )
+                raise
+
+            # Calculate backoff delay (exponential backoff)
+            delay = retry_delay * (backoff_factor**attempt)
+
+            logger.warning(
+                "Operation '%s' failed with retryable error, "
+                "retrying in %.2f seconds (attempt %d/%d): %s",
+                operation,
+                delay,
+                attempt + 1,
+                max_retries,
+                e,
+            )
+
+            await asyncio.sleep(delay)
+
+    # This should never be reached, but just in case
+    if last_error:
+        raise last_error
+    raise RuntimeError(f"Unexpected retry loop exit for operation '{operation}'")
+
+
+def with_retry(
+    max_retries: int = DEFAULT_MAX_RETRIES,
+    retry_delay: float = DEFAULT_RETRY_DELAY,
+    retry_status_codes: set[int] | None = None,
+    backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
+) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+    """
+    Decorator to add retry logic to async methods.
+
+    Usage:
+        @with_retry(max_retries=3)
+        async def my_method(self, ...):
+            ...
+
+    Args:
+        max_retries: Maximum number of retry attempts
+        retry_delay: Initial delay between retries
+        retry_status_codes: HTTP status codes to retry on
+        backoff_factor: Multiplier for exponential backoff
+
+    Returns:
+        Decorated function with retry logic
+    """
+
+    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+        @wraps(func)
+        async def wrapper(*args: Any, **kwargs: Any) -> Any:
+            return await retry_with_backoff(
+                func,
+                *args,
+                max_retries=max_retries,
+                retry_delay=retry_delay,
+                retry_status_codes=retry_status_codes,
+                backoff_factor=backoff_factor,
+                operation_name=func.__name__,
+                **kwargs,
+            )
+
+        return wrapper
+
+    return decorator
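
With the defaults above (max_retries=4, retry_delay=2.0, backoff_factor=2.0), a persistently failing call sleeps 2, 4, 8, and 16 seconds between its five attempts, roughly 30 seconds of backoff in total, before the final exception is re-raised. A minimal sketch of the decorator path follows; the connector class is hypothetical and only shows where @with_retry attaches, assuming the module imports as packaged:

import asyncio

from hud.clients.utils.retry import with_retry


class ExampleConnector:
    """Hypothetical class, not part of hud; shows the decorator on an async method."""

    @with_retry(max_retries=3, retry_delay=0.5)
    async def fetch_manifest(self) -> dict:
        # Any exception that is_retryable_error() accepts (502/503/504 patterns)
        # is retried with exponential backoff; everything else propagates at once.
        return {"tools": []}


async def main() -> None:
    print(await ExampleConnector().fetch_manifest())  # {'tools': []}


asyncio.run(main())
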
hud/datasets/execution/parallel.py CHANGED
@@ -114,36 +114,58 @@ def _process_worker(
             task_name = task_dict.get("prompt") or f"Task {index}"
 
             # Use the job_id to group all tasks under the same job
-            with hud.trace(task_name, job_id=job_id, task_id=task_dict.get("id")):
-                # Convert dict to Task
-                task = Task(**task_dict)
-
-                # Create agent instance
-                agent = agent_class(**(agent_config or {}))
-
-                if auto_respond:
-                    agent.response_agent = ResponseAgent()
-
-                # Run the task
-                result = await agent.run(task, max_steps=max_steps)
-
-                # Extract and print evaluation score for visibility
-                reward = getattr(result, "reward", "N/A")
-                logger.info(
-                    "[Worker %s] Task %s: Completed (reward: %s)",
-                    worker_id,
-                    index,
-                    reward,
-                )
-
-                logger.info(
-                    "[Worker %s] Completed task %s (reward: %s)",
-                    worker_id,
-                    index,
-                    reward,
-                )
-
-                return (index, result)
+            with hud.trace(
+                task_name, job_id=job_id, task_id=task_dict.get("id")
+            ):
+                try:
+                    # Convert dict to Task
+                    task = Task(**task_dict)
+
+                    # Create agent instance
+                    agent = agent_class(**(agent_config or {}))
+
+                    if auto_respond:
+                        agent.response_agent = ResponseAgent()
+
+                    # Run the task - this should ALWAYS return a result, even on error
+                    result = await agent.run(task, max_steps=max_steps)
+
+                    # Extract and print evaluation score for visibility
+                    reward = getattr(result, "reward", "N/A")
+                    logger.info(
+                        "[Worker %s] Task %s: ✓ Completed (reward: %s)",
+                        worker_id,
+                        index,
+                        reward,
+                    )
+
+                    logger.info(
+                        "[Worker %s] Completed task %s (reward: %s)",
+                        worker_id,
+                        index,
+                        reward,
+                    )
+
+                    return (index, result)
+                except Exception as e:
+                    # Even if there's an exception, ensure we have a proper result
+                    logger.error(
+                        "[Worker %s] Task %s failed during execution: %s",
+                        worker_id,
+                        index,
+                        str(e)[:200],
+                    )
+                    # Create a proper Trace result for errors
+                    from hud.types import Trace
+
+                    error_result = Trace(
+                        reward=0.0,
+                        done=True,
+                        content=f"Task execution failed: {e}",
+                        isError=True,
+                        info={"error": str(e), "traceback": traceback.format_exc()},
+                    )
+                    return (index, error_result)
 
         except Exception as e:
             error_msg = f"Worker {worker_id}: Task {index} failed: {e}"
@@ -190,22 +212,6 @@ def _process_worker(
     try:
        # Run the async batch processing
        results = loop.run_until_complete(process_batch())
-
-        # CRITICAL: Ensure telemetry is fully sent before process exits
-        # Two things need to complete:
-        # 1. The trace context's __exit__ already called _update_task_status_sync (blocking)
-        # 2. But spans are buffered in BatchSpanProcessor and need explicit flush
-
-        from opentelemetry import trace as otel_trace
-
-        provider = otel_trace.get_tracer_provider()
-        if provider and hasattr(provider, "force_flush"):
-            # This forces BatchSpanProcessor to export all buffered spans NOW
-            # The method returns True if successful, False if timeout
-            success = provider.force_flush(timeout_millis=5000)  # 5 second timeout  # type: ignore
-            if not success:
-                logger.warning("Worker %s: Telemetry flush timed out", worker_id)
-
        return results
    except KeyboardInterrupt:
        logger.info("Worker %s: Interrupted by user, stopping gracefully", worker_id)
@@ -230,6 +236,25 @@ def _process_worker(
        logger.error("Worker %s batch processing failed: %s", worker_id, e)
        return [(idx, {"error": str(e), "isError": True}) for idx, _ in task_batch]
    finally:
+        # CRITICAL: Always ensure telemetry is fully sent before process exits
+        # This must happen in finally block to ensure it runs even on errors
+        try:
+            from opentelemetry import trace as otel_trace
+
+            provider = otel_trace.get_tracer_provider()
+            if provider and hasattr(provider, "force_flush"):
+                # This forces BatchSpanProcessor to export all buffered spans NOW
+                # The method returns True if successful, False if timeout
+                success = provider.force_flush(
+                    timeout_millis=10000
+                )  # 10 second timeout  # type: ignore
+                if not success:
+                    logger.warning("Worker %s: Telemetry flush timed out", worker_id)
+                else:
+                    logger.debug("Worker %s: Telemetry flushed successfully", worker_id)
+        except Exception as flush_error:
+            logger.error("Worker %s: Failed to flush telemetry: %s", worker_id, flush_error)
+
        # Clean up the event loop
        try:
            loop.close()
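
The net effect of these three hunks: a per-task failure now yields an (index, Trace) tuple with reward=0.0 and isError=True instead of escaping the trace context, batch-level failures still fall back to plain error dicts, and the telemetry flush moved into the finally block with a longer 10-second timeout. A hypothetical aggregation helper (not part of hud) showing how a caller could split those mixed results:

from typing import Any


def summarize(results: list[tuple[int, Any]]) -> dict[str, Any]:
    """Split worker output into failures and a mean reward over successes.

    Items may be hud.types.Trace objects (a success, or the error Trace built
    above) or the plain {"error": ..., "isError": True} dicts from the
    batch-level fallback, so the fields are read defensively.
    """

    def is_error(item: Any) -> bool:
        if isinstance(item, dict):
            return bool(item.get("isError"))
        return bool(getattr(item, "isError", False))

    failed = [index for index, item in results if is_error(item)]
    rewards = [
        float(getattr(item, "reward", 0.0) or 0.0)
        for _, item in results
        if not is_error(item)
    ]
    return {
        "failed_indices": failed,
        "mean_reward": sum(rewards) / len(rewards) if rewards else 0.0,
    }
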
hud/shared/hints.py CHANGED
@@ -144,9 +144,9 @@ def render_hints(hints: Iterable[Hint] | None, *, design: Any | None = None) ->
 
     try:
         if design is None:
-            from hud.utils.design import design as default_design  # lazy import
+            from hud.utils.hud_console import hud_console as default_design  # lazy import
 
-            design = default_design
+            hud_console = default_design
     except Exception:
         # If design is unavailable (non-CLI contexts), silently skip rendering
         return
@@ -155,23 +155,23 @@ def render_hints(hints: Iterable[Hint] | None, *, design: Any | None = None) ->
         try:
             # Compact rendering - skip title if same as message
             if hint.title and hint.title != hint.message:
-                design.warning(f"{hint.title}: {hint.message}")
+                hud_console.warning(f"{hint.title}: {hint.message}")
             else:
-                design.warning(hint.message)
+                hud_console.warning(hint.message)
 
             # Tips as bullet points
             if hint.tips:
                 for tip in hint.tips:
-                    design.info(f" • {tip}")
+                    hud_console.info(f" • {tip}")
 
             # Only show command examples if provided
             if hint.command_examples:
                 for cmd in hint.command_examples:
-                    design.command_example(cmd)
+                    hud_console.command_example(cmd)
 
             # Only show docs URL if provided
             if hint.docs_url:
-                design.link(hint.docs_url)
+                hud_console.link(hint.docs_url)
         except Exception:
             logger.warning("Failed to render hint: %s", hint)
             continue
hud/tools/grounding/grounder.py CHANGED
@@ -9,7 +9,6 @@ import re
 
 from openai import AsyncOpenAI
 from opentelemetry import trace
-from PIL import Image
 
 from hud import instrument
 from hud.tools.grounding.config import GrounderConfig  # noqa: TC001
@@ -45,6 +44,8 @@ class Grounder:
             (processed_width, processed_height))
         """
         # Decode image
+        from PIL import Image
+
         image_bytes = base64.b64decode(image_b64)
         img = Image.open(io.BytesIO(image_bytes))
         original_size = (img.width, img.height)
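
Deferring the Pillow import into the method means importing the grounding module no longer requires Pillow at import time; the cost is a one-off module import on the first call. A standalone toy version of the decode step (not the hud.Grounder API) showing the same deferred-import pattern:

import base64
import io


def decode_image(image_b64: str) -> tuple[int, int]:
    """Toy stand-in for the decode step above; not part of hud."""
    from PIL import Image  # deferred: Pillow is only needed once an image arrives

    image_bytes = base64.b64decode(image_b64)
    img = Image.open(io.BytesIO(image_bytes))
    return (img.width, img.height)
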
hud/types.py CHANGED
@@ -29,9 +29,9 @@ class MCPToolCall(CallToolRequestParams):
 
     def __rich__(self) -> str:
         """Rich representation with color formatting."""
-        from hud.utils.design import design
+        from hud.utils.hud_console import hud_console
 
-        return design.format_tool_call(self.name, self.arguments)
+        return hud_console.format_tool_call(self.name, self.arguments)
 
 
 class MCPToolResult(CallToolResult):
@@ -74,10 +74,10 @@ class MCPToolResult(CallToolResult):
 
     def __rich__(self) -> str:
         """Rich representation with color formatting."""
-        from hud.utils.design import design
+        from hud.utils.hud_console import hud_console
 
         content_summary = self._get_content_summary()
-        return design.format_tool_result(content_summary, self.isError)
+        return hud_console.format_tool_result(content_summary, self.isError)
 
 
 class AgentResponse(BaseModel):
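
Both __rich__ hooks keep their signatures, so anything rendering these models through Rich keeps working; only the formatter behind them changed. A hedged sketch, where the tool name and arguments are purely illustrative and MCPToolCall is assumed to take name/arguments from the MCP CallToolRequestParams base model:

from rich import print as rprint

from hud.types import MCPToolCall

call = MCPToolCall(name="computer", arguments={"action": "screenshot"})
rprint(call)  # Rich calls __rich__, which now delegates to hud_console.format_tool_call
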
hud/utils/__init__.py CHANGED
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
-from .design import HUDDesign, design
+from .hud_console import HUDConsole, hud_console
 from .telemetry import stream
 
 __all__ = [
-    "HUDDesign",
-    "design",
+    "HUDConsole",
+    "hud_console",
     "stream",
 ]
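
The old HUDDesign/design names are no longer re-exported from hud.utils, so downstream imports need a one-line update; HUDConsole is the class and hud_console the shared instance. A short sketch (only the re-exported names are confirmed by the diff; direct construction of HUDConsole is an assumption):

from hud.utils import HUDConsole, hud_console

hud_console.info("using the renamed console helper")  # info() is shown in hints.py above
console = HUDConsole()  # assumed: separate instance, if no-arg construction is supported
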