PyPI - hud-python - Versions diffs - 0.4.24__tar.gz → 0.4.26__tar.gz - Mend

hud-python 0.4.24__tar.gz → 0.4.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (202) hide show

{hud_python-0.4.24 → hud_python-0.4.26}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.4.24
+Version: 0.4.26
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues

{hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/base.py RENAMED Viewed

@@ -556,26 +556,11 @@ class MCPAgent(ABC):
         all_tools = await self.mcp_client.list_tools()
-        # Filter tools
-        self._available_tools = []
-        self._tool_map = {}
-        # Track response tools by server
         response_tools_by_server: dict[str, str] = {}  # server_name -> tool_name
         for tool in all_tools:
-            # Check if tool should be included
-            if self.allowed_tools and tool.name not in self.allowed_tools:
-                continue
-            if tool.name in self.disallowed_tools:
-                continue
-            self._available_tools.append(tool)
-            # Simplified mapping - just tool name to tool
-            self._tool_map[tool.name] = tool
-            # Track response tools
             if "response" in tool.name or tool.name == "response":
+                self.console.debug(f"Found response tool: '{tool.name}'")
                 # Extract server name from tool name (e.g., "grader_response" -> "grader")
                 if "_" in tool.name:
                     server_name = tool.name.split("_", 1)[0]
@@ -583,27 +568,61 @@ class MCPAgent(ABC):
                 else:
                     response_tools_by_server["_default"] = tool.name
-        # Find the response tool to use (prioritize last server in config)
+        # Add response tool to lifecycle tools BEFORE filtering
         if response_tools_by_server and hasattr(self.mcp_client, "mcp_config"):
             # Get server names in order from mcp_config
             server_names = list(self.mcp_client.mcp_config.keys())
+            self.console.debug(f"Server names: {server_names}")
             # Try to find response tool from last server first
             response_tool_name = None
             for server_name in reversed(server_names):
                 if server_name in response_tools_by_server:
                     response_tool_name = response_tools_by_server[server_name]
+                    self.console.debug(f"Found response tool '{response_tool_name}' from server '{server_name}'")
                     break
             # Fallback to any response tool
             if not response_tool_name and response_tools_by_server:
                 response_tool_name = next(iter(response_tools_by_server.values()))
+                self.console.debug(f"Using fallback response tool '{response_tool_name}'")
             # Add to lifecycle tools if found
             if response_tool_name and response_tool_name not in self.lifecycle_tools:
                 self.console.debug(f"Auto-detected '{response_tool_name}' tool as a lifecycle tool")
                 self.response_tool_name = response_tool_name
                 self.lifecycle_tools.append(response_tool_name)
+            elif response_tool_name:
+                self.console.debug(f"Response tool '{response_tool_name}' already in lifecycle_tools")
+                self.response_tool_name = response_tool_name
+        else:
+            self.console.debug(f"No response tools found or no mcp_config")
+        # Filter tools
+        self._available_tools = []
+        self._tool_map = {}
+        self.console.debug(f"All tools: {[t.name for t in all_tools]}")
+        self.console.debug(f"Allowed tools: {self.allowed_tools}")
+        self.console.debug(f"Disallowed tools: {self.disallowed_tools}")
+        self.console.debug(f"Lifecycle tools: {self.lifecycle_tools}")
+        for tool in all_tools:
+            # Lifecycle tools (setup, evaluate, response) should always be included
+            is_lifecycle = tool.name in self.lifecycle_tools
+            # Check if tool should be included
+            if not is_lifecycle:
+                if self.allowed_tools and tool.name not in self.allowed_tools:
+                    self.console.debug(f"Skipping tool '{tool.name}' - not in allowed_tools")
+                    continue
+                if tool.name in self.disallowed_tools:
+                    self.console.debug(f"Skipping tool '{tool.name}' - in disallowed_tools")
+                    continue
+            self.console.debug(f"Adding tool '{tool.name}' to available tools (lifecycle={is_lifecycle})")
+            self._available_tools.append(tool)
+            self._tool_map[tool.name] = tool
         # Check if all required tools are available
         if self.required_tools:

{hud_python-0.4.24 → hud_python-0.4.26}/hud/agents/misc/response_agent.py RENAMED Viewed

@@ -34,7 +34,8 @@ class ResponseAgent:
         - STOP: If the agent indicates it has successfully completed a task, even if phrased as a question
           like "I have entered the right values into this form. Would you like me to do anything else?"
-          or "Here is the website. Is there any other information you need?"
+          or "Here is the website. Is there any other information you need?" or if the agent has
+          strongly determined it wants to stop the task.
         - CONTINUE: If the agent is asking for clarification before proceeding with a task
           like "I'm about to clear cookies from this website. Would you like me to proceed?"

{hud_python-0.4.24 → hud_python-0.4.26}/hud/clients/mcp_use.py RENAMED Viewed

@@ -15,7 +15,7 @@ from hud.types import MCPToolCall, MCPToolResult
 from hud.version import __version__ as hud_version
 from .base import BaseHUDClient
-from .utils.retry import retry_with_backoff
+from .utils.mcp_use_retry import patch_all_sessions
 logger = logging.getLogger(__name__)
@@ -64,6 +64,10 @@ class MCPUseHUDClient(BaseHUDClient):
             self._sessions = await self._client.create_all_sessions()
             logger.info("Created %d MCP sessions", len(self._sessions))
+            # Patch all sessions with retry logic
+            patch_all_sessions(self._sessions)
+            logger.debug("Applied retry logic to all MCP sessions")
             # Configure validation for all sessions based on client setting
             try:
                 for session in self._sessions.values():
@@ -128,11 +132,8 @@ class MCPUseHUDClient(BaseHUDClient):
                     logger.warning("Client session not initialized for %s", server_name)
                     continue
-                # List tools with retry logic for HTTP errors
-                tools_result = await retry_with_backoff(
-                    session.connector.client_session.list_tools,
-                    operation_name=f"list_tools_{server_name}",
-                )
+                # List tools (retry logic is handled at transport level)
+                tools_result = await session.connector.client_session.list_tools()
                 logger.info(
                     "Discovered %d tools from '%s': %s",
@@ -206,12 +207,10 @@ class MCPUseHUDClient(BaseHUDClient):
         if session.connector.client_session is None:
             raise ValueError(f"Client session not initialized for {server_name}")
-        # Call tool with retry logic for HTTP errors (502, 503, 504)
-        result = await retry_with_backoff(
-            session.connector.client_session.call_tool,
+        # Call tool (retry logic is handled at transport level)
+        result = await session.connector.client_session.call_tool(
             name=original_tool.name,  # Use original tool name, not prefixed
             arguments=tool_call.arguments or {},
-            operation_name=f"call_tool_{original_tool.name}",
         )
         if self.verbose:
@@ -239,10 +238,8 @@ class MCPUseHUDClient(BaseHUDClient):
                     continue
                 # Prefer standard method name if available
                 if hasattr(session.connector.client_session, "list_resources"):
-                    resources = await retry_with_backoff(
-                        session.connector.client_session.list_resources,
-                        operation_name=f"list_resources_{server_name}",
-                    )
+                    # List resources (retry logic is handled at transport level)
+                    resources = await session.connector.client_session.list_resources()
                 else:
                     # If the client doesn't support resource listing, skip
                     continue
@@ -272,11 +269,8 @@ class MCPUseHUDClient(BaseHUDClient):
                 resource_uri = AnyUrl(uri) if isinstance(uri, str) else uri
                 # Prefer read_resource; fall back to list_resources if needed
                 if hasattr(session.connector.client_session, "read_resource"):
-                    result = await retry_with_backoff(
-                        session.connector.client_session.read_resource,
-                        resource_uri,
-                        operation_name=f"read_resource_{server_name}",
-                    )
+                    # Read resource (retry logic is handled at transport level)
+                    result = await session.connector.client_session.read_resource(resource_uri)
                 else:
                     # Fallback path for older clients: not supported in strict typing
                     raise AttributeError("read_resource not available")

hud_python-0.4.26/hud/clients/tests/test_mcp_use_retry.py ADDED Viewed

@@ -0,0 +1,378 @@
+"""Tests for MCP-use client retry functionality."""
+from __future__ import annotations
+from unittest.mock import AsyncMock, Mock, patch
+import pytest
+import requests
+from mcp import types
+from hud.clients.mcp_use import MCPUseHUDClient
+from hud.clients.utils.mcp_use_retry import (
+    create_async_retry_wrapper,
+    create_retry_session,
+    patch_all_sessions,
+    patch_mcp_session_http_client,
+)
+from hud.types import MCPToolCall
+class TestRetrySession:
+    """Test the retry session creation."""
+    def test_create_retry_session(self):
+        """Test that retry session is configured correctly."""
+        session = create_retry_session(
+            max_retries=5,
+            retry_status_codes=(500, 502, 503, 504),
+            retry_delay=0.5,
+            backoff_factor=2.0,
+        )
+        # Check that session has adapters mounted
+        assert "http://" in session.adapters
+        assert "https://" in session.adapters
+        # Check adapter configuration
+        adapter = session.adapters["http://"]
+        assert adapter.max_retries.total == 5
+        assert 500 in adapter.max_retries.status_forcelist
+        assert 502 in adapter.max_retries.status_forcelist
+        assert adapter.max_retries.backoff_factor == 2.0
+    def test_retry_session_default_values(self):
+        """Test retry session with default values."""
+        session = create_retry_session()
+        adapter = session.adapters["https://"]
+        assert adapter.max_retries.total == 3
+        assert 502 in adapter.max_retries.status_forcelist
+        assert 503 in adapter.max_retries.status_forcelist
+        assert 504 in adapter.max_retries.status_forcelist
+class TestAsyncRetryWrapper:
+    """Test the async retry wrapper functionality."""
+    @pytest.mark.asyncio
+    async def test_retry_on_error_status_codes(self):
+        """Test that async wrapper retries on specific status codes."""
+        call_count = 0
+        async def mock_func(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            # First two calls fail, third succeeds
+            if call_count < 3:
+                result = Mock()
+                result.status_code = 503  # Service unavailable
+                return result
+            result = Mock()
+            result.status_code = 200
+            return result
+        wrapped = create_async_retry_wrapper(
+            mock_func,
+            max_retries=3,
+            retry_status_codes=(503,),
+            retry_delay=0.01,  # Short delay for testing
+        )
+        result = await wrapped()
+        assert call_count == 3
+        assert result.status_code == 200
+    @pytest.mark.asyncio
+    async def test_retry_on_exception(self):
+        """Test that async wrapper retries on exceptions with status codes."""
+        call_count = 0
+        async def mock_func(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count < 3:
+                raise Exception("HTTP 503 Service Unavailable")
+            return Mock(status_code=200)
+        wrapped = create_async_retry_wrapper(
+            mock_func,
+            max_retries=3,
+            retry_status_codes=(503,),
+            retry_delay=0.01,
+        )
+        result = await wrapped()
+        assert call_count == 3
+        assert result.status_code == 200
+    @pytest.mark.asyncio
+    async def test_no_retry_on_success(self):
+        """Test that successful calls don't trigger retries."""
+        call_count = 0
+        async def mock_func(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            return Mock(status_code=200)
+        wrapped = create_async_retry_wrapper(mock_func)
+        result = await wrapped()
+        assert call_count == 1
+        assert result.status_code == 200
+    @pytest.mark.asyncio
+    async def test_max_retries_exceeded(self):
+        """Test that retries stop after max attempts."""
+        call_count = 0
+        async def mock_func(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            raise Exception("HTTP 503 Service Unavailable")
+        wrapped = create_async_retry_wrapper(
+            mock_func,
+            max_retries=2,
+            retry_status_codes=(503,),
+            retry_delay=0.01,
+        )
+        with pytest.raises(Exception) as exc_info:
+            await wrapped()
+        assert "503" in str(exc_info.value)
+        assert call_count == 3  # Initial + 2 retries
+class TestSessionPatching:
+    """Test the session patching functionality."""
+    def test_patch_sync_session(self):
+        """Test patching a synchronous session."""
+        # Create mock session with connector
+        mock_session = Mock()
+        mock_session.connector = Mock()
+        mock_session.connector._connection_manager = Mock()
+        mock_session.connector._connection_manager._session = requests.Session()
+        # Patch the session
+        patch_mcp_session_http_client(mock_session)
+        # Verify the session was replaced with retry-enabled one
+        patched_session = mock_session.connector._connection_manager._session
+        assert "http://" in patched_session.adapters
+        assert "https://" in patched_session.adapters
+        # Check that it has retry configuration
+        adapter = patched_session.adapters["http://"]
+        assert hasattr(adapter, "max_retries")
+    @pytest.mark.asyncio
+    async def test_patch_async_session(self):
+        """Test patching an async session."""
+        # Create mock async session
+        mock_session = Mock()
+        mock_session.connector = Mock()
+        mock_session.connector.client_session = Mock()
+        async def mock_send_request(*args, **kwargs):
+            return Mock(status_code=200)
+        mock_session.connector.client_session._send_request = mock_send_request
+        # Patch the session
+        patch_mcp_session_http_client(mock_session)
+        # Verify _send_request was wrapped
+        wrapped_func = mock_session.connector.client_session._send_request
+        assert wrapped_func != mock_send_request  # Function was replaced
+        # Test that wrapped function still works
+        result = await wrapped_func()
+        assert result.status_code == 200
+    def test_patch_all_sessions(self):
+        """Test patching multiple sessions."""
+        # Create mock sessions
+        session1 = Mock()
+        session1.connector = Mock()
+        session1.connector._connection_manager = Mock()
+        session1.connector._connection_manager.session = requests.Session()
+        session2 = Mock()
+        session2.connector = Mock()
+        session2.connector.client_session = Mock()
+        session2.connector.client_session._send_request = AsyncMock()
+        sessions = {"server1": session1, "server2": session2}
+        # Patch all sessions
+        patch_all_sessions(sessions)
+        # Verify both were patched
+        assert "http://" in session1.connector._connection_manager.session.adapters
+        assert session2.connector.client_session._send_request != AsyncMock
+class TestMCPUseClientRetry:
+    """Test retry functionality integrated into MCPUseHUDClient."""
+    @pytest.mark.asyncio
+    async def test_client_applies_retry_on_connect(self):
+        """Test that MCPUseHUDClient applies retry logic during connection."""
+        config = {"test_server": {"url": "http://localhost:8080"}}
+        client = MCPUseHUDClient(config)
+        # Mock the MCPUseClient and session creation
+        with patch("hud.clients.mcp_use.MCPUseClient") as MockMCPUseClient:
+            mock_client = Mock()
+            MockMCPUseClient.from_dict.return_value = mock_client
+            # Create mock session
+            mock_session = Mock()
+            mock_session.connector = Mock()
+            mock_session.connector.client_session = Mock()
+            mock_session.connector.client_session._send_request = AsyncMock()
+            mock_session.connector.client_session.list_tools = AsyncMock(
+                return_value=Mock(tools=[])
+            )
+            mock_client.create_all_sessions = AsyncMock(return_value={"test_server": mock_session})
+            # Initialize client (which applies retry logic)
+            await client.initialize()
+            # Verify session was created and patched
+            assert len(client._sessions) == 1
+            assert "test_server" in client._sessions
+    @pytest.mark.asyncio
+    async def test_tool_call_with_retry(self):
+        """Test that tool calls work with retry logic."""
+        config = {"test_server": {"url": "http://localhost:8080"}}
+        client = MCPUseHUDClient(config)
+        with patch("hud.clients.mcp_use.MCPUseClient") as MockMCPUseClient:
+            mock_client = Mock()
+            MockMCPUseClient.from_dict.return_value = mock_client
+            # Create mock session
+            mock_session = Mock()
+            mock_session.connector = Mock()
+            mock_session.connector.client_session = Mock()
+            # Mock tool listing
+            test_tool = types.Tool(
+                name="test_tool",
+                description="Test tool",
+                inputSchema={"type": "object"},
+            )
+            mock_session.connector.client_session.list_tools = AsyncMock(
+                return_value=Mock(tools=[test_tool])
+            )
+            # Mock tool call with simulated retry
+            call_count = 0
+            async def mock_call_tool(name, arguments):
+                nonlocal call_count
+                call_count += 1
+                # First call fails, second succeeds
+                if call_count == 1:
+                    raise Exception("HTTP 503 Service Unavailable")
+                return Mock(
+                    content=[types.TextContent(type="text", text="Success")],
+                    isError=False,
+                    structuredContent=None,
+                )
+            mock_session.connector.client_session.call_tool = mock_call_tool
+            mock_session.connector.client_session._send_request = AsyncMock()
+            mock_client.create_all_sessions = AsyncMock(return_value={"test_server": mock_session})
+            # Initialize and call tool
+            await client.initialize()
+            # Wrap call_tool with retry for this test
+            original_call = mock_session.connector.client_session.call_tool
+            mock_session.connector.client_session.call_tool = create_async_retry_wrapper(
+                original_call,
+                max_retries=2,
+                retry_status_codes=(503,),
+                retry_delay=0.01,
+            )
+            result = await client.call_tool(MCPToolCall(name="test_tool", arguments={}))
+            # Verify retry worked
+            assert call_count == 2  # Failed once, then succeeded
+            assert not result.isError
+            assert result.content[0].text == "Success"
+    @pytest.mark.asyncio
+    async def test_resource_read_with_retry(self):
+        """Test that resource reading works with retry logic."""
+        config = {"test_server": {"url": "http://localhost:8080"}}
+        client = MCPUseHUDClient(config)
+        with patch("hud.clients.mcp_use.MCPUseClient") as MockMCPUseClient:
+            mock_client = Mock()
+            MockMCPUseClient.from_dict.return_value = mock_client
+            # Create mock session
+            mock_session = Mock()
+            mock_session.connector = Mock()
+            mock_session.connector.client_session = Mock()
+            mock_session.connector.client_session.list_tools = AsyncMock(
+                return_value=Mock(tools=[])
+            )
+            # Mock resource read with simulated retry
+            call_count = 0
+            async def mock_read_resource(uri):
+                nonlocal call_count
+                call_count += 1
+                # First call fails, second succeeds
+                if call_count == 1:
+                    raise Exception("HTTP 502 Bad Gateway")
+                return Mock(contents=[Mock(text='{"status": "ok"}')])
+            mock_session.connector.client_session.read_resource = mock_read_resource
+            mock_session.connector.client_session._send_request = AsyncMock()
+            mock_client.create_all_sessions = AsyncMock(return_value={"test_server": mock_session})
+            # Initialize
+            await client.initialize()
+            # Wrap read_resource with retry for this test
+            original_read = mock_session.connector.client_session.read_resource
+            mock_session.connector.client_session.read_resource = create_async_retry_wrapper(
+                original_read,
+                max_retries=2,
+                retry_status_codes=(502,),
+                retry_delay=0.01,
+            )
+            result = await client.read_resource("test://resource")
+            # Verify retry worked
+            assert call_count == 2  # Failed once, then succeeded
+            assert result is not None
+            assert result.contents[0].text == '{"status": "ok"}'
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])

hud-python 0.4.24__tar.gz → 0.4.26__tar.gz

hud-python 0.4.24__tar.gz → 0.4.26__tar.gz