hud-python 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python has been flagged as potentially problematic; review the release details below before upgrading.

Files changed (63):
  1. hud/__main__.py +8 -0
  2. hud/agents/base.py +7 -8
  3. hud/agents/langchain.py +2 -2
  4. hud/agents/tests/test_openai.py +3 -1
  5. hud/cli/__init__.py +114 -52
  6. hud/cli/build.py +121 -71
  7. hud/cli/debug.py +2 -2
  8. hud/cli/{mcp_server.py → dev.py} +101 -38
  9. hud/cli/eval.py +175 -90
  10. hud/cli/init.py +442 -64
  11. hud/cli/list_func.py +72 -71
  12. hud/cli/pull.py +1 -2
  13. hud/cli/push.py +35 -23
  14. hud/cli/remove.py +35 -41
  15. hud/cli/tests/test_analyze.py +2 -1
  16. hud/cli/tests/test_analyze_metadata.py +42 -49
  17. hud/cli/tests/test_build.py +28 -52
  18. hud/cli/tests/test_cursor.py +1 -1
  19. hud/cli/tests/test_debug.py +1 -1
  20. hud/cli/tests/test_list_func.py +75 -64
  21. hud/cli/tests/test_main_module.py +30 -0
  22. hud/cli/tests/test_mcp_server.py +3 -3
  23. hud/cli/tests/test_pull.py +30 -61
  24. hud/cli/tests/test_push.py +70 -89
  25. hud/cli/tests/test_registry.py +36 -38
  26. hud/cli/tests/test_utils.py +1 -1
  27. hud/cli/utils/__init__.py +1 -0
  28. hud/cli/{docker_utils.py → utils/docker.py} +36 -0
  29. hud/cli/{env_utils.py → utils/environment.py} +7 -7
  30. hud/cli/{interactive.py → utils/interactive.py} +91 -19
  31. hud/cli/{analyze_metadata.py → utils/metadata.py} +12 -8
  32. hud/cli/{registry.py → utils/registry.py} +28 -30
  33. hud/cli/{remote_runner.py → utils/remote_runner.py} +1 -1
  34. hud/cli/utils/runner.py +134 -0
  35. hud/cli/utils/server.py +250 -0
  36. hud/clients/base.py +1 -1
  37. hud/clients/fastmcp.py +5 -13
  38. hud/clients/mcp_use.py +6 -10
  39. hud/server/server.py +35 -5
  40. hud/shared/exceptions.py +11 -0
  41. hud/shared/tests/test_exceptions.py +22 -0
  42. hud/telemetry/tests/__init__.py +0 -0
  43. hud/telemetry/tests/test_replay.py +40 -0
  44. hud/telemetry/tests/test_trace.py +63 -0
  45. hud/tools/base.py +20 -3
  46. hud/tools/computer/hud.py +15 -6
  47. hud/tools/executors/tests/test_base_executor.py +27 -0
  48. hud/tools/response.py +12 -8
  49. hud/tools/tests/test_response.py +60 -0
  50. hud/tools/tests/test_tools_init.py +49 -0
  51. hud/utils/design.py +19 -8
  52. hud/utils/mcp.py +17 -5
  53. hud/utils/tests/test_mcp.py +112 -0
  54. hud/utils/tests/test_version.py +1 -1
  55. hud/version.py +1 -1
  56. {hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/METADATA +16 -13
  57. {hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/RECORD +62 -52
  58. hud/cli/runner.py +0 -160
  59. /hud/cli/{cursor.py → utils/cursor.py} +0 -0
  60. /hud/cli/{utils.py → utils/logging.py} +0 -0
  61. {hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/WHEEL +0 -0
  62. {hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/entry_points.txt +0 -0
  63. {hud_python-0.4.11.dist-info → hud_python-0.4.13.dist-info}/licenses/LICENSE +0 -0
hud/tools/computer/hud.py CHANGED
@@ -58,20 +58,29 @@ class HudComputerTool(BaseTool):
58
58
  title: Human-readable display name for the tool (auto-generated from class name)
59
59
  description: Tool description (auto-generated from docstring if not provided)
60
60
  """
61
+ # This is the width and height the agent thinks it operates in
62
+ # By default, use subclass's width and height
63
+ # If specifically set to None, use environment width and height
64
+ self.width = width or computer_settings.DISPLAY_WIDTH
65
+ self.height = height or computer_settings.DISPLAY_HEIGHT
66
+
67
+ # Build metadata with resolution info
68
+ meta = {
69
+ "resolution": {
70
+ "width": self.width,
71
+ "height": self.height,
72
+ }
73
+ }
74
+
61
75
  # Initialize base tool with executor as env
62
76
  super().__init__(
63
77
  env=executor,
64
78
  name=name or "computer",
65
79
  title=title or "Computer Control",
66
80
  description=description or "Control computer with mouse, keyboard, and screenshots",
81
+ meta=meta,
67
82
  )
68
83
 
69
- # This is the width and height the agent thinks it operates in
70
- # By default, use subclass's width and height
71
- # If specifically set to None, use environment width and height
72
- self.width = width or computer_settings.DISPLAY_WIDTH
73
- self.height = height or computer_settings.DISPLAY_HEIGHT
74
-
75
84
  # This is the static width and height of the environment screen
76
85
  # And the width and height of the screenshots taken by the tool
77
86
  self.environment_width = computer_settings.DISPLAY_WIDTH
@@ -336,3 +336,30 @@ class TestBaseExecutor:
336
336
 
337
337
  assert result1.base64_image == screenshot1
338
338
  assert result2.base64_image == screenshot1
339
+
340
+
341
+ class TestLazyImports:
342
+ """Tests for lazy import functionality in executors module."""
343
+
344
+ def test_lazy_import_pyautogui_executor(self):
345
+ """Test lazy import of PyAutoGUIExecutor."""
346
+ # This should trigger the __getattr__ function and import PyAutoGUIExecutor
347
+ from hud.tools.executors import PyAutoGUIExecutor
348
+
349
+ # Verify it's imported correctly
350
+ assert PyAutoGUIExecutor.__name__ == "PyAutoGUIExecutor"
351
+
352
+ def test_lazy_import_xdo_executor(self):
353
+ """Test lazy import of XDOExecutor."""
354
+ # This should trigger the __getattr__ function and import XDOExecutor
355
+ from hud.tools.executors import XDOExecutor
356
+
357
+ # Verify it's imported correctly
358
+ assert XDOExecutor.__name__ == "XDOExecutor"
359
+
360
+ def test_lazy_import_invalid_attribute(self):
361
+ """Test lazy import with invalid attribute name."""
362
+ import hud.tools.executors as executors_module
363
+
364
+ with pytest.raises(AttributeError, match="module '.*' has no attribute 'InvalidExecutor'"):
365
+ _ = executors_module.InvalidExecutor
hud/tools/response.py CHANGED
@@ -12,7 +12,7 @@ if TYPE_CHECKING:
12
12
  class ResponseTool(BaseTool):
13
13
  """
14
14
  Protocol for handling responses within environments.
15
-
15
+
16
16
  This abstract tool defines the interface for response handling in environments.
17
17
  Subclasses should implement the __call__ method to handle responses according
18
18
  to their specific needs.
@@ -36,18 +36,22 @@ class ResponseTool(BaseTool):
36
36
  return blocks
37
37
  """
38
38
 
39
- def __init__(self, name: str, title: str, description: str):
39
+ name: str = "response"
40
+ title: str = "Response Tool"
41
+ description: str = "Send a text response or list of messages to the environment"
42
+
43
+ def __init__(
44
+ self, name: str | None = None, title: str | None = None, description: str | None = None
45
+ ) -> None:
40
46
  super().__init__(
41
- name=name or "response",
42
- title=title or "Response Tool",
43
- description=description or "Send a text response or list of messages to the environment",
47
+ name=name or self.name,
48
+ title=title or self.title,
49
+ description=description or self.description,
44
50
  )
45
51
 
46
52
  @abstractmethod
47
53
  async def __call__(
48
- self,
49
- response: str | None = None,
50
- messages: list[ContentBlock] | None = None
54
+ self, response: str | None = None, messages: list[ContentBlock] | None = None
51
55
  ) -> list[ContentBlock]:
52
56
  """Handle response or messages and return as ContentBlocks.
53
57
 
@@ -0,0 +1,60 @@
1
+ """Tests for ResponseTool class."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+
7
+ from hud.tools.response import ResponseTool
8
+
9
+
10
+ class ConcreteResponseTool(ResponseTool):
11
+ """Concrete implementation for testing."""
12
+
13
+ async def __call__(self, response: str | None = None, messages=None):
14
+ """Concrete implementation."""
15
+ from mcp.types import TextContent
16
+
17
+ return [TextContent(text=response or "test", type="text")]
18
+
19
+
20
+ class TestResponseTool:
21
+ """Tests for ResponseTool abstract class."""
22
+
23
+ def test_init_with_defaults(self):
24
+ """Test initialization with default values."""
25
+ tool = ConcreteResponseTool()
26
+ assert tool.name == "response"
27
+ assert tool.title == "Response Tool"
28
+ assert tool.description == "Send a text response or list of messages to the environment"
29
+
30
+ def test_init_with_custom_values(self):
31
+ """Test initialization with custom values."""
32
+ tool = ConcreteResponseTool(
33
+ name="custom_response", title="Custom Response Tool", description="Custom description"
34
+ )
35
+ assert tool.name == "custom_response"
36
+ assert tool.title == "Custom Response Tool"
37
+ assert tool.description == "Custom description"
38
+
39
+ def test_abstract_method_not_implemented(self):
40
+ """Test that abstract method raises NotImplementedError when not implemented."""
41
+
42
+ # Create a concrete tool to test the abstract method's NotImplementedError
43
+ tool = ConcreteResponseTool()
44
+
45
+ # This should trigger the NotImplementedError in the abstract method
46
+ with pytest.raises(NotImplementedError, match="Subclasses must implement __call__"):
47
+ # Call the parent abstract method directly to hit the raise line
48
+ import asyncio
49
+
50
+ asyncio.run(ResponseTool.__call__(tool, "test")) # type: ignore[attr-defined]
51
+
52
+ @pytest.mark.asyncio
53
+ async def test_concrete_implementation(self):
54
+ """Test that concrete implementation works correctly."""
55
+ tool = ConcreteResponseTool()
56
+ result = await tool("Hello, World!")
57
+
58
+ assert len(result) == 1
59
+ assert result[0].text == "Hello, World!"
60
+ assert result[0].type == "text"
@@ -0,0 +1,49 @@
1
+ """Tests for hud.tools.__init__ module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+
7
+
8
+ class TestToolsInit:
9
+ """Tests for the tools package initialization."""
10
+
11
+ def test_lazy_import_anthropic_computer_tool(self):
12
+ """Test lazy import of AnthropicComputerTool."""
13
+ from hud.tools import AnthropicComputerTool
14
+
15
+ # Verify it's imported correctly
16
+ assert AnthropicComputerTool.__name__ == "AnthropicComputerTool"
17
+
18
+ def test_lazy_import_hud_computer_tool(self):
19
+ """Test lazy import of HudComputerTool."""
20
+ from hud.tools import HudComputerTool
21
+
22
+ # Verify it's imported correctly
23
+ assert HudComputerTool.__name__ == "HudComputerTool"
24
+
25
+ def test_lazy_import_openai_computer_tool(self):
26
+ """Test lazy import of OpenAIComputerTool."""
27
+ from hud.tools import OpenAIComputerTool
28
+
29
+ # Verify it's imported correctly
30
+ assert OpenAIComputerTool.__name__ == "OpenAIComputerTool"
31
+
32
+ def test_lazy_import_invalid_attribute(self):
33
+ """Test lazy import with invalid attribute name."""
34
+ import hud.tools as tools_module
35
+
36
+ with pytest.raises(AttributeError, match="module '.*' has no attribute 'InvalidTool'"):
37
+ _ = tools_module.InvalidTool
38
+
39
+ def test_direct_imports_available(self):
40
+ """Test that directly imported tools are available."""
41
+ from hud.tools import BaseHub, BaseTool, BashTool, EditTool, PlaywrightTool, ResponseTool
42
+
43
+ # All should be available
44
+ assert BaseHub is not None
45
+ assert BaseTool is not None
46
+ assert BashTool is not None
47
+ assert EditTool is not None
48
+ assert PlaywrightTool is not None
49
+ assert ResponseTool is not None
hud/utils/design.py CHANGED
@@ -93,10 +93,10 @@ class HUDDesign:
93
93
  """
94
94
  console = self._stderr_console if stderr else self._stdout_console
95
95
  console.print(f"[default not bold]{message}[/default not bold]")
96
-
96
+
97
97
  def print(self, message: str, stderr: bool = True) -> None:
98
98
  """Print a message.
99
-
99
+
100
100
  Args:
101
101
  message: The message to print
102
102
  stderr: If True, output to stderr (default), otherwise stdout
@@ -136,7 +136,9 @@ class HUDDesign:
136
136
  console = self._stderr_console if stderr else self._stdout_console
137
137
  console.print(f"[default not bold]{json_str}[/default not bold]")
138
138
 
139
- def key_value_table(self, data: dict[str, str], show_header: bool = False, stderr: bool = True) -> None:
139
+ def key_value_table(
140
+ self, data: dict[str, str], show_header: bool = False, stderr: bool = True
141
+ ) -> None:
140
142
  """Print a key-value table.
141
143
 
142
144
  Args:
@@ -197,7 +199,14 @@ class HUDDesign:
197
199
  console = self._stderr_console if stderr else self._stdout_console
198
200
  console.print(f"\n[yellow]💡 Hint: {hint}[/yellow]")
199
201
 
200
- def status_item(self, label: str, value: str, status: str = "success", primary: bool = False, stderr: bool = True) -> None:
202
+ def status_item(
203
+ self,
204
+ label: str,
205
+ value: str,
206
+ status: str = "success",
207
+ primary: bool = False,
208
+ stderr: bool = True,
209
+ ) -> None:
201
210
  """Print a status item with indicator.
202
211
 
203
212
  Args:
@@ -211,18 +220,20 @@ class HUDDesign:
211
220
  "success": f"[{GREEN} not bold]✓[/{GREEN} not bold]",
212
221
  "error": f"[{RED} not bold]✗[/{RED} not bold]",
213
222
  "warning": "[yellow]⚠[/yellow]",
214
- "info": f"[{DIM}]•[/{DIM}]"
223
+ "info": f"[{DIM}]•[/{DIM}]",
215
224
  }
216
-
225
+
217
226
  indicator = indicators.get(status, indicators["info"])
218
227
  console = self._stderr_console if stderr else self._stdout_console
219
-
228
+
220
229
  if primary:
221
230
  console.print(f"{indicator} {label}: [bold cyan]{value}[/bold cyan]")
222
231
  else:
223
232
  console.print(f"{indicator} {label}: {value}")
224
233
 
225
- def command_example(self, command: str, description: str | None = None, stderr: bool = True) -> None:
234
+ def command_example(
235
+ self, command: str, description: str | None = None, stderr: bool = True
236
+ ) -> None:
226
237
  """Print a command example with cyan highlighting.
227
238
 
228
239
  Args:
hud/utils/mcp.py CHANGED
@@ -23,7 +23,7 @@ def patch_mcp_config(mcp_config: dict[str, dict[str, Any]], patch: MCPConfigPatc
23
23
 
24
24
  for server_cfg in mcp_config.values():
25
25
  url = server_cfg.get("url", "")
26
-
26
+
27
27
  # 1) HTTP header lane (only for hud MCP servers)
28
28
  if hud_mcp_url in url and patch.headers:
29
29
  for key, value in patch.headers.items():
@@ -37,9 +37,11 @@ def patch_mcp_config(mcp_config: dict[str, dict[str, Any]], patch: MCPConfigPatc
37
37
  meta.setdefault(key, value)
38
38
 
39
39
 
40
- def setup_hud_telemetry(mcp_config: dict[str, dict[str, Any]], auto_trace: bool = True) -> Any | None:
40
+ def setup_hud_telemetry(
41
+ mcp_config: dict[str, dict[str, Any]], auto_trace: bool = True
42
+ ) -> Any | None:
41
43
  """Setup telemetry for hud servers.
42
-
44
+
43
45
  Returns:
44
46
  The auto-created trace context manager if one was created, None otherwise.
45
47
  Caller is responsible for exiting the context manager.
@@ -47,12 +49,22 @@ def setup_hud_telemetry(mcp_config: dict[str, dict[str, Any]], auto_trace: bool
47
49
  if not mcp_config:
48
50
  raise ValueError("Please run initialize() before setting up client-side telemetry")
49
51
 
52
+ # Check if there are any HUD servers to setup telemetry for
53
+ hud_mcp_url = settings.hud_mcp_url
54
+ has_hud_servers = any(
55
+ hud_mcp_url in server_cfg.get("url", "") for server_cfg in mcp_config.values()
56
+ )
57
+
58
+ # If no HUD servers, no need for telemetry setup
59
+ if not has_hud_servers:
60
+ return None
61
+
50
62
  from hud.otel import get_current_task_run_id
51
63
  from hud.telemetry import trace
52
64
 
53
65
  run_id = get_current_task_run_id()
54
66
  auto_trace_cm = None
55
-
67
+
56
68
  if not run_id and auto_trace:
57
69
  auto_trace_cm = trace("My Trace")
58
70
  run_id = auto_trace_cm.__enter__()
@@ -63,5 +75,5 @@ def setup_hud_telemetry(mcp_config: dict[str, dict[str, Any]], auto_trace: bool
63
75
  mcp_config,
64
76
  MCPConfigPatch(headers={"Run-Id": run_id}, meta={"run_id": run_id}),
65
77
  )
66
-
78
+
67
79
  return auto_trace_cm
@@ -0,0 +1,112 @@
1
+ """Tests for MCP utility functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pytest
6
+
7
+ from hud.utils.mcp import MCPConfigPatch, patch_mcp_config, setup_hud_telemetry
8
+
9
+
10
+ class TestPatchMCPConfig:
11
+ """Tests for patch_mcp_config function."""
12
+
13
+ def test_patch_headers_for_hud_servers(self):
14
+ """Test patching headers for HUD MCP servers."""
15
+ from hud.settings import get_settings
16
+
17
+ settings = get_settings()
18
+
19
+ # Create an MCP config with a HUD server URL
20
+ mcp_config = {"test_server": {"url": f"{settings.hud_mcp_url}/test"}}
21
+
22
+ # Create patch with headers
23
+ patch = MCPConfigPatch(headers={"X-Test-Header": "test-value"}, meta=None)
24
+
25
+ # Apply patch
26
+ patch_mcp_config(mcp_config, patch)
27
+
28
+ # Verify headers were added
29
+ assert "headers" in mcp_config["test_server"]
30
+ assert mcp_config["test_server"]["headers"]["X-Test-Header"] == "test-value" # type: ignore[index]
31
+
32
+ def test_patch_headers_preserves_existing(self):
33
+ """Test that existing headers are preserved."""
34
+ from hud.settings import get_settings
35
+
36
+ settings = get_settings()
37
+
38
+ # Create config with existing headers
39
+ mcp_config = {
40
+ "test_server": {
41
+ "url": f"{settings.hud_mcp_url}/test",
42
+ "headers": {"Existing-Header": "existing-value"},
43
+ }
44
+ }
45
+
46
+ patch = MCPConfigPatch(
47
+ headers={"X-Test-Header": "test-value", "Existing-Header": "new-value"},
48
+ meta=None,
49
+ )
50
+
51
+ patch_mcp_config(mcp_config, patch)
52
+
53
+ # Existing header should be preserved, new one added
54
+ assert mcp_config["test_server"]["headers"]["Existing-Header"] == "existing-value"
55
+ assert mcp_config["test_server"]["headers"]["X-Test-Header"] == "test-value"
56
+
57
+ def test_patch_meta_for_all_servers(self):
58
+ """Test patching metadata for all servers."""
59
+ mcp_config = {
60
+ "server1": {"url": "http://example.com"},
61
+ "server2": {"url": "http://other.com"},
62
+ }
63
+
64
+ patch = MCPConfigPatch(headers=None, meta={"test_key": "test_value"})
65
+
66
+ patch_mcp_config(mcp_config, patch)
67
+
68
+ # Meta should be added to both servers
69
+ assert mcp_config["server1"]["meta"]["test_key"] == "test_value" # type: ignore[index]
70
+ assert mcp_config["server2"]["meta"]["test_key"] == "test_value" # type: ignore[index]
71
+
72
+ def test_patch_meta_preserves_existing(self):
73
+ """Test that existing meta is preserved."""
74
+ mcp_config = {
75
+ "test_server": {"url": "http://example.com", "meta": {"existing_key": "existing_value"}}
76
+ }
77
+
78
+ patch = MCPConfigPatch(
79
+ headers=None,
80
+ meta={"test_key": "test_value", "existing_key": "new_value"},
81
+ )
82
+
83
+ patch_mcp_config(mcp_config, patch)
84
+
85
+ # Existing meta should be preserved, new one added
86
+ assert mcp_config["test_server"]["meta"]["existing_key"] == "existing_value"
87
+ assert mcp_config["test_server"]["meta"]["test_key"] == "test_value"
88
+
89
+
90
+ class TestSetupHUDTelemetry:
91
+ """Tests for setup_hud_telemetry function."""
92
+
93
+ def test_empty_config_raises_error(self):
94
+ """Test that empty config raises ValueError."""
95
+ with pytest.raises(
96
+ ValueError, match="Please run initialize\\(\\) before setting up client-side telemetry"
97
+ ):
98
+ setup_hud_telemetry({})
99
+
100
+ def test_none_config_raises_error(self):
101
+ """Test that None config raises ValueError."""
102
+ with pytest.raises(
103
+ ValueError, match="Please run initialize\\(\\) before setting up client-side telemetry"
104
+ ):
105
+ setup_hud_telemetry(None) # type: ignore[arg-type]
106
+
107
+ def test_valid_config_returns_none_when_no_hud_servers(self):
108
+ """Test that valid config with no HUD servers returns None."""
109
+ mcp_config = {"test_server": {"url": "http://example.com"}}
110
+
111
+ result = setup_hud_telemetry(mcp_config)
112
+ assert result is None
@@ -5,4 +5,4 @@ def test_import():
5
5
  """Test that the package can be imported."""
6
6
  import hud
7
7
 
8
- assert hud.__version__ == "0.4.11"
8
+ assert hud.__version__ == "0.4.13"
hud/version.py CHANGED
@@ -4,4 +4,4 @@ Version information for the HUD SDK.
4
4
 
5
5
  from __future__ import annotations
6
6
 
7
- __version__ = "0.4.11"
7
+ __version__ = "0.4.13"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hud-python
3
- Version: 0.4.11
3
+ Version: 0.4.13
4
4
  Summary: SDK for the HUD platform.
5
5
  Project-URL: Homepage, https://github.com/hud-evals/hud-python
6
6
  Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
@@ -35,10 +35,9 @@ Classifier: Programming Language :: Python :: 3.11
35
35
  Classifier: Programming Language :: Python :: 3.12
36
36
  Classifier: Programming Language :: Python :: 3.13
37
37
  Requires-Python: <3.14,>=3.11
38
- Requires-Dist: fastmcp>=2.11.2
39
38
  Requires-Dist: httpx<1,>=0.23.0
40
- Requires-Dist: hud-mcp-python-sdk>=0.1.0
41
- Requires-Dist: mcp>=1.13.1
39
+ Requires-Dist: hud-fastmcp-python-sdk>=0.1.2
40
+ Requires-Dist: hud-mcp-python-sdk>=3.13.2
42
41
  Requires-Dist: opentelemetry-api>=1.34.1
43
42
  Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
44
43
  Requires-Dist: opentelemetry-instrumentation-mcp>=0.44.1
@@ -56,7 +55,11 @@ Provides-Extra: agent
56
55
  Requires-Dist: anthropic; extra == 'agent'
57
56
  Requires-Dist: datasets>=2.14.0; extra == 'agent'
58
57
  Requires-Dist: dotenv>=0.9.9; extra == 'agent'
59
- Requires-Dist: hud-mcp-use-python-sdk>=0.1.0; extra == 'agent'
58
+ Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agent'
59
+ Requires-Dist: ipykernel; extra == 'agent'
60
+ Requires-Dist: ipython<9; extra == 'agent'
61
+ Requires-Dist: jupyter-client; extra == 'agent'
62
+ Requires-Dist: jupyter-core; extra == 'agent'
60
63
  Requires-Dist: langchain; extra == 'agent'
61
64
  Requires-Dist: langchain-anthropic; extra == 'agent'
62
65
  Requires-Dist: langchain-openai; extra == 'agent'
@@ -66,7 +69,11 @@ Provides-Extra: agents
66
69
  Requires-Dist: anthropic; extra == 'agents'
67
70
  Requires-Dist: datasets>=2.14.0; extra == 'agents'
68
71
  Requires-Dist: dotenv>=0.9.9; extra == 'agents'
69
- Requires-Dist: hud-mcp-use-python-sdk>=0.1.0; extra == 'agents'
72
+ Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'agents'
73
+ Requires-Dist: ipykernel; extra == 'agents'
74
+ Requires-Dist: ipython<9; extra == 'agents'
75
+ Requires-Dist: jupyter-client; extra == 'agents'
76
+ Requires-Dist: jupyter-core; extra == 'agents'
70
77
  Requires-Dist: langchain; extra == 'agents'
71
78
  Requires-Dist: langchain-anthropic; extra == 'agents'
72
79
  Requires-Dist: langchain-openai; extra == 'agents'
@@ -77,7 +84,7 @@ Requires-Dist: aiodocker>=0.24.0; extra == 'dev'
77
84
  Requires-Dist: anthropic; extra == 'dev'
78
85
  Requires-Dist: datasets>=2.14.0; extra == 'dev'
79
86
  Requires-Dist: dotenv>=0.9.9; extra == 'dev'
80
- Requires-Dist: hud-mcp-use-python-sdk>=0.1.0; extra == 'dev'
87
+ Requires-Dist: hud-mcp-use-python-sdk>=2.3.13; extra == 'dev'
81
88
  Requires-Dist: inspect-ai>=0.3.80; extra == 'dev'
82
89
  Requires-Dist: ipykernel; extra == 'dev'
83
90
  Requires-Dist: ipython<9; extra == 'dev'
@@ -233,7 +240,7 @@ Any hud MCP environment and evaluation works with our RL pipeline. Even our remo
233
240
 
234
241
  This is Claude Computer Use running on our proprietary financial analyst benchmark [SheetBench-50](https://huggingface.co/datasets/hud-evals/SheetBench-50):
235
242
 
236
- ![Trace screenshot](https://raw.githubusercontent.com/hud-evals/hud-python/l/text-2048/docs/src/images/trace_sheet.gif)
243
+ ![Trace screenshot](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
237
244
 
238
245
  > [See this trace on _app.hud.so_](https://app.hud.so/trace/9e212e9e-3627-4f1f-9eb5-c6d03c59070a)
239
246
 
@@ -385,7 +392,7 @@ result = await ClaudeAgent().run({ # See all agents: https://docs.hud.so/refere
385
392
 
386
393
  All leaderboards are publicly available on [app.hud.so/leaderboards](https://app.hud.so/leaderboards) (see [docs](https://docs.hud.so/evaluate-agents/leaderboards))
387
394
 
388
- ![Leaderboard](https://raw.githubusercontent.com/hud-evals/hud-python/l/text-2048/docs/src/images/leaderboards_2.png)
395
+ ![Leaderboard](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/leaderboards_3.png)
389
396
 
390
397
  We highly suggest running 3-5 evaluations per dataset for the most consistent results across multiple jobs.
391
398
 
@@ -430,10 +437,6 @@ graph LR
430
437
  Trace --> Dashboard
431
438
  AnyMCP -->|"MCP"| API
432
439
 
433
- style Dashboard fill:#e0e7ff,stroke:#6366f1,stroke-width:2px
434
- style SDK fill:#fef3c7,stroke:#f59e0b,stroke-width:2px
435
- style RemoteEnv fill:#d1fae5,stroke:#10b981,stroke-width:2px
436
- style AnyMCP fill:#fce7f3,stroke:#ec4899,stroke-width:2px,stroke-dasharray: 5 5
437
440
  ```
438
441
 
439
442
  ## CLI reference