PyPI - hud-python - Versions diffs - 0.5.7__tar.gz → 0.5.9__tar.gz - Mend

hud-python 0.5.7__tar.gz → 0.5.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (305) hide show

{hud_python-0.5.7 → hud_python-0.5.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.5.7
+Version: 0.5.9
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues

{hud_python-0.5.7 → hud_python-0.5.9}/hud/cli/__init__.py RENAMED Viewed

@@ -757,7 +757,8 @@ def build(
         hud build environments/text_2048 -e API_KEY=secret
         hud build . --tag my-env:v1.0 -e VAR1=value1 -e VAR2=value2
         hud build . --no-cache       # Force rebuild
-        hud build . --remote-cache my-cache-repo   # Use ECR remote cache (requires AWS_ACCOUNT_ID and AWS_DEFAULT_REGION)[/not dim]
+        hud build . --remote-cache my-cache-repo   # Use ECR remote cache (requires AWS_ACCOUNT_ID and AWS_DEFAULT_REGION)
+        hud build . --build-arg NODE_ENV=production  # Pass Docker build args[/not dim]
     """  # noqa: E501
     # Parse directory and extra arguments
     if params:
@@ -767,8 +768,9 @@ def build(
         directory = "."
         extra_args = []
-    # Parse environment variables from extra args
+    # Parse environment variables and build args from extra args
     env_vars = {}
+    build_args = {}
     i = 0
     while i < len(extra_args):
         if extra_args[i] == "-e" and i + 1 < len(extra_args):
@@ -792,10 +794,26 @@ def build(
                 key, value = env_arg.split("=", 1)
                 env_vars[key] = value
             i += 2
+        elif extra_args[i] == "--build-arg" and i + 1 < len(extra_args):
+            # Parse --build-arg KEY=VALUE format
+            build_arg = extra_args[i + 1]
+            if "=" in build_arg:
+                key, value = build_arg.split("=", 1)
+                build_args[key] = value
+            i += 2
+        elif extra_args[i].startswith("--build-arg="):
+            # Parse --build-arg=KEY=VALUE format
+            build_arg = extra_args[i][12:]  # Remove --build-arg=
+            if "=" in build_arg:
+                key, value = build_arg.split("=", 1)
+                build_args[key] = value
+            i += 1
         else:
             i += 1
-    build_command(directory, tag, no_cache, verbose, env_vars, platform, remote_cache)
+    build_command(
+        directory, tag, no_cache, verbose, env_vars, platform, remote_cache, build_args or None
+    )
 @app.command()

{hud_python-0.5.7 → hud_python-0.5.9}/hud/cli/build.py RENAMED Viewed

@@ -651,10 +651,12 @@ def build_environment(
     env_vars: dict[str, str] | None = None,
     platform: str | None = None,
     remote_cache: str | None = None,
+    build_args: dict[str, str] | None = None,
 ) -> None:
     """Build a HUD environment and generate lock file."""
     hud_console = HUDConsole()
     env_vars = env_vars or {}
+    build_args = build_args or {}
     hud_console.header("HUD Environment Build")
     # Resolve directory
@@ -721,7 +723,7 @@ def build_environment(
         temp_tag,
         no_cache,
         verbose,
-        build_args=None,
+        build_args=build_args or None,
         platform=platform,
         remote_cache=remote_cache,
     ):
@@ -1002,6 +1004,10 @@ def build_environment(
     if image_tag and image_tag not in [version_tag, latest_tag]:
         label_cmd.extend(["-t", image_tag])
+    # Add build args to final image build (same as initial build)
+    for key, value in build_args.items():
+        label_cmd.extend(["--build-arg", f"{key}={value}"])
     label_cmd.append(str(env_dir))
     # Run rebuild using Docker's native output formatting
@@ -1106,6 +1112,9 @@ def build_command(
     env_vars: dict[str, str] | None = None,
     platform: str | None = None,
     remote_cache: str | None = None,
+    build_args: dict[str, str] | None = None,
 ) -> None:
     """Build a HUD environment and generate lock file."""
-    build_environment(directory, tag, no_cache, verbose, env_vars, platform, remote_cache)
+    build_environment(
+        directory, tag, no_cache, verbose, env_vars, platform, remote_cache, build_args
+    )

{hud_python-0.5.7 → hud_python-0.5.9}/hud/clients/mcp_use.py RENAMED Viewed

@@ -64,9 +64,14 @@ class MCPUseHUDClient(BaseHUDClient):
             return
         # Use configurable timeout for SSE read operations to support long-running tool calls.
+        max_request_timeout = 840
         for server_cfg in mcp_config.values():
             if "sse_read_timeout" not in server_cfg:
-                server_cfg["sse_read_timeout"] = settings.client_timeout
+                server_cfg["sse_read_timeout"] = (
+                    min(settings.client_timeout, max_request_timeout)
+                    if settings.client_timeout > 0
+                    else max_request_timeout
+                )
         # If a server target matches HUD's MCP host and no auth is provided,
         # inject the HUD API key as a Bearer token to avoid OAuth browser flow.

{hud_python-0.5.7 → hud_python-0.5.9}/hud/datasets/loader.py RENAMED Viewed

@@ -14,6 +14,10 @@ import warnings
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, overload
+import httpx
+from hud.settings import settings
 if TYPE_CHECKING:
     from hud.eval.task import Task
@@ -106,10 +110,6 @@ def _load_from_huggingface(dataset_name: str) -> list[Task]:
 def _load_raw_from_api(dataset_name: str) -> list[dict[str, Any]]:
     """Load raw task dicts from HUD API."""
-    import httpx
-    from hud.settings import settings
     headers = {}
     if settings.api_key:
         headers["Authorization"] = f"Bearer {settings.api_key}"
@@ -271,10 +271,6 @@ def save_tasks(
         TypeError: If any task is not a v5 Task object (must have 'scenario')
         ValueError: If API key is not set or save fails
     """
-    import httpx
-    from hud.settings import settings
     if not settings.api_key:
         raise ValueError("HUD_API_KEY is required to save tasks")

{hud_python-0.5.7 → hud_python-0.5.9}/hud/datasets/tests/test_loader.py RENAMED Viewed

@@ -12,8 +12,8 @@ from hud.datasets.loader import load_tasks
 class TestLoadTasks:
     """Tests for load_tasks() function."""
-    @patch("httpx.Client")
-    @patch("hud.settings.settings")
+    @patch("hud.datasets.loader.httpx.Client")
+    @patch("hud.datasets.loader.settings")
     def test_load_tasks_success(
         self, mock_settings: MagicMock, mock_client_class: MagicMock
     ) -> None:
@@ -62,8 +62,8 @@ class TestLoadTasks:
             params={"all": "true"},
         )
-    @patch("httpx.Client")
-    @patch("hud.settings.settings")
+    @patch("hud.datasets.loader.httpx.Client")
+    @patch("hud.datasets.loader.settings")
     def test_load_tasks_single_task(
         self, mock_settings: MagicMock, mock_client_class: MagicMock
     ) -> None:
@@ -97,8 +97,8 @@ class TestLoadTasks:
         assert tasks[0].scenario == "checkout"
         assert tasks[0].id == "task-1"
-    @patch("httpx.Client")
-    @patch("hud.settings.settings")
+    @patch("hud.datasets.loader.httpx.Client")
+    @patch("hud.datasets.loader.settings")
     def test_load_tasks_no_api_key(
         self, mock_settings: MagicMock, mock_client_class: MagicMock
     ) -> None:
@@ -129,8 +129,8 @@ class TestLoadTasks:
             params={"all": "true"},
         )
-    @patch("httpx.Client")
-    @patch("hud.settings.settings")
+    @patch("hud.datasets.loader.httpx.Client")
+    @patch("hud.datasets.loader.settings")
     def test_load_tasks_http_error(
         self, mock_settings: MagicMock, mock_client_class: MagicMock
     ) -> None:
@@ -149,8 +149,8 @@ class TestLoadTasks:
         with pytest.raises(ValueError, match="Failed to load tasks"):
             load_tasks("test-org/test-dataset")
-    @patch("httpx.Client")
-    @patch("hud.settings.settings")
+    @patch("hud.datasets.loader.httpx.Client")
+    @patch("hud.datasets.loader.settings")
     def test_load_tasks_json_error(
         self, mock_settings: MagicMock, mock_client_class: MagicMock
     ) -> None:
@@ -171,8 +171,8 @@ class TestLoadTasks:
         with pytest.raises(ValueError, match="Failed to load tasks"):
             load_tasks("test-org/test-dataset")
-    @patch("httpx.Client")
-    @patch("hud.settings.settings")
+    @patch("hud.datasets.loader.httpx.Client")
+    @patch("hud.datasets.loader.settings")
     def test_load_tasks_empty(self, mock_settings: MagicMock, mock_client_class: MagicMock) -> None:
         """load_tasks() handles empty dataset."""
         mock_settings.hud_api_url = "https://api.hud.ai"
@@ -192,8 +192,8 @@ class TestLoadTasks:
         assert len(tasks) == 0
-    @patch("httpx.Client")
-    @patch("hud.settings.settings")
+    @patch("hud.datasets.loader.httpx.Client")
+    @patch("hud.datasets.loader.settings")
     def test_load_tasks_missing_fields(
         self, mock_settings: MagicMock, mock_client_class: MagicMock
     ) -> None:

{hud_python-0.5.7 → hud_python-0.5.9}/hud/environment/connection.py RENAMED Viewed

@@ -120,8 +120,10 @@ class Connector:
         """
         from fastmcp.client import Client as FastMCPClient
-        # Create fresh client from stored transport config
-        self.client = FastMCPClient(transport=self._transport, auth=self._auth)
+        self.client = FastMCPClient(
+            transport=self._transport,
+            auth=self._auth,
+        )
         await self.client.__aenter__()
     async def disconnect(self) -> None:
@@ -198,10 +200,21 @@ class Connector:
         Always fetches fresh data from the server (no caching check).
         The result is cached for use by router.build_resources() via cached_resources property.
+        Note: resources/list is optional in the MCP spec. If the server doesn't
+        implement it, we return an empty list gracefully.
         """
         if self.client is None:
             raise RuntimeError("Not connected - call connect() first")
-        self._resources_cache = await self.client.list_resources()
+        try:
+            self._resources_cache = await self.client.list_resources()
+        except Exception as e:
+            # Handle servers that don't implement resources/list (optional in MCP spec)
+            if "Method not found" in str(e):
+                logger.debug("Server %s does not support resources/list", self.name)
+                self._resources_cache = []
+            else:
+                raise
         return self._resources_cache
     async def list_prompts(self) -> list[mcp_types.Prompt]:
@@ -209,10 +222,21 @@ class Connector:
         Always fetches fresh data from the server (no caching check).
         The result is cached for use by router.build_prompts() via cached_prompts property.
+        Note: prompts/list is optional in the MCP spec. If the server doesn't
+        implement it, we return an empty list gracefully.
         """
         if self.client is None:
             raise RuntimeError("Not connected - call connect() first")
-        self._prompts_cache = await self.client.list_prompts()
+        try:
+            self._prompts_cache = await self.client.list_prompts()
+        except Exception as e:
+            # Handle servers that don't implement prompts/list (optional in MCP spec)
+            if "Method not found" in str(e):
+                logger.debug("Server %s does not support prompts/list", self.name)
+                self._prompts_cache = []
+            else:
+                raise
         return self._prompts_cache
     async def read_resource(

{hud_python-0.5.7 → hud_python-0.5.9}/hud/environment/connectors/mcp_config.py RENAMED Viewed

@@ -50,6 +50,7 @@ class MCPConfigConnectorMixin(BaseConnectorMixin):
             ```
         """
         from hud.environment.connection import ConnectionType
+        from hud.settings import settings
         name = alias or next(iter(config.keys()), "mcp")
         server_config = next(iter(config.values()), {})
@@ -57,9 +58,20 @@ class MCPConfigConnectorMixin(BaseConnectorMixin):
         is_local = "command" in server_config or "args" in server_config
         conn_type = ConnectionType.LOCAL if is_local else ConnectionType.REMOTE
+        transport: Any = config
+        if not is_local and "url" in server_config:
+            max_request_timeout = 840
+            server_config.setdefault(
+                "sse_read_timeout",
+                min(settings.client_timeout, max_request_timeout)
+                if settings.client_timeout > 0
+                else max_request_timeout,
+            )
+            transport = _build_transport(server_config)
         return self._add_connection(
             name,
-            config,
+            transport,
             connection_type=conn_type,
             prefix=prefix,
             include=include,
@@ -107,3 +119,19 @@ class MCPConfigConnectorMixin(BaseConnectorMixin):
         for server_name, server_config in mcp_config.items():
             self.connect_mcp({server_name: server_config}, alias=server_name, **kwargs)
         return self
+def _build_transport(server_config: dict[str, Any]) -> Any:
+    from fastmcp.client.transports import SSETransport, StreamableHttpTransport
+    from fastmcp.mcp_config import infer_transport_type_from_url
+    url = server_config["url"]
+    transport_type = server_config.get("transport") or infer_transport_type_from_url(url)
+    transport_cls = SSETransport if transport_type == "sse" else StreamableHttpTransport
+    return transport_cls(
+        url=url,
+        headers=server_config.get("headers"),
+        auth=server_config.get("auth"),
+        sse_read_timeout=server_config.get("sse_read_timeout"),
+    )

{hud_python-0.5.7 → hud_python-0.5.9}/hud/environment/environment.py RENAMED Viewed

@@ -189,18 +189,25 @@ class Environment(
         """Return tools in MCP format (base format).
         Applies agent-level include/exclude filtering if set.
+        Supports fnmatch-style wildcards (e.g., "*setup*", "browser_*").
         """
+        import fnmatch
         tools = self._router.tools
         # Apply agent-level filtering (from v4 allowed_tools/disallowed_tools)
         if self._agent_include is not None or self._agent_exclude is not None:
             filtered = []
             for tool in tools:
-                # Include filter: None means include all
-                if self._agent_include is not None and tool.name not in self._agent_include:
+                # Include filter: None means include all, check if matches any pattern
+                if self._agent_include is not None and not any(
+                    fnmatch.fnmatch(tool.name, pattern) for pattern in self._agent_include
+                ):
                     continue
-                # Exclude filter
-                if self._agent_exclude is not None and tool.name in self._agent_exclude:
+                # Exclude filter: skip if tool matches any exclude pattern
+                if self._agent_exclude is not None and any(
+                    fnmatch.fnmatch(tool.name, pattern) for pattern in self._agent_exclude
+                ):
                     continue
                 filtered.append(tool)
             return filtered

{hud_python-0.5.7 → hud_python-0.5.9}/hud/environment/tests/test_connectors.py RENAMED Viewed

@@ -3,7 +3,7 @@
 from __future__ import annotations
 from typing import Any
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 from hud.environment.connection import ConnectionType, Connector
@@ -180,39 +180,26 @@ class TestRemoteConnectorMixin:
         conn = env._connections["example"]
         assert conn._auth == "Bearer my-token"
-    @patch("httpx.Client")
-    def test_connect_hub_fetches_config(self, mock_httpx_cls: MagicMock) -> None:
-        """connect_hub fetches mcp_config from API."""
+    def test_connect_hub_creates_connection(self) -> None:
+        """connect_hub creates connection with correct config."""
         from hud.environment.connectors.remote import RemoteConnectorMixin
         class TestEnv(RemoteConnectorMixin):
             def __init__(self) -> None:
                 self._connections: dict[str, Connector] = {}
+                self._hub_config: dict[str, Any] | None = None
             def mount(self, server: Any, *, prefix: str | None = None) -> None:
                 pass
-        # Mock httpx response
-        mock_response = MagicMock()
-        mock_response.json.return_value = {
-            "mcp_config": {
-                "browser": {"url": "https://mcp.hud.ai/browser"},
-            }
-        }
-        mock_response.raise_for_status = MagicMock()
-        mock_client = MagicMock()
-        mock_client.get.return_value = mock_response
-        mock_client.__enter__ = MagicMock(return_value=mock_client)
-        mock_client.__exit__ = MagicMock(return_value=None)
-        mock_httpx_cls.return_value = mock_client
         env = TestEnv()
         with patch("hud.settings.settings") as mock_settings:
-            mock_settings.hud_api_url = "https://api.hud.so"
-            mock_settings.api_key = "test-key"
+            mock_settings.hud_mcp_url = "https://mcp.hud.ai"
+            mock_settings.client_timeout = 300  # Used in connect_mcp for sse_read_timeout
-            env.connect_hub("hud/browser")
+            env.connect_hub("browser")
-        # connect_hub creates a connection named "hud" (the server name)
+        # connect_hub creates a connection named "hud" (from mcp_config key)
         assert "hud" in env._connections
+        # Verify hub config is stored for serialization
+        assert env._hub_config == {"name": "browser"}

{hud_python-0.5.7 → hud_python-0.5.9}/hud/environment/tests/test_environment.py RENAMED Viewed

@@ -343,3 +343,251 @@ class TestEnvironmentMCPProtocol:
         assert hasattr(env, "_env_call_tool")
         assert callable(env._env_list_tools)
         assert callable(env._env_call_tool)
+class TestEnvironmentToolFiltering:
+    """Tests for agent-level tool filtering with wildcard support (v4 backwards compat)."""
+    @pytest.mark.asyncio
+    async def test_as_tools_no_filter(self) -> None:
+        """as_tools returns all tools when no filter is set."""
+        from hud.environment import Environment
+        env = Environment("test")
+        @env.tool()
+        def tool_a() -> str:
+            """Tool A."""
+            return "a"
+        @env.tool()
+        def tool_b() -> str:
+            """Tool B."""
+            return "b"
+        await env._build_routing()
+        tools = env.as_tools()
+        tool_names = [t.name for t in tools]
+        assert "tool_a" in tool_names
+        assert "tool_b" in tool_names
+    @pytest.mark.asyncio
+    async def test_as_tools_exact_include(self) -> None:
+        """as_tools filters with exact include list."""
+        from hud.environment import Environment
+        env = Environment("test")
+        @env.tool()
+        def tool_a() -> str:
+            """Tool A."""
+            return "a"
+        @env.tool()
+        def tool_b() -> str:
+            """Tool B."""
+            return "b"
+        env._agent_include = ["tool_a"]
+        await env._build_routing()
+        tools = env.as_tools()
+        tool_names = [t.name for t in tools]
+        assert "tool_a" in tool_names
+        assert "tool_b" not in tool_names
+    @pytest.mark.asyncio
+    async def test_as_tools_exact_exclude(self) -> None:
+        """as_tools filters with exact exclude list."""
+        from hud.environment import Environment
+        env = Environment("test")
+        @env.tool()
+        def tool_a() -> str:
+            """Tool A."""
+            return "a"
+        @env.tool()
+        def tool_b() -> str:
+            """Tool B."""
+            return "b"
+        env._agent_exclude = ["tool_a"]
+        await env._build_routing()
+        tools = env.as_tools()
+        tool_names = [t.name for t in tools]
+        assert "tool_a" not in tool_names
+        assert "tool_b" in tool_names
+    @pytest.mark.asyncio
+    async def test_as_tools_wildcard_exclude_prefix(self) -> None:
+        """as_tools filters with wildcard prefix pattern (e.g., 'setup_*')."""
+        from hud.environment import Environment
+        env = Environment("test")
+        @env.tool()
+        def setup_database() -> str:
+            """Setup tool."""
+            return "setup"
+        @env.tool()
+        def setup_user() -> str:
+            """Another setup tool."""
+            return "setup"
+        @env.tool()
+        def run_query() -> str:
+            """Regular tool."""
+            return "query"
+        env._agent_exclude = ["setup_*"]
+        await env._build_routing()
+        tools = env.as_tools()
+        tool_names = [t.name for t in tools]
+        assert "setup_database" not in tool_names
+        assert "setup_user" not in tool_names
+        assert "run_query" in tool_names
+    @pytest.mark.asyncio
+    async def test_as_tools_wildcard_exclude_contains(self) -> None:
+        """as_tools filters with wildcard contains pattern (e.g., '*setup*')."""
+        from hud.environment import Environment
+        env = Environment("test")
+        @env.tool()
+        def hud_setup() -> str:
+            """Contains setup."""
+            return "setup"
+        @env.tool()
+        def setup_env() -> str:
+            """Starts with setup."""
+            return "setup"
+        @env.tool()
+        def my_setup_tool() -> str:
+            """Contains setup in middle."""
+            return "setup"
+        @env.tool()
+        def run_query() -> str:
+            """No setup in name."""
+            return "query"
+        env._agent_exclude = ["*setup*"]
+        await env._build_routing()
+        tools = env.as_tools()
+        tool_names = [t.name for t in tools]
+        assert "hud_setup" not in tool_names
+        assert "setup_env" not in tool_names
+        assert "my_setup_tool" not in tool_names
+        assert "run_query" in tool_names
+    @pytest.mark.asyncio
+    async def test_as_tools_multiple_wildcard_patterns(self) -> None:
+        """as_tools filters with multiple wildcard patterns."""
+        from hud.environment import Environment
+        env = Environment("test")
+        @env.tool()
+        def setup_db() -> str:
+            """Setup tool."""
+            return "setup"
+        @env.tool()
+        def evaluate_result() -> str:
+            """Evaluate tool."""
+            return "evaluate"
+        @env.tool()
+        def checkout_branch() -> str:
+            """Checkout tool."""
+            return "checkout"
+        @env.tool()
+        def run_query() -> str:
+            """Regular tool."""
+            return "query"
+        env._agent_exclude = ["*setup*", "*evaluate*", "checkout_branch"]
+        await env._build_routing()
+        tools = env.as_tools()
+        tool_names = [t.name for t in tools]
+        assert "setup_db" not in tool_names
+        assert "evaluate_result" not in tool_names
+        assert "checkout_branch" not in tool_names
+        assert "run_query" in tool_names
+    @pytest.mark.asyncio
+    async def test_as_tools_wildcard_include_all(self) -> None:
+        """as_tools with ['*'] include pattern matches all tools."""
+        from hud.environment import Environment
+        env = Environment("test")
+        @env.tool()
+        def tool_a() -> str:
+            """Tool A."""
+            return "a"
+        @env.tool()
+        def tool_b() -> str:
+            """Tool B."""
+            return "b"
+        env._agent_include = ["*"]
+        await env._build_routing()
+        tools = env.as_tools()
+        tool_names = [t.name for t in tools]
+        assert "tool_a" in tool_names
+        assert "tool_b" in tool_names
+    @pytest.mark.asyncio
+    async def test_as_tools_include_and_exclude_combined(self) -> None:
+        """as_tools applies both include and exclude filters."""
+        from hud.environment import Environment
+        env = Environment("test")
+        @env.tool()
+        def browser_navigate() -> str:
+            """Browser tool."""
+            return "nav"
+        @env.tool()
+        def browser_setup() -> str:
+            """Browser setup - should be excluded."""
+            return "setup"
+        @env.tool()
+        def file_read() -> str:
+            """File tool - not included."""
+            return "read"
+        env._agent_include = ["browser_*"]
+        env._agent_exclude = ["*setup*"]
+        await env._build_routing()
+        tools = env.as_tools()
+        tool_names = [t.name for t in tools]
+        assert "browser_navigate" in tool_names
+        assert "browser_setup" not in tool_names  # Excluded by *setup*
+        assert "file_read" not in tool_names  # Not included by browser_*

hud-python 0.5.7__tar.gz → 0.5.9__tar.gz

hud-python 0.5.7__tar.gz → 0.5.9__tar.gz