PyPI - hud-python - Versions diffs - 0.5.40__tar.gz → 0.5.41__tar.gz - Mend

hud-python 0.5.40__tar.gz → 0.5.41__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (357) hide show

{hud_python-0.5.40 → hud_python-0.5.41}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hud-python
-Version: 0.5.40
+Version: 0.5.41
 Summary: SDK for the HUD platform.
 Project-URL: Homepage, https://github.com/hud-evals/hud-python
 Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues

{hud_python-0.5.40 → hud_python-0.5.41}/hud/cli/init.py RENAMED Viewed

@@ -12,6 +12,8 @@ import httpx
 import questionary
 import typer
+from hud.cli.utils.api import hud_headers
+from hud.settings import settings
 from hud.utils.hud_console import HUDConsole
 # Presets mapping to public GitHub repositories under hud-evals org
@@ -22,6 +24,8 @@ PRESET_MAP: dict[str, str | None] = {
     "blank": "hud-blank",
     "deep-research": "hud-deepresearch",
     "browser": "hud-browser",
+    "remote-browser": "hud-remote-browser",
+    "coding": "coding-template",
     "rubrics": "hud-rubrics",
     "verilog-coding-template": "verilog-coding-template",
     "data-science-template": "data-science-template",
@@ -86,34 +90,53 @@ def _replace_placeholders(target_dir: Path, env_name: str) -> list[str]:
     return modified_files
-def _prompt_for_preset() -> str | None:
+def _fetch_available_templates() -> tuple[list[dict], list[dict]]:
+    """Fetch available templates from the HUD API.
+    Returns (public_templates, private_templates). Falls back to empty
+    private list if the API is unreachable or the user has no API key.
+    """
+    if not settings.api_key:
+        return [], []
+    try:
+        with httpx.Client(timeout=10) as client:
+            resp = client.get(
+                f"{settings.hud_api_url}/templates/available",
+                headers=hud_headers(),
+            )
+            if resp.status_code != 200:
+                return [], []
+            data = resp.json()
+            return data.get("public_templates", []), data.get("private_templates", [])
+    except Exception:
+        return [], []
+def _prompt_for_preset() -> tuple[str, bool] | None:
     """Ask the user to choose a preset when not provided.
-    Returns None if the user cancels the selection.
+    Returns (preset_id, is_private) or None if the user cancels.
     """
+    # Fetch private templates from API
+    _, private_templates = _fetch_available_templates()
     try:
-        choices = [
-            {"name": "blank", "message": "blank"},
-            {"name": "browser", "message": "browser"},
-            {"name": "deep-research", "message": "deep-research"},
-            {"name": "rubrics", "message": "rubrics"},
-            {"name": "verilog-coding-template", "message": "verilog-coding-template"},
-            {"name": "data-science-template", "message": "data-science-template"},
+        choices = [questionary.Choice(title=key, value=(key, False)) for key in PRESET_MAP] + [
+            questionary.Choice(title=t["id"], value=(t["id"], True)) for t in private_templates
         ]
-        display_choices = [c["message"] for c in choices]
         selected = questionary.select(
-            "Choose a preset", choices=display_choices, default=display_choices[0]
+            "Choose a preset",
+            choices=choices,
         ).ask()
         if not selected:
             return None  # User cancelled
-        for c in choices:
-            if c["message"] == selected:
-                return c["name"]
-        return "blank"
+        return selected
     except KeyboardInterrupt:
         return None  # User pressed Ctrl+C
     except Exception:
-        return "blank"
+        return ("blank", False)
 def _download_tarball_repo(
@@ -142,6 +165,32 @@ def _download_tarball_repo(
                 tmp_file.write(chunk)
         tmp_path = Path(tmp_file.name)
+    _extract_tarball(tmp_path, dest_dir, files_created)
+def _download_private_template(template_id: str, dest_dir: Path, files_created: list[str]) -> None:
+    """Download a private template tarball from the HUD API."""
+    url = f"{settings.hud_api_url}/templates/private/{template_id}/download"
+    with (
+        tempfile.NamedTemporaryFile(delete=False) as tmp_file,
+        httpx.Client(timeout=120) as client,
+        client.stream("GET", url, headers=hud_headers()) as resp,
+    ):
+        if resp.status_code == 403:
+            raise RuntimeError("Access denied: your team does not have access to this template.")
+        if resp.status_code != 200:
+            raise RuntimeError(f"Failed to download private template (HTTP {resp.status_code})")
+        for chunk in resp.iter_bytes():
+            if chunk:
+                tmp_file.write(chunk)
+        tmp_path = Path(tmp_file.name)
+    _extract_tarball(tmp_path, dest_dir, files_created)
+def _extract_tarball(tmp_path: Path, dest_dir: Path, files_created: list[str]) -> None:
+    """Extract a tarball into dest_dir, stripping the top-level directory."""
     try:
         with tarfile.open(tmp_path, mode="r:gz") as tar:
             members = tar.getmembers()
@@ -191,15 +240,26 @@ def create_environment(
     hud_console = HUDConsole()
+    is_private = False
     # Choose preset
     if preset:
-        preset_normalized = preset.strip().lower()
+        preset_stripped = preset.strip()
+        preset_normalized = preset_stripped.lower()
+        # Check if the preset matches a private template (case-insensitive)
+        _, private_templates = _fetch_available_templates()
+        for t in private_templates:
+            if t["id"].lower() == preset_normalized:
+                # Preserve the original API ID for case-sensitive downstream use
+                preset_normalized = t["id"]
+                is_private = True
+                break
     else:
         preset_result = _prompt_for_preset()
         if preset_result is None:
             # User cancelled the selection
             raise typer.Exit(0)
-        preset_normalized = preset_result
+        preset_normalized, is_private = preset_result
     # If no name is provided, use the preset name as the environment name
     if name is None:
@@ -209,7 +269,7 @@ def create_environment(
     # Always create a new directory based on the name
     target_dir = Path.cwd() / name if directory == "." else Path(directory) / name
-    if preset_normalized not in PRESET_MAP:
+    if not is_private and preset_normalized not in PRESET_MAP:
         available = ", ".join(sorted(PRESET_MAP.keys()))
         hud_console.warning(
             f"Unknown preset '{preset_normalized}', defaulting to 'blank' (available: {available})"
@@ -225,32 +285,45 @@ def create_environment(
         else:
             hud_console.warning(f"Overwriting existing files in {target_dir}")
-    # Download preset from GitHub
-    repo_name = PRESET_MAP[preset_normalized]
-    if repo_name is None:
-        hud_console.error("Internal error: preset mapping missing repo name")
-        raise typer.Exit(1)
     hud_console.header(f"Initializing HUD Environment: {name} (preset: {preset_normalized})")
-    hud_console.section_title("Downloading template from GitHub")
-    source_url = f"https://github.com/{GITHUB_OWNER}/{repo_name}"
-    hud_console.info("Source: " + source_url)
     target_dir.mkdir(parents=True, exist_ok=True)
     started = time.time()
     files_created_dl: list[str] = []
-    try:
-        _download_tarball_repo(
-            owner=GITHUB_OWNER,
-            repo=repo_name,
-            ref=GITHUB_BRANCH,
-            dest_dir=target_dir,
-            files_created=files_created_dl,
-        )
-    except Exception as e:
-        hud_console.error(f"Failed to download preset '{preset_normalized}': {e}")
-        raise typer.Exit(1) from None
+    if is_private:
+        hud_console.section_title("Downloading private template from HUD")
+        try:
+            _download_private_template(
+                template_id=preset_normalized,
+                dest_dir=target_dir,
+                files_created=files_created_dl,
+            )
+        except Exception as e:
+            hud_console.error(f"Failed to download private template '{preset_normalized}': {e}")
+            raise typer.Exit(1) from None
+    else:
+        # Download preset from GitHub
+        repo_name = PRESET_MAP[preset_normalized]
+        if repo_name is None:
+            hud_console.error("Internal error: preset mapping missing repo name")
+            raise typer.Exit(1)
+        hud_console.section_title("Downloading template from GitHub")
+        source_url = f"https://github.com/{GITHUB_OWNER}/{repo_name}"
+        hud_console.info("Source: " + source_url)
+        try:
+            _download_tarball_repo(
+                owner=GITHUB_OWNER,
+                repo=repo_name,
+                ref=GITHUB_BRANCH,
+                dest_dir=target_dir,
+                files_created=files_created_dl,
+            )
+        except Exception as e:
+            hud_console.error(f"Failed to download preset '{preset_normalized}': {e}")
+            raise typer.Exit(1) from None
     duration_ms = int((time.time() - started) * 1000)
     hud_console.success(
@@ -258,7 +331,7 @@ def create_environment(
     )
     # Replace placeholders in template files (only for blank preset)
-    if preset_normalized == "blank":
+    if preset_normalized == "blank" and not is_private:
         hud_console.section_title("Customizing template files")
         modified_files = _replace_placeholders(target_dir, name)
         if modified_files:

{hud_python-0.5.40 → hud_python-0.5.41}/hud/environment/environment.py RENAMED Viewed

@@ -195,6 +195,58 @@ class Environment(
     # Core Methods
     # =========================================================================
+    def _filtered_tools_for_session(self, session: Any) -> list[mcp_types.Tool]:
+        """Apply scenario-level tool filtering for a given session.
+        Filters in order:
+        1. exclude_sources: remove tools from excluded connections
+        2. exclude_tools: remove tools matching fnmatch patterns
+        3. allowed_tools: rescue specific tools back from exclusions
+        Does NOT apply agent-level filtering (_agent_include/_agent_exclude).
+        Args:
+            session: The ScenarioSession to filter for, or None (no filtering).
+        Returns:
+            List of tools visible under the session's exclusions.
+        """
+        import fnmatch
+        tools = self._router.tools
+        if not session:
+            return tools
+        excluded_sources = set(session.exclude_sources) if session.exclude_sources else None
+        excluded_patterns = session.exclude_tools
+        if excluded_sources or excluded_patterns:
+            filtered = []
+            for tool in tools:
+                if excluded_sources:
+                    source = self._router._tool_routing.get(tool.name, "")
+                    if source in excluded_sources:
+                        continue
+                if excluded_patterns and any(
+                    fnmatch.fnmatch(tool.name, pat) for pat in excluded_patterns
+                ):
+                    continue
+                filtered.append(tool)
+            tools = filtered
+        # Rescue: add back tools matching allowed_tools patterns
+        allowed_patterns = session.allowed_tools
+        if allowed_patterns:
+            visible_names = {t.name for t in tools}
+            for tool in self._router.tools:
+                if tool.name not in visible_names and any(
+                    fnmatch.fnmatch(tool.name, pat) for pat in allowed_patterns
+                ):
+                    tools.append(tool)
+        return tools
     def as_tools(self) -> list[mcp_types.Tool]:
         """Return tools in MCP format (base format).
@@ -207,37 +259,7 @@ class Environment(
         """
         import fnmatch
-        tools = self._router.tools
-        # Scenario-level exclusion (from @env.scenario(exclude_tools/exclude_sources))
-        session = self._active_session
-        if session:
-            excluded_sources = set(session.exclude_sources) if session.exclude_sources else None
-            excluded_patterns = session.exclude_tools
-            if excluded_sources or excluded_patterns:
-                filtered = []
-                for tool in tools:
-                    if excluded_sources:
-                        source = self._router._tool_routing.get(tool.name, "")
-                        if source in excluded_sources:
-                            continue
-                    if excluded_patterns and any(
-                        fnmatch.fnmatch(tool.name, pat) for pat in excluded_patterns
-                    ):
-                        continue
-                    filtered.append(tool)
-                tools = filtered
-            # Rescue: add back tools matching allowed_tools patterns
-            allowed_patterns = session.allowed_tools
-            if allowed_patterns:
-                visible_names = {t.name for t in tools}
-                for tool in self._router.tools:
-                    if tool.name not in visible_names and any(
-                        fnmatch.fnmatch(tool.name, pat) for pat in allowed_patterns
-                    ):
-                        tools.append(tool)
+        tools = self._filtered_tools_for_session(self._active_session)
         # Apply agent-level filtering (from v4 allowed_tools/disallowed_tools)
         if self._agent_include is not None or self._agent_exclude is not None:
@@ -628,10 +650,16 @@ class Environment(
             return mcp_types.ReadResourceResult(contents=contents)
     async def _env_list_tools(self) -> list[mcp_types.Tool]:
-        """Return all tools including those from connectors."""
+        """Return tools filtered by the active scenario session (if any).
+        When an MCP client has an active scenario session (set via get_prompt),
+        applies scenario-level tool exclusions so the agent only sees permitted tools.
+        """
         if not self._tool_routing_built:
             await self._build_tool_routing()
-        return self._router.tools
+        session_id = _safe_session_id(None)
+        session = self._get_session(session_id)
+        return self._filtered_tools_for_session(session)
     async def _env_list_prompts(self) -> list[mcp_types.Prompt]:
         """Return all prompts including those from connectors."""
@@ -649,6 +677,19 @@ class Environment(
         """Route tool calls through our router (handles both local and connector tools)."""
         args = dict(arguments or {})
+        # Enforce scenario-level tool exclusions for MCP clients.
+        # Internal tools (underscore prefix, e.g. _hud_submit) are always allowed
+        # as they are infrastructure tools, not agent-facing.
+        if not name.startswith("_"):
+            session_id = _safe_session_id(None)
+            session = self._get_session(session_id)
+            if session:
+                if not self._tool_routing_built:
+                    await self._build_tool_routing()
+                allowed_names = {t.name for t in self._filtered_tools_for_session(session)}
+                if name not in allowed_names:
+                    raise ValueError(f"Tool '{name}' is not available in the current scenario.")
         # Extract trace context propagated via MCP request (meta or arguments)
         trace_id = args.pop("_hud_trace_id", None)
         meta = kwargs.get("_meta") or kwargs.get("meta")

{hud_python-0.5.40 → hud_python-0.5.41}/hud/environment/tests/test_environment.py RENAMED Viewed

@@ -740,3 +740,254 @@ class TestEnvironmentToolFiltering:
         assert "browser_navigate" in tool_names
         assert "browser_setup" not in tool_names  # Excluded by *setup*
         assert "file_read" not in tool_names  # Not included by browser_*
+class TestMCPServerToolExclusion:
+    """Tests that scenario exclude_tools/exclude_sources/allowed_tools
+    are enforced on the MCP server path (_env_list_tools, _env_call_tool).
+    """
+    @pytest.mark.asyncio
+    async def test_env_list_tools_applies_scenario_filtering(self) -> None:
+        """_env_list_tools resolves the MCP session and applies scenario filtering.
+        The filtering logic itself (exclude_tools, exclude_sources, allowed_tools)
+        is tested thoroughly in test_scenarios.py::TestScenarioToolExclusion.
+        This test verifies the MCP server path wires up session lookup correctly.
+        """
+        from types import SimpleNamespace
+        import mcp.types as mcp_types
+        from mcp.server.lowlevel.server import request_ctx
+        from hud.environment import Environment
+        from hud.environment.connection import ConnectionConfig, ConnectionType, Connector
+        env = Environment("test-env")
+        @env.tool()
+        def browser_navigate(url: str) -> str:
+            """Navigate."""
+            return url
+        @env.tool()
+        def browser_screenshot() -> str:
+            """Screenshot."""
+            return "img"
+        @env.tool()
+        def bash(cmd: str) -> str:
+            """Run command."""
+            return cmd
+        connector = Connector(
+            transport={},
+            config=ConnectionConfig(),
+            name="remote-hub",
+            connection_type=ConnectionType.REMOTE,
+        )
+        connector._tools_cache = [
+            mcp_types.Tool(name="remote_a", inputSchema={"type": "object"}),
+        ]
+        env._connections["remote-hub"] = connector
+        @env.scenario(
+            "filtered",
+            exclude_tools=["browser_*"],
+            exclude_sources=["remote-hub"],
+            allowed_tools=["browser_navigate"],
+        )
+        async def filtered():
+            yield "Do it"
+            yield 1.0
+        await env._build_routing()
+        req = SimpleNamespace(
+            session=SimpleNamespace(),
+            request=SimpleNamespace(headers={"mcp-session-id": "test-session"}),
+        )
+        token = request_ctx.set(req)  # type: ignore[arg-type]
+        try:
+            await env._env_get_prompt("test-env:filtered", {})
+            tools = await env._env_list_tools()
+        finally:
+            request_ctx.reset(token)
+        tool_names = [t.name for t in tools]
+        assert "bash" in tool_names
+        assert "browser_navigate" in tool_names  # Rescued by allowed_tools
+        assert "browser_screenshot" not in tool_names  # Excluded by pattern
+        assert "remote_a" not in tool_names  # Excluded by source
+    @pytest.mark.asyncio
+    async def test_env_call_tool_rejects_excluded_tool(self) -> None:
+        """_env_call_tool raises ValueError for excluded tools."""
+        from types import SimpleNamespace
+        from mcp.server.lowlevel.server import request_ctx
+        from hud.environment import Environment
+        env = Environment("test-env")
+        @env.tool()
+        def browser_navigate(url: str) -> str:
+            """Navigate."""
+            return url
+        @env.tool()
+        def bash(cmd: str) -> str:
+            """Run command."""
+            return cmd
+        @env.scenario("headless", exclude_tools=["browser_*"])
+        async def headless():
+            yield "Do it"
+            yield 1.0
+        await env._build_routing()
+        req = SimpleNamespace(
+            session=SimpleNamespace(),
+            request=SimpleNamespace(headers={"mcp-session-id": "test-session-4"}),
+        )
+        token = request_ctx.set(req)  # type: ignore[arg-type]
+        try:
+            await env._env_get_prompt("test-env:headless", {})
+            with pytest.raises(ValueError, match="not available"):
+                await env._env_call_tool("browser_navigate", {"url": "http://example.com"})
+        finally:
+            request_ctx.reset(token)
+    @pytest.mark.asyncio
+    async def test_env_call_tool_allows_non_excluded_tool(self) -> None:
+        """_env_call_tool succeeds for non-excluded tools."""
+        from types import SimpleNamespace
+        from mcp.server.lowlevel.server import request_ctx
+        from hud.environment import Environment
+        env = Environment("test-env")
+        @env.tool()
+        def browser_navigate(url: str) -> str:
+            """Navigate."""
+            return url
+        @env.tool()
+        def bash(cmd: str) -> str:
+            """Run command."""
+            return cmd
+        @env.scenario("headless", exclude_tools=["browser_*"])
+        async def headless():
+            yield "Do it"
+            yield 1.0
+        await env._build_routing()
+        req = SimpleNamespace(
+            session=SimpleNamespace(),
+            request=SimpleNamespace(headers={"mcp-session-id": "test-session-5"}, scope={}),
+        )
+        token = request_ctx.set(req)  # type: ignore[arg-type]
+        try:
+            await env._env_get_prompt("test-env:headless", {})
+            # Should not raise - bash is not excluded
+            result = await env._env_call_tool("bash", {"cmd": "echo hi"})
+            assert result is not None
+        finally:
+            request_ctx.reset(token)
+    @pytest.mark.asyncio
+    async def test_env_call_tool_allows_internal_tools(self) -> None:
+        """_env_call_tool always allows underscore-prefixed internal tools."""
+        from types import SimpleNamespace
+        from mcp.server.lowlevel.server import request_ctx
+        from hud.environment import Environment
+        env = Environment("test-env")
+        @env.tool()
+        def browser_navigate(url: str) -> str:
+            """Navigate."""
+            return url
+        @env.scenario("headless", exclude_tools=["*"])
+        async def headless():
+            answer = yield "Do it"
+            yield 1.0 if answer == "ok" else 0.0
+        await env._build_routing()
+        req = SimpleNamespace(
+            session=SimpleNamespace(),
+            request=SimpleNamespace(headers={"mcp-session-id": "test-session-6"}, scope={}),
+        )
+        token = request_ctx.set(req)  # type: ignore[arg-type]
+        try:
+            await env._env_get_prompt("test-env:headless", {})
+            # _hud_submit should always work even with exclude_tools=["*"]
+            result = await env._env_call_tool(
+                "_hud_submit", {"scenario": "headless", "answer": "ok"}
+            )
+            assert result is not None
+        finally:
+            request_ctx.reset(token)
+    @pytest.mark.asyncio
+    async def test_env_list_tools_no_session_returns_all(self) -> None:
+        """_env_list_tools returns all tools when no scenario session is active."""
+        from hud.environment import Environment
+        env = Environment("test-env")
+        @env.tool()
+        def browser_navigate(url: str) -> str:
+            """Navigate."""
+            return url
+        @env.tool()
+        def bash(cmd: str) -> str:
+            """Run command."""
+            return cmd
+        @env.scenario("headless", exclude_tools=["browser_*"])
+        async def headless():
+            yield "Do it"
+            yield 1.0
+        await env._build_routing()
+        # No scenario setup, no request_ctx - should return all tools
+        tools = await env._env_list_tools()
+        tool_names = [t.name for t in tools]
+        assert "browser_navigate" in tool_names
+        assert "bash" in tool_names
+    @pytest.mark.asyncio
+    async def test_env_call_tool_no_session_allows_all(self) -> None:
+        """_env_call_tool allows any tool when no scenario session is active."""
+        from hud.environment import Environment
+        env = Environment("test-env")
+        @env.tool()
+        def browser_navigate(url: str) -> str:
+            """Navigate."""
+            return url
+        @env.scenario("headless", exclude_tools=["browser_*"])
+        async def headless():
+            yield "Do it"
+            yield 1.0
+        await env._build_routing()
+        # No scenario setup - should allow any tool
+        result = await env._env_call_tool("browser_navigate", {"url": "http://example.com"})
+        assert result is not None

{hud_python-0.5.40 → hud_python-0.5.41}/hud/tools/computer/gemini.py RENAMED Viewed

@@ -22,6 +22,9 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
+GEMINI_DRAG_INSET = 25
+DISPLAY_DRAG_INSET_PIXELS = 20
 SUPPORTED_GEMINI_COMPUTER_USE_MODELS = (
     "gemini-2.5-computer-use-preview-10-2025",
     "gemini-3-flash-preview",
@@ -168,6 +171,30 @@ class GeminiComputerTool(HudComputerTool):
             **kwargs,
         )
+    def _inset_drag_coordinate(self, value: int) -> int:
+        """Keep Gemini normalized drag endpoints away from display edges."""
+        if (
+            self.coordinate_space is None
+            or not isinstance(value, int | float)
+            or not 0 <= value <= self.coordinate_space
+        ):
+            return value
+        max_value = max(self.coordinate_space - GEMINI_DRAG_INSET, GEMINI_DRAG_INSET)
+        return min(max(value, GEMINI_DRAG_INSET), max_value)
+    def _inset_scaled_drag_path(self, path: list[tuple[int, int]]) -> list[tuple[int, int]]:
+        """Keep scaled drag points inside the display so they do not hit OS/window edges."""
+        max_x = max(self.environment_width - 1 - DISPLAY_DRAG_INSET_PIXELS, 0)
+        max_y = max(self.environment_height - 1 - DISPLAY_DRAG_INSET_PIXELS, 0)
+        return [
+            (
+                min(max(int(x), DISPLAY_DRAG_INSET_PIXELS), max_x),
+                min(max(int(y), DISPLAY_DRAG_INSET_PIXELS), max_y),
+            )
+            for x, y in path
+        ]
     async def __call__(
         self,
         action: str = ACTION_FIELD,
@@ -381,7 +408,16 @@ class GeminiComputerTool(HudComputerTool):
                         message="x, y, destination_x, and destination_y are required",
                     )
                 )
-            path = self._scale_path([(x, y), (destination_x, destination_y)])
+            path = self._scale_path(
+                [
+                    (self._inset_drag_coordinate(x), self._inset_drag_coordinate(y)),
+                    (
+                        self._inset_drag_coordinate(destination_x),
+                        self._inset_drag_coordinate(destination_y),
+                    ),
+                ]
+            )
+            path = self._inset_scaled_drag_path(path)
             result = await self.executor.drag(path=path)
             return await _finalize(result)

hud-python 0.5.40__tar.gz → 0.5.41__tar.gz

hud-python 0.5.40__tar.gz → 0.5.41__tar.gz