PyPI - hud-python - Versions diffs - 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl - Mend

hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67)

hud/__init__.py +1 -1
hud/agents/__init__.py +65 -6
hud/agents/base.py +33 -15
hud/agents/claude.py +60 -31
hud/agents/gateway.py +42 -0
hud/agents/gemini.py +15 -26
hud/agents/gemini_cua.py +6 -17
hud/agents/misc/response_agent.py +7 -0
hud/agents/openai.py +16 -29
hud/agents/openai_chat.py +3 -19
hud/agents/operator.py +5 -17
hud/agents/resolver.py +70 -0
hud/agents/tests/test_claude.py +2 -4
hud/agents/tests/test_openai.py +2 -1
hud/agents/tests/test_resolver.py +192 -0
hud/agents/types.py +148 -0
hud/cli/__init__.py +34 -3
hud/cli/build.py +37 -5
hud/cli/dev.py +11 -2
hud/cli/eval.py +51 -39
hud/cli/flows/init.py +1 -1
hud/cli/pull.py +1 -1
hud/cli/push.py +9 -2
hud/cli/tests/test_build.py +2 -2
hud/cli/tests/test_push.py +1 -1
hud/cli/utils/metadata.py +1 -1
hud/cli/utils/tests/test_metadata.py +1 -1
hud/clients/mcp_use.py +6 -1
hud/datasets/loader.py +17 -18
hud/datasets/runner.py +16 -10
hud/datasets/tests/test_loader.py +15 -15
hud/environment/__init__.py +5 -3
hud/environment/connection.py +58 -6
hud/environment/connectors/mcp_config.py +29 -1
hud/environment/environment.py +218 -77
hud/environment/router.py +175 -24
hud/environment/scenarios.py +313 -186
hud/environment/tests/test_connectors.py +10 -23
hud/environment/tests/test_environment.py +432 -0
hud/environment/tests/test_local_connectors.py +81 -40
hud/environment/tests/test_scenarios.py +820 -14
hud/eval/context.py +63 -10
hud/eval/instrument.py +4 -2
hud/eval/manager.py +79 -12
hud/eval/task.py +36 -4
hud/eval/tests/test_eval.py +1 -1
hud/eval/tests/test_task.py +147 -1
hud/eval/types.py +2 -0
hud/eval/utils.py +14 -3
hud/patches/mcp_patches.py +178 -21
hud/telemetry/instrument.py +8 -1
hud/telemetry/tests/test_eval_telemetry.py +8 -8
hud/tools/__init__.py +2 -0
hud/tools/agent.py +223 -0
hud/tools/computer/__init__.py +34 -5
hud/tools/shell.py +3 -3
hud/tools/tests/test_agent_tool.py +355 -0
hud/types.py +62 -34
hud/utils/hud_console.py +30 -17
hud/utils/strict_schema.py +1 -1
hud/utils/tests/test_version.py +1 -1
hud/version.py +1 -1
{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/METADATA +2 -2
{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/RECORD +67 -61
{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/WHEEL +0 -0
{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
{hud_python-0.5.1.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0

hud/environment/connection.py CHANGED Viewed

@@ -68,6 +68,8 @@ class Connector:
         self.connection_type = connection_type
         self.client: FastMCPClient[Any] | None = None
         self._tools_cache: list[mcp_types.Tool] | None = None
+        self._prompts_cache: list[mcp_types.Prompt] | None = None
+        self._resources_cache: list[mcp_types.Resource] | None = None
     def copy(self) -> Connector:
         """Create a copy of this connector with fresh (unconnected) state.
@@ -101,6 +103,14 @@ class Connector:
     def cached_tools(self) -> list[mcp_types.Tool]:
         return self._tools_cache or []
+    @property
+    def cached_prompts(self) -> list[mcp_types.Prompt]:
+        return self._prompts_cache or []
+    @property
+    def cached_resources(self) -> list[mcp_types.Resource]:
+        return self._resources_cache or []
     async def connect(self) -> None:
         """Create FastMCP client and connect.
@@ -110,19 +120,27 @@ class Connector:
         """
         from fastmcp.client import Client as FastMCPClient
-        # Create fresh client from stored transport config
-        self.client = FastMCPClient(transport=self._transport, auth=self._auth)
+        self.client = FastMCPClient(
+            transport=self._transport,
+            auth=self._auth,
+        )
         await self.client.__aenter__()
     async def disconnect(self) -> None:
-        """Disconnect and clear cache."""
+        """Disconnect and clear all caches."""
         if self.client is not None and self.is_connected:
             await self.client.__aexit__(None, None, None)
         self.client = None
         self._tools_cache = None
+        self._prompts_cache = None
+        self._resources_cache = None
     async def list_tools(self) -> list[mcp_types.Tool]:
-        """Fetch tools from server, apply filters/transforms/prefix, and cache."""
+        """Fetch tools from server, apply filters/transforms/prefix, and cache.
+        Always fetches fresh data from the server (no caching check).
+        The result is cached for use by router.build() via cached_tools property.
+        """
         if self.client is None:
             raise RuntimeError("Not connected - call connect() first")
         tools = await self.client.list_tools()
@@ -178,14 +196,48 @@ class Connector:
         return await self.client.call_tool_mcp(name, arguments or {})
     async def list_resources(self) -> list[mcp_types.Resource]:
+        """Fetch resources from server and cache.
+        Always fetches fresh data from the server (no caching check).
+        The result is cached for use by router.build_resources() via cached_resources property.
+        Note: resources/list is optional in the MCP spec. If the server doesn't
+        implement it, we return an empty list gracefully.
+        """
         if self.client is None:
             raise RuntimeError("Not connected - call connect() first")
-        return await self.client.list_resources()
+        try:
+            self._resources_cache = await self.client.list_resources()
+        except Exception as e:
+            # Handle servers that don't implement resources/list (optional in MCP spec)
+            if "Method not found" in str(e):
+                logger.debug("Server %s does not support resources/list", self.name)
+                self._resources_cache = []
+            else:
+                raise
+        return self._resources_cache
     async def list_prompts(self) -> list[mcp_types.Prompt]:
+        """Fetch prompts from server and cache.
+        Always fetches fresh data from the server (no caching check).
+        The result is cached for use by router.build_prompts() via cached_prompts property.
+        Note: prompts/list is optional in the MCP spec. If the server doesn't
+        implement it, we return an empty list gracefully.
+        """
         if self.client is None:
             raise RuntimeError("Not connected - call connect() first")
-        return await self.client.list_prompts()
+        try:
+            self._prompts_cache = await self.client.list_prompts()
+        except Exception as e:
+            # Handle servers that don't implement prompts/list (optional in MCP spec)
+            if "Method not found" in str(e):
+                logger.debug("Server %s does not support prompts/list", self.name)
+                self._prompts_cache = []
+            else:
+                raise
+        return self._prompts_cache
     async def read_resource(
         self, uri: str

hud/environment/connectors/mcp_config.py CHANGED Viewed

@@ -50,6 +50,7 @@ class MCPConfigConnectorMixin(BaseConnectorMixin):
             ```
         """
         from hud.environment.connection import ConnectionType
+        from hud.settings import settings
         name = alias or next(iter(config.keys()), "mcp")
         server_config = next(iter(config.values()), {})
@@ -57,9 +58,20 @@ class MCPConfigConnectorMixin(BaseConnectorMixin):
         is_local = "command" in server_config or "args" in server_config
         conn_type = ConnectionType.LOCAL if is_local else ConnectionType.REMOTE
+        transport: Any = config
+        if not is_local and "url" in server_config:
+            max_request_timeout = 840
+            server_config.setdefault(
+                "sse_read_timeout",
+                min(settings.client_timeout, max_request_timeout)
+                if settings.client_timeout > 0
+                else max_request_timeout,
+            )
+            transport = _build_transport(server_config)
         return self._add_connection(
             name,
-            config,
+            transport,
             connection_type=conn_type,
             prefix=prefix,
             include=include,
@@ -107,3 +119,19 @@ class MCPConfigConnectorMixin(BaseConnectorMixin):
         for server_name, server_config in mcp_config.items():
             self.connect_mcp({server_name: server_config}, alias=server_name, **kwargs)
         return self
+def _build_transport(server_config: dict[str, Any]) -> Any:
+    from fastmcp.client.transports import SSETransport, StreamableHttpTransport
+    from fastmcp.mcp_config import infer_transport_type_from_url
+    url = server_config["url"]
+    transport_type = server_config.get("transport") or infer_transport_type_from_url(url)
+    transport_cls = SSETransport if transport_type == "sse" else StreamableHttpTransport
+    return transport_cls(
+        url=url,
+        headers=server_config.get("headers"),
+        auth=server_config.get("auth"),
+        sse_read_timeout=server_config.get("sse_read_timeout"),
+    )

hud/environment/environment.py CHANGED Viewed

@@ -119,6 +119,26 @@ class Environment(
     MAX_CONCURRENT_CONNECTIONS = 10
+    @staticmethod
+    def _normalize_name(name: str) -> str:
+        """Normalize environment name to lowercase with hyphens.
+        - Strips whitespace
+        - Replaces spaces and underscores with hyphens
+        - Lowercases the result
+        - Removes any non-alphanumeric characters except hyphens
+        """
+        import re
+        normalized = name.strip().lower()
+        normalized = normalized.replace(" ", "-").replace("_", "-")
+        # Keep only alphanumeric and hyphens
+        normalized = re.sub(r"[^a-z0-9-]", "", normalized)
+        # Collapse multiple hyphens
+        normalized = re.sub(r"-+", "-", normalized)
+        # Strip leading/trailing hyphens
+        return normalized.strip("-") or "environment"
     def __init__(
         self,
         name: str = "environment",
@@ -126,14 +146,23 @@ class Environment(
         conflict_resolution: ConflictResolution = ConflictResolution.PREFIX,
         **fastmcp_kwargs: Any,
     ) -> None:
+        # Normalize name to prevent casing/spacing issues
+        name = self._normalize_name(name)
         super().__init__(name=name, instructions=instructions, **fastmcp_kwargs)
         self._connections: dict[str, Connector] = {}
         self._router = ToolRouter(conflict_resolution=conflict_resolution)
+        # Granular routing flags - only rebuild what's invalidated
+        self._tool_routing_built = False
+        self._prompt_routing_built = False
+        self._resource_routing_built = False
         self._in_context = False
         # Tool call queues - run after connections established
         self._setup_calls: list[tuple[str, dict[str, Any]]] = []
         self._evaluate_calls: list[tuple[str, dict[str, Any]]] = []
+        self._integration_test_calls: list[tuple[str, dict[str, Any]]] = []
+        # Store setup tool results for append_setup_output feature
+        self._setup_results: list[MCPToolResult] = []
         # Default prompt (EvalContext has per-run prompt)
         self.prompt: str | None = None
@@ -163,24 +192,35 @@ class Environment(
         """Return tools in MCP format (base format).
         Applies agent-level include/exclude filtering if set.
+        Supports fnmatch-style wildcards (e.g., "*setup*", "browser_*").
         """
+        import fnmatch
         tools = self._router.tools
         # Apply agent-level filtering (from v4 allowed_tools/disallowed_tools)
         if self._agent_include is not None or self._agent_exclude is not None:
             filtered = []
             for tool in tools:
-                # Include filter: None means include all
-                if self._agent_include is not None and tool.name not in self._agent_include:
+                # Include filter: None means include all, check if matches any pattern
+                if self._agent_include is not None and not any(
+                    fnmatch.fnmatch(tool.name, pattern) for pattern in self._agent_include
+                ):
                     continue
-                # Exclude filter
-                if self._agent_exclude is not None and tool.name in self._agent_exclude:
+                # Exclude filter: skip if tool matches any exclude pattern
+                if self._agent_exclude is not None and any(
+                    fnmatch.fnmatch(tool.name, pattern) for pattern in self._agent_exclude
+                ):
                     continue
                 filtered.append(tool)
             return filtered
         return tools
+    def add_tool(self, obj: Any, **kwargs: Any) -> None:
+        super().add_tool(obj, **kwargs)
+        self._tool_routing_built = False  # Only invalidate tool routing
     async def call_tool(self, call: Any, /, **kwargs: Any) -> Any:
         """Call a tool, auto-detecting format and returning matching result format.
@@ -224,6 +264,9 @@ class Environment(
         Automatically filters to only connections where the tool exists
         (based on cached_tools from initial discovery).
+        For internal tools (starting with _), tries ALL connections since
+        internal tools are hidden from list_tools() and won't be in cached_tools.
         Args:
             tool_name: Name of the tool to call
             **kwargs: Arguments to pass to the tool
@@ -233,10 +276,13 @@ class Environment(
         """
         import asyncio
-        # Only call connections that have this tool
-        targets = self._connections_with_tool(tool_name)
-        if not targets:
-            return {}
+        # For internal tools (underscore prefix), try ALL connections since
+        # they're hidden from list_tools() and won't appear in cached_tools.
+        # For regular tools, only try connections that advertise the tool.
+        if tool_name.startswith("_"):
+            targets = set(self._connections.keys())
+        else:
+            targets = self._connections_with_tool(tool_name)
         results: dict[str, Any] = {}
@@ -245,7 +291,8 @@ class Environment(
             if not connector or not connector.client:
                 return
             try:
-                results[name] = await connector.client.call_tool(tool_name, **kwargs)
+                # Use connector.call_tool which expects arguments as a dict
+                results[name] = await connector.call_tool(tool_name, kwargs)
                 logger.debug("Broadcast '%s' to '%s' succeeded", tool_name, name)
             except Exception as e:
                 results[name] = e
@@ -304,7 +351,7 @@ class Environment(
         """Connect all connectors, build routing, run setup tools."""
         self._in_context = True
-        # Connect to all servers (on_connect callbacks run first within connect())
+        # Connect to all servers and fetch tools/prompts/resources in parallel
         sem = asyncio.Semaphore(self.MAX_CONCURRENT_CONNECTIONS)
         errors: list[tuple[str, Exception]] = []
@@ -312,7 +359,12 @@ class Environment(
             async with sem:
                 try:
                     await conn.connect()
-                    await conn.list_tools()
+                    # Batch fetch all MCP primitives in parallel for performance
+                    await asyncio.gather(
+                        conn.list_tools(),
+                        conn.list_prompts(),
+                        conn.list_resources(),
+                    )
                 except Exception as e:
                     errors.append((name, e))
@@ -328,9 +380,25 @@ class Environment(
         await self._build_routing()
-        # Setup tool calls (after connections)
+        # Setup tool calls (after connections) - abort if any setup tool fails
+        # Store results for append_setup_output feature
+        self._setup_results = []
         for name, args in self._setup_calls:
-            await self._execute_tool(name, args)
+            result = await self._execute_tool(name, args)
+            self._setup_results.append(result)
+            if result.isError:
+                # Extract error message from result content
+                error_msg = "Setup tool failed"
+                if result.content:
+                    for block in result.content:
+                        if isinstance(block, mcp_types.TextContent):
+                            error_msg = block.text
+                            break
+                # Clean up connections before raising (since __aexit__ won't be called)
+                for conn in self._connections.values():
+                    if conn.is_connected:
+                        await conn.disconnect()
+                raise RuntimeError(f"Setup tool '{name}' failed: {error_msg}")
         return self
@@ -351,6 +419,8 @@ class Environment(
                 rewards.append(find_reward(result))
             except Exception as e:
                 logger.warning("Evaluate tool %s failed: %s", name, e)
+                # Record 0.0 for failed evaluate tools so they affect the average
+                rewards.append(0.0)
         # Store average reward from evaluate tools
         self._evaluate_reward: float | None = None
@@ -361,11 +431,44 @@ class Environment(
         if self._connections:
             await asyncio.gather(*[c.disconnect() for c in self._connections.values()])
         self._router.clear()
+        self._tool_routing_built = False
+        self._prompt_routing_built = False
+        self._resource_routing_built = False
+        self._active_session = None  # Clear stale scenario state
+    async def run_async(
+        self,
+        transport: Literal["stdio", "http", "sse"] | None = None,
+        show_banner: bool = True,
+        **transport_kwargs: Any,
+    ) -> None:
+        """Run the MCP server, auto-connecting all connectors first.
+        This ensures that tools from external MCP servers (via connect_mcp_config)
+        are discovered and available when the server starts.
+        """
+        async with self:  # Connect all connectors via __aenter__
+            await super().run_async(
+                transport=transport, show_banner=show_banner, **transport_kwargs
+            )
     async def _build_routing(self) -> None:
+        """Build routing for tools, prompts, and resources in parallel.
+        Only rebuilds what's actually invalidated for performance.
+        """
+        tasks = []
+        if not self._tool_routing_built:
+            tasks.append(self._build_tool_routing())
+        if not self._prompt_routing_built:
+            tasks.append(self._build_prompt_routing())
+        if not self._resource_routing_built:
+            tasks.append(self._build_resource_routing())
+        if tasks:
+            await asyncio.gather(*tasks)
+    async def _build_tool_routing(self) -> None:
         """Build tool routing from local tools and connection caches."""
-        # Use get_tools() not list_tools() - it includes mounted servers without
-        # requiring MCP server communication (via_server=False)
         local_tools_dict = await self._tool_manager.get_tools()
         local_tools = list(local_tools_dict.values())
         self._router.build(
@@ -375,16 +478,54 @@ class Environment(
         )
         # Populate mock schemas for auto-generated mock values
         self._populate_mock_schemas()
+        self._tool_routing_built = True
+    async def _build_prompt_routing(self) -> None:
+        """Build prompt routing from local prompts and connections."""
+        local_prompts_dict = await self._prompt_manager.get_prompts()
+        local_prompts = [p.to_mcp_prompt() for p in local_prompts_dict.values()]
+        self._router.build_prompts(local_prompts, self._connections)
+        self._prompt_routing_built = True
+    async def _build_resource_routing(self) -> None:
+        """Build resource routing from local resources and connections."""
+        local_resources_dict = await self._resource_manager.get_resources()
+        local_resources = [r.to_mcp_resource() for r in local_resources_dict.values()]
+        self._router.build_resources(local_resources, self._connections)
+        self._resource_routing_built = True
+    # =========================================================================
+    # MCP Protocol Overrides - Include connector tools in MCP responses
+    # =========================================================================
+    def _setup_handlers(self) -> None:
+        """Override FastMCP to register our custom handlers for tools."""
+        # Call parent to set up all standard handlers
+        super()._setup_handlers()
+        # Re-register our custom handlers (overwrites parent's registrations)
+        self._mcp_server.list_tools()(self._env_list_tools)
+        self._mcp_server.call_tool()(self._env_call_tool)
+    async def _env_list_tools(self) -> list[mcp_types.Tool]:
+        """Return all tools including those from connectors."""
+        if not self._tool_routing_built:
+            await self._build_tool_routing()
+        return self._router.tools
+    async def _env_call_tool(self, name: str, arguments: dict[str, Any] | None = None) -> list[Any]:
+        """Route tool calls through our router (handles both local and connector tools)."""
+        result = await self._execute_tool(name, arguments or {})
+        return result.content or []
     # =========================================================================
     # Tool Operations
     # =========================================================================
     async def list_tools(self) -> list[mcp_types.Tool]:
-        """Refresh tools from all connections and rebuild routing."""
+        """Refresh tools from all connections and rebuild tool routing."""
         if self._connections:
             await asyncio.gather(*[c.list_tools() for c in self._connections.values()])
-        await self._build_routing()
+        await self._build_tool_routing()
         return self._router.tools
     async def _execute_tool(self, name: str, arguments: dict[str, Any]) -> MCPToolResult:
@@ -397,12 +538,15 @@ class Environment(
             logger.debug("Mock mode: returning mock result for tool %s", name)
             return self._get_mock_result(name, arguments)
+        # Rebuild tool routing if invalidated (e.g., after add_tool)
+        if not self._tool_routing_built:
+            await self._build_tool_routing()
         if self._router.is_local(name):
             # Call tool manager directly to avoid FastMCP context requirement
             result = await self._tool_manager.call_tool(name, arguments)
             return MCPToolResult(
-                content=result.content,
-                structuredContent=result.structured_content,
+                content=result.content, structuredContent=result.structured_content
             )
         connection_name = self._router.get_connection(name)
@@ -422,86 +566,83 @@ class Environment(
     # =========================================================================
     async def list_resources(self) -> list[mcp_types.Resource]:
-        """List all resources (local + remote)."""
-        local = list((await self._resource_manager.get_resources()).values())
-        resources: list[mcp_types.Resource] = [r.to_mcp_resource() for r in local]
+        """Refresh resources from all connections and rebuild resource routing."""
         if self._connections:
-            results = await asyncio.gather(
-                *[c.list_resources() for c in self._connections.values()], return_exceptions=True
-            )
-            for r in results:
-                if isinstance(r, list):
-                    resources.extend(r)
-        return resources
+            await asyncio.gather(*[c.list_resources() for c in self._connections.values()])
+        await self._build_resource_routing()
+        return self._router.resources
     async def read_resource(
         self, uri: str
     ) -> list[mcp_types.TextResourceContents | mcp_types.BlobResourceContents]:
-        """Read a resource by URI (tries local first, then remote)."""
+        """Read a resource by URI using router for connection lookup."""
         from pydantic import AnyUrl
-        try:
-            result = await self._resource_manager.read_resource(uri)
-            resource_uri = AnyUrl(uri)
-            if isinstance(result, str):
-                return [mcp_types.TextResourceContents(uri=resource_uri, text=result)]
-            import base64
+        # Ensure resource routing is built
+        if not self._resource_routing_built:
+            await self._build_resource_routing()
-            return [
-                mcp_types.BlobResourceContents(
-                    uri=resource_uri, blob=base64.b64encode(result).decode()
-                )
-            ]
-        except Exception as e:
-            logger.debug("Local resource read failed for %s: %s", uri, e)
+        # Use router to find which connection has this resource
+        conn_name = self._router.get_resource_connection(uri)
-        for conn in self._connections.values():
+        if conn_name is None:
+            # Local resource
             try:
-                return await conn.read_resource(uri)
+                result = await self._resource_manager.read_resource(uri)
+                resource_uri = AnyUrl(uri)
+                if isinstance(result, str):
+                    return [mcp_types.TextResourceContents(uri=resource_uri, text=result)]
+                import base64
+                return [
+                    mcp_types.BlobResourceContents(
+                        uri=resource_uri, blob=base64.b64encode(result).decode()
+                    )
+                ]
             except Exception as e:
-                logger.debug("Remote resource read failed for %s: %s", uri, e)
-                continue
-        raise ValueError(f"Resource not found: {uri}")
+                logger.debug("Local resource read failed for %s: %s", uri, e)
+                raise ValueError(f"Resource not found: {uri}") from e
+        else:
+            # Remote resource
+            conn = self._connections.get(conn_name)
+            if conn is None:
+                raise ValueError(f"Connection '{conn_name}' not found for resource '{uri}'")
+            return await conn.read_resource(uri)
     # =========================================================================
     # Prompt Operations
     # =========================================================================
     async def list_prompts(self) -> list[mcp_types.Prompt]:
-        """List all prompts (local + remote)."""
-        local = list((await self._prompt_manager.get_prompts()).values())
-        prompts: list[mcp_types.Prompt] = [p.to_mcp_prompt() for p in local]
+        """Refresh prompts from all connections and rebuild prompt routing."""
         if self._connections:
-            results = await asyncio.gather(
-                *[c.list_prompts() for c in self._connections.values()], return_exceptions=True
-            )
-            for r in results:
-                if isinstance(r, list):
-                    prompts.extend(r)
-        return prompts
+            await asyncio.gather(*[c.list_prompts() for c in self._connections.values()])
+        await self._build_prompt_routing()
+        return self._router.prompts
     async def get_prompt(
         self, name: str, arguments: dict[str, Any] | None = None
     ) -> mcp_types.GetPromptResult:
-        """Get a prompt by name (tries local first, then remote)."""
-        try:
-            return await self._prompt_manager.render_prompt(name, arguments or {})
-        except Exception as e:
-            logger.debug("Local prompt render failed for %s: %s", name, e)
+        """Get a prompt by name using router for connection lookup."""
+        # Ensure prompt routing is built
+        if not self._prompt_routing_built:
+            await self._build_prompt_routing()
+        # Use router to find which connection has this prompt
+        conn_name = self._router.get_prompt_connection(name)
-        for conn in self._connections.values():
+        if conn_name is None:
+            # Local prompt
             try:
-                return await conn.get_prompt(name, arguments)
+                return await self._prompt_manager.render_prompt(name, arguments or {})
             except Exception as e:
-                logger.debug("Remote prompt get failed for %s: %s", name, e)
-                continue
-        raise ValueError(f"Prompt not found: {name}")
+                raise ValueError(f"Prompt not found: {name}") from e
+        else:
+            # Remote prompt
+            conn = self._connections.get(conn_name)
+            if conn is None:
+                raise ValueError(f"Connection '{conn_name}' not found for prompt '{name}'")
+            return await conn.get_prompt(name, arguments)
     # =========================================================================
     # Server Methods
@@ -553,7 +694,7 @@ class Environment(
         For v4 format: requires mcp_config, prompt, AND evaluate_tool
         """
         # Check for local tools (registered via @env.tool)
-        if self._router._local_names:
+        if self._router._local_tool_names:
             return False
         # Check for local scenarios (registered via @env.scenario)
         if getattr(self, "_scenarios", {}):
@@ -590,10 +731,10 @@ class Environment(
             task.env.to_config()  # {"prompt": "...", "mcp_config": {...}, ...}
             ```
         """
-        if self._router._local_names:
+        if self._router._local_tool_names:
             raise ValueError(
                 f"Cannot serialize Environment with local tools: "
-                f"{list(self._router._local_names)}. "
+                f"{list(self._router._local_tool_names)}. "
                 "Local tools require local execution. For remote submission, "
                 "use dict config or connect to a remote hub."
             )

hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl

hud-python 0.5.1__py3-none-any.whl → 0.5.13__py3-none-any.whl