PyPI - inspect-ai - Versions diffs - 0.3.90__py3-none-any.whl → 0.3.92__py3-none-any.whl - Mend

inspect-ai 0.3.90__py3-none-any.whl → 0.3.92__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (370) hide show

inspect_ai/tool/_json_rpc_helpers.py ADDED Viewed

@@ -0,0 +1,285 @@
+import json
+from itertools import count
+from typing import Literal, Protocol, Type, TypeAlias, TypeVar
+from pydantic import BaseModel, RootModel
+from inspect_ai.tool._tool import ToolError
+# Fields shared by both response shapes (success and error) defined below.
+class JSONRPCResponseBase(BaseModel):
+    # protocol version marker; only the literal "2.0" validates
+    jsonrpc: Literal["2.0"]
+    # identifier of the response (number or string)
+    id: int | float | str
+# Response shape for a successful call: the base fields plus a `result`.
+class JSONRPCSuccessResponse(JSONRPCResponseBase):
+    # payload of the call; untyped here, checked by exec_scalar_request /
+    # exec_model_request against the type the caller expects
+    result: object
+# Call parameters: positional (list), named (dict), or absent (None).
+JSONRPCParamsType: TypeAlias = list[object] | dict[str, object] | None
+# Common shape of a JSON-RPC call message; base of JSONRPCRequest and
+# JSONRPCNotification.
+class JSONRPCIncoming(BaseModel):
+    # protocol version marker; only the literal "2.0" validates
+    jsonrpc: Literal["2.0"]
+    # name of the method being invoked
+    method: str
+    # optional parameters; defaults to None when omitted
+    params: JSONRPCParamsType = None
+# A call message that carries an `id` (contrast with JSONRPCNotification,
+# which declares none).
+class JSONRPCRequest(JSONRPCIncoming):
+    # identifier of the request (number or string)
+    id: int | float | str
+# A call message without an `id`; adds nothing to JSONRPCIncoming.
+class JSONRPCNotification(JSONRPCIncoming):
+    pass
+class JSONRPCError(BaseModel):
+    """See: https://www.jsonrpc.org/specification#error_object"""
+    # numeric error code (mapped to exceptions by exception_for_rpc_response_error)
+    code: int
+    # human-readable description of the error
+    message: str
+    # optional extra error information; ignored by parse_json_rpc_response
+    data: object | None = None
+# Response shape for a failed call: the base fields plus an `error` object.
+class JSONRPCErrorResponse(JSONRPCResponseBase):
+    # details of the failure
+    error: JSONRPCError
+# Root model used to parse a raw response as either a success or an error
+# response; the parsed variant is available on `.root`.
+class JSONRPCResponse(RootModel[JSONRPCSuccessResponse | JSONRPCErrorResponse]):
+    pass
+# Result type of exec_model_request: any pydantic model class.
+BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
+# Result type of exec_scalar_request: constrained to the listed scalar types.
+ScalarT = TypeVar("ScalarT", str, int, float, bool, None)
+class JSONRPCTransport(Protocol):
+    """Async callable that delivers one JSON-RPC call and returns the raw reply text.
+
+    Callers in this module pass `method`, `params` and `is_notification` by
+    keyword. For requests the returned string is parsed as a JSON-RPC
+    response; for notifications it is expected to be empty or whitespace only.
+    """
+    async def __call__(
+        self, method: str, params: JSONRPCParamsType, is_notification: bool
+    ) -> str: ...
+class JSONRPCServerErrorMapper(Protocol):
+    """Callable that maps a server-defined error (-32099..-32000) to an exception.
+
+    It receives the error code and message plus the method and params of the
+    failed call, and returns (does not raise) the exception to use.
+    """
+    def __call__(
+        self, code: int, message: str, method: str, params: JSONRPCParamsType
+    ) -> Exception: ...
+async def exec_scalar_request(
+    method: str,
+    params: JSONRPCParamsType,
+    result_type: Type[ScalarT],
+    transport: JSONRPCTransport,
+) -> ScalarT:
+    """
+    Send a JSON-RPC request and return its result after checking it is a scalar of the requested type.
+    Args:
+      method (str): Name of the JSON-RPC method to invoke.
+      params (JSONRPCParamsType): Parameters to send with the call.
+      result_type (Type[ScalarT]): Scalar type (str, int, float, bool, None) the result must be an instance of.
+      transport (JSONRPCTransport): Callable that carries the request and returns the raw response text.
+    Returns:
+      ScalarT: The `result` value of the response.
+    Raises:
+      ToolError: If the response is an error that exception_for_rpc_response_error maps to a tool error.
+      RuntimeError: If the response is any other JSON-RPC error.
+      ValueError: If the result is not an instance of `result_type`.
+    """
+    outcome = await _exec_request(method=method, params=params, transport=transport)
+    # guard 1: a None result was requested but something else came back
+    expects_none = result_type is type(None)
+    if expects_none and outcome is not None:
+        raise ValueError(f"Expected {result_type} result, got {type(outcome)}")
+    # guard 2: general instance check against the requested scalar type
+    if not isinstance(outcome, result_type):
+        raise ValueError(f"Expected {result_type} result, got {type(outcome)}")
+    return outcome
+async def exec_model_request(
+    method: str,
+    params: JSONRPCParamsType,
+    result_type: Type[BaseModelT],
+    transport: JSONRPCTransport,
+) -> BaseModelT:
+    """
+    Send a JSON-RPC request and parse its result into a Pydantic model.
+    Args:
+      method (str): Name of the JSON-RPC method to invoke.
+      params (JSONRPCParamsType): Parameters to send with the call.
+      result_type (Type[BaseModelT]): Model class used to strictly validate the result.
+      transport (JSONRPCTransport): Callable that carries the request and returns the raw response text.
+    Returns:
+      BaseModelT: The `result` value of the response, validated as `result_type`.
+    Raises:
+      ToolError: If the response is an error that exception_for_rpc_response_error maps to a tool error.
+      RuntimeError: If the response is any other JSON-RPC error.
+      ValueError: If the result cannot be validated against `result_type`.
+    """
+    payload = await _exec_request(method=method, params=params, transport=transport)
+    # strict mode: no type coercion while validating the payload
+    validated = result_type.model_validate(payload, strict=True)
+    return validated
+async def exec_notification(
+    method: str,
+    params: JSONRPCParamsType,
+    transport: JSONRPCTransport,
+) -> None:
+    """
+    Send a JSON-RPC notification, i.e. a call for which no response is expected.
+    Args:
+      method (str): Name of the JSON-RPC method to invoke.
+      params (JSONRPCParamsType): Parameters to send with the call.
+      transport (JSONRPCTransport): Callable that carries the notification and returns whatever text came back.
+    Returns:
+      None: Always, when the transport returned nothing but whitespace.
+    Raises:
+      RuntimeError: If the transport returned any non-whitespace text.
+    """
+    reply_text = await transport(method=method, params=params, is_notification=True)
+    # the expected outcome: nothing (or only whitespace) came back
+    if not reply_text.strip():
+        return
+    raise RuntimeError(
+        f"Unexpected response to a Notification: {_rpc_call_description(method, params)}: {reply_text}"
+    )
+async def _exec_request(
+    *,
+    method: str,
+    params: JSONRPCParamsType,
+    transport: JSONRPCTransport,
+) -> object:
+    """Send a (non-notification) request through `transport` and return the parsed result."""
+    raw_response = await transport(
+        method=method, params=params, is_notification=False
+    )
+    # raises the mapped exception if the response is a JSON-RPC error
+    return parse_json_rpc_response(raw_response, method, params)
+def parse_json_rpc_response(
+    response_str: str,
+    method: str,
+    params: JSONRPCParamsType,
+) -> object:
+    """Parse raw JSON-RPC response text; return its result or raise the mapped Inspect error.
+
+    `method` and `params` describe the originating call and are only used to
+    build error messages.
+    """
+    parsed = JSONRPCResponse.model_validate_json(response_str).root
+    if isinstance(parsed, JSONRPCSuccessResponse):
+        return parsed.result
+    if isinstance(parsed, JSONRPCErrorResponse):
+        rpc_error = parsed.error
+        raise exception_for_rpc_response_error(
+            rpc_error.code, rpc_error.message, method, params
+        )
+    # fallback for anything that is neither of the two response models
+    raise ValueError(
+        f"Unexpected JSON RPC response to request {_rpc_call_description(method, params)}: {response_str}"
+    )
+def exception_for_rpc_response_error(
+    code: int,
+    message: str,
+    method: str,
+    params: JSONRPCParamsType,
+    server_error_mapper: JSONRPCServerErrorMapper | None = None,
+) -> Exception:
+    """Build (without raising) the Inspect exception matching a JSON-RPC error code.
+
+    Mapping of the codes defined by the JSON-RPC specification:
+      -32000..-32099  Server error      -> `server_error_mapper` result if given, else ToolError
+      -32603          Internal error    -> ToolError
+      -32600          Invalid Request   -> RuntimeError (a bogus request is a code bug)
+      -32601          Method not found  -> RuntimeError
+      -32602          Invalid params    -> RuntimeError
+      -32700          Parse error       -> RuntimeError
+    Any other code also yields RuntimeError.
+    """
+    # The server-defined range: this layer does not know which server was
+    # called, so any special mapping has to come from the caller.
+    server_defined = -32099 <= code <= -32000
+    if server_defined and server_error_mapper:
+        return server_error_mapper(code, message, method, params)
+    if server_defined or code == -32603:
+        return ToolError(message)
+    # Everything else is not meant to reach the model:
+    #   -32600: a bogus request is a bug in the calling code.
+    #   -32601 / -32602: Inspect validates before making the tool call, so
+    #     these should not be possible.
+    #   -32700: a request-oriented error that should not appear while
+    #     processing responses.
+    call_suffix = (
+        f"  {_rpc_call_description(method, params)}" if method and params else ""
+    )
+    return RuntimeError(
+        f"Error executing tool command{call_suffix}: {code=} {message}"
+    )
+def _rpc_call_description(method: str, params: JSONRPCParamsType) -> str:
+    """
+    Generate a string description of an RPC call.
+    Args:
+        method (str): The name of the RPC method.
+        params (JSONRPCParamsType): The parameters for the RPC method. A dict
+            is rendered as `key: value` pairs; any other non-None sequence
+            (list, tuple) is rendered positionally; None renders as no
+            arguments.
+    Returns:
+        str: A string description of the RPC call.
+    Examples:
+        >>> _rpc_call_description("subtract", {"minuend": 42, "subtrahend": 23})
+        'subtract(minuend: 42, subtrahend: 23)'
+        >>> _rpc_call_description("subtract", (42, 23))
+        'subtract(42, 23)'
+        >>> _rpc_call_description("ping", None)
+        'ping()'
+    """
+    rendered: list[str]
+    if params is None:
+        rendered = []
+    elif isinstance(params, dict):
+        rendered = [f"{k}: {v}" for k, v in params.items()]
+    else:
+        # Positional params. Testing for dict rather than list means a tuple
+        # (as in the example above) no longer falls through to `.items()`.
+        rendered = [str(p) for p in params]
+    return f"{method}({', '.join(rendered)})"
+# Module-wide source of request ids (666, 667, ...) consumed by
+# create_json_rpc_request for every non-notification message.
+id_generator = count(666)
+def create_json_rpc_request(
+    method: str,
+    params: JSONRPCParamsType,
+    is_notification: bool,
+) -> str:
+    """Serialize a JSON-RPC 2.0 call message to a JSON string.
+
+    `params` is included only when truthy (so None, an empty list and an empty
+    dict are all omitted). An `id` drawn from the module-level `id_generator`
+    is included unless `is_notification` is set.
+    """
+    message: dict[str, object] = {"jsonrpc": "2.0", "method": method}
+    if params:
+        message["params"] = params
+    if not is_notification:
+        message["id"] = next(id_generator)
+    return json.dumps(message)

inspect_ai/tool/_mcp/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+from ._types import MCPServer
+from .connection import mcp_connection
+from .server import mcp_server_sandbox, mcp_server_sse, mcp_server_stdio
+from .tools import mcp_tools
+# Names re-exported as the public interface of this package.
+__all__ = [
+    "mcp_tools",
+    "mcp_server_stdio",
+    "mcp_server_sse",
+    "mcp_server_sandbox",
+    "mcp_connection",
+    "MCPServer",
+]

inspect_ai/tool/_mcp/_context.py ADDED Viewed

@@ -0,0 +1,14 @@
+from contextlib import _AsyncGeneratorContextManager
+from typing import TypeAlias
+from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
+from mcp.types import (
+    JSONRPCMessage,
+)
+# Async context manager that yields a (receive stream, send stream) pair of
+# JSON-RPC messages; the receive side may also deliver Exception objects.
+MCPServerContext: TypeAlias = _AsyncGeneratorContextManager[
+    tuple[
+        MemoryObjectReceiveStream[JSONRPCMessage | Exception],
+        MemoryObjectSendStream[JSONRPCMessage],
+    ],
+]

inspect_ai/tool/_mcp/_mcp.py ADDED Viewed

@@ -0,0 +1,293 @@
+import contextlib
+import sys
+from contextlib import AsyncExitStack
+from fnmatch import fnmatch
+from logging import getLogger
+from pathlib import Path
+from typing import Any, AsyncIterator, Callable, Literal
+import anyio
+from mcp import McpError
+from mcp.client.session import ClientSession, SamplingFnT
+from mcp.client.sse import sse_client
+from mcp.client.stdio import StdioServerParameters, stdio_client
+from mcp.types import (
+    EmbeddedResource,
+    ImageContent,
+    TextContent,
+    TextResourceContents,
+)
+from mcp.types import Tool as MCPTool
+from typing_extensions import override
+from inspect_ai._util.format import format_function_call
+from inspect_ai._util.trace import trace_action
+from inspect_ai.tool._json_rpc_helpers import exception_for_rpc_response_error
+from inspect_ai.tool._tool import Tool, ToolError, ToolResult
+from inspect_ai.tool._tool_def import ToolDef
+from inspect_ai.tool._tool_params import ToolParams
+from ._context import MCPServerContext
+from ._sandbox import sandbox_client
+from ._types import MCPServer
+from .sampling import as_inspect_content, sampling_fn
+# https://github.com/modelcontextprotocol/python-sdk/pull/401
+# https://github.com/modelcontextprotocol/python-sdk/pull/361
+# https://github.com/modelcontextprotocol/python-sdk/pull/289
+# Module logger; handed to trace_action for every traced MCP operation below.
+logger = getLogger(__name__)
+class MCPServerImpl(MCPServer):
+    """MCPServer front end that forwards every operation to a per-task session.
+
+    Each (async task, server name) pair gets its own MCPServerSession, created
+    lazily on first use from the `client` factory.
+    """
+    def __init__(
+        self, client: Callable[[], MCPServerContext], *, name: str, events: bool
+    ) -> None:
+        """
+        Args:
+          client: Factory returning the async context manager that yields the
+            server's read/write streams.
+          name: Name used in trace messages and to tell servers apart.
+          events: Passed through to each MCPServerSession, where it gates
+            whether a sampling callback is installed.
+        """
+        super().__init__()
+        self._client = client
+        self._name = name
+        self._events = events
+    @override
+    async def _connect(self) -> None:
+        await self._task_session()._connect()
+    @override
+    async def _close(self) -> None:
+        await self._task_session()._close()
+    async def _list_tools(
+        self, tools: Literal["all"] | list[str] = "all"
+    ) -> list[Tool]:
+        return await self._task_session()._list_tools(tools)
+    # Create a separate MCPServer session per async task AND per server. The
+    # cache is shared by all instances of this class, so keying on the task id
+    # alone would hand the session of whichever server was used first in a
+    # task to every other server used in that task.
+    _task_sessions: dict[tuple[int, str], "MCPServerSession"] = {}
+    def _task_session(self) -> "MCPServerSession":
+        """Return the session for the current async task and this server, creating it if needed."""
+        session_key = (anyio.get_current_task().id, self._name)
+        sessions = MCPServerImpl._task_sessions
+        if session_key not in sessions:
+            sessions[session_key] = MCPServerSession(
+                self._client, name=self._name, events=self._events
+            )
+        return sessions[session_key]
+class MCPServerSession(MCPServer):
+    """Reference-counted MCP client session for a single server.
+
+    `_connect` / `_close` open and release a long-lived ClientSession. When no
+    such session is open, each operation creates a temporary one that is torn
+    down as soon as the operation finishes.
+    """
+    def __init__(
+        self, client: Callable[[], MCPServerContext], *, name: str, events: bool
+    ) -> None:
+        """
+        Args:
+          client: Factory returning the async context manager that yields the
+            server's read/write streams.
+          name: Name used in trace messages.
+          events: When true (and a model is active) a sampling callback is
+            installed on created sessions.
+        """
+        super().__init__()
+        self._refcount = 0
+        self._client = client
+        self._name = name
+        self._events = events
+        self._session: ClientSession | None = None
+        self._exit_stack: AsyncExitStack | None = None
+        self._cached_tool_list: list[MCPTool] | None = None
+    @override
+    async def _connect(self) -> None:
+        """Open the long-lived session, or add a reference if it is already open."""
+        if self._session is not None:
+            assert self._refcount > 0
+            self._refcount = self._refcount + 1
+            return
+        assert self._refcount == 0
+        # Build the connection inside a temporary stack so that a failure at
+        # any step unwinds whatever was already entered. State is published
+        # only once every step has succeeded, so a failed connect leaves the
+        # object cleanly disconnected.
+        async with AsyncExitStack() as exit_stack:
+            with trace_action(logger, "MCPServer", f"create client ({self._name})"):
+                read, write = await exit_stack.enter_async_context(self._client())
+            with trace_action(logger, "MCPServer", f"create session ({self._name})"):
+                session = await exit_stack.enter_async_context(
+                    ClientSession(read, write, sampling_callback=self._sampling_fn())
+                )
+            with trace_action(
+                logger, "MCPServer", f"initialize session ({self._name})"
+            ):
+                await session.initialize()
+            # success: move the entered contexts to a stack we keep, so that
+            # leaving this `async with` does not close them
+            self._exit_stack = exit_stack.pop_all()
+            self._session = session
+            self._refcount = 1
+    @override
+    async def _close(self) -> None:
+        """Drop one reference; close the session when the last one is released."""
+        assert self._refcount > 0
+        self._refcount = self._refcount - 1
+        if self._refcount == 0:
+            with trace_action(logger, "MCPServer", f"disconnect ({self._name})"):
+                assert self._session is not None
+                assert self._exit_stack is not None
+                try:
+                    await self._exit_stack.aclose()
+                finally:
+                    # forget the session even if closing raised
+                    self._session = None
+                    self._exit_stack = None
+    async def _list_tools(
+        self, tools: Literal["all"] | list[str] = "all"
+    ) -> list[Tool]:
+        """Return the server's tools as Inspect tools.
+
+        Args:
+          tools: "all", or a list of fnmatch-style patterns; a tool is kept if
+            its name matches any pattern.
+        """
+        # `is not None` (rather than truthiness) so that an empty tool list is
+        # cached too instead of being re-fetched on every call
+        if self._cached_tool_list is not None:
+            mcp_tools = self._cached_tool_list
+        else:
+            async with self._client_session() as session:
+                # get the underlying tools on the server
+                with trace_action(logger, "MCPServer", f"list_tools {self._name}"):
+                    mcp_tools = (await session.list_tools()).tools
+                self._cached_tool_list = mcp_tools
+        # filter them
+        def include_tool(tool: MCPTool) -> bool:
+            if tools == "all":
+                return True
+            return any(fnmatch(tool.name, t) for t in tools)
+        mcp_tools = [mcp_tool for mcp_tool in mcp_tools if include_tool(mcp_tool)]
+        # dynamically create tools
+        return [
+            self._tool_def_from_mcp_tool(mcp_tool).as_tool() for mcp_tool in mcp_tools
+        ]
+    def _tool_def_from_mcp_tool(self, mcp_tool: MCPTool) -> ToolDef:
+        """Wrap an MCP tool in a ToolDef whose execute function calls the server."""
+        async def execute(**kwargs: Any) -> ToolResult:
+            async with self._client_session() as tool_session:
+                mcp_call = format_function_call(
+                    mcp_tool.name, kwargs, width=sys.maxsize
+                )
+                with trace_action(
+                    logger, "MCPServer", f"call_tool ({self._name}): {mcp_call}"
+                ):
+                    try:
+                        result = await tool_session.call_tool(mcp_tool.name, kwargs)
+                        if result.isError:
+                            raise ToolError(tool_result_as_text(result.content))
+                    except McpError as e:
+                        # Some errors that are raised via McpError (e.g. -32603)
+                        # need to be converted to ToolError so that they make it
+                        # back to the model.
+                        raise exception_for_rpc_response_error(
+                            e.error.code, e.error.message, mcp_tool.name, kwargs
+                        ) from e
+                return [as_inspect_content(c) for c in result.content]
+        # get parameters (fill in missing ones)
+        parameters = ToolParams.model_validate(mcp_tool.inputSchema)
+        for name, param in parameters.properties.items():
+            # fall back to the parameter name when no description is provided
+            param.description = param.description or name
+        return ToolDef(
+            execute,
+            name=mcp_tool.name,
+            description=mcp_tool.description,
+            parameters=parameters,
+        )
+    # if we have been entered as a context manager then return that session,
+    # otherwise, create a brand new session from the client
+    @contextlib.asynccontextmanager
+    async def _client_session(self) -> AsyncIterator[ClientSession]:
+        # if _connect has been previously called and we still have the connection
+        # to the session, we can just return it
+        if self._session is not None:
+            yield self._session
+        # otherwise, create a new session and yield it (it will be cleaned up
+        # when the context manager exits)
+        else:
+            async with AsyncExitStack() as exit_stack:
+                with trace_action(logger, "MCPServer", f"create client ({self._name})"):
+                    read, write = await exit_stack.enter_async_context(self._client())
+                with trace_action(
+                    logger, "MCPServer", f"create session ({self._name})"
+                ):
+                    session = await exit_stack.enter_async_context(
+                        ClientSession(
+                            read, write, sampling_callback=self._sampling_fn()
+                        )
+                    )
+                with trace_action(
+                    logger, "MCPServer", f"initialize session ({self._name})"
+                ):
+                    await session.initialize()
+                yield session
+    def _sampling_fn(self) -> SamplingFnT | None:
+        """Return the sampling callback when events are enabled and a model is active, else None."""
+        # imported here rather than at module level
+        from inspect_ai.model._model import active_model
+        if self._events and active_model() is not None:
+            return sampling_fn
+        else:
+            return None
+def create_server_sse(
+    url: str,
+    headers: dict[str, Any] | None = None,
+    timeout: float = 5,
+    sse_read_timeout: float = 60 * 5,
+) -> MCPServer:
+    """Create an MCPServer reached over SSE at `url`.
+
+    `headers`, `timeout` and `sse_read_timeout` are forwarded unchanged to
+    `sse_client`; the URL doubles as the server name.
+    """
+    def open_client() -> MCPServerContext:
+        # invoked each time a new connection to the server is needed
+        return sse_client(url, headers, timeout, sse_read_timeout)
+    return MCPServerImpl(open_client, name=url, events=True)
+def create_server_stdio(
+    command: str,
+    args: list[str] | None = None,
+    cwd: str | Path | None = None,
+    env: dict[str, str] | None = None,
+) -> MCPServer:
+    """Create an MCPServer that talks to a local process over stdio.
+
+    Args:
+      command: Executable to launch.
+      args: Command-line arguments; omitted or None means no arguments.
+      cwd: Working directory passed to StdioServerParameters.
+      env: Environment passed to StdioServerParameters.
+    Returns:
+      MCPServer: Server named after the full command line.
+    """
+    # None replaces the former mutable `[]` default; copying the list also
+    # shields the server from later changes to the caller's list.
+    server_args = [] if args is None else list(args)
+    return MCPServerImpl(
+        lambda: stdio_client(
+            StdioServerParameters(
+                command=command,
+                args=server_args,
+                cwd=cwd,
+                env=env,
+            )
+        ),
+        name=" ".join([command] + server_args),
+        events=True,
+    )
+def create_server_sandbox(
+    command: str,
+    args: list[str] | None = None,
+    cwd: str | Path | None = None,
+    env: dict[str, str] | None = None,
+    sandbox: str | None = None,
+) -> MCPServer:
+    """Create an MCPServer whose process is reached through `sandbox_client`.
+
+    Args:
+      command: Executable to launch.
+      args: Command-line arguments; omitted or None means no arguments.
+      cwd: Working directory passed to StdioServerParameters.
+      env: Environment passed to StdioServerParameters.
+      sandbox: Passed to `sandbox_client` as `sandbox_name`.
+    Returns:
+      MCPServer: Server named after the full command line, with events disabled.
+    """
+    # None replaces the former mutable `[]` default; copying the list also
+    # shields the server from later changes to the caller's list.
+    server_args = [] if args is None else list(args)
+    # TODO: Confirm the lifetime concepts. By the time a request makes it to the
+    # sandbox, it's going to need both a session id and a server "name".
+    name = " ".join([command] + server_args)
+    return MCPServerImpl(
+        lambda: sandbox_client(
+            StdioServerParameters(
+                command=command,
+                args=server_args,
+                cwd=cwd,
+                env=env,
+            ),
+            sandbox_name=sandbox,
+        ),
+        name=name,
+        events=False,
+    )
+def tool_result_as_text(
+    content: list[TextContent | ImageContent | EmbeddedResource],
+) -> str:
+    """Flatten MCP tool result content into one string, items separated by blank lines.
+
+    Text items contribute their text, images a fixed placeholder, and embedded
+    resources their text when the resource is textual; other embedded
+    resources are skipped.
+    """
+    pieces: list[str] = []
+    for item in content:
+        if isinstance(item, TextContent):
+            piece = item.text
+        elif isinstance(item, ImageContent):
+            piece = "(base64 encoded image ommitted)"
+        elif isinstance(item.resource, TextResourceContents):
+            piece = item.resource.text
+        else:
+            # embedded resource without text content: nothing to show
+            continue
+        pieces.append(piece)
+    return "\n\n".join(pieces)

inspect-ai 0.3.90__py3-none-any.whl → 0.3.92__py3-none-any.whl

inspect-ai 0.3.90__py3-none-any.whl → 0.3.92__py3-none-any.whl