PyPI - inspect-ai - Versions diffs - 0.3.75__py3-none-any.whl → 0.3.77__py3-none-any.whl - Mend

inspect-ai 0.3.75__py3-none-any.whl → 0.3.77__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

inspect_ai/_cli/eval.py +16 -0
inspect_ai/_display/core/results.py +6 -1
inspect_ai/_eval/eval.py +8 -1
inspect_ai/_eval/evalset.py +6 -2
inspect_ai/_eval/registry.py +3 -5
inspect_ai/_eval/run.py +7 -2
inspect_ai/_eval/task/run.py +4 -0
inspect_ai/_util/content.py +3 -0
inspect_ai/_util/logger.py +3 -0
inspect_ai/_view/www/dist/assets/index.css +28 -16
inspect_ai/_view/www/dist/assets/index.js +4811 -4609
inspect_ai/_view/www/log-schema.json +79 -9
inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +22 -4
inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +1 -1
inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +1 -1
inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -2
inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +1 -1
inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +2 -2
inspect_ai/_view/www/src/types/log.d.ts +11 -5
inspect_ai/log/_recorders/json.py +8 -0
inspect_ai/log/_transcript.py +13 -4
inspect_ai/model/_call_tools.py +13 -4
inspect_ai/model/_chat_message.py +3 -0
inspect_ai/model/_model.py +5 -1
inspect_ai/model/_model_output.py +6 -1
inspect_ai/model/_openai.py +78 -10
inspect_ai/model/_openai_responses.py +277 -0
inspect_ai/model/_providers/anthropic.py +134 -75
inspect_ai/model/_providers/azureai.py +2 -2
inspect_ai/model/_providers/mistral.py +29 -13
inspect_ai/model/_providers/openai.py +64 -57
inspect_ai/model/_providers/openai_responses.py +177 -0
inspect_ai/model/_providers/openrouter.py +52 -2
inspect_ai/model/_providers/providers.py +1 -1
inspect_ai/model/_providers/vertex.py +5 -2
inspect_ai/tool/__init__.py +6 -0
inspect_ai/tool/_tool.py +23 -3
inspect_ai/tool/_tool_call.py +5 -2
inspect_ai/tool/_tool_support_helpers.py +200 -0
inspect_ai/tool/_tools/_bash_session.py +119 -0
inspect_ai/tool/_tools/_computer/_computer.py +1 -1
inspect_ai/tool/_tools/_text_editor.py +121 -0
inspect_ai/tool/_tools/_think.py +48 -0
inspect_ai/tool/_tools/_web_browser/_back_compat.py +150 -0
inspect_ai/tool/_tools/_web_browser/_web_browser.py +75 -130
inspect_ai/tool/_tools/_web_search.py +1 -1
inspect_ai/util/_json.py +28 -0
inspect_ai/util/_sandbox/context.py +16 -7
inspect_ai/util/_sandbox/docker/config.py +1 -1
inspect_ai/util/_sandbox/docker/internal.py +3 -3
{inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/METADATA +5 -2
{inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/RECORD +56 -80
{inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/WHEEL +1 -1
inspect_ai/model/_image.py +0 -15
inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +0 -8
inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +0 -24
inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +0 -25
inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -22
inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -63
inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +0 -71
inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +0 -323
inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +0 -5
inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +0 -279
inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +0 -9
inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +0 -293
inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +0 -94
inspect_ai/tool/_tools/_web_browser/_resources/constants.py +0 -2
inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +0 -2
inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -45
inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +0 -50
inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -48
inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +0 -280
inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +0 -65
inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +0 -64
inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +0 -146
inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +0 -64
inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +0 -180
inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -99
inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +0 -15
inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +0 -44
inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +0 -39
inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -214
inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -35
inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -192
{inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info/licenses}/LICENSE +0 -0
{inspect_ai-0.3.75.dist-info → inspect_ai-0.3.77.dist-info}/top_level.txt +0 -0

inspect_ai/tool/_tools/_computer/_computer.py CHANGED Viewed

@@ -15,7 +15,7 @@ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
 def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool:
     """Desktop computer tool.
-    See documentation at <https://inspect.aisi.org.uk/tools.html#sec-computer>.
+    See documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-computer>.
     Args:
       max_screenshots: The maximum number of screenshots to play

inspect_ai/tool/_tools/_text_editor.py ADDED Viewed

@@ -0,0 +1,121 @@
+import inspect
+from typing import Annotated, Literal
+from pydantic import BaseModel, Discriminator, RootModel
+from inspect_ai.tool import ToolResult
+from inspect_ai.tool._tool_support_helpers import (
+    exec_sandbox_rpc,
+    tool_container_sandbox,
+)
+from .._tool import Tool, tool
+# These models are cloned from the container code. Until we create a package
+# that is shared between the inspect and tool-container codebases, we'll just
+# have to live with the duplication.
+class BaseParams(BaseModel):
+    # Base model for the text editor command parameter models below; every
+    # command carries the path it applies to.
+    path: str
+class ViewParams(BaseParams):
+    # Parameters for the `view` command.
+    command: Literal["view"] = "view"
+    # Optional list of line numbers bounding what is shown; defaults to None.
+    view_range: list[int] | None = None
+class CreateParams(BaseParams):
+    # Parameters for the `create` command.
+    command: Literal["create"] = "create"
+    # Content of the file to be created (required).
+    file_text: str
+class StrReplaceParams(BaseParams):
+    # Parameters for the `str_replace` command.
+    command: Literal["str_replace"] = "str_replace"
+    # String to be replaced (required).
+    old_str: str
+    # Replacement string; optional and defaults to None.
+    new_str: str | None = None
+class InsertParams(BaseParams):
+    # Parameters for the `insert` command; both fields are required.
+    command: Literal["insert"] = "insert"
+    # Line number that locates the insertion.
+    insert_line: int
+    # String to insert.
+    new_str: str
+class UndoEditParams(BaseParams):
+    # Parameters for the `undo_edit` command; only the inherited `path` is needed.
+    command: Literal["undo_edit"] = "undo_edit"
+# Root model that accepts any one of the command parameter models above.
+class TextEditorParams(
+    RootModel[
+        ViewParams | CreateParams | StrReplaceParams | InsertParams | UndoEditParams
+    ]
+):
+    # The `command` field is the discriminator that selects the concrete model.
+    root: Annotated[
+        ViewParams | CreateParams | StrReplaceParams | InsertParams | UndoEditParams,
+        Discriminator("command"),
+    ]
+TextEditorResult = str
+@tool()
+def text_editor(timeout: int | None = None, user: str | None = None) -> Tool:
+    """Custom editing tool for viewing, creating and editing files.
+    Perform text editor operations using a sandbox environment (e.g. "docker").
+    IMPORTANT: This tool does not currently support Subtask isolation. This means
+    that a change made to a file by one Subtask will be visible to another Subtask.
+    Args:
+      timeout: Timeout (in seconds) for command.
+      user: User to execute commands as.
+    Returns:
+      String with command output (stdout) or command error (stderr).
+    """
+    # NOTE(review): `user` is accepted but never forwarded to the sandbox call
+    # below -- confirm whether `exec_sandbox_rpc` can run a request as a user.
+    async def execute(
+        command: Literal["view", "create", "str_replace", "insert", "undo_edit"],
+        path: str,
+        file_text: str | None = None,
+        insert_line: int | None = None,
+        new_str: str | None = None,
+        old_str: str | None = None,
+        view_range: list[int] | None = None,
+    ) -> ToolResult:
+        """
+        Use this function to execute text editing commands.
+        Args:
+          command: The command to execute.
+          path: Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.
+          file_text: Required parameter of `create` command, with the content of the file to be created.
+          insert_line: Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.
+          new_str: Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.
+          old_str: Required parameter of `str_replace` command containing the string in `path` to replace.
+          view_range: Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.
+        Returns:
+          The output of the command.
+        """
+        sandbox = await tool_container_sandbox("editor")
+        # Forward exactly the declared arguments (including those left as None).
+        # The signature is inspected once rather than once per local variable,
+        # and other locals such as `sandbox` are never part of the request.
+        declared = inspect.signature(execute).parameters
+        arguments = locals()
+        params = {name: arguments[name] for name in declared}
+        return await exec_sandbox_rpc(
+            sandbox,
+            "text_editor",
+            params,
+            TextEditorResult,
+            timeout=timeout,
+        )
+    return execute

inspect_ai/tool/_tools/_think.py ADDED Viewed

@@ -0,0 +1,48 @@
+from .._tool import Tool, tool
+from .._tool_call import ToolCall, ToolCallContent, ToolCallView, ToolCallViewer
+from .._tool_def import ToolDef
+@tool
+def think(
+    description: str | None = None,
+    thought_description: str | None = None,
+) -> Tool:
+    """Think tool for extra thinking.
+    Tool that provides models with the ability to include an additional thinking step as part of getting to its final answer.
+    Note that the `think()` tool is not a substitute for reasoning and extended thinking, but rather an alternate way of letting models express thinking that is better suited to some tool use scenarios. Please see the documentation on using the [think tool](https://inspect.aisi.org.uk/tools-standard.html#sec-think) before using it in your evaluations.
+    Args:
+        description: Override the default description of the think tool.
+        thought_description: Override the default description of the thought parameter.
+    Returns:
+        Tool named "think" whose call always returns an empty string.
+    """
+    # NOTE(review): the docstring below is presumably the default description
+    # shown to the model (it applies when `description` is None) -- its wording
+    # is part of the tool's behavior, so keep it deliberate.
+    async def execute(thought: str) -> str:
+        """Use the tool to think about something.
+        The tool will not obtain new information or change the environment, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.
+        Args:
+            thought: A thought to think about.
+        """
+        return ""
+    return ToolDef(
+        execute,
+        name="think",
+        description=description,
+        # Only override the parameter description when one was supplied.
+        parameters=(dict(thought=thought_description) if thought_description else None),
+        viewer=think_tool_viewer(),
+    ).as_tool()
+def think_tool_viewer() -> ToolCallViewer:
+    """Create the viewer that renders a think tool call as markdown.
+    Returns:
+        Viewer function mapping a tool call to a view whose content is the
+        call's `thought` argument.
+    """
+    def viewer(tool_call: ToolCall) -> ToolCallView:
+        # A call may arrive without the `thought` argument; render an empty
+        # thought rather than raising KeyError while displaying the call.
+        call = ToolCallContent(
+            format="markdown", content=tool_call.arguments.get("thought", "")
+        )
+        return ToolCallView(call=call)
+    return viewer

inspect_ai/tool/_tools/_web_browser/_back_compat.py ADDED Viewed

@@ -0,0 +1,150 @@
+"""This module provides the "old" client code for running against the, now deprecated, `aisiuk/inspect-web-browser-tool` image."""
+import re
+from logging import getLogger
+from textwrap import dedent
+from pydantic import Field
+from inspect_ai._util.content import ContentText
+from inspect_ai._util.error import PrerequisiteError
+from inspect_ai._util.logger import warn_once
+from inspect_ai.tool import ToolError, ToolResult
+from inspect_ai.util import SandboxEnvironment, StoreModel, sandbox_with, store_as
+from inspect_ai.util._sandbox.docker.internal import (
+    INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB_DEPRECATED,
+)
+logger = getLogger("web_browser")
+WEB_CLIENT_REQUEST = "/app/web_browser/web_client.py"
+WEB_CLIENT_NEW_SESSION = "/app/web_browser/web_client_new_session.py"
+# Store-backed state for the web browser tool; every field defaults to "".
+class WebBrowserStore(StoreModel):
+    # Main content text from the most recent command.
+    main_content: str = Field(default_factory=str)
+    # Web accessibility tree text from the most recent command.
+    web_at: str = Field(default_factory=str)
+    # Id of the browser session; empty until a session has been created.
+    session_id: str = Field(default_factory=str)
+async def old_web_browser_cmd(cmd: str, *args: str) -> ToolResult:
+    """Run a web browser command through the legacy web client scripts.
+    A browser session is created on first use; its id is kept in
+    `WebBrowserStore` and passed to the client on every request.
+    Args:
+      cmd: Name of the web client command to run.
+      *args: Positional string arguments passed to the command.
+    Returns:
+      The accessibility tree text with inline image data removed, preceded by
+      the main content as a separate text part when there is any.
+    Raises:
+      RuntimeError: If creating the session or running the command fails, or
+        if the output has neither an `error` nor a `web_at` field.
+      ToolError: If the client reports a non-blank error.
+      PrerequisiteError: If no sandbox provides the legacy web client.
+    """
+    sandbox_env = await _web_browser_sandbox()
+    warn_once(
+        logger,
+        "WARNING: Use of the `aisiuk/inspect-web-browser-tool` image is deprecated. Please update your configuration to use the `aisiuk/inspect-tool-support` image or install the `inspect-tool-support` package into your own image.",
+    )
+    store = store_as(WebBrowserStore)
+    if not store.session_id:
+        result = await sandbox_env.exec(
+            ["python3", WEB_CLIENT_NEW_SESSION], timeout=180
+        )
+        if not result.success:
+            raise RuntimeError(
+                f"Error creating new web browser session: {result.stderr}"
+            )
+        store.session_id = result.stdout.strip("\n")
+    # The session flag is a non-empty string in every case, so it is always
+    # part of the command line (the former flag-less branch was unreachable).
+    session_flag = f"--session_name={store.session_id}"
+    arg_list = ["python3", WEB_CLIENT_REQUEST, session_flag, cmd] + list(args)
+    result = await sandbox_env.exec(arg_list, timeout=180)
+    if not result.success:
+        raise RuntimeError(
+            f"Error executing web browser command {cmd}({', '.join(args)}): {result.stderr}"
+        )
+    response = _parse_web_browser_output(result.stdout)
+    if "error" in response and response.get("error", "").strip() != "":
+        raise ToolError(str(response.get("error")) or "(unknown error)")
+    if "web_at" not in response:
+        raise RuntimeError(
+            f"web_browser output must contain either 'error' or 'web_at' field: {result.stdout}"
+        )
+    main_content = str(response.get("main_content")) or None
+    web_at = str(response.get("web_at")) or "(no web accessibility tree available)"
+    # Remove base64 data from images.
+    web_at_lines = [
+        line.partition("data:image/png;base64")[0] for line in web_at.split("\n")
+    ]
+    store_as(WebBrowserStore).main_content = main_content or "(no main text summary)"
+    # The store keeps the tree as received; only the returned text is stripped.
+    store_as(WebBrowserStore).web_at = web_at
+    web_at = "\n".join(web_at_lines)
+    return (
+        [
+            ContentText(text=f"main content:\n{main_content}\n\n"),
+            ContentText(text=f"accessibility tree:\n{web_at}"),
+        ]
+        if main_content
+        else web_at
+    )
+async def _web_browser_sandbox() -> SandboxEnvironment:
+    """Find the sandbox that contains the legacy web client script.
+    Returns:
+      The sandbox environment in which `WEB_CLIENT_REQUEST` was found.
+    Raises:
+      PrerequisiteError: If no sandbox was found, with example Docker compose
+        configurations in the message.
+    """
+    located = await sandbox_with(WEB_CLIENT_REQUEST)
+    if located:
+        return located
+    # Nothing found: explain how to add the web browser service.
+    help_text = dedent(f"""
+                The web browser service was not found in any of the sandboxes for this sample. Please add the web browser service to your configuration. For example, the following Docker compose file uses the {INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB_DEPRECATED} image as its default sandbox:
+                services:
+                  default:
+                    image: "{INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB_DEPRECATED}"
+                    init: true
+                Alternatively, this Docker compose file creates a dedicated image for the web browser service:
+                services:
+                  default:
+                    image: "python:3.12-bookworm"
+                    init: true
+                    command: "tail -f /dev/null"
+                  web_browser:
+                    image: "{INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB_DEPRECATED}"
+                    init: true
+                """).strip()
+    raise PrerequisiteError(help_text)
+def _parse_web_browser_output(output: str) -> dict[str, str]:
+    """Split web client output into its named fields.
+    A line of the form `<field>: <text>` starts a field, where the field is one
+    of error, main_content, web_at, web_url or info. Following lines belong to
+    that field until the next field line. Lines before the first field line
+    are discarded.
+    Args:
+      output: Raw text printed by the web client.
+    Returns:
+      Dict with all five field names as keys; fields absent from the output
+      map to "".
+    """
+    header = re.compile(r"^(error|main_content|web_at|web_url|info)\s*:\s*(.+)$")
+    parsed: dict[str, str] = {
+        "web_url": "",
+        "main_content": "",
+        "web_at": "",
+        "info": "",
+        "error": "",
+    }
+    current: str | None = None
+    pending: list[str] = []
+    for text_line in output.splitlines():
+        found = header.match(text_line)
+        if found is None:
+            # Continuation of the current field (or noise before the first one).
+            pending.append(text_line)
+            continue
+        # A new field begins: commit the one collected so far.
+        if current is not None:
+            parsed[current] = "\n".join(pending)
+        current = found.group(1)
+        pending = [found.group(2)]
+    if current is not None:
+        parsed[current] = "\n".join(pending)
+    return parsed

inspect_ai/tool/_tools/_web_browser/_web_browser.py CHANGED Viewed

@@ -1,23 +1,40 @@
 import re
-from textwrap import dedent
-from pydantic import Field
+from pydantic import BaseModel, Field
 from inspect_ai._util.content import ContentText
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai.tool._tool import Tool, ToolError, ToolResult, tool
 from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
 from inspect_ai.tool._tool_info import parse_tool_info
+from inspect_ai.tool._tool_support_helpers import (
+    exec_sandbox_rpc,
+    tool_container_sandbox,
+)
 from inspect_ai.tool._tool_with import tool_with
-from inspect_ai.util._sandbox import SandboxEnvironment, sandbox_with
-from inspect_ai.util._sandbox.docker.internal import INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB
 from inspect_ai.util._store_model import StoreModel, store_as
+from ._back_compat import old_web_browser_cmd
+# These two models are cloned from the container code. Until we create a package
+# that is shared between the inspect and tool-container codebases, we'll just
+# have to live with the duplication.
+class NewSessionResult(BaseModel):
+    # Name of the newly created browser session; it is stored as the session id
+    # and sent as `session_name` with every later request.
+    session_name: str
+class CrawlerResult(BaseModel):
+    # Result type requested from the sandbox RPC call for a web browser command.
+    web_url: str
+    # Main content text; optional and defaults to None.
+    main_content: str | None = None
+    # Web accessibility tree text.
+    web_at: str
+    # Error message; a non-blank value is raised as a ToolError.
+    error: str | None = None
 def web_browser(interactive: bool = True) -> list[Tool]:
     """Tools used for web browser navigation.
-     See documentation at <https://inspect.aisi.org.uk/tools.html#sec-web-browser>.
+     See documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-browser>.
     Args:
        interactive: Provide interactive tools (enable
@@ -85,7 +102,7 @@ def web_browser_go() -> Tool:
         Returns:
           Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await web_browser_cmd("web_go", url)
+        return await _web_browser_cmd("web_go", locals())
     return execute
@@ -165,7 +182,7 @@ def web_browser_click() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await web_browser_cmd("web_click", str(element_id))
+        return await _web_browser_cmd("web_click", locals())
     return execute
@@ -203,7 +220,7 @@ def web_browser_type_submit() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await web_browser_cmd("web_type_submit", str(element_id), text)
+        return await _web_browser_cmd("web_type_submit", locals())
     return execute
@@ -241,7 +258,7 @@ def web_browser_type() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await web_browser_cmd("web_type", str(element_id), text)
+        return await _web_browser_cmd("web_type", locals())
     return execute
@@ -271,7 +288,7 @@ def web_browser_scroll() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await web_browser_cmd("web_scroll", direction)
+        return await _web_browser_cmd("web_scroll", locals())
     return execute
@@ -292,7 +309,7 @@ def web_browser_back() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await web_browser_cmd("web_back")
+        return await _web_browser_cmd("web_back", locals())
     return execute
@@ -313,7 +330,7 @@ def web_browser_forward() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await web_browser_cmd("web_forward")
+        return await _web_browser_cmd("web_forward", locals())
     return execute
@@ -334,133 +351,61 @@ def web_browser_refresh() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await web_browser_cmd("web_refresh")
+        return await _web_browser_cmd("web_refresh", locals())
     return execute
-WEB_CLIENT_REQUEST = "/app/web_browser/web_client.py"
-WEB_CLIENT_NEW_SESSION = "/app/web_browser/web_client_new_session.py"
-async def web_browser_cmd(cmd: str, *args: str) -> ToolResult:
-    sandbox_env = await sandbox_with(WEB_CLIENT_NEW_SESSION)
-    session_flag = ""
-    if sandbox_env:
-        store = store_as(WebBrowserStore)
-        if not store.session_id:
-            result = await sandbox_env.exec(
-                ["python3", WEB_CLIENT_NEW_SESSION], timeout=180
+async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolResult:
+    """Run a web browser command in the tool support sandbox.
+    Falls back to the legacy web client when no tool support sandbox is found.
+    A browser session is created on first use and its name is added to
+    `params` (which is modified in place) as `session_name`.
+    Args:
+      tool_name: Name of the RPC method to invoke, e.g. "web_go".
+      params: Arguments of the command, keyed by parameter name.
+    Returns:
+      The accessibility tree text with inline image data removed, preceded by
+      the main content as a separate text part when there is any.
+    Raises:
+      ToolError: If the command reports a non-blank error.
+      PrerequisiteError: If neither the tool support sandbox nor the legacy
+        web browser sandbox is available.
+    """
+    try:
+        sandbox_env = await tool_container_sandbox("web browser")
+    except PrerequisiteError as e:
+        # The user may have the old, incompatible, sandbox. If so, use that and
+        # execute the old compatible code.
+        try:
+            # The legacy client takes positional string arguments, so pass the
+            # parameter values in dict order. Unpacking the dict itself would
+            # pass the parameter names (e.g. "url") instead of their values.
+            return await old_web_browser_cmd(
+                tool_name, *(str(value) for value in params.values())
+            )
+        except PrerequisiteError:
+            raise e
+    store = store_as(WebBrowserStore)
+    if not store.session_id:
+        store.session_id = (
+            await exec_sandbox_rpc(
+                sandbox_env,
+                "web_new_session",
+                {"headful": False},
+                NewSessionResult,
             )
+        ).session_name
-            if not result.success:
-                raise RuntimeError(
-                    f"Error creating new web browser session: {result.stderr}"
-                )
-            store.session_id = result.stdout.strip("\n")
+    params["session_name"] = store.session_id
-        session_flag = f"--session_name={store.session_id}"
-    else:
-        sandbox_env = await web_browser_sandbox()
-    arg_list = None
-    if session_flag:
-        arg_list = ["python3", WEB_CLIENT_REQUEST, session_flag, cmd] + list(args)
+    crawler_result = await exec_sandbox_rpc(
+        sandbox_env, tool_name, params, CrawlerResult
+    )
+    if crawler_result.error and crawler_result.error.strip() != "":
+        raise ToolError(crawler_result.error)
     else:
-        arg_list = ["python3", WEB_CLIENT_REQUEST, cmd] + list(args)
+        main_content = crawler_result.main_content
+        web_at = crawler_result.web_at or "(no web accessibility tree available)"
+        # Remove base64 data from images.
+        web_at_lines = web_at.split("\n")
+        web_at_lines = [
+            line.partition("data:image/png;base64")[0] for line in web_at_lines
+        ]
-    result = await sandbox_env.exec(arg_list, timeout=180)
-    if not result.success:
-        raise RuntimeError(
-            f"Error executing web browser command {cmd}({', '.join(args)}): {result.stderr}"
+        store_as(WebBrowserStore).main_content = (
+            main_content or "(no main text summary)"
         )
-    else:
-        response = parse_web_browser_output(result.stdout)
-        if "error" in response and response.get("error", "").strip() != "":
-            raise ToolError(str(response.get("error")) or "(unknown error)")
-        elif "web_at" in response:
-            main_content = str(response.get("main_content")) or None
-            web_at = (
-                str(response.get("web_at")) or "(no web accessibility tree available)"
-            )
-            # Remove base64 data from images.
-            web_at_lines = web_at.split("\n")
-            web_at_lines = [
-                line.partition("data:image/png;base64")[0] for line in web_at_lines
-            ]
+        store_as(WebBrowserStore).web_at = web_at
-            store_as(WebBrowserStore).main_content = (
-                main_content or "(no main text summary)"
-            )
-            store_as(WebBrowserStore).web_at = web_at
-            web_at = "\n".join(web_at_lines)
-            return (
-                [
-                    ContentText(text=f"main content:\n{main_content}\n\n"),
-                    ContentText(text=f"accessibility tree:\n{web_at}"),
-                ]
-                if main_content
-                else web_at
-            )
-        else:
-            raise RuntimeError(
-                f"web_browser output must contain either 'error' or 'web_at' field: {result.stdout}"
-            )
-async def web_browser_sandbox() -> SandboxEnvironment:
-    sb = await sandbox_with(WEB_CLIENT_REQUEST)
-    if sb:
-        return sb
-    else:
-        msg = dedent(f"""
-                The web browser service was not found in any of the sandboxes for this sample. Please add the web browser service to your configuration. For example, the following Docker compose file uses the {INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB} image as its default sandbox:
-                services:
-                  default:
-                    image: "{INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB}"
-                    init: true
-                Alternatively, this Docker compose file creates a dedicated image for the web browser service:
-                services:
-                  default:
-                    image: "python:3.12-bookworm"
-                    init: true
-                    command: "tail -f /dev/null"
-                  web_browser:
-                    image: "{INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB}"
-                    init: true
-                """).strip()
-        raise PrerequisiteError(msg)
-def parse_web_browser_output(output: str) -> dict[str, str]:
-    response: dict[str, str] = dict(
-        web_url="", main_content="", web_at="", info="", error=""
-    )
-    active_field: str | None = None
-    active_field_lines: list[str] = []
-    def collect_active_field() -> None:
-        if active_field is not None:
-            response[active_field] = "\n".join(active_field_lines)
-        active_field_lines.clear()
-    for line in output.splitlines():
-        field_match = re.match(
-            r"^(error|main_content|web_at|web_url|info)\s*:\s*(.+)$", line
+        web_at = "\n".join(web_at_lines)
+        return (
+            [
+                ContentText(text=f"main content:\n{main_content}\n\n"),
+                ContentText(text=f"accessibility tree:\n{web_at}"),
+            ]
+            if main_content
+            else web_at
         )
-        if field_match:
-            collect_active_field()
-            active_field = field_match.group(1)
-            active_field_lines.append(field_match.group(2))
-        else:
-            active_field_lines.append(line)
-    collect_active_field()
-    return response

inspect_ai/tool/_tools/_web_search.py CHANGED Viewed

@@ -52,7 +52,7 @@ def web_search(
     A web search is conducted using the specified provider, the results are parsed for relevance
     using the specified model, and the top 'num_results' relevant pages are returned.
-    See further documentation at <https://inspect.aisi.org.uk/tools.html#sec-web-search>.
+    See further documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-search>.
     Args:
       provider: Search provider (defaults to "google", currently

inspect_ai/util/_json.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import types
 import typing
+from copy import deepcopy
 from dataclasses import is_dataclass
 from typing import (
     Any,
@@ -10,6 +11,7 @@ from typing import (
     Tuple,
     Type,
     Union,
+    cast,
     get_args,
     get_origin,
     get_type_hints,
@@ -127,6 +129,7 @@ def cls_json_schema(cls: Type[Any]) -> JSONSchema:
                 required.append(name)
     elif isinstance(cls, type) and issubclass(cls, BaseModel):
         schema = cls.model_json_schema()
+        schema = resolve_schema_references(schema)
         for name, prop in schema.get("properties", {}).items():
             properties[name] = JSONSchema(**prop)
         required = schema.get("required", [])
@@ -168,3 +171,28 @@ def python_type_to_json_type(python_type: str | None) -> JSONType:
             raise ValueError(
                 f"Unsupported type: {python_type} for Python to JSON conversion."
             )
+def resolve_schema_references(schema: dict[str, Any]) -> dict[str, Any]:
+    """Resolves all $ref references in a JSON schema by inlining the definitions.
+    Only references of the form `#/$defs/<name>` whose name exists in the
+    schema's `$defs` are inlined; any other `$ref` is left as it is. Keywords
+    that sit next to a `$ref` (for example a field-level `description`) are
+    kept and take priority over the same keyword in the inlined definition.
+    The input schema is not modified.
+    NOTE: definitions that refer to themselves (directly or indirectly) are not
+    supported; inlining them recurses without bound.
+    Args:
+      schema: JSON schema, optionally with a top-level `$defs` section.
+    Returns:
+      Copy of the schema without `$defs` and with the references inlined.
+    """
+    schema = deepcopy(schema)
+    definitions = schema.pop("$defs", {})
+    def _resolve_refs(obj: Any) -> Any:
+        if isinstance(obj, list):
+            return [_resolve_refs(item) for item in obj]
+        if not isinstance(obj, dict):
+            return obj
+        ref = obj.get("$ref")
+        # Only string refs are references; a non-string "$ref" entry is plain
+        # data (e.g. a property that happens to be named "$ref").
+        if isinstance(ref, str) and ref.startswith("#/$defs/"):
+            ref_key = ref.split("/")[-1]
+            if ref_key in definitions:
+                # Inline a deep copy of the definition, resolving nested refs.
+                resolved = _resolve_refs(deepcopy(definitions[ref_key]))
+                siblings = {
+                    k: _resolve_refs(v) for k, v in obj.items() if k != "$ref"
+                }
+                if siblings and isinstance(resolved, dict):
+                    # Keywords written alongside the $ref override the definition.
+                    return {**resolved, **siblings}
+                return resolved
+        # Process all entries in the dictionary
+        return {k: _resolve_refs(v) for k, v in obj.items()}
+    return cast(dict[str, Any], _resolve_refs(schema))

inspect-ai 0.3.75__py3-none-any.whl → 0.3.77__py3-none-any.whl

inspect-ai 0.3.75__py3-none-any.whl → 0.3.77__py3-none-any.whl