PyPI - inspect-ai - Versions diffs - 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl - Mend

inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (166) hide show

inspect_ai/_cli/common.py +3 -1
inspect_ai/_cli/eval.py +15 -9
inspect_ai/_display/core/active.py +4 -1
inspect_ai/_display/core/config.py +3 -3
inspect_ai/_display/core/panel.py +7 -3
inspect_ai/_display/plain/__init__.py +0 -0
inspect_ai/_display/plain/display.py +203 -0
inspect_ai/_display/rich/display.py +0 -5
inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
inspect_ai/_display/textual/widgets/samples.py +79 -12
inspect_ai/_display/textual/widgets/sandbox.py +37 -0
inspect_ai/_eval/eval.py +10 -1
inspect_ai/_eval/loader.py +79 -19
inspect_ai/_eval/registry.py +6 -0
inspect_ai/_eval/score.py +3 -1
inspect_ai/_eval/task/results.py +51 -22
inspect_ai/_eval/task/run.py +47 -13
inspect_ai/_eval/task/sandbox.py +10 -5
inspect_ai/_util/constants.py +1 -0
inspect_ai/_util/port_names.py +61 -0
inspect_ai/_util/text.py +23 -0
inspect_ai/_view/www/App.css +31 -1
inspect_ai/_view/www/dist/assets/index.css +31 -1
inspect_ai/_view/www/dist/assets/index.js +25498 -2044
inspect_ai/_view/www/log-schema.json +32 -2
inspect_ai/_view/www/package.json +2 -0
inspect_ai/_view/www/src/App.mjs +14 -16
inspect_ai/_view/www/src/Types.mjs +1 -2
inspect_ai/_view/www/src/api/Types.ts +133 -0
inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
inspect_ai/_view/www/src/api/api-http.ts +219 -0
inspect_ai/_view/www/src/api/api-shared.ts +47 -0
inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
inspect_ai/_view/www/src/api/index.ts +51 -0
inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
inspect_ai/_view/www/src/index.js +77 -4
inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
inspect_ai/_view/www/src/types/log.d.ts +13 -2
inspect_ai/_view/www/src/utils/Format.mjs +10 -3
inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
inspect_ai/_view/www/src/utils/vscode.ts +36 -0
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
inspect_ai/_view/www/vite.config.js +7 -0
inspect_ai/_view/www/yarn.lock +116 -0
inspect_ai/approval/_human/__init__.py +0 -0
inspect_ai/approval/_human/manager.py +1 -1
inspect_ai/approval/_policy.py +12 -6
inspect_ai/log/_log.py +1 -1
inspect_ai/log/_samples.py +16 -0
inspect_ai/log/_transcript.py +4 -1
inspect_ai/model/_call_tools.py +59 -0
inspect_ai/model/_conversation.py +16 -7
inspect_ai/model/_generate_config.py +12 -12
inspect_ai/model/_model.py +117 -18
inspect_ai/model/_model_output.py +22 -2
inspect_ai/model/_openai.py +383 -0
inspect_ai/model/_providers/anthropic.py +152 -55
inspect_ai/model/_providers/azureai.py +21 -21
inspect_ai/model/_providers/bedrock.py +37 -40
inspect_ai/model/_providers/goodfire.py +248 -0
inspect_ai/model/_providers/google.py +46 -54
inspect_ai/model/_providers/groq.py +7 -3
inspect_ai/model/_providers/hf.py +6 -0
inspect_ai/model/_providers/mistral.py +13 -12
inspect_ai/model/_providers/openai.py +51 -218
inspect_ai/model/_providers/openai_o1.py +11 -12
inspect_ai/model/_providers/providers.py +23 -1
inspect_ai/model/_providers/together.py +12 -12
inspect_ai/model/_providers/util/__init__.py +2 -3
inspect_ai/model/_providers/util/hf_handler.py +1 -1
inspect_ai/model/_providers/util/llama31.py +1 -1
inspect_ai/model/_providers/util/util.py +0 -76
inspect_ai/model/_providers/vertex.py +1 -4
inspect_ai/scorer/_metric.py +3 -0
inspect_ai/scorer/_reducer/reducer.py +1 -1
inspect_ai/scorer/_scorer.py +4 -3
inspect_ai/solver/__init__.py +4 -5
inspect_ai/solver/_basic_agent.py +1 -1
inspect_ai/solver/_bridge/__init__.py +3 -0
inspect_ai/solver/_bridge/bridge.py +100 -0
inspect_ai/solver/_bridge/patch.py +170 -0
inspect_ai/solver/_prompt.py +35 -5
inspect_ai/solver/_solver.py +6 -0
inspect_ai/solver/_task_state.py +80 -38
inspect_ai/tool/__init__.py +2 -0
inspect_ai/tool/_tool.py +12 -1
inspect_ai/tool/_tool_call.py +10 -0
inspect_ai/tool/_tool_def.py +16 -5
inspect_ai/tool/_tool_with.py +21 -4
inspect_ai/tool/beta/__init__.py +5 -0
inspect_ai/tool/beta/_computer/__init__.py +3 -0
inspect_ai/tool/beta/_computer/_common.py +133 -0
inspect_ai/tool/beta/_computer/_computer.py +155 -0
inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
inspect_ai/util/__init__.py +2 -0
inspect_ai/util/_display.py +5 -0
inspect_ai/util/_limit.py +26 -0
inspect_ai/util/_sandbox/docker/docker.py +64 -1
inspect_ai/util/_sandbox/docker/internal.py +3 -1
inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
inspect_ai/util/_sandbox/environment.py +14 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
inspect_ai/_view/www/src/api/Types.mjs +0 -117
inspect_ai/_view/www/src/api/api-http.mjs +0 -300
inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
inspect_ai/_view/www/src/api/index.mjs +0 -49
inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0

inspect_ai/tool/_tool.py CHANGED Viewed

@@ -24,7 +24,7 @@ from inspect_ai._util.registry import (
     registry_tag,
 )
-from ._tool_call import ToolCallViewer
+from ._tool_call import ToolCallModelInput, ToolCallViewer
 logger = getLogger(__name__)
@@ -112,6 +112,7 @@ def tool(
     *,
     name: str | None = None,
     viewer: ToolCallViewer | None = None,
+    model_input: ToolCallModelInput | None = None,
     parallel: bool = True,
     prompt: str | None = None,
 ) -> Callable[[Callable[P, Tool]], Callable[P, Tool]]: ...
@@ -122,6 +123,7 @@ def tool(
     *,
     name: str | None = None,
     viewer: ToolCallViewer | None = None,
+    model_input: ToolCallModelInput | None = None,
     parallel: bool = True,
     prompt: str | None = None,
 ) -> Callable[P, Tool] | Callable[[Callable[P, Tool]], Callable[P, Tool]]:
@@ -135,6 +137,8 @@ def tool(
             will be used as the name of the tool.
         viewer (ToolCallViewer | None): Provide a custom view
             of tool call and context.
+        model_input (ToolCallModelInput | None): Provide a custom
+            function for playing back tool results as model input.
         parallel (bool):
             Does this tool support parallel execution?
             (defaults to True).
@@ -176,6 +180,9 @@ def tool(
                         TOOL_PROMPT: prompt,
                         TOOL_PARALLEL: parallel,
                         TOOL_VIEWER: viewer,
+                        TOOL_MODEL_INPUT: (
+                            model_input or getattr(tool, TOOL_INIT_MODEL_INPUT, None)
+                        ),
                     },
                 ),
                 *args,
@@ -195,3 +202,7 @@ def tool(
 TOOL_PROMPT = "prompt"
 TOOL_PARALLEL = "parallel"
 TOOL_VIEWER = "viewer"
+TOOL_MODEL_INPUT = "model_input"
+TOOL_INIT_MODEL_INPUT = "__TOOL_INIT_MODEL_INPUT__"

inspect_ai/tool/_tool_call.py CHANGED Viewed

@@ -3,6 +3,8 @@ from typing import Any, Callable, Literal
 from pydantic import BaseModel, Field
+from inspect_ai._util.content import Content
 class ToolCallContent(BaseModel):
     """Content to include in tool call view."""
@@ -71,3 +73,11 @@ class ToolCallError:
 ToolCallViewer = Callable[[ToolCall], ToolCallView]
 """Custom view renderer for tool calls."""
+ToolCallModelInput = Callable[[int, int, str | list[Content]], str | list[Content]]
+"""Determine how tool call results are played back as model input.
+The first argument is an index into the total number of tool results
+for this tool in the message history, the second is the total number.
+"""

inspect_ai/tool/_tool_def.py CHANGED Viewed

@@ -13,8 +13,8 @@ from inspect_ai._util.registry import (
     set_registry_params,
 )
-from ._tool import TOOL_PARALLEL, TOOL_PROMPT, TOOL_VIEWER, Tool
-from ._tool_call import ToolCallViewer
+from ._tool import TOOL_MODEL_INPUT, TOOL_PARALLEL, TOOL_PROMPT, TOOL_VIEWER, Tool
+from ._tool_call import ToolCallModelInput, ToolCallViewer
 from ._tool_description import (
     ToolDescription,
     set_tool_description,
@@ -33,6 +33,7 @@ class ToolDef:
         parameters: dict[str, str] | ToolParams | None = None,
         parallel: bool | None = None,
         viewer: ToolCallViewer | None = None,
+        model_input: ToolCallModelInput | None = None,
     ) -> None:
         """Tool definition.
@@ -46,6 +47,8 @@ class ToolDef:
           parallel (bool | None): Does the tool support parallel execution
              (defaults to True if not specified)
           viewer (ToolCallViewer | None): Optional tool call viewer implementation.
+          model_input (ToolCallModelInput | None): Optional function that determines how
+              tool call results are played back as model input.
         Returns:
           Tool definition.
@@ -68,6 +71,7 @@ class ToolDef:
             parameters = parameters if parameters is not None else tdef.parameters
             self.parallel = parallel if parallel is not None else tdef.parallel
             self.viewer = viewer or tdef.viewer
+            self.model_input = model_input or tdef.model_input
         # if its not a tool then extract tool_info if all fields have not
         # been provided explicitly
@@ -97,6 +101,7 @@ class ToolDef:
             # behavioral attributes
             self.parallel = parallel is not False
             self.viewer = viewer
+            self.model_input = model_input
     tool: Callable[..., Any]
     """Callable to execute tool."""
@@ -116,6 +121,9 @@ class ToolDef:
     viewer: ToolCallViewer | None
     """Custom viewer for tool call"""
+    model_input: ToolCallModelInput | None
+    """Custom model input presenter for tool calls."""
     def as_tool(self) -> Tool:
         """Convert a ToolDef to a Tool."""
         tool = self.tool
@@ -159,11 +167,12 @@ class ToolDefFields(NamedTuple):
     parameters: ToolParams
     parallel: bool
     viewer: ToolCallViewer | None
+    model_input: ToolCallModelInput | None
 def tool_def_fields(tool: Tool) -> ToolDefFields:
     # get tool_info
-    name, prompt, parallel, viewer = tool_registry_info(tool)
+    name, prompt, parallel, viewer, model_input = tool_registry_info(tool)
     tool_info = parse_tool_info(tool)
     # if there is a description then append any prompt to the
@@ -213,15 +222,17 @@ def tool_def_fields(tool: Tool) -> ToolDefFields:
         parameters=tool_info.parameters,
         parallel=parallel,
         viewer=viewer,
+        model_input=model_input,
     )
 def tool_registry_info(
     tool: Tool,
-) -> tuple[str, str | None, bool, ToolCallViewer | None]:
+) -> tuple[str, str | None, bool, ToolCallViewer | None, ToolCallModelInput | None]:
     info = registry_info(tool)
     name = info.name.split("/")[-1]
     prompt = info.metadata.get(TOOL_PROMPT, None)
     parallel = info.metadata.get(TOOL_PARALLEL, True)
     viewer = info.metadata.get(TOOL_VIEWER, None)
-    return name, prompt, parallel, viewer
+    model_input = info.metadata.get(TOOL_MODEL_INPUT, None)
+    return name, prompt, parallel, viewer, model_input

inspect_ai/tool/_tool_with.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from copy import copy
+from copy import deepcopy
 from inspect_ai._util.registry import (
     registry_info,
@@ -6,8 +6,9 @@ from inspect_ai._util.registry import (
     set_registry_info,
     set_registry_params,
 )
+from inspect_ai.tool._tool_call import ToolCallModelInput, ToolCallViewer
-from ._tool import Tool
+from ._tool import TOOL_MODEL_INPUT, TOOL_PARALLEL, TOOL_VIEWER, Tool
 from ._tool_description import ToolDescription, set_tool_description
 from ._tool_info import parse_tool_info
@@ -17,6 +18,9 @@ def tool_with(
     name: str | None = None,
     description: str | None = None,
     parameters: dict[str, str] | None = None,
+    parallel: bool | None = None,
+    viewer: ToolCallViewer | None = None,
+    model_input: ToolCallModelInput | None = None,
 ) -> Tool:
     """Tool with modifications to name and descriptions.
@@ -25,6 +29,11 @@ def tool_with(
        name (str | None): Tool name (optional).
        description (str | None): Tool description (optional).
        parameters (dict[str,str] | None): Parameter descriptions (optional)
+       parallel (bool | None): Does the tool support parallel execution
+          (defaults to True if not specified)
+       viewer (ToolCallViewer | None): Optional tool call viewer implementation.
+       model_input (ToolCallModelInput | None): Optional function that determines how
+           tool call results are played back as model input.
     Returns:
        A copy of the passed tool with the specified descriptive information.
@@ -46,8 +55,16 @@ def tool_with(
             ]
     # copy the tool and set the descriptions on the new copy
-    tool_copy = copy(tool)
-    set_registry_info(tool_copy, registry_info(tool))
+    tool_copy = deepcopy(tool)
+    info = registry_info(tool).model_copy()
+    if parallel is not None:
+        info.metadata[TOOL_PARALLEL] = parallel
+    elif viewer is not None:
+        info.metadata[TOOL_VIEWER] = viewer
+    elif model_input is not None:
+        info.metadata[TOOL_MODEL_INPUT] = model_input
+    set_registry_info(tool_copy, info)
     set_registry_params(tool_copy, registry_params(tool))
     set_tool_description(
         tool_copy,

inspect_ai/tool/beta/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from ._computer import computer
+__all__ = [
+    "computer",
+]

inspect_ai/tool/beta/_computer/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from ._computer import computer
+__all__ = ["computer"]

inspect_ai/tool/beta/_computer/_common.py ADDED Viewed

@@ -0,0 +1,133 @@
+import json
+from textwrap import dedent
+from typing import Literal
+from pydantic import BaseModel, Field
+from inspect_ai._util.content import ContentText
+from inspect_ai._util.error import PrerequisiteError
+from inspect_ai.model import ContentImage
+from inspect_ai.tool import ToolError, ToolResult
+from inspect_ai.util._sandbox.context import sandbox_with
+from inspect_ai.util._sandbox.environment import SandboxEnvironment
+# Names of the actions understood by the computer tool; each one is sent
+# as the value following `--action` by the helper functions in this module.
+Action = Literal[
+    "key",
+    "type",
+    "mouse_move",
+    "left_click",
+    "left_click_drag",
+    "right_click",
+    "middle_click",
+    "double_click",
+    "screenshot",
+    "cursor_position",
+]
+class ToolExecResult(BaseModel):
+    """Reply of one computer-tool command, parsed from the JSON it prints to stdout."""
+    # textual output of the command (absent when there is none)
+    output: str | None = None
+    # error message reported by the tool (absent on success)
+    error: str | None = None
+    # base64 encoded PNG image data (absent when no image was returned)
+    base64_image: str | None = None
+async def _send_cmd(cmdTail: list[str], timeout: int | None = None) -> ToolResult:
+    """Run one computer-tool action in the sandbox and convert its reply to a tool result.
+    Args:
+      cmdTail (list[str]): Action name followed by its arguments; appended after `--action`.
+      timeout (int | None): Passed through to the sandbox `exec()` call.
+    Returns:
+      `[ContentText, ContentImage]` when the reply has both text and an image, the text
+      alone, a one-element image list, or "OK" when the reply carries neither.
+    Raises:
+      RuntimeError: If the sandbox command does not succeed.
+      ToolError: If the reply of the tool contains an error.
+    """
+    from inspect_ai.log._samples import sample_active
+    # the tool may only be used while a sample (with an id) is active
+    sample = sample_active()
+    assert sample
+    sample_id = sample.sample.id
+    assert sample_id
+    cmd = ["python3", "/opt/inspect/tool/computer_tool.py", "--action"] + cmdTail
+    sandbox = await computer_sandbox()
+    raw_exec_result = await sandbox.exec(cmd, timeout=timeout)
+    if not raw_exec_result.success:
+        # NOTE: the original message had a stray "$" in front of the command
+        raise RuntimeError(
+            f"Failure executing command: {cmd} {raw_exec_result.stderr}"
+        )
+    # the tool script prints a JSON object with the ToolExecResult fields
+    result = ToolExecResult(**json.loads(raw_exec_result.stdout))
+    if result.error:
+        raise ToolError(result.error)
+    image = (
+        ContentImage(image=f"data:image/png;base64,{result.base64_image}")
+        if result.base64_image
+        else None
+    )
+    # an empty string is treated the same as missing output
+    text = result.output or None
+    if text is not None and image is not None:
+        return [ContentText(text=text), image]
+    if text is not None:
+        return text
+    if image is not None:
+        return [image]
+    return "OK"
+async def cursor_position(timeout: int | None = None) -> ToolResult:
+    """Send the `cursor_position` action to the computer tool and return its result."""
+    action_args = ["cursor_position"]
+    return await _send_cmd(action_args, timeout=timeout)
+async def screenshot(timeout: int | None = None) -> ToolResult:
+    """Send the `screenshot` action to the computer tool and return its result."""
+    action_args = ["screenshot"]
+    return await _send_cmd(action_args, timeout=timeout)
+async def mouse_move(x: int, y: int, timeout: int | None = None) -> ToolResult:
+    """Send the `mouse_move` action with target coordinate (x, y) and return its result."""
+    action_args = ["mouse_move", "--coordinate", str(x), str(y)]
+    return await _send_cmd(action_args, timeout=timeout)
+async def left_click(timeout: int | None = None) -> ToolResult:
+    """Send the `left_click` action to the computer tool and return its result."""
+    action_args = ["left_click"]
+    return await _send_cmd(action_args, timeout=timeout)
+async def left_click_drag(x: int, y: int, timeout: int | None = None) -> ToolResult:
+    """Send the `left_click_drag` action with target coordinate (x, y) and return its result."""
+    action_args = ["left_click_drag", "--coordinate", str(x), str(y)]
+    return await _send_cmd(action_args, timeout=timeout)
+async def right_click(timeout: int | None = None) -> ToolResult:
+    """Send the `right_click` action to the computer tool and return its result."""
+    action_args = ["right_click"]
+    return await _send_cmd(action_args, timeout=timeout)
+async def middle_click(timeout: int | None = None) -> ToolResult:
+    """Send the `middle_click` action to the computer tool and return its result."""
+    action_args = ["middle_click"]
+    return await _send_cmd(action_args, timeout=timeout)
+async def double_click(timeout: int | None = None) -> ToolResult:
+    """Send the `double_click` action to the computer tool and return its result."""
+    action_args = ["double_click"]
+    return await _send_cmd(action_args, timeout=timeout)
+async def press_key(key: str, timeout: int | None = None) -> ToolResult:
+    """Send the `key` action with `key` as its `--text` value and return its result."""
+    action_args = ["key", "--text", key]
+    return await _send_cmd(action_args, timeout=timeout)
+async def type(text: str, timeout: int | None = None) -> ToolResult:
+    """Send the `type` action with `text` as its `--text` value and return its result."""
+    # NOTE: this name shadows the builtin `type` inside this module
+    action_args = ["type", "--text", text]
+    return await _send_cmd(action_args, timeout=timeout)
+async def computer_sandbox() -> SandboxEnvironment:
+    """Return the sandbox found by `sandbox_with()` for the computer tool script path.
+    Raises:
+      PrerequisiteError: If `sandbox_with()` yields no sandbox for that path.
+    """
+    found = await sandbox_with("/opt/inspect/tool/computer_tool.py")
+    if not found:
+        raise PrerequisiteError(
+            dedent("""
+                The computer tool service was not found in any of the sandboxes for this sample. Please add the computer tool service to your configuration. For example, the following Docker compose file uses the aisiuk/inspect-computer-tool:latest-beta image as its default sandbox:
+                services:
+                  default:
+                    image: "aisiuk/inspect-computer-tool:latest-beta"
+                    init: true
+                """).strip()
+        )
+    return found

inspect_ai/tool/beta/_computer/_computer.py ADDED Viewed

@@ -0,0 +1,155 @@
+from typing import Awaitable, Callable
+from inspect_ai._util.content import Content, ContentImage, ContentText
+from inspect_ai.tool import Tool, ToolResult, tool
+from inspect_ai.tool._tool import (
+    TOOL_INIT_MODEL_INPUT,
+    ToolParsingError,
+)
+from inspect_ai.tool._tool_call import ToolCallModelInput
+from . import _common as common
+from ._common import Action
+ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
+@tool
+def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool:
+    """Computer interaction tool (keyboard, mouse and screenshots).
+    Args:
+      max_screenshots (int | None): When not None, a model input function built by
+        `_computer_model_input(max_screenshots)` is attached to the tool function.
+      timeout (int | None): Forwarded as `timeout` to every action helper in `_common`.
+    Returns:
+      The `execute` tool function.
+    """
+    async def execute(
+        action: Action,
+        text: str | None = None,
+        coordinate: list[int] | None = None,
+    ) -> ToolResult:
+        """
+        Use this tool to interact with a computer.
+        Use a mouse and keyboard to interact with a computer's desktop GUI.
+        Keep in mind that icons require double clicks to open while other UI affordances like menu items and buttons require a single click.
+        Args:
+          action (Action): The action to perform.
+              - `key`: Press a key or key-combination on the keyboard.
+                  - Example: execute(action="key", text="ctrl+s")
+                  - Text can be any key name supported by xdotool's `key` such as:
+                      "Return", "Escape", "alt+Tab", "BackSpace", "Tab", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key),
+                      "Insert", "Delete", "Home", "End", "Prior", "Next", "Left", "Up", "Right", "Down",
+                      "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
+                      "Shift_L", "Shift_R", "Control_L", "Control_R", "Alt_L", "Alt_R", "Scroll_Lock", "Num_Lock", "Caps_Lock", "Pause",
+                      "KP_Multiply", "KP_Home", "KP_Up", "KP_Prior", "KP_Subtract", "KP_Left", "KP_Begin", "KP_Right", "KP_Add", "KP_End","KP_Down",
+                      "KP_Next", "KP_Insert", "KP_Delete", "KP_Enter", "KP_Divide", "KP_Equal", "KP_Decimal",
+              - `type`: Type a string of text on the keyboard. If the text contains spaces, enclose it in quotes.
+                  - Example: execute(action="type", text="The crux of the biscuit is the apostrophe!")
+              - `cursor_position`: Get the current (x, y) pixel coordinate of the cursor on the screen.
+              - `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.
+                  - Example: execute(action="mouse_move", coordinate=(100, 200))
+              - `left_click`: Click the left mouse button.
+              - `left_click_drag`: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
+                  - Example: execute(action="left_click_drag", coordinate=(150, 250))
+              - `right_click`: Click the right mouse button.
+              - `middle_click`: Click the middle mouse button.
+              - `double_click`: Double-click the left mouse button.
+              - `screenshot`: Take a screenshot.
+          text (str | None): The text to type or the key to press. Required when action is "key" or "type".
+          coordinate (tuple[int, int] | None): The (x, y) pixel coordinate on the screen to which to move or drag. Required when action is "mouse_move" or "left_click_drag".
+        Returns:
+          The output of the command. Many commands will include a screenshot reflecting the result of the command in their output.
+        """
+        # actions that need a coordinate and reject text
+        if action in ("mouse_move", "left_click_drag"):
+            if coordinate is None:
+                raise ToolParsingError(f"coordinate is required for {action}")
+            if text is not None:
+                raise ToolParsingError(f"text is not accepted for {action}")
+            if not isinstance(coordinate, list) or len(coordinate) != 2:
+                raise ToolParsingError(f"{coordinate} must be a tuple of length 2")
+            if not all(isinstance(i, int) and i >= 0 for i in coordinate):
+                raise ToolParsingError(
+                    f"{coordinate} must be a tuple of non-negative ints"
+                )
+            if action == "mouse_move":
+                return await common.mouse_move(
+                    coordinate[0], coordinate[1], timeout=timeout
+                )
+            elif action == "left_click_drag":
+                return await common.left_click_drag(
+                    coordinate[0], coordinate[1], timeout=timeout
+                )
+        # actions that need text and reject a coordinate
+        if action in ("key", "type"):
+            if text is None:
+                raise ToolParsingError(f"text is required for {action}")
+            if coordinate is not None:
+                raise ToolParsingError(f"coordinate is not accepted for {action}")
+            if not isinstance(text, str):
+                # message passed positionally, like every other ToolParsingError here
+                raise ToolParsingError(f"{text} must be a string")
+            if action == "key":
+                return await common.press_key(text, timeout=timeout)
+            elif action == "type":
+                return await common.type(text, timeout=timeout)
+        # actions that take neither text nor a coordinate
+        if action in (
+            "left_click",
+            "right_click",
+            "double_click",
+            "middle_click",
+            "screenshot",
+            "cursor_position",
+        ):
+            if text is not None:
+                raise ToolParsingError(f"text is not accepted for {action}")
+            if coordinate is not None:
+                raise ToolParsingError(f"coordinate is not accepted for {action}")
+            if action == "screenshot":
+                return await common.screenshot(timeout=timeout)
+            elif action == "cursor_position":
+                return await common.cursor_position(timeout=timeout)
+            elif action == "left_click":
+                return await common.left_click(timeout=timeout)
+            elif action == "right_click":
+                return await common.right_click(timeout=timeout)
+            elif action == "middle_click":
+                return await common.middle_click(timeout=timeout)
+            elif action == "double_click":
+                return await common.double_click(timeout=timeout)
+        # anything else is not a known action
+        raise ToolParsingError(f"Invalid action: {action}")
+    # if max_screenshots is specified then poke model input into where @tool can find it
+    if max_screenshots is not None:
+        setattr(execute, TOOL_INIT_MODEL_INPUT, _computer_model_input(max_screenshots))
+    return execute
+def _computer_model_input(max_screenshots: int) -> ToolCallModelInput:
+    """Build a model input function that keeps images only in the most recent tool results.
+    Args:
+      max_screenshots (int): Results whose distance from the end
+        (`message_total - message_index`) is at most this value are returned unchanged.
+    Returns:
+      Function that returns string content and recent results as is, and for older
+      results replaces every `ContentImage` with a `ContentText` placeholder.
+    """
+    def model_input(
+        message_index: int, message_total: int, content: str | list[Content]
+    ) -> str | list[Content]:
+        # scalars carry no images, and results inside the window are left alone
+        is_recent = (message_total - message_index) <= max_screenshots
+        if isinstance(content, str) or is_recent:
+            return content
+        # older results: swap each image for a text placeholder, keep the rest
+        return [
+            ContentText(
+                text="Screenshot removed to reduce size of input. Please consult the latest screenshots for the most up to date state of the screen."
+            )
+            if isinstance(item, ContentImage)
+            else item
+            for item in content
+        ]
+    return model_input

inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl