PyPI - inspect-ai - Versions diffs - 0.3.82__py3-none-any.whl → 0.3.83__py3-none-any.whl - Mend

inspect-ai 0.3.82__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (180)

inspect_ai/__init__.py +2 -1
inspect_ai/_display/textual/app.py +14 -3
inspect_ai/_display/textual/display.py +4 -0
inspect_ai/_display/textual/widgets/samples.py +9 -3
inspect_ai/_display/textual/widgets/task_detail.py +3 -4
inspect_ai/_display/textual/widgets/tasks.py +17 -1
inspect_ai/_display/textual/widgets/vscode.py +44 -0
inspect_ai/_eval/eval.py +36 -24
inspect_ai/_eval/evalset.py +17 -18
inspect_ai/_eval/loader.py +34 -11
inspect_ai/_eval/run.py +8 -13
inspect_ai/_eval/score.py +13 -3
inspect_ai/_eval/task/generate.py +8 -9
inspect_ai/_eval/task/log.py +2 -0
inspect_ai/_eval/task/task.py +23 -9
inspect_ai/_util/file.py +13 -0
inspect_ai/_util/json.py +2 -1
inspect_ai/_util/registry.py +1 -0
inspect_ai/_util/vscode.py +37 -0
inspect_ai/_view/www/App.css +6 -0
inspect_ai/_view/www/dist/assets/index.css +304 -128
inspect_ai/_view/www/dist/assets/index.js +47495 -27519
inspect_ai/_view/www/log-schema.json +124 -31
inspect_ai/_view/www/package.json +3 -0
inspect_ai/_view/www/src/App.tsx +12 -0
inspect_ai/_view/www/src/appearance/icons.ts +1 -0
inspect_ai/_view/www/src/components/Card.tsx +6 -4
inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
inspect_ai/_view/www/src/components/Modal.module.css +38 -0
inspect_ai/_view/www/src/components/Modal.tsx +77 -0
inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
inspect_ai/_view/www/src/state/hooks.ts +5 -3
inspect_ai/_view/www/src/state/logPolling.ts +5 -1
inspect_ai/_view/www/src/state/logSlice.ts +10 -0
inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
inspect_ai/_view/www/src/types/log.d.ts +34 -26
inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
inspect_ai/_view/www/yarn.lock +94 -1
inspect_ai/agent/__init__.py +36 -0
inspect_ai/agent/_agent.py +268 -0
inspect_ai/agent/_as_solver.py +72 -0
inspect_ai/agent/_as_tool.py +122 -0
inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
inspect_ai/agent/_filter.py +46 -0
inspect_ai/agent/_handoff.py +93 -0
inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
inspect_ai/agent/_react.py +241 -0
inspect_ai/agent/_run.py +36 -0
inspect_ai/agent/_types.py +81 -0
inspect_ai/log/_log.py +11 -2
inspect_ai/log/_transcript.py +13 -9
inspect_ai/model/__init__.py +7 -1
inspect_ai/model/_call_tools.py +256 -52
inspect_ai/model/_chat_message.py +7 -4
inspect_ai/model/_conversation.py +13 -62
inspect_ai/model/_display.py +85 -0
inspect_ai/model/_model.py +113 -14
inspect_ai/model/_model_output.py +14 -9
inspect_ai/model/_openai.py +16 -4
inspect_ai/model/_openai_computer_use.py +162 -0
inspect_ai/model/_openai_responses.py +319 -165
inspect_ai/model/_providers/anthropic.py +20 -21
inspect_ai/model/_providers/azureai.py +24 -13
inspect_ai/model/_providers/bedrock.py +1 -7
inspect_ai/model/_providers/cloudflare.py +3 -3
inspect_ai/model/_providers/goodfire.py +2 -6
inspect_ai/model/_providers/google.py +11 -10
inspect_ai/model/_providers/groq.py +6 -3
inspect_ai/model/_providers/hf.py +7 -3
inspect_ai/model/_providers/mistral.py +7 -10
inspect_ai/model/_providers/openai.py +47 -17
inspect_ai/model/_providers/openai_o1.py +11 -4
inspect_ai/model/_providers/openai_responses.py +12 -14
inspect_ai/model/_providers/providers.py +2 -2
inspect_ai/model/_providers/together.py +12 -2
inspect_ai/model/_providers/util/chatapi.py +7 -2
inspect_ai/model/_providers/util/hf_handler.py +4 -2
inspect_ai/model/_providers/util/llama31.py +4 -2
inspect_ai/model/_providers/vertex.py +11 -9
inspect_ai/model/_providers/vllm.py +4 -4
inspect_ai/scorer/__init__.py +2 -0
inspect_ai/scorer/_metrics/__init__.py +2 -0
inspect_ai/scorer/_metrics/grouped.py +84 -0
inspect_ai/scorer/_score.py +26 -6
inspect_ai/solver/__init__.py +2 -2
inspect_ai/solver/_basic_agent.py +22 -9
inspect_ai/solver/_bridge.py +31 -0
inspect_ai/solver/_chain.py +20 -12
inspect_ai/solver/_fork.py +5 -1
inspect_ai/solver/_human_agent.py +52 -0
inspect_ai/solver/_prompt.py +3 -1
inspect_ai/solver/_run.py +59 -0
inspect_ai/solver/_solver.py +14 -4
inspect_ai/solver/_task_state.py +5 -3
inspect_ai/tool/_tool_call.py +15 -8
inspect_ai/tool/_tool_def.py +17 -12
inspect_ai/tool/_tool_support_helpers.py +2 -2
inspect_ai/tool/_tool_with.py +14 -11
inspect_ai/tool/_tools/_bash_session.py +11 -2
inspect_ai/tool/_tools/_computer/_common.py +18 -2
inspect_ai/tool/_tools/_computer/_computer.py +18 -2
inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
inspect_ai/tool/_tools/_think.py +1 -1
inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
inspect_ai/util/__init__.py +2 -0
inspect_ai/util/_anyio.py +27 -0
inspect_ai/util/_sandbox/__init__.py +2 -1
inspect_ai/util/_sandbox/context.py +32 -7
inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
inspect_ai/util/_sandbox/docker/compose.py +2 -2
inspect_ai/util/_sandbox/docker/docker.py +12 -1
inspect_ai/util/_store_model.py +30 -7
inspect_ai/util/_subprocess.py +13 -3
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +179 -153
inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
/inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0

inspect_ai/solver/_human_agent.py ADDED Viewed

@@ -0,0 +1,52 @@
+from logging import getLogger
+from inspect_ai._util.logger import warn_once
+from inspect_ai.agent._as_solver import as_solver
+from ._solver import Solver, solver
+logger = getLogger(__name__)
+@solver
+def human_agent(
+    answer: bool | str = True,
+    intermediate_scoring: bool = False,
+    record_session: bool = True,
+) -> Solver:
+    """Human solver for agentic tasks that run in a Linux environment.
+    The Human agent solver installs agent task tools in the default
+    sandbox and presents the user with both task instructions and
+    documentation for the various tools (e.g. `task submit`,
+    `task start`, `task stop` `task instructions`, etc.). A human agent panel
+    is displayed with instructions for logging in to the sandbox.
+    If the user is running in VS Code with the Inspect extension,
+    they will also be presented with links to login to the sandbox
+    using a VS Code Window or Terminal.
+    Args:
+       answer: Is an explicit answer required for this task or is it scored
+          based on files in the container? Pass a `str` with a regex to validate
+          that the answer matches the expected format.
+       intermediate_scoring: Allow the human agent to check their score while working.
+       record_session: Record all user commands and outputs in the sandbox bash session.
+    Returns:
+       Solver: Human agent solver.
+    """
+    from inspect_ai.agent._human.agent import human_cli
+    warn_once(
+        logger,
+        "The human_agent solver is deprecated. Please use the human_cli agent from the agents module instead.",
+    )
+    return as_solver(
+        human_cli(
+            answer=answer,
+            intermediate_scoring=intermediate_scoring,
+            record_session=record_session,
+        )
+    )

inspect_ai/solver/_prompt.py CHANGED Viewed

@@ -123,7 +123,9 @@ def assistant_message(template: str, **params: Any) -> Solver:
     async def solve(state: TaskState, generate: Generate) -> TaskState:
         kwargs = state.metadata | state.store._data | params
         state.messages.append(
-            ChatMessageAssistant(content=format_template(content, kwargs))
+            ChatMessageAssistant(
+                content=format_template(content, kwargs), model=state.model.name
+            )
         )
         return state

inspect_ai/solver/_run.py ADDED Viewed

@@ -0,0 +1,59 @@
+from copy import copy
+from inspect_ai.model import ChatMessage, ChatMessageUser, ModelName, ModelOutput
+from ._fork import task_generate
+from ._solver import Solver
+from ._task_state import TaskState
+async def run(
+    solver: Solver, input: str | list[ChatMessage]
+) -> tuple[list[ChatMessage], ModelOutput | None]:
+    """Run a solver over chat message input.
+    Args:
+        solver: Solver to run.
+        input: Chat message input
+    Returns:
+        Tuple of `list[ChatMessage], ModelOutput | None` (returns
+        [], None if no generates were done by the solver)
+    """
+    from inspect_ai.log._samples import sample_active
+    # get the generate function for the current task
+    generate = task_generate()
+    if generate is None:
+        raise RuntimeError("Called run() outside of a running task.")
+    # get the active sample
+    active = sample_active()
+    if active is None:
+        raise RuntimeError("Called run() outside of a running task")
+    assert active.sample.id
+    # build messages list
+    messages: list[ChatMessage] = (
+        [ChatMessageUser(content=input)] if isinstance(input, str) else input
+    )
+    # build state
+    state = TaskState(
+        model=ModelName(active.model),
+        sample_id=active.sample.id,
+        epoch=active.epoch,
+        input=input,
+        messages=copy(messages),
+    )
+    # run solver
+    state = await solver(state, generate)
+    # return any messages that don't match our initial prefix
+    new_messages: list[ChatMessage] = []
+    for index, message in enumerate(state.messages):
+        if index >= len(messages) or message.id != messages[index].id:
+            new_messages.append(message)
+    return new_messages, state.output if len(state.output.choices) > 0 else None

inspect_ai/solver/_solver.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import (
     Literal,
     ParamSpec,
     Protocol,
+    TypeAlias,
     cast,
     overload,
     runtime_checkable,
@@ -23,6 +24,8 @@ from inspect_ai._util.registry import (
     registry_name,
     registry_tag,
 )
+from inspect_ai.agent._agent import Agent, is_agent
+from inspect_ai.agent._as_solver import as_solver
 from inspect_ai.model import CachePolicy, GenerateConfigArgs
 from ._task_state import TaskState, set_sample_state
@@ -136,23 +139,27 @@ def solver_create(name: str, **kwargs: Any) -> Solver:
     return cast(Solver, registry_create("solver", name, **kwargs))
+SolverType: TypeAlias = Solver | Agent
+"""Return type for @solver decorated functions. """
 @overload
 def solver(name: str) -> Callable[[Callable[P, Solver]], Callable[P, Solver]]: ...
 @overload
-def solver(name: Callable[P, Solver]) -> Callable[P, Solver]: ...
+def solver(name: Callable[P, SolverType]) -> Callable[P, Solver]: ...
 def solver(
-    name: str | Callable[P, Solver],
+    name: str | Callable[P, SolverType],
 ) -> Callable[[Callable[P, Solver]], Callable[P, Solver]] | Callable[P, Solver]:
     r"""Decorator for registering solvers.
     Args:
         name:
             Optional name for solver. If the decorator has no name
-            argument then the name of the underlying Callable[P, Solver]
+            argument then the name of the underlying Callable[P, SolverType]
             object will be used to automatically assign a name.
     Returns:
@@ -176,7 +183,7 @@ def solver(
     #  (b) Ensure that instances of Solver created by SolverType also
     #      carry registry info.
     def create_solver_wrapper(
-        solver_type: Callable[P, Solver], name: str | None = None
+        solver_type: Callable[P, SolverType], name: str | None = None
     ) -> Callable[P, Solver]:
         solver_name = registry_name(
             solver_type, name if name else getattr(solver_type, "__name__")
@@ -185,6 +192,9 @@ def solver(
         @wraps(solver_type)
         def solver_wrapper(*args: P.args, **kwargs: P.kwargs) -> Solver:
             solver = solver_type(*args, **kwargs)
+            if is_agent(solver):
+                solver = as_solver(solver)
+            solver = cast(Solver, solver)
             if not is_callable_coroutine(solver):
                 raise TypeError(f"'{solver}' is not declared as an async callable.")

inspect_ai/solver/_task_state.py CHANGED Viewed

@@ -394,16 +394,18 @@ class TaskState:
         return metadata_as(self.metadata, metadata_cls)
-    def store_as(self, model_cls: Type[SMT]) -> SMT:
+    def store_as(self, model_cls: Type[SMT], instance: str | None = None) -> SMT:
         """Pydantic model interface to the store.
         Args:
           model_cls: Pydantic model type (must derive from StoreModel)
+          instance: Optional instances name for store (enables multiple instances
+            of a given StoreModel type within a single sample)
         Returns:
-          StoreModel: Instance of model_cls bound to current Store.
+          StoreModel: model_cls bound to sample store data.
         """
-        return model_cls(store=self.store)
+        return model_cls(store=self.store, instance=instance)
 def sample_state() -> TaskState | None:

inspect_ai/tool/_tool_call.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from dataclasses import dataclass, field
-from typing import Any, Callable, Literal
+from typing import Any, Callable, Literal, TypedDict
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, JsonValue
 from inspect_ai._util.content import Content
@@ -44,11 +44,8 @@ class ToolCall:
     arguments: dict[str, Any]
     """Arguments to function."""
-    type: str
-    """Type of tool call ('function' or a model specific internal tool type)"""
-    internal_name: str | None = field(default=None)
-    """Model's internal name for the tool - if any."""
+    internal: JsonValue | None = field(default=None)
+    """Model provider specific payload - typically used to aid transformation back to model types."""
     parse_error: str | None = field(default=None)
     """Error which occurred parsing tool call."""
@@ -82,7 +79,17 @@ ToolCallViewer = Callable[[ToolCall], ToolCallView]
 """Custom view renderer for tool calls."""
-ToolCallModelInput = Callable[[int, int, str | list[Content]], str | list[Content]]
+class ToolCallModelInputHints(TypedDict):
+    # This type is a little sketchy but it allows tools to customize their
+    # input hook behavior based on model limitations without creating a tight
+    # coupling to the model provider.
+    disable_computer_screenshot_truncation: bool
+    """The model does not support the truncation/redaction of computer screenshots."""
+ToolCallModelInput = Callable[
+    [int, int, str | list[Content], ToolCallModelInputHints], str | list[Content]
+]
 """Determine how tool call results are played back as model input.
 The first argument is an index into the total number of tool results

inspect_ai/tool/_tool_def.py CHANGED Viewed

@@ -21,7 +21,7 @@ from ._tool_description import (
     tool_description,
 )
 from ._tool_info import parse_tool_info
-from ._tool_params import ToolParams
+from ._tool_params import ToolParam, ToolParams
 class ToolDef:
@@ -194,17 +194,7 @@ def tool_def_fields(tool: Tool) -> ToolDefFields:
         raise ValueError(f"Description not provided for tool function '{name}'")
     # validate that we have types/descriptions for paramters
-    for param_name, param in tool_info.parameters.properties.items():
-        def raise_not_provided_error(context: str) -> None:
-            raise ValueError(
-                f"{context} not provided for parameter '{param_name}' of tool function '{name}'."
-            )
-        if param.type is None and not param.anyOf and not param.enum:
-            raise_not_provided_error("Unsupported type or type annotation")
-        elif not param.description:
-            raise_not_provided_error("Description")
+    validate_tool_parameters(name, tool_info.parameters.properties)
     # see if the user has overriden any of the tool's descriptions
     desc = tool_description(tool)
@@ -238,3 +228,18 @@ def tool_registry_info(
     viewer = info.metadata.get(TOOL_VIEWER, None)
     model_input = info.metadata.get(TOOL_MODEL_INPUT, None)
     return name, prompt, parallel, viewer, model_input
+def validate_tool_parameters(tool_name: str, parameters: dict[str, ToolParam]) -> None:
+    # validate that we have types/descriptions for paramters
+    for param_name, param in parameters.items():
+        def raise_not_provided_error(context: str) -> None:
+            raise ValueError(
+                f"{context} provided for parameter '{param_name}' of function '{tool_name}'."
+            )
+        if param.type is None and not param.anyOf and not param.enum:
+            raise_not_provided_error("Unsupported type or type annotation")
+        elif not param.description:
+            raise_not_provided_error("Description not")

inspect_ai/tool/_tool_support_helpers.py CHANGED Viewed

@@ -129,8 +129,8 @@ async def tool_container_sandbox(tool_name: str) -> SandboxEnvironment:
                 Alternatively, you can include the service into your own Dockerfile:
                 ENV PATH="$PATH:/opt/inspect_tool_support/bin"
-                RUN python -m venv /opt/inspect_tool_support && \
-                    /opt/inspect_tool_support/bin/pip install inspect-tool-support && \
+                RUN python -m venv /opt/inspect_tool_support && \\
+                    /opt/inspect_tool_support/bin/pip install inspect-tool-support && \\
                     /opt/inspect_tool_support/bin/inspect-tool-support post-install
                 """).strip()
         raise PrerequisiteError(msg)

inspect_ai/tool/_tool_with.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from copy import deepcopy
 from inspect_ai._util.registry import (
     registry_info,
     registry_params,
@@ -22,10 +20,15 @@ def tool_with(
     viewer: ToolCallViewer | None = None,
     model_input: ToolCallModelInput | None = None,
 ) -> Tool:
-    """Tool with modifications to name and descriptions.
+    """Tool with modifications to various attributes.
+    This function modifies the passed tool in place and
+    returns it. If you want to create multiple variations
+    of a single tool using `tool_with()` you should create
+    the underlying tool multiple times.
     Args:
-       tool: Tool instance to copy and add descriptions to.
+       tool: Tool instance to modify.
        name: Tool name (optional).
        description: Tool description (optional).
        parameters: Parameter descriptions (optional)
@@ -36,7 +39,7 @@ def tool_with(
            tool call results are played back as model input.
     Returns:
-       A copy of the passed tool with the specified descriptive information.
+       The passed tool with the requested modifications.
     """
     # get the existing tool info
     tool_info = parse_tool_info(tool)
@@ -54,8 +57,7 @@ def tool_with(
                 param_name
             ]
-    # copy the tool and set the descriptions on the new copy
-    tool_copy = deepcopy(tool)
+    # resolve attributes
     info = registry_info(tool).model_copy()
     if parallel is not None:
         info.metadata[TOOL_PARALLEL] = parallel
@@ -64,12 +66,13 @@ def tool_with(
     elif model_input is not None:
         info.metadata[TOOL_MODEL_INPUT] = model_input
-    set_registry_info(tool_copy, info)
-    set_registry_params(tool_copy, registry_params(tool))
+    # set attributes
+    set_registry_info(tool, info)
+    set_registry_params(tool, registry_params(tool))
     set_tool_description(
-        tool_copy,
+        tool,
         ToolDescription(
             name=name, description=description, parameters=tool_info.parameters
         ),
     )
-    return tool_copy
+    return tool

inspect_ai/tool/_tools/_bash_session.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from pydantic import BaseModel, Field, RootModel
+from shortuuid import uuid
 from inspect_ai.tool import ToolResult
 from inspect_ai.tool._tool_support_helpers import (
@@ -52,13 +53,21 @@ def code_viewer(language: str, code_param: str) -> ToolCallViewer:
 @tool(viewer=code_viewer("bash", "command"))
-def bash_session(timeout: int | None = None) -> Tool:
+def bash_session(*, timeout: int | None = None, instance: str | None = uuid()) -> Tool:
     """Bash shell session command execution tool.
     Execute bash shell commands in a long running session using a sandbox environment (e.g. "docker").
+    By default, a separate bash process is created within the sandbox for each
+    call to `bash_session()`. You can modify this behavior by passing `instance=None`
+    (which will result in a single bash process for the entire sample) or use other
+    `instance` values that implement another scheme).
+    See complete documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-bash-session>.
     Args:
       timeout: Timeout (in seconds) for command.
+      instance: Instance id (each unique instance id has its own bash process)
     Returns:
       String with command output (stdout) or command error (stderr).
@@ -85,7 +94,7 @@ def bash_session(timeout: int | None = None) -> Tool:
         params: dict[str, object] = {"command": command, "restart": restart}
         sandbox = await tool_container_sandbox("bash session")
-        store = store_as(BashSessionStore)
+        store = store_as(BashSessionStore, instance=instance)
         if not store.session_id:
             store.session_id = (

inspect_ai/tool/_tools/_computer/_common.py CHANGED Viewed

@@ -83,6 +83,22 @@ async def middle_click(coordinate: list[int], timeout: int | None = None) -> Too
     )
+async def back_click(coordinate: list[int], timeout: int | None = None) -> ToolResult:
+    return await _send_cmd(
+        ["back_click", "--coordinate", f"{coordinate[0]}", f"{coordinate[1]}"],
+        timeout=timeout,
+    )
+async def forward_click(
+    coordinate: list[int], timeout: int | None = None
+) -> ToolResult:
+    return await _send_cmd(
+        ["forward_click", "--coordinate", f"{coordinate[0]}", f"{coordinate[1]}"],
+        timeout=timeout,
+    )
 async def double_click(coordinate: list[int], timeout: int | None = None) -> ToolResult:
     return await _send_cmd(
         ["double_click", "--coordinate", f"{coordinate[0]}", f"{coordinate[1]}"],
@@ -182,11 +198,11 @@ async def computer_sandbox() -> SandboxEnvironment:
     else:
         raise PrerequisiteError(
             dedent("""
-                The computer tool service was not found in any of the sandboxes for this sample. Please add the computer tool service to your configuration. For example, the following Docker compose file uses the aisiuk/inspect-computer-tool:latest image as its default sandbox:
+                The computer tool service was not found in any of the sandboxes for this sample. Please add the computer tool service to your configuration. For example, the following Docker compose file uses the aisiuk/inspect-computer-tool image as its default sandbox:
                 services:
                   default:
-                    image: "aisiuk/inspect-computer-tool:latest"
+                    image: "aisiuk/inspect-computer-tool"
                     init: true
                 """).strip()
         )

inspect_ai/tool/_tools/_computer/_computer.py CHANGED Viewed

@@ -3,7 +3,7 @@ from typing import Awaitable, Callable, Literal, TypeVar
 from inspect_ai._util.content import Content, ContentImage, ContentText
 from inspect_ai.tool import Tool, ToolResult, tool
 from inspect_ai.tool._tool import TOOL_INIT_MODEL_INPUT, ToolParsingError
-from inspect_ai.tool._tool_call import ToolCallModelInput
+from inspect_ai.tool._tool_call import ToolCallModelInput, ToolCallModelInputHints
 from . import _common as common
 from ._resources.tool._constants import Action
@@ -64,6 +64,8 @@ def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool
                   - Example: execute(action="left_click_drag", coordinate=(150, 250))
               - `right_click`: Click the right mouse button.
               - `middle_click`: Click the middle mouse button.
+              - `back_click`: Click the 'back' mouse button.
+              - `forward_click`: Click the 'forward' mouse button.
               - `double_click`: Double-click the left mouse button.
               - `triple_click`: Double-click the left mouse button.
               - `wait`: Wait for a specified duration (in seconds).
@@ -117,6 +119,14 @@ def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool
                 return await common.middle_click(
                     not_none(coordinate, "coordinate"), timeout=timeout
                 )
+            case "back_click":
+                return await common.back_click(
+                    not_none(coordinate, "coordinate"), timeout=timeout
+                )
+            case "forward_click":
+                return await common.forward_click(
+                    not_none(coordinate, "coordinate"), timeout=timeout
+                )
             case "double_click":
                 return await common.double_click(
                     not_none(coordinate, "coordinate"), timeout=timeout
@@ -150,8 +160,14 @@ def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool
 def _computer_model_input(max_screenshots: int) -> ToolCallModelInput:
     def model_input(
-        message_index: int, message_total: int, content: str | list[Content]
+        message_index: int,
+        message_total: int,
+        content: str | list[Content],
+        hints: ToolCallModelInputHints,
     ) -> str | list[Content]:
+        if hints.get("forbids_computer_screenshot_truncation", False):
+            return content
         # nothing to do for scalars
         if isinstance(content, str):
             return content

inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py CHANGED Viewed

@@ -12,6 +12,8 @@ Action = Literal[
     "left_click_drag",
     "right_click",
     "middle_click",
+    "back_click",
+    "forward_click",
     "double_click",
     "triple_click",
     "scroll",

inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py CHANGED Viewed

@@ -153,6 +153,19 @@ class X11Client:
     ) -> ToolResult:
         return await self._mouse_move_and("middle_click", coordinate, text)
+    # https://wiki.archlinux.org/title/Mouse_buttons#Thumb_buttons_-_forward_and_back
+    # suggests that, although not in any spec, the de facto standard is 8 for
+    # back and 9 for forward.
+    async def back_click(
+        self, coordinate: tuple[int, int] | None, text: str | None
+    ) -> ToolResult:
+        return await self._mouse_move_and("back_click", coordinate, text)
+    async def forward_click(
+        self, coordinate: tuple[int, int] | None, text: str | None
+    ) -> ToolResult:
+        return await self._mouse_move_and("forward_click", coordinate, text)
     async def double_click(
         self, coordinate: tuple[int, int] | None, text: str | None
     ) -> ToolResult:
@@ -215,6 +228,8 @@ class X11Client:
             "left_click",
             "right_click",
             "middle_click",
+            "back_click",
+            "forward_click",
             "double_click",
             "triple_click",
         ],
@@ -233,6 +248,8 @@ class X11Client:
             "left_click": "1",
             "right_click": "3",
             "middle_click": "2",
+            "back_click": "8",
+            "forward_click": "9",
             "double_click": "--repeat 2 --delay 300 1",
             "triple_click": "--repeat 3 --delay 300 1",
         }[action]

inspect_ai/tool/_tools/_think.py CHANGED Viewed

@@ -22,7 +22,7 @@ def think(
     async def execute(thought: str) -> str:
         """Use the tool to think about something.
-        The will not obtain new information or change the environment, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed."
+        The will not obtain new information or change the environment, but just append the thought to the log. Use it when complex reasoning or some cache memory is needed.
         Args:
             thought: A thought to think about.

inspect-ai 0.3.82__py3-none-any.whl → 0.3.83__py3-none-any.whl

inspect-ai 0.3.82__py3-none-any.whl → 0.3.83__py3-none-any.whl