PyPI - inspect-ai - Versions diffs - 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl - Mend

inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (161)

inspect_ai/__init__.py +2 -1
inspect_ai/_cli/common.py +7 -3
inspect_ai/_cli/eval.py +17 -2
inspect_ai/_cli/trace.py +21 -2
inspect_ai/_display/core/active.py +4 -3
inspect_ai/_display/core/config.py +3 -3
inspect_ai/_display/core/panel.py +7 -3
inspect_ai/_display/plain/__init__.py +0 -0
inspect_ai/_display/plain/display.py +203 -0
inspect_ai/_display/rich/display.py +4 -9
inspect_ai/_display/textual/app.py +4 -1
inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
inspect_ai/_display/textual/widgets/samples.py +119 -16
inspect_ai/_display/textual/widgets/sandbox.py +37 -0
inspect_ai/_eval/eval.py +32 -20
inspect_ai/_eval/evalset.py +7 -5
inspect_ai/_eval/score.py +1 -0
inspect_ai/_eval/task/__init__.py +2 -2
inspect_ai/_eval/task/images.py +40 -25
inspect_ai/_eval/task/results.py +50 -22
inspect_ai/_eval/task/run.py +180 -124
inspect_ai/_eval/task/sandbox.py +10 -5
inspect_ai/_eval/task/task.py +140 -25
inspect_ai/_util/constants.py +2 -0
inspect_ai/_util/content.py +23 -1
inspect_ai/_util/images.py +20 -17
inspect_ai/_util/kvstore.py +73 -0
inspect_ai/_util/notgiven.py +18 -0
inspect_ai/_util/port_names.py +61 -0
inspect_ai/_util/text.py +23 -0
inspect_ai/_util/thread.py +5 -0
inspect_ai/_view/www/App.css +31 -1
inspect_ai/_view/www/dist/assets/index.css +31 -1
inspect_ai/_view/www/dist/assets/index.js +25375 -1846
inspect_ai/_view/www/log-schema.json +129 -15
inspect_ai/_view/www/package.json +2 -0
inspect_ai/_view/www/src/App.mjs +8 -10
inspect_ai/_view/www/src/Types.mjs +0 -1
inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
inspect_ai/_view/www/src/index.js +75 -2
inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
inspect_ai/_view/www/src/types/log.d.ts +62 -27
inspect_ai/_view/www/src/utils/Format.mjs +10 -3
inspect_ai/_view/www/src/utils/Json.mjs +12 -6
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
inspect_ai/_view/www/vite.config.js +7 -0
inspect_ai/_view/www/yarn.lock +116 -0
inspect_ai/approval/_human/__init__.py +0 -0
inspect_ai/approval/_human/util.py +2 -2
inspect_ai/approval/_policy.py +12 -6
inspect_ai/dataset/_sources/csv.py +2 -1
inspect_ai/dataset/_sources/json.py +2 -1
inspect_ai/dataset/_sources/util.py +15 -7
inspect_ai/log/_condense.py +11 -1
inspect_ai/log/_log.py +3 -6
inspect_ai/log/_recorders/eval.py +19 -8
inspect_ai/log/_samples.py +26 -5
inspect_ai/log/_transcript.py +32 -2
inspect_ai/model/__init__.py +10 -2
inspect_ai/model/_call_tools.py +59 -12
inspect_ai/model/_chat_message.py +2 -4
inspect_ai/model/_conversation.py +61 -0
inspect_ai/model/_generate_config.py +10 -4
inspect_ai/model/_model.py +117 -18
inspect_ai/model/_model_output.py +7 -2
inspect_ai/model/_providers/anthropic.py +109 -51
inspect_ai/model/_providers/azureai.py +26 -24
inspect_ai/model/_providers/bedrock.py +43 -44
inspect_ai/model/_providers/google.py +121 -58
inspect_ai/model/_providers/groq.py +7 -5
inspect_ai/model/_providers/hf.py +11 -6
inspect_ai/model/_providers/mistral.py +17 -20
inspect_ai/model/_providers/openai.py +32 -21
inspect_ai/model/_providers/openai_o1.py +9 -8
inspect_ai/model/_providers/providers.py +1 -1
inspect_ai/model/_providers/together.py +8 -8
inspect_ai/model/_providers/vertex.py +18 -8
inspect_ai/scorer/__init__.py +13 -2
inspect_ai/scorer/_metrics/__init__.py +2 -2
inspect_ai/scorer/_metrics/std.py +3 -3
inspect_ai/scorer/_reducer/reducer.py +1 -1
inspect_ai/scorer/_scorer.py +2 -2
inspect_ai/solver/__init__.py +2 -5
inspect_ai/solver/_prompt.py +35 -5
inspect_ai/solver/_task_state.py +80 -38
inspect_ai/tool/__init__.py +11 -1
inspect_ai/tool/_tool.py +21 -3
inspect_ai/tool/_tool_call.py +10 -0
inspect_ai/tool/_tool_def.py +16 -5
inspect_ai/tool/_tool_with.py +21 -4
inspect_ai/tool/beta/__init__.py +5 -0
inspect_ai/tool/beta/_computer/__init__.py +3 -0
inspect_ai/tool/beta/_computer/_common.py +133 -0
inspect_ai/tool/beta/_computer/_computer.py +155 -0
inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
inspect_ai/util/__init__.py +2 -3
inspect_ai/util/{_trace.py → _conversation.py} +3 -17
inspect_ai/util/_display.py +14 -4
inspect_ai/util/_limit.py +26 -0
inspect_ai/util/_sandbox/context.py +12 -13
inspect_ai/util/_sandbox/docker/compose.py +24 -11
inspect_ai/util/_sandbox/docker/docker.py +84 -14
inspect_ai/util/_sandbox/docker/internal.py +3 -1
inspect_ai/util/_sandbox/environment.py +27 -1
inspect_ai/util/_sandbox/local.py +1 -0
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
inspect_ai/model/_trace.py +0 -48
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0

inspect_ai/util/{_trace.py → _conversation.py} RENAMED Viewed

@@ -1,5 +1,3 @@
-from contextvars import ContextVar
 from rich import print
 from rich.console import RenderableType
 from rich.text import Text
@@ -7,12 +5,7 @@ from rich.text import Text
 from inspect_ai._util.transcript import transcript_panel
-def trace_enabled() -> bool:
-    """Is trace mode currently enabled."""
-    return _trace.get(None) is True
-def trace_panel(
+def conversation_panel(
     title: str,
     *,
     subtitle: str | None = None,
@@ -20,8 +13,8 @@ def trace_panel(
 ) -> None:
     """Trace content into a standard trace panel display.
-    Typically you would call `trace_enabled()` to confirm that trace mode
-    is enabled before calling `trace_panel()`.
+    Typically you would call `display_type() == "conversation"` to confirm that
+    we are in conversation mode before calling `conversation_panel()`.
     Args:
       title (str): Panel title.
@@ -32,10 +25,3 @@ def trace_panel(
         transcript_panel(title, subtitle, content),
         Text(),
     )
-def init_trace(trace: bool | None) -> None:
-    _trace.set(trace)
-_trace: ContextVar[bool | None] = ContextVar("_trace_mode")

inspect_ai/util/_display.py CHANGED Viewed

@@ -3,10 +3,11 @@ from logging import getLogger
 from typing import Literal
 from inspect_ai._util.constants import DEFAULT_DISPLAY
+from inspect_ai._util.thread import is_main_thread
 logger = getLogger(__name__)
-DisplayType = Literal["full", "rich", "plain", "none"]
+DisplayType = Literal["full", "conversation", "rich", "plain", "none"]
 """Console display type."""
@@ -15,15 +16,24 @@ _display_type: DisplayType | None = None
 def init_display_type(display: str | None = None) -> DisplayType:
     global _display_type
-    global _display_metrics
     display = (
         display or os.environ.get("INSPECT_DISPLAY", DEFAULT_DISPLAY).lower().strip()
     )
+    # if we are on a background thread then throttle down to "plain"
+    # ("full" requires textual which cannot run in a background thread
+    # b/c it calls the Python signal function; "rich" assumes exclusive
+    # display access which may not be the case for threads)
+    if display in ["full", "rich"] and not is_main_thread():
+        display = "plain"
     match display:
-        case "full" | "rich" | "plain" | "none":
+        case "full" | "conversation" | "rich" | "plain" | "none":
             _display_type = display
         case _:
-            logger.warning(f"Unknown display type '{display}'")
+            logger.warning(
+                f"Unknown display type '{display}' (setting display to 'full')"
+            )
             _display_type = "full"
     return _display_type

inspect_ai/util/_limit.py ADDED Viewed

@@ -0,0 +1,26 @@
+from typing import Literal
+class SampleLimitExceededError(Exception):
+    """Exception raised when a sample limit is exceeded.
+    Args:
+       type (Literal["message", "time", "token", "operator"]): Type of limit exceeded.
+       value (int): Value compared to
+       limit (int): Limit applied.
+       message (str | None): Optional. Human readable message.
+    """
+    def __init__(
+        self,
+        type: Literal["message", "time", "token", "operator", "custom"],
+        *,
+        value: int,
+        limit: int,
+        message: str | None = None,
+    ) -> None:
+        self.type = type
+        self.value = value
+        self.limit = limit
+        self.message = f"Exceeded {type} limit: {limit:,}"
+        super().__init__(message)

inspect_ai/util/_sandbox/context.py CHANGED Viewed

@@ -4,6 +4,8 @@ from typing import Any, NoReturn, cast
 from shortuuid import uuid
+from inspect_ai._util.constants import SANDBOX_SETUP_TIMEOUT
 from .environment import (
     SampleCleanup,
     SampleInit,
@@ -193,23 +195,20 @@ async def setup_sandbox_environment(
     setup_file = f"/tmp/{uuid()}"
     await env.write_file(setup_file, setup)
-    # chmod, execute, and remove
-    async def exec(cmd: list[str]) -> None:
-        try:
-            result = await env.exec(cmd, timeout=30)
-        except TimeoutError:
-            raise RuntimeError(
-                f"Timed out executing command {' '.join(cmd)} in sandbox"
-            )
+    # execute and then remove setup script (don't retry it on timeout
+    # in case it is not idempotent)
+    try:
+        await env.exec(["chmod", "+x", setup_file], timeout=30)
+        result = await env.exec(
+            ["env", setup_file], timeout=SANDBOX_SETUP_TIMEOUT, timeout_retry=False
+        )
         if not result.success:
             raise RuntimeError(
                 f"Failed to execute setup script for sample: {result.stderr}"
             )
-    await exec(["chmod", "+x", setup_file])
-    await exec(["env", setup_file])
-    await exec(["rm", setup_file])
+        await env.exec(["rm", setup_file], timeout=30)
+    except TimeoutError:
+        raise RuntimeError("Timed out executing setup command in sandbox")
 def default_sandbox_environment(

inspect_ai/util/_sandbox/docker/compose.py CHANGED Viewed

@@ -25,16 +25,17 @@ COMPOSE_WAIT = "120"
 async def compose_up(project: ComposeProject) -> None:
-    # Start the environment
-    result = await compose_command(
+    # Start the environment. Note that we don't check the result because docker will
+    # return a non-zero exit code for services that exit (even successfully) when
+    # passing the --wait flag (see https://github.com/docker/compose/issues/10596).
+    # In practice, we will catch any errors when calling compose_check_running()
+    # immediately after we call compose_up().
+    await compose_command(
         ["up", "--detach", "--wait", "--wait-timeout", COMPOSE_WAIT],
         project=project,
         # wait up to 5 minutes for container to go up (compose wait + 3 minutes)
         timeout=300,
     )
-    if not result.success:
-        msg = f"Failed to start docker services for {project.config}: {result.stderr}"
-        raise RuntimeError(msg)
 async def compose_down(project: ComposeProject, quiet: bool = True) -> None:
@@ -91,14 +92,21 @@ async def compose_cp(
         raise RuntimeError(msg)
-async def compose_check_running(services: list[str], project: ComposeProject) -> None:
+async def compose_check_running(
+    services: list[str], project: ComposeProject
+) -> list[str]:
     # Check to ensure that the status of containers is healthy
     running_services = await compose_ps(project=project, status="running")
-    if len(running_services) > 0:
-        if len(running_services) != len(services):
+    exited_services = await compose_ps(project=project, status="exited")
+    successful_services = running_services + [
+        service for service in exited_services if service["ExitCode"] == 0
+    ]
+    if len(successful_services) > 0:
+        if len(successful_services) != len(services):
             unhealthy_services = services
-            for running_service in running_services:
-                unhealthy_services.remove(running_service["Service"])
+            for successful_service in successful_services:
+                unhealthy_services.remove(successful_service["Service"])
             msg = (
                 "One or more docker containers failed to start from "
@@ -108,6 +116,8 @@ async def compose_check_running(services: list[str], project: ComposeProject) ->
     else:
         raise RuntimeError("No services started")
+    return [service["Service"] for service in running_services]
 async def compose_ps(
     project: ComposeProject,
@@ -166,6 +176,7 @@ async def compose_exec(
     *,
     project: ComposeProject,
     timeout: int | None,
+    timeout_retry: bool = True,
     input: str | bytes | None = None,
     output_limit: int | None = None,
 ) -> ExecResult[str]:
@@ -173,6 +184,7 @@ async def compose_exec(
         ["exec"] + command,
         project=project,
         timeout=timeout,
+        timeout_retry=timeout_retry,
         input=input,
         forward_env=False,
         output_limit=output_limit,
@@ -258,6 +270,7 @@ async def compose_command(
     *,
     project: ComposeProject,
     timeout: int | None,
+    timeout_retry: bool = True,
     input: str | bytes | None = None,
     cwd: str | Path | None = None,
     forward_env: bool = True,
@@ -325,7 +338,7 @@ async def compose_command(
                 return await run_command(command_timeout)
             except TimeoutError:
                 retries += 1
-                if retries <= MAX_RETRIES:
+                if timeout_retry and (retries <= MAX_RETRIES):
                     logger.info(
                         f"Retrying docker compose command: {shlex.join(compose_command)}"
                     )

inspect_ai/util/_sandbox/docker/docker.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import errno
+import json
 import os
 import tempfile
 from logging import getLogger
@@ -7,9 +8,11 @@ from typing import Literal, Union, cast, overload
 from typing_extensions import override
-from inspect_ai.util._subprocess import ExecResult
+from inspect_ai.util._subprocess import ExecResult, subprocess
 from ..environment import (
+    HostMapping,
+    PortMapping,
     SandboxConnection,
     SandboxEnvironment,
     SandboxEnvironmentConfigType,
@@ -138,28 +141,31 @@ class DockerSandboxEnvironment(SandboxEnvironment):
             # start the services
             await compose_up(project)
+            # check to ensure that the services are running
+            running_services = await compose_check_running(
+                list(services.keys()), project=project
+            )
             # note that the project is running
             project_startup(project)
-            # check to ensure that the services are running
-            await compose_check_running(list(services.keys()), project=project)
-            # create sandbox environments
+            # create sandbox environments for all running services
             default_service: str | None = None
             environments: dict[str, SandboxEnvironment] = {}
             for service, service_info in services.items():
-                # update the project w/ the working directory
-                working_dir = await container_working_dir(service, project)
+                if service in running_services:
+                    # update the project w/ the working directory
+                    working_dir = await container_working_dir(service, project)
-                # create the docker sandbox environemnt
-                docker_env = DockerSandboxEnvironment(service, project, working_dir)
+                    # create the docker sandbox environemnt
+                    docker_env = DockerSandboxEnvironment(service, project, working_dir)
-                # save reference to default service if requested
-                if service_info.get("x-default", False):
-                    default_service = service
+                    # save reference to default service if requested
+                    if service_info.get("x-default", False):
+                        default_service = service
-                # record service => environment
-                environments[service] = docker_env
+                    # record service => environment
+                    environments[service] = docker_env
             # confirm that we have a 'default' environemnt
             if environments.get("default", None) is None and default_service is None:
@@ -225,6 +231,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         env: dict[str, str] = {},
         user: str | None = None,
         timeout: int | None = None,
+        timeout_retry: bool = True,
     ) -> ExecResult[str]:
         # additional args
         args = []
@@ -251,6 +258,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
             args + [self._service] + cmd,
             project=self._project,
             timeout=timeout,
+            timeout_retry=timeout_retry,
             input=input,
             output_limit=SandboxEnvironmentLimits.MAX_EXEC_OUTPUT_SIZE,
         )
@@ -428,11 +436,14 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         # return container connection
         if container:
             return SandboxConnection(
+                type="docker",
                 command=f"docker exec -it {container} bash -l",
                 vscode_command=[
                     "remote-containers.attachToRunningContainer",
                     container,
                 ],
+                ports=await get_ports_info(container),
+                container=container,
             )
         # error (not currently running)
         else:
@@ -461,3 +472,62 @@ async def container_working_dir(
             + f"{result.stderr}"
         )
         return default
+async def get_ports_info(container: str) -> list[PortMapping] | None:
+    try:
+        result = await subprocess(
+            [
+                "docker",
+                "inspect",
+                container,
+                "--format",
+                "{{json .NetworkSettings.Ports}}",
+            ],
+            timeout=60,
+        )
+        if not result.success:
+            raise RuntimeError(result.stderr)
+        return parse_docker_inspect_ports(result.stdout)
+    # It's currently a policy decision to let docker timeouts to be silent.
+    except TimeoutError:
+        return None
+def parse_docker_inspect_ports(json_str: str) -> list[PortMapping] | None:
+    """
+    Parses the JSON output from `docker inspect {container_name} --format='{{json .NetworkSettings.Ports}}'` to extract port mappings.
+    Args:
+        json_str (str): A JSON string representing the `NetworkSettings.Ports` output of `docker inspect`. e.g.
+          ```
+          {
+              "5900/tcp": [{"HostIp": "0.0.0.0", "HostPort": "54023"}],
+              "8080/tcp": [{"HostIp": "0.0.0.0", "HostPort": "54024"}]
+          }
+          ```
+    Returns:
+        list[PortMapping] | None: A list of PortMapping objects if any port mappings are found,
+                                   otherwise None.
+    """
+    data = json.loads(json_str)
+    port_mappings = []
+    for port_protocol, mappings in data.items():
+        if mappings is None:
+            continue
+        container_port, protocol = port_protocol.split("/")
+        host_mappings = [
+            HostMapping(host_ip=mapping["HostIp"], host_port=int(mapping["HostPort"]))
+            for mapping in mappings
+        ]
+        port_mapping = PortMapping(
+            container_port=int(container_port),
+            protocol=protocol,
+            mappings=host_mappings,
+        )
+        port_mappings.append(port_mapping)
+    return port_mappings if port_mappings else None

inspect_ai/util/_sandbox/docker/internal.py CHANGED Viewed

@@ -6,13 +6,15 @@ from inspect_ai.util._subprocess import subprocess
 INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB = "aisiuk/inspect-web-browser-tool"
 INSPECT_WEB_BROWSER_IMAGE = "inspect_web_browser"
+INSPECT_COMPUTER_IMAGE = "inspect-computer-tool"
 INTERNAL_IMAGES = {
     INSPECT_WEB_BROWSER_IMAGE: PKG_PATH
     / "tool"
     / "_tools"
     / "_web_browser"
-    / "_resources"
+    / "_resources",
+    INSPECT_COMPUTER_IMAGE: PKG_PATH / "tool" / "beta" / "_computer" / "_resources",
 }

inspect_ai/util/_sandbox/environment.py CHANGED Viewed

@@ -28,15 +28,35 @@ SampleCleanup = Callable[
 ]
+class HostMapping(BaseModel):
+    host_ip: str
+    host_port: int
+class PortMapping(BaseModel):
+    container_port: int
+    protocol: Literal["tcp", "udp"]
+    mappings: list[HostMapping]
 class SandboxConnection(BaseModel):
     """Information required to connect to sandbox."""
+    type: str
+    """Sandbox type name (e.g. 'docker', 'local', etc.)"""
     command: str
     """Shell command to connect to sandbox."""
     vscode_command: list[Any] | None = Field(default=None)
     """Optional vscode command (+args) to connect to sandbox."""
+    ports: list[PortMapping] | None = Field(default=None)
+    """Optional list of port mappings into container"""
+    container: str | None = Field(default=None)
+    """Optional container name (does not apply to all sandboxes)."""
 class SandboxEnvironment(abc.ABC):
     """Environment for executing arbitrary code from tools.
@@ -139,6 +159,7 @@ class SandboxEnvironment(abc.ABC):
         env: dict[str, str] = {},
         user: str | None = None,
         timeout: int | None = None,
+        timeout_retry: bool = True,
     ) -> ExecResult[str]:
         """Execute a command within a sandbox environment.
@@ -155,12 +176,17 @@ class SandboxEnvironment(abc.ABC):
           env (dict[str,str]): Environment variables for execution.
           user (str | None): Optional username or UID to run the command as.
           timeout (int | None): Optional execution timeout (seconds).
+          timeout_retry (bool): Retry the command in the case that it times out.
+            Commands will be retried up to twice, with a timeout of no greater
+            than 60 seconds for the first retry and 30 for the second.
         Returns:
           Execution result (status code, stderr/stdout, etc.)
         Raises:
-          TimeoutError: If the specified `timeout` expires.
+          TimeoutError: If the specified `timeout` expires
+            (and `timeout_retry` attempts also timeout).
           UnicodeDecodeError: If an error occurs while
             decoding the command output.
           PermissionError: If the user does not have

inspect_ai/util/_sandbox/local.py CHANGED Viewed

@@ -55,6 +55,7 @@ class LocalSandboxEnvironment(SandboxEnvironment):
         env: dict[str, str] = {},
         user: str | None = None,
         timeout: int | None = None,
+        timeout_retry: bool = True,
     ) -> ExecResult[str]:
         if user is not None:
             warnings.warn(

{inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: inspect_ai
-Version: 0.3.57
+Version: 0.3.59
 Summary: Framework for large language model evaluations
 Author: UK AI Safety Institute
 License: MIT License
@@ -67,7 +67,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
 Requires-Dist: pytest-dotenv; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: ruff==0.9.0; extra == "dev"
+Requires-Dist: ruff==0.9.2; extra == "dev"
 Requires-Dist: textual-dev>=0.86.2; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
 Requires-Dist: types-beautifulsoup4; extra == "dev"

inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl

inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl