PyPI - inspect-ai - Versions diffs - 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl - Mend

inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

inspect_ai/__init__.py +1 -0
inspect_ai/_cli/common.py +1 -1
inspect_ai/_cli/trace.py +33 -20
inspect_ai/_display/core/active.py +1 -1
inspect_ai/_display/core/display.py +1 -1
inspect_ai/_display/core/footer.py +1 -1
inspect_ai/_display/core/panel.py +1 -1
inspect_ai/_display/core/progress.py +0 -6
inspect_ai/_display/core/rich.py +1 -1
inspect_ai/_display/rich/display.py +2 -2
inspect_ai/_display/textual/app.py +15 -17
inspect_ai/_display/textual/widgets/clock.py +3 -3
inspect_ai/_display/textual/widgets/samples.py +6 -13
inspect_ai/_eval/context.py +9 -1
inspect_ai/_eval/run.py +16 -11
inspect_ai/_eval/score.py +4 -10
inspect_ai/_eval/task/results.py +5 -4
inspect_ai/_eval/task/run.py +6 -12
inspect_ai/_eval/task/task.py +10 -0
inspect_ai/_util/ansi.py +31 -0
inspect_ai/_util/datetime.py +1 -1
inspect_ai/_util/deprecation.py +1 -1
inspect_ai/_util/format.py +7 -0
inspect_ai/_util/json.py +11 -1
inspect_ai/_util/logger.py +14 -13
inspect_ai/_util/throttle.py +10 -1
inspect_ai/_util/trace.py +79 -47
inspect_ai/_util/transcript.py +37 -4
inspect_ai/_util/vscode.py +51 -0
inspect_ai/_view/notify.py +2 -1
inspect_ai/_view/www/.prettierrc.js +12 -0
inspect_ai/_view/www/App.css +22 -1
inspect_ai/_view/www/dist/assets/index.css +2374 -2
inspect_ai/_view/www/dist/assets/index.js +29752 -24492
inspect_ai/_view/www/log-schema.json +262 -215
inspect_ai/_view/www/package.json +1 -0
inspect_ai/_view/www/src/App.mjs +19 -9
inspect_ai/_view/www/src/Types.mjs +0 -1
inspect_ai/_view/www/src/api/Types.mjs +15 -4
inspect_ai/_view/www/src/api/api-http.mjs +2 -0
inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
inspect_ai/_view/www/src/components/Tools.mjs +28 -5
inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
inspect_ai/_view/www/src/types/log.d.ts +28 -20
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
inspect_ai/_view/www/yarn.lock +44 -0
inspect_ai/approval/_apply.py +4 -0
inspect_ai/approval/_human/panel.py +5 -8
inspect_ai/dataset/_dataset.py +51 -10
inspect_ai/dataset/_util.py +31 -3
inspect_ai/log/__init__.py +2 -0
inspect_ai/log/_log.py +30 -2
inspect_ai/log/_recorders/eval.py +2 -0
inspect_ai/model/_call_tools.py +31 -7
inspect_ai/model/_chat_message.py +3 -0
inspect_ai/model/_model.py +42 -1
inspect_ai/model/_providers/anthropic.py +4 -0
inspect_ai/model/_providers/google.py +24 -6
inspect_ai/model/_providers/openai.py +17 -3
inspect_ai/model/_providers/openai_o1.py +10 -12
inspect_ai/model/_render.py +9 -2
inspect_ai/scorer/_metric.py +12 -1
inspect_ai/solver/__init__.py +2 -0
inspect_ai/solver/_human_agent/agent.py +83 -0
inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
inspect_ai/solver/_human_agent/commands/clock.py +70 -0
inspect_ai/solver/_human_agent/commands/command.py +59 -0
inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
inspect_ai/solver/_human_agent/commands/note.py +42 -0
inspect_ai/solver/_human_agent/commands/score.py +80 -0
inspect_ai/solver/_human_agent/commands/status.py +62 -0
inspect_ai/solver/_human_agent/commands/submit.py +151 -0
inspect_ai/solver/_human_agent/install.py +222 -0
inspect_ai/solver/_human_agent/panel.py +252 -0
inspect_ai/solver/_human_agent/service.py +45 -0
inspect_ai/solver/_human_agent/state.py +55 -0
inspect_ai/solver/_human_agent/view.py +24 -0
inspect_ai/solver/_task_state.py +28 -2
inspect_ai/tool/_tool.py +10 -2
inspect_ai/tool/_tool_info.py +2 -1
inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
inspect_ai/tool/_tools/_web_browser/_web_browser.py +16 -13
inspect_ai/util/__init__.py +12 -4
inspect_ai/{_util/display.py → util/_display.py} +6 -0
inspect_ai/util/_panel.py +31 -9
inspect_ai/util/_sandbox/__init__.py +0 -3
inspect_ai/util/_sandbox/context.py +5 -1
inspect_ai/util/_sandbox/docker/compose.py +17 -13
inspect_ai/util/_sandbox/docker/docker.py +9 -6
inspect_ai/util/_sandbox/docker/internal.py +1 -1
inspect_ai/util/_sandbox/docker/util.py +3 -2
inspect_ai/util/_sandbox/environment.py +6 -5
inspect_ai/util/_sandbox/local.py +1 -1
inspect_ai/util/_sandbox/self_check.py +18 -18
inspect_ai/util/_sandbox/service.py +22 -7
inspect_ai/util/_store.py +7 -8
inspect_ai/util/_store_model.py +110 -0
inspect_ai/util/_subprocess.py +3 -3
inspect_ai/util/_throttle.py +32 -0
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +131 -108
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0

inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py CHANGED Viewed

@@ -38,9 +38,9 @@ class EnvironmentSpec:
             for i, obs_spec in enumerate(env_obs_spec.values()):
                 self.observation_spec[i + 1] = convert(obs_spec)
-        assert isinstance(
-            env.action_spec(), specs.Array
-        ), "Only a single action type is supported."
+        assert isinstance(env.action_spec(), specs.Array), (
+            "Only a single action type is supported."
+        )
         self.action_spec = {1: convert(env.action_spec())}
         self.observation_manager = spec_manager.SpecManager(self.observation_spec)
@@ -234,12 +234,12 @@ class EnvironmentService(dm_env_rpc_pb2_grpc.EnvironmentServicer):
             observations.
         """
         with self._lock:
-            assert (
-                cur_world in self._envs
-            ), "Current world does not have an assosiated environment"
-            assert (
-                cur_world in self._joined_worlds
-            ), "Please join world before calling step."
+            assert cur_world in self._envs, (
+                "Current world does not have an assosiated environment"
+            )
+            assert cur_world in self._joined_worlds, (
+                "Please join world before calling step."
+            )
             env = self._envs[cur_world]
             spec = self._specs[cur_world]

inspect_ai/tool/_tools/_web_browser/_web_browser.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import re
 from textwrap import dedent
+from pydantic import Field
 from inspect_ai._util.error import PrerequisiteError
 from inspect_ai.tool._tool import Tool, ToolError, tool
 from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
@@ -8,7 +10,7 @@ from inspect_ai.tool._tool_info import parse_tool_info
 from inspect_ai.tool._tool_with import tool_with
 from inspect_ai.util._sandbox import SandboxEnvironment, sandbox_with
 from inspect_ai.util._sandbox.docker.internal import INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB
-from inspect_ai.util._store import store
+from inspect_ai.util._store_model import StoreModel, store_as
 def web_browser(interactive: bool = True) -> list[Tool]:
@@ -97,12 +99,15 @@ def go_without_interactive_docs(tool: Tool) -> Tool:
 # custom viewer for interactive tool calls that shows a truncated
 # version of current the web accessiblity tree if available
-WEB_BROWSER_AT = "web_browser:at"
+class WebBrowserStore(StoreModel):
+    web_at: str = Field(default_factory=str)
+    session_id: str = Field(default_factory=str)
 def web_at_viewer(call: ToolCall) -> ToolCallView:
     # get the web accessiblity tree, if we have it create a view from it
-    web_at = store().get(WEB_BROWSER_AT, "")
+    web_at = store_as(WebBrowserStore).web_at
     element_id = call.arguments.get("element_id", 0)
     if web_at and element_id:
         lines = web_at.splitlines()
@@ -332,15 +337,14 @@ def web_browser_refresh() -> Tool:
 WEB_CLIENT_REQUEST = "/app/web_browser/web_client.py"
 WEB_CLIENT_NEW_SESSION = "/app/web_browser/web_client_new_session.py"
-BROWSER_SESSION_ID = "BROWSER_SESSION_ID"
 async def web_browser_cmd(cmd: str, *args: str) -> str:
     sandbox_env = await sandbox_with(WEB_CLIENT_NEW_SESSION)
     session_flag = ""
     if sandbox_env:
-        browser_session = store().get(BROWSER_SESSION_ID, "")
-        if not browser_session:
+        store = store_as(WebBrowserStore)
+        if not store.session_id:
             result = await sandbox_env.exec(["python3", WEB_CLIENT_NEW_SESSION])
             if not result.success:
@@ -348,10 +352,9 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
                     f"Error creating new web browser session: {result.stderr}"
                 )
-            browser_session = result.stdout.strip("\n")
-            store().set(BROWSER_SESSION_ID, browser_session)
+            store.session_id = result.stdout.strip("\n")
-        session_flag = f"--session_name={browser_session}"
+        session_flag = f"--session_name={store.session_id}"
     else:
         sandbox_env = await web_browser_sandbox()
@@ -369,7 +372,9 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
         )
     else:
         response = parse_web_browser_output(result.stdout)
-        if "web_at" in response:
+        if "error" in response and response.get("error", "").strip() != "":
+            raise ToolError(str(response.get("error")) or "(unknown error)")
+        elif "web_at" in response:
             web_at = (
                 str(response.get("web_at")) or "(no web accessiblity tree available)"
             )
@@ -379,10 +384,8 @@ async def web_browser_cmd(cmd: str, *args: str) -> str:
                 line.partition("data:image/png;base64")[0] for line in web_at_lines
             ]
             web_at = "\n".join(web_at_lines)
-            store().set(WEB_BROWSER_AT, web_at)
+            store_as(WebBrowserStore).web_at = web_at
             return web_at
-        elif "error" in response:
-            raise ToolError(str(response.get("error")) or "(unknown error)")
         else:
             raise RuntimeError(
                 f"web_browser output must contain either 'error' or 'web_at' field: {result.stdout}"

inspect_ai/util/__init__.py CHANGED Viewed

@@ -1,5 +1,8 @@
+from inspect_ai._util.trace import trace_action, trace_message
 from ._concurrency import concurrency
 from ._console import input_screen
+from ._display import DisplayType, display_type
 from ._panel import InputPanel, input_panel
 from ._resource import resource
 from ._sandbox import (
@@ -11,23 +14,25 @@ from ._sandbox import (
     SandboxEnvironments,
     SandboxEnvironmentSpec,
     SandboxEnvironmentType,
-    SandboxService,
     sandbox,
-    sandbox_service,
     sandbox_with,
     sandboxenv,
 )
 from ._store import Store, store
+from ._store_model import StoreModel, store_as
 from ._subprocess import (
     ExecResult,
     subprocess,
 )
 from ._subtask import Subtask, subtask
+from ._throttle import throttle
 from ._trace import trace_enabled, trace_panel
 __all__ = [
     "ExecResult",
     "concurrency",
+    "DisplayType",
+    "display_type",
     "InputPanel",
     "input_panel",
     "input_screen",
@@ -44,12 +49,15 @@ __all__ = [
     "sandboxenv",
     "sandbox",
     "sandbox_with",
-    "SandboxService",
-    "sandbox_service",
     "Store",
     "store",
+    "StoreModel",
+    "store_as",
     "Subtask",
     "subtask",
+    "throttle",
     "trace_enabled",
     "trace_panel",
+    "trace_action",
+    "trace_message",
 ]

inspect_ai/{_util/display.py → util/_display.py} RENAMED Viewed

@@ -7,6 +7,7 @@ from inspect_ai._util.constants import DEFAULT_DISPLAY
 logger = getLogger(__name__)
 DisplayType = Literal["full", "rich", "plain", "none"]
+"""Console display type."""
 _display_type: DisplayType | None = None
@@ -28,6 +29,11 @@ def init_display_type(display: str | None = None) -> DisplayType:
 def display_type() -> DisplayType:
+    """Get the current console display type.
+    Returns:
+       DisplayType: Display type.
+    """
     global _display_type
     if _display_type:
         return _display_type

inspect_ai/util/_panel.py CHANGED Viewed

@@ -1,23 +1,46 @@
 from typing import Any, Protocol, TypeVar
 from textual.containers import Container
+from typing_extensions import Self
 class InputPanel(Container):
+    DEFAULT_TITLE = "Panel"
     DEFAULT_CLASSES = "task-input-panel"
+    DEFAULT_CSS = """
+    InputPanel {
+        padding: 0 1 1 1;
+    }
+    """
     class Host(Protocol):
         def set_title(self, title: str) -> None: ...
         def activate(self) -> None: ...
         def deactivate(self) -> None: ...
         def close(self) -> None: ...
-    def __init__(self, title: str, host: Host) -> None:
+    def __init__(self, host: Host) -> None:
+        """Initialise the panel.
+        Panels are created as required by the input_panel() function so
+        you should NOT override __init__ with your own initisation (rather,
+        you should define reactive props and/or methods that perform
+        initialisation).
+        You should also override the `DEFAULT_TITLE` variable for your panel to
+        provide a default tab title (you can change the table dynamically as
+        required using the `title` property).
+        Args:
+           host (InputPanel.Host): Interface to UI host of input panel.
+        """
         super().__init__()
-        self._title = title
+        self._title = self.DEFAULT_TITLE
         self._host = host
-    async def __aenter__(self) -> "InputPanel":
+    async def __aenter__(self) -> Self:
         self.activate()
         return self
@@ -50,10 +73,10 @@ class InputPanel(Container):
         pass
-TP = TypeVar("TP", bound=InputPanel)
+TP = TypeVar("TP", bound=InputPanel, covariant=True)
-async def input_panel(title: str, panel: type[TP]) -> TP:
+async def input_panel(panel: type[TP]) -> TP:
     """Create an input panel in the task display.
     There can only be a single instance of an InputPanel with a given
@@ -65,19 +88,18 @@ async def input_panel(title: str, panel: type[TP]) -> TP:
         the scope exits -- see below for open/close semantics)
         ```python
-        panel = await input_panel("Custom", CustomPanel)
+        panel = await input_panel(CustomPanel)
         panel.activate()
         ```
         Activate and close an input panel using a context manager:
         ```python
-        async with await input_panel("Custom", CustomPanel) as panel:
+        async with await input_panel(CustomPanel) as panel:
             ...
         ```
     Args:
-       title (str): Input panel title.
        panel (type[TP]): Type of panel widget (must derive from `InputPanel`)
     Returns:
@@ -88,4 +110,4 @@ async def input_panel(title: str, panel: type[TP]) -> TP:
     """
     from inspect_ai._display.core.active import task_screen
-    return await task_screen().input_panel(title, panel)
+    return await task_screen().input_panel(panel)

inspect_ai/util/_sandbox/__init__.py CHANGED Viewed

@@ -13,7 +13,6 @@ from .environment import (
 from .limits import OutputLimitExceededError, SandboxEnvironmentLimits
 from .local import LocalSandboxEnvironment  # noqa: F401
 from .registry import sandboxenv
-from .service import SandboxService, sandbox_service
 __all__ = [
     "OutputLimitExceededError",
@@ -27,6 +26,4 @@ __all__ = [
     "sandboxenv",
     "sandbox",
     "sandbox_with",
-    "SandboxService",
-    "sandbox_service",
 ]

inspect_ai/util/_sandbox/context.py CHANGED Viewed

@@ -24,6 +24,10 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
     Return:
       SandboxEnvironment instance.
+    Raises:
+      ProcessLookupError: If there are no sandboxes available.
+      ValueError: If an invalid sandbox name is specified.
     """
     # verify we have a context
     environments = sandbox_environments_context_var.get(None)
@@ -102,7 +106,7 @@ async def sandbox_connections() -> dict[str, SandboxConnection]:
 def raise_no_sandbox() -> NoReturn:
-    raise RuntimeError(
+    raise ProcessLookupError(
         "No sandbox environment has been provided for the current sample or task. "
         + "Please specify a sandbox for the sample or a global default sandbox for the task"
     )

inspect_ai/util/_sandbox/docker/compose.py CHANGED Viewed

@@ -8,8 +8,8 @@ from typing import Any, Literal, TypedDict, cast
 import yaml
 from pydantic import BaseModel
-from inspect_ai._util.display import display_type
 from inspect_ai._util.error import PrerequisiteError
+from inspect_ai.util._display import display_type
 from inspect_ai.util._subprocess import ExecResult, subprocess
 from .prereqs import (
@@ -29,11 +29,11 @@ async def compose_up(project: ComposeProject) -> None:
     result = await compose_command(
         ["up", "--detach", "--wait", "--wait-timeout", COMPOSE_WAIT],
         project=project,
+        # wait up to 5 minutes for container to go up (compose wait + 3 minutes)
+        timeout=300,
     )
     if not result.success:
-        msg = (
-            f"Failed to start docker services for {project.config}: " f"{result.stderr}"
-        )
+        msg = f"Failed to start docker services for {project.config}: {result.stderr}"
         raise RuntimeError(msg)
@@ -80,7 +80,11 @@ async def compose_cp(
     output_limit: int | None = None,
 ) -> None:
     result = await compose_command(
-        ["cp", "--", src, dest], project=project, cwd=cwd, output_limit=output_limit
+        ["cp", "--", src, dest],
+        project=project,
+        timeout=120,  # 2-minute timeout for file copies
+        cwd=cwd,
+        output_limit=output_limit,
     )
     if not result.success:
         msg = f"Failed to copy file from '{src}' to '{dest}': {result.stderr}"
@@ -118,7 +122,7 @@ async def compose_ps(
         command.append("--all")
     if status:
         command = command + ["--status", status]
-    result = await compose_command(command, project=project)
+    result = await compose_command(command, project=project, timeout=60)
     if not result.success:
         msg = f"Error querying for running services: {result.stderr}"
         raise RuntimeError(msg)
@@ -136,6 +140,7 @@ async def compose_build(project: ComposeProject, capture_output: bool = False) -
     result = await compose_command(
         ["build"],
         project=project,
+        timeout=None,  # no timeout for build
         capture_output=capture_output,
     )
     if not result.success:
@@ -151,6 +156,7 @@ async def compose_pull(
     return await compose_command(
         ["pull", "--ignore-buildable", "--policy", "missing", service],
         project=project,
+        timeout=None,  # no timeout for pull
         capture_output=capture_output,
     )
@@ -185,7 +191,7 @@ ComposeService = TypedDict(
 async def compose_services(project: ComposeProject) -> dict[str, ComposeService]:
-    result = await compose_command(["config"], project=project)
+    result = await compose_command(["config"], project=project, timeout=60)
     if not result.success:
         raise RuntimeError(f"Error reading docker config: {result.stderr}")
     return cast(dict[str, ComposeService], yaml.safe_load(result.stdout)["services"])
@@ -209,12 +215,13 @@ async def compose_ls() -> list[Project]:
 async def compose_cleanup_images(
     project: ComposeProject,
+    *,
     cwd: str | None = None,
-    timeout: int | None = None,
+    timeout: int | None,
 ) -> None:
     # List the images that would be created for this compose
     images_result = await compose_command(
-        ["config", "--images"], project=project, cwd=cwd
+        ["config", "--images"], project=project, timeout=timeout, cwd=cwd
     )
     # Remove those images explicitly
@@ -246,14 +253,11 @@ async def compose_cleanup_images(
                         logger.warning(msg)
-DEFAULT_COMPOSE_TIMEOUT = 60
 async def compose_command(
     command: list[str],
     *,
     project: ComposeProject,
-    timeout: int | None = DEFAULT_COMPOSE_TIMEOUT,
+    timeout: int | None,
     input: str | bytes | None = None,
     cwd: str | Path | None = None,
     forward_env: bool = True,

inspect_ai/util/_sandbox/docker/docker.py CHANGED Viewed

@@ -78,7 +78,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
             await compose_build(project)
             # cleanup images created during build
-            await compose_cleanup_images(project)
+            await compose_cleanup_images(project, timeout=60)
             services = await compose_services(project)
             for name, service in services.items():
@@ -326,6 +326,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
                 container_tmpfile,
             ],
             project=self._project,
+            timeout=60,
         )
         parent = PurePosixPath(file).parent
@@ -405,7 +406,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
             # read and return w/ appropriate encoding
             if text:
-                with open(dest_file, "r", encoding="utf-8") as f:
+                with open(dest_file, "r", newline="", encoding="utf-8") as f:
                     return f.read()
             else:
                 with open(dest_file, "rb") as f:
@@ -424,13 +425,15 @@ class DockerSandboxEnvironment(SandboxEnvironment):
             None,
         )
-        # return container login
+        # return container connection
         if container:
             return SandboxConnection(
-                command=f"docker exec -it {container} /bin/bash --login",
-                container=container,
+                command=f"docker exec -it {container} bash -l",
+                vscode_command=[
+                    "remote-containers.attachToRunningContainer",
+                    container,
+                ],
             )
         # error (not currently running)
         else:
             raise ConnectionError(

inspect_ai/util/_sandbox/docker/internal.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from inspect_ai._util.constants import PKG_PATH
-from inspect_ai._util.display import display_type
 from inspect_ai._util.error import PrerequisiteError
+from inspect_ai.util._display import display_type
 from inspect_ai.util._subprocess import subprocess
 INSPECT_WEB_BROWSER_IMAGE_DOCKERHUB = "aisiuk/inspect-web-browser-tool"

inspect_ai/util/_sandbox/docker/util.py CHANGED Viewed

@@ -84,10 +84,11 @@ def task_project_name(task: str) -> str:
     if len(task) == 0:
         task = "task"
-    return f"inspect-{task}-i{uuid().lower()}"
+    # _- breaks docker project name constraints so we strip trailing underscores.
+    return f"inspect-{task[:12].rstrip('_')}-i{uuid().lower()[:6]}"
-inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{22}$"
+inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{6,}$"
 def is_inspect_project(name: str) -> bool:

inspect_ai/util/_sandbox/environment.py CHANGED Viewed

@@ -2,7 +2,7 @@ from __future__ import annotations
 import abc
 from dataclasses import dataclass, field
-from typing import Awaitable, Callable, Literal, NamedTuple, Union, overload
+from typing import Any, Awaitable, Callable, Literal, NamedTuple, Union, overload
 from pydantic import BaseModel, Field
@@ -34,12 +34,9 @@ class SandboxConnection(BaseModel):
     command: str
     """Shell command to connect to sandbox."""
-    vscode_command: list[str] | None = Field(default=None)
+    vscode_command: list[Any] | None = Field(default=None)
     """Optional vscode command (+args) to connect to sandbox."""
-    container: str | None = Field(default=None)
-    """Optional container name (will not apply to all sandboxes)."""
 class SandboxEnvironment(abc.ABC):
     """Environment for executing arbitrary code from tools.
@@ -205,6 +202,10 @@ class SandboxEnvironment(abc.ABC):
         File size is limited to 100 MiB.
+        When reading text files, implementations should preserve newline constructs
+        (e.g. crlf should be preserved not converted to lf). This is equivalent
+        to specifying `newline=""` in a call to the Python `open()` function.
         Args:
           file (str): Path to file (relative file paths will resolve to the
             per-sample working directory).

inspect_ai/util/_sandbox/local.py CHANGED Viewed

@@ -101,7 +101,7 @@ class LocalSandboxEnvironment(SandboxEnvironment):
         file = self._resolve_file(file)
         verify_read_file_size(file)
         if text:
-            with open(file, "r", encoding="utf-8") as f:
+            with open(file, "r", newline="", encoding="utf-8") as f:
                 return f.read()
         else:
             with open(file, "rb") as f:

inspect_ai/util/_sandbox/self_check.py CHANGED Viewed

@@ -75,9 +75,9 @@ async def test_read_and_write_file_text(sandbox_env: SandboxEnvironment) -> None
     written_file_string = await sandbox_env.read_file(
         "test_read_and_write_file_text.file", text=True
     )
-    assert (
-        "great #content\nincluding newlines" == written_file_string
-    ), f"unexpected content: [{written_file_string}]"
+    assert "great #content\nincluding newlines" == written_file_string, (
+        f"unexpected content: [{written_file_string}]"
+    )
     await _cleanup_file(sandbox_env, "test_read_and_write_file_text.file")
@@ -219,9 +219,9 @@ async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
     exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; echo bar"])
     expected = "foo\nbar\n"
     # in the assertion message, we show the actual bytes to help debug newline issues
-    assert (
-        exec_result.stdout == expected
-    ), f"Unexpected output:expected {expected.encode('UTF-8')!r}; got {exec_result.stdout.encode('UTF-8')!r}"
+    assert exec_result.stdout == expected, (
+        f"Unexpected output:expected {expected.encode('UTF-8')!r}; got {exec_result.stdout.encode('UTF-8')!r}"
+    )
 async def test_exec_timeout(sandbox_env: SandboxEnvironment) -> None:
@@ -248,13 +248,13 @@ async def test_exec_as_user(sandbox_env: SandboxEnvironment) -> None:
         # Test exec as different users
         root_result = await sandbox_env.exec(["whoami"], user="root")
-        assert (
-            root_result.stdout.strip() == "root"
-        ), f"Expected 'root', got '{root_result.stdout.strip()}'"
+        assert root_result.stdout.strip() == "root", (
+            f"Expected 'root', got '{root_result.stdout.strip()}'"
+        )
         myuser_result = await sandbox_env.exec(["whoami"], user=username)
-        assert (
-            myuser_result.stdout.strip() == username
-        ), f"Expected '{username}', got '{myuser_result.stdout.strip()}'"
+        assert myuser_result.stdout.strip() == username, (
+            f"Expected '{username}', got '{myuser_result.stdout.strip()}'"
+        )
     finally:
         # Clean up
         await sandbox_env.exec(["userdel", "-r", username], user="root")
@@ -266,9 +266,9 @@ async def test_exec_as_nonexistent_user(sandbox_env: SandboxEnvironment) -> None
     expected_error = (
         "unable to find user nonexistent: no matching entries in passwd file"
     )
-    assert (
-        expected_error in result.stdout
-    ), f"Error string '{expected_error}' not found in error output: '{result.stdout}'"
+    assert expected_error in result.stdout, (
+        f"Error string '{expected_error}' not found in error output: '{result.stdout}'"
+    )
 async def test_cwd_unspecified(sandbox_env: SandboxEnvironment) -> None:
@@ -291,9 +291,9 @@ async def test_cwd_relative(sandbox_env: SandboxEnvironment) -> None:
     file_path = cwd_subdirectory + "/" + file_name
     await sandbox_env.write_file(file_path, "ls me plz")
     current_dir_contents = (await sandbox_env.exec(["ls"], cwd=cwd_subdirectory)).stdout
-    assert (
-        file_name in current_dir_contents
-    ), f"{file_name} not found in {current_dir_contents}"
+    assert file_name in current_dir_contents, (
+        f"{file_name} not found in {current_dir_contents}"
+    )
     await _cleanup_file(sandbox_env, file_path)

inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl

inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl