PyPI - inspect-ai - Versions diffs - 0.3.63__py3-none-any.whl → 0.3.65__py3-none-any.whl - Mend

inspect-ai 0.3.63__py3-none-any.whl → 0.3.65__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (182) hide show

inspect_ai/_cli/cache.py +8 -7
inspect_ai/_cli/common.py +0 -12
inspect_ai/_cli/eval.py +32 -4
inspect_ai/_cli/info.py +1 -0
inspect_ai/_cli/list.py +1 -1
inspect_ai/_cli/log.py +2 -0
inspect_ai/_cli/sandbox.py +4 -1
inspect_ai/_cli/score.py +181 -32
inspect_ai/_cli/trace.py +2 -0
inspect_ai/_cli/view.py +4 -2
inspect_ai/_display/core/config.py +7 -1
inspect_ai/_display/core/progress.py +1 -1
inspect_ai/_display/textual/app.py +8 -4
inspect_ai/_display/textual/widgets/samples.py +6 -5
inspect_ai/_display/textual/widgets/sandbox.py +6 -0
inspect_ai/_eval/__init__.py +0 -0
inspect_ai/_eval/eval.py +100 -97
inspect_ai/_eval/evalset.py +69 -69
inspect_ai/_eval/loader.py +122 -12
inspect_ai/_eval/registry.py +1 -1
inspect_ai/_eval/run.py +14 -0
inspect_ai/_eval/score.py +125 -36
inspect_ai/_eval/task/log.py +105 -4
inspect_ai/_eval/task/results.py +92 -38
inspect_ai/_eval/task/run.py +6 -2
inspect_ai/_eval/task/sandbox.py +35 -2
inspect_ai/_eval/task/task.py +49 -46
inspect_ai/_util/__init__.py +0 -0
inspect_ai/_util/constants.py +1 -1
inspect_ai/_util/content.py +8 -0
inspect_ai/_util/error.py +2 -0
inspect_ai/_util/file.py +15 -1
inspect_ai/_util/logger.py +4 -2
inspect_ai/_util/registry.py +7 -1
inspect_ai/_view/view.py +1 -2
inspect_ai/_view/www/App.css +8 -3
inspect_ai/_view/www/README.md +1 -1
inspect_ai/_view/www/dist/assets/index.css +66 -38
inspect_ai/_view/www/dist/assets/index.js +525 -523
inspect_ai/_view/www/log-schema.json +86 -73
inspect_ai/_view/www/package.json +1 -1
inspect_ai/_view/www/src/App.tsx +1 -0
inspect_ai/_view/www/src/components/AnsiDisplay.tsx +1 -1
inspect_ai/_view/www/src/components/JsonPanel.tsx +1 -1
inspect_ai/_view/www/src/components/LargeModal.tsx +39 -49
inspect_ai/_view/www/src/components/NavPills.tsx +3 -1
inspect_ai/_view/www/src/components/TabSet.tsx +19 -4
inspect_ai/_view/www/src/logfile/remoteLogFile.ts +0 -1
inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +1 -1
inspect_ai/_view/www/src/metadata/MetaDataView.tsx +1 -1
inspect_ai/_view/www/src/metadata/RenderedContent.tsx +6 -13
inspect_ai/_view/www/src/plan/PlanDetailView.tsx +17 -2
inspect_ai/_view/www/src/plan/SolverDetailView.tsx +1 -1
inspect_ai/_view/www/src/samples/SampleDisplay.tsx +14 -5
inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +4 -2
inspect_ai/_view/www/src/samples/SamplesTools.tsx +16 -24
inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +1 -1
inspect_ai/_view/www/src/samples/chat/ChatView.tsx +1 -0
inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +27 -13
inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +19 -17
inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +12 -10
inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +56 -66
inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +12 -5
inspect_ai/_view/www/src/samples/chat/tools/tool.ts +21 -36
inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +3 -1
inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +27 -25
inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +5 -1
inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +13 -13
inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +1 -1
inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +2 -2
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +9 -5
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +1 -1
inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -4
inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +1 -0
inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +1 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +17 -6
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +14 -19
inspect_ai/_view/www/src/types/log.d.ts +107 -19
inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +7 -1
inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +5 -3
inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +25 -27
inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +12 -11
inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +25 -2
inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +60 -36
inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +4 -0
inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +6 -4
inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +16 -14
inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +9 -19
inspect_ai/_view/www/src/workspace/utils.ts +34 -0
inspect_ai/approval/_approval.py +2 -0
inspect_ai/approval/_approver.py +4 -4
inspect_ai/approval/_auto.py +1 -1
inspect_ai/approval/_human/approver.py +3 -0
inspect_ai/approval/_policy.py +5 -0
inspect_ai/approval/_registry.py +2 -2
inspect_ai/dataset/_dataset.py +36 -45
inspect_ai/dataset/_sources/__init__.py +0 -0
inspect_ai/dataset/_sources/csv.py +13 -13
inspect_ai/dataset/_sources/hf.py +29 -29
inspect_ai/dataset/_sources/json.py +10 -10
inspect_ai/log/__init__.py +2 -0
inspect_ai/log/_convert.py +3 -3
inspect_ai/log/_file.py +24 -9
inspect_ai/log/_log.py +98 -7
inspect_ai/log/_message.py +3 -1
inspect_ai/log/_recorders/file.py +4 -0
inspect_ai/log/_recorders/recorder.py +3 -0
inspect_ai/log/_transcript.py +19 -8
inspect_ai/model/__init__.py +2 -0
inspect_ai/model/_cache.py +39 -21
inspect_ai/model/_call_tools.py +2 -2
inspect_ai/model/_chat_message.py +14 -4
inspect_ai/model/_generate_config.py +1 -1
inspect_ai/model/_model.py +31 -24
inspect_ai/model/_model_output.py +14 -1
inspect_ai/model/_openai.py +10 -18
inspect_ai/model/_providers/google.py +9 -5
inspect_ai/model/_providers/openai.py +5 -9
inspect_ai/model/_providers/openrouter.py +1 -1
inspect_ai/scorer/__init__.py +6 -1
inspect_ai/scorer/_answer.py +1 -1
inspect_ai/scorer/_classification.py +4 -0
inspect_ai/scorer/_match.py +4 -5
inspect_ai/scorer/_metric.py +87 -28
inspect_ai/scorer/_metrics/__init__.py +3 -3
inspect_ai/scorer/_metrics/accuracy.py +8 -10
inspect_ai/scorer/_metrics/mean.py +3 -17
inspect_ai/scorer/_metrics/std.py +111 -30
inspect_ai/scorer/_model.py +12 -12
inspect_ai/scorer/_pattern.py +3 -3
inspect_ai/scorer/_reducer/reducer.py +36 -21
inspect_ai/scorer/_reducer/registry.py +2 -2
inspect_ai/scorer/_reducer/types.py +7 -1
inspect_ai/scorer/_score.py +11 -1
inspect_ai/scorer/_scorer.py +110 -16
inspect_ai/solver/__init__.py +1 -1
inspect_ai/solver/_basic_agent.py +19 -22
inspect_ai/solver/_bridge/__init__.py +0 -3
inspect_ai/solver/_bridge/bridge.py +3 -3
inspect_ai/solver/_chain.py +1 -2
inspect_ai/solver/_critique.py +3 -3
inspect_ai/solver/_fork.py +2 -2
inspect_ai/solver/_human_agent/__init__.py +0 -0
inspect_ai/solver/_human_agent/agent.py +5 -8
inspect_ai/solver/_human_agent/commands/clock.py +14 -10
inspect_ai/solver/_human_agent/commands/note.py +1 -1
inspect_ai/solver/_human_agent/commands/score.py +0 -11
inspect_ai/solver/_multiple_choice.py +15 -18
inspect_ai/solver/_prompt.py +7 -7
inspect_ai/solver/_solver.py +53 -52
inspect_ai/solver/_task_state.py +80 -69
inspect_ai/solver/_use_tools.py +9 -9
inspect_ai/tool/__init__.py +2 -1
inspect_ai/tool/_tool.py +43 -14
inspect_ai/tool/_tool_call.py +6 -2
inspect_ai/tool/_tool_choice.py +3 -1
inspect_ai/tool/_tool_def.py +10 -8
inspect_ai/tool/_tool_params.py +24 -0
inspect_ai/tool/_tool_with.py +7 -7
inspect_ai/tool/_tools/__init__.py +0 -0
inspect_ai/tool/_tools/_computer/_common.py +2 -2
inspect_ai/tool/_tools/_computer/_computer.py +11 -0
inspect_ai/tool/_tools/_execute.py +15 -9
inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
inspect_ai/tool/_tools/_web_search.py +7 -5
inspect_ai/util/_concurrency.py +3 -3
inspect_ai/util/_panel.py +2 -0
inspect_ai/util/_resource.py +12 -12
inspect_ai/util/_sandbox/docker/compose.py +23 -20
inspect_ai/util/_sandbox/docker/config.py +2 -1
inspect_ai/util/_sandbox/docker/docker.py +10 -1
inspect_ai/util/_sandbox/docker/service.py +100 -0
inspect_ai/util/_sandbox/environment.py +99 -96
inspect_ai/util/_subprocess.py +5 -3
inspect_ai/util/_subtask.py +15 -16
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/LICENSE +1 -1
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/METADATA +10 -6
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/RECORD +182 -175
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.63.dist-info → inspect_ai-0.3.65.dist-info}/top_level.txt +0 -0

inspect_ai/tool/_tools/_computer/_computer.py CHANGED Viewed

@@ -13,6 +13,17 @@ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
 @tool
 def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool:
+    """Desktop computer tool.
+    See documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-computer>.
+    Args:
+      max_screenshots: The maximum number of screenshots to play
+        back to the model as input. Defaults to 1 (set to `None` to have no limit).
+      timeout: Timeout in seconds for computer tool actions.
+        Defaults to 180 (set to `None` for no timeout).
+    """
     async def execute(
         action: Action,
         text: str | None = None,

inspect_ai/tool/_tools/_execute.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from inspect_ai.util import sandbox
+from inspect_ai.util import sandbox as sandbox_env
 from .._tool import Tool, tool
 from .._tool_call import ToolCall, ToolCallContent, ToolCallView, ToolCallViewer
@@ -20,14 +20,17 @@ def code_viewer(language: str, code_param: str) -> ToolCallViewer:
 @tool(viewer=code_viewer("bash", "cmd"))
-def bash(timeout: int | None = None, user: str | None = None) -> Tool:
+def bash(
+    timeout: int | None = None, user: str | None = None, sandbox: str | None = None
+) -> Tool:
     """Bash shell command execution tool.
     Execute bash shell commands using a sandbox environment (e.g. "docker").
     Args:
-      timeout (int | None): Timeout (in seconds) for command.
-      user (str | None): User to execute commands as.
+      timeout: Timeout (in seconds) for command.
+      user: User to execute commands as.
+      sandbox: Optional sandbox environmnent name.
     Returns:
       String with command output (stdout) or command error (stderr).
@@ -44,7 +47,7 @@ def bash(timeout: int | None = None, user: str | None = None) -> Tool:
           The output of the command.
         """
         # execute the command
-        result = await sandbox().exec(
+        result = await sandbox_env(sandbox).exec(
             cmd=["bash", "--login", "-c", cmd], timeout=timeout, user=user
         )
         # return output (including stderr if any)
@@ -57,14 +60,17 @@ def bash(timeout: int | None = None, user: str | None = None) -> Tool:
 @tool(viewer=code_viewer("python", "code"))
-def python(timeout: int | None = None, user: str | None = None) -> Tool:
+def python(
+    timeout: int | None = None, user: str | None = None, sandbox: str | None = None
+) -> Tool:
     """Python code execution tool.
     Execute Python code using a sandbox environment (e.g. "docker").
     Args:
-      timeout (int | None): Timeout (in seconds) for command.
-      user (str | None): User to execute commands as.
+      timeout: Timeout (in seconds) for command.
+      user: User to execute commands as.
+      sandbox: Optional sandbox environmnent name.
     Returns:
       String with command output (stdout) or command error (stderr).
@@ -89,7 +95,7 @@ def python(timeout: int | None = None, user: str | None = None) -> Tool:
         Returns:
           The output of the Python code.
         """
-        result = await sandbox().exec(
+        result = await sandbox_env(sandbox).exec(
             cmd=["python3"], input=code, timeout=timeout, user=user
         )
         # return output (including stderr if any)

inspect_ai/tool/_tools/_web_browser/_resources/README.md CHANGED Viewed

@@ -40,7 +40,7 @@ The result will be printed out in _stdout_ in the following format:
 error: <an ERROR message if one occured>
 info: <general info about the container>
 web_url: <the URL of the page the browser is currently at>
-wen_at: <accessibility tree of the visible elements of the page>
+web_at: <accessibility tree of the visible elements of the page>
 ```
@@ -57,7 +57,7 @@ The tool consists of the following components:
   * _web_environment.py_ - an environment which gets instantiated by the servicer and which launches the browser, stores its state and maps client commands to Playwright API.
   * _playwright_crawler.py_ - a wrapper over the sync Playwright API.
-* [WebClient](web_client.py) - a simple stateless client to interract with the server. When launched, the client:
+* [WebClient](web_client.py) - a simple stateless client to interact with the server. When launched, the client:
   1. creates a connection with the server;
   2. sends user command to the server;
   3. receives the response in the form of observations and prints them to stdout;

inspect_ai/tool/_tools/_web_browser/_web_browser.py CHANGED Viewed

@@ -16,10 +16,12 @@ from inspect_ai.util._store_model import StoreModel, store_as
 def web_browser(interactive: bool = True) -> list[Tool]:
     """Tools used for web browser navigation.
+     See documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-web-browser>.
     Args:
-       interactive (bool): Provide interactive tools (enable
-         clicking, typing, and submitting forms). Defaults
-         to True.
+       interactive: Provide interactive tools (enable
+          clicking, typing, and submitting forms). Defaults
+          to True.
     Returns:
        List of tools used for web browser navigation.

inspect_ai/tool/_tools/_web_search.py CHANGED Viewed

@@ -41,14 +41,16 @@ def web_search(
     A web search is conducted using the specified provider, the results are parsed for relevance
     using the specified model, and the top 'num_results' relevant pages are returned.
+    See further documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-web-search>.
     Args:
-      provider (Literal["google"]): Search provider (defaults to "google", currently
+      provider: Search provider (defaults to "google", currently
         the only provider). Possible future providers include "brave" and "bing".
-      num_results (int): Number of web search result pages to return to the model.
-      max_provider_calls (int): Maximum number of search calls to make to the search provider.
-      max_connections (int): Maximum number of concurrent connections to API
+      num_results: Number of web search result pages to return to the model.
+      max_provider_calls: Maximum number of search calls to make to the search provider.
+      max_connections: Maximum number of concurrent connections to API
         endpoint of search provider.
-      model (str | Model): Model used to parse web pages for relevance.
+      model: Model used to parse web pages for relevance.
     Returns:
        A tool that can be registered for use by models to search the web.

inspect_ai/util/_concurrency.py CHANGED Viewed

@@ -23,12 +23,12 @@ def concurrency(
     for launching subprocesses is handled via the `subprocess` function.
     Args:
-      name (str): Name for concurrency context. This serves as the
+      name: Name for concurrency context. This serves as the
          display name for the context, and also the unique context
          key (if the `key` parameter is omitted)
-      concurrency (int): Maximum number of coroutines that can
+      concurrency: Maximum number of coroutines that can
          enter the context.
-      key (str | None): Unique context key for this context. Optional.
+      key: Unique context key for this context. Optional.
          Used if the unique key isn't human readable -- e.g. includes
          api tokens or account ids so that the more readable `name`
          can be presented to users e.g in console UI>

inspect_ai/util/_panel.py CHANGED Viewed

@@ -5,6 +5,8 @@ from typing_extensions import Self
 class InputPanel(Container):
+    """Base class for for Inspect input panels."""
     DEFAULT_TITLE = "Panel"
     DEFAULT_CLASSES = "task-input-panel"

inspect_ai/util/_resource.py CHANGED Viewed

@@ -33,18 +33,18 @@ def resource(
     `resource("templates/prompt.txt", type="file")`
     Args:
-        resource (str): Path to local or remote (e.g. s3://)
-          resource, or for `type="auto"` (the default),
-          a string containing the literal resource value.
-        type (Literal["auto", "file"]): For "auto" (the default),
-          interpret the resource as a literal string if its not
-          a valid path. For "file", always interpret it as
-          a file path.
-        fs_options (dict[str, Any]): Optional. Additional
-          arguments to pass through to the `fsspec` filesystem
-          provider (e.g. `S3FileSystem`). Use `{"anon": True }`
-          if you are accessing a public S3 bucket with no
-          credentials.
+        resource: Path to local or remote (e.g. s3://)
+            resource, or for `type="auto"` (the default),
+            a string containing the literal resource value.
+        type: For "auto" (the default),
+            interpret the resource as a literal string if its not
+            a valid path. For "file", always interpret it as
+            a file path.
+        fs_options: Optional. Additional
+            arguments to pass through to the `fsspec` filesystem
+            provider (e.g. `S3FileSystem`). Use `{"anon": True }`
+            if you are accessing a public S3 bucket with no
+            credentials.
     Returns:
        Text content of resource.

inspect_ai/util/_sandbox/docker/compose.py CHANGED Viewed

@@ -3,12 +3,13 @@ import os
 import shlex
 from logging import getLogger
 from pathlib import Path
-from typing import Any, Literal, TypedDict, cast
+from typing import Any, Literal, cast
 import yaml
 from pydantic import BaseModel
 from inspect_ai._util.error import PrerequisiteError
+from inspect_ai._util.trace import trace_message
 from inspect_ai.util._display import display_type
 from inspect_ai.util._subprocess import ExecResult, subprocess
@@ -16,26 +17,39 @@ from .prereqs import (
     DOCKER_COMPOSE_REQUIRED_VERSION_PULL_POLICY,
     validate_docker_compose,
 )
+from .service import ComposeService, services_healthcheck_time
 from .util import ComposeProject, is_inspect_project
 logger = getLogger(__name__)
 # How long to wait for compose environment to pass a health check
-COMPOSE_WAIT = "120"
+COMPOSE_WAIT = 120
-async def compose_up(project: ComposeProject) -> None:
+async def compose_up(
+    project: ComposeProject, services: dict[str, ComposeService]
+) -> None:
+    # compute the maximum amount of time we will
+    up_command = ["up", "--detach", "--wait"]
+    # are there healthchecks in the service definitions? if so then peg our timeout
+    # at the maximum total wait time. otherwise, pick a reasonable default
+    healthcheck_time = services_healthcheck_time(services)
+    if healthcheck_time > 0:
+        timeout: int = healthcheck_time
+        trace_message(logger, "Docker", "Docker services heathcheck timeout: {timeout}")
+    else:
+        timeout = COMPOSE_WAIT
+    # align global wait timeout to maximum healthcheck timeout
+    up_command.extend(["--wait-timeout", str(timeout + 1)])
     # Start the environment. Note that we don't check the result because docker will
     # return a non-zero exit code for services that exit (even successfully) when
     # passing the --wait flag (see https://github.com/docker/compose/issues/10596).
     # In practice, we will catch any errors when calling compose_check_running()
     # immediately after we call compose_up().
-    await compose_command(
-        ["up", "--detach", "--wait", "--wait-timeout", COMPOSE_WAIT],
-        project=project,
-        # wait up to 5 minutes for container to go up (compose wait + 3 minutes)
-        timeout=300,
-    )
+    await compose_command(up_command, project=project, timeout=timeout)
 async def compose_down(project: ComposeProject, quiet: bool = True) -> None:
@@ -191,17 +205,6 @@ async def compose_exec(
     )
-ComposeService = TypedDict(
-    "ComposeService",
-    {
-        "image": str | None,
-        "build": str | None,
-        "x-default": bool | None,
-        "x-local": bool | None,
-    },
-)
 async def compose_services(project: ComposeProject) -> dict[str, ComposeService]:
     result = await compose_command(["config"], project=project, timeout=60)
     if not result.success:

inspect_ai/util/_sandbox/docker/config.py CHANGED Viewed

@@ -42,7 +42,8 @@ def find_compose_file(parent: str = "") -> str | None:
 def is_dockerfile(file: str) -> bool:
-    return os.path.basename(file) == DOCKERFILE
+    path = Path(file)
+    return path.stem == DOCKERFILE or path.suffix == f".{DOCKERFILE}"
 def has_dockerfile(parent: str = "") -> bool:

inspect_ai/util/_sandbox/docker/docker.py CHANGED Viewed

@@ -9,6 +9,7 @@ from typing import Literal, Union, cast, overload
 from typing_extensions import override
+from inspect_ai._util.error import PrerequisiteError
 from inspect_ai.util._subprocess import ExecResult, subprocess
 from ..environment import (
@@ -85,6 +86,14 @@ class DockerSandboxEnvironment(SandboxEnvironment):
             services = await compose_services(project)
             for name, service in services.items():
+                # if the service has an explicit container_name then
+                # error (as this won't work w/ epochs > 1)
+                container_name = service.get("container_name", None)
+                if container_name:
+                    raise PrerequisiteError(
+                        f"ERROR: Docker service '{name}' includes an explicitly configured container_name ('{container_name}'). This is not permitted, as container names should be provisioned by Docker compose and an explicit container_name will not work with epochs > 1."
+                    )
                 # build internal images
                 image = service.get("image", None)
                 if image and is_internal_image(image):
@@ -139,7 +148,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
             services = await compose_services(project)
             # start the services
-            await compose_up(project)
+            await compose_up(project, services)
             # check to ensure that the services are running
             running_services = await compose_check_running(

inspect_ai/util/_sandbox/docker/service.py ADDED Viewed

@@ -0,0 +1,100 @@
+import re
+from dataclasses import dataclass
+from typing import TypedDict
+class ComposeServiceHealthcheck(TypedDict, total=False):
+    start_period: str
+    interval: str
+    retries: int
+    timeout: str
+ComposeService = TypedDict(
+    "ComposeService",
+    {
+        "image": str,
+        "build": str,
+        "container_name": str,
+        "x-default": bool,
+        "x-local": bool,
+        "healthcheck": ComposeServiceHealthcheck,
+    },
+    total=False,
+)
+def services_healthcheck_time(services: dict[str, ComposeService]) -> int:
+    max_time = 0
+    for _, service in services.items():
+        service_time = service_healthcheck_time(service)
+        max_time = max(max_time, service_time)
+    return max_time
+def service_healthcheck_time(service: ComposeService) -> int:
+    """
+    Calculate the maximum time a single service's healthcheck could take.
+    The total time is:
+    (retries * (interval + timeout))
+    Default values (from Docker documentation):
+    - retries: 3
+    - interval: 30s
+    - timeout: 30s
+    """
+    healthcheck = service.get("healthcheck", None)
+    if healthcheck is None:
+        return 0
+    # Parse duration strings with defaults
+    retries = healthcheck.get("retries", 3)
+    interval = parse_duration(healthcheck.get("interval", "30s"))
+    timeout = parse_duration(healthcheck.get("timeout", "30s"))
+    # Calculate total time in seconds
+    total_time = retries * (interval.seconds + timeout.seconds)
+    return int(total_time)
+@dataclass
+class Duration:
+    nanoseconds: int
+    @property
+    def seconds(self) -> float:
+        return self.nanoseconds / 1_000_000_000
+def parse_duration(duration_str: str) -> Duration:
+    """Parse a Docker compose style duration string."""
+    if not duration_str:
+        return Duration(0)
+    units = {
+        "ns": 1,
+        "us": 1_000,
+        "ms": 1_000_000,
+        "s": 1_000_000_000,
+        "m": 60_000_000_000,
+        "h": 3_600_000_000_000,
+    }
+    duration_str = "".join(duration_str.split())
+    pattern = re.compile(r"(\d+)([a-z]+)")
+    matches = pattern.findall(duration_str)
+    if not matches:
+        raise ValueError(f"Invalid duration format: {duration_str}")
+    total_nanoseconds = 0
+    for number, unit in matches:
+        if unit not in units:
+            raise ValueError(f"Invalid unit: {unit}")
+        total_nanoseconds += int(number) * units[unit]
+    return Duration(total_nanoseconds)

inspect_ai/util/_sandbox/environment.py CHANGED Viewed

@@ -65,91 +65,6 @@ class SandboxEnvironment(abc.ABC):
     filesystem context to copy samples files into and resolve relative paths to.
     """
-    @classmethod
-    def config_files(cls) -> list[str]:
-        """Standard config files for this provider (used for automatic discovery)"""
-        return []
-    @classmethod
-    def default_concurrency(cls) -> int | None:
-        """Default max_sandboxes for this provider (`None` means no maximum)"""
-        return None
-    @classmethod
-    async def task_init(
-        cls, task_name: str, config: SandboxEnvironmentConfigType | None
-    ) -> None:
-        """Called at task startup initialize resources.
-        Args:
-          task_name (str): Name of task using the sandbox environment.
-          config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
-        """
-        pass
-    @classmethod
-    async def sample_init(
-        cls,
-        task_name: str,
-        config: SandboxEnvironmentConfigType | None,
-        metadata: dict[str, str],
-    ) -> dict[str, "SandboxEnvironment"]:
-        """Initialize sandbox environments for a sample.
-        Args:
-          task_name (str): Name of task using the sandbox environment.
-          config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
-          metadata (dict[str,str]): Sample `metadata` field
-        Returns:
-          Dictionary of named sandbox environments. The environment which represents
-          the default environment (resolved by `sandbox("default")` or `sandbox()`) must
-          be the first key/value pair in the dictionary.
-        """
-        return {}
-    @classmethod
-    @abc.abstractmethod
-    async def sample_cleanup(
-        cls,
-        task_name: str,
-        config: SandboxEnvironmentConfigType | None,
-        environments: dict[str, "SandboxEnvironment"],
-        interrupted: bool,
-    ) -> None:
-        """Cleanup sandbox environments.
-        Args:
-          task_name (str): Name of task using the sandbox environment.
-          config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
-          environments (dict[str,SandboxEnvironment]): Sandbox environments created for this sample.
-          interrupted (bool): Was the task interrupted by an error or cancellation
-        """
-        ...
-    @classmethod
-    async def task_cleanup(
-        cls, task_name: str, config: SandboxEnvironmentConfigType | None, cleanup: bool
-    ) -> None:
-        """Called at task exit as a last chance to cleanup resources.
-        Args:
-          task_name (str): Name of task using the sandbox environment.
-          config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
-          cleanup (bool): Whether to actually cleanup environment resources
-            (False if `--no-sandbox-cleanup` was specified)
-        """
-        pass
-    @classmethod
-    async def cli_cleanup(cls, id: str | None) -> None:
-        """Handle a cleanup invoked from the CLI (e.g. inspect sandbox cleanup).
-        Args:
-          id (str | None): Optional ID to limit scope of cleanup.
-        """
-        pass
     @abc.abstractmethod
     async def exec(
         self,
@@ -170,13 +85,13 @@ class SandboxEnvironment(abc.ABC):
         `OutputLimitExceededError` will be raised.
         Args:
-          cmd (str | list[str]): Command or command and arguments to execute.
-          input (str | bytes | None): Standard input (optional).
-          cwd (str | None): Current working dir (optional). If relative, will be relative to the per-sample filesystem context.
-          env (dict[str,str]): Environment variables for execution.
-          user (str | None): Optional username or UID to run the command as.
-          timeout (int | None): Optional execution timeout (seconds).
-          timeout_retry (bool): Retry the command in the case that it times out.
+          cmd: Command or command and arguments to execute.
+          input: Standard input (optional).
+          cwd: Current working dir (optional). If relative, will be relative to the per-sample filesystem context.
+          env: Environment variables for execution.
+          user: Optional username or UID to run the command as.
+          timeout: Optional execution timeout (seconds).
+          timeout_retry: Retry the command in the case that it times out.
             Commands will be retried up to twice, with a timeout of no greater
             than 60 seconds for the first retry and 30 for the second.
@@ -204,9 +119,9 @@ class SandboxEnvironment(abc.ABC):
         should be automatically created.
         Args:
-          file (str): Path to file (relative file paths will resolve to the
+          file: Path to file (relative file paths will resolve to the
             per-sample working directory).
-          contents (str | bytes): Text or binary file contents.
+          contents: Text or binary file contents.
         Raises:
           PermissionError: If the current user does not have permission to
@@ -233,9 +148,9 @@ class SandboxEnvironment(abc.ABC):
         to specifying `newline=""` in a call to the Python `open()` function.
         Args:
-          file (str): Path to file (relative file paths will resolve to the
+          file: Path to file (relative file paths will resolve to the
             per-sample working directory).
-          text (bool): Read as a utf-8 encoded text file.
+          text: Read as a utf-8 encoded text file.
         Returns:
           Contents of file (as str or bytes for binary files)
@@ -265,6 +180,91 @@ class SandboxEnvironment(abc.ABC):
         """
         raise NotImplementedError("connection not implemented")
+    @classmethod
+    def config_files(cls) -> list[str]:
+        """Standard config files for this provider (used for automatic discovery)"""
+        return []
+    @classmethod
+    def default_concurrency(cls) -> int | None:
+        """Default max_sandboxes for this provider (`None` means no maximum)"""
+        return None
+    @classmethod
+    async def task_init(
+        cls, task_name: str, config: SandboxEnvironmentConfigType | None
+    ) -> None:
+        """Called at task startup initialize resources.
+        Args:
+          task_name: Name of task using the sandbox environment.
+          config: Implementation defined configuration (optional).
+        """
+        pass
+    @classmethod
+    async def sample_init(
+        cls,
+        task_name: str,
+        config: SandboxEnvironmentConfigType | None,
+        metadata: dict[str, str],
+    ) -> dict[str, "SandboxEnvironment"]:
+        """Initialize sandbox environments for a sample.
+        Args:
+          task_name: Name of task using the sandbox environment.
+          config: Implementation defined configuration (optional).
+          metadata: Sample `metadata` field
+        Returns:
+          Dictionary of named sandbox environments. The environment which represents
+          the default environment (resolved by `sandbox("default")` or `sandbox()`) must
+          be the first key/value pair in the dictionary.
+        """
+        return {}
+    @classmethod
+    @abc.abstractmethod
+    async def sample_cleanup(
+        cls,
+        task_name: str,
+        config: SandboxEnvironmentConfigType | None,
+        environments: dict[str, "SandboxEnvironment"],
+        interrupted: bool,
+    ) -> None:
+        """Cleanup sandbox environments.
+        Args:
+          task_name: Name of task using the sandbox environment.
+          config: Implementation defined configuration (optional).
+          environments: Sandbox environments created for this sample.
+          interrupted: Was the task interrupted by an error or cancellation
+        """
+        ...
+    @classmethod
+    async def task_cleanup(
+        cls, task_name: str, config: SandboxEnvironmentConfigType | None, cleanup: bool
+    ) -> None:
+        """Called at task exit as a last chance to cleanup resources.
+        Args:
+          task_name: Name of task using the sandbox environment.
+          config: Implementation defined configuration (optional).
+          cleanup: Whether to actually cleanup environment resources
+            (False if `--no-sandbox-cleanup` was specified)
+        """
+        pass
+    @classmethod
+    async def cli_cleanup(cls, id: str | None) -> None:
+        """Handle a cleanup invoked from the CLI (e.g. inspect sandbox cleanup).
+        Args:
+          id: Optional ID to limit scope of cleanup.
+        """
+        pass
 @dataclass
 class SandboxEnvironments:
@@ -284,7 +284,10 @@ class SandboxEnvironmentSpec(NamedTuple):
     """Specification of a SandboxEnvironment."""
     type: str
+    """Sandbox type (e.g. 'local', 'docker')"""
     config: SandboxEnvironmentConfigType | None = None
+    """Sandbox configuration (filename or config object)."""
 SandboxEnvironmentConfigType = BaseModel | str

inspect-ai 0.3.63__py3-none-any.whl → 0.3.65__py3-none-any.whl

inspect-ai 0.3.63__py3-none-any.whl → 0.3.65__py3-none-any.whl