inspect-ai 0.3.52__py3-none-any.whl → 0.3.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. inspect_ai/_cli/eval.py +55 -1
  2. inspect_ai/_cli/main.py +2 -0
  3. inspect_ai/_cli/trace.py +244 -0
  4. inspect_ai/_display/core/progress.py +9 -3
  5. inspect_ai/_display/core/results.py +8 -4
  6. inspect_ai/_display/textual/app.py +5 -1
  7. inspect_ai/_display/textual/widgets/task_detail.py +3 -0
  8. inspect_ai/_display/textual/widgets/tasks.py +97 -6
  9. inspect_ai/_eval/eval.py +33 -0
  10. inspect_ai/_eval/evalset.py +4 -0
  11. inspect_ai/_eval/registry.py +2 -2
  12. inspect_ai/_eval/task/images.py +4 -14
  13. inspect_ai/_eval/task/results.py +22 -4
  14. inspect_ai/_eval/task/run.py +40 -20
  15. inspect_ai/_eval/task/sandbox.py +72 -43
  16. inspect_ai/_eval/task/task.py +4 -0
  17. inspect_ai/_eval/task/util.py +2 -0
  18. inspect_ai/_util/constants.py +3 -3
  19. inspect_ai/_util/display.py +1 -0
  20. inspect_ai/_util/logger.py +34 -8
  21. inspect_ai/_util/trace.py +275 -0
  22. inspect_ai/_view/www/App.css +13 -0
  23. inspect_ai/_view/www/dist/assets/index.css +13 -0
  24. inspect_ai/_view/www/dist/assets/index.js +80 -43
  25. inspect_ai/_view/www/src/App.mjs +31 -6
  26. inspect_ai/_view/www/src/Types.mjs +6 -0
  27. inspect_ai/_view/www/src/components/JsonPanel.mjs +11 -17
  28. inspect_ai/_view/www/src/components/MessageContent.mjs +9 -2
  29. inspect_ai/_view/www/src/components/Tools.mjs +46 -18
  30. inspect_ai/_view/www/src/navbar/Navbar.mjs +12 -0
  31. inspect_ai/_view/www/src/samples/SampleList.mjs +2 -2
  32. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +2 -2
  33. inspect_ai/log/_log.py +6 -0
  34. inspect_ai/log/_message.py +2 -2
  35. inspect_ai/log/_recorders/eval.py +8 -18
  36. inspect_ai/log/_recorders/json.py +19 -17
  37. inspect_ai/model/_cache.py +22 -16
  38. inspect_ai/model/_call_tools.py +9 -1
  39. inspect_ai/model/_generate_config.py +8 -2
  40. inspect_ai/model/_model.py +11 -12
  41. inspect_ai/model/_providers/azureai.py +1 -1
  42. inspect_ai/model/_providers/bedrock.py +18 -2
  43. inspect_ai/model/_providers/hf.py +1 -1
  44. inspect_ai/model/_providers/openai.py +32 -8
  45. inspect_ai/model/_providers/providers.py +1 -1
  46. inspect_ai/model/_providers/vllm.py +1 -1
  47. inspect_ai/tool/_tools/_web_browser/_web_browser.py +1 -1
  48. inspect_ai/util/_sandbox/context.py +7 -3
  49. inspect_ai/util/_sandbox/docker/compose.py +58 -19
  50. inspect_ai/util/_sandbox/docker/config.py +8 -10
  51. inspect_ai/util/_sandbox/docker/docker.py +20 -16
  52. inspect_ai/util/_sandbox/docker/util.py +3 -9
  53. inspect_ai/util/_sandbox/environment.py +7 -2
  54. inspect_ai/util/_sandbox/limits.py +1 -1
  55. inspect_ai/util/_sandbox/local.py +8 -9
  56. inspect_ai/util/_sandbox/service.py +17 -7
  57. inspect_ai/util/_subprocess.py +6 -1
  58. inspect_ai/util/_subtask.py +8 -2
  59. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/METADATA +6 -8
  60. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/RECORD +64 -62
  61. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/LICENSE +0 -0
  62. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/WHEEL +0 -0
  63. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/entry_points.txt +0 -0
  64. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/top_level.txt +0 -0
inspect_ai/util/_sandbox/docker/docker.py +20 -16

@@ -5,7 +5,6 @@ from logging import getLogger
 from pathlib import Path, PurePosixPath
 from typing import Literal, Union, cast, overload
 
-import aiofiles
 from typing_extensions import override
 
 from inspect_ai.util._subprocess import ExecResult
@@ -43,7 +42,7 @@ from .compose import (
 from .config import CONFIG_FILES, DOCKERFILE
 from .internal import build_internal_image, is_internal_image
 from .prereqs import validate_prereqs
-from .util import ComposeProject, sandbox_log, task_project_name
+from .util import ComposeProject, task_project_name
 
 logger = getLogger(__name__)
 
@@ -54,6 +53,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
     def config_files(cls) -> list[str]:
         return CONFIG_FILES + [DOCKERFILE]
 
+    @classmethod
+    def default_concurrency(cls) -> int | None:
+        count = os.cpu_count() or 1
+        return 2 * count
+
     @classmethod
     async def task_init(
         cls, task_name: str, config: SandboxEnvironmentConfigType | None
@@ -109,8 +113,6 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         config: SandboxEnvironmentConfigType | None,
         metadata: dict[str, str],
     ) -> dict[str, SandboxEnvironment]:
-        sandbox_log("setup")
-
         # create environment variables for sample metadata
         env: dict[str, str] = {}
         if isinstance(config, str) and Path(config).exists():
@@ -260,7 +262,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 
     @override
     async def write_file(self, file: str, contents: str | bytes) -> None:
-        sandbox_log(f"write_file: {file}")
+        # exec function w/ timeout
+        async def exec(cmd: list[str]) -> ExecResult[str]:
+            return await self.exec(cmd, timeout=60)
 
         # resolve relative file paths
         file = self.container_file(file)
@@ -307,8 +311,8 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         local_tmpfile.close()  # this will also delete the file
 
         if not hasattr(self, "_docker_user"):
-            uid = (await self.exec(["id", "-u"])).stdout.strip()
-            gid = (await self.exec(["id", "-g"])).stdout.strip()
+            uid = (await exec(["id", "-u"])).stdout.strip()
+            gid = (await exec(["id", "-g"])).stdout.strip()
             self._docker_user = (uid, gid)
 
         await compose_command(
@@ -327,7 +331,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         parent = PurePosixPath(file).parent
 
         # We do these steps in a shell script for efficiency to avoid round-trips to docker.
-        res_cp = await self.exec(
+        res_cp = await exec(
             [
                 "sh",
                 "-e",
@@ -342,7 +346,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 
         if res_cp.returncode != 0:
             if "Permission denied" in res_cp.stderr:
-                ls_result = await self.exec(["ls", "-la", "."])
+                ls_result = await exec(["ls", "-la", "."])
                 error_string = f"Permission was denied. Error details: {res_cp.stderr}; ls -la: {ls_result.stdout}; {self._docker_user=}"
                 raise PermissionError(error_string)
             elif (
@@ -363,8 +367,6 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 
     @override
     async def read_file(self, file: str, text: bool = True) -> Union[str, bytes]:
-        sandbox_log(f"read_file: {file}")
-
         # Write the contents to a temp file
         with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
             # resolve relative file paths
@@ -403,11 +405,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 
             # read and return w/ appropriate encoding
             if text:
-                async with aiofiles.open(dest_file, "r", encoding="utf-8") as f:
-                    return await f.read()
+                with open(dest_file, "r", encoding="utf-8") as f:
+                    return f.read()
             else:
-                async with aiofiles.open(dest_file, "rb") as f:
-                    return await f.read()
+                with open(dest_file, "rb") as f:
+                    return f.read()
 
     @override
     async def connection(self) -> SandboxConnection:
@@ -445,7 +447,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
 async def container_working_dir(
     service: str, project: ComposeProject, default: str = "/"
 ) -> str:
-    result = await compose_exec([service, "sh", "-c", "pwd"], project)
+    result = await compose_exec(
+        [service, "sh", "-c", "pwd"], timeout=60, project=project
+    )
     if result.success:
         return result.stdout.strip()
     else:
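The docker provider's file operations now go through a local `exec` helper that passes `timeout=60` to `SandboxEnvironment.exec()`, which raises `TimeoutError` when a container command hangs. A minimal sketch of handling that timeout from eval code (the helper function below is illustrative, not part of the package):

```python
# Illustrative sketch: call into the sandbox with a bounded timeout, mirroring
# the timeout=60 wrapper added around docker file operations above. Assumes it
# runs inside an Inspect solver or tool where sandbox() is available.
from inspect_ai.util import sandbox


async def container_pwd(default: str = "/") -> str:
    try:
        result = await sandbox().exec(["sh", "-c", "pwd"], timeout=60)
    except TimeoutError:
        # exec() raises TimeoutError when the command exceeds its timeout
        return default
    return result.stdout.strip() if result.success else default
```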
inspect_ai/util/_sandbox/docker/util.py +3 -9

@@ -5,8 +5,6 @@ from pathlib import Path
 
 from shortuuid import uuid
 
-from inspect_ai._util.constants import SANDBOX
-
 from ..environment import SandboxEnvironmentConfigType
 from .config import (
     COMPOSE_DOCKERFILE_YAML,
@@ -41,7 +39,7 @@ class ComposeProject:
 
         # if its a Dockerfile, then config is the auto-generated .compose.yaml
         if config_path and is_dockerfile(config_path.name):
-            config = await auto_compose_file(
+            config = auto_compose_file(
                 COMPOSE_DOCKERFILE_YAML, config_path.parent.as_posix()
             )
 
@@ -51,12 +49,12 @@ class ComposeProject:
 
         # no config passed, look for 'auto-config' (compose.yaml, Dockerfile, etc.)
         else:
-            config = await resolve_compose_file()
+            config = resolve_compose_file()
 
         # this could be a cleanup where docker has tracked a .compose.yaml file
         # as part of its ConfigFiles and passed it back to us -- we in the
         # meantime have cleaned it up so we re-create it here as required
-        await ensure_auto_compose_file(config)
+        ensure_auto_compose_file(config)
 
         # return project
         return ComposeProject(name, config, env)
@@ -94,7 +92,3 @@ inspect_project_pattern = r"^inspect-[a-z\d\-_]*-i[a-z\d]{22}$"
 
 def is_inspect_project(name: str) -> bool:
     return re.match(inspect_project_pattern, name) is not None
-
-
-def sandbox_log(msg: str) -> None:
-    logger.log(SANDBOX, f"DOCKER: {msg}")
inspect_ai/util/_sandbox/environment.py +7 -2

@@ -53,6 +53,11 @@ class SandboxEnvironment(abc.ABC):
         """Standard config files for this provider (used for automatic discovery)"""
         return []
 
+    @classmethod
+    def default_concurrency(cls) -> int | None:
+        """Default max_sandboxes for this provider (`None` means no maximum)"""
+        return None
+
     @classmethod
     async def task_init(
         cls, task_name: str, config: SandboxEnvironmentConfigType | None
@@ -143,7 +148,7 @@ class SandboxEnvironment(abc.ABC):
         The current working directory for execution will be the per-sample
         filesystem context.
 
-        Each output stream (stdout and stderr) is limited to 1 MiB. If exceeded, an
+        Each output stream (stdout and stderr) is limited to 10 MiB. If exceeded, an
         `OutputLimitExceededError` will be raised.
 
         Args:
@@ -164,7 +169,7 @@ class SandboxEnvironment(abc.ABC):
           PermissionError: If the user does not have
             permission to execute the command.
           OutputLimitExceededError: If an output stream
-            exceeds the 1 MiB limit.
+            exceeds the 10 MiB limit.
         """
         ...
 
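`default_concurrency()` is a new hook on the `SandboxEnvironment` base class that lets a provider suggest a default value for `max_sandboxes` (the Docker provider diff earlier returns two sandboxes per CPU). A sketch of a custom provider overriding it; the class name is hypothetical and the provider's other required methods are omitted:

```python
# Hypothetical provider fragment showing the new default_concurrency() hook;
# a real provider would also implement exec(), read_file(), write_file(), etc.
import os

from inspect_ai.util import SandboxEnvironment


class MyCloudSandbox(SandboxEnvironment):
    @classmethod
    def default_concurrency(cls) -> int | None:
        # allow two concurrent sandboxes per host CPU, like the Docker provider
        return 2 * (os.cpu_count() or 1)
```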
inspect_ai/util/_sandbox/limits.py +1 -1

@@ -29,7 +29,7 @@ def verify_exec_result_size(exec_result: ExecResult[str]) -> None:
     """Verify the size of the output streams in an `ExecResult`.
 
     Raises:
-        OutputLimitExceededError: If an output stream exceeds the 1 MiB limit.
+        OutputLimitExceededError: If an output stream exceeds the limit.
     """
     limit = SandboxEnvironmentLimits.MAX_EXEC_OUTPUT_SIZE
     stdout_truncated = truncate_string_to_bytes(exec_result.stdout, limit)
inspect_ai/util/_sandbox/local.py +8 -9

@@ -3,7 +3,6 @@ import warnings
 from pathlib import Path
 from typing import Literal, Union, cast, overload
 
-import aiofiles
 from typing_extensions import override
 
 from .._subprocess import ExecResult, subprocess
@@ -85,11 +84,11 @@ class LocalSandboxEnvironment(SandboxEnvironment):
         Path(file).parent.mkdir(parents=True, exist_ok=True)
 
         if isinstance(contents, str):
-            async with aiofiles.open(file, "w", encoding="utf-8") as f:
-                await f.write(contents)
+            with open(file, "w", encoding="utf-8") as f:
+                f.write(contents)
         else:
-            async with aiofiles.open(file, "wb") as f:
-                await f.write(contents)
+            with open(file, "wb") as f:
+                f.write(contents)
 
     @overload
     async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
@@ -102,11 +101,11 @@ class LocalSandboxEnvironment(SandboxEnvironment):
         file = self._resolve_file(file)
         verify_read_file_size(file)
         if text:
-            async with aiofiles.open(file, "r", encoding="utf-8") as f:
-                return await f.read()
+            with open(file, "r", encoding="utf-8") as f:
+                return f.read()
         else:
-            async with aiofiles.open(file, "rb") as f:
-                return await f.read()
+            with open(file, "rb") as f:
+                return f.read()
 
     def _resolve_file(self, file: str) -> str:
         path = Path(file)
inspect_ai/util/_sandbox/service.py +17 -7

@@ -10,6 +10,8 @@ from typing import (
 
 from pydantic import JsonValue
 
+from inspect_ai.util._subprocess import ExecResult
+
 from .environment import SandboxEnvironment
 
 REQUESTS_DIR = "requests"
@@ -129,9 +131,9 @@ class SandboxService:
         """Handle all pending service requests."""
         # list pending requests
         list_requests = f"ls -1 {self._requests_dir}/*.json"
-        result = await self._sandbox.exec(["bash", "-c", list_requests])
+        result = await self._exec(["bash", "-c", list_requests])
 
-        # process reqests
+        # process requests
         if result.success:
             request_files = result.stdout.strip().splitlines()
             if request_files:
@@ -142,7 +144,7 @@ class SandboxService:
     async def _handle_request(self, request_file: str) -> None:
         # read request
         read_request = f"cat {request_file}"
-        result = await self._sandbox.exec(["bash", "-c", read_request])
+        result = await self._exec(["bash", "-c", read_request])
         if not result.success:
             raise RuntimeError(
                 f"Error reading request for service {self._name}: '{read_request}' ({result.stderr})"
@@ -181,7 +183,7 @@ class SandboxService:
         await self._write_text_file(response_path, json.dumps(response_data))
 
         # remove request file
-        exec_rm = await self._sandbox.exec(["rm", "-f", request_file])
+        exec_rm = await self._exec(["rm", "-f", request_file])
         if not exec_rm.success:
             raise RuntimeError(
                 f"Error removing request file '{request_file}': {exec_rm.stderr}"
@@ -215,8 +217,8 @@ class SandboxService:
 
     async def _create_rpc_dir(self, name: str) -> str:
         rpc_dir = PurePosixPath(self._service_dir, name).as_posix()
-        result = await self._sandbox.exec(["rm", "-rf", rpc_dir])
-        result = await self._sandbox.exec(["mkdir", "-p", rpc_dir])
+        result = await self._exec(["rm", "-rf", rpc_dir])
+        result = await self._exec(["mkdir", "-p", rpc_dir])
        if not result.success:
             raise RuntimeError(
                 f"Error creating rpc directory '{name}' for sandbox '{self._name}': {result.stderr}"
@@ -224,11 +226,19 @@ class SandboxService:
         return rpc_dir
 
     async def _write_text_file(self, file: str, contents: str) -> None:
-        result = await self._sandbox.exec(["tee", "--", file], input=contents)
+        result = await self._exec(["tee", "--", file], input=contents)
         if not result.success:
             msg = f"Failed to write file '{file}' into container: {result.stderr}"
             raise RuntimeError(msg)
 
+    async def _exec(self, cmd: list[str], input: str | None = None) -> ExecResult[str]:
+        try:
+            return await self._sandbox.exec(cmd, input=input, timeout=30)
+        except TimeoutError:
+            raise RuntimeError(
+                f"Timed out executing command {' '.join(cmd)} in sandbox"
+            )
+
     def _generate_client(self) -> str:
         return dedent(f"""
             from typing import Any
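`SandboxService` now routes all of its container commands through a `_exec()` helper that applies a 30-second timeout and converts `TimeoutError` into a `RuntimeError` naming the command. The same pattern in isolation, as a hedged sketch (the helper name and message below are illustrative):

```python
# Generic sketch of the timeout-to-RuntimeError pattern used by
# SandboxService._exec above; the helper name is illustrative.
import asyncio
from typing import Awaitable, TypeVar

T = TypeVar("T")


async def bounded(operation: Awaitable[T], seconds: float, what: str) -> T:
    try:
        return await asyncio.wait_for(operation, timeout=seconds)
    except asyncio.TimeoutError:
        raise RuntimeError(f"Timed out after {seconds}s while {what}") from None
```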
inspect_ai/util/_subprocess.py +6 -1

@@ -1,5 +1,6 @@
 import asyncio
 import os
+import shlex
 import sys
 from asyncio.subprocess import Process
 from contextvars import ContextVar
@@ -8,6 +9,8 @@ from logging import getLogger
 from pathlib import Path
 from typing import AsyncGenerator, Generic, Literal, TypeVar, Union, cast, overload
 
+from inspect_ai._util.trace import trace_action
+
 from ._concurrency import concurrency
 
 logger = getLogger(__name__)
@@ -217,7 +220,9 @@ async def subprocess(
 
     # run command
     async with concurrency("subprocesses", max_subprocesses_context_var.get()):
-        return await run_command_timeout()
+        message = args if isinstance(args, str) else shlex.join(args)
+        with trace_action(logger, "Subprocess", message):
+            return await run_command_timeout()
 
 
 def init_max_subprocesses(max_subprocesses: int | None = None) -> None:
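`subprocess()` now wraps command execution in `trace_action()` from the new `inspect_ai._util.trace` module, so each command's start, completion, or failure is recorded in the trace log surfaced by the new trace CLI (`inspect_ai/_cli/trace.py` in the file list above). A minimal sketch of the same pattern applied to other awaited work (`trace_action` is an internal API and the traced coroutine below is a hypothetical placeholder):

```python
# Sketch: wrap an awaited operation in trace_action() so its start/end (and any
# exception) appear in Inspect's trace log. fetch_dataset() is a placeholder.
from logging import getLogger

from inspect_ai._util.trace import trace_action

logger = getLogger(__name__)


async def fetch_dataset(url: str) -> bytes:
    return b""  # placeholder for real I/O


async def load_data(url: str) -> bytes:
    with trace_action(logger, "Fetch", url):
        return await fetch_dataset(url)
```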
inspect_ai/util/_subtask.py +8 -2

@@ -1,6 +1,7 @@
 import asyncio
 import inspect
 from functools import wraps
+from logging import getLogger
 from typing import (
     Any,
     Callable,
@@ -13,6 +14,7 @@ from typing import (
 
 from inspect_ai._util._async import is_callable_coroutine
 from inspect_ai._util.content import Content
+from inspect_ai._util.trace import trace_action
 from inspect_ai.util._store import Store, dict_jsonable, init_subtask_store
 
 SubtaskResult = str | int | float | bool | list[Content]
@@ -20,6 +22,9 @@ SubtaskResult = str | int | float | bool | list[Content]
 RT = TypeVar("RT", SubtaskResult, Any)
 
 
+logger = getLogger(__name__)
+
+
 @runtime_checkable
 class Subtask(Protocol):
     """Subtask with distinct `Store` and `Transcript`.
@@ -118,8 +123,9 @@ def subtask(
             init_subtask(subtask_name, store if store else Store())
 
             # run the subtask
-            with track_store_changes():  # type: ignore
-                result = await func(*args, **kwargs)
+            with trace_action(logger, "Subtask", subtask_name):
+                with track_store_changes():  # type: ignore
+                    result = await func(*args, **kwargs)
 
             # return result and event
             return result, list(transcript().events)
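Subtask execution is now wrapped in `trace_action(logger, "Subtask", subtask_name)`, so every `@subtask` invocation shows up in the trace log alongside subprocess activity. A minimal sketch of a subtask whose run would be traced this way (the body is purely illustrative):

```python
# Minimal subtask sketch; its execution is what the trace_action wrapper above
# now records. The word-count logic is just an illustrative body.
from inspect_ai.util import subtask


@subtask
async def word_count(text: str) -> int:
    return len(text.split())
```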
{inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/METADATA +6 -8

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: inspect_ai
-Version: 0.3.52
+Version: 0.3.54
 Summary: Framework for large language model evaluations
 Author: UK AI Safety Institute
 License: MIT License
@@ -20,7 +20,6 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: aiofiles
 Requires-Dist: aiohttp>=3.9.0
 Requires-Dist: anyio>=4.4.0
 Requires-Dist: beautifulsoup4
@@ -71,7 +70,6 @@ Requires-Dist: pytest-xdist; extra == "dev"
 Requires-Dist: ruff==0.8.3; extra == "dev"
 Requires-Dist: textual-dev>=0.86.2; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
-Requires-Dist: types-aiofiles; extra == "dev"
 Requires-Dist: types-beautifulsoup4; extra == "dev"
 Requires-Dist: types-aioboto3; extra == "dev"
 Requires-Dist: types-boto3; extra == "dev"
@@ -98,22 +96,22 @@ To get started with Inspect, please see the documentation at <https://inspect.ai
 
 ***
 
-
-
 To work on development of Inspect, clone the repository and install with the `-e` flag and `[dev]` optional dependencies:
 
 ```bash
-$ git clone https://github.com/UKGovernmentBEIS/inspect_ai.git
-$ cd inspect_ai
-$ pip install -e ".[dev]"
+git clone https://github.com/UKGovernmentBEIS/inspect_ai.git
+cd inspect_ai
+pip install -e ".[dev]"
 ```
 
 Optionally install pre-commit hooks via
+
 ```bash
 make hooks
 ```
 
 Run linting, formatting, and tests via
+
 ```bash
 make check
 make test