PyPI - inspect-ai - Versions diffs - 0.3.68__py3-none-any.whl → 0.3.70__py3-none-any.whl - Mend

inspect-ai 0.3.68__py3-none-any.whl → 0.3.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82) hide show

inspect_ai/_cli/eval.py +13 -1
inspect_ai/_display/plain/display.py +9 -11
inspect_ai/_display/textual/app.py +5 -5
inspect_ai/_display/textual/widgets/samples.py +47 -18
inspect_ai/_display/textual/widgets/transcript.py +25 -12
inspect_ai/_eval/eval.py +14 -2
inspect_ai/_eval/evalset.py +6 -1
inspect_ai/_eval/run.py +6 -0
inspect_ai/_eval/task/run.py +44 -15
inspect_ai/_eval/task/task.py +26 -3
inspect_ai/_util/interrupt.py +15 -0
inspect_ai/_util/logger.py +23 -0
inspect_ai/_util/rich.py +7 -8
inspect_ai/_util/text.py +301 -1
inspect_ai/_util/transcript.py +10 -2
inspect_ai/_util/working.py +46 -0
inspect_ai/_view/www/dist/assets/index.css +56 -12
inspect_ai/_view/www/dist/assets/index.js +905 -751
inspect_ai/_view/www/log-schema.json +337 -2
inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
inspect_ai/_view/www/src/appearance/icons.ts +3 -1
inspect_ai/_view/www/src/metadata/RenderedContent.tsx +0 -1
inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
inspect_ai/_view/www/src/samples/SampleDisplay.tsx +28 -1
inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +23 -2
inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +1 -1
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -0
inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +152 -0
inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +9 -2
inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +19 -1
inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
inspect_ai/_view/www/src/types/log.d.ts +188 -108
inspect_ai/_view/www/src/utils/format.ts +7 -4
inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -6
inspect_ai/log/__init__.py +2 -0
inspect_ai/log/_condense.py +1 -0
inspect_ai/log/_log.py +72 -12
inspect_ai/log/_samples.py +5 -5
inspect_ai/log/_transcript.py +31 -1
inspect_ai/model/_call_tools.py +1 -1
inspect_ai/model/_conversation.py +1 -1
inspect_ai/model/_model.py +35 -16
inspect_ai/model/_model_call.py +10 -3
inspect_ai/model/_providers/anthropic.py +13 -2
inspect_ai/model/_providers/bedrock.py +7 -0
inspect_ai/model/_providers/cloudflare.py +20 -7
inspect_ai/model/_providers/google.py +358 -302
inspect_ai/model/_providers/groq.py +57 -23
inspect_ai/model/_providers/hf.py +6 -0
inspect_ai/model/_providers/mistral.py +81 -52
inspect_ai/model/_providers/openai.py +9 -0
inspect_ai/model/_providers/providers.py +6 -6
inspect_ai/model/_providers/util/tracker.py +92 -0
inspect_ai/model/_providers/vllm.py +13 -5
inspect_ai/solver/_basic_agent.py +1 -3
inspect_ai/solver/_bridge/patch.py +0 -2
inspect_ai/solver/_limit.py +4 -4
inspect_ai/solver/_plan.py +3 -3
inspect_ai/solver/_solver.py +3 -0
inspect_ai/solver/_task_state.py +10 -1
inspect_ai/tool/_tools/_web_search.py +3 -3
inspect_ai/util/_concurrency.py +14 -8
inspect_ai/util/_sandbox/context.py +15 -0
inspect_ai/util/_sandbox/docker/cleanup.py +8 -3
inspect_ai/util/_sandbox/docker/compose.py +5 -9
inspect_ai/util/_sandbox/docker/docker.py +20 -6
inspect_ai/util/_sandbox/docker/util.py +10 -1
inspect_ai/util/_sandbox/environment.py +32 -1
inspect_ai/util/_sandbox/events.py +149 -0
inspect_ai/util/_sandbox/local.py +3 -3
inspect_ai/util/_sandbox/self_check.py +2 -1
inspect_ai/util/_subprocess.py +4 -1
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/METADATA +5 -5
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/RECORD +82 -74
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/top_level.txt +0 -0

inspect_ai/solver/_plan.py CHANGED Viewed

@@ -2,6 +2,7 @@ import inspect
 from logging import getLogger
 from typing import Any, Awaitable, Callable, TypeVar, cast
+from inspect_ai._util.interrupt import check_sample_interrupt
 from inspect_ai._util.registry import (
     RegistryInfo,
     is_registry_object,
@@ -115,15 +116,14 @@ class Plan(Solver):
                 with solver_transcript(self.finish, state) as st:
                     state = await self.finish(state, generate)
                     st.complete(state)
-            # mark completed
-            state.completed = True
+                check_sample_interrupt()
         finally:
             # always do cleanup if we have one
             if self.cleanup:
                 try:
                     await self.cleanup(state)
+                    check_sample_interrupt()
                 except Exception as ex:
                     logger.warning(f"Exception occurred during plan cleanup: {ex}")

inspect_ai/solver/_solver.py CHANGED Viewed

@@ -15,6 +15,7 @@ from typing import (
 from typing_extensions import Unpack
 from inspect_ai._util._async import is_callable_coroutine
+from inspect_ai._util.interrupt import check_sample_interrupt
 from inspect_ai._util.registry import (
     RegistryInfo,
     registry_add,
@@ -200,6 +201,7 @@ def solver(
                     state: TaskState, generate: Generate
                 ) -> TaskState:
                     state = await original_call(state, generate)
+                    check_sample_interrupt()
                     set_sample_state(state)
                     return state
@@ -215,6 +217,7 @@ def solver(
                     state: TaskState, generate: Generate
                 ) -> TaskState:
                     state = await solver(state, generate)
+                    check_sample_interrupt()
                     set_sample_state(state)
                     return state

inspect_ai/solver/_task_state.py CHANGED Viewed

@@ -7,7 +7,9 @@ from random import Random
 from typing import Any, Iterable, SupportsIndex, Type, Union, cast, overload
 from pydantic_core import to_jsonable_python
+from shortuuid import uuid
+from inspect_ai._util.interrupt import check_sample_interrupt
 from inspect_ai.dataset._dataset import MT, Sample, metadata_as
 from inspect_ai.model import (
     ChatMessage,
@@ -164,6 +166,7 @@ class TaskState:
         self._token_limit = token_limit
         self._completed = completed
         self._store = Store()
+        self._uuid = uuid()
         if choices:
             self.choices = Choices(choices)
@@ -333,7 +336,7 @@ class TaskState:
     def completed(self) -> bool:
         """Is the task completed.
-        Additionally, checks message and token limits and raises if they are exceeded.
+        Additionally, checks message and token limits and raises if they are exceeded, and also checks for an operator interrupt of the sample.
         """
         from inspect_ai.log._samples import set_active_sample_total_messages
@@ -356,6 +359,7 @@ class TaskState:
                 "token", value=self.token_usage, limit=self.token_limit, state=self
             )
         else:
+            check_sample_interrupt()
             return self._completed
     @completed.setter
@@ -371,6 +375,11 @@ class TaskState:
     scores: dict[str, Score] | None = None
     """Scores yielded by running task."""
+    @property
+    def uuid(self) -> str:
+        """Globally unique identifier for sample run."""
+        return self._uuid
     def metadata_as(self, metadata_cls: Type[MT]) -> MT:
         """Pydantic model interface to metadata.

inspect_ai/tool/_tools/_web_search.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import asyncio
 import os
-from typing import Literal, Protocol, cast, runtime_checkable
+from typing import Literal, Protocol, runtime_checkable
 import httpx
 from bs4 import BeautifulSoup, NavigableString
@@ -90,8 +90,8 @@ def web_search(
                 return_exceptions=True,
             )
             for page, link in zip(pages, links):
-                if page and not isinstance(page, Exception):
-                    page_contents.append(cast(str, page))
+                if page and not isinstance(page, BaseException):
+                    page_contents.append(page)
                     urls.append(link.url)
                     snippets.append(link.snippet)
             search_calls += 1

inspect_ai/util/_concurrency.py CHANGED Viewed

@@ -1,13 +1,19 @@
 import asyncio
+import contextlib
+import time
 from dataclasses import dataclass
+from typing import AsyncIterator
+from inspect_ai._util.working import report_sample_waiting_time
-def concurrency(
+@contextlib.asynccontextmanager
+async def concurrency(
     name: str,
     concurrency: int,
     key: str | None = None,
-) -> asyncio.Semaphore:
-    """Obtain a concurrency context.
+) -> AsyncIterator[None]:
+    """Concurrency context manager.
     A concurrency context can be used to limit the number of coroutines
     executing a block of code (e.g calling an API). For example, here
@@ -32,9 +38,6 @@ def concurrency(
          Used if the unique key isn't human readable -- e.g. includes
          api tokens or account ids so that the more readable `name`
          can be presented to users e.g. in console UI.
-    Returns:
-       Asyncio Semaphore for concurrency context.
     """
     # sort out key
     key = key if key else name
@@ -47,8 +50,11 @@ def concurrency(
         )
         _concurrency_semaphores[key] = semaphore
-    # return the semaphore
-    return semaphore.semaphore
+    # wait and yield to protected code
+    start_wait = time.monotonic()
+    async with semaphore.semaphore:
+        report_sample_waiting_time(time.monotonic() - start_wait)
+        yield
 def concurrency_status() -> dict[str, tuple[int, int]]:

inspect_ai/util/_sandbox/context.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import Any, NoReturn, cast
 from shortuuid import uuid
 from inspect_ai._util.constants import SANDBOX_SETUP_TIMEOUT
+from inspect_ai.util._sandbox.events import SandboxEnvironmentProxy
 from .environment import (
     SampleCleanup,
@@ -132,6 +133,9 @@ async def init_sandbox_environments_sample(
     # verify that there is at least one environment and a 'default' env
     validate_sandbox_environments(sandboxenv_type, environments)
+    # proxy environments (for recording SandboxEvent)
+    environments = {k: SandboxEnvironmentProxy(v) for k, v in environments.items()}
     try:
         # copy files into environments
         await copy_sandbox_environment_files(files, environments)
@@ -148,6 +152,7 @@ async def init_sandbox_environments_sample(
         return environments
     except Exception as ex:
+        environments = unproxy_environments(environments)
         await sample_cleanup(task_name, config, environments, True)
         raise ex
@@ -161,9 +166,19 @@ async def cleanup_sandbox_environments_sample(
 ) -> None:
     sandboxenv_type = registry_find_sandboxenv(type)
     sample_cleanup = cast(SampleCleanup, getattr(sandboxenv_type, "sample_cleanup"))
+    environments = unproxy_environments(environments)
     await sample_cleanup(task_name, config, environments, interrupted)
+def unproxy_environments(
+    environments: dict[str, SandboxEnvironment],
+) -> dict[str, SandboxEnvironment]:
+    return {
+        k: v._sandbox
+        for k, v in cast(dict[str, SandboxEnvironmentProxy], environments).items()
+    }
 async def copy_sandbox_environment_files(
     files: dict[str, bytes], environments: dict[str, SandboxEnvironment]
 ) -> None:

inspect_ai/util/_sandbox/docker/cleanup.py CHANGED Viewed

@@ -56,17 +56,22 @@ async def project_cleanup_shutdown(cleanup: bool) -> None:
                     title_style="bold",
                     title_justify="left",
                 )
+                table.add_column("Sample ID")
+                table.add_column("Epoch")
                 table.add_column("Container(s)", no_wrap=True)
-                table.add_column("Cleanup")
                 for project in shutdown_projects:
                     containers = await compose_ps(project, all=True)
                     table.add_row(
+                        str(project.sample_id) if project.sample_id is not None else "",
+                        str(project.epoch if project.epoch is not None else ""),
                         "\n".join(container["Name"] for container in containers),
-                        f"[blue]inspect sandbox cleanup docker {project.name}[/blue]",
                     )
                 print(table)
                 print(
-                    "\nCleanup all environments with: [blue]inspect sandbox cleanup docker[/blue]\n"
+                    "\n"
+                    "Cleanup all containers  : [blue]inspect sandbox cleanup docker[/blue]\n"
+                    "Cleanup single container: [blue]inspect sandbox cleanup docker <container-id>[/blue]",
+                    "\n",
                 )
         # remove auto-compose files

inspect_ai/util/_sandbox/docker/compose.py CHANGED Viewed

@@ -28,7 +28,7 @@ COMPOSE_WAIT = 120
 async def compose_up(
     project: ComposeProject, services: dict[str, ComposeService]
-) -> None:
+) -> ExecResult[str]:
     # compute the maximum amount of time we will
     up_command = ["up", "--detach", "--wait"]
@@ -49,7 +49,8 @@ async def compose_up(
     # passing the --wait flag (see https://github.com/docker/compose/issues/10596).
     # In practice, we will catch any errors when calling compose_check_running()
     # immediately after we call compose_up().
-    await compose_command(up_command, project=project, timeout=timeout)
+    result = await compose_command(up_command, project=project, timeout=timeout)
+    return result
 async def compose_down(project: ComposeProject, quiet: bool = True) -> None:
@@ -121,14 +122,9 @@ async def compose_check_running(
             unhealthy_services = services
             for successful_service in successful_services:
                 unhealthy_services.remove(successful_service["Service"])
-            msg = (
-                "One or more docker containers failed to start from "
-                f"{project.config}: {','.join(unhealthy_services)}"
-            )
-            raise RuntimeError(msg)
+            return []
     else:
-        raise RuntimeError("No services started")
+        return []
     return [service["Service"] for service in running_services]

inspect_ai/util/_sandbox/docker/docker.py CHANGED Viewed

@@ -5,7 +5,7 @@ import os
 import tempfile
 from logging import getLogger
 from pathlib import Path, PurePosixPath
-from typing import Literal, Union, cast, overload
+from typing import Literal, Union, overload
 from typing_extensions import override
@@ -139,8 +139,15 @@ class DockerSandboxEnvironment(SandboxEnvironment):
                     env[key] = str(value)
         # create project
+        from inspect_ai.log._samples import sample_active
+        sample = sample_active()
         project = await ComposeProject.create(
-            name=task_project_name(task_name), config=config, env=env
+            name=task_project_name(task_name),
+            config=config,
+            sample_id=sample.sample.id if sample is not None else None,
+            epoch=sample.epoch if sample is not None else None,
+            env=env,
         )
         try:
@@ -148,13 +155,18 @@ class DockerSandboxEnvironment(SandboxEnvironment):
             services = await compose_services(project)
             # start the services
-            await compose_up(project, services)
+            result = await compose_up(project, services)
             # check to ensure that the services are running
             running_services = await compose_check_running(
                 list(services.keys()), project=project
             )
+            if not running_services:
+                raise RuntimeError(
+                    f"No services started.\nCompose up stderr: {result.stderr}"
+                )
             # note that the project is running
             project_startup(project)
@@ -209,9 +221,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         # (this enables us to show output for the cleanup operation)
         if not interrupted:
             # extract project from first environment
-            project = cast(
-                DockerSandboxEnvironment, next(iter(environments.values()))
-            )._project
+            project = (
+                next(iter(environments.values()))
+                .as_type(DockerSandboxEnvironment)
+                ._project
+            )
             # cleanup the project
             await project_cleanup(project=project, quiet=True)

inspect_ai/util/_sandbox/docker/util.py CHANGED Viewed

@@ -21,6 +21,8 @@ logger = getLogger(__name__)
 class ComposeProject:
     name: str
     config: str | None
+    sample_id: int | str | None
+    epoch: int | None
     env: dict[str, str] | None
     @classmethod
@@ -28,6 +30,9 @@ class ComposeProject:
         cls,
         name: str,
         config: SandboxEnvironmentConfigType | None,
+        *,
+        sample_id: int | str | None = None,
+        epoch: int | None = None,
         env: dict[str, str] = {},
     ) -> "ComposeProject":
         # resolve config to full path if we have one
@@ -58,16 +63,20 @@ class ComposeProject:
         ensure_auto_compose_file(config)
         # return project
-        return ComposeProject(name, config, env)
+        return ComposeProject(name, config, sample_id=sample_id, epoch=epoch, env=env)
     def __init__(
         self,
         name: str,
         config: str | None,
+        sample_id: int | str | None,
+        epoch: int | None,
         env: dict[str, str],
     ) -> None:
         self.name = name
         self.config = config
+        self.sample_id = sample_id
+        self.epoch = epoch
         self.env = env
     def __eq__(self, other: object) -> bool:

inspect_ai/util/_sandbox/environment.py CHANGED Viewed

@@ -2,12 +2,24 @@ from __future__ import annotations
 import abc
 from dataclasses import dataclass, field
-from typing import Any, Awaitable, Callable, Literal, NamedTuple, Union, overload
+from typing import (
+    Any,
+    Awaitable,
+    Callable,
+    Literal,
+    NamedTuple,
+    Type,
+    TypeVar,
+    Union,
+    overload,
+)
 from pydantic import BaseModel, Field
 from .._subprocess import ExecResult
+ST = TypeVar("ST", bound="SandboxEnvironment")
 TaskInit = Callable[[str, Union["SandboxEnvironmentConfigType", None]], Awaitable[None]]
 TaskCleanup = Callable[
     [str, Union["SandboxEnvironmentConfigType", None], bool], Awaitable[None]
@@ -180,6 +192,25 @@ class SandboxEnvironment(abc.ABC):
         """
         raise NotImplementedError("connection not implemented")
+    def as_type(self, sandbox_cls: Type[ST]) -> ST:
+        """Verify and return a reference to a subclass of SandboxEnvironment.
+        Args:
+           sandbox_cls: Class of sandbox (subclass of SandboxEnvironment)
+        Returns:
+           Reference to the sandbox using the requested type.
+        Raises:
+           TypeError: If the sandbox is not of the requested type.
+        """
+        if isinstance(self, sandbox_cls):
+            return self
+        else:
+            raise TypeError(
+                f"Expected instance of {sandbox_cls.__name__}, got {type(self).__name__}"
+            )
     @classmethod
     def config_files(cls) -> list[str]:
         """Standard config files for this provider (used for automatic discovery)"""

inspect_ai/util/_sandbox/events.py ADDED Viewed

@@ -0,0 +1,149 @@
+import shlex
+from typing import Literal, Type, Union, overload
+from pydantic import JsonValue
+from pydantic_core import to_jsonable_python
+from typing_extensions import override
+from inspect_ai._util.text import truncate_lines
+from inspect_ai.util._subprocess import ExecResult
+from .environment import (
+    ST,
+    SandboxConnection,
+    SandboxEnvironment,
+    SandboxEnvironmentConfigType,
+)
+class SandboxEnvironmentProxy(SandboxEnvironment):
+    def __init__(self, sandbox: SandboxEnvironment) -> None:
+        self._sandbox = sandbox
+    @override
+    async def exec(
+        self,
+        cmd: list[str],
+        input: str | bytes | None = None,
+        cwd: str | None = None,
+        env: dict[str, str] = {},
+        user: str | None = None,
+        timeout: int | None = None,
+        timeout_retry: bool = True,
+    ) -> ExecResult[str]:
+        from inspect_ai.log._transcript import SandboxEvent, transcript
+        # make call
+        result = await self._sandbox.exec(
+            cmd, input, cwd, env, user, timeout, timeout_retry
+        )
+        # yield event
+        options: dict[str, JsonValue] = {}
+        if cwd:
+            options["cwd"] = cwd
+        if env:
+            options["env"] = to_jsonable_python(env)
+        if user:
+            options["user"] = user
+        if timeout is not None:
+            options["timeout"] = timeout
+        if timeout_retry is not True:
+            options["timeout_retry"] = timeout_retry
+        transcript()._event(
+            SandboxEvent(
+                action="exec",
+                cmd=" ".join([shlex.quote(c) for c in cmd]),
+                input=content_display(input) if input is not None else None,
+                options=options,
+                result=result.returncode,
+                output=content_display(
+                    f"{result.stderr}\n\n{result.stdout}"
+                    if result.stderr
+                    else result.stdout
+                ),
+            )
+        )
+        # return result
+        return result
+    @override
+    async def write_file(self, file: str, contents: str | bytes) -> None:
+        from inspect_ai.log._transcript import SandboxEvent, transcript
+        # make call
+        await self._sandbox.write_file(file, contents)
+        # yield event
+        transcript()._event(
+            SandboxEvent(
+                action="write_file", file=file, input=content_display(contents)
+            )
+        )
+    @overload
+    async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
+    @overload
+    async def read_file(self, file: str, text: Literal[False]) -> bytes: ...
+    @override
+    async def read_file(self, file: str, text: bool = True) -> Union[str | bytes]:
+        from inspect_ai.log._transcript import SandboxEvent, transcript
+        # make call
+        if text is True:
+            output: str | bytes = await self._sandbox.read_file(file, True)
+        else:
+            output = await self._sandbox.read_file(file, False)
+        # yield event
+        transcript()._event(
+            SandboxEvent(action="read_file", file=file, output=content_display(output))
+        )
+        # return result
+        return output
+    @override
+    async def connection(self) -> SandboxConnection:
+        return await self._sandbox.connection()
+    @override
+    def as_type(self, sandbox_cls: Type[ST]) -> ST:
+        if isinstance(self._sandbox, sandbox_cls):
+            return self._sandbox
+        else:
+            raise TypeError(
+                f"Expected instance of {sandbox_cls.__name__}, got {type(self._sandbox).__name__}"
+            )
+    @classmethod
+    async def sample_cleanup(
+        cls,
+        task_name: str,
+        config: SandboxEnvironmentConfigType | None,
+        environments: dict[str, SandboxEnvironment],
+        interrupted: bool,
+    ) -> None:
+        pass
+def content_display(content: str | bytes) -> str:
+    if isinstance(content, str):
+        content, truncated = truncate_lines(content, 20)
+        if truncated:
+            content = f"{content}\n\nOutput truncated ({truncated} additional lines)"
+        return content
+    else:
+        return f"binary ({pretty_size(len(content))})"
+def pretty_size(size: int) -> str:
+    if size < 1024:
+        return f"{size} B"
+    if size < 1024 * 1024:
+        return f"{size / 1024:.2f} KB"
+    return f"{size / (1024 * 1024):.2f} MB"

inspect_ai/util/_sandbox/local.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import tempfile
 import warnings
 from pathlib import Path
-from typing import Literal, Union, cast, overload
+from typing import Literal, Union, overload
 from typing_extensions import override
@@ -40,8 +40,8 @@ class LocalSandboxEnvironment(SandboxEnvironment):
         interrupted: bool,
     ) -> None:
         for environment in environments.values():
-            env = cast(LocalSandboxEnvironment, environment)
-            env.directory.cleanup()
+            sandbox = environment.as_type(LocalSandboxEnvironment)
+            sandbox.directory.cleanup()
     def __init__(self) -> None:
         self.directory = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)

inspect_ai/util/_sandbox/self_check.py CHANGED Viewed

@@ -445,7 +445,8 @@ async def test_exec_stdout_is_limited(sandbox_env: SandboxEnvironment) -> None:
     assert "limit of 10 MiB was exceeded" in str(e_info.value)
     truncated_output = e_info.value.truncated_output
     # `yes` outputs 'y\n' (ASCII) so the size equals the string length.
-    assert truncated_output and len(truncated_output) == 10 * 1024**2
+    # some shells additionally output 'canceled\n' so we add fudge factor for that
+    assert truncated_output and (len(truncated_output) - 10 * 1024**2) < 10
 async def test_exec_stderr_is_limited(sandbox_env: SandboxEnvironment) -> None:

inspect_ai/util/_subprocess.py CHANGED Viewed

@@ -199,7 +199,10 @@ async def subprocess(
                 else:
                     result = await asyncio.wait_for(anext(rc), timeout=timeout)
                     return cast(Union[ExecResult[str], ExecResult[bytes]], result)
-            except asyncio.exceptions.TimeoutError:
+            # wait_for raises asyncio.TimeoutError under Python <= 3.10, but TimeoutError
+            # under Python >= 3.11! asyncio.timeout (introduced in Python 3.11) always
+            # raises the standard TimeoutError
+            except (TimeoutError, asyncio.exceptions.TimeoutError):
                 # terminate timed out process -- try for graceful termination
                 # then be more forceful if required
                 try:

{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: inspect_ai
-Version: 0.3.68
+Version: 0.3.70
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License
@@ -26,7 +26,7 @@ Requires-Dist: beautifulsoup4
 Requires-Dist: click>=8.1.3
 Requires-Dist: debugpy
 Requires-Dist: docstring-parser>=0.16
-Requires-Dist: fsspec>=2021.09.0
+Requires-Dist: fsspec<=2024.12.0,>=2023.1.0
 Requires-Dist: httpx
 Requires-Dist: ijson>=3.2.0
 Requires-Dist: jsonlines>=3.0.0
@@ -45,7 +45,7 @@ Requires-Dist: s3fs>=2023
 Requires-Dist: semver>=3.0.0
 Requires-Dist: shortuuid
 Requires-Dist: tenacity
-Requires-Dist: textual<=1.0.0,>=0.86.2
+Requires-Dist: textual>=0.86.2
 Requires-Dist: typing_extensions>=4.9.0
 Requires-Dist: zipp>=3.19.1
 Provides-Extra: dev
@@ -53,7 +53,7 @@ Requires-Dist: anthropic; extra == "dev"
 Requires-Dist: aioboto3; extra == "dev"
 Requires-Dist: azure-ai-inference; extra == "dev"
 Requires-Dist: google-cloud-aiplatform; extra == "dev"
-Requires-Dist: google-generativeai; extra == "dev"
+Requires-Dist: google-genai; extra == "dev"
 Requires-Dist: goodfire; extra == "dev"
 Requires-Dist: griffe; extra == "dev"
 Requires-Dist: groq; extra == "dev"
@@ -71,7 +71,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
 Requires-Dist: pytest-dotenv; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: ruff==0.9.5; extra == "dev"
+Requires-Dist: ruff==0.9.6; extra == "dev"
 Requires-Dist: textual-dev>=0.86.2; extra == "dev"
 Requires-Dist: types-Markdown; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"

inspect-ai 0.3.68__py3-none-any.whl → 0.3.70__py3-none-any.whl

inspect-ai 0.3.68__py3-none-any.whl → 0.3.70__py3-none-any.whl