PyPI - inspect-ai - Versions diffs - 0.3.69__py3-none-any.whl → 0.3.70__py3-none-any.whl - Mend

inspect-ai 0.3.69__py3-none-any.whl → 0.3.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

inspect_ai/_cli/eval.py +13 -1
inspect_ai/_display/textual/app.py +3 -2
inspect_ai/_display/textual/widgets/samples.py +4 -10
inspect_ai/_display/textual/widgets/transcript.py +25 -12
inspect_ai/_eval/eval.py +14 -2
inspect_ai/_eval/evalset.py +6 -1
inspect_ai/_eval/run.py +6 -0
inspect_ai/_eval/task/run.py +44 -15
inspect_ai/_eval/task/task.py +26 -3
inspect_ai/_util/interrupt.py +6 -0
inspect_ai/_util/logger.py +19 -0
inspect_ai/_util/rich.py +7 -8
inspect_ai/_util/text.py +13 -0
inspect_ai/_util/transcript.py +10 -2
inspect_ai/_util/working.py +46 -0
inspect_ai/_view/www/dist/assets/index.css +56 -12
inspect_ai/_view/www/dist/assets/index.js +904 -750
inspect_ai/_view/www/log-schema.json +337 -2
inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
inspect_ai/_view/www/src/appearance/icons.ts +3 -1
inspect_ai/_view/www/src/metadata/RenderedContent.tsx +0 -1
inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
inspect_ai/_view/www/src/samples/SampleDisplay.tsx +28 -1
inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +23 -2
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -0
inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +152 -0
inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +9 -2
inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +19 -1
inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
inspect_ai/_view/www/src/types/log.d.ts +188 -108
inspect_ai/_view/www/src/utils/format.ts +7 -4
inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -6
inspect_ai/log/__init__.py +2 -0
inspect_ai/log/_condense.py +1 -0
inspect_ai/log/_log.py +72 -12
inspect_ai/log/_samples.py +5 -1
inspect_ai/log/_transcript.py +31 -1
inspect_ai/model/_call_tools.py +1 -1
inspect_ai/model/_conversation.py +1 -1
inspect_ai/model/_model.py +32 -16
inspect_ai/model/_model_call.py +10 -3
inspect_ai/model/_providers/anthropic.py +13 -2
inspect_ai/model/_providers/bedrock.py +7 -0
inspect_ai/model/_providers/cloudflare.py +20 -7
inspect_ai/model/_providers/google.py +2 -0
inspect_ai/model/_providers/groq.py +57 -23
inspect_ai/model/_providers/hf.py +6 -0
inspect_ai/model/_providers/mistral.py +78 -51
inspect_ai/model/_providers/openai.py +9 -0
inspect_ai/model/_providers/providers.py +1 -1
inspect_ai/model/_providers/util/tracker.py +92 -0
inspect_ai/model/_providers/vllm.py +13 -5
inspect_ai/solver/_basic_agent.py +1 -3
inspect_ai/solver/_bridge/patch.py +0 -2
inspect_ai/solver/_limit.py +4 -4
inspect_ai/solver/_plan.py +0 -3
inspect_ai/solver/_task_state.py +7 -0
inspect_ai/tool/_tools/_web_search.py +3 -3
inspect_ai/util/_concurrency.py +14 -8
inspect_ai/util/_sandbox/context.py +15 -0
inspect_ai/util/_sandbox/docker/docker.py +7 -5
inspect_ai/util/_sandbox/environment.py +32 -1
inspect_ai/util/_sandbox/events.py +149 -0
inspect_ai/util/_sandbox/local.py +3 -3
{inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/METADATA +3 -3
{inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/RECORD +74 -67
{inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/top_level.txt +0 -0

inspect_ai/tool/_tools/_web_search.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import asyncio
 import os
-from typing import Literal, Protocol, cast, runtime_checkable
+from typing import Literal, Protocol, runtime_checkable
 import httpx
 from bs4 import BeautifulSoup, NavigableString
@@ -90,8 +90,8 @@ def web_search(
                 return_exceptions=True,
             )
             for page, link in zip(pages, links):
-                if page and not isinstance(page, Exception):
-                    page_contents.append(cast(str, page))
+                if page and not isinstance(page, BaseException):
+                    page_contents.append(page)
                     urls.append(link.url)
                     snippets.append(link.snippet)
             search_calls += 1

inspect_ai/util/_concurrency.py CHANGED Viewed

@@ -1,13 +1,19 @@
 import asyncio
+import contextlib
+import time
 from dataclasses import dataclass
+from typing import AsyncIterator
+from inspect_ai._util.working import report_sample_waiting_time
-def concurrency(
+@contextlib.asynccontextmanager
+async def concurrency(
     name: str,
     concurrency: int,
     key: str | None = None,
-) -> asyncio.Semaphore:
-    """Obtain a concurrency context.
+) -> AsyncIterator[None]:
+    """Concurrency context manager.
     A concurrency context can be used to limit the number of coroutines
     executing a block of code (e.g calling an API). For example, here
@@ -32,9 +38,6 @@ def concurrency(
          Used if the unique key isn't human readable -- e.g. includes
          api tokens or account ids so that the more readable `name`
          can be presented to users e.g in console UI>
-    Returns:
-       Asyncio Semaphore for concurrency context.
     """
     # sort out key
     key = key if key else name
@@ -47,8 +50,11 @@ def concurrency(
         )
         _concurrency_semaphores[key] = semaphore
-    # return the semaphore
-    return semaphore.semaphore
+    # wait and yield to protected code
+    start_wait = time.monotonic()
+    async with semaphore.semaphore:
+        report_sample_waiting_time(time.monotonic() - start_wait)
+        yield
 def concurrency_status() -> dict[str, tuple[int, int]]:

inspect_ai/util/_sandbox/context.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import Any, NoReturn, cast
 from shortuuid import uuid
 from inspect_ai._util.constants import SANDBOX_SETUP_TIMEOUT
+from inspect_ai.util._sandbox.events import SandboxEnvironmentProxy
 from .environment import (
     SampleCleanup,
@@ -132,6 +133,9 @@ async def init_sandbox_environments_sample(
     # verify that there is at least one environment and a 'default' env
     validate_sandbox_environments(sandboxenv_type, environments)
+    # proxy environments (for recording SandboxEvent)
+    environments = {k: SandboxEnvironmentProxy(v) for k, v in environments.items()}
     try:
         # copy files into environments
         await copy_sandbox_environment_files(files, environments)
@@ -148,6 +152,7 @@ async def init_sandbox_environments_sample(
         return environments
     except Exception as ex:
+        environments = unproxy_environments(environments)
         await sample_cleanup(task_name, config, environments, True)
         raise ex
@@ -161,9 +166,19 @@ async def cleanup_sandbox_environments_sample(
 ) -> None:
     sandboxenv_type = registry_find_sandboxenv(type)
     sample_cleanup = cast(SampleCleanup, getattr(sandboxenv_type, "sample_cleanup"))
+    environments = unproxy_environments(environments)
     await sample_cleanup(task_name, config, environments, interrupted)
+def unproxy_environments(
+    environments: dict[str, SandboxEnvironment],
+) -> dict[str, SandboxEnvironment]:
+    return {
+        k: v._sandbox
+        for k, v in cast(dict[str, SandboxEnvironmentProxy], environments).items()
+    }
 async def copy_sandbox_environment_files(
     files: dict[str, bytes], environments: dict[str, SandboxEnvironment]
 ) -> None:

inspect_ai/util/_sandbox/docker/docker.py CHANGED Viewed

@@ -5,7 +5,7 @@ import os
 import tempfile
 from logging import getLogger
 from pathlib import Path, PurePosixPath
-from typing import Literal, Union, cast, overload
+from typing import Literal, Union, overload
 from typing_extensions import override
@@ -145,7 +145,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         project = await ComposeProject.create(
             name=task_project_name(task_name),
             config=config,
-            sample_id=sample.id if sample is not None else None,
+            sample_id=sample.sample.id if sample is not None else None,
             epoch=sample.epoch if sample is not None else None,
             env=env,
         )
@@ -221,9 +221,11 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         # (this enables us to show output for the cleanup operation)
         if not interrupted:
             # extract project from first environment
-            project = cast(
-                DockerSandboxEnvironment, next(iter(environments.values()))
-            )._project
+            project = (
+                next(iter(environments.values()))
+                .as_type(DockerSandboxEnvironment)
+                ._project
+            )
             # cleanup the project
             await project_cleanup(project=project, quiet=True)

inspect_ai/util/_sandbox/environment.py CHANGED Viewed

@@ -2,12 +2,24 @@ from __future__ import annotations
 import abc
 from dataclasses import dataclass, field
-from typing import Any, Awaitable, Callable, Literal, NamedTuple, Union, overload
+from typing import (
+    Any,
+    Awaitable,
+    Callable,
+    Literal,
+    NamedTuple,
+    Type,
+    TypeVar,
+    Union,
+    overload,
+)
 from pydantic import BaseModel, Field
 from .._subprocess import ExecResult
+ST = TypeVar("ST", bound="SandboxEnvironment")
 TaskInit = Callable[[str, Union["SandboxEnvironmentConfigType", None]], Awaitable[None]]
 TaskCleanup = Callable[
     [str, Union["SandboxEnvironmentConfigType", None], bool], Awaitable[None]
@@ -180,6 +192,25 @@ class SandboxEnvironment(abc.ABC):
         """
         raise NotImplementedError("connection not implemented")
+    def as_type(self, sandbox_cls: Type[ST]) -> ST:
+        """Verify and return a reference to a subclass of SandboxEnvironment.
+        Args:
+           sandbox_cls: Class of sandbox (subclass of SandboxEnvironment)
+        Returns:
+           Reference to the sandbox using the requested type.
+        Raises:
+           TypeError: If the sandbox is not of the requested type.
+        """
+        if isinstance(self, sandbox_cls):
+            return self
+        else:
+            raise TypeError(
+                f"Expected instance of {sandbox_cls.__name__}, got {type(self).__name__}"
+            )
     @classmethod
     def config_files(cls) -> list[str]:
         """Standard config files for this provider (used for automatic discovery)"""

inspect_ai/util/_sandbox/events.py ADDED Viewed

@@ -0,0 +1,149 @@
+import shlex
+from typing import Literal, Type, Union, overload
+from pydantic import JsonValue
+from pydantic_core import to_jsonable_python
+from typing_extensions import override
+from inspect_ai._util.text import truncate_lines
+from inspect_ai.util._subprocess import ExecResult
+from .environment import (
+    ST,
+    SandboxConnection,
+    SandboxEnvironment,
+    SandboxEnvironmentConfigType,
+)
+class SandboxEnvironmentProxy(SandboxEnvironment):
+    def __init__(self, sandbox: SandboxEnvironment) -> None:
+        self._sandbox = sandbox
+    @override
+    async def exec(
+        self,
+        cmd: list[str],
+        input: str | bytes | None = None,
+        cwd: str | None = None,
+        env: dict[str, str] = {},
+        user: str | None = None,
+        timeout: int | None = None,
+        timeout_retry: bool = True,
+    ) -> ExecResult[str]:
+        from inspect_ai.log._transcript import SandboxEvent, transcript
+        # make call
+        result = await self._sandbox.exec(
+            cmd, input, cwd, env, user, timeout, timeout_retry
+        )
+        # yield event
+        options: dict[str, JsonValue] = {}
+        if cwd:
+            options["cwd"] = cwd
+        if env:
+            options["env"] = to_jsonable_python(env)
+        if user:
+            options["user"] = user
+        if timeout is not None:
+            options["timeout"] = timeout
+        if timeout_retry is not True:
+            options["timeout_retry"] = timeout_retry
+        transcript()._event(
+            SandboxEvent(
+                action="exec",
+                cmd=" ".join([shlex.quote(c) for c in cmd]),
+                input=content_display(input) if input is not None else None,
+                options=options,
+                result=result.returncode,
+                output=content_display(
+                    f"{result.stderr}\n\n{result.stdout}"
+                    if result.stderr
+                    else result.stdout
+                ),
+            )
+        )
+        # return result
+        return result
+    @override
+    async def write_file(self, file: str, contents: str | bytes) -> None:
+        from inspect_ai.log._transcript import SandboxEvent, transcript
+        # make call
+        await self._sandbox.write_file(file, contents)
+        # yield event
+        transcript()._event(
+            SandboxEvent(
+                action="write_file", file=file, input=content_display(contents)
+            )
+        )
+    @overload
+    async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
+    @overload
+    async def read_file(self, file: str, text: Literal[False]) -> bytes: ...
+    @override
+    async def read_file(self, file: str, text: bool = True) -> Union[str | bytes]:
+        from inspect_ai.log._transcript import SandboxEvent, transcript
+        # make call
+        if text is True:
+            output: str | bytes = await self._sandbox.read_file(file, True)
+        else:
+            output = await self._sandbox.read_file(file, False)
+        # yield event
+        transcript()._event(
+            SandboxEvent(action="read_file", file=file, output=content_display(output))
+        )
+        # return result
+        return output
+    @override
+    async def connection(self) -> SandboxConnection:
+        return await self._sandbox.connection()
+    @override
+    def as_type(self, sandbox_cls: Type[ST]) -> ST:
+        if isinstance(self._sandbox, sandbox_cls):
+            return self._sandbox
+        else:
+            raise TypeError(
+                f"Expected instance of {sandbox_cls.__name__}, got {type(self._sandbox).__name__}"
+            )
+    @classmethod
+    async def sample_cleanup(
+        cls,
+        task_name: str,
+        config: SandboxEnvironmentConfigType | None,
+        environments: dict[str, SandboxEnvironment],
+        interrupted: bool,
+    ) -> None:
+        pass
+def content_display(content: str | bytes) -> str:
+    if isinstance(content, str):
+        content, truncated = truncate_lines(content, 20)
+        if truncated:
+            content = f"{content}\n\nOutput truncated ({truncated} additional lines)"
+        return content
+    else:
+        return f"binary ({pretty_size(len(content))})"
+def pretty_size(size: int) -> str:
+    if size < 1024:
+        return f"{size} B"
+    if size < 1024 * 1024:
+        return f"{size / 1024:.2f} KB"
+    return f"{size / (1024 * 1024):.2f} MB"

inspect_ai/util/_sandbox/local.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import tempfile
 import warnings
 from pathlib import Path
-from typing import Literal, Union, cast, overload
+from typing import Literal, Union, overload
 from typing_extensions import override
@@ -40,8 +40,8 @@ class LocalSandboxEnvironment(SandboxEnvironment):
         interrupted: bool,
     ) -> None:
         for environment in environments.values():
-            env = cast(LocalSandboxEnvironment, environment)
-            env.directory.cleanup()
+            sandbox = environment.as_type(LocalSandboxEnvironment)
+            sandbox.directory.cleanup()
     def __init__(self) -> None:
         self.directory = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)

{inspect_ai-0.3.69.dist-info → inspect_ai-0.3.70.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: inspect_ai
-Version: 0.3.69
+Version: 0.3.70
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License
@@ -26,7 +26,7 @@ Requires-Dist: beautifulsoup4
 Requires-Dist: click>=8.1.3
 Requires-Dist: debugpy
 Requires-Dist: docstring-parser>=0.16
-Requires-Dist: fsspec>=2021.09.0
+Requires-Dist: fsspec<=2024.12.0,>=2023.1.0
 Requires-Dist: httpx
 Requires-Dist: ijson>=3.2.0
 Requires-Dist: jsonlines>=3.0.0
@@ -45,7 +45,7 @@ Requires-Dist: s3fs>=2023
 Requires-Dist: semver>=3.0.0
 Requires-Dist: shortuuid
 Requires-Dist: tenacity
-Requires-Dist: textual<=1.0.0,>=0.86.2
+Requires-Dist: textual>=0.86.2
 Requires-Dist: typing_extensions>=4.9.0
 Requires-Dist: zipp>=3.19.1
 Provides-Extra: dev

inspect-ai 0.3.69__py3-none-any.whl → 0.3.70__py3-none-any.whl

inspect-ai 0.3.69__py3-none-any.whl → 0.3.70__py3-none-any.whl