inspect-ai 0.3.72__py3-none-any.whl → 0.3.73__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only.
- inspect_ai/_cli/eval.py +14 -3
- inspect_ai/_cli/sandbox.py +3 -3
- inspect_ai/_cli/score.py +6 -4
- inspect_ai/_cli/trace.py +53 -6
- inspect_ai/_display/core/config.py +1 -1
- inspect_ai/_display/core/display.py +2 -1
- inspect_ai/_display/core/footer.py +6 -6
- inspect_ai/_display/plain/display.py +11 -6
- inspect_ai/_display/rich/display.py +23 -13
- inspect_ai/_display/textual/app.py +10 -9
- inspect_ai/_display/textual/display.py +2 -2
- inspect_ai/_display/textual/widgets/footer.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +14 -5
- inspect_ai/_eval/context.py +1 -2
- inspect_ai/_eval/eval.py +54 -41
- inspect_ai/_eval/loader.py +9 -2
- inspect_ai/_eval/run.py +148 -81
- inspect_ai/_eval/score.py +13 -8
- inspect_ai/_eval/task/images.py +31 -21
- inspect_ai/_eval/task/run.py +62 -59
- inspect_ai/_eval/task/rundir.py +16 -9
- inspect_ai/_eval/task/sandbox.py +7 -8
- inspect_ai/_eval/task/util.py +7 -0
- inspect_ai/_util/_async.py +118 -10
- inspect_ai/_util/constants.py +0 -2
- inspect_ai/_util/file.py +15 -29
- inspect_ai/_util/future.py +37 -0
- inspect_ai/_util/http.py +3 -99
- inspect_ai/_util/httpx.py +60 -0
- inspect_ai/_util/interrupt.py +2 -2
- inspect_ai/_util/json.py +5 -52
- inspect_ai/_util/logger.py +30 -86
- inspect_ai/_util/retry.py +10 -61
- inspect_ai/_util/trace.py +2 -2
- inspect_ai/_view/server.py +86 -3
- inspect_ai/_view/www/dist/assets/index.js +25837 -13269
- inspect_ai/_view/www/log-schema.json +253 -186
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +8 -3
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +2 -3
- inspect_ai/_view/www/src/types/log.d.ts +122 -94
- inspect_ai/approval/_human/manager.py +6 -10
- inspect_ai/approval/_human/panel.py +2 -2
- inspect_ai/dataset/_sources/util.py +7 -6
- inspect_ai/log/__init__.py +4 -0
- inspect_ai/log/_file.py +35 -61
- inspect_ai/log/_log.py +18 -1
- inspect_ai/log/_recorders/eval.py +14 -23
- inspect_ai/log/_recorders/json.py +3 -18
- inspect_ai/log/_samples.py +27 -2
- inspect_ai/log/_transcript.py +8 -8
- inspect_ai/model/__init__.py +2 -1
- inspect_ai/model/_call_tools.py +60 -40
- inspect_ai/model/_chat_message.py +3 -2
- inspect_ai/model/_generate_config.py +25 -0
- inspect_ai/model/_model.py +74 -36
- inspect_ai/model/_openai.py +9 -1
- inspect_ai/model/_providers/anthropic.py +24 -26
- inspect_ai/model/_providers/azureai.py +11 -9
- inspect_ai/model/_providers/bedrock.py +33 -24
- inspect_ai/model/_providers/cloudflare.py +8 -9
- inspect_ai/model/_providers/goodfire.py +7 -3
- inspect_ai/model/_providers/google.py +47 -13
- inspect_ai/model/_providers/groq.py +15 -15
- inspect_ai/model/_providers/hf.py +24 -17
- inspect_ai/model/_providers/mistral.py +36 -20
- inspect_ai/model/_providers/openai.py +30 -25
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +3 -4
- inspect_ai/model/_providers/util/__init__.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +6 -19
- inspect_ai/model/_providers/util/hooks.py +165 -0
- inspect_ai/model/_providers/vertex.py +20 -3
- inspect_ai/model/_providers/vllm.py +16 -19
- inspect_ai/scorer/_multi.py +5 -2
- inspect_ai/solver/_bridge/patch.py +31 -1
- inspect_ai/solver/_fork.py +5 -3
- inspect_ai/solver/_human_agent/agent.py +3 -2
- inspect_ai/tool/__init__.py +8 -2
- inspect_ai/tool/_tool_info.py +4 -90
- inspect_ai/tool/_tool_params.py +4 -34
- inspect_ai/tool/_tools/_web_search.py +30 -24
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_concurrency.py +5 -6
- inspect_ai/util/_display.py +6 -0
- inspect_ai/util/_json.py +170 -0
- inspect_ai/util/_sandbox/docker/cleanup.py +13 -9
- inspect_ai/util/_sandbox/docker/docker.py +5 -0
- inspect_ai/util/_sandbox/environment.py +56 -9
- inspect_ai/util/_sandbox/service.py +12 -5
- inspect_ai/util/_subprocess.py +94 -113
- inspect_ai/util/_subtask.py +2 -4
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/METADATA +6 -2
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/RECORD +99 -99
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/WHEEL +1 -1
- inspect_ai/_util/timeouts.py +0 -160
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
- inspect_ai/model/_providers/util/tracker.py +0 -92
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.72.dist-info → inspect_ai-0.3.73.dist-info}/top_level.txt +0 -0
inspect_ai/log/_file.py
CHANGED
@@ -1,16 +1,15 @@
 import os
 import re
 from logging import getLogger
-from typing import Any, Callable, Generator, Literal, cast
+from typing import Any, Callable, Generator, Literal
 
 from pydantic import BaseModel
 from pydantic_core import to_json
 
-from inspect_ai._util._async import run_coroutine
+from inspect_ai._util._async import current_async_backend, run_coroutine
 from inspect_ai._util.constants import ALL_LOG_FORMATS, EVAL_LOG_FORMAT
 from inspect_ai._util.file import (
     FileInfo,
-    async_fileystem,
     file,
     filesystem,
 )
@@ -96,62 +95,6 @@ def list_eval_logs(
     return eval_logs
 
 
-async def list_eval_logs_async(
-    log_dir: str = os.environ.get("INSPECT_LOG_DIR", "./logs"),
-    formats: list[Literal["eval", "json"]] | None = None,
-    recursive: bool = True,
-    descending: bool = True,
-    fs_options: dict[str, Any] = {},
-) -> list[EvalLogInfo]:
-    """List all eval logs in a directory.
-
-    Will be async for filesystem providers that support async (e.g. s3, gcs, etc.)
-    otherwise will fallback to sync implementation.
-
-    Args:
-      log_dir (str): Log directory (defaults to INSPECT_LOG_DIR)
-      formats (Literal["eval", "json"]): Formats to list (default
-        to listing all formats)
-      recursive (bool): List log files recursively (defaults to True).
-      descending (bool): List in descending order.
-      fs_options (dict[str, Any]): Optional. Additional arguments to pass through
-        to the filesystem provider (e.g. `S3FileSystem`).
-
-    Returns:
-      List of EvalLog Info.
-    """
-    # async filesystem if we can
-    fs = filesystem(log_dir, fs_options)
-    if fs.is_async():
-        async with async_fileystem(log_dir, fs_options=fs_options) as async_fs:
-            if await async_fs._exists(log_dir):
-                # prevent caching of listings
-                async_fs.invalidate_cache(log_dir)
-                # list logs
-                if recursive:
-                    files: list[dict[str, Any]] = []
-                    async for _, _, filenames in async_fs._walk(log_dir, detail=True):
-                        files.extend(filenames.values())
-                else:
-                    files = cast(
-                        list[dict[str, Any]],
-                        await async_fs._ls(log_dir, detail=True),
-                    )
-                logs = [fs._file_info(file) for file in files]
-                # resolve to eval logs
-                return log_files_from_ls(logs, formats, descending)
-            else:
-                return []
-    else:
-        return list_eval_logs(
-            log_dir=log_dir,
-            formats=formats,
-            recursive=recursive,
-            descending=descending,
-            fs_options=fs_options,
-        )
-
-
 def write_eval_log(
     log: EvalLog,
     location: str | FileInfo | None = None,
@@ -165,6 +108,14 @@ def write_eval_log(
       format (Literal["eval", "json", "auto"]): Write to format
        (defaults to 'auto' based on `log_file` extension)
     """
+    # don't mix trio and asyncio
+    if current_async_backend() == "trio":
+        raise RuntimeError(
+            "write_eval_log cannot be called from a trio async context (please use write_eval_log_async instead)"
+        )
+
+    # will use s3fs and is not called from main inspect solver/scorer/tool/sandbox
+    # flow, so force the use of asyncio
     run_coroutine(write_eval_log_async(log, location, format))
 
 
@@ -265,8 +216,21 @@ def read_eval_log(
     Returns:
       EvalLog object read from file.
    """
+    # don't mix trio and asyncio
+    if current_async_backend() == "trio":
+        raise RuntimeError(
+            "read_eval_log cannot be called from a trio async context (please use read_eval_log_async instead)"
+        )
+
+    # will use s3fs and is not called from main inspect solver/scorer/tool/sandbox
+    # flow, so force the use of asyncio
     return run_coroutine(
-        read_eval_log_async(log_file, header_only, resolve_attachments, format)
+        read_eval_log_async(
+            log_file,
+            header_only,
+            resolve_attachments,
+            format,
+        )
     )
 
 
@@ -281,7 +245,7 @@ async def read_eval_log_async(
     Args:
      log_file (str | FileInfo): Log file to read.
      header_only (bool): Read only the header (i.e. exclude
-
+        the "samples" and "logging" fields). Defaults to False.
      resolve_attachments (bool): Resolve attachments (e.g. images)
        to their full content.
      format (Literal["eval", "json", "auto"]): Read from format
@@ -321,6 +285,8 @@ async def read_eval_log_async(
 def read_eval_log_headers(
     log_files: list[str] | list[EvalLogInfo],
 ) -> list[EvalLog]:
+    # will use s3fs and is not called from main inspect solver/scorer/tool/sandbox
+    # flow, so force the use of asyncio
     return run_coroutine(read_eval_log_headers_async(log_files))
 
 
@@ -356,6 +322,14 @@ def read_eval_log_sample(
     Raises:
      IndexError: If the passed id and epoch are not found.
    """
+    # don't mix trio and asyncio
+    if current_async_backend() == "trio":
+        raise RuntimeError(
+            "read_eval_log_sample cannot be called from a trio async context (please use read_eval_log_sample_async instead)"
+        )
+
+    # will use s3fs and is not called from main inspect solver/scorer/tool/sandbox
+    # flow, so force the use of asyncio
     return run_coroutine(
         read_eval_log_sample_async(log_file, id, epoch, resolve_attachments, format)
    )
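The practical effect of the new guard: the synchronous wrappers now refuse to run inside a trio event loop instead of silently starting a second (asyncio) loop. A minimal sketch of the caller-side behavior, assuming `read_eval_log` and `read_eval_log_async` are exported from `inspect_ai.log` (log path hypothetical):

    import trio
    from inspect_ai.log import read_eval_log, read_eval_log_async

    async def main() -> None:
        try:
            # the sync wrapper raises under trio rather than mixing event loops
            log = read_eval_log("./logs/example.eval")
        except RuntimeError:
            # the async variant is the supported path from a trio context
            log = await read_eval_log_async("./logs/example.eval")
        print(log.status)

    trio.run(main)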
inspect_ai/log/_log.py
CHANGED
@@ -295,7 +295,7 @@ class EvalSample(BaseModel):
             # warning will handle this)
             del values["transcript"]
 
-        return values
+        return migrate_sandbox_spec(values)
 
     # allow field model_usage
     model_config = ConfigDict(protected_namespaces=())
@@ -607,6 +607,23 @@ class EvalSpec(BaseModel):
     # allow field model_args
     model_config = ConfigDict(protected_namespaces=())
 
+    @model_validator(mode="before")
+    @classmethod
+    def read_sandbox_spec(
+        cls: Type["EvalSpec"], values: dict[str, Any]
+    ) -> dict[str, Any]:
+        return migrate_sandbox_spec(values)
+
+
+def migrate_sandbox_spec(values: dict[str, Any]) -> dict[str, Any]:
+    if "sandbox" in values:
+        sandbox = values.get("sandbox")
+        if isinstance(sandbox, list):
+            values["sandbox"] = SandboxEnvironmentSpec(
+                type=sandbox[0], config=sandbox[1]
+            )
+    return values
+
 
 def eval_error(
     exception: BaseException,
inspect_ai/log/_recorders/eval.py
CHANGED
@@ -1,13 +1,11 @@
-import asyncio
 import json
 import os
 import tempfile
-from contextlib import _AsyncGeneratorContextManager
 from logging import getLogger
 from typing import Any, BinaryIO, Literal, cast
 from zipfile import ZIP_DEFLATED, ZipFile
 
-
+import anyio
 from pydantic import BaseModel, Field
 from pydantic_core import to_json
 from typing_extensions import override
@@ -21,7 +19,7 @@ from inspect_ai._util.content import (
     ContentVideo,
 )
 from inspect_ai._util.error import EvalError
-from inspect_ai._util.file import FileSystem,
+from inspect_ai._util.file import FileSystem, dirname, file, filesystem
 from inspect_ai._util.json import jsonable_python
 from inspect_ai._util.trace import trace_action
 from inspect_ai.model._chat_message import ChatMessage
@@ -277,16 +275,14 @@ def text_inputs(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
 
 
 class ZipLogFile:
-    _zip: ZipFile
+    _zip: ZipFile | None
     _temp_file: BinaryIO
     _fs: FileSystem
-    _async_fs_context: _AsyncGeneratorContextManager[AsyncFileSystem] | None = None
-    _async_fs: AsyncFileSystem | None = None
 
     def __init__(self, file: str) -> None:
         self._file = file
         self._fs = filesystem(file)
-        self._lock = asyncio.Lock()
+        self._lock = anyio.Lock()
         self._temp_file = tempfile.TemporaryFile()
         self._samples: list[EvalSample] = []
         self._summary_counter = 0
@@ -300,11 +296,6 @@ class ZipLogFile:
         summaries: list[SampleSummary],
     ) -> None:
         async with self._lock:
-            # connect to async filesystem if we can
-            if self._fs.is_async():
-                self._async_fs_context = async_fileystem(self._file)
-                self._async_fs = await self._async_fs_context.__aenter__()
-
             self._open()
             self._summary_counter = summary_counter
             self._summaries = summaries
@@ -364,7 +355,8 @@ class ZipLogFile:
     async def flush(self) -> None:
         async with self._lock:
             # close the zip file so it is flushed
-            self._zip.close()
+            if self._zip:
+                self._zip.close()
 
             # read the temp_file (leaves pointer at end for subsequent appends)
             self._temp_file.seek(0)
@@ -380,21 +372,19 @@ class ZipLogFile:
 
     async def close(self) -> EvalLog:
         async with self._lock:
-            # close the async context if we have one
-            try:
-                if self._async_fs_context:
-                    await self._async_fs_context.__aexit__(None, None, None)
-            except Exception as ex:
-                logger.warning(
-                    f"Error occurred while closing async fs for {self._file}: {ex}"
-                )
-
             # read the log from the temp file then close it
             try:
                 self._temp_file.seek(0)
                 return _read_log(self._temp_file, self._file)
             finally:
                 self._temp_file.close()
+                if self._zip:
+                    self._zip.close()
+
+    # cleanup zip file if we didn't in normal course
+    def __del__(self) -> None:
+        if self._zip:
+            self._zip.close()
 
     def _open(self) -> None:
         self._zip = ZipFile(
@@ -406,6 +396,7 @@ class ZipLogFile:
 
     # raw unsynchronized version of write
     def _zip_writestr(self, filename: str, data: Any) -> None:
+        assert self._zip
         self._zip.writestr(
             filename,
             to_json(
inspect_ai/log/_recorders/json.py
CHANGED
@@ -9,7 +9,7 @@ from typing_extensions import override
 
 from inspect_ai._util.constants import LOG_SCHEMA_VERSION
 from inspect_ai._util.error import EvalError
-from inspect_ai._util.file import absolute_file_path,
+from inspect_ai._util.file import absolute_file_path, file
 from inspect_ai._util.trace import trace_action
 
 from .._log import (
@@ -178,23 +178,8 @@ class JSONRecorder(FileRecorder):
         log_bytes = eval_log_json(log)
 
         with trace_action(logger, "Log Write", location):
-            written = False
-
-            try:
-                fs = filesystem(location)
-                if fs.is_async():
-                    async with async_fileystem(location) as async_fs:
-                        await async_fs._pipe_file(location, log_bytes)
-                        written = True
-            except Exception as ex:
-                logger.warning(
-                    f"Error occurred during async write to {location}: {ex}. Falling back to sync write."
-                )
-
-            # otherwise use sync
-            if not written:
-                with file(location, "wb") as f:
-                    f.write(log_bytes)
+            with file(location, "wb") as f:
+                f.write(log_bytes)
 
 
 def _validate_version(ver: int) -> None:
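The sandbox-spec migration is easiest to see with a legacy record. A sketch of the before/after shapes, calling the module-level helper directly (a private module path, shown only for illustration) and assuming `SandboxEnvironmentSpec` takes `type` and `config` as in the validator above:

    from inspect_ai.log._log import migrate_sandbox_spec  # internal helper
    from inspect_ai.util import SandboxEnvironmentSpec

    # older logs serialized the sandbox as a two-element list
    legacy_values = {"sandbox": ["docker", "compose.yaml"]}

    # the validator rewrites it into the typed spec on read
    migrated = migrate_sandbox_spec(dict(legacy_values))
    assert migrated["sandbox"] == SandboxEnvironmentSpec(
        type="docker", config="compose.yaml"
    )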
inspect_ai/log/_samples.py
CHANGED
@@ -1,15 +1,16 @@
 import contextlib
 from contextvars import ContextVar
 from datetime import datetime
-from typing import AsyncGenerator, Literal
+from typing import AsyncGenerator, Iterator, Literal
 
 from shortuuid import uuid
 
+from inspect_ai._util.constants import SAMPLE_SUBTASK
 from inspect_ai.dataset._dataset import Sample
 from inspect_ai.util._sandbox import SandboxConnection
 from inspect_ai.util._sandbox.context import sandbox_connections
 
-from ._transcript import Transcript
+from ._transcript import Transcript, transcript
 
 
 class ActiveSample:
@@ -44,6 +45,7 @@ class ActiveSample:
         self.total_tokens = 0
         self.transcript = transcript
         self.sandboxes = sandboxes
+        self.retry_count = 0
         self._interrupt_action: Literal["score", "error"] | None = None
 
     @property
@@ -153,6 +155,29 @@ def set_active_sample_total_messages(total_messages: int) -> None:
         active.total_messages = total_messages
 
 
+@contextlib.contextmanager
+def track_active_sample_retries() -> Iterator[None]:
+    reset_active_sample_retries()
+    try:
+        yield
+    finally:
+        reset_active_sample_retries()
+
+
+def reset_active_sample_retries() -> None:
+    active = sample_active()
+    if active:
+        active.retry_count = 0
+
+
+def report_active_sample_retry() -> None:
+    active = sample_active()
+    if active:
+        # only do this for the top level subtask
+        if transcript().name == SAMPLE_SUBTASK:
+            active.retry_count = active.retry_count + 1
+
+
 _sample_active: ContextVar[ActiveSample | None] = ContextVar(
     "_sample_active", default=None
 )
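How the new retry helpers are meant to compose: `track_active_sample_retries()` zeroes the active sample's `retry_count` on entry and exit, and `report_active_sample_retry()` bumps it on each retry. A hypothetical sketch (these are internal helpers, and `generate()` stands in for a flaky model call):

    from inspect_ai.log._samples import (  # internal module path
        report_active_sample_retry,
        track_active_sample_retries,
    )

    with track_active_sample_retries():
        for attempt in range(3):
            try:
                output = generate()  # hypothetical call that may time out
                break
            except TimeoutError:
                report_active_sample_retry()  # increments ActiveSample.retry_count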
inspect_ai/log/_transcript.py
CHANGED
@@ -1,10 +1,10 @@
-import asyncio
 import contextlib
 from contextvars import ContextVar
 from datetime import datetime
 from logging import getLogger
 from typing import (
     Any,
+    Callable,
     Iterator,
     Literal,
     Sequence,
@@ -210,15 +210,15 @@ class ToolEvent(BaseEvent):
 
     # mechanism for operator to cancel the tool call
 
-    def _set_task(self, task: asyncio.Task) -> None:
+    def _set_cancel_fn(self, cancel_fn: Callable[[], None]) -> None:
         """Set the tool task (for possible cancellation)"""
-        self._task = task
+        self._cancel_fn = cancel_fn
 
     def _cancel(self) -> None:
         """Cancel the tool task."""
-        if self._task and not self.cancelled:
+        if self._cancel_fn and not self.cancelled:
             self._cancelled = True
-            self._task.cancel()
+            self._cancel_fn()
 
     @property
     def cancelled(self) -> bool:
@@ -228,11 +228,11 @@ class ToolEvent(BaseEvent):
     _cancelled: bool | None = None
     """Was this tool call cancelled?"""
 
-    _task: asyncio.Task | None = None
-    """Task for the tool call."""
+    _cancel_fn: Callable[[], None] | None = None
+    """Function which can be used to cancel the tool call."""
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
-    """Required so that we can include '_task' as a member."""
+    """Required so that we can include '_cancel_fn' as a member."""
 
     @field_serializer("completed")
     def serialize_completed(self, dt: datetime) -> str:
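The switch from a stored task to a stored callback decouples `ToolEvent` from asyncio: any zero-argument callable can now cancel the tool call, which is exactly what anyio's cancel scopes provide. A minimal sketch of the pattern:

    import anyio

    async def demo() -> None:
        async with anyio.create_task_group() as tg:
            tg.start_soon(anyio.sleep, 60)       # stands in for the tool body
            cancel_fn = tg.cancel_scope.cancel   # what _set_cancel_fn receives
            cancel_fn()  # operator-initiated cancellation ends the group

    anyio.run(demo)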
inspect_ai/model/__init__.py
CHANGED
@@ -27,7 +27,7 @@ from ._chat_message import (
     ChatMessageTool,
     ChatMessageUser,
 )
-from ._generate_config import GenerateConfig, GenerateConfigArgs
+from ._generate_config import GenerateConfig, GenerateConfigArgs, ResponseSchema
 from ._model import (
     Model,
     ModelAPI,
@@ -49,6 +49,7 @@ from ._registry import modelapi
 __all__ = [
     "GenerateConfig",
     "GenerateConfigArgs",
+    "ResponseSchema",
     "CachePolicy",
     "ContentAudio",
     "ContentImage",
inspect_ai/model/_call_tools.py
CHANGED
@@ -1,6 +1,6 @@
-import asyncio
 import inspect
 import json
+import sys
 import types
 from dataclasses import is_dataclass
 from logging import getLogger
@@ -22,7 +22,13 @@ from typing import (
     is_typeddict,
 )
 
+if sys.version_info < (3, 11):
+    from exceptiongroup import ExceptionGroup
+
+
+import anyio
 import yaml
+from anyio.streams.memory import MemoryObjectSendStream
 from jsonschema import Draft7Validator
 from pydantic import BaseModel
 
@@ -80,7 +86,10 @@ async def call_tools(
 
     tdefs = tool_defs(tools)
 
-    async def call_tool_task(call: ToolCall) -> tuple[ChatMessageTool, ToolEvent]:
+    async def call_tool_task(
+        call: ToolCall,
+        send_stream: MemoryObjectSendStream[tuple[ChatMessageTool, ToolEvent]],
+    ) -> None:
         # create a transript for this call
         init_transcript(Transcript(name=call.function))
 
@@ -166,20 +175,23 @@ async def call_tools(
             events=list(transcript().events),
         )
 
-        # return message and event
-        return ChatMessageTool(
-            content=content,
-            tool_call_id=call.id,
-            function=call.function,
-            error=tool_error,
-        ), event
+        # yield message and event
+        async with send_stream:
+            await send_stream.send(
+                (
+                    ChatMessageTool(
+                        content=content,
+                        tool_call_id=call.id,
+                        function=call.function,
+                        error=tool_error,
+                    ),
+                    event,
+                )
+            )
 
     # call tools
     tool_messages: list[ChatMessageTool] = []
     for call in message.tool_calls:
-        # create the task
-        task = asyncio.create_task(call_tool_task(call))
-
         # create pending tool event and add it to the transcript
         # (record the waiting time for the sample so we can compare
         # it at the end to deduce total waiting time inside the tool
@@ -192,38 +204,46 @@ async def call_tools(
             view=call.view,
             pending=True,
         )
-        event._set_task(task)
         transcript()._event(event)
 
-        # execute the tool call. if the operator
+        # execute the tool call. if the operator cancels the
         # tool call then synthesize the appropriate message/event
+        send_stream, receive_stream = anyio.create_memory_object_stream[
+            tuple[ChatMessageTool, ToolEvent]
+        ]()
        try:
-            …
+            async with anyio.create_task_group() as tg:
+                tg.start_soon(call_tool_task, call, send_stream)
+                event._set_cancel_fn(tg.cancel_scope.cancel)
+                async with receive_stream:
+                    async for result in receive_stream:
+                        tool_message, result_event = result
+                        break
+        except ExceptionGroup as ex:
+            raise ex.exceptions[0]
+
+        if event.cancelled:
+            tool_message = ChatMessageTool(
+                content="",
+                function=call.function,
+                tool_call_id=call.id,
+                error=ToolCallError(
+                    "timeout", "Command timed out before completing."
+                ),
+            )
+            result_event = ToolEvent(
+                id=call.id,
+                function=call.function,
+                arguments=call.arguments,
+                result=tool_message.content,
+                truncated=None,
+                view=call.view,
+                error=tool_message.error,
+                events=[],
+            )
+            transcript().info(
+                f"Tool call '{call.function}' was cancelled by operator."
+            )
 
         # update return messages
         tool_messages.append(tool_message)
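The control flow above reduces to a standard anyio pattern: run the worker in a task group, hand it the send side of a memory object stream, and read a single result (or observe cancellation instead). A self-contained sketch of just that pattern:

    import anyio
    from anyio.streams.memory import MemoryObjectSendStream

    async def worker(send_stream: MemoryObjectSendStream[str]) -> None:
        async with send_stream:
            await send_stream.send("tool result")

    async def main() -> None:
        send_stream, receive_stream = anyio.create_memory_object_stream[str]()
        async with anyio.create_task_group() as tg:
            tg.start_soon(worker, send_stream)
            async with receive_stream:
                async for item in receive_stream:
                    print(item)  # "tool result"
                    break

    anyio.run(main)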
inspect_ai/model/_chat_message.py
CHANGED
@@ -2,6 +2,7 @@ from logging import getLogger
 from typing import Any, Literal, Type, Union
 
 from pydantic import BaseModel, Field, model_validator
+from shortuuid import uuid
 
 from inspect_ai._util.content import Content, ContentReasoning, ContentText
 from inspect_ai.tool import ToolCall
@@ -15,8 +16,8 @@ logger = getLogger(__name__)
 class ChatMessageBase(BaseModel):
     """Base class for chat messages."""
 
-    id: str | None = Field(default=None)
-    """Unique identifer for message."""
+    id: str = Field(default_factory=uuid)
+    """Unique identifer for message."""
 
     content: str | list[Content]
     """Content (simple string or list of content objects)"""
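With `default_factory=uuid`, every message now gets a unique id at construction time; nothing needs to be passed explicitly:

    from inspect_ai.model import ChatMessageUser

    msg = ChatMessageUser(content="hello")
    print(msg.id)  # shortuuid-generated, e.g. 'oFpoiMpbRc...'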
inspect_ai/model/_generate_config.py
CHANGED
@@ -5,6 +5,25 @@ from typing import Any, Literal, Union
 from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict
 
+from inspect_ai.util._json import JSONSchema
+
+
+class ResponseSchema(BaseModel):
+    """Schema for model response when using Structured Output."""
+
+    name: str
+    """The name of the response schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."""
+
+    json_schema: JSONSchema
+    """The schema for the response format, described as a JSON Schema object."""
+
+    description: str | None = Field(default=None)
+    """A description of what the response format is for, used by the model to determine how to respond in the format."""
+
+    strict: bool | None = Field(default=None)
+    """Whether to enable strict schema adherence when generating the output. If set to true, the model will always follow the exact schema defined in the schema field.
+    OpenAI and Mistral only."""
+
 
 class GenerateConfigArgs(TypedDict, total=False):
     """Type for kwargs that selectively override GenerateConfig."""
@@ -81,6 +100,9 @@ class GenerateConfigArgs(TypedDict, total=False):
     reasoning_history: Literal["none", "all", "last", "auto"] | None
     """Include reasoning in chat message history sent to generate."""
 
+    response_schema: ResponseSchema | None
+    """Request a response format as JSONSchema (output should still be validated). OpenAI, Google, and Mistral only."""
+
 
 class GenerateConfig(BaseModel):
     """Model generation options."""
@@ -159,6 +181,9 @@ class GenerateConfig(BaseModel):
     )
     """Include reasoning in chat message history sent to generate."""
 
+    response_schema: ResponseSchema | None = Field(default=None)
+    """Request a response format as JSONSchema (output should still be validated). OpenAI, Google, and Mistral only."""
+
     # migrate reasoning_history as a bool
     @model_validator(mode="before")
     @classmethod
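A sketch of requesting structured output with the new fields, assuming `JSONSchema` (added in `inspect_ai/util/_json.py` in this release) is exported from `inspect_ai.util` and accepts standard JSON Schema keywords; per the docstrings above, the model's output should still be validated by the caller:

    from inspect_ai.model import GenerateConfig, ResponseSchema
    from inspect_ai.util import JSONSchema

    config = GenerateConfig(
        response_schema=ResponseSchema(
            name="color_answer",
            json_schema=JSONSchema(
                type="object",
                properties={"color": JSONSchema(type="string")},
                required=["color"],
            ),
            strict=True,  # honored by OpenAI and Mistral only
        )
    )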