PyPI - inspect-ai - Versions diffs - 0.3.95__py3-none-any.whl → 0.3.96__py3-none-any.whl - Mend

inspect-ai 0.3.95__py3-none-any.whl → 0.3.96__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

inspect_ai/_util/local_server.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import platform
 import random
 import socket
 import subprocess
@@ -33,6 +34,21 @@ def reserve_port(
     Returns:
         A tuple (port, lock_socket) where `lock_socket` is kept open to hold the lock.
     """
+    is_macos = platform.system() == "Darwin"
+    if is_macos:
+        logger.info(
+            "MacOS system detected. A free binding port will be identified, but not reserved until the server binds to it."
+        )
+        # On macOS, let the OS pick a free port but do not keep it open.
+        # This leaves a small race condition window until the port
+        # is actually opened by the LLM server.
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            s.bind((host, 0))  # Bind to any free port
+            port = s.getsockname()[1]
+        return port, s
+    # Non-macOS behavior: try ports in range
     candidates = list(range(start, end))
     random.shuffle(candidates)

inspect_ai/agent/_types.py CHANGED Viewed

@@ -43,7 +43,7 @@ class AgentPrompt(NamedTuple):
 DEFAULT_CONTINUE_PROMPT = """
 Please proceed to the next step using your best judgement. If you believe you
-have completed the task, please call the `{submit}()` tool.
+have completed the task, please call the `{submit}()` tool with your final answer.
 """

inspect_ai/analysis/beta/__init__.py CHANGED Viewed

@@ -15,7 +15,13 @@ from ._dataframe.evals.columns import (
     EvalTask,
 )
 from ._dataframe.evals.table import evals_df
-from ._dataframe.events.columns import EventColumn
+from ._dataframe.events.columns import (
+    EventColumn,
+    EventInfo,
+    EventTiming,
+    ModelEventColumns,
+    ToolEventColumns,
+)
 from ._dataframe.events.table import events_df
 from ._dataframe.messages.columns import (
     MessageColumn,
@@ -50,6 +56,10 @@ __all__ = [
     "MessageFilter",
     "events_df",
     "EventColumn",
+    "EventInfo",
+    "EventTiming",
+    "ModelEventColumns",
+    "ToolEventColumns",
     "Column",
     "ColumnType",
     "ColumnError",

inspect_ai/analysis/beta/_dataframe/evals/table.py CHANGED Viewed

@@ -1,10 +1,11 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING, Literal, overload
+from typing import TYPE_CHECKING, Callable, Literal, overload
-from inspect_ai._display import display
 from inspect_ai._util.path import pretty_path
+from inspect_ai.analysis.beta._dataframe.progress import import_progress
 from inspect_ai.log._file import (
+    list_eval_logs,
     read_eval_log,
 )
@@ -29,39 +30,32 @@ EVAL_SUFFIX = "_eval"
 @overload
 def evals_df(
-    logs: LogPaths,
+    logs: LogPaths = list_eval_logs(),
     columns: list[Column] = EvalColumns,
-    recursive: bool = True,
-    reverse: bool = False,
     strict: Literal[True] = True,
 ) -> "pd.DataFrame": ...
 @overload
 def evals_df(
-    logs: LogPaths,
+    logs: LogPaths = list_eval_logs(),
     columns: list[Column] = EvalColumns,
-    recursive: bool = True,
-    reverse: bool = False,
     strict: Literal[False] = False,
 ) -> tuple["pd.DataFrame", ColumnErrors]: ...
 def evals_df(
-    logs: LogPaths,
+    logs: LogPaths = list_eval_logs(),
     columns: list[Column] = EvalColumns,
-    recursive: bool = True,
-    reverse: bool = False,
     strict: bool = True,
 ) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
     """Read a dataframe containing evals.
     Args:
        logs: One or more paths to log files or log directories.
+          Defaults to the contents of the currently active log directory
+          (e.g. ./logs or INSPECT_LOG_DIR).
        columns: Specification for what columns to read from log files.
-       recursive: Include recursive contents of directories (defaults to `True`)
-       reverse: Reverse the order of the dataframe (by default, items
-          are ordered from oldest to newest).
        strict: Raise import errors immediately. Defaults to `True`.
           If `False` then a tuple of `DataFrame` and errors is returned.
@@ -73,7 +67,46 @@ def evals_df(
     verify_prerequisites()
     # resolve logs
-    log_paths = resolve_logs(logs, recursive=recursive, reverse=reverse)
+    log_paths = resolve_logs(logs)
+    with import_progress("reading logs", total=len(log_paths)) as (p, task_id):
+        if strict:
+            evals_table, _ = _read_evals_df(
+                log_paths, columns, True, lambda: p.update(task_id, advance=1)
+            )
+            return evals_table
+        else:
+            evals_table, all_errors, _ = _read_evals_df(
+                log_paths, columns, False, lambda: p.update(task_id, advance=1)
+            )
+            return evals_table, all_errors
+@overload
+def _read_evals_df(
+    log_paths: list[str],
+    columns: list[Column],
+    strict: Literal[True],
+    progress: Callable[[], None],
+) -> tuple["pd.DataFrame", int]: ...
+@overload
+def _read_evals_df(
+    log_paths: list[str],
+    columns: list[Column],
+    strict: Literal[False],
+    progress: Callable[[], None],
+) -> tuple["pd.DataFrame", ColumnErrors, int]: ...
+def _read_evals_df(
+    log_paths: list[str],
+    columns: list[Column],
+    strict: bool,
+    progress: Callable[[], None],
+) -> tuple["pd.DataFrame", int] | tuple["pd.DataFrame", ColumnErrors, int]:
+    verify_prerequisites()
     # resolve duplicate columns
     columns = resolve_duplicate_columns(columns)
@@ -85,27 +118,31 @@ def evals_df(
     ensure_eval_id(columns)
     # read logs
+    total_samples = 0
     records: list[dict[str, ColumnType]] = []
-    with display().progress(total=len(log_paths)) as p:
-        for log_path in log_paths:
-            log = read_eval_log(log_path, header_only=True)
-            if strict:
-                record = import_record(log, columns, strict=True)
-            else:
-                record, errors = import_record(log, columns, strict=False)
-                all_errors[pretty_path(log_path)] = errors
-            records.append(record)
-            p.update()
+    for log_path in log_paths:
+        log = read_eval_log(log_path, header_only=True)
+        if strict:
+            record = import_record(log, columns, strict=True)
+        else:
+            record, errors = import_record(log, columns, strict=False)
+            all_errors[pretty_path(log_path)] = errors
+        records.append(record)
+        total_samples += (
+            len(log.eval.dataset.sample_ids)
+            if log.eval.dataset.sample_ids is not None
+            else (log.eval.dataset.samples or 100)
+        )
+        progress()
     # return table (+errors if strict=False)
     evals_table = records_to_pandas(records)
     evals_table = reorder_evals_df_columns(evals_table, columns)
     if strict:
-        return evals_table
+        return evals_table, total_samples
     else:
-        return evals_table, all_errors
+        return evals_table, all_errors, total_samples
 def ensure_eval_id(columns: list[Column]) -> None:

inspect_ai/analysis/beta/_dataframe/events/columns.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from datetime import datetime
 from typing import Any, Callable, Mapping, Type
 from jsonpath_ng import JSONPath  # type: ignore
@@ -7,6 +8,12 @@ from typing_extensions import override
 from inspect_ai.log._transcript import Event
 from ..columns import Column, ColumnType
+from .extract import (
+    completion_as_str,
+    model_event_input_as_str,
+    tool_choice_as_str,
+    tool_view_as_str,
+)
 class EventColumn(Column):
@@ -35,3 +42,46 @@ class EventColumn(Column):
     @override
     def path_schema(self) -> Mapping[str, Any] | None:
         return None
+EventInfo: list[Column] = [
+    EventColumn("event", path="event"),
+    EventColumn("span_id", path="span_id"),
+]
+"""Event basic information columns."""
+EventTiming: list[Column] = [
+    EventColumn("timestamp", path="timestamp", type=datetime),
+    EventColumn("completed", path="completed", type=datetime),
+    EventColumn("working_start", path="working_start"),
+    EventColumn("working_time", path="working_time"),
+]
+"""Event timing columns."""
+ModelEventColumns: list[Column] = [
+    EventColumn("model_event_model", path="model"),
+    EventColumn("model_event_role", path="role"),
+    EventColumn("model_event_input", path=model_event_input_as_str),
+    EventColumn("model_event_tools", path="tools"),
+    EventColumn("model_event_tool_choice", path=tool_choice_as_str),
+    EventColumn("model_event_config", path="config"),
+    EventColumn("model_event_usage", path="output.usage"),
+    EventColumn("model_event_time", path="output.time"),
+    EventColumn("model_event_completion", path=completion_as_str),
+    EventColumn("model_event_retries", path="retries"),
+    EventColumn("model_event_error", path="error"),
+    EventColumn("model_event_cache", path="cache"),
+    EventColumn("model_event_call", path="call"),
+]
+"""Model event columns."""
+ToolEventColumns: list[Column] = [
+    EventColumn("tool_event_function", path="function"),
+    EventColumn("tool_event_arguments", path="arguments"),
+    EventColumn("tool_event_view", path=tool_view_as_str),
+    EventColumn("tool_event_result", path="result"),
+    EventColumn("tool_event_truncated", path="truncated"),
+    EventColumn("tool_event_error_type", path="error.type"),
+    EventColumn("tool_event_error_message", path="error.message"),
+]
+"""Tool event columns."""

inspect_ai/analysis/beta/_dataframe/events/extract.py ADDED Viewed

@@ -0,0 +1,26 @@
+from inspect_ai.log._transcript import ModelEvent, ToolEvent
+from ..extract import messages_as_str
+def model_event_input_as_str(event: ModelEvent) -> str:
+    return messages_as_str(event.input)
+def tool_choice_as_str(event: ModelEvent) -> str:
+    if isinstance(event.tool_choice, str):
+        return event.tool_choice
+    else:
+        return event.tool_choice.name
+def completion_as_str(event: ModelEvent) -> str:
+    return event.output.completion
+def tool_view_as_str(event: ToolEvent) -> str | None:
+    if event.view is not None:
+        title = f"{event.view.title}\n\n" if event.view.title is not None else ""
+        return f"{title}{event.view.content}"
+    else:
+        return None

inspect_ai/analysis/beta/_dataframe/events/table.py CHANGED Viewed

@@ -1,14 +1,100 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Callable, Literal, TypeAlias
+from inspect_ai.analysis.beta._dataframe.events.columns import EventInfo
+from inspect_ai.log._file import list_eval_logs
+from inspect_ai.log._transcript import Event
 if TYPE_CHECKING:
     import pandas as pd
+from typing_extensions import overload
+from ..columns import Column, ColumnErrors
+from ..samples.table import EventsDetail, _read_samples_df
 from ..util import LogPaths, verify_prerequisites
+EventFilter: TypeAlias = (
+    list[
+        Literal[
+            "sample_init",
+            "sample_limit",
+            "sandbox",
+            "state",
+            "store",
+            "model",
+            "tool",
+            "sandbox",
+            "approval",
+            "input",
+            "score",
+            "error",
+            "logger",
+            "info",
+            "span_begin",
+            "span_end",
+            "subtask",
+        ]
+    ]
+    | Callable[[Event], bool]
+)
+"""Filter for `events_df()` rows."""
+@overload
+def events_df(
+    logs: LogPaths = list_eval_logs(),
+    columns: list[Column] = EventInfo,
+    filter: EventFilter | None = None,
+    strict: Literal[True] = True,
+) -> "pd.DataFrame": ...
-def events_df(logs: LogPaths, recursive: bool = True) -> "pd.DataFrame":
+@overload
+def events_df(
+    logs: LogPaths = list_eval_logs(),
+    columns: list[Column] = EventInfo,
+    filter: EventFilter | None = None,
+    strict: Literal[False] = False,
+) -> tuple["pd.DataFrame", ColumnErrors]: ...
+def events_df(
+    logs: LogPaths = list_eval_logs(),
+    columns: list[Column] = EventInfo,
+    filter: EventFilter | None = None,
+    strict: bool = True,
+) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
+    """Read a dataframe containing events from a set of evals.
+    Args:
+       logs: One or more paths to log files or log directories.
+          Defaults to the contents of the currently active log directory
+          (e.g. ./logs or INSPECT_LOG_DIR).
+       columns: Specification for what columns to read from log files.
+       filter: List of event types to include or callable that performs the filter.
+       strict: Raise import errors immediately. Defaults to `True`.
+          If `False` then a tuple of `DataFrame` and errors is returned.
+    Returns:
+       For `strict`, a Pandas `DataFrame` with information for the specified logs.
+       For `strict=False`, a tuple of Pandas `DataFrame` and a dictionary of errors
+       encountered (by log file) during import.
+    """
     verify_prerequisites()
-    raise NotImplementedError("events_df has not been implemented yet.")
+    # resolve filter/detail
+    if filter is None:
+        detail = EventsDetail(filter=lambda e: True)
+    elif callable(filter):
+        detail = EventsDetail(filter=filter)
+    else:
+        detail = EventsDetail(filter=lambda e: e.event in filter)
+    return _read_samples_df(
+        logs=logs,
+        columns=columns,
+        strict=strict,
+        detail=detail,
+    )

inspect_ai/analysis/beta/_dataframe/extract.py CHANGED Viewed

@@ -5,11 +5,16 @@ from typing import Any, cast
 import shortuuid
 from pydantic import BaseModel, JsonValue
-from inspect_ai._util.json import jsonable_python
+from inspect_ai.model._chat_message import (
+    ChatMessage,
+    ChatMessageAssistant,
+    ChatMessageTool,
+    ChatMessageUser,
+)
 def model_to_record(model: BaseModel) -> dict[str, JsonValue]:
-    return cast(dict[str, JsonValue], jsonable_python(model))
+    return cast(dict[str, JsonValue], model.model_dump(mode="json", exclude_none=True))
 def list_as_str(x: JsonValue) -> str:
@@ -21,34 +26,48 @@ def score_values(x: JsonValue) -> dict[str, JsonValue]:
     return {k: v["value"] for k, v in scores.items()}
-def input_as_str(x: JsonValue) -> str:
-    if isinstance(x, str):
-        return x
-    else:
-        return messages_as_str(x)
+def auto_id(base: str, index: str) -> str:
+    seed = f"{base}_{index}"
+    hash_bytes = hashlib.md5(seed.encode("utf-8")).digest()
+    long_uuid = uuid.UUID(bytes=hash_bytes)
+    return shortuuid.encode(long_uuid)
-def messages_as_str(x: JsonValue) -> str:
-    if isinstance(x, list):
-        messages = cast(list[dict[str, Any]], x)
-        return "\n\n".join([message_as_str(message) for message in messages])
-    else:
-        raise ValueError(f"Unexpected type for messages: {type(x)}")
+def messages_as_str(messages: str | list[ChatMessage]) -> str:
+    if isinstance(messages, str):
+        messages = [ChatMessageUser(content=messages)]
+    return "\n\n".join([message_as_str(message) for message in messages])
-def message_as_str(message: dict[str, Any]) -> str:
-    return f"{message['role']}:\n{content_as_str(message['content'])}"
+def message_as_str(message: ChatMessage) -> str:
+    transcript: list[str] = []
+    role = message.role
+    content = message.text.strip() if message.text else ""
+    # assistant messages with tool calls
+    if isinstance(message, ChatMessageAssistant) and message.tool_calls is not None:
+        entry = f"{role}:\n{content}\n"
-def content_as_str(content: str | list[dict[str, Any]]) -> str:
-    if isinstance(content, str):
-        return content
-    else:
-        return "\n".join([c["text"] if c["type"] == "text" else "" for c in content])
+        for tool in message.tool_calls:
+            func_name = tool.function
+            args = tool.arguments
+            if isinstance(args, dict):
+                args_text = "\n".join(f"{k}: {v}" for k, v in args.items())
+                entry += f"\nTool Call: {func_name}\nArguments:\n{args_text}"
+            else:
+                entry += f"\nTool Call: {func_name}\nArguments: {args}"
-def auto_id(base: str, index: str) -> str:
-    seed = f"{base}_{index}"
-    hash_bytes = hashlib.md5(seed.encode("utf-8")).digest()
-    long_uuid = uuid.UUID(bytes=hash_bytes)
-    return shortuuid.encode(long_uuid)
+        transcript.append(entry)
+    # tool responses with errors
+    elif isinstance(message, ChatMessageTool) and message.error is not None:
+        func_name = message.function or "unknown"
+        entry = f"{role}:\n{content}\n\nError in tool call '{func_name}':\n{message.error.message}\n"
+        transcript.append(entry)
+    # normal messages
+    else:
+        transcript.append(f"{role}:\n{content}\n")
+    return "\n".join(transcript)

inspect_ai/analysis/beta/_dataframe/messages/columns.py CHANGED Viewed

@@ -43,8 +43,8 @@ class MessageColumn(Column):
 MessageContent: list[Column] = [
     MessageColumn("role", path="role", required=True),
-    MessageColumn("content", path=message_text),
     MessageColumn("source", path="source"),
+    MessageColumn("content", path=message_text),
 ]
 """Message content columns."""

inspect_ai/analysis/beta/_dataframe/messages/table.py CHANGED Viewed

@@ -2,6 +2,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Callable, Literal, TypeAlias
+from inspect_ai.log._file import list_eval_logs
 from inspect_ai.model._chat_message import ChatMessage
 if TYPE_CHECKING:
@@ -22,43 +23,36 @@ MessageFilter: TypeAlias = (
 @overload
 def messages_df(
-    logs: LogPaths,
+    logs: LogPaths = list_eval_logs(),
     columns: list[Column] = MessageColumns,
     filter: MessageFilter | None = None,
-    recursive: bool = True,
-    reverse: bool = False,
     strict: Literal[True] = True,
 ) -> "pd.DataFrame": ...
 @overload
 def messages_df(
-    logs: LogPaths,
+    logs: LogPaths = list_eval_logs(),
     columns: list[Column] = MessageColumns,
     filter: MessageFilter | None = None,
-    recursive: bool = True,
-    reverse: bool = False,
     strict: Literal[False] = False,
 ) -> tuple["pd.DataFrame", ColumnErrors]: ...
 def messages_df(
-    logs: LogPaths,
+    logs: LogPaths = list_eval_logs(),
     columns: list[Column] = MessageColumns,
     filter: MessageFilter | None = None,
-    recursive: bool = True,
-    reverse: bool = False,
     strict: bool = True,
 ) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
     """Read a dataframe containing messages from a set of evals.
     Args:
        logs: One or more paths to log files or log directories.
+          Defaults to the contents of the currently active log directory
+          (e.g. ./logs or INSPECT_LOG_DIR).
        columns: Specification for what columns to read from log files.
        filter: List of message role types to include or callable that performs the filter.
-       recursive: Include recursive contents of directories (defaults to `True`)
-       reverse: Reverse the order of the dataframe (by default, items
-          are ordered from oldest to newest).
        strict: Raise import errors immediately. Defaults to `True`.
           If `False` then a tuple of `DataFrame` and errors is returned.
@@ -80,8 +74,6 @@ def messages_df(
     return _read_samples_df(
         logs=logs,
         columns=columns,
-        recursive=recursive,
-        reverse=reverse,
         strict=strict,
         detail=detail,
     )

inspect_ai/analysis/beta/_dataframe/progress.py ADDED Viewed

@@ -0,0 +1,26 @@
+from contextlib import contextmanager
+from typing import Iterator
+from rich.progress import (
+    BarColumn,
+    Progress,
+    TaskID,
+    TaskProgressColumn,
+    TextColumn,
+    TimeElapsedColumn,
+)
+@contextmanager
+def import_progress(
+    description: str, total: float | None
+) -> Iterator[tuple[Progress, TaskID]]:
+    with Progress(
+        TextColumn("[progress.description]{task.description:<18}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        TimeElapsedColumn(),
+        transient=True,
+    ) as progress:
+        task_id = progress.add_task(description, total=total)
+        yield progress, task_id

inspect_ai/analysis/beta/_dataframe/samples/columns.py CHANGED Viewed

@@ -7,9 +7,13 @@ from typing_extensions import override
 from inspect_ai.log._log import EvalSample, EvalSampleSummary
 from ..columns import Column, ColumnType
-from ..extract import input_as_str, list_as_str, score_values
+from ..extract import list_as_str, score_values
 from ..validate import resolved_schema
-from .extract import sample_messages_as_str, sample_path_requires_full
+from .extract import (
+    sample_input_as_str,
+    sample_messages_as_str,
+    sample_path_requires_full,
+)
 class SampleColumn(Column):
@@ -54,7 +58,7 @@ class SampleColumn(Column):
 SampleSummary: list[Column] = [
     SampleColumn("id", path="id", required=True, type=str),
     SampleColumn("epoch", path="epoch", required=True),
-    SampleColumn("input", path="input", required=True, value=input_as_str),
+    SampleColumn("input", path=sample_input_as_str, required=True),
     SampleColumn("target", path="target", required=True, value=list_as_str),
     SampleColumn("metadata_*", path="metadata"),
     SampleColumn("score_*", path="scores", value=score_values),

inspect_ai/analysis/beta/_dataframe/samples/extract.py CHANGED Viewed

@@ -3,45 +3,17 @@ from typing import Callable
 from jsonpath_ng import JSONPath  # type: ignore
 from pydantic import JsonValue
-from inspect_ai.analysis.beta._dataframe.extract import auto_id
 from inspect_ai.log._log import EvalSample, EvalSampleSummary
-from inspect_ai.model._chat_message import ChatMessageAssistant, ChatMessageTool
+from ..extract import auto_id, messages_as_str
-def sample_messages_as_str(sample: EvalSample) -> str:
-    # format each message for the transcript
-    transcript: list[str] = []
-    for msg in sample.messages:
-        role = msg.role
-        content = msg.text.strip() if msg.text else ""
-        # assistant messages with tool calls
-        if isinstance(msg, ChatMessageAssistant) and msg.tool_calls is not None:
-            entry = f"{role}:\n{content}\n"
-            for tool in msg.tool_calls:
-                func_name = tool.function
-                args = tool.arguments
-                if isinstance(args, dict):
-                    args_text = "\n".join(f"{k}: {v}" for k, v in args.items())
-                    entry += f"\nTool Call: {func_name}\nArguments:\n{args_text}"
-                else:
-                    entry += f"\nTool Call: {func_name}\nArguments: {args}"
+def sample_input_as_str(sample: EvalSample) -> str:
+    return messages_as_str(sample.input)
-            transcript.append(entry)
-        # tool responses with errors
-        elif isinstance(msg, ChatMessageTool) and msg.error is not None:
-            func_name = msg.function or "unknown"
-            entry = f"{role}:\n{content}\n\nError in tool call '{func_name}':\n{msg.error.message}\n"
-            transcript.append(entry)
-        # normal messages
-        else:
-            transcript.append(f"{role}:\n{content}\n")
-    return "\n".join(transcript)
+def sample_messages_as_str(sample: EvalSample) -> str:
+    return messages_as_str(sample.messages)
 def sample_path_requires_full(

inspect_ai/analysis/beta/_dataframe/samples/table.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 from dataclasses import dataclass
+from functools import lru_cache
 from typing import (
     TYPE_CHECKING,
     Callable,
@@ -9,21 +10,24 @@ from typing import (
     overload,
 )
-from inspect_ai._display import display
+from inspect_ai._util.hash import mm3_hash
 from inspect_ai._util.path import pretty_path
-from inspect_ai.analysis.beta._dataframe.events.columns import EventColumn
-from inspect_ai.analysis.beta._dataframe.messages.columns import MessageColumn
+from inspect_ai.analysis.beta._dataframe.progress import import_progress
 from inspect_ai.log._file import (
+    list_eval_logs,
     read_eval_log_sample_summaries,
     read_eval_log_samples,
 )
 from inspect_ai.log._log import EvalSample, EvalSampleSummary
-from inspect_ai.log._transcript import BaseEvent, Event
+from inspect_ai.log._transcript import Event
 from inspect_ai.model._chat_message import ChatMessage
 from ..columns import Column, ColumnErrors, ColumnType
 from ..evals.columns import EvalColumn
-from ..evals.table import EVAL_ID, EVAL_SUFFIX, ensure_eval_id, evals_df
+from ..evals.table import EVAL_ID, EVAL_SUFFIX, _read_evals_df, ensure_eval_id
+from ..events.columns import EventColumn
+from ..extract import message_as_str
+from ..messages.columns import MessageColumn
 from ..record import import_record, resolve_duplicate_columns
 from ..util import (
     LogPaths,
@@ -46,39 +50,32 @@ SAMPLE_SUFFIX = "_sample"
 @overload
 def samples_df(
-    logs: LogPaths,
+    logs: LogPaths = list_eval_logs(),
     columns: list[Column] = SampleSummary,
-    recursive: bool = True,
-    reverse: bool = False,
     strict: Literal[True] = True,
 ) -> "pd.DataFrame": ...
 @overload
 def samples_df(
-    logs: LogPaths,
+    logs: LogPaths = list_eval_logs(),
     columns: list[Column] = SampleSummary,
-    recursive: bool = True,
-    reverse: bool = False,
     strict: Literal[False] = False,
 ) -> tuple["pd.DataFrame", ColumnErrors]: ...
 def samples_df(
-    logs: LogPaths,
+    logs: LogPaths = list_eval_logs(),
     columns: list[Column] = SampleSummary,
-    recursive: bool = True,
-    reverse: bool = False,
     strict: bool = True,
 ) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
     """Read a dataframe containing samples from a set of evals.
     Args:
        logs: One or more paths to log files or log directories.
+          Defaults to the contents of the currently active log directory
+          (e.g. ./logs or INSPECT_LOG_DIR).
        columns: Specification for what columns to read from log files.
-       recursive: Include recursive contents of directories (defaults to `True`)
-       reverse: Reverse the order of the dataframe (by default, items
-          are ordered from oldest to newest).
        strict: Raise import errors immediately. Defaults to `True`.
           If `False` then a tuple of `DataFrame` and errors is returned.
@@ -87,9 +84,7 @@ def samples_df(
        For `strict=False`, a tuple of Pandas `DataFrame` and a dictionary of errors
        encountered (by log file) during import.
     """
-    return _read_samples_df(
-        logs, columns, recursive=recursive, reverse=reverse, strict=strict
-    )
+    return _read_samples_df(logs, columns, strict=strict)
 @dataclass
@@ -101,24 +96,22 @@ class MessagesDetail:
 @dataclass
 class EventsDetail:
-    name: str = "message"
+    name: str = "event"
     col_type = EventColumn
-    filter: Callable[[BaseEvent], bool] = lambda e: True
+    filter: Callable[[Event], bool] = lambda e: True
 def _read_samples_df(
     logs: LogPaths,
     columns: list[Column],
     *,
-    recursive: bool = True,
-    reverse: bool = False,
     strict: bool = True,
     detail: MessagesDetail | EventsDetail | None = None,
 ) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
     verify_prerequisites()
     # resolve logs
-    logs = resolve_logs(logs, recursive=recursive, reverse=reverse)
+    logs = resolve_logs(logs)
     # split columns by type
     columns_eval: list[Column] = []
@@ -150,12 +143,31 @@ def _read_samples_df(
     # make sure eval_id is present
     ensure_eval_id(columns_eval)
-    # read samples from each log
-    sample_records: list[dict[str, ColumnType]] = []
-    detail_records: list[dict[str, ColumnType]] = []
-    all_errors = ColumnErrors()
-    evals_table = evals_df(logs, columns=columns_eval)
-    with display().progress(total=len(evals_table)) as p:
+    # determine how we will allocate progress
+    with import_progress("scanning logs", total=len(logs)) as (
+        p,
+        task_id,
+    ):
+        def progress() -> None:
+            p.update(task_id, advance=1)
+        # read samples from each log
+        sample_records: list[dict[str, ColumnType]] = []
+        detail_records: list[dict[str, ColumnType]] = []
+        all_errors = ColumnErrors()
+        # read logs and note total samples
+        evals_table, total_samples = _read_evals_df(
+            logs, columns=columns_eval, strict=True, progress=progress
+        )
+        # update progress now that we know the total samples
+        entity = detail.name if detail else "sample"
+        p.reset(
+            task_id, description=f"reading {entity}s", completed=0, total=total_samples
+        )
         # read samples
         for eval_id, log in zip(evals_table[EVAL_ID].to_list(), logs):
             # get a generator for the samples (might require reading the full log
@@ -191,9 +203,9 @@ def _read_samples_df(
                     # filter detail records
                     assert isinstance(sample, EvalSample)
                     if isinstance(detail, MessagesDetail):
-                        detail_items: list[ChatMessage] | list[Event] = [
-                            m for m in sample.messages if detail.filter(m)
-                        ]
+                        detail_items: list[ChatMessage] | list[Event] = (
+                            sample_messages_from_events(sample.events, detail.filter)
+                        )
                     elif isinstance(detail, EventsDetail):
                         detail_items = [e for e in sample.events if detail.filter(e)]
                     else:
@@ -226,7 +238,7 @@ def _read_samples_df(
                 # record sample record
                 sample_records.append(record)
-            p.update()
+                progress()
     # normalize records and produce samples table
     samples_table = records_to_pandas(sample_records)
@@ -262,6 +274,35 @@ def _read_samples_df(
         return samples_table, all_errors
+def sample_messages_from_events(
+    events: list[Event], filter: Callable[[ChatMessage], bool]
+) -> list[ChatMessage]:
+    # don't include the same message twice
+    ids: set[str] = set()
+    # we need to look at the full input to every model event and add
+    # messages we haven't seen before
+    messages: list[ChatMessage] = []
+    for event in events:
+        if event.event == "model":
+            event_messages = event.input + (
+                [event.output.message] if not event.output.empty else []
+            )
+            for message in event_messages:
+                id = message.id or message_hash(message_as_str(message))
+                if id not in ids:
+                    messages.append(message)
+                    ids.add(id)
+    # then apply the filter
+    return [message for message in messages if filter(message)]
+@lru_cache(maxsize=100)
+def message_hash(message: str) -> str:
+    return mm3_hash(message)
 def reorder_samples_df_columns(
     df: "pd.DataFrame",
     eval_columns: list[Column],

inspect_ai/analysis/beta/_dataframe/util.py CHANGED Viewed

@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Sequence, TypeAlias
 from inspect_ai._util.error import pip_dependency_error
 from inspect_ai._util.file import FileInfo, filesystem
 from inspect_ai._util.version import verify_required_version
-from inspect_ai.log._file import log_files_from_ls
+from inspect_ai.log._file import EvalLogInfo, log_files_from_ls
 if TYPE_CHECKING:
     import pandas as pd
@@ -17,7 +17,9 @@ if TYPE_CHECKING:
 from .columns import ColumnType
-LogPaths: TypeAlias = PathLike[str] | str | Sequence[PathLike[str] | str]
+LogPaths: TypeAlias = (
+    PathLike[str] | str | EvalLogInfo | Sequence[PathLike[str] | str | EvalLogInfo]
+)
 def verify_prerequisites() -> None:
@@ -41,30 +43,31 @@ def verify_prerequisites() -> None:
     verify_required_version("inspect_ai.analysis", "pyarrow", "10.0.1")
-def resolve_logs(logs: LogPaths, recursive: bool, reverse: bool) -> list[str]:
+def resolve_logs(logs: LogPaths) -> list[str]:
     # normalize to list of str
-    logs = [logs] if isinstance(logs, str | PathLike) else logs
-    logs = [Path(log).as_posix() if isinstance(log, PathLike) else log for log in logs]
+    logs = [logs] if isinstance(logs, str | PathLike | EvalLogInfo) else logs
+    logs_str = [
+        Path(log).as_posix()
+        if isinstance(log, PathLike)
+        else log.name
+        if isinstance(log, EvalLogInfo)
+        else log
+        for log in logs
+    ]
     # expand directories
     log_paths: list[FileInfo] = []
-    for log in logs:
-        if isinstance(log, PathLike):
-            log = Path(log).as_posix()
-        fs = filesystem(log)
-        info = fs.info(log)
+    for log_str in logs_str:
+        fs = filesystem(log_str)
+        info = fs.info(log_str)
         if info.type == "directory":
             log_paths.extend(
-                [
-                    fi
-                    for fi in fs.ls(info.name, recursive=recursive)
-                    if fi.type == "file"
-                ]
+                [fi for fi in fs.ls(info.name, recursive=True) if fi.type == "file"]
             )
         else:
             log_paths.append(info)
-    log_files = log_files_from_ls(log_paths, descending=reverse)
+    log_files = log_files_from_ls(log_paths, sort=False)
     return [log_file.name for log_file in log_files]

inspect_ai/log/_file.py CHANGED Viewed

@@ -526,12 +526,19 @@ def log_files_from_ls(
     ls: list[FileInfo],
     formats: list[Literal["eval", "json"]] | None = None,
     descending: bool = True,
+    sort: bool = True,
 ) -> list[EvalLogInfo]:
     extensions = [f".{format}" for format in (formats or ALL_LOG_FORMATS)]
     return [
         log_file_info(file)
-        for file in sorted(
-            ls, key=lambda file: (file.mtime if file.mtime else 0), reverse=descending
+        for file in (
+            sorted(
+                ls,
+                key=lambda file: (file.mtime if file.mtime else 0),
+                reverse=descending,
+            )
+            if sort
+            else ls
         )
         if file.type == "file" and is_log_file(file.name, extensions)
     ]

inspect_ai/solver/_task_state.py CHANGED Viewed

@@ -138,7 +138,7 @@ class TaskState:
     The `TaskState` represents the internal state of the `Task` being run for a single `Sample`.
     The `TaskState` is passed to and returned from each solver during a sample's
-    evaluation. It allows us to manipulated the message history, the tools
+    evaluation. It allows us to maintain the manipulated message history, the tools
     available to the model, the final output of the model, and whether the task
     is completed or has hit a limit.
     """

inspect_ai/tool/_mcp/_sandbox.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import TextIO
 import anyio
 from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
 from mcp import JSONRPCRequest, StdioServerParameters
+from mcp.shared.message import SessionMessage
 from mcp.types import JSONRPCMessage, JSONRPCNotification
 from inspect_ai.tool._tool_support_helpers import (
@@ -36,12 +37,12 @@ async def sandbox_client(  # type: ignore
     )
     # read_stream is remote process's stdout
-    read_stream: MemoryObjectReceiveStream[JSONRPCMessage | Exception]
-    read_stream_writer: MemoryObjectSendStream[JSONRPCMessage | Exception]
+    read_stream: MemoryObjectReceiveStream[SessionMessage | Exception]
+    read_stream_writer: MemoryObjectSendStream[SessionMessage | Exception]
     # write_stream is remote process's stdin
-    write_stream: MemoryObjectSendStream[JSONRPCMessage]
-    write_stream_reader: MemoryObjectReceiveStream[JSONRPCMessage]
+    write_stream: MemoryObjectSendStream[SessionMessage]
+    write_stream_reader: MemoryObjectReceiveStream[SessionMessage]
     read_stream_writer, read_stream = anyio.create_memory_object_stream(0)
     write_stream, write_stream_reader = anyio.create_memory_object_stream(0)
@@ -64,18 +65,20 @@ async def sandbox_client(  # type: ignore
             async with write_stream_reader:
                 # This reads messages until the stream is closed
                 async for message in write_stream_reader:
-                    root = message.root
+                    root = message.message.root
                     if isinstance(root, JSONRPCRequest):
                         await read_stream_writer.send(
-                            await exec_model_request(
-                                sandbox=sandbox_environment,
-                                method="mcp_send_request",
-                                params={
-                                    "session_id": session_id,
-                                    "request": root.model_dump(),
-                                },
-                                result_type=JSONRPCMessage,
-                                timeout=timeout,
+                            SessionMessage(
+                                message=await exec_model_request(
+                                    sandbox=sandbox_environment,
+                                    method="mcp_send_request",
+                                    params={
+                                        "session_id": session_id,
+                                        "request": root.model_dump(),
+                                    },
+                                    result_type=JSONRPCMessage,
+                                    timeout=timeout,
+                                )
                             )
                         )
                     elif isinstance(root, JSONRPCNotification):

{inspect_ai-0.3.95.dist-info → inspect_ai-0.3.96.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inspect_ai
-Version: 0.3.95
+Version: 0.3.96
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License
@@ -23,7 +23,7 @@ License-File: LICENSE
 Requires-Dist: aiohttp>=3.9.0
 Requires-Dist: anyio>=4.8.0
 Requires-Dist: beautifulsoup4
-Requires-Dist: click>=8.1.3
+Requires-Dist: click<8.2.0,>=8.1.3
 Requires-Dist: debugpy
 Requires-Dist: docstring-parser>=0.16
 Requires-Dist: exceptiongroup>=1.0.2; python_version < "3.11"

{inspect_ai-0.3.95.dist-info → inspect_ai-0.3.96.dist-info}/RECORD RENAMED Viewed

@@ -98,7 +98,7 @@ inspect_ai/_util/interrupt.py,sha256=T30e5YaKSNmnO695p0lK0dquUWFq6dNNtdAFPmWGwME
 inspect_ai/_util/json.py,sha256=LiHF4XPrcuCBpnBKYCIX2AkvmsYuPieQ6HNdSlUMVvU,3653
 inspect_ai/_util/kvstore.py,sha256=z2IXLWP4QqqGqsq5_MbYjBQPcEJqfWK4IyZXgV-kppA,2398
 inspect_ai/_util/list.py,sha256=6_5r5jI5RKK34kCmIqqVQ5hYG-G8v0F5H7L-DmQQ2E4,279
-inspect_ai/_util/local_server.py,sha256=gtDaxmpeKjiIIFUo9tSEx5Avc8fCl4D_b5lH-TY3xUc,13142
+inspect_ai/_util/local_server.py,sha256=T54l-csb2qmQDvZ7zNYVq6_j0BuW5FZSBKT9GfXNc6w,13787
 inspect_ai/_util/logger.py,sha256=XpGyoe8V7FIhNU1rnjTjwR07LVbshA9rRZn33sOitig,6230
 inspect_ai/_util/notebook.py,sha256=Mgz3J4uBh-MqVBRmpiJqDHRpn2hd7HIOBeJBwLG-bbk,2998
 inspect_ai/_util/notgiven.py,sha256=zkn6AYflKLf8YlnwTAMxPLQ-4LyIVmKpGcNcXf-Ssng,457
@@ -485,7 +485,7 @@ inspect_ai/agent/_filter.py,sha256=qnT0HbT4edpDi0MwXY3Q3It2pzNRkTRXZDOqfCwMY6M,1
 inspect_ai/agent/_handoff.py,sha256=NY29zJWxZyB9YtIi9TtD7ydvULEY-Q8wfdedMDD1bjA,3729
 inspect_ai/agent/_react.py,sha256=oTHY-ZMXkCNMBwn161G_Ov-svgKqAfzOp7FryJg9imE,14078
 inspect_ai/agent/_run.py,sha256=9KAfguMPn9czothbFk_ng5xRtvIWeOjNvHuvERWENMU,1875
-inspect_ai/agent/_types.py,sha256=HoTuocY9qFU2cwmNujC5-4N1ACbBmwhldwALpMB2QhE,4204
+inspect_ai/agent/_types.py,sha256=FALCBDziC2CrEy18wBzBbIcQlZs5bCPilyqQ4RXizDc,4227
 inspect_ai/agent/_bridge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/agent/_bridge/bridge.py,sha256=Qk1z54vSZvFZMmFMOvopwY6rhFxHmJwOipZ_yVsbryU,3465
 inspect_ai/agent/_bridge/patch.py,sha256=IFpgL7WImh5RnAz7fAr574krVqa_Gm9A_eZ7leW983s,7061
@@ -505,28 +505,30 @@ inspect_ai/agent/_human/commands/score.py,sha256=6DyKiYHU7w-tKxHH5cZ0rXgFY7NWc4k
 inspect_ai/agent/_human/commands/status.py,sha256=uUO5M4skWDp29OS8sqVKAqZw0OcM3MSesBYQNbRypJ0,1934
 inspect_ai/agent/_human/commands/submit.py,sha256=D2p1M2ApvAcaVZhbP3fFofG9ZsPVvmxivSLIF5xQxtA,6524
 inspect_ai/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-inspect_ai/analysis/beta/__init__.py,sha256=mANqMDNdVJSwp3tXB_S7kTTpHYMFSWSRILU2c1_kfuI,1242
+inspect_ai/analysis/beta/__init__.py,sha256=iz72c_fRBhtXmfBUPH_cGnnFpH-SD9DEULTb0-pNY-8,1413
 inspect_ai/analysis/beta/_dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/analysis/beta/_dataframe/columns.py,sha256=feUqCpm9kxieoKPwXT8EwF8DTcwxG4JCCjCGO5XNcJc,4454
-inspect_ai/analysis/beta/_dataframe/extract.py,sha256=ZhLMcoqE1j722wYosfdaHL-gLiBpjtQV1sZ_buA7n3Y,1525
+inspect_ai/analysis/beta/_dataframe/extract.py,sha256=MnRUwtJ0ATS-94qi8nzqZ5hdV2ZQ5rw_kBZ_FCxxdLg,2288
+inspect_ai/analysis/beta/_dataframe/progress.py,sha256=YUUi8U-4BIklDmPbuVCeIQ6DkpQMC0tJHrGrZdOLIno,626
 inspect_ai/analysis/beta/_dataframe/record.py,sha256=rT3k9LuMvogw2kbFoCIHhNYb_p8QqER_FY2J9W0f1kY,12690
-inspect_ai/analysis/beta/_dataframe/util.py,sha256=qa6WHBPbleryuCtVHcoELNGzQb3VtOTMpA3E6RKCfYU,4981
+inspect_ai/analysis/beta/_dataframe/util.py,sha256=OGfBa2P3i3a1PQQP7Q5Y-uaUms1gYuaE83kvnfhrYXA,4964
 inspect_ai/analysis/beta/_dataframe/validate.py,sha256=_UBn_fosgppF3Y5wCCtF8-cnCVM61XdOK6Lm91jMgH0,6213
 inspect_ai/analysis/beta/_dataframe/evals/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/analysis/beta/_dataframe/evals/columns.py,sha256=ZMR1AByGmHWGmn3qoWefF7pDNnL4mMMlzDlwkUECm5I,4725
 inspect_ai/analysis/beta/_dataframe/evals/extract.py,sha256=XUHFWveTcAFWYTPFgsOIKB9jZT0o4v_7ElVZGJ-SAf8,586
-inspect_ai/analysis/beta/_dataframe/evals/table.py,sha256=nPO4fj2BEcGR41ESZIps5n4ZjXn8wCCwbYWSd4zBV-M,4005
+inspect_ai/analysis/beta/_dataframe/evals/table.py,sha256=oxSJg762WPIjTln5P04nC_h-KDmAEblROyMUgePPqak,5077
 inspect_ai/analysis/beta/_dataframe/events/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-inspect_ai/analysis/beta/_dataframe/events/columns.py,sha256=MipnAkZxhkajZhxLtLR7t8EY8maDI0s9iZ5hkusPBHc,1022
-inspect_ai/analysis/beta/_dataframe/events/table.py,sha256=j1HFEld4I5d5wxh0RfJUFB4nj1YMp5xF65pE0vWADJ0,339
+inspect_ai/analysis/beta/_dataframe/events/columns.py,sha256=VH6U0zXiBEK_4dXskh1OhksYjAY7KvpZWMTv9w0bBbA,2912
+inspect_ai/analysis/beta/_dataframe/events/extract.py,sha256=XxCMslBjzbI_q74bG47w5f9ncBzqJxMXSrCBJ3g23NE,705
+inspect_ai/analysis/beta/_dataframe/events/table.py,sha256=KDZbhCgwevfwAHdSGIhUIvzBKqJWFzRe6OalxJpDRE8,2869
 inspect_ai/analysis/beta/_dataframe/messages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-inspect_ai/analysis/beta/_dataframe/messages/columns.py,sha256=_WpVP8DdMz827zhGe50_A_z59SX333O54RzXqdBH87Y,1728
+inspect_ai/analysis/beta/_dataframe/messages/columns.py,sha256=T8dbyGsg6ut7G0xbnfxVAaJK43EmfvDnCbIhIvrmbB8,1728
 inspect_ai/analysis/beta/_dataframe/messages/extract.py,sha256=B7st9zoXSIj_sXm9-h_fLaRtb3ybIgXcOk41IfOxhGA,660
-inspect_ai/analysis/beta/_dataframe/messages/table.py,sha256=FugXqrhPPRANu86nTc7CrWaITxw7MQcOS75uYplgfM4,2713
+inspect_ai/analysis/beta/_dataframe/messages/table.py,sha256=pAESqFx9WzAyuQCsjrzD0ShbJT1yFf7Con6cu10etbs,2519
 inspect_ai/analysis/beta/_dataframe/samples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-inspect_ai/analysis/beta/_dataframe/samples/columns.py,sha256=PURrwrtAQpzMS3fozXFrjgkwnh_J549KTKA3D593Cdw,2477
-inspect_ai/analysis/beta/_dataframe/samples/extract.py,sha256=FEKFZYUqmxFLIR1qK-6mn5M7yBS2YG0wErYco7VtcE4,2613
-inspect_ai/analysis/beta/_dataframe/samples/table.py,sha256=J9fpBiI7vfLt3Zjj2rcfLfBKZAmVZ9dCGVARVZLN5_A,11014
+inspect_ai/analysis/beta/_dataframe/samples/columns.py,sha256=Ffi734379rSwrkDth3wyMGVIsrepp8fjXKFVWUS-CQw,2493
+inspect_ai/analysis/beta/_dataframe/samples/extract.py,sha256=WkalxZbV4Fwx1hCJIdC3D6JeE51tPBNkufjQ762eWEQ,1404
+inspect_ai/analysis/beta/_dataframe/samples/table.py,sha256=c9CMdrcCcZECcvNn1jsZj_oh1RirX9aSOLJxFJ9HnY4,12252
 inspect_ai/approval/__init__.py,sha256=Bqq4GFljOqKaIUkuCvhlFv89TfJpvbuO_R0jVyjb8VI,379
 inspect_ai/approval/_apply.py,sha256=v9v9XfvBt203TbvdB5aJbHR_SqC23xcEjBPpESbXKg8,2146
 inspect_ai/approval/_approval.py,sha256=twQcEvfU3-hPdsG785ak8OvRMOzMa00-UQAdz9Mh8Fo,863
@@ -560,7 +562,7 @@ inspect_ai/log/__init__.py,sha256=PZsopxfD0ipS6g_5CMipbttrxI1R1fy10Si0zs4lO38,25
 inspect_ai/log/_bundle.py,sha256=5Uy-s64_SFokZ7WRzti9mD7yoKrd2sOzdvqKyahoiC4,8045
 inspect_ai/log/_condense.py,sha256=OedMphK5Q2YPuY1cnoAM7tGsyVIU6Kwrv3oIeb3dFmY,10881
 inspect_ai/log/_convert.py,sha256=afEOHkaQtCkTWdwyFweGTEzLq0VVdhTjhr0IgVX5W7I,3324
-inspect_ai/log/_file.py,sha256=12DJm7ns-YXRqLM1g7Dx_ra8O77ZJ4nEZZf3eYqQnJE,19375
+inspect_ai/log/_file.py,sha256=PPYVC1TbvGpWjUyke_in84fNQQ-U-ybZdMV2jbX0ugU,19503
 inspect_ai/log/_log.py,sha256=2WiLyUPygrq4CyzRoDCT5-lqRzh-HMkt-pHEfuEt0sE,29994
 inspect_ai/log/_message.py,sha256=QofM_JZF_x3k_5ta1uQzoN_VnMoUhXFnqWurIn9FXOY,1999
 inspect_ai/log/_model.py,sha256=8tEhFZc1tBFgA6A_spXTqTBdvbzZP5t7ul7DiloHRWk,1698
@@ -661,7 +663,7 @@ inspect_ai/solver/_plan.py,sha256=lpbjIbBpiPzud7jaHqA81ZFFO0gjt_4EW0blzG4DquA,72
 inspect_ai/solver/_prompt.py,sha256=n2gkRUMSRKViDBL4WtepNoMx7zidIkQgOHLGllP6WVo,4955
 inspect_ai/solver/_run.py,sha256=k-IYoFpyNq8-HTFgQck4Akvs3OtopiL4qRWj8_yLhvY,1763
 inspect_ai/solver/_solver.py,sha256=UJ2CvmJr74n65x4xipZTxNzGfvUyuTHnnRSY0QqNo5I,9563
-inspect_ai/solver/_task_state.py,sha256=Q_hsd9BugMYQ8Q25WaxVKh6xWz7lvGyymdghZdUi_2w,14695
+inspect_ai/solver/_task_state.py,sha256=DMbaDuXOViZlCez5pEv3Y9czIPk61qyfycNjSIgprOI,14704
 inspect_ai/solver/_transcript.py,sha256=kdnkR8243NXlIvcDpZ4nb1XKT7pBYHLk5V26MtwP2EU,1047
 inspect_ai/solver/_use_tools.py,sha256=VmhCjKpkWgifOS20toBcK2bFDmyPqfxkBvcHs_-nv58,2235
 inspect_ai/solver/_util.py,sha256=pthrf-CzC6FnQYSUFLXTYM4wFEJptZrh5POTmV-Jtow,446
@@ -681,7 +683,7 @@ inspect_ai/tool/beta.py,sha256=KQYntN2MLiIHp4Gf4GXv3QO3aYHBBaP-npkluTT-aDM,153
 inspect_ai/tool/_mcp/__init__.py,sha256=vqtlBle1T_jlRQPvLKJbLgW5h_I0Ee33nDBI-rCtIeA,314
 inspect_ai/tool/_mcp/_context.py,sha256=tKQuBZ5ooRvDEW0ffACejdjKi7f8VFfYRn5uaMZGDPw,405
 inspect_ai/tool/_mcp/_mcp.py,sha256=gNTlNTzMRU5L-h4_EGPqosbPLumSdIh3_25ofrGodqs,10599
-inspect_ai/tool/_mcp/_sandbox.py,sha256=eM-B9x3NQfAoa7mw67mPdlLqwMATtvYtP187MJdxw1I,4268
+inspect_ai/tool/_mcp/_sandbox.py,sha256=tW3-kqUrtKlbPEUtyIP2Ywh7FhakCQA9dyeabmLnPuU,4444
 inspect_ai/tool/_mcp/_types.py,sha256=RT9ZRugYR3ArKe54_fuYxeenlWa_os0_DYadVIJEHlM,769
 inspect_ai/tool/_mcp/connection.py,sha256=c1VRVtN90f2KptKCXlQ6fAX2Bxx8HXu3_ZvYmt_35dw,1901
 inspect_ai/tool/_mcp/sampling.py,sha256=YDfrYj6GAec4R3JkQpUc_fPROQUpRARvbUPq7FVKSQ0,4001
@@ -737,9 +739,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
 inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
 inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
 inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
-inspect_ai-0.3.95.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
-inspect_ai-0.3.95.dist-info/METADATA,sha256=_P4GBqO5yJ99jOvUQ1s4Nq3evvpL4TCzBkoxbSBsuwI,5431
-inspect_ai-0.3.95.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
-inspect_ai-0.3.95.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
-inspect_ai-0.3.95.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
-inspect_ai-0.3.95.dist-info/RECORD,,
+inspect_ai-0.3.96.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
+inspect_ai-0.3.96.dist-info/METADATA,sha256=GHfPnN-m8cQUeqysFWpqn5z0tfuMt7JCeTNN1tYJYOA,5438
+inspect_ai-0.3.96.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+inspect_ai-0.3.96.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
+inspect_ai-0.3.96.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
+inspect_ai-0.3.96.dist-info/RECORD,,

{inspect_ai-0.3.95.dist-info → inspect_ai-0.3.96.dist-info}/WHEEL RENAMED Viewed

File without changes

{inspect_ai-0.3.95.dist-info → inspect_ai-0.3.96.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{inspect_ai-0.3.95.dist-info → inspect_ai-0.3.96.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{inspect_ai-0.3.95.dist-info → inspect_ai-0.3.96.dist-info}/top_level.txt RENAMED Viewed

File without changes

inspect-ai 0.3.95__py3-none-any.whl → 0.3.96__py3-none-any.whl

inspect-ai 0.3.95__py3-none-any.whl → 0.3.96__py3-none-any.whl