inspect-ai 0.3.95__py3-none-any.whl → 0.3.96__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import logging
3
3
  import os
4
+ import platform
4
5
  import random
5
6
  import socket
6
7
  import subprocess
@@ -33,6 +34,21 @@ def reserve_port(
33
34
  Returns:
34
35
  A tuple (port, lock_socket) where `lock_socket` is kept open to hold the lock.
35
36
  """
37
+ is_macos = platform.system() == "Darwin"
38
+
39
+ if is_macos:
40
+ logger.info(
41
+ "MacOS system detected. A free binding port will be identified, but not reserved until the server binds to it."
42
+ )
43
+ # On macOS, let the OS pick a free port but not open it
44
+ # It leads to a small race condition window until the port
45
+ # is actually opened by the llm server
46
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
47
+ s.bind((host, 0)) # Bind to any free port
48
+ port = s.getsockname()[1]
49
+ return port, s
50
+
51
+ # Non-macOS behavior: try ports in range
36
52
  candidates = list(range(start, end))
37
53
  random.shuffle(candidates)
38
54
 
@@ -43,7 +43,7 @@ class AgentPrompt(NamedTuple):
43
43
 
44
44
  DEFAULT_CONTINUE_PROMPT = """
45
45
  Please proceed to the next step using your best judgement. If you believe you
46
- have completed the task, please call the `{submit}()` tool.
46
+ have completed the task, please call the `{submit}()` tool with your final answer.
47
47
  """
48
48
 
49
49
 
@@ -15,7 +15,13 @@ from ._dataframe.evals.columns import (
15
15
  EvalTask,
16
16
  )
17
17
  from ._dataframe.evals.table import evals_df
18
- from ._dataframe.events.columns import EventColumn
18
+ from ._dataframe.events.columns import (
19
+ EventColumn,
20
+ EventInfo,
21
+ EventTiming,
22
+ ModelEventColumns,
23
+ ToolEventColumns,
24
+ )
19
25
  from ._dataframe.events.table import events_df
20
26
  from ._dataframe.messages.columns import (
21
27
  MessageColumn,
@@ -50,6 +56,10 @@ __all__ = [
50
56
  "MessageFilter",
51
57
  "events_df",
52
58
  "EventColumn",
59
+ "EventInfo",
60
+ "EventTiming",
61
+ "ModelEventColumns",
62
+ "ToolEventColumns",
53
63
  "Column",
54
64
  "ColumnType",
55
65
  "ColumnError",
@@ -1,10 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Literal, overload
3
+ from typing import TYPE_CHECKING, Callable, Literal, overload
4
4
 
5
- from inspect_ai._display import display
6
5
  from inspect_ai._util.path import pretty_path
6
+ from inspect_ai.analysis.beta._dataframe.progress import import_progress
7
7
  from inspect_ai.log._file import (
8
+ list_eval_logs,
8
9
  read_eval_log,
9
10
  )
10
11
 
@@ -29,39 +30,32 @@ EVAL_SUFFIX = "_eval"
29
30
 
30
31
  @overload
31
32
  def evals_df(
32
- logs: LogPaths,
33
+ logs: LogPaths = list_eval_logs(),
33
34
  columns: list[Column] = EvalColumns,
34
- recursive: bool = True,
35
- reverse: bool = False,
36
35
  strict: Literal[True] = True,
37
36
  ) -> "pd.DataFrame": ...
38
37
 
39
38
 
40
39
  @overload
41
40
  def evals_df(
42
- logs: LogPaths,
41
+ logs: LogPaths = list_eval_logs(),
43
42
  columns: list[Column] = EvalColumns,
44
- recursive: bool = True,
45
- reverse: bool = False,
46
43
  strict: Literal[False] = False,
47
44
  ) -> tuple["pd.DataFrame", ColumnErrors]: ...
48
45
 
49
46
 
50
47
  def evals_df(
51
- logs: LogPaths,
48
+ logs: LogPaths = list_eval_logs(),
52
49
  columns: list[Column] = EvalColumns,
53
- recursive: bool = True,
54
- reverse: bool = False,
55
50
  strict: bool = True,
56
51
  ) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
57
52
  """Read a dataframe containing evals.
58
53
 
59
54
  Args:
60
55
  logs: One or more paths to log files or log directories.
56
+ Defaults to the contents of the currently active log directory
57
+ (e.g. ./logs or INSPECT_LOG_DIR).
61
58
  columns: Specification for what columns to read from log files.
62
- recursive: Include recursive contents of directories (defaults to `True`)
63
- reverse: Reverse the order of the dataframe (by default, items
64
- are ordered from oldest to newest).
65
59
  strict: Raise import errors immediately. Defaults to `True`.
66
60
  If `False` then a tuple of `DataFrame` and errors is returned.
67
61
 
@@ -73,7 +67,46 @@ def evals_df(
73
67
  verify_prerequisites()
74
68
 
75
69
  # resolve logs
76
- log_paths = resolve_logs(logs, recursive=recursive, reverse=reverse)
70
+ log_paths = resolve_logs(logs)
71
+
72
+ with import_progress("reading logs", total=len(log_paths)) as (p, task_id):
73
+ if strict:
74
+ evals_table, _ = _read_evals_df(
75
+ log_paths, columns, True, lambda: p.update(task_id, advance=1)
76
+ )
77
+ return evals_table
78
+ else:
79
+ evals_table, all_errors, _ = _read_evals_df(
80
+ log_paths, columns, False, lambda: p.update(task_id, advance=1)
81
+ )
82
+ return evals_table, all_errors
83
+
84
+
85
+ @overload
86
+ def _read_evals_df(
87
+ log_paths: list[str],
88
+ columns: list[Column],
89
+ strict: Literal[True],
90
+ progress: Callable[[], None],
91
+ ) -> tuple["pd.DataFrame", int]: ...
92
+
93
+
94
+ @overload
95
+ def _read_evals_df(
96
+ log_paths: list[str],
97
+ columns: list[Column],
98
+ strict: Literal[False],
99
+ progress: Callable[[], None],
100
+ ) -> tuple["pd.DataFrame", ColumnErrors, int]: ...
101
+
102
+
103
+ def _read_evals_df(
104
+ log_paths: list[str],
105
+ columns: list[Column],
106
+ strict: bool,
107
+ progress: Callable[[], None],
108
+ ) -> tuple["pd.DataFrame", int] | tuple["pd.DataFrame", ColumnErrors, int]:
109
+ verify_prerequisites()
77
110
 
78
111
  # resolve duplicate columns
79
112
  columns = resolve_duplicate_columns(columns)
@@ -85,27 +118,31 @@ def evals_df(
85
118
  ensure_eval_id(columns)
86
119
 
87
120
  # read logs
121
+ total_samples = 0
88
122
  records: list[dict[str, ColumnType]] = []
89
- with display().progress(total=len(log_paths)) as p:
90
- for log_path in log_paths:
91
- log = read_eval_log(log_path, header_only=True)
92
- if strict:
93
- record = import_record(log, columns, strict=True)
94
- else:
95
- record, errors = import_record(log, columns, strict=False)
96
- all_errors[pretty_path(log_path)] = errors
97
- records.append(record)
98
-
99
- p.update()
123
+ for log_path in log_paths:
124
+ log = read_eval_log(log_path, header_only=True)
125
+ if strict:
126
+ record = import_record(log, columns, strict=True)
127
+ else:
128
+ record, errors = import_record(log, columns, strict=False)
129
+ all_errors[pretty_path(log_path)] = errors
130
+ records.append(record)
131
+ total_samples += (
132
+ len(log.eval.dataset.sample_ids)
133
+ if log.eval.dataset.sample_ids is not None
134
+ else (log.eval.dataset.samples or 100)
135
+ )
136
+ progress()
100
137
 
101
138
  # return table (+errors if strict=False)
102
139
  evals_table = records_to_pandas(records)
103
140
  evals_table = reorder_evals_df_columns(evals_table, columns)
104
141
 
105
142
  if strict:
106
- return evals_table
143
+ return evals_table, total_samples
107
144
  else:
108
- return evals_table, all_errors
145
+ return evals_table, all_errors, total_samples
109
146
 
110
147
 
111
148
  def ensure_eval_id(columns: list[Column]) -> None:
@@ -1,3 +1,4 @@
1
+ from datetime import datetime
1
2
  from typing import Any, Callable, Mapping, Type
2
3
 
3
4
  from jsonpath_ng import JSONPath # type: ignore
@@ -7,6 +8,12 @@ from typing_extensions import override
7
8
  from inspect_ai.log._transcript import Event
8
9
 
9
10
  from ..columns import Column, ColumnType
11
+ from .extract import (
12
+ completion_as_str,
13
+ model_event_input_as_str,
14
+ tool_choice_as_str,
15
+ tool_view_as_str,
16
+ )
10
17
 
11
18
 
12
19
  class EventColumn(Column):
@@ -35,3 +42,46 @@ class EventColumn(Column):
35
42
  @override
36
43
  def path_schema(self) -> Mapping[str, Any] | None:
37
44
  return None
45
+
46
+
47
+ EventInfo: list[Column] = [
48
+ EventColumn("event", path="event"),
49
+ EventColumn("span_id", path="span_id"),
50
+ ]
51
+ """Event basic information columns."""
52
+
53
+ EventTiming: list[Column] = [
54
+ EventColumn("timestamp", path="timestamp", type=datetime),
55
+ EventColumn("completed", path="completed", type=datetime),
56
+ EventColumn("working_start", path="working_start"),
57
+ EventColumn("working_time", path="working_time"),
58
+ ]
59
+ """Event timing columns."""
60
+
61
+ ModelEventColumns: list[Column] = [
62
+ EventColumn("model_event_model", path="model"),
63
+ EventColumn("model_event_role", path="role"),
64
+ EventColumn("model_event_input", path=model_event_input_as_str),
65
+ EventColumn("model_event_tools", path="tools"),
66
+ EventColumn("model_event_tool_choice", path=tool_choice_as_str),
67
+ EventColumn("model_event_config", path="config"),
68
+ EventColumn("model_event_usage", path="output.usage"),
69
+ EventColumn("model_event_time", path="output.time"),
70
+ EventColumn("model_event_completion", path=completion_as_str),
71
+ EventColumn("model_event_retries", path="retries"),
72
+ EventColumn("model_event_error", path="error"),
73
+ EventColumn("model_event_cache", path="cache"),
74
+ EventColumn("model_event_call", path="call"),
75
+ ]
76
+ """Model event columns."""
77
+
78
+ ToolEventColumns: list[Column] = [
79
+ EventColumn("tool_event_function", path="function"),
80
+ EventColumn("tool_event_arguments", path="arguments"),
81
+ EventColumn("tool_event_view", path=tool_view_as_str),
82
+ EventColumn("tool_event_result", path="result"),
83
+ EventColumn("tool_event_truncated", path="truncated"),
84
+ EventColumn("tool_event_error_type", path="error.type"),
85
+ EventColumn("tool_event_error_message", path="error.message"),
86
+ ]
87
+ """Tool event columns."""
@@ -0,0 +1,26 @@
1
+ from inspect_ai.log._transcript import ModelEvent, ToolEvent
2
+
3
+ from ..extract import messages_as_str
4
+
5
+
6
+ def model_event_input_as_str(event: ModelEvent) -> str:
7
+ return messages_as_str(event.input)
8
+
9
+
10
+ def tool_choice_as_str(event: ModelEvent) -> str:
11
+ if isinstance(event.tool_choice, str):
12
+ return event.tool_choice
13
+ else:
14
+ return event.tool_choice.name
15
+
16
+
17
+ def completion_as_str(event: ModelEvent) -> str:
18
+ return event.output.completion
19
+
20
+
21
+ def tool_view_as_str(event: ToolEvent) -> str | None:
22
+ if event.view is not None:
23
+ title = f"{event.view.title}\n\n" if event.view.title is not None else ""
24
+ return f"{title}{event.view.content}"
25
+ else:
26
+ return None
@@ -1,14 +1,100 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING
3
+ from typing import TYPE_CHECKING, Callable, Literal, TypeAlias
4
+
5
+ from inspect_ai.analysis.beta._dataframe.events.columns import EventInfo
6
+ from inspect_ai.log._file import list_eval_logs
7
+ from inspect_ai.log._transcript import Event
4
8
 
5
9
  if TYPE_CHECKING:
6
10
  import pandas as pd
7
11
 
12
+ from typing_extensions import overload
13
+
14
+ from ..columns import Column, ColumnErrors
15
+ from ..samples.table import EventsDetail, _read_samples_df
8
16
  from ..util import LogPaths, verify_prerequisites
9
17
 
18
+ EventFilter: TypeAlias = (
19
+ list[
20
+ Literal[
21
+ "sample_init",
22
+ "sample_limit",
23
+ "sandbox",
24
+ "state",
25
+ "store",
26
+ "model",
27
+ "tool",
28
+ "sandbox",
29
+ "approval",
30
+ "input",
31
+ "score",
32
+ "error",
33
+ "logger",
34
+ "info",
35
+ "span_begin",
36
+ "span_end",
37
+ "subtask",
38
+ ]
39
+ ]
40
+ | Callable[[Event], bool]
41
+ )
42
+ """Filter for `events_df()` rows."""
43
+
44
+
45
+ @overload
46
+ def events_df(
47
+ logs: LogPaths = list_eval_logs(),
48
+ columns: list[Column] = EventInfo,
49
+ filter: EventFilter | None = None,
50
+ strict: Literal[True] = True,
51
+ ) -> "pd.DataFrame": ...
52
+
10
53
 
11
- def events_df(logs: LogPaths, recursive: bool = True) -> "pd.DataFrame":
54
+ @overload
55
+ def events_df(
56
+ logs: LogPaths = list_eval_logs(),
57
+ columns: list[Column] = EventInfo,
58
+ filter: EventFilter | None = None,
59
+ strict: Literal[False] = False,
60
+ ) -> tuple["pd.DataFrame", ColumnErrors]: ...
61
+
62
+
63
+ def events_df(
64
+ logs: LogPaths = list_eval_logs(),
65
+ columns: list[Column] = EventInfo,
66
+ filter: EventFilter | None = None,
67
+ strict: bool = True,
68
+ ) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
69
+ """Read a dataframe containing events from a set of evals.
70
+
71
+ Args:
72
+ logs: One or more paths to log files or log directories.
73
+ Defaults to the contents of the currently active log directory
74
+ (e.g. ./logs or INSPECT_LOG_DIR).
75
+ columns: Specification for what columns to read from log files.
76
+ filter: List of event types to include or callable that performs the filter.
77
+ strict: Raise import errors immediately. Defaults to `True`.
78
+ If `False` then a tuple of `DataFrame` and errors is returned.
79
+
80
+ Returns:
81
+ For `strict`, a Pandas `DataFrame` with information for the specified logs.
82
+ For `strict=False`, a tuple of Pandas `DataFrame` and a dictionary of errors
83
+ encountered (by log file) during import.
84
+ """
12
85
  verify_prerequisites()
13
86
 
14
- raise NotImplementedError("events_df has not been implemented yet.")
87
+ # resolve filter/detail
88
+ if filter is None:
89
+ detail = EventsDetail(filter=lambda e: True)
90
+ elif callable(filter):
91
+ detail = EventsDetail(filter=filter)
92
+ else:
93
+ detail = EventsDetail(filter=lambda e: e.event in filter)
94
+
95
+ return _read_samples_df(
96
+ logs=logs,
97
+ columns=columns,
98
+ strict=strict,
99
+ detail=detail,
100
+ )
@@ -5,11 +5,16 @@ from typing import Any, cast
5
5
  import shortuuid
6
6
  from pydantic import BaseModel, JsonValue
7
7
 
8
- from inspect_ai._util.json import jsonable_python
8
+ from inspect_ai.model._chat_message import (
9
+ ChatMessage,
10
+ ChatMessageAssistant,
11
+ ChatMessageTool,
12
+ ChatMessageUser,
13
+ )
9
14
 
10
15
 
11
16
  def model_to_record(model: BaseModel) -> dict[str, JsonValue]:
12
- return cast(dict[str, JsonValue], jsonable_python(model))
17
+ return cast(dict[str, JsonValue], model.model_dump(mode="json", exclude_none=True))
13
18
 
14
19
 
15
20
  def list_as_str(x: JsonValue) -> str:
@@ -21,34 +26,48 @@ def score_values(x: JsonValue) -> dict[str, JsonValue]:
21
26
  return {k: v["value"] for k, v in scores.items()}
22
27
 
23
28
 
24
- def input_as_str(x: JsonValue) -> str:
25
- if isinstance(x, str):
26
- return x
27
- else:
28
- return messages_as_str(x)
29
+ def auto_id(base: str, index: str) -> str:
30
+ seed = f"{base}_{index}"
31
+ hash_bytes = hashlib.md5(seed.encode("utf-8")).digest()
32
+ long_uuid = uuid.UUID(bytes=hash_bytes)
33
+ return shortuuid.encode(long_uuid)
29
34
 
30
35
 
31
- def messages_as_str(x: JsonValue) -> str:
32
- if isinstance(x, list):
33
- messages = cast(list[dict[str, Any]], x)
34
- return "\n\n".join([message_as_str(message) for message in messages])
35
- else:
36
- raise ValueError(f"Unexpected type for messages: {type(x)}")
36
+ def messages_as_str(messages: str | list[ChatMessage]) -> str:
37
+ if isinstance(messages, str):
38
+ messages = [ChatMessageUser(content=messages)]
39
+ return "\n\n".join([message_as_str(message) for message in messages])
37
40
 
38
41
 
39
- def message_as_str(message: dict[str, Any]) -> str:
40
- return f"{message['role']}:\n{content_as_str(message['content'])}"
42
+ def message_as_str(message: ChatMessage) -> str:
43
+ transcript: list[str] = []
44
+ role = message.role
45
+ content = message.text.strip() if message.text else ""
41
46
 
47
+ # assistant messages with tool calls
48
+ if isinstance(message, ChatMessageAssistant) and message.tool_calls is not None:
49
+ entry = f"{role}:\n{content}\n"
42
50
 
43
- def content_as_str(content: str | list[dict[str, Any]]) -> str:
44
- if isinstance(content, str):
45
- return content
46
- else:
47
- return "\n".join([c["text"] if c["type"] == "text" else "" for c in content])
51
+ for tool in message.tool_calls:
52
+ func_name = tool.function
53
+ args = tool.arguments
48
54
 
55
+ if isinstance(args, dict):
56
+ args_text = "\n".join(f"{k}: {v}" for k, v in args.items())
57
+ entry += f"\nTool Call: {func_name}\nArguments:\n{args_text}"
58
+ else:
59
+ entry += f"\nTool Call: {func_name}\nArguments: {args}"
49
60
 
50
- def auto_id(base: str, index: str) -> str:
51
- seed = f"{base}_{index}"
52
- hash_bytes = hashlib.md5(seed.encode("utf-8")).digest()
53
- long_uuid = uuid.UUID(bytes=hash_bytes)
54
- return shortuuid.encode(long_uuid)
61
+ transcript.append(entry)
62
+
63
+ # tool responses with errors
64
+ elif isinstance(message, ChatMessageTool) and message.error is not None:
65
+ func_name = message.function or "unknown"
66
+ entry = f"{role}:\n{content}\n\nError in tool call '{func_name}':\n{message.error.message}\n"
67
+ transcript.append(entry)
68
+
69
+ # normal messages
70
+ else:
71
+ transcript.append(f"{role}:\n{content}\n")
72
+
73
+ return "\n".join(transcript)
@@ -43,8 +43,8 @@ class MessageColumn(Column):
43
43
 
44
44
  MessageContent: list[Column] = [
45
45
  MessageColumn("role", path="role", required=True),
46
- MessageColumn("content", path=message_text),
47
46
  MessageColumn("source", path="source"),
47
+ MessageColumn("content", path=message_text),
48
48
  ]
49
49
  """Message content columns."""
50
50
 
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from typing import TYPE_CHECKING, Callable, Literal, TypeAlias
4
4
 
5
+ from inspect_ai.log._file import list_eval_logs
5
6
  from inspect_ai.model._chat_message import ChatMessage
6
7
 
7
8
  if TYPE_CHECKING:
@@ -22,43 +23,36 @@ MessageFilter: TypeAlias = (
22
23
 
23
24
  @overload
24
25
  def messages_df(
25
- logs: LogPaths,
26
+ logs: LogPaths = list_eval_logs(),
26
27
  columns: list[Column] = MessageColumns,
27
28
  filter: MessageFilter | None = None,
28
- recursive: bool = True,
29
- reverse: bool = False,
30
29
  strict: Literal[True] = True,
31
30
  ) -> "pd.DataFrame": ...
32
31
 
33
32
 
34
33
  @overload
35
34
  def messages_df(
36
- logs: LogPaths,
35
+ logs: LogPaths = list_eval_logs(),
37
36
  columns: list[Column] = MessageColumns,
38
37
  filter: MessageFilter | None = None,
39
- recursive: bool = True,
40
- reverse: bool = False,
41
38
  strict: Literal[False] = False,
42
39
  ) -> tuple["pd.DataFrame", ColumnErrors]: ...
43
40
 
44
41
 
45
42
  def messages_df(
46
- logs: LogPaths,
43
+ logs: LogPaths = list_eval_logs(),
47
44
  columns: list[Column] = MessageColumns,
48
45
  filter: MessageFilter | None = None,
49
- recursive: bool = True,
50
- reverse: bool = False,
51
46
  strict: bool = True,
52
47
  ) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
53
48
  """Read a dataframe containing messages from a set of evals.
54
49
 
55
50
  Args:
56
51
  logs: One or more paths to log files or log directories.
52
+ Defaults to the contents of the currently active log directory
53
+ (e.g. ./logs or INSPECT_LOG_DIR).
57
54
  columns: Specification for what columns to read from log files.
58
55
  filter: List of message role types to include or callable that performs the filter.
59
- recursive: Include recursive contents of directories (defaults to `True`)
60
- reverse: Reverse the order of the dataframe (by default, items
61
- are ordered from oldest to newest).
62
56
  strict: Raise import errors immediately. Defaults to `True`.
63
57
  If `False` then a tuple of `DataFrame` and errors is returned.
64
58
 
@@ -80,8 +74,6 @@ def messages_df(
80
74
  return _read_samples_df(
81
75
  logs=logs,
82
76
  columns=columns,
83
- recursive=recursive,
84
- reverse=reverse,
85
77
  strict=strict,
86
78
  detail=detail,
87
79
  )
@@ -0,0 +1,26 @@
1
+ from contextlib import contextmanager
2
+ from typing import Iterator
3
+
4
+ from rich.progress import (
5
+ BarColumn,
6
+ Progress,
7
+ TaskID,
8
+ TaskProgressColumn,
9
+ TextColumn,
10
+ TimeElapsedColumn,
11
+ )
12
+
13
+
14
+ @contextmanager
15
+ def import_progress(
16
+ description: str, total: float | None
17
+ ) -> Iterator[tuple[Progress, TaskID]]:
18
+ with Progress(
19
+ TextColumn("[progress.description]{task.description:<18}"),
20
+ BarColumn(),
21
+ TaskProgressColumn(),
22
+ TimeElapsedColumn(),
23
+ transient=True,
24
+ ) as progress:
25
+ task_id = progress.add_task(description, total=total)
26
+ yield progress, task_id
@@ -7,9 +7,13 @@ from typing_extensions import override
7
7
  from inspect_ai.log._log import EvalSample, EvalSampleSummary
8
8
 
9
9
  from ..columns import Column, ColumnType
10
- from ..extract import input_as_str, list_as_str, score_values
10
+ from ..extract import list_as_str, score_values
11
11
  from ..validate import resolved_schema
12
- from .extract import sample_messages_as_str, sample_path_requires_full
12
+ from .extract import (
13
+ sample_input_as_str,
14
+ sample_messages_as_str,
15
+ sample_path_requires_full,
16
+ )
13
17
 
14
18
 
15
19
  class SampleColumn(Column):
@@ -54,7 +58,7 @@ class SampleColumn(Column):
54
58
  SampleSummary: list[Column] = [
55
59
  SampleColumn("id", path="id", required=True, type=str),
56
60
  SampleColumn("epoch", path="epoch", required=True),
57
- SampleColumn("input", path="input", required=True, value=input_as_str),
61
+ SampleColumn("input", path=sample_input_as_str, required=True),
58
62
  SampleColumn("target", path="target", required=True, value=list_as_str),
59
63
  SampleColumn("metadata_*", path="metadata"),
60
64
  SampleColumn("score_*", path="scores", value=score_values),
@@ -3,45 +3,17 @@ from typing import Callable
3
3
  from jsonpath_ng import JSONPath # type: ignore
4
4
  from pydantic import JsonValue
5
5
 
6
- from inspect_ai.analysis.beta._dataframe.extract import auto_id
7
6
  from inspect_ai.log._log import EvalSample, EvalSampleSummary
8
- from inspect_ai.model._chat_message import ChatMessageAssistant, ChatMessageTool
9
7
 
8
+ from ..extract import auto_id, messages_as_str
10
9
 
11
- def sample_messages_as_str(sample: EvalSample) -> str:
12
- # format each message for the transcript
13
- transcript: list[str] = []
14
- for msg in sample.messages:
15
- role = msg.role
16
- content = msg.text.strip() if msg.text else ""
17
-
18
- # assistant messages with tool calls
19
- if isinstance(msg, ChatMessageAssistant) and msg.tool_calls is not None:
20
- entry = f"{role}:\n{content}\n"
21
-
22
- for tool in msg.tool_calls:
23
- func_name = tool.function
24
- args = tool.arguments
25
10
 
26
- if isinstance(args, dict):
27
- args_text = "\n".join(f"{k}: {v}" for k, v in args.items())
28
- entry += f"\nTool Call: {func_name}\nArguments:\n{args_text}"
29
- else:
30
- entry += f"\nTool Call: {func_name}\nArguments: {args}"
11
+ def sample_input_as_str(sample: EvalSample) -> str:
12
+ return messages_as_str(sample.input)
31
13
 
32
- transcript.append(entry)
33
14
 
34
- # tool responses with errors
35
- elif isinstance(msg, ChatMessageTool) and msg.error is not None:
36
- func_name = msg.function or "unknown"
37
- entry = f"{role}:\n{content}\n\nError in tool call '{func_name}':\n{msg.error.message}\n"
38
- transcript.append(entry)
39
-
40
- # normal messages
41
- else:
42
- transcript.append(f"{role}:\n{content}\n")
43
-
44
- return "\n".join(transcript)
15
+ def sample_messages_as_str(sample: EvalSample) -> str:
16
+ return messages_as_str(sample.messages)
45
17
 
46
18
 
47
19
  def sample_path_requires_full(
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from dataclasses import dataclass
4
+ from functools import lru_cache
4
5
  from typing import (
5
6
  TYPE_CHECKING,
6
7
  Callable,
@@ -9,21 +10,24 @@ from typing import (
9
10
  overload,
10
11
  )
11
12
 
12
- from inspect_ai._display import display
13
+ from inspect_ai._util.hash import mm3_hash
13
14
  from inspect_ai._util.path import pretty_path
14
- from inspect_ai.analysis.beta._dataframe.events.columns import EventColumn
15
- from inspect_ai.analysis.beta._dataframe.messages.columns import MessageColumn
15
+ from inspect_ai.analysis.beta._dataframe.progress import import_progress
16
16
  from inspect_ai.log._file import (
17
+ list_eval_logs,
17
18
  read_eval_log_sample_summaries,
18
19
  read_eval_log_samples,
19
20
  )
20
21
  from inspect_ai.log._log import EvalSample, EvalSampleSummary
21
- from inspect_ai.log._transcript import BaseEvent, Event
22
+ from inspect_ai.log._transcript import Event
22
23
  from inspect_ai.model._chat_message import ChatMessage
23
24
 
24
25
  from ..columns import Column, ColumnErrors, ColumnType
25
26
  from ..evals.columns import EvalColumn
26
- from ..evals.table import EVAL_ID, EVAL_SUFFIX, ensure_eval_id, evals_df
27
+ from ..evals.table import EVAL_ID, EVAL_SUFFIX, _read_evals_df, ensure_eval_id
28
+ from ..events.columns import EventColumn
29
+ from ..extract import message_as_str
30
+ from ..messages.columns import MessageColumn
27
31
  from ..record import import_record, resolve_duplicate_columns
28
32
  from ..util import (
29
33
  LogPaths,
@@ -46,39 +50,32 @@ SAMPLE_SUFFIX = "_sample"
46
50
 
47
51
  @overload
48
52
  def samples_df(
49
- logs: LogPaths,
53
+ logs: LogPaths = list_eval_logs(),
50
54
  columns: list[Column] = SampleSummary,
51
- recursive: bool = True,
52
- reverse: bool = False,
53
55
  strict: Literal[True] = True,
54
56
  ) -> "pd.DataFrame": ...
55
57
 
56
58
 
57
59
  @overload
58
60
  def samples_df(
59
- logs: LogPaths,
61
+ logs: LogPaths = list_eval_logs(),
60
62
  columns: list[Column] = SampleSummary,
61
- recursive: bool = True,
62
- reverse: bool = False,
63
63
  strict: Literal[False] = False,
64
64
  ) -> tuple["pd.DataFrame", ColumnErrors]: ...
65
65
 
66
66
 
67
67
  def samples_df(
68
- logs: LogPaths,
68
+ logs: LogPaths = list_eval_logs(),
69
69
  columns: list[Column] = SampleSummary,
70
- recursive: bool = True,
71
- reverse: bool = False,
72
70
  strict: bool = True,
73
71
  ) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
74
72
  """Read a dataframe containing samples from a set of evals.
75
73
 
76
74
  Args:
77
75
  logs: One or more paths to log files or log directories.
76
+ Defaults to the contents of the currently active log directory
77
+ (e.g. ./logs or INSPECT_LOG_DIR).
78
78
  columns: Specification for what columns to read from log files.
79
- recursive: Include recursive contents of directories (defaults to `True`)
80
- reverse: Reverse the order of the dataframe (by default, items
81
- are ordered from oldest to newest).
82
79
  strict: Raise import errors immediately. Defaults to `True`.
83
80
  If `False` then a tuple of `DataFrame` and errors is returned.
84
81
 
@@ -87,9 +84,7 @@ def samples_df(
87
84
  For `strict=False`, a tuple of Pandas `DataFrame` and a dictionary of errors
88
85
  encountered (by log file) during import.
89
86
  """
90
- return _read_samples_df(
91
- logs, columns, recursive=recursive, reverse=reverse, strict=strict
92
- )
87
+ return _read_samples_df(logs, columns, strict=strict)
93
88
 
94
89
 
95
90
  @dataclass
@@ -101,24 +96,22 @@ class MessagesDetail:
101
96
 
102
97
  @dataclass
103
98
  class EventsDetail:
104
- name: str = "message"
99
+ name: str = "event"
105
100
  col_type = EventColumn
106
- filter: Callable[[BaseEvent], bool] = lambda e: True
101
+ filter: Callable[[Event], bool] = lambda e: True
107
102
 
108
103
 
109
104
  def _read_samples_df(
110
105
  logs: LogPaths,
111
106
  columns: list[Column],
112
107
  *,
113
- recursive: bool = True,
114
- reverse: bool = False,
115
108
  strict: bool = True,
116
109
  detail: MessagesDetail | EventsDetail | None = None,
117
110
  ) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
118
111
  verify_prerequisites()
119
112
 
120
113
  # resolve logs
121
- logs = resolve_logs(logs, recursive=recursive, reverse=reverse)
114
+ logs = resolve_logs(logs)
122
115
 
123
116
  # split columns by type
124
117
  columns_eval: list[Column] = []
@@ -150,12 +143,31 @@ def _read_samples_df(
150
143
  # make sure eval_id is present
151
144
  ensure_eval_id(columns_eval)
152
145
 
153
- # read samples from each log
154
- sample_records: list[dict[str, ColumnType]] = []
155
- detail_records: list[dict[str, ColumnType]] = []
156
- all_errors = ColumnErrors()
157
- evals_table = evals_df(logs, columns=columns_eval)
158
- with display().progress(total=len(evals_table)) as p:
146
+ # determine how we will allocate progress
147
+ with import_progress("scanning logs", total=len(logs)) as (
148
+ p,
149
+ task_id,
150
+ ):
151
+
152
+ def progress() -> None:
153
+ p.update(task_id, advance=1)
154
+
155
+ # read samples from each log
156
+ sample_records: list[dict[str, ColumnType]] = []
157
+ detail_records: list[dict[str, ColumnType]] = []
158
+ all_errors = ColumnErrors()
159
+
160
+ # read logs and note total samples
161
+ evals_table, total_samples = _read_evals_df(
162
+ logs, columns=columns_eval, strict=True, progress=progress
163
+ )
164
+
165
+ # update progress now that we know the total samples
166
+ entity = detail.name if detail else "sample"
167
+ p.reset(
168
+ task_id, description=f"reading {entity}s", completed=0, total=total_samples
169
+ )
170
+
159
171
  # read samples
160
172
  for eval_id, log in zip(evals_table[EVAL_ID].to_list(), logs):
161
173
  # get a generator for the samples (might require reading the full log
@@ -191,9 +203,9 @@ def _read_samples_df(
191
203
  # filter detail records
192
204
  assert isinstance(sample, EvalSample)
193
205
  if isinstance(detail, MessagesDetail):
194
- detail_items: list[ChatMessage] | list[Event] = [
195
- m for m in sample.messages if detail.filter(m)
196
- ]
206
+ detail_items: list[ChatMessage] | list[Event] = (
207
+ sample_messages_from_events(sample.events, detail.filter)
208
+ )
197
209
  elif isinstance(detail, EventsDetail):
198
210
  detail_items = [e for e in sample.events if detail.filter(e)]
199
211
  else:
@@ -226,7 +238,7 @@ def _read_samples_df(
226
238
 
227
239
  # record sample record
228
240
  sample_records.append(record)
229
- p.update()
241
+ progress()
230
242
 
231
243
  # normalize records and produce samples table
232
244
  samples_table = records_to_pandas(sample_records)
@@ -262,6 +274,35 @@ def _read_samples_df(
262
274
  return samples_table, all_errors
263
275
 
264
276
 
277
+ def sample_messages_from_events(
278
+ events: list[Event], filter: Callable[[ChatMessage], bool]
279
+ ) -> list[ChatMessage]:
280
+ # don't yield the same event twice
281
+ ids: set[str] = set()
282
+
283
+ # we need to look at the full input to every model event and add
284
+ # messages we haven't seen before
285
+ messages: list[ChatMessage] = []
286
+ for event in events:
287
+ if event.event == "model":
288
+ event_messages = event.input + (
289
+ [event.output.message] if not event.output.empty else []
290
+ )
291
+ for message in event_messages:
292
+ id = message.id or message_hash(message_as_str(message))
293
+ if id not in ids:
294
+ messages.append(message)
295
+ ids.add(id)
296
+
297
+ # then apply the filter
298
+ return [message for message in messages if filter(message)]
299
+
300
+
301
+ @lru_cache(maxsize=100)
302
+ def message_hash(message: str) -> str:
303
+ return mm3_hash(message)
304
+
305
+
265
306
  def reorder_samples_df_columns(
266
307
  df: "pd.DataFrame",
267
308
  eval_columns: list[Column],
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Sequence, TypeAlias
9
9
  from inspect_ai._util.error import pip_dependency_error
10
10
  from inspect_ai._util.file import FileInfo, filesystem
11
11
  from inspect_ai._util.version import verify_required_version
12
- from inspect_ai.log._file import log_files_from_ls
12
+ from inspect_ai.log._file import EvalLogInfo, log_files_from_ls
13
13
 
14
14
  if TYPE_CHECKING:
15
15
  import pandas as pd
@@ -17,7 +17,9 @@ if TYPE_CHECKING:
17
17
 
18
18
  from .columns import ColumnType
19
19
 
20
- LogPaths: TypeAlias = PathLike[str] | str | Sequence[PathLike[str] | str]
20
+ LogPaths: TypeAlias = (
21
+ PathLike[str] | str | EvalLogInfo | Sequence[PathLike[str] | str | EvalLogInfo]
22
+ )
21
23
 
22
24
 
23
25
  def verify_prerequisites() -> None:
@@ -41,30 +43,31 @@ def verify_prerequisites() -> None:
41
43
  verify_required_version("inspect_ai.analysis", "pyarrow", "10.0.1")
42
44
 
43
45
 
44
- def resolve_logs(logs: LogPaths, recursive: bool, reverse: bool) -> list[str]:
46
+ def resolve_logs(logs: LogPaths) -> list[str]:
45
47
  # normalize to list of str
46
- logs = [logs] if isinstance(logs, str | PathLike) else logs
47
- logs = [Path(log).as_posix() if isinstance(log, PathLike) else log for log in logs]
48
+ logs = [logs] if isinstance(logs, str | PathLike | EvalLogInfo) else logs
49
+ logs_str = [
50
+ Path(log).as_posix()
51
+ if isinstance(log, PathLike)
52
+ else log.name
53
+ if isinstance(log, EvalLogInfo)
54
+ else log
55
+ for log in logs
56
+ ]
48
57
 
49
58
  # expand directories
50
59
  log_paths: list[FileInfo] = []
51
- for log in logs:
52
- if isinstance(log, PathLike):
53
- log = Path(log).as_posix()
54
- fs = filesystem(log)
55
- info = fs.info(log)
60
+ for log_str in logs_str:
61
+ fs = filesystem(log_str)
62
+ info = fs.info(log_str)
56
63
  if info.type == "directory":
57
64
  log_paths.extend(
58
- [
59
- fi
60
- for fi in fs.ls(info.name, recursive=recursive)
61
- if fi.type == "file"
62
- ]
65
+ [fi for fi in fs.ls(info.name, recursive=True) if fi.type == "file"]
63
66
  )
64
67
  else:
65
68
  log_paths.append(info)
66
69
 
67
- log_files = log_files_from_ls(log_paths, descending=reverse)
70
+ log_files = log_files_from_ls(log_paths, sort=False)
68
71
  return [log_file.name for log_file in log_files]
69
72
 
70
73
 
inspect_ai/log/_file.py CHANGED
@@ -526,12 +526,19 @@ def log_files_from_ls(
526
526
  ls: list[FileInfo],
527
527
  formats: list[Literal["eval", "json"]] | None = None,
528
528
  descending: bool = True,
529
+ sort: bool = True,
529
530
  ) -> list[EvalLogInfo]:
530
531
  extensions = [f".{format}" for format in (formats or ALL_LOG_FORMATS)]
531
532
  return [
532
533
  log_file_info(file)
533
- for file in sorted(
534
- ls, key=lambda file: (file.mtime if file.mtime else 0), reverse=descending
534
+ for file in (
535
+ sorted(
536
+ ls,
537
+ key=lambda file: (file.mtime if file.mtime else 0),
538
+ reverse=descending,
539
+ )
540
+ if sort
541
+ else ls
535
542
  )
536
543
  if file.type == "file" and is_log_file(file.name, extensions)
537
544
  ]
@@ -138,7 +138,7 @@ class TaskState:
138
138
  The `TaskState` represents the internal state of the `Task` being run for a single `Sample`.
139
139
 
140
140
  The `TaskState` is passed to and returned from each solver during a sample's
141
- evaluation. It allows us to manipulated the message history, the tools
141
+ evaluation. It allows us to maintain the manipulated message history, the tools
142
142
  available to the model, the final output of the model, and whether the task
143
143
  is completed or has hit a limit.
144
144
  """
@@ -5,6 +5,7 @@ from typing import TextIO
5
5
  import anyio
6
6
  from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
7
7
  from mcp import JSONRPCRequest, StdioServerParameters
8
+ from mcp.shared.message import SessionMessage
8
9
  from mcp.types import JSONRPCMessage, JSONRPCNotification
9
10
 
10
11
  from inspect_ai.tool._tool_support_helpers import (
@@ -36,12 +37,12 @@ async def sandbox_client( # type: ignore
36
37
  )
37
38
 
38
39
  # read_stream is remote process's stdout
39
- read_stream: MemoryObjectReceiveStream[JSONRPCMessage | Exception]
40
- read_stream_writer: MemoryObjectSendStream[JSONRPCMessage | Exception]
40
+ read_stream: MemoryObjectReceiveStream[SessionMessage | Exception]
41
+ read_stream_writer: MemoryObjectSendStream[SessionMessage | Exception]
41
42
 
42
43
  # write_stream is remote process's stdin
43
- write_stream: MemoryObjectSendStream[JSONRPCMessage]
44
- write_stream_reader: MemoryObjectReceiveStream[JSONRPCMessage]
44
+ write_stream: MemoryObjectSendStream[SessionMessage]
45
+ write_stream_reader: MemoryObjectReceiveStream[SessionMessage]
45
46
 
46
47
  read_stream_writer, read_stream = anyio.create_memory_object_stream(0)
47
48
  write_stream, write_stream_reader = anyio.create_memory_object_stream(0)
@@ -64,18 +65,20 @@ async def sandbox_client( # type: ignore
64
65
  async with write_stream_reader:
65
66
  # This reads messages until the stream is closed
66
67
  async for message in write_stream_reader:
67
- root = message.root
68
+ root = message.message.root
68
69
  if isinstance(root, JSONRPCRequest):
69
70
  await read_stream_writer.send(
70
- await exec_model_request(
71
- sandbox=sandbox_environment,
72
- method="mcp_send_request",
73
- params={
74
- "session_id": session_id,
75
- "request": root.model_dump(),
76
- },
77
- result_type=JSONRPCMessage,
78
- timeout=timeout,
71
+ SessionMessage(
72
+ message=await exec_model_request(
73
+ sandbox=sandbox_environment,
74
+ method="mcp_send_request",
75
+ params={
76
+ "session_id": session_id,
77
+ "request": root.model_dump(),
78
+ },
79
+ result_type=JSONRPCMessage,
80
+ timeout=timeout,
81
+ )
79
82
  )
80
83
  )
81
84
  elif isinstance(root, JSONRPCNotification):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.95
3
+ Version: 0.3.96
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License
@@ -23,7 +23,7 @@ License-File: LICENSE
23
23
  Requires-Dist: aiohttp>=3.9.0
24
24
  Requires-Dist: anyio>=4.8.0
25
25
  Requires-Dist: beautifulsoup4
26
- Requires-Dist: click>=8.1.3
26
+ Requires-Dist: click<8.2.0,>=8.1.3
27
27
  Requires-Dist: debugpy
28
28
  Requires-Dist: docstring-parser>=0.16
29
29
  Requires-Dist: exceptiongroup>=1.0.2; python_version < "3.11"
@@ -98,7 +98,7 @@ inspect_ai/_util/interrupt.py,sha256=T30e5YaKSNmnO695p0lK0dquUWFq6dNNtdAFPmWGwME
98
98
  inspect_ai/_util/json.py,sha256=LiHF4XPrcuCBpnBKYCIX2AkvmsYuPieQ6HNdSlUMVvU,3653
99
99
  inspect_ai/_util/kvstore.py,sha256=z2IXLWP4QqqGqsq5_MbYjBQPcEJqfWK4IyZXgV-kppA,2398
100
100
  inspect_ai/_util/list.py,sha256=6_5r5jI5RKK34kCmIqqVQ5hYG-G8v0F5H7L-DmQQ2E4,279
101
- inspect_ai/_util/local_server.py,sha256=gtDaxmpeKjiIIFUo9tSEx5Avc8fCl4D_b5lH-TY3xUc,13142
101
+ inspect_ai/_util/local_server.py,sha256=T54l-csb2qmQDvZ7zNYVq6_j0BuW5FZSBKT9GfXNc6w,13787
102
102
  inspect_ai/_util/logger.py,sha256=XpGyoe8V7FIhNU1rnjTjwR07LVbshA9rRZn33sOitig,6230
103
103
  inspect_ai/_util/notebook.py,sha256=Mgz3J4uBh-MqVBRmpiJqDHRpn2hd7HIOBeJBwLG-bbk,2998
104
104
  inspect_ai/_util/notgiven.py,sha256=zkn6AYflKLf8YlnwTAMxPLQ-4LyIVmKpGcNcXf-Ssng,457
@@ -485,7 +485,7 @@ inspect_ai/agent/_filter.py,sha256=qnT0HbT4edpDi0MwXY3Q3It2pzNRkTRXZDOqfCwMY6M,1
485
485
  inspect_ai/agent/_handoff.py,sha256=NY29zJWxZyB9YtIi9TtD7ydvULEY-Q8wfdedMDD1bjA,3729
486
486
  inspect_ai/agent/_react.py,sha256=oTHY-ZMXkCNMBwn161G_Ov-svgKqAfzOp7FryJg9imE,14078
487
487
  inspect_ai/agent/_run.py,sha256=9KAfguMPn9czothbFk_ng5xRtvIWeOjNvHuvERWENMU,1875
488
- inspect_ai/agent/_types.py,sha256=HoTuocY9qFU2cwmNujC5-4N1ACbBmwhldwALpMB2QhE,4204
488
+ inspect_ai/agent/_types.py,sha256=FALCBDziC2CrEy18wBzBbIcQlZs5bCPilyqQ4RXizDc,4227
489
489
  inspect_ai/agent/_bridge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
490
490
  inspect_ai/agent/_bridge/bridge.py,sha256=Qk1z54vSZvFZMmFMOvopwY6rhFxHmJwOipZ_yVsbryU,3465
491
491
  inspect_ai/agent/_bridge/patch.py,sha256=IFpgL7WImh5RnAz7fAr574krVqa_Gm9A_eZ7leW983s,7061
@@ -505,28 +505,30 @@ inspect_ai/agent/_human/commands/score.py,sha256=6DyKiYHU7w-tKxHH5cZ0rXgFY7NWc4k
505
505
  inspect_ai/agent/_human/commands/status.py,sha256=uUO5M4skWDp29OS8sqVKAqZw0OcM3MSesBYQNbRypJ0,1934
506
506
  inspect_ai/agent/_human/commands/submit.py,sha256=D2p1M2ApvAcaVZhbP3fFofG9ZsPVvmxivSLIF5xQxtA,6524
507
507
  inspect_ai/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
508
- inspect_ai/analysis/beta/__init__.py,sha256=mANqMDNdVJSwp3tXB_S7kTTpHYMFSWSRILU2c1_kfuI,1242
508
+ inspect_ai/analysis/beta/__init__.py,sha256=iz72c_fRBhtXmfBUPH_cGnnFpH-SD9DEULTb0-pNY-8,1413
509
509
  inspect_ai/analysis/beta/_dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
510
510
  inspect_ai/analysis/beta/_dataframe/columns.py,sha256=feUqCpm9kxieoKPwXT8EwF8DTcwxG4JCCjCGO5XNcJc,4454
511
- inspect_ai/analysis/beta/_dataframe/extract.py,sha256=ZhLMcoqE1j722wYosfdaHL-gLiBpjtQV1sZ_buA7n3Y,1525
511
+ inspect_ai/analysis/beta/_dataframe/extract.py,sha256=MnRUwtJ0ATS-94qi8nzqZ5hdV2ZQ5rw_kBZ_FCxxdLg,2288
512
+ inspect_ai/analysis/beta/_dataframe/progress.py,sha256=YUUi8U-4BIklDmPbuVCeIQ6DkpQMC0tJHrGrZdOLIno,626
512
513
  inspect_ai/analysis/beta/_dataframe/record.py,sha256=rT3k9LuMvogw2kbFoCIHhNYb_p8QqER_FY2J9W0f1kY,12690
513
- inspect_ai/analysis/beta/_dataframe/util.py,sha256=qa6WHBPbleryuCtVHcoELNGzQb3VtOTMpA3E6RKCfYU,4981
514
+ inspect_ai/analysis/beta/_dataframe/util.py,sha256=OGfBa2P3i3a1PQQP7Q5Y-uaUms1gYuaE83kvnfhrYXA,4964
514
515
  inspect_ai/analysis/beta/_dataframe/validate.py,sha256=_UBn_fosgppF3Y5wCCtF8-cnCVM61XdOK6Lm91jMgH0,6213
515
516
  inspect_ai/analysis/beta/_dataframe/evals/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
516
517
  inspect_ai/analysis/beta/_dataframe/evals/columns.py,sha256=ZMR1AByGmHWGmn3qoWefF7pDNnL4mMMlzDlwkUECm5I,4725
517
518
  inspect_ai/analysis/beta/_dataframe/evals/extract.py,sha256=XUHFWveTcAFWYTPFgsOIKB9jZT0o4v_7ElVZGJ-SAf8,586
518
- inspect_ai/analysis/beta/_dataframe/evals/table.py,sha256=nPO4fj2BEcGR41ESZIps5n4ZjXn8wCCwbYWSd4zBV-M,4005
519
+ inspect_ai/analysis/beta/_dataframe/evals/table.py,sha256=oxSJg762WPIjTln5P04nC_h-KDmAEblROyMUgePPqak,5077
519
520
  inspect_ai/analysis/beta/_dataframe/events/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
520
- inspect_ai/analysis/beta/_dataframe/events/columns.py,sha256=MipnAkZxhkajZhxLtLR7t8EY8maDI0s9iZ5hkusPBHc,1022
521
- inspect_ai/analysis/beta/_dataframe/events/table.py,sha256=j1HFEld4I5d5wxh0RfJUFB4nj1YMp5xF65pE0vWADJ0,339
521
+ inspect_ai/analysis/beta/_dataframe/events/columns.py,sha256=VH6U0zXiBEK_4dXskh1OhksYjAY7KvpZWMTv9w0bBbA,2912
522
+ inspect_ai/analysis/beta/_dataframe/events/extract.py,sha256=XxCMslBjzbI_q74bG47w5f9ncBzqJxMXSrCBJ3g23NE,705
523
+ inspect_ai/analysis/beta/_dataframe/events/table.py,sha256=KDZbhCgwevfwAHdSGIhUIvzBKqJWFzRe6OalxJpDRE8,2869
522
524
  inspect_ai/analysis/beta/_dataframe/messages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
523
- inspect_ai/analysis/beta/_dataframe/messages/columns.py,sha256=_WpVP8DdMz827zhGe50_A_z59SX333O54RzXqdBH87Y,1728
525
+ inspect_ai/analysis/beta/_dataframe/messages/columns.py,sha256=T8dbyGsg6ut7G0xbnfxVAaJK43EmfvDnCbIhIvrmbB8,1728
524
526
  inspect_ai/analysis/beta/_dataframe/messages/extract.py,sha256=B7st9zoXSIj_sXm9-h_fLaRtb3ybIgXcOk41IfOxhGA,660
525
- inspect_ai/analysis/beta/_dataframe/messages/table.py,sha256=FugXqrhPPRANu86nTc7CrWaITxw7MQcOS75uYplgfM4,2713
527
+ inspect_ai/analysis/beta/_dataframe/messages/table.py,sha256=pAESqFx9WzAyuQCsjrzD0ShbJT1yFf7Con6cu10etbs,2519
526
528
  inspect_ai/analysis/beta/_dataframe/samples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
527
- inspect_ai/analysis/beta/_dataframe/samples/columns.py,sha256=PURrwrtAQpzMS3fozXFrjgkwnh_J549KTKA3D593Cdw,2477
528
- inspect_ai/analysis/beta/_dataframe/samples/extract.py,sha256=FEKFZYUqmxFLIR1qK-6mn5M7yBS2YG0wErYco7VtcE4,2613
529
- inspect_ai/analysis/beta/_dataframe/samples/table.py,sha256=J9fpBiI7vfLt3Zjj2rcfLfBKZAmVZ9dCGVARVZLN5_A,11014
529
+ inspect_ai/analysis/beta/_dataframe/samples/columns.py,sha256=Ffi734379rSwrkDth3wyMGVIsrepp8fjXKFVWUS-CQw,2493
530
+ inspect_ai/analysis/beta/_dataframe/samples/extract.py,sha256=WkalxZbV4Fwx1hCJIdC3D6JeE51tPBNkufjQ762eWEQ,1404
531
+ inspect_ai/analysis/beta/_dataframe/samples/table.py,sha256=c9CMdrcCcZECcvNn1jsZj_oh1RirX9aSOLJxFJ9HnY4,12252
530
532
  inspect_ai/approval/__init__.py,sha256=Bqq4GFljOqKaIUkuCvhlFv89TfJpvbuO_R0jVyjb8VI,379
531
533
  inspect_ai/approval/_apply.py,sha256=v9v9XfvBt203TbvdB5aJbHR_SqC23xcEjBPpESbXKg8,2146
532
534
  inspect_ai/approval/_approval.py,sha256=twQcEvfU3-hPdsG785ak8OvRMOzMa00-UQAdz9Mh8Fo,863
@@ -560,7 +562,7 @@ inspect_ai/log/__init__.py,sha256=PZsopxfD0ipS6g_5CMipbttrxI1R1fy10Si0zs4lO38,25
560
562
  inspect_ai/log/_bundle.py,sha256=5Uy-s64_SFokZ7WRzti9mD7yoKrd2sOzdvqKyahoiC4,8045
561
563
  inspect_ai/log/_condense.py,sha256=OedMphK5Q2YPuY1cnoAM7tGsyVIU6Kwrv3oIeb3dFmY,10881
562
564
  inspect_ai/log/_convert.py,sha256=afEOHkaQtCkTWdwyFweGTEzLq0VVdhTjhr0IgVX5W7I,3324
563
- inspect_ai/log/_file.py,sha256=12DJm7ns-YXRqLM1g7Dx_ra8O77ZJ4nEZZf3eYqQnJE,19375
565
+ inspect_ai/log/_file.py,sha256=PPYVC1TbvGpWjUyke_in84fNQQ-U-ybZdMV2jbX0ugU,19503
564
566
  inspect_ai/log/_log.py,sha256=2WiLyUPygrq4CyzRoDCT5-lqRzh-HMkt-pHEfuEt0sE,29994
565
567
  inspect_ai/log/_message.py,sha256=QofM_JZF_x3k_5ta1uQzoN_VnMoUhXFnqWurIn9FXOY,1999
566
568
  inspect_ai/log/_model.py,sha256=8tEhFZc1tBFgA6A_spXTqTBdvbzZP5t7ul7DiloHRWk,1698
@@ -661,7 +663,7 @@ inspect_ai/solver/_plan.py,sha256=lpbjIbBpiPzud7jaHqA81ZFFO0gjt_4EW0blzG4DquA,72
661
663
  inspect_ai/solver/_prompt.py,sha256=n2gkRUMSRKViDBL4WtepNoMx7zidIkQgOHLGllP6WVo,4955
662
664
  inspect_ai/solver/_run.py,sha256=k-IYoFpyNq8-HTFgQck4Akvs3OtopiL4qRWj8_yLhvY,1763
663
665
  inspect_ai/solver/_solver.py,sha256=UJ2CvmJr74n65x4xipZTxNzGfvUyuTHnnRSY0QqNo5I,9563
664
- inspect_ai/solver/_task_state.py,sha256=Q_hsd9BugMYQ8Q25WaxVKh6xWz7lvGyymdghZdUi_2w,14695
666
+ inspect_ai/solver/_task_state.py,sha256=DMbaDuXOViZlCez5pEv3Y9czIPk61qyfycNjSIgprOI,14704
665
667
  inspect_ai/solver/_transcript.py,sha256=kdnkR8243NXlIvcDpZ4nb1XKT7pBYHLk5V26MtwP2EU,1047
666
668
  inspect_ai/solver/_use_tools.py,sha256=VmhCjKpkWgifOS20toBcK2bFDmyPqfxkBvcHs_-nv58,2235
667
669
  inspect_ai/solver/_util.py,sha256=pthrf-CzC6FnQYSUFLXTYM4wFEJptZrh5POTmV-Jtow,446
@@ -681,7 +683,7 @@ inspect_ai/tool/beta.py,sha256=KQYntN2MLiIHp4Gf4GXv3QO3aYHBBaP-npkluTT-aDM,153
681
683
  inspect_ai/tool/_mcp/__init__.py,sha256=vqtlBle1T_jlRQPvLKJbLgW5h_I0Ee33nDBI-rCtIeA,314
682
684
  inspect_ai/tool/_mcp/_context.py,sha256=tKQuBZ5ooRvDEW0ffACejdjKi7f8VFfYRn5uaMZGDPw,405
683
685
  inspect_ai/tool/_mcp/_mcp.py,sha256=gNTlNTzMRU5L-h4_EGPqosbPLumSdIh3_25ofrGodqs,10599
684
- inspect_ai/tool/_mcp/_sandbox.py,sha256=eM-B9x3NQfAoa7mw67mPdlLqwMATtvYtP187MJdxw1I,4268
686
+ inspect_ai/tool/_mcp/_sandbox.py,sha256=tW3-kqUrtKlbPEUtyIP2Ywh7FhakCQA9dyeabmLnPuU,4444
685
687
  inspect_ai/tool/_mcp/_types.py,sha256=RT9ZRugYR3ArKe54_fuYxeenlWa_os0_DYadVIJEHlM,769
686
688
  inspect_ai/tool/_mcp/connection.py,sha256=c1VRVtN90f2KptKCXlQ6fAX2Bxx8HXu3_ZvYmt_35dw,1901
687
689
  inspect_ai/tool/_mcp/sampling.py,sha256=YDfrYj6GAec4R3JkQpUc_fPROQUpRARvbUPq7FVKSQ0,4001
@@ -737,9 +739,9 @@ inspect_ai/util/_sandbox/docker/internal.py,sha256=c8X8TLrBPOvsfnq5TkMlb_bzTALyc
737
739
  inspect_ai/util/_sandbox/docker/prereqs.py,sha256=0j6_OauBBnVlpBleADcZavIAAQZy4WewVjbRn9c0stg,3355
738
740
  inspect_ai/util/_sandbox/docker/service.py,sha256=hhHIWH1VDFLwehdGd19aUBD_VKfDO3GCPxpw1HSwVQk,2437
739
741
  inspect_ai/util/_sandbox/docker/util.py,sha256=EeInihCNXgUWxaqZ4dNOJd719kXL2_jr63QCoXn68vA,3154
740
- inspect_ai-0.3.95.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
741
- inspect_ai-0.3.95.dist-info/METADATA,sha256=_P4GBqO5yJ99jOvUQ1s4Nq3evvpL4TCzBkoxbSBsuwI,5431
742
- inspect_ai-0.3.95.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
743
- inspect_ai-0.3.95.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
744
- inspect_ai-0.3.95.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
745
- inspect_ai-0.3.95.dist-info/RECORD,,
742
+ inspect_ai-0.3.96.dist-info/licenses/LICENSE,sha256=xZPCr8gTiFIerrA_DRpLAbw-UUftnLFsHxKeW-NTtq8,1081
743
+ inspect_ai-0.3.96.dist-info/METADATA,sha256=GHfPnN-m8cQUeqysFWpqn5z0tfuMt7JCeTNN1tYJYOA,5438
744
+ inspect_ai-0.3.96.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
745
+ inspect_ai-0.3.96.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
746
+ inspect_ai-0.3.96.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
747
+ inspect_ai-0.3.96.dist-info/RECORD,,