inspect-ai 0.3.95__py3-none-any.whl → 0.3.97__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. inspect_ai/_eval/eval.py +10 -2
  2. inspect_ai/_eval/task/util.py +32 -3
  3. inspect_ai/_util/local_server.py +16 -0
  4. inspect_ai/_util/registry.py +7 -0
  5. inspect_ai/_util/timer.py +13 -0
  6. inspect_ai/_view/www/dist/assets/index.css +275 -195
  7. inspect_ai/_view/www/dist/assets/index.js +8568 -7376
  8. inspect_ai/_view/www/src/app/App.css +1 -0
  9. inspect_ai/_view/www/src/app/App.tsx +27 -10
  10. inspect_ai/_view/www/src/app/appearance/icons.ts +5 -0
  11. inspect_ai/_view/www/src/app/content/RecordTree.module.css +22 -0
  12. inspect_ai/_view/www/src/app/content/RecordTree.tsx +370 -0
  13. inspect_ai/_view/www/src/app/content/RenderedContent.module.css +5 -0
  14. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +32 -19
  15. inspect_ai/_view/www/src/app/content/record_processors/store.ts +101 -0
  16. inspect_ai/_view/www/src/app/content/record_processors/types.ts +3 -0
  17. inspect_ai/_view/www/src/app/content/types.ts +5 -0
  18. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -0
  19. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +35 -28
  20. inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +1 -8
  21. inspect_ai/_view/www/src/app/log-view/navbar/PrimaryBar.tsx +2 -4
  22. inspect_ai/_view/www/src/app/log-view/navbar/ResultsPanel.tsx +13 -3
  23. inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.module.css +15 -0
  24. inspect_ai/_view/www/src/app/log-view/navbar/ScoreGrid.tsx +14 -10
  25. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +9 -3
  26. inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +1 -3
  27. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +8 -2
  28. inspect_ai/_view/www/src/app/log-view/types.ts +1 -0
  29. inspect_ai/_view/www/src/app/plan/ModelCard.module.css +7 -0
  30. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +5 -2
  31. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +13 -8
  32. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +63 -8
  33. inspect_ai/_view/www/src/app/routing/url.ts +45 -0
  34. inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +2 -1
  35. inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.tsx +15 -8
  36. inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +3 -0
  37. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +16 -5
  38. inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +9 -1
  39. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +68 -31
  40. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +12 -7
  41. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -5
  42. inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.module.css +9 -0
  43. inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +48 -18
  44. inspect_ai/_view/www/src/app/samples/chat/ChatView.tsx +0 -1
  45. inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.module.css +4 -0
  46. inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +41 -1
  47. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -0
  48. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +0 -3
  49. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -1
  50. inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +1 -1
  51. inspect_ai/_view/www/src/app/samples/chat/tools/ToolOutput.module.css +1 -1
  52. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +5 -1
  53. inspect_ai/_view/www/src/app/samples/descriptor/score/PassFailScoreDescriptor.tsx +11 -6
  54. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +7 -0
  55. inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +5 -18
  56. inspect_ai/_view/www/src/app/samples/sample-tools/SortFilter.tsx +1 -1
  57. inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.tsx +18 -5
  58. inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.module.css +0 -6
  59. inspect_ai/_view/www/src/app/samples/scores/SampleScoresView.tsx +4 -1
  60. inspect_ai/_view/www/src/app/samples/transcript/ApprovalEventView.tsx +4 -2
  61. inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +6 -4
  62. inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.module.css +1 -1
  63. inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +13 -6
  64. inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +6 -4
  65. inspect_ai/_view/www/src/app/samples/transcript/LoggerEventView.tsx +4 -2
  66. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +11 -8
  67. inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +14 -8
  68. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +13 -8
  69. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +25 -16
  70. inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +7 -5
  71. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +11 -28
  72. inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +12 -20
  73. inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +12 -31
  74. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +25 -29
  75. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +297 -0
  76. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +0 -8
  77. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +43 -25
  78. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +43 -0
  79. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +109 -43
  80. inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +19 -8
  81. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +128 -60
  82. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +14 -4
  83. inspect_ai/_view/www/src/app/samples/transcript/types.ts +6 -4
  84. inspect_ai/_view/www/src/app/types.ts +12 -1
  85. inspect_ai/_view/www/src/components/Card.css +6 -3
  86. inspect_ai/_view/www/src/components/Card.tsx +15 -2
  87. inspect_ai/_view/www/src/components/CopyButton.tsx +4 -6
  88. inspect_ai/_view/www/src/components/ExpandablePanel.module.css +20 -14
  89. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +17 -22
  90. inspect_ai/_view/www/src/components/LargeModal.tsx +5 -1
  91. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +25 -1
  92. inspect_ai/_view/www/src/components/MarkdownDiv.css +4 -0
  93. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +2 -2
  94. inspect_ai/_view/www/src/components/TabSet.module.css +6 -1
  95. inspect_ai/_view/www/src/components/TabSet.tsx +8 -2
  96. inspect_ai/_view/www/src/state/hooks.ts +83 -13
  97. inspect_ai/_view/www/src/state/logPolling.ts +2 -2
  98. inspect_ai/_view/www/src/state/logSlice.ts +1 -2
  99. inspect_ai/_view/www/src/state/logsSlice.ts +9 -9
  100. inspect_ai/_view/www/src/state/samplePolling.ts +1 -1
  101. inspect_ai/_view/www/src/state/sampleSlice.ts +134 -7
  102. inspect_ai/_view/www/src/state/scoring.ts +1 -1
  103. inspect_ai/_view/www/src/state/scrolling.ts +39 -6
  104. inspect_ai/_view/www/src/state/store.ts +5 -0
  105. inspect_ai/_view/www/src/state/store_filter.ts +47 -44
  106. inspect_ai/_view/www/src/utils/debugging.ts +95 -0
  107. inspect_ai/_view/www/src/utils/format.ts +2 -2
  108. inspect_ai/_view/www/src/utils/json.ts +29 -0
  109. inspect_ai/agent/__init__.py +2 -1
  110. inspect_ai/agent/_agent.py +12 -0
  111. inspect_ai/agent/_react.py +184 -48
  112. inspect_ai/agent/_types.py +15 -2
  113. inspect_ai/analysis/beta/__init__.py +11 -3
  114. inspect_ai/analysis/beta/_dataframe/columns.py +11 -16
  115. inspect_ai/analysis/beta/_dataframe/evals/table.py +101 -39
  116. inspect_ai/analysis/beta/_dataframe/events/columns.py +50 -0
  117. inspect_ai/analysis/beta/_dataframe/events/extract.py +26 -0
  118. inspect_ai/analysis/beta/_dataframe/events/table.py +77 -3
  119. inspect_ai/analysis/beta/_dataframe/extract.py +44 -25
  120. inspect_ai/analysis/beta/_dataframe/messages/columns.py +1 -1
  121. inspect_ai/analysis/beta/_dataframe/messages/table.py +30 -29
  122. inspect_ai/analysis/beta/_dataframe/progress.py +56 -0
  123. inspect_ai/analysis/beta/_dataframe/record.py +13 -9
  124. inspect_ai/analysis/beta/_dataframe/samples/columns.py +8 -4
  125. inspect_ai/analysis/beta/_dataframe/samples/extract.py +5 -33
  126. inspect_ai/analysis/beta/_dataframe/samples/table.py +211 -60
  127. inspect_ai/analysis/beta/_dataframe/util.py +33 -28
  128. inspect_ai/log/_file.py +9 -2
  129. inspect_ai/model/_call_tools.py +1 -1
  130. inspect_ai/model/_providers/anthropic.py +18 -5
  131. inspect_ai/model/_providers/azureai.py +7 -2
  132. inspect_ai/model/_providers/util/llama31.py +3 -3
  133. inspect_ai/solver/_task_state.py +1 -1
  134. inspect_ai/tool/_mcp/_sandbox.py +17 -14
  135. {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/METADATA +2 -2
  136. {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/RECORD +140 -133
  137. {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/WHEEL +1 -1
  138. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.module.css +0 -48
  139. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +0 -276
  140. {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/entry_points.txt +0 -0
  141. {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/licenses/LICENSE +0 -0
  142. {inspect_ai-0.3.95.dist-info → inspect_ai-0.3.97.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ from datetime import datetime
1
2
  from typing import Any, Callable, Mapping, Type
2
3
 
3
4
  from jsonpath_ng import JSONPath # type: ignore
@@ -7,6 +8,12 @@ from typing_extensions import override
7
8
  from inspect_ai.log._transcript import Event
8
9
 
9
10
  from ..columns import Column, ColumnType
11
+ from .extract import (
12
+ completion_as_str,
13
+ model_event_input_as_str,
14
+ tool_choice_as_str,
15
+ tool_view_as_str,
16
+ )
10
17
 
11
18
 
12
19
  class EventColumn(Column):
@@ -35,3 +42,46 @@ class EventColumn(Column):
35
42
  @override
36
43
  def path_schema(self) -> Mapping[str, Any] | None:
37
44
  return None
45
+
46
+
47
+ EventInfo: list[Column] = [
48
+ EventColumn("event", path="event"),
49
+ EventColumn("span_id", path="span_id"),
50
+ ]
51
+ """Event basic information columns."""
52
+
53
+ EventTiming: list[Column] = [
54
+ EventColumn("timestamp", path="timestamp", type=datetime),
55
+ EventColumn("completed", path="completed", type=datetime),
56
+ EventColumn("working_start", path="working_start"),
57
+ EventColumn("working_time", path="working_time"),
58
+ ]
59
+ """Event timing columns."""
60
+
61
+ ModelEventColumns: list[Column] = [
62
+ EventColumn("model_event_model", path="model"),
63
+ EventColumn("model_event_role", path="role"),
64
+ EventColumn("model_event_input", path=model_event_input_as_str),
65
+ EventColumn("model_event_tools", path="tools"),
66
+ EventColumn("model_event_tool_choice", path=tool_choice_as_str),
67
+ EventColumn("model_event_config", path="config"),
68
+ EventColumn("model_event_usage", path="output.usage"),
69
+ EventColumn("model_event_time", path="output.time"),
70
+ EventColumn("model_event_completion", path=completion_as_str),
71
+ EventColumn("model_event_retries", path="retries"),
72
+ EventColumn("model_event_error", path="error"),
73
+ EventColumn("model_event_cache", path="cache"),
74
+ EventColumn("model_event_call", path="call"),
75
+ ]
76
+ """Model event columns."""
77
+
78
+ ToolEventColumns: list[Column] = [
79
+ EventColumn("tool_event_function", path="function"),
80
+ EventColumn("tool_event_arguments", path="arguments"),
81
+ EventColumn("tool_event_view", path=tool_view_as_str),
82
+ EventColumn("tool_event_result", path="result"),
83
+ EventColumn("tool_event_truncated", path="truncated"),
84
+ EventColumn("tool_event_error_type", path="error.type"),
85
+ EventColumn("tool_event_error_message", path="error.message"),
86
+ ]
87
+ """Tool event columns."""
@@ -0,0 +1,26 @@
1
+ from inspect_ai.log._transcript import ModelEvent, ToolEvent
2
+
3
+ from ..extract import messages_as_str
4
+
5
+
6
+ def model_event_input_as_str(event: ModelEvent) -> str:
7
+ return messages_as_str(event.input)
8
+
9
+
10
+ def tool_choice_as_str(event: ModelEvent) -> str:
11
+ if isinstance(event.tool_choice, str):
12
+ return event.tool_choice
13
+ else:
14
+ return event.tool_choice.name
15
+
16
+
17
+ def completion_as_str(event: ModelEvent) -> str:
18
+ return event.output.completion
19
+
20
+
21
+ def tool_view_as_str(event: ToolEvent) -> str | None:
22
+ if event.view is not None:
23
+ title = f"{event.view.title}\n\n" if event.view.title is not None else ""
24
+ return f"{title}{event.view.content}"
25
+ else:
26
+ return None
@@ -1,14 +1,88 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING
3
+ from typing import TYPE_CHECKING, Callable, Literal, Sequence, TypeAlias
4
+
5
+ from inspect_ai.analysis.beta._dataframe.events.columns import EventInfo
6
+ from inspect_ai.log._file import list_eval_logs
7
+ from inspect_ai.log._transcript import Event
4
8
 
5
9
  if TYPE_CHECKING:
6
10
  import pandas as pd
7
11
 
12
+ from typing_extensions import overload
13
+
14
+ from ..columns import Column, ColumnError
15
+ from ..samples.table import EventsDetail, _read_samples_df
8
16
  from ..util import LogPaths, verify_prerequisites
9
17
 
18
+ EventFilter: TypeAlias = Callable[[Event], bool]
19
+ """Filter for `events_df()` rows."""
20
+
21
+
22
+ @overload
23
+ def events_df(
24
+ logs: LogPaths = list_eval_logs(),
25
+ columns: Sequence[Column] = EventInfo,
26
+ filter: EventFilter | None = None,
27
+ strict: Literal[True] = True,
28
+ parallel: bool | int = False,
29
+ quiet: bool = False,
30
+ ) -> "pd.DataFrame": ...
31
+
10
32
 
11
- def events_df(logs: LogPaths, recursive: bool = True) -> "pd.DataFrame":
33
+ @overload
34
+ def events_df(
35
+ logs: LogPaths = list_eval_logs(),
36
+ columns: Sequence[Column] = EventInfo,
37
+ filter: EventFilter | None = None,
38
+ strict: Literal[False] = False,
39
+ parallel: bool | int = False,
40
+ quiet: bool = False,
41
+ ) -> tuple["pd.DataFrame", list[ColumnError]]: ...
42
+
43
+
44
+ def events_df(
45
+ logs: LogPaths = list_eval_logs(),
46
+ columns: Sequence[Column] = EventInfo,
47
+ filter: EventFilter | None = None,
48
+ strict: bool = True,
49
+ parallel: bool | int = False,
50
+ quiet: bool = False,
51
+ ) -> "pd.DataFrame" | tuple["pd.DataFrame", list[ColumnError]]:
52
+ """Read a dataframe containing events from a set of evals.
53
+
54
+ Args:
55
+ logs: One or more paths to log files or log directories.
56
+ Defaults to the contents of the currently active log directory
57
+ (e.g. ./logs or INSPECT_LOG_DIR).
58
+ columns: Specification for what columns to read from log files.
59
+ filter: Callable that filters event types.
60
+ strict: Raise import errors immediately. Defaults to `True`.
61
+ If `False` then a tuple of `DataFrame` and errors is returned.
62
+ parallel: If `True`, use `ProcessPoolExecutor` to read logs in parallel
63
+ (with workers based on `mp.cpu_count()`, capped at 8). If `int`, read
64
+ in parallel with the specified number of workers. If `False` (the default)
65
+ do not read in parallel.
66
+ quiet: If `True` do not print any output or progress (defaults to `False`).
67
+
68
+ Returns:
69
+ For `strict`, a Pandas `DataFrame` with information for the specified logs.
70
+ For `strict=False`, a tuple of Pandas `DataFrame` and a dictionary of errors
71
+ encountered (by log file) during import.
72
+ """
12
73
  verify_prerequisites()
13
74
 
14
- raise NotImplementedError("events_df has not been implemented yet.")
75
+ # resolve filter/detail
76
+ if callable(filter):
77
+ detail = EventsDetail(filter=filter)
78
+ else:
79
+ detail = EventsDetail()
80
+
81
+ return _read_samples_df(
82
+ logs=logs,
83
+ columns=columns,
84
+ strict=strict,
85
+ detail=detail,
86
+ progress=not quiet,
87
+ parallel=parallel,
88
+ )
@@ -5,11 +5,16 @@ from typing import Any, cast
5
5
  import shortuuid
6
6
  from pydantic import BaseModel, JsonValue
7
7
 
8
- from inspect_ai._util.json import jsonable_python
8
+ from inspect_ai.model._chat_message import (
9
+ ChatMessage,
10
+ ChatMessageAssistant,
11
+ ChatMessageTool,
12
+ ChatMessageUser,
13
+ )
9
14
 
10
15
 
11
16
  def model_to_record(model: BaseModel) -> dict[str, JsonValue]:
12
- return cast(dict[str, JsonValue], jsonable_python(model))
17
+ return cast(dict[str, JsonValue], model.model_dump(mode="json", exclude_none=True))
13
18
 
14
19
 
15
20
  def list_as_str(x: JsonValue) -> str:
@@ -21,34 +26,48 @@ def score_values(x: JsonValue) -> dict[str, JsonValue]:
21
26
  return {k: v["value"] for k, v in scores.items()}
22
27
 
23
28
 
24
- def input_as_str(x: JsonValue) -> str:
25
- if isinstance(x, str):
26
- return x
27
- else:
28
- return messages_as_str(x)
29
+ def auto_id(base: str, index: str) -> str:
30
+ seed = f"{base}_{index}"
31
+ hash_bytes = hashlib.md5(seed.encode("utf-8")).digest()
32
+ long_uuid = uuid.UUID(bytes=hash_bytes)
33
+ return shortuuid.encode(long_uuid)
29
34
 
30
35
 
31
- def messages_as_str(x: JsonValue) -> str:
32
- if isinstance(x, list):
33
- messages = cast(list[dict[str, Any]], x)
34
- return "\n\n".join([message_as_str(message) for message in messages])
35
- else:
36
- raise ValueError(f"Unexpected type for messages: {type(x)}")
36
+ def messages_as_str(messages: str | list[ChatMessage]) -> str:
37
+ if isinstance(messages, str):
38
+ messages = [ChatMessageUser(content=messages)]
39
+ return "\n\n".join([message_as_str(message) for message in messages])
37
40
 
38
41
 
39
- def message_as_str(message: dict[str, Any]) -> str:
40
- return f"{message['role']}:\n{content_as_str(message['content'])}"
42
+ def message_as_str(message: ChatMessage) -> str:
43
+ transcript: list[str] = []
44
+ role = message.role
45
+ content = message.text.strip() if message.text else ""
41
46
 
47
+ # assistant messages with tool calls
48
+ if isinstance(message, ChatMessageAssistant) and message.tool_calls is not None:
49
+ entry = f"{role}:\n{content}\n"
42
50
 
43
- def content_as_str(content: str | list[dict[str, Any]]) -> str:
44
- if isinstance(content, str):
45
- return content
46
- else:
47
- return "\n".join([c["text"] if c["type"] == "text" else "" for c in content])
51
+ for tool in message.tool_calls:
52
+ func_name = tool.function
53
+ args = tool.arguments
48
54
 
55
+ if isinstance(args, dict):
56
+ args_text = "\n".join(f"{k}: {v}" for k, v in args.items())
57
+ entry += f"\nTool Call: {func_name}\nArguments:\n{args_text}"
58
+ else:
59
+ entry += f"\nTool Call: {func_name}\nArguments: {args}"
49
60
 
50
- def auto_id(base: str, index: str) -> str:
51
- seed = f"{base}_{index}"
52
- hash_bytes = hashlib.md5(seed.encode("utf-8")).digest()
53
- long_uuid = uuid.UUID(bytes=hash_bytes)
54
- return shortuuid.encode(long_uuid)
61
+ transcript.append(entry)
62
+
63
+ # tool responses with errors
64
+ elif isinstance(message, ChatMessageTool) and message.error is not None:
65
+ func_name = message.function or "unknown"
66
+ entry = f"{role}:\n{content}\n\nError in tool call '{func_name}':\n{message.error.message}\n"
67
+ transcript.append(entry)
68
+
69
+ # normal messages
70
+ else:
71
+ transcript.append(f"{role}:\n{content}\n")
72
+
73
+ return "\n".join(transcript)
@@ -43,8 +43,8 @@ class MessageColumn(Column):
43
43
 
44
44
  MessageContent: list[Column] = [
45
45
  MessageColumn("role", path="role", required=True),
46
- MessageColumn("content", path=message_text),
47
46
  MessageColumn("source", path="source"),
47
+ MessageColumn("content", path=message_text),
48
48
  ]
49
49
  """Message content columns."""
50
50
 
@@ -1,7 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Callable, Literal, TypeAlias
3
+ from typing import TYPE_CHECKING, Callable, Literal, Sequence, TypeAlias
4
4
 
5
+ from inspect_ai.log._file import list_eval_logs
5
6
  from inspect_ai.model._chat_message import ChatMessage
6
7
 
7
8
  if TYPE_CHECKING:
@@ -9,58 +10,60 @@ if TYPE_CHECKING:
9
10
 
10
11
  from typing_extensions import overload
11
12
 
12
- from ..columns import Column, ColumnErrors
13
+ from ..columns import Column, ColumnError
13
14
  from ..samples.table import MessagesDetail, _read_samples_df
14
15
  from ..util import LogPaths, verify_prerequisites
15
16
  from .columns import MessageColumns
16
17
 
17
- MessageFilter: TypeAlias = (
18
- list[Literal["system", "user", "assistant", "tool"]] | Callable[[ChatMessage], bool]
19
- )
18
+ MessageFilter: TypeAlias = Callable[[ChatMessage], bool]
20
19
  """Filter for `messages_df()` rows."""
21
20
 
22
21
 
23
22
  @overload
24
23
  def messages_df(
25
- logs: LogPaths,
26
- columns: list[Column] = MessageColumns,
24
+ logs: LogPaths = list_eval_logs(),
25
+ columns: Sequence[Column] = MessageColumns,
27
26
  filter: MessageFilter | None = None,
28
- recursive: bool = True,
29
- reverse: bool = False,
30
27
  strict: Literal[True] = True,
28
+ parallel: bool | int = False,
29
+ quiet: bool = False,
31
30
  ) -> "pd.DataFrame": ...
32
31
 
33
32
 
34
33
  @overload
35
34
  def messages_df(
36
- logs: LogPaths,
37
- columns: list[Column] = MessageColumns,
35
+ logs: LogPaths = list_eval_logs(),
36
+ columns: Sequence[Column] = MessageColumns,
38
37
  filter: MessageFilter | None = None,
39
- recursive: bool = True,
40
- reverse: bool = False,
41
38
  strict: Literal[False] = False,
42
- ) -> tuple["pd.DataFrame", ColumnErrors]: ...
39
+ parallel: bool | int = False,
40
+ quiet: bool = False,
41
+ ) -> tuple["pd.DataFrame", list[ColumnError]]: ...
43
42
 
44
43
 
45
44
  def messages_df(
46
- logs: LogPaths,
47
- columns: list[Column] = MessageColumns,
45
+ logs: LogPaths = list_eval_logs(),
46
+ columns: Sequence[Column] = MessageColumns,
48
47
  filter: MessageFilter | None = None,
49
- recursive: bool = True,
50
- reverse: bool = False,
51
48
  strict: bool = True,
52
- ) -> "pd.DataFrame" | tuple["pd.DataFrame", ColumnErrors]:
49
+ parallel: bool | int = False,
50
+ quiet: bool = False,
51
+ ) -> "pd.DataFrame" | tuple["pd.DataFrame", list[ColumnError]]:
53
52
  """Read a dataframe containing messages from a set of evals.
54
53
 
55
54
  Args:
56
55
  logs: One or more paths to log files or log directories.
56
+ Defaults to the contents of the currently active log directory
57
+ (e.g. ./logs or INSPECT_LOG_DIR).
57
58
  columns: Specification for what columns to read from log files.
58
- filter: List of message role types to include or callable that performs the filter.
59
- recursive: Include recursive contents of directories (defaults to `True`)
60
- reverse: Reverse the order of the dataframe (by default, items
61
- are ordered from oldest to newest).
59
+ filter: Callable that filters messages
62
60
  strict: Raise import errors immediately. Defaults to `True`.
63
61
  If `False` then a tuple of `DataFrame` and errors is returned.
62
+ parallel: If `True`, use `ProcessPoolExecutor` to read logs in parallel
63
+ (with workers based on `mp.cpu_count()`, capped at 8). If `int`, read
64
+ in parallel with the specified number of workers. If `False` (the default)
65
+ do not read in parallel.
66
+ quiet: If `True` do not print any output or progress (defaults to `False`).
64
67
 
65
68
  Returns:
66
69
  For `strict`, a Pandas `DataFrame` with information for the specified logs.
@@ -70,18 +73,16 @@ def messages_df(
70
73
  verify_prerequisites()
71
74
 
72
75
  # resolve filter/detail
73
- if filter is None:
74
- detail = MessagesDetail(filter=lambda m: True)
75
- elif callable(filter):
76
+ if callable(filter):
76
77
  detail = MessagesDetail(filter=filter)
77
78
  else:
78
- detail = MessagesDetail(filter=lambda m: m.role in filter)
79
+ detail = MessagesDetail()
79
80
 
80
81
  return _read_samples_df(
81
82
  logs=logs,
82
83
  columns=columns,
83
- recursive=recursive,
84
- reverse=reverse,
85
84
  strict=strict,
86
85
  detail=detail,
86
+ parallel=parallel,
87
+ progress=not quiet,
87
88
  )
@@ -0,0 +1,56 @@
1
+ from contextlib import contextmanager
2
+ from typing import Iterator, Protocol
3
+
4
+ from rich.progress import (
5
+ BarColumn,
6
+ Progress,
7
+ TaskID,
8
+ TaskProgressColumn,
9
+ TextColumn,
10
+ TimeElapsedColumn,
11
+ )
12
+
13
+
14
+ class ImportProgress(Protocol):
15
+ def update(self) -> None: ...
16
+ def reset(self, description: str, completed: int, total: int) -> None: ...
17
+
18
+
19
+ class NoProgress(ImportProgress):
20
+ def update(self) -> None:
21
+ pass
22
+
23
+ def reset(self, description: str, completed: int, total: int) -> None:
24
+ pass
25
+
26
+
27
+ class RichImportProgress(ImportProgress):
28
+ def __init__(self, progress: Progress, task_id: TaskID) -> None:
29
+ self._progress = progress
30
+ self._task_id = task_id
31
+
32
+ def update(self) -> None:
33
+ self._progress.update(self._task_id, advance=1)
34
+
35
+ def reset(self, description: str, completed: int, total: int) -> None:
36
+ self._progress.reset(
37
+ self._task_id, description=description, completed=completed, total=total
38
+ )
39
+
40
+
41
+ @contextmanager
42
+ def no_progress() -> Iterator[ImportProgress]:
43
+ yield NoProgress()
44
+
45
+
46
+ @contextmanager
47
+ def import_progress(description: str, total: float | None) -> Iterator[ImportProgress]:
48
+ with Progress(
49
+ TextColumn("[progress.description]{task.description:<18}"),
50
+ BarColumn(),
51
+ TaskProgressColumn(),
52
+ TimeElapsedColumn(),
53
+ transient=True,
54
+ ) as progress:
55
+ task_id = progress.add_task(description, total=total)
56
+ yield RichImportProgress(progress, task_id)
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  from datetime import date, datetime, time, timezone
3
- from typing import Any, Callable, Literal, Type, cast, overload
3
+ from typing import Any, Callable, Literal, Sequence, Type, cast, overload
4
4
 
5
5
  import yaml
6
6
  from jsonpath_ng import JSONPath # type: ignore
@@ -20,38 +20,41 @@ from .extract import model_to_record
20
20
 
21
21
  @overload
22
22
  def import_record(
23
+ log: EvalLog,
23
24
  record: EvalLog
24
25
  | EvalSampleSummary
25
26
  | EvalSample
26
27
  | ChatMessage
27
28
  | Event
28
29
  | dict[str, JsonValue],
29
- columns: list[Column],
30
+ columns: Sequence[Column],
30
31
  strict: Literal[True] = True,
31
32
  ) -> dict[str, ColumnType]: ...
32
33
 
33
34
 
34
35
  @overload
35
36
  def import_record(
37
+ log: EvalLog,
36
38
  record: EvalLog
37
39
  | EvalSampleSummary
38
40
  | EvalSample
39
41
  | ChatMessage
40
42
  | Event
41
43
  | dict[str, JsonValue],
42
- columns: list[Column],
44
+ columns: Sequence[Column],
43
45
  strict: Literal[False],
44
46
  ) -> tuple[dict[str, ColumnType], list[ColumnError]]: ...
45
47
 
46
48
 
47
49
  def import_record(
50
+ log: EvalLog,
48
51
  record: EvalLog
49
52
  | EvalSampleSummary
50
53
  | EvalSample
51
54
  | ChatMessage
52
55
  | Event
53
56
  | dict[str, JsonValue],
54
- columns: list[Column],
57
+ columns: Sequence[Column],
55
58
  strict: bool = True,
56
59
  ) -> dict[str, ColumnType] | tuple[dict[str, ColumnType], list[ColumnError]]:
57
60
  # resolve the record BaseModel into a dict (and optionally a summary dict).
@@ -80,7 +83,7 @@ def import_record(
80
83
  try:
81
84
  result[name] = _resolve_value(value, column.type)
82
85
  except ValueError as ex:
83
- error = ColumnError(name, path=column.path, message=str(ex))
86
+ error = ColumnError(name, path=column.path, error=ex, log=log)
84
87
  if strict:
85
88
  raise ValueError(str(error))
86
89
  else:
@@ -90,10 +93,10 @@ def import_record(
90
93
  def field_not_found(
91
94
  name: str, path: JSONPath | None, required_type: str | None = None
92
95
  ) -> None:
93
- message = (
96
+ ex = ValueError(
94
97
  f"field not of type {required_type}" if required_type else "field not found"
95
98
  )
96
- error = ColumnError(name, path=path, message=f"{message}")
99
+ error = ColumnError(name, path=path, error=ex, log=log)
97
100
  if strict:
98
101
  raise ValueError(str(error))
99
102
  else:
@@ -157,7 +160,8 @@ def import_record(
157
160
  error = ColumnError(
158
161
  column.name,
159
162
  path=str(column.path) if column.path else None,
160
- message=str(ex),
163
+ error=ex,
164
+ log=log,
161
165
  )
162
166
  if strict:
163
167
  raise ValueError(str(error))
@@ -190,7 +194,7 @@ def import_record(
190
194
  return result, errors
191
195
 
192
196
 
193
- def resolve_duplicate_columns(columns: list[Column]) -> list[Column]:
197
+ def resolve_duplicate_columns(columns: Sequence[Column]) -> list[Column]:
194
198
  """Remove duplicate columns (with the later columns winning)"""
195
199
  seen = set[str]()
196
200
  deduped: list[Column] = []
@@ -7,9 +7,13 @@ from typing_extensions import override
7
7
  from inspect_ai.log._log import EvalSample, EvalSampleSummary
8
8
 
9
9
  from ..columns import Column, ColumnType
10
- from ..extract import input_as_str, list_as_str, score_values
10
+ from ..extract import list_as_str, score_values
11
11
  from ..validate import resolved_schema
12
- from .extract import sample_messages_as_str, sample_path_requires_full
12
+ from .extract import (
13
+ sample_input_as_str,
14
+ sample_messages_as_str,
15
+ sample_path_requires_full,
16
+ )
13
17
 
14
18
 
15
19
  class SampleColumn(Column):
@@ -54,14 +58,14 @@ class SampleColumn(Column):
54
58
  SampleSummary: list[Column] = [
55
59
  SampleColumn("id", path="id", required=True, type=str),
56
60
  SampleColumn("epoch", path="epoch", required=True),
57
- SampleColumn("input", path="input", required=True, value=input_as_str),
61
+ SampleColumn("input", path=sample_input_as_str, required=True),
58
62
  SampleColumn("target", path="target", required=True, value=list_as_str),
59
63
  SampleColumn("metadata_*", path="metadata"),
60
64
  SampleColumn("score_*", path="scores", value=score_values),
61
65
  SampleColumn("model_usage", path="model_usage"),
62
66
  SampleColumn("total_time", path="total_time"),
63
67
  SampleColumn("working_time", path="total_time"),
64
- SampleColumn("error", path="error"),
68
+ SampleColumn("error", path="error", default=""),
65
69
  SampleColumn("limit", path="limit"),
66
70
  SampleColumn("retries", path="retries"),
67
71
  ]
@@ -3,45 +3,17 @@ from typing import Callable
3
3
  from jsonpath_ng import JSONPath # type: ignore
4
4
  from pydantic import JsonValue
5
5
 
6
- from inspect_ai.analysis.beta._dataframe.extract import auto_id
7
6
  from inspect_ai.log._log import EvalSample, EvalSampleSummary
8
- from inspect_ai.model._chat_message import ChatMessageAssistant, ChatMessageTool
9
7
 
8
+ from ..extract import auto_id, messages_as_str
10
9
 
11
- def sample_messages_as_str(sample: EvalSample) -> str:
12
- # format each message for the transcript
13
- transcript: list[str] = []
14
- for msg in sample.messages:
15
- role = msg.role
16
- content = msg.text.strip() if msg.text else ""
17
-
18
- # assistant messages with tool calls
19
- if isinstance(msg, ChatMessageAssistant) and msg.tool_calls is not None:
20
- entry = f"{role}:\n{content}\n"
21
-
22
- for tool in msg.tool_calls:
23
- func_name = tool.function
24
- args = tool.arguments
25
10
 
26
- if isinstance(args, dict):
27
- args_text = "\n".join(f"{k}: {v}" for k, v in args.items())
28
- entry += f"\nTool Call: {func_name}\nArguments:\n{args_text}"
29
- else:
30
- entry += f"\nTool Call: {func_name}\nArguments: {args}"
11
+ def sample_input_as_str(sample: EvalSample) -> str:
12
+ return messages_as_str(sample.input)
31
13
 
32
- transcript.append(entry)
33
14
 
34
- # tool responses with errors
35
- elif isinstance(msg, ChatMessageTool) and msg.error is not None:
36
- func_name = msg.function or "unknown"
37
- entry = f"{role}:\n{content}\n\nError in tool call '{func_name}':\n{msg.error.message}\n"
38
- transcript.append(entry)
39
-
40
- # normal messages
41
- else:
42
- transcript.append(f"{role}:\n{content}\n")
43
-
44
- return "\n".join(transcript)
15
+ def sample_messages_as_str(sample: EvalSample) -> str:
16
+ return messages_as_str(sample.messages)
45
17
 
46
18
 
47
19
  def sample_path_requires_full(