inspect-ai 0.3.93__py3-none-any.whl → 0.3.95__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. inspect_ai/_display/textual/widgets/samples.py +3 -3
  2. inspect_ai/_display/textual/widgets/transcript.py +3 -29
  3. inspect_ai/_eval/loader.py +1 -1
  4. inspect_ai/_eval/task/run.py +21 -12
  5. inspect_ai/_util/answer.py +26 -0
  6. inspect_ai/_util/constants.py +0 -1
  7. inspect_ai/_util/exception.py +4 -0
  8. inspect_ai/_util/hash.py +39 -0
  9. inspect_ai/_util/local_server.py +51 -21
  10. inspect_ai/_util/path.py +22 -0
  11. inspect_ai/_util/trace.py +1 -1
  12. inspect_ai/_util/working.py +4 -0
  13. inspect_ai/_view/www/dist/assets/index.css +23 -22
  14. inspect_ai/_view/www/dist/assets/index.js +517 -204
  15. inspect_ai/_view/www/log-schema.json +375 -0
  16. inspect_ai/_view/www/package.json +1 -1
  17. inspect_ai/_view/www/src/@types/log.d.ts +90 -12
  18. inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
  19. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
  20. inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
  21. inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
  22. inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
  23. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
  24. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
  25. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
  26. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
  27. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
  28. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
  29. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
  30. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
  31. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
  32. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
  33. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
  34. inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
  35. inspect_ai/_view/www/src/app/types.ts +12 -2
  36. inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
  37. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
  38. inspect_ai/_view/www/src/state/hooks.ts +19 -3
  39. inspect_ai/_view/www/src/state/logSlice.ts +23 -5
  40. inspect_ai/_view/www/yarn.lock +9 -9
  41. inspect_ai/agent/_as_solver.py +3 -1
  42. inspect_ai/agent/_as_tool.py +6 -4
  43. inspect_ai/agent/_bridge/patch.py +1 -3
  44. inspect_ai/agent/_handoff.py +5 -1
  45. inspect_ai/agent/_react.py +4 -3
  46. inspect_ai/agent/_run.py +6 -1
  47. inspect_ai/agent/_types.py +9 -0
  48. inspect_ai/analysis/__init__.py +0 -0
  49. inspect_ai/analysis/beta/__init__.py +57 -0
  50. inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
  51. inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
  52. inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
  53. inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
  54. inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
  55. inspect_ai/analysis/beta/_dataframe/evals/table.py +140 -0
  56. inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
  57. inspect_ai/analysis/beta/_dataframe/events/columns.py +37 -0
  58. inspect_ai/analysis/beta/_dataframe/events/table.py +14 -0
  59. inspect_ai/analysis/beta/_dataframe/extract.py +54 -0
  60. inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
  61. inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
  62. inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
  63. inspect_ai/analysis/beta/_dataframe/messages/table.py +87 -0
  64. inspect_ai/analysis/beta/_dataframe/record.py +377 -0
  65. inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
  66. inspect_ai/analysis/beta/_dataframe/samples/columns.py +73 -0
  67. inspect_ai/analysis/beta/_dataframe/samples/extract.py +82 -0
  68. inspect_ai/analysis/beta/_dataframe/samples/table.py +329 -0
  69. inspect_ai/analysis/beta/_dataframe/util.py +157 -0
  70. inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
  71. inspect_ai/dataset/_dataset.py +6 -3
  72. inspect_ai/log/__init__.py +10 -0
  73. inspect_ai/log/_convert.py +4 -9
  74. inspect_ai/log/_file.py +1 -1
  75. inspect_ai/log/_log.py +21 -1
  76. inspect_ai/log/_samples.py +14 -17
  77. inspect_ai/log/_transcript.py +77 -35
  78. inspect_ai/log/_tree.py +118 -0
  79. inspect_ai/model/_call_tools.py +44 -35
  80. inspect_ai/model/_model.py +51 -44
  81. inspect_ai/model/_openai_responses.py +17 -18
  82. inspect_ai/model/_providers/anthropic.py +30 -5
  83. inspect_ai/model/_providers/hf.py +27 -1
  84. inspect_ai/model/_providers/providers.py +1 -1
  85. inspect_ai/model/_providers/sglang.py +8 -2
  86. inspect_ai/model/_providers/vllm.py +6 -2
  87. inspect_ai/scorer/_choice.py +1 -2
  88. inspect_ai/solver/_chain.py +1 -1
  89. inspect_ai/solver/_fork.py +1 -1
  90. inspect_ai/solver/_multiple_choice.py +9 -23
  91. inspect_ai/solver/_plan.py +2 -2
  92. inspect_ai/solver/_task_state.py +7 -3
  93. inspect_ai/solver/_transcript.py +6 -7
  94. inspect_ai/tool/_mcp/_context.py +3 -5
  95. inspect_ai/tool/_mcp/_mcp.py +6 -5
  96. inspect_ai/tool/_mcp/server.py +1 -1
  97. inspect_ai/tool/_tools/_execute.py +4 -1
  98. inspect_ai/tool/_tools/_think.py +1 -1
  99. inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
  100. inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
  101. inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
  102. inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
  103. inspect_ai/util/__init__.py +4 -0
  104. inspect_ai/util/_anyio.py +11 -0
  105. inspect_ai/util/_collect.py +50 -0
  106. inspect_ai/util/_sandbox/events.py +3 -2
  107. inspect_ai/util/_span.py +58 -0
  108. inspect_ai/util/_subtask.py +27 -42
  109. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/METADATA +8 -1
  110. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/RECORD +114 -82
  111. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/WHEEL +1 -1
  112. inspect_ai/_display/core/group.py +0 -79
  113. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/entry_points.txt +0 -0
  114. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/licenses/LICENSE +0 -0
  115. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,12 @@
1
1
  // This is a special name that signals a group of sandbox events.
2
2
 
3
- import { Events, StepEvent } from "../../../../@types/log";
3
+ import {
4
+ Events,
5
+ SpanBeginEvent,
6
+ SpanEndEvent,
7
+ StepEvent,
8
+ } from "../../../../@types/log";
9
+ import { hasSpans } from "./utils";
4
10
 
5
11
  // It will be caught elsewhere and rendered with a pretty name
6
12
  export const kSandboxSignalName = "53787D8A-D3FC-426D-B383-9F880B70E4AA";
@@ -54,39 +60,55 @@ const processPendingEvents = (events: Events, filter: boolean): Events => {
54
60
  };
55
61
 
56
62
  const collapseSampleInit = (events: Events): Events => {
57
- // See if the events have an init step
63
+ // Don't perform sample init logic if spans are present
64
+ const hasSpans = events.some((e) => {
65
+ return e.event === "span_begin" || e.event === "span_end";
66
+ });
67
+ if (hasSpans) {
68
+ return events;
69
+ }
70
+
71
+ // Don't synthesize a sample init step if one already exists
58
72
  const hasInitStep =
59
73
  events.findIndex((e) => {
60
74
  return e.event === "step" && e.name === "init";
61
75
  }) !== -1;
76
+ if (hasInitStep) {
77
+ return events;
78
+ }
62
79
 
80
+ // Find a sample init event
63
81
  const initEventIndex = events.findIndex((e) => {
64
82
  return e.event === "sample_init";
65
83
  });
66
84
  const initEvent = events[initEventIndex];
85
+ if (!initEvent) {
86
+ return events;
87
+ }
67
88
 
89
+ // Splice in sample init step if needed
68
90
  const fixedUp = [...events];
69
- if (!hasInitStep && initEvent) {
70
- fixedUp.splice(initEventIndex, 0, {
71
- timestamp: initEvent.timestamp,
72
- event: "step",
73
- action: "begin",
74
- type: null,
75
- name: "sample_init",
76
- pending: false,
77
- working_start: 0,
78
- });
79
-
80
- fixedUp.splice(initEventIndex + 2, 0, {
81
- timestamp: initEvent.timestamp,
82
- event: "step",
83
- action: "end",
84
- type: null,
85
- name: "sample_init",
86
- pending: false,
87
- working_start: 0,
88
- });
89
- }
91
+ fixedUp.splice(initEventIndex, 0, {
92
+ timestamp: initEvent.timestamp,
93
+ event: "step",
94
+ action: "begin",
95
+ type: null,
96
+ name: "sample_init",
97
+ pending: false,
98
+ working_start: 0,
99
+ span_id: initEvent.span_id,
100
+ });
101
+
102
+ fixedUp.splice(initEventIndex + 2, 0, {
103
+ timestamp: initEvent.timestamp,
104
+ event: "step",
105
+ action: "end",
106
+ type: null,
107
+ name: "sample_init",
108
+ pending: false,
109
+ working_start: 0,
110
+ span_id: initEvent.span_id,
111
+ });
90
112
  return fixedUp;
91
113
  };
92
114
 
@@ -94,12 +116,22 @@ const groupSandboxEvents = (events: Events): Events => {
94
116
  const result: Events = [];
95
117
  const pendingSandboxEvents: Events = [];
96
118
 
119
+ const useSpans = hasSpans(events);
120
+
97
121
  const pushPendingSandboxEvents = () => {
98
122
  const timestamp =
99
123
  pendingSandboxEvents[pendingSandboxEvents.length - 1].timestamp;
100
- result.push(createStepEvent(kSandboxSignalName, timestamp, "begin"));
124
+ if (useSpans) {
125
+ result.push(createSpanBegin(kSandboxSignalName, timestamp, null));
126
+ } else {
127
+ result.push(createStepEvent(kSandboxSignalName, timestamp, "begin"));
128
+ }
101
129
  result.push(...pendingSandboxEvents);
102
- result.push(createStepEvent(kSandboxSignalName, timestamp, "end"));
130
+ if (useSpans) {
131
+ result.push(createSpanEnd(kSandboxSignalName, timestamp));
132
+ } else {
133
+ result.push(createStepEvent(kSandboxSignalName, timestamp, "end"));
134
+ }
103
135
  pendingSandboxEvents.length = 0;
104
136
  };
105
137
 
@@ -139,4 +171,34 @@ const createStepEvent = (
139
171
  name,
140
172
  pending: false,
141
173
  working_start: 0,
174
+ span_id: null,
142
175
  });
176
+
177
+ const createSpanBegin = (
178
+ name: string,
179
+ timestamp: string,
180
+ parent_id: string | null,
181
+ ): SpanBeginEvent => {
182
+ return {
183
+ name,
184
+ id: `${name}-begin`,
185
+ span_id: name,
186
+ parent_id,
187
+ timestamp,
188
+ event: "span_begin",
189
+ type: null,
190
+ pending: false,
191
+ working_start: 0,
192
+ };
193
+ };
194
+
195
+ const createSpanEnd = (name: string, timestamp: string): SpanEndEvent => {
196
+ return {
197
+ id: `${name}-end`,
198
+ timestamp,
199
+ event: "span_end",
200
+ pending: false,
201
+ working_start: 0,
202
+ span_id: name,
203
+ };
204
+ };
@@ -1,14 +1,28 @@
1
1
  import { Events } from "../../../../@types/log";
2
2
  import { EventNode, EventType } from "../types";
3
+ import {
4
+ ACTION_BEGIN,
5
+ ET_SPAN_BEGIN,
6
+ ET_SPAN_END,
7
+ ET_STEP,
8
+ hasSpans,
9
+ } from "./utils";
10
+
11
+ type TreeifyFunction = (
12
+ event: EventType,
13
+ addNode: (event: EventType) => EventNode,
14
+ pushStack: (node: EventNode) => void,
15
+ popStack: () => void,
16
+ ) => void;
3
17
 
4
- /**
5
- * Gathers events into a hierarchy of EventNodes.
6
- */
7
18
  export function treeifyEvents(events: Events, depth: number): EventNode[] {
19
+ const useSpans = hasSpans(events);
20
+ const treeFn = useSpans ? treeifyFnSpan : treeifyFnStep;
21
+
8
22
  const rootNodes: EventNode[] = [];
9
23
  const stack: EventNode[] = [];
10
24
 
11
- const pushNode = (event: EventType): EventNode => {
25
+ const addNode = (event: EventType): EventNode => {
12
26
  const node = new EventNode(event, stack.length + depth);
13
27
  if (stack.length > 0) {
14
28
  const parentNode = stack[stack.length - 1];
@@ -19,21 +33,219 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
19
33
  return node;
20
34
  };
21
35
 
36
+ const pushStack = (node: EventNode): void => {
37
+ stack.push(node);
38
+ };
39
+
40
+ const popStack = (): void => {
41
+ if (stack.length > 0) {
42
+ stack.pop();
43
+ }
44
+ };
45
+
22
46
  events.forEach((event) => {
23
- if (event.event === "step" && event.action === "begin") {
24
- // Starting a new step
25
- const node = pushNode(event);
26
- stack.push(node);
27
- } else if (event.event === "step" && event.action === "end") {
28
- // An ending step
29
- if (stack.length > 0) {
30
- stack.pop();
47
+ treeFn(event, addNode, pushStack, popStack);
48
+ });
49
+
50
+ if (useSpans) {
51
+ return transformTree(rootNodes);
52
+ } else {
53
+ return rootNodes;
54
+ }
55
+ }
56
+
57
+ const treeifyFnStep: TreeifyFunction = (
58
+ event: EventType,
59
+ addNode: (event: EventType) => EventNode,
60
+ pushStack: (node: EventNode) => void,
61
+ popStack: () => void,
62
+ ): void => {
63
+ switch (event.event) {
64
+ case ET_STEP:
65
+ if (event.action === ACTION_BEGIN) {
66
+ // Starting a new step
67
+ const node = addNode(event);
68
+ pushStack(node);
69
+ } else {
70
+ // An ending step
71
+ popStack();
31
72
  }
32
- } else {
73
+ break;
74
+ case ET_SPAN_BEGIN: {
75
+ // These shouldn't be here, but throw away
76
+ break;
77
+ }
78
+ case ET_SPAN_END: {
79
+ // These shouldn't be here, but throw away
80
+ break;
81
+ }
82
+ default:
33
83
  // An event
34
- pushNode(event);
84
+ addNode(event);
85
+ break;
86
+ }
87
+ };
88
+
89
+ const treeifyFnSpan: TreeifyFunction = (
90
+ event: EventType,
91
+ addNode: (event: EventType) => EventNode,
92
+ pushStack: (node: EventNode) => void,
93
+ popStack: () => void,
94
+ ): void => {
95
+ switch (event.event) {
96
+ case ET_STEP:
97
+ // strip steps
98
+ break;
99
+ case ET_SPAN_BEGIN: {
100
+ const node = addNode(event);
101
+ pushStack(node);
102
+ break;
35
103
  }
36
- });
104
+ case ET_SPAN_END: {
105
+ popStack();
106
+ break;
107
+ }
108
+ default:
109
+ // An event
110
+ addNode(event);
111
+ break;
112
+ }
113
+ };
37
114
 
38
- return rootNodes;
39
- }
115
+ type TreeNodeTransformer = {
116
+ name: string;
117
+ matches: (node: EventNode) => boolean;
118
+ process: (node: EventNode) => EventNode;
119
+ };
120
+
121
+ const treeNodeTransformers: TreeNodeTransformer[] = [
122
+ {
123
+ name: "unwrap_tools",
124
+ matches: (node) =>
125
+ node.event.event === "span_begin" && node.event.type === "tool",
126
+ process: (node) => elevateChildNode(node, "tool") || node,
127
+ },
128
+ {
129
+ name: "unwrap_subtasks",
130
+ matches: (node) =>
131
+ node.event.event === "span_begin" && node.event.type === "subtask",
132
+ process: (node) => elevateChildNode(node, "subtask") || node,
133
+ },
134
+ {
135
+ name: "unwrap_agent_solver",
136
+ matches: (node) =>
137
+ node.event.event === "span_begin" &&
138
+ node.event["type"] === "solver" &&
139
+ node.children.length === 2 &&
140
+ node.children[0].event.event === "span_begin" &&
141
+ node.children[0].event.type === "agent" &&
142
+ node.children[1].event.event === "state",
143
+
144
+ process: (node) => skipFirstChildNode(node),
145
+ },
146
+ {
147
+ name: "unwrap_agent_solver w/store",
148
+ matches: (node) =>
149
+ node.event.event === "span_begin" &&
150
+ node.event["type"] === "solver" &&
151
+ node.children.length === 3 &&
152
+ node.children[0].event.event === "span_begin" &&
153
+ node.children[0].event.type === "agent" &&
154
+ node.children[1].event.event === "state" &&
155
+ node.children[2].event.event === "store",
156
+ process: (node) => skipFirstChildNode(node),
157
+ },
158
+ {
159
+ name: "unwrap_handoff",
160
+ matches: (node) =>
161
+ node.event.event === "span_begin" &&
162
+ node.event["type"] === "handoff" &&
163
+ node.children.length === 2 &&
164
+ node.children[0].event.event === "tool" &&
165
+ node.children[1].event.event === "store" &&
166
+ node.children[0].children.length === 2 &&
167
+ node.children[0].children[0].event.event === "span_begin" &&
168
+ node.children[0].children[0].event.type === "agent",
169
+ process: (node) => skipThisNode(node),
170
+ },
171
+ ];
172
+
173
+ const transformTree = (roots: EventNode[]): EventNode[] => {
174
+ const visitNode = (node: EventNode): EventNode => {
175
+ let processedNode = node;
176
+
177
+ // Visit children (depth first)
178
+ processedNode.children = processedNode.children.map(visitNode);
179
+
180
+ // Apply any visitors to this node
181
+ for (const transformer of treeNodeTransformers) {
182
+ if (transformer.matches(processedNode)) {
183
+ processedNode = transformer.process(processedNode);
184
+ // Only apply the first matching transformer
185
+ break;
186
+ }
187
+ }
188
+ return processedNode;
189
+ };
190
+
191
+ return roots.map(visitNode);
192
+ };
193
+
194
+ /**
195
+ * Process a span node by elevating a specific child node type and moving its siblings as children
196
+ * @template T - Type of the event (either ToolEvent or SubtaskEvent)
197
+ */
198
+ const elevateChildNode = (
199
+ node: EventNode,
200
+ childEventType: "tool" | "subtask",
201
+ ): EventNode | null => {
202
+ // Find the specific event child
203
+ const targetIndex = node.children.findIndex(
204
+ (child) => child.event.event === childEventType,
205
+ );
206
+
207
+ if (targetIndex === -1) {
208
+ console.log(
209
+ `No ${childEventType} event found in a span, this is very unexpected.`,
210
+ );
211
+ return null;
212
+ }
213
+
214
+ // Get the target node and set its depth
215
+ const targetNode = { ...node.children[targetIndex] };
216
+ const remainingChildren = node.children.filter((_, i) => i !== targetIndex);
217
+
218
+ // Process the remaining children
219
+ targetNode.depth = node.depth;
220
+ targetNode.children = reduceDepth(remainingChildren);
221
+
222
+ // No need to update the event itself (events have been deprecated
223
+ // and more importantly we drive children / transcripts using the tree structure itself
224
+ // and nodes rather than the event.events itself)
225
+ return targetNode;
226
+ };
227
+
228
+ const skipFirstChildNode = (node: EventNode): EventNode => {
229
+ const agentSpan = node.children.splice(0, 1)[0];
230
+ node.children.unshift(...reduceDepth(agentSpan.children));
231
+ return node;
232
+ };
233
+
234
+ const skipThisNode = (node: EventNode): EventNode => {
235
+ const newNode = { ...node.children[0] };
236
+ newNode.depth = node.depth;
237
+ newNode.children = reduceDepth(newNode.children[0].children, 2);
238
+ return newNode;
239
+ };
240
+
241
+ // Reduce the depth of the children by 1
242
+ // This is used when we hoist a child node to the parent
243
+ const reduceDepth = (nodes: EventNode[], depth: number = 1): EventNode[] => {
244
+ return nodes.map((node) => {
245
+ if (node.children.length > 0) {
246
+ node.children = reduceDepth(node.children, 1);
247
+ }
248
+ node.depth = node.depth - depth;
249
+ return node;
250
+ });
251
+ };
@@ -0,0 +1,11 @@
1
+ import { Events } from "../../../../@types/log";
2
+
3
+ export const ET_STEP = "step";
4
+ export const ACTION_BEGIN = "begin";
5
+
6
+ export const ET_SPAN_BEGIN = "span_begin";
7
+ export const ET_SPAN_END = "span_end";
8
+
9
+ export const hasSpans = (events: Events): boolean => {
10
+ return events.some((event) => event.event === ET_SPAN_BEGIN);
11
+ };
@@ -10,6 +10,8 @@ import {
10
10
  SampleLimitEvent,
11
11
  SandboxEvent,
12
12
  ScoreEvent,
13
+ SpanBeginEvent,
14
+ SpanEndEvent,
13
15
  StateEvent,
14
16
  StepEvent,
15
17
  StoreEvent,
@@ -39,7 +41,9 @@ export type EventType =
39
41
  | InputEvent
40
42
  | ErrorEvent
41
43
  | ApprovalEvent
42
- | SandboxEvent;
44
+ | SandboxEvent
45
+ | SpanBeginEvent
46
+ | SpanEndEvent;
43
47
 
44
48
  export class EventNode {
45
49
  event: EventType;
@@ -67,7 +67,9 @@ export interface LogState {
67
67
  selectedLogSummary?: EvalSummary;
68
68
  pendingSampleSummaries?: PendingSamples;
69
69
 
70
- filter: ScoreFilter;
70
+ filter: string;
71
+ filterError?: FilterError;
72
+
71
73
  epoch: string;
72
74
  sort: string;
73
75
  score?: ScoreLabel;
@@ -122,8 +124,16 @@ export interface ScoreLabel {
122
124
  scorer: string;
123
125
  }
124
126
 
125
- export interface ScoreFilter {
127
+ export interface SampleFilter {
126
128
  value?: string;
129
+ error?: FilterError;
130
+ }
131
+
132
+ export interface FilterError {
133
+ from: number;
134
+ to: number;
135
+ message: string;
136
+ severity: "warning" | "error";
127
137
  }
128
138
 
129
139
  export type SampleMode = "none" | "single" | "many";
@@ -18,7 +18,7 @@
18
18
  display: flex;
19
19
  margin-top: 0;
20
20
  position: relative;
21
- height: 8px;
21
+ height: 18px;
22
22
  }
23
23
 
24
24
  .moreToggle.bordered {
@@ -27,19 +27,19 @@ export const ExpandablePanel: FC<ExpandablePanelProps> = memo(
27
27
  const [collapsed, setCollapsed] = useCollapsedState(id, collapse);
28
28
 
29
29
  const [showToggle, setShowToggle] = useState(false);
30
- const lineHeightRef = useRef<number>(0);
30
+ const baseFontSizeRef = useRef<number>(0);
31
31
 
32
32
  const checkOverflow = useCallback(
33
33
  (entry: ResizeObserverEntry) => {
34
34
  const element = entry.target as HTMLDivElement;
35
35
 
36
36
  // Calculate line height if we haven't yet
37
- if (!lineHeightRef.current) {
37
+ if (baseFontSizeRef.current === 0) {
38
38
  const computedStyle = window.getComputedStyle(element);
39
- lineHeightRef.current = parseInt(computedStyle.lineHeight) || 16; // fallback to 16px if can't get line height
39
+ const rootFontSize = parseFloat(computedStyle.fontSize);
40
+ baseFontSizeRef.current = rootFontSize;
40
41
  }
41
-
42
- const maxCollapsedHeight = lines * lineHeightRef.current;
42
+ const maxCollapsedHeight = baseFontSizeRef.current * lines;
43
43
  const contentHeight = element.scrollHeight;
44
44
 
45
45
  setShowToggle(contentHeight > maxCollapsedHeight);
@@ -132,6 +132,11 @@ export const useFilteredSamples = () => {
132
132
  const evalDescriptor = useEvalDescriptor();
133
133
  const sampleSummaries = useSampleSummaries();
134
134
  const filter = useStore((state) => state.log.filter);
135
+ const setFilterError = useStore((state) => state.logActions.setFilterError);
136
+ const clearFilterError = useStore(
137
+ (state) => state.logActions.clearFilterError,
138
+ );
139
+
135
140
  const epoch = useStore((state) => state.log.epoch);
136
141
  const sort = useStore((state) => state.log.sort);
137
142
  const samplesDescriptor = useSampleDescriptor();
@@ -139,10 +144,19 @@ export const useFilteredSamples = () => {
139
144
 
140
145
  return useMemo(() => {
141
146
  // Apply filters
147
+ const { result, error, allErrors } =
148
+ evalDescriptor && filter
149
+ ? filterSamples(evalDescriptor, sampleSummaries, filter)
150
+ : { result: sampleSummaries, error: undefined, allErrors: false };
151
+
152
+ if (error && allErrors) {
153
+ setFilterError(error);
154
+ } else {
155
+ clearFilterError();
156
+ }
157
+
142
158
  const prefiltered =
143
- evalDescriptor && filter.value
144
- ? filterSamples(evalDescriptor, sampleSummaries, filter.value).result
145
- : sampleSummaries;
159
+ error === undefined || !allErrors ? result : sampleSummaries;
146
160
 
147
161
  // Filter epochs
148
162
  const filtered =
@@ -160,6 +174,8 @@ export const useFilteredSamples = () => {
160
174
  evalDescriptor,
161
175
  sampleSummaries,
162
176
  filter,
177
+ setFilterError,
178
+ clearFilterError,
163
179
  epoch,
164
180
  sort,
165
181
  samplesDescriptor,
@@ -1,4 +1,4 @@
1
- import { LogState, ScoreFilter, ScoreLabel } from "../app/types";
1
+ import { FilterError, LogState, ScoreLabel } from "../app/types";
2
2
  import { EvalSummary, PendingSamples } from "../client/api/types";
3
3
  import { kDefaultSort, kLogViewInfoTabId } from "../constants";
4
4
  import { createLogger } from "../utils/logger";
@@ -23,7 +23,13 @@ export interface LogSlice {
23
23
  setPendingSampleSummaries: (samples: PendingSamples) => void;
24
24
 
25
25
  // Set filter criteria
26
- setFilter: (filter: ScoreFilter) => void;
26
+ setFilter: (filter: string) => void;
27
+
28
+ // Set the filter error
29
+ setFilterError: (error: FilterError) => void;
30
+
31
+ // Clear the filter error
32
+ clearFilterError: () => void;
27
33
 
28
34
  // Set epoch filter
29
35
  setEpoch: (epoch: string) => void;
@@ -60,7 +66,9 @@ const initialState = {
60
66
  loadedLog: undefined,
61
67
 
62
68
  // Filter state
63
- filter: {},
69
+ filter: "",
70
+ filterError: undefined,
71
+
64
72
  epoch: "all",
65
73
  sort: kDefaultSort,
66
74
  score: undefined,
@@ -110,10 +118,19 @@ export const createLogSlice = (
110
118
  state.log.pendingSampleSummaries = pendingSampleSummaries;
111
119
  }),
112
120
 
113
- setFilter: (filter: ScoreFilter) =>
121
+ setFilter: (filter: string) =>
114
122
  set((state) => {
115
123
  state.log.filter = filter;
116
124
  }),
125
+ setFilterError: (error: FilterError) =>
126
+ set((state) => {
127
+ state.log.filterError = error;
128
+ }),
129
+ clearFilterError: () => {
130
+ set((state) => {
131
+ state.log.filterError = undefined;
132
+ });
133
+ },
117
134
  setEpoch: (epoch: string) =>
118
135
  set((state) => {
119
136
  state.log.epoch = epoch;
@@ -132,7 +149,8 @@ export const createLogSlice = (
132
149
  }),
133
150
  resetFiltering: () =>
134
151
  set((state) => {
135
- state.log.filter = {};
152
+ state.log.filter = "";
153
+ state.log.filterError = undefined;
136
154
  state.log.epoch = "all";
137
155
  state.log.sort = kDefaultSort;
138
156
  state.log.score = undefined;
@@ -4254,17 +4254,17 @@ react-refresh@^0.17.0:
4254
4254
  resolved "https://registry.yarnpkg.com/react-refresh/-/react-refresh-0.17.0.tgz#b7e579c3657f23d04eccbe4ad2e58a8ed51e7e53"
4255
4255
  integrity sha512-z6F7K9bV85EfseRCp2bzrpyQ0Gkw1uLoCel9XBVWPg/TjRj94SkJzUTGfOa4bs7iJvBWtQG0Wq7wnI0syw3EBQ==
4256
4256
 
4257
- react-router-dom@^7.5.0:
4258
- version "7.5.1"
4259
- resolved "https://registry.yarnpkg.com/react-router-dom/-/react-router-dom-7.5.1.tgz#16ffa63006dfdbab53cf243be84c29535e7dc4e0"
4260
- integrity sha512-5DPSPc7ENrt2tlKPq0FtpG80ZbqA9aIKEyqX6hSNJDlol/tr6iqCK4crqdsusmOSSotq6zDsn0y3urX9TuTNmA==
4257
+ react-router-dom@^7.5.3:
4258
+ version "7.5.3"
4259
+ resolved "https://registry.yarnpkg.com/react-router-dom/-/react-router-dom-7.5.3.tgz#496e9f6d90f731703c7772668b41747028e0a2d5"
4260
+ integrity sha512-cK0jSaTyW4jV9SRKAItMIQfWZ/D6WEZafgHuuCb9g+SjhLolY78qc+De4w/Cz9ybjvLzShAmaIMEXt8iF1Cm+A==
4261
4261
  dependencies:
4262
- react-router "7.5.1"
4262
+ react-router "7.5.3"
4263
4263
 
4264
- react-router@7.5.1:
4265
- version "7.5.1"
4266
- resolved "https://registry.yarnpkg.com/react-router/-/react-router-7.5.1.tgz#e0bae54e913d139e727e7a88d165174a2cdbeb27"
4267
- integrity sha512-/jjU3fcYNd2bwz9Q0xt5TwyiyoO8XjSEFXJY4O/lMAlkGTHWuHRAbR9Etik+lSDqMC7A7mz3UlXzgYT6Vl58sA==
4264
+ react-router@7.5.3:
4265
+ version "7.5.3"
4266
+ resolved "https://registry.yarnpkg.com/react-router/-/react-router-7.5.3.tgz#9e5420832af8c3690740c1797d4fa54613fea06d"
4267
+ integrity sha512-3iUDM4/fZCQ89SXlDa+Ph3MevBrozBAI655OAfWQlTm9nBR0IKlrmNwFow5lPHttbwvITZfkeeeZFP6zt3F7pw==
4268
4268
  dependencies:
4269
4269
  cookie "^1.0.1"
4270
4270
  set-cookie-parser "^2.6.0"
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  from typing import TYPE_CHECKING, Any
4
4
 
5
5
  from inspect_ai.util._limit import Limit, apply_limits
6
+ from inspect_ai.util._span import span
6
7
 
7
8
  if TYPE_CHECKING:
8
9
  from inspect_ai.solver._solver import Solver
@@ -61,7 +62,8 @@ def as_solver(agent: Agent, limits: list[Limit] = [], **agent_kwargs: Any) -> So
61
62
  try:
62
63
  # run the agent with limits
63
64
  with apply_limits(limits):
64
- agent_state = await agent(agent_state, **agent_kwargs)
65
+ async with span(name=agent_name, type="agent"):
66
+ agent_state = await agent(agent_state, **agent_kwargs)
65
67
  # if an exception occurs, we still want to update the TaskState with the
66
68
  # AgentState's messages + output so that it appears in the log and is scored
67
69
  finally: