inspect-ai 0.3.99__py3-none-any.whl → 0.3.101__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. inspect_ai/_cli/eval.py +2 -1
  2. inspect_ai/_display/core/config.py +11 -5
  3. inspect_ai/_display/core/panel.py +66 -2
  4. inspect_ai/_display/core/textual.py +5 -2
  5. inspect_ai/_display/plain/display.py +1 -0
  6. inspect_ai/_display/rich/display.py +2 -2
  7. inspect_ai/_display/textual/widgets/transcript.py +37 -9
  8. inspect_ai/_eval/eval.py +13 -1
  9. inspect_ai/_eval/evalset.py +3 -2
  10. inspect_ai/_eval/run.py +2 -0
  11. inspect_ai/_eval/score.py +2 -4
  12. inspect_ai/_eval/task/log.py +3 -1
  13. inspect_ai/_eval/task/run.py +59 -81
  14. inspect_ai/_util/content.py +11 -6
  15. inspect_ai/_util/interrupt.py +2 -2
  16. inspect_ai/_util/text.py +7 -0
  17. inspect_ai/_util/working.py +8 -37
  18. inspect_ai/_view/__init__.py +0 -0
  19. inspect_ai/_view/schema.py +2 -1
  20. inspect_ai/_view/www/CLAUDE.md +15 -0
  21. inspect_ai/_view/www/dist/assets/index.css +307 -171
  22. inspect_ai/_view/www/dist/assets/index.js +24733 -21641
  23. inspect_ai/_view/www/log-schema.json +77 -3
  24. inspect_ai/_view/www/package.json +9 -5
  25. inspect_ai/_view/www/src/@types/log.d.ts +9 -0
  26. inspect_ai/_view/www/src/app/App.tsx +1 -15
  27. inspect_ai/_view/www/src/app/appearance/icons.ts +4 -1
  28. inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +24 -6
  29. inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +0 -5
  30. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +220 -205
  31. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +2 -1
  32. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +5 -0
  33. inspect_ai/_view/www/src/app/log-view/tabs/grouping.ts +4 -4
  34. inspect_ai/_view/www/src/app/routing/navigationHooks.ts +22 -25
  35. inspect_ai/_view/www/src/app/routing/url.ts +84 -4
  36. inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +0 -5
  37. inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +1 -1
  38. inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +7 -0
  39. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +24 -17
  40. inspect_ai/_view/www/src/app/samples/SampleSummaryView.module.css +1 -2
  41. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +8 -6
  42. inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +0 -4
  43. inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +3 -2
  44. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +2 -0
  45. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +2 -0
  46. inspect_ai/_view/www/src/app/samples/chat/messages.ts +1 -0
  47. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -0
  48. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +17 -5
  49. inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +1 -1
  50. inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +1 -2
  51. inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +1 -1
  52. inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +1 -2
  53. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.module.css +1 -1
  54. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  55. inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +1 -1
  56. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +3 -2
  57. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +4 -5
  58. inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +1 -1
  59. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +1 -2
  60. inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +1 -3
  61. inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +1 -2
  62. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +3 -4
  63. inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.module.css +42 -0
  64. inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.tsx +77 -0
  65. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +27 -71
  66. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +13 -3
  67. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +27 -2
  68. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +1 -0
  69. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +21 -22
  70. inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.module.css +45 -0
  71. inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +223 -0
  72. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.module.css +10 -0
  73. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +258 -0
  74. inspect_ai/_view/www/src/app/samples/transcript/outline/tree-visitors.ts +187 -0
  75. inspect_ai/_view/www/src/app/samples/transcript/state/StateEventRenderers.tsx +8 -1
  76. inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +3 -4
  77. inspect_ai/_view/www/src/app/samples/transcript/transform/hooks.ts +78 -0
  78. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +340 -135
  79. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +3 -0
  80. inspect_ai/_view/www/src/app/samples/transcript/types.ts +2 -0
  81. inspect_ai/_view/www/src/app/types.ts +5 -1
  82. inspect_ai/_view/www/src/client/api/api-browser.ts +2 -2
  83. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +6 -1
  84. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +1 -1
  85. inspect_ai/_view/www/src/components/PopOver.tsx +422 -0
  86. inspect_ai/_view/www/src/components/PulsingDots.module.css +9 -9
  87. inspect_ai/_view/www/src/components/PulsingDots.tsx +4 -1
  88. inspect_ai/_view/www/src/components/StickyScroll.tsx +183 -0
  89. inspect_ai/_view/www/src/components/TabSet.tsx +4 -0
  90. inspect_ai/_view/www/src/state/hooks.ts +52 -2
  91. inspect_ai/_view/www/src/state/logSlice.ts +4 -3
  92. inspect_ai/_view/www/src/state/samplePolling.ts +8 -0
  93. inspect_ai/_view/www/src/state/sampleSlice.ts +53 -9
  94. inspect_ai/_view/www/src/state/scrolling.ts +152 -0
  95. inspect_ai/_view/www/src/utils/attachments.ts +7 -0
  96. inspect_ai/_view/www/src/utils/python.ts +18 -0
  97. inspect_ai/_view/www/yarn.lock +290 -33
  98. inspect_ai/agent/_react.py +12 -7
  99. inspect_ai/agent/_run.py +2 -3
  100. inspect_ai/analysis/beta/__init__.py +2 -0
  101. inspect_ai/analysis/beta/_dataframe/samples/table.py +19 -18
  102. inspect_ai/dataset/_sources/csv.py +2 -6
  103. inspect_ai/dataset/_sources/hf.py +2 -6
  104. inspect_ai/dataset/_sources/json.py +2 -6
  105. inspect_ai/dataset/_util.py +23 -0
  106. inspect_ai/log/_log.py +1 -1
  107. inspect_ai/log/_recorders/eval.py +4 -3
  108. inspect_ai/log/_recorders/file.py +2 -9
  109. inspect_ai/log/_recorders/json.py +1 -0
  110. inspect_ai/log/_recorders/recorder.py +1 -0
  111. inspect_ai/log/_transcript.py +1 -1
  112. inspect_ai/model/_call_tools.py +6 -2
  113. inspect_ai/model/_openai.py +1 -1
  114. inspect_ai/model/_openai_responses.py +85 -41
  115. inspect_ai/model/_openai_web_search.py +38 -0
  116. inspect_ai/model/_providers/azureai.py +72 -3
  117. inspect_ai/model/_providers/openai.py +4 -1
  118. inspect_ai/model/_providers/openai_responses.py +5 -1
  119. inspect_ai/scorer/_metric.py +1 -2
  120. inspect_ai/scorer/_reducer/reducer.py +1 -1
  121. inspect_ai/solver/_task_state.py +2 -2
  122. inspect_ai/tool/_tool.py +6 -2
  123. inspect_ai/tool/_tool_def.py +27 -4
  124. inspect_ai/tool/_tool_info.py +2 -0
  125. inspect_ai/tool/_tools/_web_search/_google.py +43 -15
  126. inspect_ai/tool/_tools/_web_search/_tavily.py +46 -13
  127. inspect_ai/tool/_tools/_web_search/_web_search.py +214 -45
  128. inspect_ai/util/__init__.py +4 -0
  129. inspect_ai/util/_json.py +3 -0
  130. inspect_ai/util/_limit.py +230 -20
  131. inspect_ai/util/_sandbox/docker/compose.py +20 -11
  132. inspect_ai/util/_span.py +1 -1
  133. {inspect_ai-0.3.99.dist-info → inspect_ai-0.3.101.dist-info}/METADATA +3 -3
  134. {inspect_ai-0.3.99.dist-info → inspect_ai-0.3.101.dist-info}/RECORD +138 -124
  135. {inspect_ai-0.3.99.dist-info → inspect_ai-0.3.101.dist-info}/WHEEL +1 -1
  136. {inspect_ai-0.3.99.dist-info → inspect_ai-0.3.101.dist-info}/entry_points.txt +0 -0
  137. {inspect_ai-0.3.99.dist-info → inspect_ai-0.3.101.dist-info}/licenses/LICENSE +0 -0
  138. {inspect_ai-0.3.99.dist-info → inspect_ai-0.3.101.dist-info}/top_level.txt +0 -0
@@ -18,7 +18,7 @@ import {
18
18
 
19
19
  import { FC, useEffect, useMemo } from "react";
20
20
  import { useStore } from "../../../../state/store";
21
- import { EventNode } from "../types";
21
+ import { EventNode, kTranscriptCollapseScope } from "../types";
22
22
  import styles from "./StateEventView.module.css";
23
23
 
24
24
  interface StateEventViewProps {
@@ -35,7 +35,6 @@ export const StateEventView: FC<StateEventViewProps> = ({
35
35
  className,
36
36
  }) => {
37
37
  const event = eventNode.event;
38
- const id = eventNode.id;
39
38
 
40
39
  const summary = useMemo(() => {
41
40
  return summarizeChanges(event.changes);
@@ -67,13 +66,13 @@ export const StateEventView: FC<StateEventViewProps> = ({
67
66
  const collapseEvent = useStore((state) => state.sampleActions.collapseEvent);
68
67
  useEffect(() => {
69
68
  if (changePreview === undefined) {
70
- collapseEvent(id, true);
69
+ collapseEvent(kTranscriptCollapseScope, eventNode.id, true);
71
70
  }
72
71
  }, [changePreview, collapseEvent]);
73
72
 
74
73
  return (
75
74
  <EventPanel
76
- id={id}
75
+ eventNodeId={eventNode.id}
77
76
  depth={eventNode.depth}
78
77
  title={title}
79
78
  className={className}
@@ -0,0 +1,78 @@
1
+ import { useMemo } from "react";
2
+ import {
3
+ Events,
4
+ SpanBeginEvent,
5
+ StepEvent,
6
+ SubtaskEvent,
7
+ ToolEvent,
8
+ } from "../../../../@types/log";
9
+ import { EventNode } from "../types";
10
+ import { fixupEventStream, kSandboxSignalName } from "./fixups";
11
+ import { treeifyEvents } from "./treeify";
12
+
13
+ export const useEventNodes = (events: Events, running: boolean) => {
14
+ // Build the event tree and the map of node ids that are collapsed by default
15
+ const { eventTree, defaultCollapsedIds } = useMemo((): {
16
+ eventTree: EventNode[];
17
+ defaultCollapsedIds: Record<string, true>;
18
+ } => {
19
+ // Apply fixups to the event stream
20
+ const resolvedEvents = fixupEventStream(events, !running);
21
+
22
+ // Build the event tree
23
+ const eventTree = treeifyEvents(resolvedEvents, 0);
24
+
25
+ // Apply collapse filters to the event tree
26
+ const defaultCollapsedIds: Record<string, true> = {};
27
+ const findCollapsibleEvents = (nodes: EventNode[]) => {
28
+ for (const node of nodes) {
29
+ if (
30
+ (node.event.event === "step" ||
31
+ node.event.event === "span_begin" ||
32
+ node.event.event === "tool" ||
33
+ node.event.event === "subtask") &&
34
+ collapseFilters.some((filter) =>
35
+ filter(
36
+ node.event as
37
+ | StepEvent
38
+ | SpanBeginEvent
39
+ | ToolEvent
40
+ | SubtaskEvent,
41
+ ),
42
+ )
43
+ ) {
44
+ defaultCollapsedIds[node.id] = true;
45
+ }
46
+
47
+ // Recursively check children
48
+ findCollapsibleEvents(node.children);
49
+ }
50
+ };
51
+ findCollapsibleEvents(eventTree);
52
+
53
+ return { eventTree, defaultCollapsedIds };
54
+ }, [events, running]);
55
+
56
+ return { eventNodes: eventTree, defaultCollapsedIds };
57
+ };
58
+
59
+ const collapseFilters: Array<
60
+ (event: StepEvent | SpanBeginEvent | ToolEvent | SubtaskEvent) => boolean
61
+ > = [
62
+ (event: StepEvent | SpanBeginEvent | ToolEvent | SubtaskEvent) =>
63
+ event.type === "solver" && event.name === "system_message",
64
+ (event: StepEvent | SpanBeginEvent | ToolEvent | SubtaskEvent) => {
65
+ if (event.event === "step" || event.event === "span_begin") {
66
+ return (
67
+ event.name === kSandboxSignalName ||
68
+ event.name === "init" ||
69
+ event.name === "sample_init"
70
+ );
71
+ }
72
+ return false;
73
+ },
74
+ (event: StepEvent | SpanBeginEvent | ToolEvent | SubtaskEvent) =>
75
+ event.event === "tool" && !event.agent && !event.failed,
76
+ (event: StepEvent | SpanBeginEvent | ToolEvent | SubtaskEvent) =>
77
+ event.event === "subtask",
78
+ ];
@@ -1,4 +1,4 @@
1
- import { Events } from "../../../../@types/log";
1
+ import { Events, SpanBeginEvent, SpanEndEvent } from "../../../../@types/log";
2
2
  import { EventNode, EventType } from "../types";
3
3
  import {
4
4
  ACTION_BEGIN,
@@ -11,7 +11,10 @@ import {
11
11
  TOOL,
12
12
  TYPE_AGENT,
13
13
  TYPE_HANDOFF,
14
+ TYPE_SCORER,
15
+ TYPE_SCORERS,
14
16
  TYPE_SOLVER,
17
+ TYPE_SOLVERS,
15
18
  TYPE_SUBTASK,
16
19
  TYPE_TOOL,
17
20
  hasSpans,
@@ -31,6 +34,9 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
31
34
  const rootNodes: EventNode[] = [];
32
35
  const stack: EventNode[] = [];
33
36
 
37
+ // The function used to build the tree
38
+ const treeifyFn = getTreeifyFunction();
39
+
34
40
  const addNode = (event: EventType): EventNode => {
35
41
  const currentDepth = stack.length;
36
42
 
@@ -45,7 +51,11 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
45
51
 
46
52
  // Create a new node
47
53
  const idPath = pathIndices.slice(0, currentDepth + 1).join(".");
48
- const node = new EventNode(idPath, event, currentDepth + depth);
54
+ const node = new EventNode(
55
+ `event_node_${idPath}`,
56
+ event,
57
+ currentDepth + depth,
58
+ );
49
59
  if (stack.length > 0) {
50
60
  const parentNode = stack[stack.length - 1];
51
61
  parentNode.children.push(node);
@@ -65,6 +75,10 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
65
75
  pathIndices.pop();
66
76
  };
67
77
 
78
+ // First inject spans that may be needed
79
+ events = injectScorersSpan(events);
80
+
81
+ // Now treeify the list
68
82
  events.forEach((event) => {
69
83
  treeifyFn(event, addNode, pushStack, popStack);
70
84
  });
@@ -76,156 +90,294 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
76
90
  }
77
91
  }
78
92
 
79
- const treeifyFn: TreeifyFunction = (
80
- event: EventType,
81
- addNode: (event: EventType) => EventNode,
82
- pushStack: (node: EventNode) => void,
83
- popStack: () => void,
84
- ): void => {
85
- switch (event.event) {
86
- case STEP:
87
- if (event.action === ACTION_BEGIN) {
88
- // Starting a new step
89
- const node = addNode(event);
90
- pushStack(node);
93
+ // This injects a scorer span around top level scorer events if one
94
+ // isn't already present
95
+ const kBeginScorerId = "E617087FA405";
96
+ const kEndScorerId = "C39922B09481";
97
+ const kScorersSpanId = "C5A831026F2C";
98
+ const injectScorersSpan = (events: Events): Events => {
99
+ const results: Events = [];
100
+ const collectedScorerEvents: Events = [];
101
+ let hasCollectedScorers = false;
102
+ let collecting: string | null = null;
103
+
104
+ const flushCollected = (): Events => {
105
+ if (collectedScorerEvents.length > 0) {
106
+ const beginSpan: SpanBeginEvent = {
107
+ name: "scorers",
108
+ id: kBeginScorerId,
109
+ span_id: kScorersSpanId,
110
+ event: SPAN_BEGIN,
111
+ type: TYPE_SCORERS,
112
+ timestamp: collectedScorerEvents[0].timestamp,
113
+ working_start: collectedScorerEvents[0].working_start,
114
+ pending: false,
115
+ parent_id: null,
116
+ };
117
+
118
+ const scoreEvents: Events = collectedScorerEvents.map((event) => {
119
+ return {
120
+ ...event,
121
+ parent_id:
122
+ event.event === "span_begin"
123
+ ? event.parent_id || kScorersSpanId
124
+ : null,
125
+ };
126
+ });
127
+
128
+ const endSpan: SpanEndEvent = {
129
+ id: kEndScorerId,
130
+ span_id: kScorersSpanId,
131
+ event: SPAN_END,
132
+ pending: false,
133
+ working_start:
134
+ collectedScorerEvents[collectedScorerEvents.length - 1].working_start,
135
+ timestamp:
136
+ collectedScorerEvents[collectedScorerEvents.length - 1].timestamp,
137
+ };
138
+
139
+ collectedScorerEvents.length = 0;
140
+ hasCollectedScorers = true;
141
+ return [beginSpan, ...scoreEvents, endSpan];
142
+ }
143
+ return [];
144
+ };
145
+
146
+ for (const event of events) {
147
+ // Return events immediately if the scorers span is present
148
+ if (event.event === SPAN_BEGIN && event.type === TYPE_SCORERS) {
149
+ return events;
150
+ }
151
+
152
+ if (
153
+ event.event === SPAN_BEGIN &&
154
+ event.type === TYPE_SCORER &&
155
+ !hasCollectedScorers
156
+ ) {
157
+ collecting = event.span_id;
158
+ }
159
+
160
+ // Look for the first scorer event and then begin
161
+ if (collecting) {
162
+ if (event.event === SPAN_END && event.span_id === collecting) {
163
+ collecting = null;
164
+ results.push(...flushCollected());
165
+ results.push(event);
91
166
  } else {
92
- // An ending step
93
- popStack();
167
+ collectedScorerEvents.push(event);
94
168
  }
95
- break;
96
- case SPAN_BEGIN: {
97
- const node = addNode(event);
98
- pushStack(node);
99
- break;
100
- }
101
- case SPAN_END: {
102
- popStack();
103
- break;
169
+ } else {
170
+ results.push(event);
104
171
  }
105
- case TOOL:
106
- {
107
- const node = addNode(event);
172
+ }
108
173
 
109
- // In the span world, the first child will be a span of type tool
110
- if (
111
- event.events.length > 0 &&
112
- (event.events[0].event !== SPAN_BEGIN ||
113
- event.events[0].type !== TYPE_TOOL)
114
- ) {
115
- // Expand the children
174
+ return results;
175
+ };
176
+
177
+ const getTreeifyFunction = () => {
178
+ const treeifyFn: TreeifyFunction = (
179
+ event: EventType,
180
+ addNode: (event: EventType) => EventNode,
181
+ pushStack: (node: EventNode) => void,
182
+ popStack: () => void,
183
+ ): void => {
184
+ switch (event.event) {
185
+ case STEP:
186
+ if (event.action === ACTION_BEGIN) {
187
+ // Starting a new step
188
+ const node = addNode(event);
116
189
  pushStack(node);
117
- for (const child of event.events) {
118
- treeifyFn(child, addNode, pushStack, popStack);
119
- }
190
+ } else {
191
+ // An ending step
120
192
  popStack();
121
193
  }
122
- }
123
-
124
- break;
125
- case SUBTASK:
126
- {
194
+ break;
195
+ case SPAN_BEGIN: {
127
196
  const node = addNode(event);
197
+ pushStack(node);
198
+ break;
199
+ }
200
+ case SPAN_END: {
201
+ popStack();
202
+ break;
203
+ }
204
+ case TOOL:
205
+ {
206
+ const node = addNode(event);
207
+
208
+ // In the span world, the first child will be a span of type tool
209
+ if (
210
+ event.events.length > 0 &&
211
+ (event.events[0].event !== SPAN_BEGIN ||
212
+ event.events[0].type !== TYPE_TOOL)
213
+ ) {
214
+ // Expand the children
215
+ pushStack(node);
216
+ for (const child of event.events) {
217
+ treeifyFn(child, addNode, pushStack, popStack);
218
+ }
219
+ popStack();
220
+ }
221
+ }
128
222
 
129
- // In the span world, the first child will be a span of type tool
130
- if (
131
- event.events.length > 0 &&
132
- (event.events[0].event !== SPAN_BEGIN ||
133
- event.events[0].type !== TYPE_SUBTASK)
134
- ) {
135
- // Expand the children
136
- pushStack(node);
137
- for (const child of event.events) {
138
- treeifyFn(child, addNode, pushStack, popStack);
223
+ break;
224
+ case SUBTASK:
225
+ {
226
+ const node = addNode(event);
227
+
228
+ // In the span world, the first child will be a span of type subtask
229
+ if (
230
+ event.events.length > 0 &&
231
+ (event.events[0].event !== SPAN_BEGIN ||
232
+ event.events[0].type !== TYPE_SUBTASK)
233
+ ) {
234
+ // Expand the children
235
+ pushStack(node);
236
+ for (const child of event.events) {
237
+ treeifyFn(child, addNode, pushStack, popStack);
238
+ }
239
+ popStack();
139
240
  }
140
- popStack();
141
241
  }
142
- }
143
242
 
144
- break;
145
- default:
146
- // An event
147
- addNode(event);
148
- break;
149
- }
150
- };
243
+ break;
151
244
 
152
- type TreeNodeTransformer = {
153
- name: string;
154
- matches: (node: EventNode) => boolean;
155
- process: (node: EventNode) => EventNode;
245
+ default:
246
+ // An event
247
+ addNode(event);
248
+ break;
249
+ }
250
+ };
251
+ return treeifyFn;
156
252
  };
157
253
 
158
- const treeNodeTransformers: TreeNodeTransformer[] = [
159
- {
160
- name: "unwrap_tools",
161
- matches: (node) =>
162
- node.event.event === SPAN_BEGIN && node.event.type === TYPE_TOOL,
163
- process: (node) => elevateChildNode(node, TYPE_TOOL) || node,
164
- },
165
- {
166
- name: "unwrap_subtasks",
167
- matches: (node) =>
168
- node.event.event === SPAN_BEGIN && node.event.type === TYPE_SUBTASK,
169
- process: (node) => elevateChildNode(node, TYPE_SUBTASK) || node,
170
- },
171
- {
172
- name: "unwrap_agent_solver",
173
- matches: (node) =>
174
- node.event.event === SPAN_BEGIN &&
175
- node.event["type"] === TYPE_SOLVER &&
176
- node.children.length === 2 &&
177
- node.children[0].event.event === SPAN_BEGIN &&
178
- node.children[0].event.type === TYPE_AGENT &&
179
- node.children[1].event.event === STATE,
180
-
181
- process: (node) => skipFirstChildNode(node),
182
- },
183
- {
184
- name: "unwrap_agent_solver w/store",
185
- matches: (node) =>
186
- node.event.event === SPAN_BEGIN &&
187
- node.event["type"] === TYPE_SOLVER &&
188
- node.children.length === 3 &&
189
- node.children[0].event.event === SPAN_BEGIN &&
190
- node.children[0].event.type === TYPE_AGENT &&
191
- node.children[1].event.event === STATE &&
192
- node.children[2].event.event === STORE,
193
- process: (node) => skipFirstChildNode(node),
194
- },
195
- {
196
- name: "unwrap_handoff",
197
- matches: (node) =>
198
- node.event.event === SPAN_BEGIN &&
199
- node.event["type"] === TYPE_HANDOFF &&
200
- node.children.length === 2 &&
201
- node.children[0].event.event === TOOL &&
202
- node.children[1].event.event === STORE &&
203
- node.children[0].children.length === 2 &&
204
- node.children[0].children[0].event.event === SPAN_BEGIN &&
205
- node.children[0].children[0].event.type === TYPE_AGENT,
206
- process: (node) => skipThisNode(node),
207
- },
208
- ];
209
-
210
254
  const transformTree = (roots: EventNode[]): EventNode[] => {
211
- const visitNode = (node: EventNode): EventNode => {
212
- let processedNode = node;
255
+ // Gather the transformers that we'll use
256
+ const treeNodeTransformers: TreeNodeTransformer[] = transformers();
257
+
258
+ const visitNode = (node: EventNode): EventNode | EventNode[] => {
259
+ // Start with the original node
260
+ let currentNodes: EventNode[] = [node];
213
261
 
214
- // Visit children (depth first)
215
- processedNode.children = processedNode.children.map(visitNode);
262
+ // Process children of all nodes first (depth-first)
263
+ currentNodes = currentNodes.map((n) => {
264
+ n.children = n.children.flatMap(visitNode);
265
+ return n;
266
+ });
216
267
 
217
- // Apply any visitors to this node
268
+ // Apply each transformer to all nodes that match
218
269
  for (const transformer of treeNodeTransformers) {
219
- if (transformer.matches(processedNode)) {
220
- processedNode = transformer.process(processedNode);
221
- // Only apply the first matching transformer
222
- break;
270
+ const nextNodes: EventNode[] = [];
271
+
272
+ // Process each current node with this transformer
273
+ for (const currentNode of currentNodes) {
274
+ if (transformer.matches(currentNode)) {
275
+ const result = transformer.process(currentNode);
276
+ if (Array.isArray(result)) {
277
+ nextNodes.push(...result);
278
+ } else {
279
+ nextNodes.push(result);
280
+ }
281
+ } else {
282
+ // Node doesn't match this transformer, keep it unchanged
283
+ nextNodes.push(currentNode);
284
+ }
223
285
  }
286
+
287
+ // Update current nodes for next transformer
288
+ currentNodes = nextNodes;
224
289
  }
225
- return processedNode;
290
+
291
+ // Return all processed nodes
292
+ return currentNodes.length === 1 ? currentNodes[0] : currentNodes;
226
293
  };
227
294
 
228
- return roots.map(visitNode);
295
+ // Process all nodes first
296
+ const processedRoots = roots.flatMap(visitNode);
297
+
298
+ // Call flush on any transformers that have it
299
+ const flushedNodes: EventNode[] = [];
300
+ for (const transformer of treeNodeTransformers) {
301
+ if (transformer.flush) {
302
+ const flushResults = transformer.flush();
303
+ if (flushResults && flushResults.length > 0) {
304
+ flushedNodes.push(...flushResults);
305
+ }
306
+ }
307
+ }
308
+
309
+ return [...processedRoots, ...flushedNodes];
310
+ };
311
+
312
+ const transformers = () => {
313
+ const treeNodeTransformers: TreeNodeTransformer[] = [
314
+ {
315
+ name: "unwrap_tools",
316
+ matches: (node) =>
317
+ node.event.event === SPAN_BEGIN && node.event.type === TYPE_TOOL,
318
+ process: (node) => elevateChildNode(node, TYPE_TOOL) || node,
319
+ },
320
+ {
321
+ name: "unwrap_subtasks",
322
+ matches: (node) =>
323
+ node.event.event === SPAN_BEGIN && node.event.type === TYPE_SUBTASK,
324
+ process: (node) => elevateChildNode(node, TYPE_SUBTASK) || node,
325
+ },
326
+ {
327
+ name: "unwrap_agent_solver",
328
+ matches: (node) =>
329
+ node.event.event === SPAN_BEGIN &&
330
+ node.event["type"] === TYPE_SOLVER &&
331
+ node.children.length === 2 &&
332
+ node.children[0].event.event === SPAN_BEGIN &&
333
+ node.children[0].event.type === TYPE_AGENT &&
334
+ node.children[1].event.event === STATE,
335
+
336
+ process: (node) => skipFirstChildNode(node),
337
+ },
338
+ {
339
+ name: "unwrap_agent_solver w/store",
340
+ matches: (node) =>
341
+ node.event.event === SPAN_BEGIN &&
342
+ node.event["type"] === TYPE_SOLVER &&
343
+ node.children.length === 3 &&
344
+ node.children[0].event.event === SPAN_BEGIN &&
345
+ node.children[0].event.type === TYPE_AGENT &&
346
+ node.children[1].event.event === STATE &&
347
+ node.children[2].event.event === STORE,
348
+ process: (node) => skipFirstChildNode(node),
349
+ },
350
+ {
351
+ name: "unwrap_handoff",
352
+ matches: (node) =>
353
+ node.event.event === SPAN_BEGIN &&
354
+ node.event["type"] === TYPE_HANDOFF &&
355
+ node.children.length === 2 &&
356
+ node.children[0].event.event === TOOL &&
357
+ node.children[1].event.event === STORE &&
358
+ node.children[0].children.length === 2 &&
359
+ node.children[0].children[0].event.event === SPAN_BEGIN &&
360
+ node.children[0].children[0].event.type === TYPE_AGENT,
361
+ process: (node) => skipThisNode(node),
362
+ },
363
+ {
364
+ name: "discard_solvers_span",
365
+ matches: (Node) =>
366
+ Node.event.event === SPAN_BEGIN && Node.event.type === TYPE_SOLVERS,
367
+ process: (node) => {
368
+ const nodes = discardNode(node);
369
+ return nodes;
370
+ },
371
+ },
372
+ ];
373
+ return treeNodeTransformers;
374
+ };
375
+
376
+ type TreeNodeTransformer = {
377
+ name: string;
378
+ matches: (node: EventNode) => boolean;
379
+ process: (node: EventNode) => EventNode | EventNode[];
380
+ flush?: () => EventNode[];
229
381
  };
230
382
 
231
383
  /**
@@ -275,6 +427,11 @@ const skipThisNode = (node: EventNode): EventNode => {
275
427
  return newNode;
276
428
  };
277
429
 
430
+ const discardNode = (node: EventNode): EventNode[] => {
431
+ const nodes = reduceDepth(node.children, 1);
432
+ return nodes;
433
+ };
434
+
278
435
  // Reduce the depth of the children by 1
279
436
  // This is used when we hoist a child node to the parent
280
437
  const reduceDepth = (nodes: EventNode[], depth: number = 1): EventNode[] => {
@@ -297,23 +454,71 @@ const setDepth = (nodes: EventNode[], depth: number): EventNode[] => {
297
454
  });
298
455
  };
299
456
 
457
+ export interface TreeNodeVisitor {
458
+ visit: (node: EventNode, parent?: EventNode) => EventNode[];
459
+ flush?: () => EventNode[];
460
+ }
461
+
300
462
  /**
301
463
  * Flatten the tree structure into a flat array of EventNode objects
302
464
  * Each node in the result will have its children set properly
303
- * @param events - The events to flatten
304
- * @param depth - The current depth in the tree
465
+ * @param eventNodes - The event nodes to flatten
466
+ * @param collapsed - Record indicating which nodes are collapsed
467
+ * @param visitors - Array of visitors to apply to each node
468
+ * @param parentNode - The parent node of the current nodes being processed
305
469
  * @returns An array of EventNode objects
306
470
  */
307
471
  export const flatTree = (
308
472
  eventNodes: EventNode[],
309
- collapsed: Record<string, true> | null,
473
+ collapsed: Record<string, boolean> | null,
474
+ visitors?: TreeNodeVisitor[],
475
+ parentNode?: EventNode,
310
476
  ): EventNode[] => {
311
477
  const result: EventNode[] = [];
312
478
  for (const node of eventNodes) {
313
- result.push(node);
314
- if (collapsed === null || collapsed[node.id] !== true) {
315
- result.push(...flatTree(node.children, collapsed));
479
+ if (visitors && visitors.length > 0) {
480
+ let pendingNodes: EventNode[] = [{ ...node }];
481
+
482
+ for (const visitor of visitors) {
483
+ const allResults: EventNode[] = [];
484
+ for (const pendingNode of pendingNodes) {
485
+ const visitorResult = visitor.visit(pendingNode);
486
+ if (parentNode) {
487
+ parentNode.children = visitorResult;
488
+ }
489
+ allResults.push(...visitorResult);
490
+ }
491
+ pendingNodes = allResults;
492
+ }
493
+
494
+ for (const pendingNode of pendingNodes) {
495
+ const children = flatTree(
496
+ pendingNode.children,
497
+ collapsed,
498
+ visitors,
499
+ pendingNode,
500
+ );
501
+ pendingNode.children = children;
502
+ result.push(pendingNode);
503
+ if (collapsed === null || collapsed[pendingNode.id] !== true) {
504
+ result.push(...children);
505
+ }
506
+ }
507
+
508
+ for (const visitor of visitors) {
509
+ if (visitor.flush) {
510
+ const finalNodes = visitor.flush();
511
+ result.push(...finalNodes);
512
+ }
513
+ }
514
+ } else {
515
+ result.push(node);
516
+ const children = flatTree(node.children, collapsed, visitors, node);
517
+ if (collapsed === null || collapsed[node.id] !== true) {
518
+ result.push(...children);
519
+ }
316
520
  }
317
521
  }
522
+
318
523
  return result;
319
524
  };
@@ -13,8 +13,11 @@ export const STATE = "state";
13
13
  export const TYPE_TOOL = "tool";
14
14
  export const TYPE_SUBTASK = "subtask";
15
15
  export const TYPE_SOLVER = "solver";
16
+ export const TYPE_SOLVERS = "solvers";
16
17
  export const TYPE_AGENT = "agent";
17
18
  export const TYPE_HANDOFF = "handoff";
19
+ export const TYPE_SCORERS = "scorers";
20
+ export const TYPE_SCORER = "scorer";
18
21
 
19
22
  export const hasSpans = (events: Events): boolean => {
20
23
  return events.some((event) => event.event === SPAN_BEGIN);
@@ -26,6 +26,8 @@ export interface StateManager {
26
26
  applyChanges(changes: Changes): object;
27
27
  }
28
28
 
29
+ export const kTranscriptCollapseScope = "transcript-collapse";
30
+
29
31
  export type EventType =
30
32
  | SampleInitEvent
31
33
  | SampleLimitEvent