inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -0
- inspect_ai/_cli/log.py +1 -1
- inspect_ai/_display/core/config.py +11 -5
- inspect_ai/_display/core/panel.py +66 -2
- inspect_ai/_display/core/textual.py +5 -2
- inspect_ai/_display/plain/display.py +1 -0
- inspect_ai/_display/rich/display.py +2 -2
- inspect_ai/_display/textual/widgets/transcript.py +41 -1
- inspect_ai/_eval/run.py +12 -4
- inspect_ai/_eval/score.py +2 -4
- inspect_ai/_eval/task/log.py +1 -1
- inspect_ai/_eval/task/run.py +59 -81
- inspect_ai/_eval/task/task.py +1 -1
- inspect_ai/_util/_async.py +1 -1
- inspect_ai/_util/content.py +11 -6
- inspect_ai/_util/interrupt.py +2 -2
- inspect_ai/_util/text.py +7 -0
- inspect_ai/_util/working.py +8 -37
- inspect_ai/_view/__init__.py +0 -0
- inspect_ai/_view/schema.py +3 -1
- inspect_ai/_view/view.py +14 -0
- inspect_ai/_view/www/CLAUDE.md +15 -0
- inspect_ai/_view/www/dist/assets/index.css +273 -169
- inspect_ai/_view/www/dist/assets/index.js +20079 -17019
- inspect_ai/_view/www/log-schema.json +122 -8
- inspect_ai/_view/www/package.json +5 -1
- inspect_ai/_view/www/src/@types/log.d.ts +20 -2
- inspect_ai/_view/www/src/app/App.tsx +1 -15
- inspect_ai/_view/www/src/app/appearance/icons.ts +4 -1
- inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +24 -6
- inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +0 -5
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +221 -205
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +2 -1
- inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +5 -0
- inspect_ai/_view/www/src/app/routing/url.ts +84 -4
- inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +0 -5
- inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +26 -19
- inspect_ai/_view/www/src/app/samples/SampleSummaryView.module.css +1 -2
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +8 -6
- inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +0 -4
- inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +3 -2
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +2 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +2 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +1 -0
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -0
- inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.module.css +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +2 -3
- inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +1 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +3 -2
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +4 -5
- inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +1 -2
- inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +1 -3
- inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +1 -2
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +3 -4
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.module.css +42 -0
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.tsx +77 -0
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +27 -71
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +13 -3
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +27 -2
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +1 -0
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +21 -22
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.module.css +45 -0
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +223 -0
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +258 -0
- inspect_ai/_view/www/src/app/samples/transcript/outline/tree-visitors.ts +187 -0
- inspect_ai/_view/www/src/app/samples/transcript/state/StateEventRenderers.tsx +8 -1
- inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +3 -4
- inspect_ai/_view/www/src/app/samples/transcript/transform/hooks.ts +78 -0
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +340 -135
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +3 -0
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +2 -0
- inspect_ai/_view/www/src/app/types.ts +5 -1
- inspect_ai/_view/www/src/client/api/api-browser.ts +2 -2
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +6 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +1 -1
- inspect_ai/_view/www/src/components/PopOver.tsx +422 -0
- inspect_ai/_view/www/src/components/PulsingDots.module.css +9 -9
- inspect_ai/_view/www/src/components/PulsingDots.tsx +4 -1
- inspect_ai/_view/www/src/components/StickyScroll.tsx +183 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -0
- inspect_ai/_view/www/src/state/hooks.ts +52 -2
- inspect_ai/_view/www/src/state/logSlice.ts +4 -3
- inspect_ai/_view/www/src/state/samplePolling.ts +8 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +53 -9
- inspect_ai/_view/www/src/state/scrolling.ts +152 -0
- inspect_ai/_view/www/src/utils/attachments.ts +7 -0
- inspect_ai/_view/www/src/utils/python.ts +18 -0
- inspect_ai/_view/www/yarn.lock +269 -6
- inspect_ai/agent/_react.py +12 -7
- inspect_ai/agent/_run.py +46 -11
- inspect_ai/analysis/beta/_dataframe/samples/table.py +19 -18
- inspect_ai/log/_bundle.py +5 -3
- inspect_ai/log/_log.py +3 -3
- inspect_ai/log/_recorders/file.py +2 -9
- inspect_ai/log/_transcript.py +1 -1
- inspect_ai/model/_call_tools.py +6 -2
- inspect_ai/model/_openai.py +1 -1
- inspect_ai/model/_openai_responses.py +78 -39
- inspect_ai/model/_openai_web_search.py +31 -0
- inspect_ai/model/_providers/anthropic.py +3 -6
- inspect_ai/model/_providers/azureai.py +72 -3
- inspect_ai/model/_providers/openai.py +2 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/scorer/_metric.py +1 -2
- inspect_ai/solver/_task_state.py +2 -2
- inspect_ai/tool/_tool.py +6 -2
- inspect_ai/tool/_tool_def.py +27 -4
- inspect_ai/tool/_tool_info.py +2 -0
- inspect_ai/tool/_tools/_web_search/_google.py +15 -4
- inspect_ai/tool/_tools/_web_search/_tavily.py +35 -12
- inspect_ai/tool/_tools/_web_search/_web_search.py +214 -45
- inspect_ai/util/__init__.py +6 -0
- inspect_ai/util/_json.py +3 -0
- inspect_ai/util/_limit.py +374 -141
- inspect_ai/util/_sandbox/docker/compose.py +20 -11
- inspect_ai/util/_span.py +1 -1
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/RECORD +131 -117
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
import { Events } from "../../../../@types/log";
|
1
|
+
import { Events, SpanBeginEvent, SpanEndEvent } from "../../../../@types/log";
|
2
2
|
import { EventNode, EventType } from "../types";
|
3
3
|
import {
|
4
4
|
ACTION_BEGIN,
|
@@ -11,7 +11,10 @@ import {
|
|
11
11
|
TOOL,
|
12
12
|
TYPE_AGENT,
|
13
13
|
TYPE_HANDOFF,
|
14
|
+
TYPE_SCORER,
|
15
|
+
TYPE_SCORERS,
|
14
16
|
TYPE_SOLVER,
|
17
|
+
TYPE_SOLVERS,
|
15
18
|
TYPE_SUBTASK,
|
16
19
|
TYPE_TOOL,
|
17
20
|
hasSpans,
|
@@ -31,6 +34,9 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
|
|
31
34
|
const rootNodes: EventNode[] = [];
|
32
35
|
const stack: EventNode[] = [];
|
33
36
|
|
37
|
+
// The function used to build the tree
|
38
|
+
const treeifyFn = getTreeifyFunction();
|
39
|
+
|
34
40
|
const addNode = (event: EventType): EventNode => {
|
35
41
|
const currentDepth = stack.length;
|
36
42
|
|
@@ -45,7 +51,11 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
|
|
45
51
|
|
46
52
|
// Create a new node
|
47
53
|
const idPath = pathIndices.slice(0, currentDepth + 1).join(".");
|
48
|
-
const node = new EventNode(
|
54
|
+
const node = new EventNode(
|
55
|
+
`event_node_${idPath}`,
|
56
|
+
event,
|
57
|
+
currentDepth + depth,
|
58
|
+
);
|
49
59
|
if (stack.length > 0) {
|
50
60
|
const parentNode = stack[stack.length - 1];
|
51
61
|
parentNode.children.push(node);
|
@@ -65,6 +75,10 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
|
|
65
75
|
pathIndices.pop();
|
66
76
|
};
|
67
77
|
|
78
|
+
// First inject spans that may be needed
|
79
|
+
events = injectScorersSpan(events);
|
80
|
+
|
81
|
+
// Now treeify the list
|
68
82
|
events.forEach((event) => {
|
69
83
|
treeifyFn(event, addNode, pushStack, popStack);
|
70
84
|
});
|
@@ -76,156 +90,294 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
|
|
76
90
|
}
|
77
91
|
}
|
78
92
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
):
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
93
|
+
// This injects a scorer span around top level scorer events if one
|
94
|
+
// isn't already present
|
95
|
+
const kBeginScorerId = "E617087FA405";
|
96
|
+
const kEndScorerId = "C39922B09481";
|
97
|
+
const kScorersSpanId = "C5A831026F2C";
|
98
|
+
const injectScorersSpan = (events: Events): Events => {
|
99
|
+
const results: Events = [];
|
100
|
+
const collectedScorerEvents: Events = [];
|
101
|
+
let hasCollectedScorers = false;
|
102
|
+
let collecting: string | null = null;
|
103
|
+
|
104
|
+
const flushCollected = (): Events => {
|
105
|
+
if (collectedScorerEvents.length > 0) {
|
106
|
+
const beginSpan: SpanBeginEvent = {
|
107
|
+
name: "scorers",
|
108
|
+
id: kBeginScorerId,
|
109
|
+
span_id: kScorersSpanId,
|
110
|
+
event: SPAN_BEGIN,
|
111
|
+
type: TYPE_SCORERS,
|
112
|
+
timestamp: collectedScorerEvents[0].timestamp,
|
113
|
+
working_start: collectedScorerEvents[0].working_start,
|
114
|
+
pending: false,
|
115
|
+
parent_id: null,
|
116
|
+
};
|
117
|
+
|
118
|
+
const scoreEvents: Events = collectedScorerEvents.map((event) => {
|
119
|
+
return {
|
120
|
+
...event,
|
121
|
+
parent_id:
|
122
|
+
event.event === "span_begin"
|
123
|
+
? event.parent_id || kScorersSpanId
|
124
|
+
: null,
|
125
|
+
};
|
126
|
+
});
|
127
|
+
|
128
|
+
const endSpan: SpanEndEvent = {
|
129
|
+
id: kEndScorerId,
|
130
|
+
span_id: kScorersSpanId,
|
131
|
+
event: SPAN_END,
|
132
|
+
pending: false,
|
133
|
+
working_start:
|
134
|
+
collectedScorerEvents[collectedScorerEvents.length - 1].working_start,
|
135
|
+
timestamp:
|
136
|
+
collectedScorerEvents[collectedScorerEvents.length - 1].timestamp,
|
137
|
+
};
|
138
|
+
|
139
|
+
collectedScorerEvents.length = 0;
|
140
|
+
hasCollectedScorers = true;
|
141
|
+
return [beginSpan, ...scoreEvents, endSpan];
|
142
|
+
}
|
143
|
+
return [];
|
144
|
+
};
|
145
|
+
|
146
|
+
for (const event of events) {
|
147
|
+
// Return events immediately if the scorers span is present
|
148
|
+
if (event.event === SPAN_BEGIN && event.type === TYPE_SCORERS) {
|
149
|
+
return events;
|
150
|
+
}
|
151
|
+
|
152
|
+
if (
|
153
|
+
event.event === SPAN_BEGIN &&
|
154
|
+
event.type === TYPE_SCORER &&
|
155
|
+
!hasCollectedScorers
|
156
|
+
) {
|
157
|
+
collecting = event.span_id;
|
158
|
+
}
|
159
|
+
|
160
|
+
// Look for the first scorer event and then begin
|
161
|
+
if (collecting) {
|
162
|
+
if (event.event === SPAN_END && event.span_id === collecting) {
|
163
|
+
collecting = null;
|
164
|
+
results.push(...flushCollected());
|
165
|
+
results.push(event);
|
91
166
|
} else {
|
92
|
-
|
93
|
-
popStack();
|
167
|
+
collectedScorerEvents.push(event);
|
94
168
|
}
|
95
|
-
|
96
|
-
|
97
|
-
const node = addNode(event);
|
98
|
-
pushStack(node);
|
99
|
-
break;
|
100
|
-
}
|
101
|
-
case SPAN_END: {
|
102
|
-
popStack();
|
103
|
-
break;
|
169
|
+
} else {
|
170
|
+
results.push(event);
|
104
171
|
}
|
105
|
-
|
106
|
-
{
|
107
|
-
const node = addNode(event);
|
172
|
+
}
|
108
173
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
174
|
+
return results;
|
175
|
+
};
|
176
|
+
|
177
|
+
const getTreeifyFunction = () => {
|
178
|
+
const treeifyFn: TreeifyFunction = (
|
179
|
+
event: EventType,
|
180
|
+
addNode: (event: EventType) => EventNode,
|
181
|
+
pushStack: (node: EventNode) => void,
|
182
|
+
popStack: () => void,
|
183
|
+
): void => {
|
184
|
+
switch (event.event) {
|
185
|
+
case STEP:
|
186
|
+
if (event.action === ACTION_BEGIN) {
|
187
|
+
// Starting a new step
|
188
|
+
const node = addNode(event);
|
116
189
|
pushStack(node);
|
117
|
-
|
118
|
-
|
119
|
-
}
|
190
|
+
} else {
|
191
|
+
// An ending step
|
120
192
|
popStack();
|
121
193
|
}
|
122
|
-
|
123
|
-
|
124
|
-
break;
|
125
|
-
case SUBTASK:
|
126
|
-
{
|
194
|
+
break;
|
195
|
+
case SPAN_BEGIN: {
|
127
196
|
const node = addNode(event);
|
197
|
+
pushStack(node);
|
198
|
+
break;
|
199
|
+
}
|
200
|
+
case SPAN_END: {
|
201
|
+
popStack();
|
202
|
+
break;
|
203
|
+
}
|
204
|
+
case TOOL:
|
205
|
+
{
|
206
|
+
const node = addNode(event);
|
207
|
+
|
208
|
+
// In the span world, the first child will be a span of type tool
|
209
|
+
if (
|
210
|
+
event.events.length > 0 &&
|
211
|
+
(event.events[0].event !== SPAN_BEGIN ||
|
212
|
+
event.events[0].type !== TYPE_TOOL)
|
213
|
+
) {
|
214
|
+
// Expand the children
|
215
|
+
pushStack(node);
|
216
|
+
for (const child of event.events) {
|
217
|
+
treeifyFn(child, addNode, pushStack, popStack);
|
218
|
+
}
|
219
|
+
popStack();
|
220
|
+
}
|
221
|
+
}
|
128
222
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
223
|
+
break;
|
224
|
+
case SUBTASK:
|
225
|
+
{
|
226
|
+
const node = addNode(event);
|
227
|
+
|
228
|
+
// In the span world, the first child will be a span of type tool
|
229
|
+
if (
|
230
|
+
event.events.length > 0 &&
|
231
|
+
(event.events[0].event !== SPAN_BEGIN ||
|
232
|
+
event.events[0].type !== TYPE_SUBTASK)
|
233
|
+
) {
|
234
|
+
// Expand the children
|
235
|
+
pushStack(node);
|
236
|
+
for (const child of event.events) {
|
237
|
+
treeifyFn(child, addNode, pushStack, popStack);
|
238
|
+
}
|
239
|
+
popStack();
|
139
240
|
}
|
140
|
-
popStack();
|
141
241
|
}
|
142
|
-
}
|
143
242
|
|
144
|
-
|
145
|
-
default:
|
146
|
-
// An event
|
147
|
-
addNode(event);
|
148
|
-
break;
|
149
|
-
}
|
150
|
-
};
|
243
|
+
break;
|
151
244
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
245
|
+
default:
|
246
|
+
// An event
|
247
|
+
addNode(event);
|
248
|
+
break;
|
249
|
+
}
|
250
|
+
};
|
251
|
+
return treeifyFn;
|
156
252
|
};
|
157
253
|
|
158
|
-
const treeNodeTransformers: TreeNodeTransformer[] = [
|
159
|
-
{
|
160
|
-
name: "unwrap_tools",
|
161
|
-
matches: (node) =>
|
162
|
-
node.event.event === SPAN_BEGIN && node.event.type === TYPE_TOOL,
|
163
|
-
process: (node) => elevateChildNode(node, TYPE_TOOL) || node,
|
164
|
-
},
|
165
|
-
{
|
166
|
-
name: "unwrap_subtasks",
|
167
|
-
matches: (node) =>
|
168
|
-
node.event.event === SPAN_BEGIN && node.event.type === TYPE_SUBTASK,
|
169
|
-
process: (node) => elevateChildNode(node, TYPE_SUBTASK) || node,
|
170
|
-
},
|
171
|
-
{
|
172
|
-
name: "unwrap_agent_solver",
|
173
|
-
matches: (node) =>
|
174
|
-
node.event.event === SPAN_BEGIN &&
|
175
|
-
node.event["type"] === TYPE_SOLVER &&
|
176
|
-
node.children.length === 2 &&
|
177
|
-
node.children[0].event.event === SPAN_BEGIN &&
|
178
|
-
node.children[0].event.type === TYPE_AGENT &&
|
179
|
-
node.children[1].event.event === STATE,
|
180
|
-
|
181
|
-
process: (node) => skipFirstChildNode(node),
|
182
|
-
},
|
183
|
-
{
|
184
|
-
name: "unwrap_agent_solver w/store",
|
185
|
-
matches: (node) =>
|
186
|
-
node.event.event === SPAN_BEGIN &&
|
187
|
-
node.event["type"] === TYPE_SOLVER &&
|
188
|
-
node.children.length === 3 &&
|
189
|
-
node.children[0].event.event === SPAN_BEGIN &&
|
190
|
-
node.children[0].event.type === TYPE_AGENT &&
|
191
|
-
node.children[1].event.event === STATE &&
|
192
|
-
node.children[2].event.event === STORE,
|
193
|
-
process: (node) => skipFirstChildNode(node),
|
194
|
-
},
|
195
|
-
{
|
196
|
-
name: "unwrap_handoff",
|
197
|
-
matches: (node) =>
|
198
|
-
node.event.event === SPAN_BEGIN &&
|
199
|
-
node.event["type"] === TYPE_HANDOFF &&
|
200
|
-
node.children.length === 2 &&
|
201
|
-
node.children[0].event.event === TOOL &&
|
202
|
-
node.children[1].event.event === STORE &&
|
203
|
-
node.children[0].children.length === 2 &&
|
204
|
-
node.children[0].children[0].event.event === SPAN_BEGIN &&
|
205
|
-
node.children[0].children[0].event.type === TYPE_AGENT,
|
206
|
-
process: (node) => skipThisNode(node),
|
207
|
-
},
|
208
|
-
];
|
209
|
-
|
210
254
|
const transformTree = (roots: EventNode[]): EventNode[] => {
|
211
|
-
|
212
|
-
|
255
|
+
// Gather the transformers that we'll use
|
256
|
+
const treeNodeTransformers: TreeNodeTransformer[] = transformers();
|
257
|
+
|
258
|
+
const visitNode = (node: EventNode): EventNode | EventNode[] => {
|
259
|
+
// Start with the original node
|
260
|
+
let currentNodes: EventNode[] = [node];
|
213
261
|
|
214
|
-
//
|
215
|
-
|
262
|
+
// Process children of all nodes first (depth-first)
|
263
|
+
currentNodes = currentNodes.map((n) => {
|
264
|
+
n.children = n.children.flatMap(visitNode);
|
265
|
+
return n;
|
266
|
+
});
|
216
267
|
|
217
|
-
// Apply
|
268
|
+
// Apply each transformer to all nodes that match
|
218
269
|
for (const transformer of treeNodeTransformers) {
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
270
|
+
const nextNodes: EventNode[] = [];
|
271
|
+
|
272
|
+
// Process each current node with this transformer
|
273
|
+
for (const currentNode of currentNodes) {
|
274
|
+
if (transformer.matches(currentNode)) {
|
275
|
+
const result = transformer.process(currentNode);
|
276
|
+
if (Array.isArray(result)) {
|
277
|
+
nextNodes.push(...result);
|
278
|
+
} else {
|
279
|
+
nextNodes.push(result);
|
280
|
+
}
|
281
|
+
} else {
|
282
|
+
// Node doesn't match this transformer, keep it unchanged
|
283
|
+
nextNodes.push(currentNode);
|
284
|
+
}
|
223
285
|
}
|
286
|
+
|
287
|
+
// Update current nodes for next transformer
|
288
|
+
currentNodes = nextNodes;
|
224
289
|
}
|
225
|
-
|
290
|
+
|
291
|
+
// Return all processed nodes
|
292
|
+
return currentNodes.length === 1 ? currentNodes[0] : currentNodes;
|
226
293
|
};
|
227
294
|
|
228
|
-
|
295
|
+
// Process all nodes first
|
296
|
+
const processedRoots = roots.flatMap(visitNode);
|
297
|
+
|
298
|
+
// Call flush on any transformers that have it
|
299
|
+
const flushedNodes: EventNode[] = [];
|
300
|
+
for (const transformer of treeNodeTransformers) {
|
301
|
+
if (transformer.flush) {
|
302
|
+
const flushResults = transformer.flush();
|
303
|
+
if (flushResults && flushResults.length > 0) {
|
304
|
+
flushedNodes.push(...flushResults);
|
305
|
+
}
|
306
|
+
}
|
307
|
+
}
|
308
|
+
|
309
|
+
return [...processedRoots, ...flushedNodes];
|
310
|
+
};
|
311
|
+
|
312
|
+
const transformers = () => {
|
313
|
+
const treeNodeTransformers: TreeNodeTransformer[] = [
|
314
|
+
{
|
315
|
+
name: "unwrap_tools",
|
316
|
+
matches: (node) =>
|
317
|
+
node.event.event === SPAN_BEGIN && node.event.type === TYPE_TOOL,
|
318
|
+
process: (node) => elevateChildNode(node, TYPE_TOOL) || node,
|
319
|
+
},
|
320
|
+
{
|
321
|
+
name: "unwrap_subtasks",
|
322
|
+
matches: (node) =>
|
323
|
+
node.event.event === SPAN_BEGIN && node.event.type === TYPE_SUBTASK,
|
324
|
+
process: (node) => elevateChildNode(node, TYPE_SUBTASK) || node,
|
325
|
+
},
|
326
|
+
{
|
327
|
+
name: "unwrap_agent_solver",
|
328
|
+
matches: (node) =>
|
329
|
+
node.event.event === SPAN_BEGIN &&
|
330
|
+
node.event["type"] === TYPE_SOLVER &&
|
331
|
+
node.children.length === 2 &&
|
332
|
+
node.children[0].event.event === SPAN_BEGIN &&
|
333
|
+
node.children[0].event.type === TYPE_AGENT &&
|
334
|
+
node.children[1].event.event === STATE,
|
335
|
+
|
336
|
+
process: (node) => skipFirstChildNode(node),
|
337
|
+
},
|
338
|
+
{
|
339
|
+
name: "unwrap_agent_solver w/store",
|
340
|
+
matches: (node) =>
|
341
|
+
node.event.event === SPAN_BEGIN &&
|
342
|
+
node.event["type"] === TYPE_SOLVER &&
|
343
|
+
node.children.length === 3 &&
|
344
|
+
node.children[0].event.event === SPAN_BEGIN &&
|
345
|
+
node.children[0].event.type === TYPE_AGENT &&
|
346
|
+
node.children[1].event.event === STATE &&
|
347
|
+
node.children[2].event.event === STORE,
|
348
|
+
process: (node) => skipFirstChildNode(node),
|
349
|
+
},
|
350
|
+
{
|
351
|
+
name: "unwrap_handoff",
|
352
|
+
matches: (node) =>
|
353
|
+
node.event.event === SPAN_BEGIN &&
|
354
|
+
node.event["type"] === TYPE_HANDOFF &&
|
355
|
+
node.children.length === 2 &&
|
356
|
+
node.children[0].event.event === TOOL &&
|
357
|
+
node.children[1].event.event === STORE &&
|
358
|
+
node.children[0].children.length === 2 &&
|
359
|
+
node.children[0].children[0].event.event === SPAN_BEGIN &&
|
360
|
+
node.children[0].children[0].event.type === TYPE_AGENT,
|
361
|
+
process: (node) => skipThisNode(node),
|
362
|
+
},
|
363
|
+
{
|
364
|
+
name: "discard_solvers_span",
|
365
|
+
matches: (Node) =>
|
366
|
+
Node.event.event === SPAN_BEGIN && Node.event.type === TYPE_SOLVERS,
|
367
|
+
process: (node) => {
|
368
|
+
const nodes = discardNode(node);
|
369
|
+
return nodes;
|
370
|
+
},
|
371
|
+
},
|
372
|
+
];
|
373
|
+
return treeNodeTransformers;
|
374
|
+
};
|
375
|
+
|
376
|
+
type TreeNodeTransformer = {
|
377
|
+
name: string;
|
378
|
+
matches: (node: EventNode) => boolean;
|
379
|
+
process: (node: EventNode) => EventNode | EventNode[];
|
380
|
+
flush?: () => EventNode[];
|
229
381
|
};
|
230
382
|
|
231
383
|
/**
|
@@ -275,6 +427,11 @@ const skipThisNode = (node: EventNode): EventNode => {
|
|
275
427
|
return newNode;
|
276
428
|
};
|
277
429
|
|
430
|
+
const discardNode = (node: EventNode): EventNode[] => {
|
431
|
+
const nodes = reduceDepth(node.children, 1);
|
432
|
+
return nodes;
|
433
|
+
};
|
434
|
+
|
278
435
|
// Reduce the depth of the children by 1
|
279
436
|
// This is used when we hoist a child node to the parent
|
280
437
|
const reduceDepth = (nodes: EventNode[], depth: number = 1): EventNode[] => {
|
@@ -297,23 +454,71 @@ const setDepth = (nodes: EventNode[], depth: number): EventNode[] => {
|
|
297
454
|
});
|
298
455
|
};
|
299
456
|
|
457
|
+
export interface TreeNodeVisitor {
|
458
|
+
visit: (node: EventNode, parent?: EventNode) => EventNode[];
|
459
|
+
flush?: () => EventNode[];
|
460
|
+
}
|
461
|
+
|
300
462
|
/**
|
301
463
|
* Flatten the tree structure into a flat array of EventNode objects
|
302
464
|
* Each node in the result will have its children set properly
|
303
|
-
* @param
|
304
|
-
* @param
|
465
|
+
* @param eventNodes - The event nodes to flatten
|
466
|
+
* @param collapsed - Record indicating which nodes are collapsed
|
467
|
+
* @param visitors - Array of visitors to apply to each node
|
468
|
+
* @param parentNode - The parent node of the current nodes being processed
|
305
469
|
* @returns An array of EventNode objects
|
306
470
|
*/
|
307
471
|
export const flatTree = (
|
308
472
|
eventNodes: EventNode[],
|
309
|
-
collapsed: Record<string,
|
473
|
+
collapsed: Record<string, boolean> | null,
|
474
|
+
visitors?: TreeNodeVisitor[],
|
475
|
+
parentNode?: EventNode,
|
310
476
|
): EventNode[] => {
|
311
477
|
const result: EventNode[] = [];
|
312
478
|
for (const node of eventNodes) {
|
313
|
-
|
314
|
-
|
315
|
-
|
479
|
+
if (visitors && visitors.length > 0) {
|
480
|
+
let pendingNodes: EventNode[] = [{ ...node }];
|
481
|
+
|
482
|
+
for (const visitor of visitors) {
|
483
|
+
const allResults: EventNode[] = [];
|
484
|
+
for (const pendingNode of pendingNodes) {
|
485
|
+
const visitorResult = visitor.visit(pendingNode);
|
486
|
+
if (parentNode) {
|
487
|
+
parentNode.children = visitorResult;
|
488
|
+
}
|
489
|
+
allResults.push(...visitorResult);
|
490
|
+
}
|
491
|
+
pendingNodes = allResults;
|
492
|
+
}
|
493
|
+
|
494
|
+
for (const pendingNode of pendingNodes) {
|
495
|
+
const children = flatTree(
|
496
|
+
pendingNode.children,
|
497
|
+
collapsed,
|
498
|
+
visitors,
|
499
|
+
pendingNode,
|
500
|
+
);
|
501
|
+
pendingNode.children = children;
|
502
|
+
result.push(pendingNode);
|
503
|
+
if (collapsed === null || collapsed[pendingNode.id] !== true) {
|
504
|
+
result.push(...children);
|
505
|
+
}
|
506
|
+
}
|
507
|
+
|
508
|
+
for (const visitor of visitors) {
|
509
|
+
if (visitor.flush) {
|
510
|
+
const finalNodes = visitor.flush();
|
511
|
+
result.push(...finalNodes);
|
512
|
+
}
|
513
|
+
}
|
514
|
+
} else {
|
515
|
+
result.push(node);
|
516
|
+
const children = flatTree(node.children, collapsed, visitors, node);
|
517
|
+
if (collapsed === null || collapsed[node.id] !== true) {
|
518
|
+
result.push(...children);
|
519
|
+
}
|
316
520
|
}
|
317
521
|
}
|
522
|
+
|
318
523
|
return result;
|
319
524
|
};
|
@@ -13,8 +13,11 @@ export const STATE = "state";
|
|
13
13
|
export const TYPE_TOOL = "tool";
|
14
14
|
export const TYPE_SUBTASK = "subtask";
|
15
15
|
export const TYPE_SOLVER = "solver";
|
16
|
+
export const TYPE_SOLVERS = "solvers";
|
16
17
|
export const TYPE_AGENT = "agent";
|
17
18
|
export const TYPE_HANDOFF = "handoff";
|
19
|
+
export const TYPE_SCORERS = "scorers";
|
20
|
+
export const TYPE_SCORER = "scorer";
|
18
21
|
|
19
22
|
export const hasSpans = (events: Events): boolean => {
|
20
23
|
return events.some((event) => event.event === SPAN_BEGIN);
|
@@ -92,10 +92,14 @@ export interface SampleState {
|
|
92
92
|
sampleError: Error | undefined;
|
93
93
|
sampleNeedsReload: number;
|
94
94
|
|
95
|
+
visiblePopover?: string;
|
96
|
+
|
95
97
|
// Events and attachments
|
96
98
|
runningEvents: Event[];
|
97
|
-
collapsedEvents: Record<string,
|
99
|
+
collapsedEvents: Record<string, Record<string, boolean>> | null;
|
98
100
|
collapsedIdBuckets: Record<string, Record<string, boolean>>;
|
101
|
+
|
102
|
+
selectedOutlineId?: string;
|
99
103
|
}
|
100
104
|
|
101
105
|
export type Event =
|
@@ -113,11 +113,11 @@ async function eval_log_sample_data(
|
|
113
113
|
params.append("log", log_file);
|
114
114
|
params.append("id", String(id));
|
115
115
|
params.append("epoch", String(epoch));
|
116
|
-
if (last_event) {
|
116
|
+
if (last_event !== undefined) {
|
117
117
|
params.append("last-event-id", String(last_event));
|
118
118
|
}
|
119
119
|
|
120
|
-
if (last_attachment) {
|
120
|
+
if (last_attachment !== undefined) {
|
121
121
|
params.append("after-attachment-id", String(last_attachment));
|
122
122
|
}
|
123
123
|
|
@@ -36,6 +36,9 @@ interface LiveVirtualListProps<T> {
|
|
36
36
|
// The initial index to scroll to when loading
|
37
37
|
initialTopMostItemIndex?: number;
|
38
38
|
|
39
|
+
// The offset to use when scrolling items
|
40
|
+
offsetTop?: number;
|
41
|
+
|
39
42
|
components?: Components<T>;
|
40
43
|
}
|
41
44
|
|
@@ -51,6 +54,7 @@ export const LiveVirtualList = <T,>({
|
|
51
54
|
live,
|
52
55
|
showProgress,
|
53
56
|
initialTopMostItemIndex,
|
57
|
+
offsetTop,
|
54
58
|
components,
|
55
59
|
}: LiveVirtualListProps<T>) => {
|
56
60
|
// The list handle and list state management
|
@@ -170,7 +174,8 @@ export const LiveVirtualList = <T,>({
|
|
170
174
|
listHandle.current?.scrollToIndex({
|
171
175
|
index: initialTopMostItemIndex,
|
172
176
|
align: "start",
|
173
|
-
behavior: "
|
177
|
+
behavior: "smooth",
|
178
|
+
offset: offsetTop ? -offsetTop : undefined,
|
174
179
|
});
|
175
180
|
}, 50);
|
176
181
|
|
@@ -59,7 +59,7 @@ export const MarkdownDiv = forwardRef<HTMLDivElement, MarkdownDivProps>(
|
|
59
59
|
ref={ref}
|
60
60
|
dangerouslySetInnerHTML={markup}
|
61
61
|
style={style}
|
62
|
-
className={clsx(className, "markdown-content"
|
62
|
+
className={clsx(className, "markdown-content")}
|
63
63
|
/>
|
64
64
|
);
|
65
65
|
},
|