inspect-ai 0.3.93__py3-none-any.whl → 0.3.95__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_display/textual/widgets/samples.py +3 -3
- inspect_ai/_display/textual/widgets/transcript.py +3 -29
- inspect_ai/_eval/loader.py +1 -1
- inspect_ai/_eval/task/run.py +21 -12
- inspect_ai/_util/answer.py +26 -0
- inspect_ai/_util/constants.py +0 -1
- inspect_ai/_util/exception.py +4 -0
- inspect_ai/_util/hash.py +39 -0
- inspect_ai/_util/local_server.py +51 -21
- inspect_ai/_util/path.py +22 -0
- inspect_ai/_util/trace.py +1 -1
- inspect_ai/_util/working.py +4 -0
- inspect_ai/_view/www/dist/assets/index.css +23 -22
- inspect_ai/_view/www/dist/assets/index.js +517 -204
- inspect_ai/_view/www/log-schema.json +375 -0
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/@types/log.d.ts +90 -12
- inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
- inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
- inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
- inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
- inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
- inspect_ai/_view/www/src/app/types.ts +12 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
- inspect_ai/_view/www/src/state/hooks.ts +19 -3
- inspect_ai/_view/www/src/state/logSlice.ts +23 -5
- inspect_ai/_view/www/yarn.lock +9 -9
- inspect_ai/agent/_as_solver.py +3 -1
- inspect_ai/agent/_as_tool.py +6 -4
- inspect_ai/agent/_bridge/patch.py +1 -3
- inspect_ai/agent/_handoff.py +5 -1
- inspect_ai/agent/_react.py +4 -3
- inspect_ai/agent/_run.py +6 -1
- inspect_ai/agent/_types.py +9 -0
- inspect_ai/analysis/__init__.py +0 -0
- inspect_ai/analysis/beta/__init__.py +57 -0
- inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
- inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
- inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
- inspect_ai/analysis/beta/_dataframe/evals/table.py +140 -0
- inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/events/columns.py +37 -0
- inspect_ai/analysis/beta/_dataframe/events/table.py +14 -0
- inspect_ai/analysis/beta/_dataframe/extract.py +54 -0
- inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
- inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
- inspect_ai/analysis/beta/_dataframe/messages/table.py +87 -0
- inspect_ai/analysis/beta/_dataframe/record.py +377 -0
- inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/samples/columns.py +73 -0
- inspect_ai/analysis/beta/_dataframe/samples/extract.py +82 -0
- inspect_ai/analysis/beta/_dataframe/samples/table.py +329 -0
- inspect_ai/analysis/beta/_dataframe/util.py +157 -0
- inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
- inspect_ai/dataset/_dataset.py +6 -3
- inspect_ai/log/__init__.py +10 -0
- inspect_ai/log/_convert.py +4 -9
- inspect_ai/log/_file.py +1 -1
- inspect_ai/log/_log.py +21 -1
- inspect_ai/log/_samples.py +14 -17
- inspect_ai/log/_transcript.py +77 -35
- inspect_ai/log/_tree.py +118 -0
- inspect_ai/model/_call_tools.py +44 -35
- inspect_ai/model/_model.py +51 -44
- inspect_ai/model/_openai_responses.py +17 -18
- inspect_ai/model/_providers/anthropic.py +30 -5
- inspect_ai/model/_providers/hf.py +27 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/sglang.py +8 -2
- inspect_ai/model/_providers/vllm.py +6 -2
- inspect_ai/scorer/_choice.py +1 -2
- inspect_ai/solver/_chain.py +1 -1
- inspect_ai/solver/_fork.py +1 -1
- inspect_ai/solver/_multiple_choice.py +9 -23
- inspect_ai/solver/_plan.py +2 -2
- inspect_ai/solver/_task_state.py +7 -3
- inspect_ai/solver/_transcript.py +6 -7
- inspect_ai/tool/_mcp/_context.py +3 -5
- inspect_ai/tool/_mcp/_mcp.py +6 -5
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tools/_execute.py +4 -1
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
- inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
- inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
- inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_anyio.py +11 -0
- inspect_ai/util/_collect.py +50 -0
- inspect_ai/util/_sandbox/events.py +3 -2
- inspect_ai/util/_span.py +58 -0
- inspect_ai/util/_subtask.py +27 -42
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/METADATA +8 -1
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/RECORD +114 -82
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/WHEEL +1 -1
- inspect_ai/_display/core/group.py +0 -79
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,12 @@
|
|
1
1
|
// This is a special name that signals a group of sandbox events.
|
2
2
|
|
3
|
-
import {
|
3
|
+
import {
|
4
|
+
Events,
|
5
|
+
SpanBeginEvent,
|
6
|
+
SpanEndEvent,
|
7
|
+
StepEvent,
|
8
|
+
} from "../../../../@types/log";
|
9
|
+
import { hasSpans } from "./utils";
|
4
10
|
|
5
11
|
// It will be caught elsewhere and rendered with a pretty name
|
6
12
|
export const kSandboxSignalName = "53787D8A-D3FC-426D-B383-9F880B70E4AA";
|
@@ -54,39 +60,55 @@ const processPendingEvents = (events: Events, filter: boolean): Events => {
|
|
54
60
|
};
|
55
61
|
|
56
62
|
const collapseSampleInit = (events: Events): Events => {
|
57
|
-
//
|
63
|
+
// Don't perform sample init logic if spans are present
|
64
|
+
const hasSpans = events.some((e) => {
|
65
|
+
return e.event === "span_begin" || e.event === "span_end";
|
66
|
+
});
|
67
|
+
if (hasSpans) {
|
68
|
+
return events;
|
69
|
+
}
|
70
|
+
|
71
|
+
// Don't synthesize a sample init step if one already exists
|
58
72
|
const hasInitStep =
|
59
73
|
events.findIndex((e) => {
|
60
74
|
return e.event === "step" && e.name === "init";
|
61
75
|
}) !== -1;
|
76
|
+
if (hasInitStep) {
|
77
|
+
return events;
|
78
|
+
}
|
62
79
|
|
80
|
+
// Find a sample init event
|
63
81
|
const initEventIndex = events.findIndex((e) => {
|
64
82
|
return e.event === "sample_init";
|
65
83
|
});
|
66
84
|
const initEvent = events[initEventIndex];
|
85
|
+
if (!initEvent) {
|
86
|
+
return events;
|
87
|
+
}
|
67
88
|
|
89
|
+
// Splice in sample init step if needed
|
68
90
|
const fixedUp = [...events];
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
}
|
91
|
+
fixedUp.splice(initEventIndex, 0, {
|
92
|
+
timestamp: initEvent.timestamp,
|
93
|
+
event: "step",
|
94
|
+
action: "begin",
|
95
|
+
type: null,
|
96
|
+
name: "sample_init",
|
97
|
+
pending: false,
|
98
|
+
working_start: 0,
|
99
|
+
span_id: initEvent.span_id,
|
100
|
+
});
|
101
|
+
|
102
|
+
fixedUp.splice(initEventIndex + 2, 0, {
|
103
|
+
timestamp: initEvent.timestamp,
|
104
|
+
event: "step",
|
105
|
+
action: "end",
|
106
|
+
type: null,
|
107
|
+
name: "sample_init",
|
108
|
+
pending: false,
|
109
|
+
working_start: 0,
|
110
|
+
span_id: initEvent.span_id,
|
111
|
+
});
|
90
112
|
return fixedUp;
|
91
113
|
};
|
92
114
|
|
@@ -94,12 +116,22 @@ const groupSandboxEvents = (events: Events): Events => {
|
|
94
116
|
const result: Events = [];
|
95
117
|
const pendingSandboxEvents: Events = [];
|
96
118
|
|
119
|
+
const useSpans = hasSpans(events);
|
120
|
+
|
97
121
|
const pushPendingSandboxEvents = () => {
|
98
122
|
const timestamp =
|
99
123
|
pendingSandboxEvents[pendingSandboxEvents.length - 1].timestamp;
|
100
|
-
|
124
|
+
if (useSpans) {
|
125
|
+
result.push(createSpanBegin(kSandboxSignalName, timestamp, null));
|
126
|
+
} else {
|
127
|
+
result.push(createStepEvent(kSandboxSignalName, timestamp, "begin"));
|
128
|
+
}
|
101
129
|
result.push(...pendingSandboxEvents);
|
102
|
-
|
130
|
+
if (useSpans) {
|
131
|
+
result.push(createSpanEnd(kSandboxSignalName, timestamp));
|
132
|
+
} else {
|
133
|
+
result.push(createStepEvent(kSandboxSignalName, timestamp, "end"));
|
134
|
+
}
|
103
135
|
pendingSandboxEvents.length = 0;
|
104
136
|
};
|
105
137
|
|
@@ -139,4 +171,34 @@ const createStepEvent = (
|
|
139
171
|
name,
|
140
172
|
pending: false,
|
141
173
|
working_start: 0,
|
174
|
+
span_id: null,
|
142
175
|
});
|
176
|
+
|
177
|
+
const createSpanBegin = (
|
178
|
+
name: string,
|
179
|
+
timestamp: string,
|
180
|
+
parent_id: string | null,
|
181
|
+
): SpanBeginEvent => {
|
182
|
+
return {
|
183
|
+
name,
|
184
|
+
id: `${name}-begin`,
|
185
|
+
span_id: name,
|
186
|
+
parent_id,
|
187
|
+
timestamp,
|
188
|
+
event: "span_begin",
|
189
|
+
type: null,
|
190
|
+
pending: false,
|
191
|
+
working_start: 0,
|
192
|
+
};
|
193
|
+
};
|
194
|
+
|
195
|
+
const createSpanEnd = (name: string, timestamp: string): SpanEndEvent => {
|
196
|
+
return {
|
197
|
+
id: `${name}-end`,
|
198
|
+
timestamp,
|
199
|
+
event: "span_end",
|
200
|
+
pending: false,
|
201
|
+
working_start: 0,
|
202
|
+
span_id: name,
|
203
|
+
};
|
204
|
+
};
|
@@ -1,14 +1,28 @@
|
|
1
1
|
import { Events } from "../../../../@types/log";
|
2
2
|
import { EventNode, EventType } from "../types";
|
3
|
+
import {
|
4
|
+
ACTION_BEGIN,
|
5
|
+
ET_SPAN_BEGIN,
|
6
|
+
ET_SPAN_END,
|
7
|
+
ET_STEP,
|
8
|
+
hasSpans,
|
9
|
+
} from "./utils";
|
10
|
+
|
11
|
+
type TreeifyFunction = (
|
12
|
+
event: EventType,
|
13
|
+
addNode: (event: EventType) => EventNode,
|
14
|
+
pushStack: (node: EventNode) => void,
|
15
|
+
popStack: () => void,
|
16
|
+
) => void;
|
3
17
|
|
4
|
-
/**
|
5
|
-
* Gathers events into a hierarchy of EventNodes.
|
6
|
-
*/
|
7
18
|
export function treeifyEvents(events: Events, depth: number): EventNode[] {
|
19
|
+
const useSpans = hasSpans(events);
|
20
|
+
const treeFn = useSpans ? treeifyFnSpan : treeifyFnStep;
|
21
|
+
|
8
22
|
const rootNodes: EventNode[] = [];
|
9
23
|
const stack: EventNode[] = [];
|
10
24
|
|
11
|
-
const
|
25
|
+
const addNode = (event: EventType): EventNode => {
|
12
26
|
const node = new EventNode(event, stack.length + depth);
|
13
27
|
if (stack.length > 0) {
|
14
28
|
const parentNode = stack[stack.length - 1];
|
@@ -19,21 +33,219 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
|
|
19
33
|
return node;
|
20
34
|
};
|
21
35
|
|
36
|
+
const pushStack = (node: EventNode): void => {
|
37
|
+
stack.push(node);
|
38
|
+
};
|
39
|
+
|
40
|
+
const popStack = (): void => {
|
41
|
+
if (stack.length > 0) {
|
42
|
+
stack.pop();
|
43
|
+
}
|
44
|
+
};
|
45
|
+
|
22
46
|
events.forEach((event) => {
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
47
|
+
treeFn(event, addNode, pushStack, popStack);
|
48
|
+
});
|
49
|
+
|
50
|
+
if (useSpans) {
|
51
|
+
return transformTree(rootNodes);
|
52
|
+
} else {
|
53
|
+
return rootNodes;
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
57
|
+
const treeifyFnStep: TreeifyFunction = (
|
58
|
+
event: EventType,
|
59
|
+
addNode: (event: EventType) => EventNode,
|
60
|
+
pushStack: (node: EventNode) => void,
|
61
|
+
popStack: () => void,
|
62
|
+
): void => {
|
63
|
+
switch (event.event) {
|
64
|
+
case ET_STEP:
|
65
|
+
if (event.action === ACTION_BEGIN) {
|
66
|
+
// Starting a new step
|
67
|
+
const node = addNode(event);
|
68
|
+
pushStack(node);
|
69
|
+
} else {
|
70
|
+
// An ending step
|
71
|
+
popStack();
|
31
72
|
}
|
32
|
-
|
73
|
+
break;
|
74
|
+
case ET_SPAN_BEGIN: {
|
75
|
+
// These shouldn't be here, but throw them away
|
76
|
+
break;
|
77
|
+
}
|
78
|
+
case ET_SPAN_END: {
|
79
|
+
// These shouldn't be here, but throw them away
|
80
|
+
break;
|
81
|
+
}
|
82
|
+
default:
|
33
83
|
// An event
|
34
|
-
|
84
|
+
addNode(event);
|
85
|
+
break;
|
86
|
+
}
|
87
|
+
};
|
88
|
+
|
89
|
+
const treeifyFnSpan: TreeifyFunction = (
|
90
|
+
event: EventType,
|
91
|
+
addNode: (event: EventType) => EventNode,
|
92
|
+
pushStack: (node: EventNode) => void,
|
93
|
+
popStack: () => void,
|
94
|
+
): void => {
|
95
|
+
switch (event.event) {
|
96
|
+
case ET_STEP:
|
97
|
+
// strip steps
|
98
|
+
break;
|
99
|
+
case ET_SPAN_BEGIN: {
|
100
|
+
const node = addNode(event);
|
101
|
+
pushStack(node);
|
102
|
+
break;
|
35
103
|
}
|
36
|
-
|
104
|
+
case ET_SPAN_END: {
|
105
|
+
popStack();
|
106
|
+
break;
|
107
|
+
}
|
108
|
+
default:
|
109
|
+
// An event
|
110
|
+
addNode(event);
|
111
|
+
break;
|
112
|
+
}
|
113
|
+
};
|
37
114
|
|
38
|
-
|
39
|
-
|
115
|
+
type TreeNodeTransformer = {
|
116
|
+
name: string;
|
117
|
+
matches: (node: EventNode) => boolean;
|
118
|
+
process: (node: EventNode) => EventNode;
|
119
|
+
};
|
120
|
+
|
121
|
+
const treeNodeTransformers: TreeNodeTransformer[] = [
|
122
|
+
{
|
123
|
+
name: "unwrap_tools",
|
124
|
+
matches: (node) =>
|
125
|
+
node.event.event === "span_begin" && node.event.type === "tool",
|
126
|
+
process: (node) => elevateChildNode(node, "tool") || node,
|
127
|
+
},
|
128
|
+
{
|
129
|
+
name: "unwrap_subtasks",
|
130
|
+
matches: (node) =>
|
131
|
+
node.event.event === "span_begin" && node.event.type === "subtask",
|
132
|
+
process: (node) => elevateChildNode(node, "subtask") || node,
|
133
|
+
},
|
134
|
+
{
|
135
|
+
name: "unwrap_agent_solver",
|
136
|
+
matches: (node) =>
|
137
|
+
node.event.event === "span_begin" &&
|
138
|
+
node.event["type"] === "solver" &&
|
139
|
+
node.children.length === 2 &&
|
140
|
+
node.children[0].event.event === "span_begin" &&
|
141
|
+
node.children[0].event.type === "agent" &&
|
142
|
+
node.children[1].event.event === "state",
|
143
|
+
|
144
|
+
process: (node) => skipFirstChildNode(node),
|
145
|
+
},
|
146
|
+
{
|
147
|
+
name: "unwrap_agent_solver w/store",
|
148
|
+
matches: (node) =>
|
149
|
+
node.event.event === "span_begin" &&
|
150
|
+
node.event["type"] === "solver" &&
|
151
|
+
node.children.length === 3 &&
|
152
|
+
node.children[0].event.event === "span_begin" &&
|
153
|
+
node.children[0].event.type === "agent" &&
|
154
|
+
node.children[1].event.event === "state" &&
|
155
|
+
node.children[2].event.event === "store",
|
156
|
+
process: (node) => skipFirstChildNode(node),
|
157
|
+
},
|
158
|
+
{
|
159
|
+
name: "unwrap_handoff",
|
160
|
+
matches: (node) =>
|
161
|
+
node.event.event === "span_begin" &&
|
162
|
+
node.event["type"] === "handoff" &&
|
163
|
+
node.children.length === 2 &&
|
164
|
+
node.children[0].event.event === "tool" &&
|
165
|
+
node.children[1].event.event === "store" &&
|
166
|
+
node.children[0].children.length === 2 &&
|
167
|
+
node.children[0].children[0].event.event === "span_begin" &&
|
168
|
+
node.children[0].children[0].event.type === "agent",
|
169
|
+
process: (node) => skipThisNode(node),
|
170
|
+
},
|
171
|
+
];
|
172
|
+
|
173
|
+
const transformTree = (roots: EventNode[]): EventNode[] => {
|
174
|
+
const visitNode = (node: EventNode): EventNode => {
|
175
|
+
let processedNode = node;
|
176
|
+
|
177
|
+
// Visit children (depth first)
|
178
|
+
processedNode.children = processedNode.children.map(visitNode);
|
179
|
+
|
180
|
+
// Apply any visitors to this node
|
181
|
+
for (const transformer of treeNodeTransformers) {
|
182
|
+
if (transformer.matches(processedNode)) {
|
183
|
+
processedNode = transformer.process(processedNode);
|
184
|
+
// Only apply the first matching transformer
|
185
|
+
break;
|
186
|
+
}
|
187
|
+
}
|
188
|
+
return processedNode;
|
189
|
+
};
|
190
|
+
|
191
|
+
return roots.map(visitNode);
|
192
|
+
};
|
193
|
+
|
194
|
+
/**
|
195
|
+
* Process a span node by elevating a specific child node type and moving its siblings as children
|
196
|
+
* @template T - Type of the event (either ToolEvent or SubtaskEvent)
|
197
|
+
*/
|
198
|
+
const elevateChildNode = (
|
199
|
+
node: EventNode,
|
200
|
+
childEventType: "tool" | "subtask",
|
201
|
+
): EventNode | null => {
|
202
|
+
// Find the specific event child
|
203
|
+
const targetIndex = node.children.findIndex(
|
204
|
+
(child) => child.event.event === childEventType,
|
205
|
+
);
|
206
|
+
|
207
|
+
if (targetIndex === -1) {
|
208
|
+
console.log(
|
209
|
+
`No ${childEventType} event found in a span, this is very unexpected.`,
|
210
|
+
);
|
211
|
+
return null;
|
212
|
+
}
|
213
|
+
|
214
|
+
// Get the target node and set its depth
|
215
|
+
const targetNode = { ...node.children[targetIndex] };
|
216
|
+
const remainingChildren = node.children.filter((_, i) => i !== targetIndex);
|
217
|
+
|
218
|
+
// Process the remaining children
|
219
|
+
targetNode.depth = node.depth;
|
220
|
+
targetNode.children = reduceDepth(remainingChildren);
|
221
|
+
|
222
|
+
// No need to update the event itself (events have been deprecated
|
223
|
+
// and more importantly we drive children / transcripts using the tree structure itself
|
224
|
+
// and nodes rather than the event.events itself)
|
225
|
+
return targetNode;
|
226
|
+
};
|
227
|
+
|
228
|
+
const skipFirstChildNode = (node: EventNode): EventNode => {
|
229
|
+
const agentSpan = node.children.splice(0, 1)[0];
|
230
|
+
node.children.unshift(...reduceDepth(agentSpan.children));
|
231
|
+
return node;
|
232
|
+
};
|
233
|
+
|
234
|
+
const skipThisNode = (node: EventNode): EventNode => {
|
235
|
+
const newNode = { ...node.children[0] };
|
236
|
+
newNode.depth = node.depth;
|
237
|
+
newNode.children = reduceDepth(newNode.children[0].children, 2);
|
238
|
+
return newNode;
|
239
|
+
};
|
240
|
+
|
241
|
+
// Reduce the depth of the children by 1
|
242
|
+
// This is used when we hoist a child node to the parent
|
243
|
+
const reduceDepth = (nodes: EventNode[], depth: number = 1): EventNode[] => {
|
244
|
+
return nodes.map((node) => {
|
245
|
+
if (node.children.length > 0) {
|
246
|
+
node.children = reduceDepth(node.children, 1);
|
247
|
+
}
|
248
|
+
node.depth = node.depth - depth;
|
249
|
+
return node;
|
250
|
+
});
|
251
|
+
};
|
@@ -0,0 +1,11 @@
|
|
1
|
+
import { Events } from "../../../../@types/log";
|
2
|
+
|
3
|
+
export const ET_STEP = "step";
|
4
|
+
export const ACTION_BEGIN = "begin";
|
5
|
+
|
6
|
+
export const ET_SPAN_BEGIN = "span_begin";
|
7
|
+
export const ET_SPAN_END = "span_end";
|
8
|
+
|
9
|
+
export const hasSpans = (events: Events): boolean => {
|
10
|
+
return events.some((event) => event.event === ET_SPAN_BEGIN);
|
11
|
+
};
|
@@ -10,6 +10,8 @@ import {
|
|
10
10
|
SampleLimitEvent,
|
11
11
|
SandboxEvent,
|
12
12
|
ScoreEvent,
|
13
|
+
SpanBeginEvent,
|
14
|
+
SpanEndEvent,
|
13
15
|
StateEvent,
|
14
16
|
StepEvent,
|
15
17
|
StoreEvent,
|
@@ -39,7 +41,9 @@ export type EventType =
|
|
39
41
|
| InputEvent
|
40
42
|
| ErrorEvent
|
41
43
|
| ApprovalEvent
|
42
|
-
| SandboxEvent
|
44
|
+
| SandboxEvent
|
45
|
+
| SpanBeginEvent
|
46
|
+
| SpanEndEvent;
|
43
47
|
|
44
48
|
export class EventNode {
|
45
49
|
event: EventType;
|
@@ -67,7 +67,9 @@ export interface LogState {
|
|
67
67
|
selectedLogSummary?: EvalSummary;
|
68
68
|
pendingSampleSummaries?: PendingSamples;
|
69
69
|
|
70
|
-
filter:
|
70
|
+
filter: string;
|
71
|
+
filterError?: FilterError;
|
72
|
+
|
71
73
|
epoch: string;
|
72
74
|
sort: string;
|
73
75
|
score?: ScoreLabel;
|
@@ -122,8 +124,16 @@ export interface ScoreLabel {
|
|
122
124
|
scorer: string;
|
123
125
|
}
|
124
126
|
|
125
|
-
export interface
|
127
|
+
export interface SampleFilter {
|
126
128
|
value?: string;
|
129
|
+
error?: FilterError;
|
130
|
+
}
|
131
|
+
|
132
|
+
export interface FilterError {
|
133
|
+
from: number;
|
134
|
+
to: number;
|
135
|
+
message: string;
|
136
|
+
severity: "warning" | "error";
|
127
137
|
}
|
128
138
|
|
129
139
|
export type SampleMode = "none" | "single" | "many";
|
@@ -27,19 +27,19 @@ export const ExpandablePanel: FC<ExpandablePanelProps> = memo(
|
|
27
27
|
const [collapsed, setCollapsed] = useCollapsedState(id, collapse);
|
28
28
|
|
29
29
|
const [showToggle, setShowToggle] = useState(false);
|
30
|
-
const
|
30
|
+
const baseFontSizeRef = useRef<number>(0);
|
31
31
|
|
32
32
|
const checkOverflow = useCallback(
|
33
33
|
(entry: ResizeObserverEntry) => {
|
34
34
|
const element = entry.target as HTMLDivElement;
|
35
35
|
|
36
36
|
// Calculate line height if we haven't yet
|
37
|
-
if (
|
37
|
+
if (baseFontSizeRef.current === 0) {
|
38
38
|
const computedStyle = window.getComputedStyle(element);
|
39
|
-
|
39
|
+
const rootFontSize = parseFloat(computedStyle.fontSize);
|
40
|
+
baseFontSizeRef.current = rootFontSize;
|
40
41
|
}
|
41
|
-
|
42
|
-
const maxCollapsedHeight = lines * lineHeightRef.current;
|
42
|
+
const maxCollapsedHeight = baseFontSizeRef.current * lines;
|
43
43
|
const contentHeight = element.scrollHeight;
|
44
44
|
|
45
45
|
setShowToggle(contentHeight > maxCollapsedHeight);
|
@@ -132,6 +132,11 @@ export const useFilteredSamples = () => {
|
|
132
132
|
const evalDescriptor = useEvalDescriptor();
|
133
133
|
const sampleSummaries = useSampleSummaries();
|
134
134
|
const filter = useStore((state) => state.log.filter);
|
135
|
+
const setFilterError = useStore((state) => state.logActions.setFilterError);
|
136
|
+
const clearFilterError = useStore(
|
137
|
+
(state) => state.logActions.clearFilterError,
|
138
|
+
);
|
139
|
+
|
135
140
|
const epoch = useStore((state) => state.log.epoch);
|
136
141
|
const sort = useStore((state) => state.log.sort);
|
137
142
|
const samplesDescriptor = useSampleDescriptor();
|
@@ -139,10 +144,19 @@ export const useFilteredSamples = () => {
|
|
139
144
|
|
140
145
|
return useMemo(() => {
|
141
146
|
// Apply filters
|
147
|
+
const { result, error, allErrors } =
|
148
|
+
evalDescriptor && filter
|
149
|
+
? filterSamples(evalDescriptor, sampleSummaries, filter)
|
150
|
+
: { result: sampleSummaries, error: undefined, allErrors: false };
|
151
|
+
|
152
|
+
if (error && allErrors) {
|
153
|
+
setFilterError(error);
|
154
|
+
} else {
|
155
|
+
clearFilterError();
|
156
|
+
}
|
157
|
+
|
142
158
|
const prefiltered =
|
143
|
-
|
144
|
-
? filterSamples(evalDescriptor, sampleSummaries, filter.value).result
|
145
|
-
: sampleSummaries;
|
159
|
+
error === undefined || !allErrors ? result : sampleSummaries;
|
146
160
|
|
147
161
|
// Filter epochs
|
148
162
|
const filtered =
|
@@ -160,6 +174,8 @@ export const useFilteredSamples = () => {
|
|
160
174
|
evalDescriptor,
|
161
175
|
sampleSummaries,
|
162
176
|
filter,
|
177
|
+
setFilterError,
|
178
|
+
clearFilterError,
|
163
179
|
epoch,
|
164
180
|
sort,
|
165
181
|
samplesDescriptor,
|
@@ -1,4 +1,4 @@
|
|
1
|
-
import {
|
1
|
+
import { FilterError, LogState, ScoreLabel } from "../app/types";
|
2
2
|
import { EvalSummary, PendingSamples } from "../client/api/types";
|
3
3
|
import { kDefaultSort, kLogViewInfoTabId } from "../constants";
|
4
4
|
import { createLogger } from "../utils/logger";
|
@@ -23,7 +23,13 @@ export interface LogSlice {
|
|
23
23
|
setPendingSampleSummaries: (samples: PendingSamples) => void;
|
24
24
|
|
25
25
|
// Set filter criteria
|
26
|
-
setFilter: (filter:
|
26
|
+
setFilter: (filter: string) => void;
|
27
|
+
|
28
|
+
// Set the filter error
|
29
|
+
setFilterError: (error: FilterError) => void;
|
30
|
+
|
31
|
+
// Clear the filter error
|
32
|
+
clearFilterError: () => void;
|
27
33
|
|
28
34
|
// Set epoch filter
|
29
35
|
setEpoch: (epoch: string) => void;
|
@@ -60,7 +66,9 @@ const initialState = {
|
|
60
66
|
loadedLog: undefined,
|
61
67
|
|
62
68
|
// Filter state
|
63
|
-
filter:
|
69
|
+
filter: "",
|
70
|
+
filterError: undefined,
|
71
|
+
|
64
72
|
epoch: "all",
|
65
73
|
sort: kDefaultSort,
|
66
74
|
score: undefined,
|
@@ -110,10 +118,19 @@ export const createLogSlice = (
|
|
110
118
|
state.log.pendingSampleSummaries = pendingSampleSummaries;
|
111
119
|
}),
|
112
120
|
|
113
|
-
setFilter: (filter:
|
121
|
+
setFilter: (filter: string) =>
|
114
122
|
set((state) => {
|
115
123
|
state.log.filter = filter;
|
116
124
|
}),
|
125
|
+
setFilterError: (error: FilterError) =>
|
126
|
+
set((state) => {
|
127
|
+
state.log.filterError = error;
|
128
|
+
}),
|
129
|
+
clearFilterError: () => {
|
130
|
+
set((state) => {
|
131
|
+
state.log.filterError = undefined;
|
132
|
+
});
|
133
|
+
},
|
117
134
|
setEpoch: (epoch: string) =>
|
118
135
|
set((state) => {
|
119
136
|
state.log.epoch = epoch;
|
@@ -132,7 +149,8 @@ export const createLogSlice = (
|
|
132
149
|
}),
|
133
150
|
resetFiltering: () =>
|
134
151
|
set((state) => {
|
135
|
-
state.log.filter =
|
152
|
+
state.log.filter = "";
|
153
|
+
state.log.filterError = undefined;
|
136
154
|
state.log.epoch = "all";
|
137
155
|
state.log.sort = kDefaultSort;
|
138
156
|
state.log.score = undefined;
|
inspect_ai/_view/www/yarn.lock
CHANGED
@@ -4254,17 +4254,17 @@ react-refresh@^0.17.0:
|
|
4254
4254
|
resolved "https://registry.yarnpkg.com/react-refresh/-/react-refresh-0.17.0.tgz#b7e579c3657f23d04eccbe4ad2e58a8ed51e7e53"
|
4255
4255
|
integrity sha512-z6F7K9bV85EfseRCp2bzrpyQ0Gkw1uLoCel9XBVWPg/TjRj94SkJzUTGfOa4bs7iJvBWtQG0Wq7wnI0syw3EBQ==
|
4256
4256
|
|
4257
|
-
react-router-dom@^7.5.
|
4258
|
-
version "7.5.
|
4259
|
-
resolved "https://registry.yarnpkg.com/react-router-dom/-/react-router-dom-7.5.
|
4260
|
-
integrity sha512-
|
4257
|
+
react-router-dom@^7.5.3:
|
4258
|
+
version "7.5.3"
|
4259
|
+
resolved "https://registry.yarnpkg.com/react-router-dom/-/react-router-dom-7.5.3.tgz#496e9f6d90f731703c7772668b41747028e0a2d5"
|
4260
|
+
integrity sha512-cK0jSaTyW4jV9SRKAItMIQfWZ/D6WEZafgHuuCb9g+SjhLolY78qc+De4w/Cz9ybjvLzShAmaIMEXt8iF1Cm+A==
|
4261
4261
|
dependencies:
|
4262
|
-
react-router "7.5.
|
4262
|
+
react-router "7.5.3"
|
4263
4263
|
|
4264
|
-
react-router@7.5.
|
4265
|
-
version "7.5.
|
4266
|
-
resolved "https://registry.yarnpkg.com/react-router/-/react-router-7.5.
|
4267
|
-
integrity sha512
|
4264
|
+
react-router@7.5.3:
|
4265
|
+
version "7.5.3"
|
4266
|
+
resolved "https://registry.yarnpkg.com/react-router/-/react-router-7.5.3.tgz#9e5420832af8c3690740c1797d4fa54613fea06d"
|
4267
|
+
integrity sha512-3iUDM4/fZCQ89SXlDa+Ph3MevBrozBAI655OAfWQlTm9nBR0IKlrmNwFow5lPHttbwvITZfkeeeZFP6zt3F7pw==
|
4268
4268
|
dependencies:
|
4269
4269
|
cookie "^1.0.1"
|
4270
4270
|
set-cookie-parser "^2.6.0"
|
inspect_ai/agent/_as_solver.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
3
3
|
from typing import TYPE_CHECKING, Any
|
4
4
|
|
5
5
|
from inspect_ai.util._limit import Limit, apply_limits
|
6
|
+
from inspect_ai.util._span import span
|
6
7
|
|
7
8
|
if TYPE_CHECKING:
|
8
9
|
from inspect_ai.solver._solver import Solver
|
@@ -61,7 +62,8 @@ def as_solver(agent: Agent, limits: list[Limit] = [], **agent_kwargs: Any) -> So
|
|
61
62
|
try:
|
62
63
|
# run the agent with limits
|
63
64
|
with apply_limits(limits):
|
64
|
-
|
65
|
+
async with span(name=agent_name, type="agent"):
|
66
|
+
agent_state = await agent(agent_state, **agent_kwargs)
|
65
67
|
# if an exception occurs, we still want to update the TaskState with the
|
66
68
|
# AgentState's messages + output so that it appears in the log and is scored
|
67
69
|
finally:
|