inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/common.py +7 -3
- inspect_ai/_cli/eval.py +17 -2
- inspect_ai/_cli/trace.py +21 -2
- inspect_ai/_display/core/active.py +4 -3
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +4 -9
- inspect_ai/_display/textual/app.py +4 -1
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +119 -16
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +32 -20
- inspect_ai/_eval/evalset.py +7 -5
- inspect_ai/_eval/score.py +1 -0
- inspect_ai/_eval/task/__init__.py +2 -2
- inspect_ai/_eval/task/images.py +40 -25
- inspect_ai/_eval/task/results.py +50 -22
- inspect_ai/_eval/task/run.py +180 -124
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_eval/task/task.py +140 -25
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/content.py +23 -1
- inspect_ai/_util/images.py +20 -17
- inspect_ai/_util/kvstore.py +73 -0
- inspect_ai/_util/notgiven.py +18 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_util/thread.py +5 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25375 -1846
- inspect_ai/_view/www/log-schema.json +129 -15
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +8 -10
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
- inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +75 -2
- inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +62 -27
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/Json.mjs +12 -6
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/util.py +2 -2
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/dataset/_sources/csv.py +2 -1
- inspect_ai/dataset/_sources/json.py +2 -1
- inspect_ai/dataset/_sources/util.py +15 -7
- inspect_ai/log/_condense.py +11 -1
- inspect_ai/log/_log.py +3 -6
- inspect_ai/log/_recorders/eval.py +19 -8
- inspect_ai/log/_samples.py +26 -5
- inspect_ai/log/_transcript.py +32 -2
- inspect_ai/model/__init__.py +10 -2
- inspect_ai/model/_call_tools.py +59 -12
- inspect_ai/model/_chat_message.py +2 -4
- inspect_ai/model/_conversation.py +61 -0
- inspect_ai/model/_generate_config.py +10 -4
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +7 -2
- inspect_ai/model/_providers/anthropic.py +109 -51
- inspect_ai/model/_providers/azureai.py +26 -24
- inspect_ai/model/_providers/bedrock.py +43 -44
- inspect_ai/model/_providers/google.py +121 -58
- inspect_ai/model/_providers/groq.py +7 -5
- inspect_ai/model/_providers/hf.py +11 -6
- inspect_ai/model/_providers/mistral.py +17 -20
- inspect_ai/model/_providers/openai.py +32 -21
- inspect_ai/model/_providers/openai_o1.py +9 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +8 -8
- inspect_ai/model/_providers/vertex.py +18 -8
- inspect_ai/scorer/__init__.py +13 -2
- inspect_ai/scorer/_metrics/__init__.py +2 -2
- inspect_ai/scorer/_metrics/std.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +2 -2
- inspect_ai/solver/__init__.py +2 -5
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +11 -1
- inspect_ai/tool/_tool.py +21 -3
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -3
- inspect_ai/util/{_trace.py → _conversation.py} +3 -17
- inspect_ai/util/_display.py +14 -4
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/context.py +12 -13
- inspect_ai/util/_sandbox/docker/compose.py +24 -11
- inspect_ai/util/_sandbox/docker/docker.py +84 -14
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/environment.py +27 -1
- inspect_ai/util/_sandbox/local.py +1 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/model/_trace.py +0 -48
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
|
|
1
1
|
// @ts-check
|
2
2
|
import { html } from "htm/preact";
|
3
|
+
import { useCallback, useState } from "preact/hooks";
|
3
4
|
import { SampleInitEventView } from "./SampleInitEventView.mjs";
|
4
5
|
import { StateEventView } from "./state/StateEventView.mjs";
|
5
6
|
import { StepEventView } from "./StepEventView.mjs";
|
@@ -15,6 +16,8 @@ import { ApprovalEventView } from "./ApprovalEventView.mjs";
|
|
15
16
|
import { SampleLimitEventView } from "./SampleLimitEventView.mjs";
|
16
17
|
import { FontSize } from "../../appearance/Fonts.mjs";
|
17
18
|
import { EventNode } from "./Types.mjs";
|
19
|
+
// @ts-ignore
|
20
|
+
import { VirtualList } from "../../components/VirtualList.mjs";
|
18
21
|
|
19
22
|
/**
|
20
23
|
* Renders the TranscriptView component.
|
@@ -26,10 +29,61 @@ import { EventNode } from "./Types.mjs";
|
|
26
29
|
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
27
30
|
*/
|
28
31
|
export const TranscriptView = ({ id, events, depth = 0 }) => {
|
32
|
+
const [transcriptState, setTranscriptState] = useState({});
|
33
|
+
const onTranscriptState = useCallback(
|
34
|
+
(state) => {
|
35
|
+
setTranscriptState(state);
|
36
|
+
},
|
37
|
+
[transcriptState, setTranscriptState],
|
38
|
+
);
|
39
|
+
|
29
40
|
// Normalize Events themselves
|
30
41
|
const resolvedEvents = fixupEventStream(events);
|
31
42
|
const eventNodes = treeifyEvents(resolvedEvents, depth);
|
32
|
-
return html`
|
43
|
+
return html`
|
44
|
+
<${TranscriptComponent}
|
45
|
+
id=${id}
|
46
|
+
eventNodes=${eventNodes}
|
47
|
+
transcriptState=${transcriptState}
|
48
|
+
setTranscriptState=${onTranscriptState}
|
49
|
+
/>
|
50
|
+
`;
|
51
|
+
};
|
52
|
+
|
53
|
+
/**
|
54
|
+
* Renders the Transcript component.
|
55
|
+
*
|
56
|
+
* @param {Object} props - The parameters for the component.
|
57
|
+
* @param {string} props.id - The identifier for this view
|
58
|
+
* @param {import("../../types/log").Events} props.events - The transcript events to display.
|
59
|
+
* @param {Object} props.style - The transcript style to display.
|
60
|
+
* @param {number} props.depth - The base depth for this transcript view
|
61
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
|
62
|
+
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
63
|
+
*/
|
64
|
+
export const TranscriptVirtualList = (props) => {
|
65
|
+
let { id, scrollRef, events, depth, style } = props;
|
66
|
+
|
67
|
+
// Normalize Events themselves
|
68
|
+
const resolvedEvents = fixupEventStream(events);
|
69
|
+
const eventNodes = treeifyEvents(resolvedEvents, depth);
|
70
|
+
|
71
|
+
const [transcriptState, setTranscriptState] = useState({});
|
72
|
+
const onTranscriptState = useCallback(
|
73
|
+
(state) => {
|
74
|
+
setTranscriptState(state);
|
75
|
+
},
|
76
|
+
[transcriptState, setTranscriptState],
|
77
|
+
);
|
78
|
+
|
79
|
+
return html`<${TranscriptVirtualListComponent}
|
80
|
+
id=${id}
|
81
|
+
eventNodes=${eventNodes}
|
82
|
+
style=${style}
|
83
|
+
scrollRef=${scrollRef}
|
84
|
+
transcriptState=${transcriptState}
|
85
|
+
setTranscriptState=${onTranscriptState}
|
86
|
+
/>`;
|
33
87
|
};
|
34
88
|
|
35
89
|
/**
|
@@ -39,9 +93,81 @@ export const TranscriptView = ({ id, events, depth = 0 }) => {
|
|
39
93
|
* @param {string} props.id - The identifier for this view
|
40
94
|
* @param {EventNode[]} props.eventNodes - The transcript events nodes to display.
|
41
95
|
* @param {Object} props.style - The transcript style to display.
|
96
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
|
97
|
+
* @param {import("./Types.mjs").TranscriptState} props.transcriptState - The state for this transcript
|
98
|
+
* @param {(state: import("./Types.mjs").TranscriptState) => void} props.setTranscriptState - Set the transcript state for this transcript
|
42
99
|
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
43
100
|
*/
|
44
|
-
export const TranscriptComponent = ({ id, eventNodes, style }) => {
|
101
|
+
export const TranscriptVirtualListComponent = ({
|
102
|
+
id,
|
103
|
+
eventNodes,
|
104
|
+
style,
|
105
|
+
scrollRef,
|
106
|
+
transcriptState,
|
107
|
+
setTranscriptState,
|
108
|
+
}) => {
|
109
|
+
const renderRow = (item, index) => {
|
110
|
+
const toggleStyle = {};
|
111
|
+
if (item.depth % 2 == 0) {
|
112
|
+
toggleStyle.backgroundColor = "var(--bs-light-bg-subtle)";
|
113
|
+
} else {
|
114
|
+
toggleStyle.backgroundColor = "var(--bs-body-bg)";
|
115
|
+
}
|
116
|
+
|
117
|
+
let paddingTop = "0";
|
118
|
+
if (index === 0) {
|
119
|
+
paddingTop = ".5em";
|
120
|
+
}
|
121
|
+
const eventId = `${id}-event${index}`;
|
122
|
+
const setEventState = useCallback(
|
123
|
+
(state) => {
|
124
|
+
setTranscriptState({ ...transcriptState, [eventId]: state });
|
125
|
+
},
|
126
|
+
[setTranscriptState, transcriptState],
|
127
|
+
);
|
128
|
+
|
129
|
+
return html`<div style=${{ paddingTop, paddingBottom: ".5em" }}>
|
130
|
+
<${RenderedEventNode}
|
131
|
+
id=${eventId}
|
132
|
+
node=${item}
|
133
|
+
style=${{
|
134
|
+
...toggleStyle,
|
135
|
+
...style,
|
136
|
+
}}
|
137
|
+
scrollRef=${scrollRef}
|
138
|
+
eventState=${transcriptState[eventId] || {}}
|
139
|
+
setEventState=${setEventState}
|
140
|
+
/>
|
141
|
+
</div>`;
|
142
|
+
};
|
143
|
+
|
144
|
+
return html`<${VirtualList}
|
145
|
+
data=${eventNodes}
|
146
|
+
tabIndex="0"
|
147
|
+
renderRow=${renderRow}
|
148
|
+
scrollRef=${scrollRef}
|
149
|
+
style=${{ width: "100%", marginTop: "1em" }}
|
150
|
+
/>`;
|
151
|
+
};
|
152
|
+
|
153
|
+
/**
|
154
|
+
* Renders the Transcript component.
|
155
|
+
*
|
156
|
+
* @param {Object} props - The parameters for the component.
|
157
|
+
* @param {string} props.id - The identifier for this view
|
158
|
+
* @param {EventNode[]} props.eventNodes - The transcript events nodes to display.
|
159
|
+
* @param {Object} props.style - The transcript style to display.
|
160
|
+
* @param {import("./Types.mjs").TranscriptState} props.transcriptState - The state for this transcript
|
161
|
+
* @param {(state: import("./Types.mjs").TranscriptState) => void} props.setTranscriptState - Set the transcript state for this transcript
|
162
|
+
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
163
|
+
*/
|
164
|
+
export const TranscriptComponent = ({
|
165
|
+
id,
|
166
|
+
transcriptState,
|
167
|
+
setTranscriptState,
|
168
|
+
eventNodes,
|
169
|
+
style,
|
170
|
+
}) => {
|
45
171
|
const rows = eventNodes.map((eventNode, index) => {
|
46
172
|
const toggleStyle = {};
|
47
173
|
if (eventNode.depth % 2 == 0) {
|
@@ -55,15 +181,32 @@ export const TranscriptComponent = ({ id, eventNodes, style }) => {
|
|
55
181
|
toggleStyle.marginBottom = "1.5em";
|
56
182
|
}
|
57
183
|
|
184
|
+
let paddingBottom = ".5em";
|
185
|
+
if (index === eventNodes.length - 1) {
|
186
|
+
paddingBottom = "0";
|
187
|
+
}
|
188
|
+
|
189
|
+
const eventId = `${id}-event${index}`;
|
190
|
+
const setEventState = useCallback(
|
191
|
+
(state) => {
|
192
|
+
setTranscriptState({ ...transcriptState, [eventId]: state });
|
193
|
+
},
|
194
|
+
[setTranscriptState, transcriptState],
|
195
|
+
);
|
196
|
+
|
58
197
|
const row = html`
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
198
|
+
<div style=${{ paddingBottom }}>
|
199
|
+
<${RenderedEventNode}
|
200
|
+
id=${eventId}
|
201
|
+
node=${eventNode}
|
202
|
+
style=${{
|
203
|
+
...toggleStyle,
|
204
|
+
...style,
|
205
|
+
}}
|
206
|
+
eventState=${transcriptState[eventId] || {}}
|
207
|
+
setEventState=${setEventState}
|
208
|
+
/>
|
209
|
+
</div>
|
67
210
|
`;
|
68
211
|
return row;
|
69
212
|
});
|
@@ -89,14 +232,26 @@ export const TranscriptComponent = ({ id, eventNodes, style }) => {
|
|
89
232
|
* @param {string} props.id - The id for this event.
|
90
233
|
* @param { EventNode } props.node - This event.
|
91
234
|
* @param { Object } props.style - The style for this node.
|
235
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
|
236
|
+
* @param {import("./Types.mjs").TranscriptEventState} props.eventState - The state for this event
|
237
|
+
* @param {(state: import("./Types.mjs").TranscriptEventState) => void} props.setEventState - Update the state for this event
|
92
238
|
* @returns {import("preact").JSX.Element} The rendered event.
|
93
239
|
*/
|
94
|
-
export const RenderedEventNode = ({ id, node, style }) => {
|
240
|
+
export const RenderedEventNode = ({
|
241
|
+
id,
|
242
|
+
node,
|
243
|
+
style,
|
244
|
+
scrollRef,
|
245
|
+
eventState,
|
246
|
+
setEventState,
|
247
|
+
}) => {
|
95
248
|
switch (node.event.event) {
|
96
249
|
case "sample_init":
|
97
250
|
return html`<${SampleInitEventView}
|
98
251
|
id=${id}
|
99
252
|
event=${node.event}
|
253
|
+
eventState=${eventState}
|
254
|
+
setEventState=${setEventState}
|
100
255
|
style=${style}
|
101
256
|
/>`;
|
102
257
|
|
@@ -104,6 +259,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
104
259
|
return html`<${SampleLimitEventView}
|
105
260
|
id=${id}
|
106
261
|
event=${node.event}
|
262
|
+
eventState=${eventState}
|
263
|
+
setEventState=${setEventState}
|
107
264
|
style=${style}
|
108
265
|
/>`;
|
109
266
|
|
@@ -111,6 +268,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
111
268
|
return html`<${InfoEventView}
|
112
269
|
id=${id}
|
113
270
|
event=${node.event}
|
271
|
+
eventState=${eventState}
|
272
|
+
setEventState=${setEventState}
|
114
273
|
style=${style}
|
115
274
|
/>`;
|
116
275
|
|
@@ -118,6 +277,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
118
277
|
return html`<${LoggerEventView}
|
119
278
|
id=${id}
|
120
279
|
event=${node.event}
|
280
|
+
eventState=${eventState}
|
281
|
+
setEventState=${setEventState}
|
121
282
|
style=${style}
|
122
283
|
/>`;
|
123
284
|
|
@@ -125,6 +286,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
125
286
|
return html`<${ModelEventView}
|
126
287
|
id=${id}
|
127
288
|
event=${node.event}
|
289
|
+
eventState=${eventState}
|
290
|
+
setEventState=${setEventState}
|
128
291
|
style=${style}
|
129
292
|
/>`;
|
130
293
|
|
@@ -132,6 +295,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
132
295
|
return html`<${ScoreEventView}
|
133
296
|
id=${id}
|
134
297
|
event=${node.event}
|
298
|
+
eventState=${eventState}
|
299
|
+
setEventState=${setEventState}
|
135
300
|
style=${style}
|
136
301
|
/>`;
|
137
302
|
|
@@ -139,6 +304,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
139
304
|
return html`<${StateEventView}
|
140
305
|
id=${id}
|
141
306
|
event=${node.event}
|
307
|
+
eventState=${eventState}
|
308
|
+
setEventState=${setEventState}
|
142
309
|
style=${style}
|
143
310
|
/>`;
|
144
311
|
|
@@ -146,14 +313,19 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
146
313
|
return html`<${StepEventView}
|
147
314
|
id=${id}
|
148
315
|
event=${node.event}
|
316
|
+
eventState=${eventState}
|
317
|
+
setEventState=${setEventState}
|
149
318
|
children=${node.children}
|
150
319
|
style=${style}
|
320
|
+
scrollRef=${scrollRef}
|
151
321
|
/>`;
|
152
322
|
|
153
323
|
case "store":
|
154
324
|
return html`<${StateEventView}
|
155
325
|
id=${id}
|
156
326
|
event=${node.event}
|
327
|
+
eventState=${eventState}
|
328
|
+
setEventState=${setEventState}
|
157
329
|
style=${style}
|
158
330
|
isStore=${true}
|
159
331
|
/>`;
|
@@ -162,6 +334,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
162
334
|
return html`<${SubtaskEventView}
|
163
335
|
id=${id}
|
164
336
|
event=${node.event}
|
337
|
+
eventState=${eventState}
|
338
|
+
setEventState=${setEventState}
|
165
339
|
style=${style}
|
166
340
|
depth=${node.depth}
|
167
341
|
/>`;
|
@@ -170,6 +344,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
170
344
|
return html`<${ToolEventView}
|
171
345
|
id=${id}
|
172
346
|
event=${node.event}
|
347
|
+
eventState=${eventState}
|
348
|
+
setEventState=${setEventState}
|
173
349
|
style=${style}
|
174
350
|
depth=${node.depth}
|
175
351
|
/>`;
|
@@ -178,6 +354,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
178
354
|
return html`<${InputEventView}
|
179
355
|
id=${id}
|
180
356
|
event=${node.event}
|
357
|
+
eventState=${eventState}
|
358
|
+
setEventState=${setEventState}
|
181
359
|
style=${style}
|
182
360
|
/>`;
|
183
361
|
|
@@ -185,6 +363,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
185
363
|
return html`<${ErrorEventView}
|
186
364
|
id=${id}
|
187
365
|
event=${node.event}
|
366
|
+
eventState=${eventState}
|
367
|
+
setEventState=${setEventState}
|
188
368
|
style=${style}
|
189
369
|
/>`;
|
190
370
|
|
@@ -192,6 +372,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
192
372
|
return html`<${ApprovalEventView}
|
193
373
|
id=${id}
|
194
374
|
event=${node.event}
|
375
|
+
eventState=${eventState}
|
376
|
+
setEventState=${setEventState}
|
195
377
|
style=${style}
|
196
378
|
/>`;
|
197
379
|
|
@@ -32,3 +32,13 @@ export class EventNode {
|
|
32
32
|
this.depth = depth;
|
33
33
|
}
|
34
34
|
}
|
35
|
+
|
36
|
+
/**
|
37
|
+
* @typedef {Record<string, TranscriptEventState>} TranscriptState
|
38
|
+
*/
|
39
|
+
|
40
|
+
/**
|
41
|
+
* @typedef {Object} TranscriptEventState
|
42
|
+
* @property {string} [selectedNav] - The selected nav for this event
|
43
|
+
* @property {boolean} [collapsed] - The collapse state for this event
|
44
|
+
*/
|
@@ -15,11 +15,20 @@ import { formatDateTime } from "../../../utils/Format.mjs";
|
|
15
15
|
* @param {Object} props - The properties passed to the component.
|
16
16
|
* @param { string } props.id - The id of this event.
|
17
17
|
* @param {import("../../../types/log").StateEvent } props.event - The event object to display.
|
18
|
+
* @param {import("./../Types.mjs").TranscriptEventState} props.eventState - The state for this event
|
19
|
+
* @param {(state: import("./../Types.mjs").TranscriptEventState) => void} props.setEventState - Update the state for this event
|
18
20
|
* @param { boolean } props.isStore - Whether this event view is rendering a storage (rather than a state)
|
19
21
|
* @param { Object } props.style - The style of this event.
|
20
22
|
* @returns {import("preact").JSX.Element} The component.
|
21
23
|
*/
|
22
|
-
export const StateEventView = ({ id, event, isStore, style }) => {
|
24
|
+
export const StateEventView = ({
|
25
|
+
id,
|
26
|
+
event,
|
27
|
+
eventState,
|
28
|
+
setEventState,
|
29
|
+
isStore,
|
30
|
+
style,
|
31
|
+
}) => {
|
23
32
|
const summary = summarizeChanges(event.changes);
|
24
33
|
|
25
34
|
// Synthesize objects for comparison
|
@@ -53,7 +62,22 @@ export const StateEventView = ({ id, event, isStore, style }) => {
|
|
53
62
|
const title = event.event === "state" ? "State Updated" : "Store Updated";
|
54
63
|
|
55
64
|
return html`
|
56
|
-
<${EventPanel}
|
65
|
+
<${EventPanel}
|
66
|
+
id=${id}
|
67
|
+
title="${title}"
|
68
|
+
subTitle=${formatDateTime(new Date(event.timestamp))}
|
69
|
+
text=${tabs.length === 1 ? summary : undefined}
|
70
|
+
collapse=${changePreview === undefined ? true : undefined}
|
71
|
+
style=${style}
|
72
|
+
selectedNav=${eventState.selectedNav || ""}
|
73
|
+
onSelectedNav=${(selectedNav) => {
|
74
|
+
setEventState({ ...eventState, selectedNav });
|
75
|
+
}}
|
76
|
+
collapsed=${eventState.collapsed}
|
77
|
+
onCollapsed=${(collapsed) => {
|
78
|
+
setEventState({ ...eventState, collapsed });
|
79
|
+
}}
|
80
|
+
>
|
57
81
|
${tabs}
|
58
82
|
</${EventPanel}>`;
|
59
83
|
};
|
@@ -32,7 +32,6 @@ export type Limit = number | [unknown, unknown] | null;
|
|
32
32
|
export type SampleId = string | number | (string | number)[] | null;
|
33
33
|
export type Epochs = number | null;
|
34
34
|
export type EpochsReducer = string[] | null;
|
35
|
-
export type Trace = boolean | null;
|
36
35
|
export type Name1 = string;
|
37
36
|
export type Tools = string | string[];
|
38
37
|
export type Approvers = ApproverPolicyConfig[];
|
@@ -77,6 +76,7 @@ export type NumChoices = number | null;
|
|
77
76
|
export type Logprobs = boolean | null;
|
78
77
|
export type TopLogprobs = number | null;
|
79
78
|
export type ParallelToolCalls = boolean | null;
|
79
|
+
export type InternalTools = boolean | null;
|
80
80
|
export type MaxToolOutput = number | null;
|
81
81
|
export type CachePrompt = "auto" | boolean | null;
|
82
82
|
export type ReasoningEffort = ("low" | "medium" | "high") | null;
|
@@ -112,35 +112,49 @@ export type Input =
|
|
112
112
|
| ChatMessageAssistant
|
113
113
|
| ChatMessageTool
|
114
114
|
)[];
|
115
|
-
export type Content =
|
115
|
+
export type Content =
|
116
|
+
| string
|
117
|
+
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
116
118
|
export type Type1 = "text";
|
117
119
|
export type Text = string;
|
118
120
|
export type Type2 = "image";
|
119
121
|
export type Image = string;
|
120
122
|
export type Detail = "auto" | "low" | "high";
|
123
|
+
export type Type3 = "audio";
|
124
|
+
export type Audio = string;
|
125
|
+
export type Format = "wav" | "mp3";
|
126
|
+
export type Type4 = "video";
|
127
|
+
export type Video = string;
|
128
|
+
export type Format1 = "mp4" | "mpeg" | "mov";
|
121
129
|
export type Source = ("input" | "generate") | null;
|
122
130
|
export type Role = "system";
|
123
|
-
export type Content1 =
|
131
|
+
export type Content1 =
|
132
|
+
| string
|
133
|
+
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
124
134
|
export type Source1 = ("input" | "generate") | null;
|
125
135
|
export type Role1 = "user";
|
126
136
|
export type ToolCallId = string | null;
|
127
|
-
export type Content2 =
|
137
|
+
export type Content2 =
|
138
|
+
| string
|
139
|
+
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
128
140
|
export type Source2 = ("input" | "generate") | null;
|
129
141
|
export type Role2 = "assistant";
|
130
142
|
export type ToolCalls = ToolCall[] | null;
|
131
143
|
export type Id1 = string;
|
132
144
|
export type Function = string;
|
133
|
-
export type
|
145
|
+
export type Type5 = "function";
|
134
146
|
export type ParseError = string | null;
|
135
147
|
export type Title = string | null;
|
136
|
-
export type
|
148
|
+
export type Format2 = "text" | "markdown";
|
137
149
|
export type Content3 = string;
|
138
|
-
export type Content4 =
|
150
|
+
export type Content4 =
|
151
|
+
| string
|
152
|
+
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
139
153
|
export type Source3 = ("input" | "generate") | null;
|
140
154
|
export type Role3 = "tool";
|
141
155
|
export type ToolCallId1 = string | null;
|
142
156
|
export type Function1 = string | null;
|
143
|
-
export type
|
157
|
+
export type Type6 =
|
144
158
|
| "parsing"
|
145
159
|
| "timeout"
|
146
160
|
| "unicode_decode"
|
@@ -218,7 +232,7 @@ export type JsonValue = unknown;
|
|
218
232
|
export type Timestamp1 = string;
|
219
233
|
export type Pending1 = boolean | null;
|
220
234
|
export type Event1 = "sample_limit";
|
221
|
-
export type
|
235
|
+
export type Type7 = "message" | "time" | "token" | "operator" | "custom";
|
222
236
|
export type Message2 = string;
|
223
237
|
export type Limit1 = number | null;
|
224
238
|
export type Timestamp2 = string;
|
@@ -244,8 +258,8 @@ export type Input2 = (
|
|
244
258
|
)[];
|
245
259
|
export type Name5 = string;
|
246
260
|
export type Description = string;
|
247
|
-
export type
|
248
|
-
export type
|
261
|
+
export type Type8 = "object";
|
262
|
+
export type Type9 =
|
249
263
|
| ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
|
250
264
|
| null;
|
251
265
|
export type Description1 = string | null;
|
@@ -261,11 +275,12 @@ export type Additionalproperties1 = boolean;
|
|
261
275
|
export type Tools1 = ToolInfo[];
|
262
276
|
export type ToolChoice = ("auto" | "any" | "none") | ToolFunction;
|
263
277
|
export type Name6 = string;
|
278
|
+
export type Error1 = string | null;
|
264
279
|
export type Cache = ("read" | "write") | null;
|
265
280
|
export type Timestamp5 = string;
|
266
281
|
export type Pending5 = boolean | null;
|
267
282
|
export type Event5 = "tool";
|
268
|
-
export type
|
283
|
+
export type Type10 = "function";
|
269
284
|
export type Id3 = string;
|
270
285
|
export type Function2 = string;
|
271
286
|
export type Result =
|
@@ -274,7 +289,9 @@ export type Result =
|
|
274
289
|
| boolean
|
275
290
|
| ContentText
|
276
291
|
| ContentImage
|
277
|
-
|
|
292
|
+
| ContentAudio
|
293
|
+
| ContentVideo
|
294
|
+
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
278
295
|
export type Truncated = [unknown, unknown] | null;
|
279
296
|
export type Timestamp6 = string;
|
280
297
|
export type Pending6 = boolean | null;
|
@@ -324,13 +341,13 @@ export type Timestamp12 = string;
|
|
324
341
|
export type Pending12 = boolean | null;
|
325
342
|
export type Event12 = "step";
|
326
343
|
export type Action = "begin" | "end";
|
327
|
-
export type
|
344
|
+
export type Type11 = string | null;
|
328
345
|
export type Name8 = string;
|
329
346
|
export type Timestamp13 = string;
|
330
347
|
export type Pending13 = boolean | null;
|
331
348
|
export type Event13 = "subtask";
|
332
349
|
export type Name9 = string;
|
333
|
-
export type
|
350
|
+
export type Type12 = string | null;
|
334
351
|
export type Events2 = (
|
335
352
|
| SampleInitEvent
|
336
353
|
| SampleLimitEvent
|
@@ -379,7 +396,13 @@ export type Events = (
|
|
379
396
|
| StepEvent
|
380
397
|
| SubtaskEvent
|
381
398
|
)[];
|
382
|
-
export type
|
399
|
+
export type Type13 =
|
400
|
+
| "context"
|
401
|
+
| "time"
|
402
|
+
| "message"
|
403
|
+
| "token"
|
404
|
+
| "operator"
|
405
|
+
| "custom";
|
383
406
|
export type Limit2 = number;
|
384
407
|
export type Reductions = EvalSampleReductions[] | null;
|
385
408
|
export type Scorer1 = string;
|
@@ -448,7 +471,6 @@ export interface EvalConfig {
|
|
448
471
|
sample_id: SampleId;
|
449
472
|
epochs: Epochs;
|
450
473
|
epochs_reducer: EpochsReducer;
|
451
|
-
trace: Trace;
|
452
474
|
approval: ApprovalPolicyConfig | null;
|
453
475
|
fail_on_error: FailOnError;
|
454
476
|
message_limit: MessageLimit;
|
@@ -531,6 +553,7 @@ export interface GenerateConfig {
|
|
531
553
|
logprobs: Logprobs;
|
532
554
|
top_logprobs: TopLogprobs;
|
533
555
|
parallel_tool_calls: ParallelToolCalls;
|
556
|
+
internal_tools: InternalTools;
|
534
557
|
max_tool_output: MaxToolOutput;
|
535
558
|
cache_prompt: CachePrompt;
|
536
559
|
reasoning_effort: ReasoningEffort;
|
@@ -614,6 +637,16 @@ export interface ContentImage {
|
|
614
637
|
image: Image;
|
615
638
|
detail: Detail;
|
616
639
|
}
|
640
|
+
export interface ContentAudio {
|
641
|
+
type: Type3;
|
642
|
+
audio: Audio;
|
643
|
+
format: Format;
|
644
|
+
}
|
645
|
+
export interface ContentVideo {
|
646
|
+
type: Type4;
|
647
|
+
video: Video;
|
648
|
+
format: Format1;
|
649
|
+
}
|
617
650
|
export interface ChatMessageUser {
|
618
651
|
content: Content1;
|
619
652
|
source: Source1;
|
@@ -630,7 +663,7 @@ export interface ToolCall {
|
|
630
663
|
id: Id1;
|
631
664
|
function: Function;
|
632
665
|
arguments: Arguments;
|
633
|
-
type:
|
666
|
+
type: Type5;
|
634
667
|
parse_error: ParseError;
|
635
668
|
view: ToolCallContent | null;
|
636
669
|
}
|
@@ -640,7 +673,7 @@ export interface Arguments {}
|
|
640
673
|
*/
|
641
674
|
export interface ToolCallContent {
|
642
675
|
title: Title;
|
643
|
-
format:
|
676
|
+
format: Format2;
|
644
677
|
content: Content3;
|
645
678
|
}
|
646
679
|
export interface ChatMessageTool {
|
@@ -652,7 +685,7 @@ export interface ChatMessageTool {
|
|
652
685
|
error: ToolCallError | null;
|
653
686
|
}
|
654
687
|
export interface ToolCallError {
|
655
|
-
type:
|
688
|
+
type: Type6;
|
656
689
|
message: Message1;
|
657
690
|
}
|
658
691
|
export interface ModelOutput {
|
@@ -735,7 +768,7 @@ export interface SampleLimitEvent {
|
|
735
768
|
timestamp: Timestamp1;
|
736
769
|
pending: Pending1;
|
737
770
|
event: Event1;
|
738
|
-
type:
|
771
|
+
type: Type7;
|
739
772
|
message: Message2;
|
740
773
|
limit: Limit1;
|
741
774
|
}
|
@@ -784,6 +817,7 @@ export interface ModelEvent {
|
|
784
817
|
tool_choice: ToolChoice;
|
785
818
|
config: GenerateConfig1;
|
786
819
|
output: ModelOutput;
|
820
|
+
error: Error1;
|
787
821
|
cache: Cache;
|
788
822
|
call: ModelCall | null;
|
789
823
|
}
|
@@ -822,7 +856,7 @@ export interface ToolInfo {
|
|
822
856
|
* Description of tool parameters object in JSON Schema format.
|
823
857
|
*/
|
824
858
|
export interface ToolParams {
|
825
|
-
type:
|
859
|
+
type: Type8;
|
826
860
|
properties: Properties;
|
827
861
|
required: Required1;
|
828
862
|
additionalProperties: Additionalproperties1;
|
@@ -834,7 +868,7 @@ export interface Properties {
|
|
834
868
|
* Description of tool parameter in JSON Schema format.
|
835
869
|
*/
|
836
870
|
export interface ToolParam {
|
837
|
-
type:
|
871
|
+
type: Type9;
|
838
872
|
description: Description1;
|
839
873
|
default: Default;
|
840
874
|
enum: Enum;
|
@@ -873,6 +907,7 @@ export interface GenerateConfig1 {
|
|
873
907
|
logprobs: Logprobs;
|
874
908
|
top_logprobs: TopLogprobs;
|
875
909
|
parallel_tool_calls: ParallelToolCalls;
|
910
|
+
internal_tools: InternalTools;
|
876
911
|
max_tool_output: MaxToolOutput;
|
877
912
|
cache_prompt: CachePrompt;
|
878
913
|
reasoning_effort: ReasoningEffort;
|
@@ -897,7 +932,7 @@ export interface ToolEvent {
|
|
897
932
|
timestamp: Timestamp5;
|
898
933
|
pending: Pending5;
|
899
934
|
event: Event5;
|
900
|
-
type:
|
935
|
+
type: Type10;
|
901
936
|
id: Id3;
|
902
937
|
function: Function2;
|
903
938
|
arguments: Arguments1;
|
@@ -999,7 +1034,7 @@ export interface StepEvent {
|
|
999
1034
|
pending: Pending12;
|
1000
1035
|
event: Event12;
|
1001
1036
|
action: Action;
|
1002
|
-
type:
|
1037
|
+
type: Type11;
|
1003
1038
|
name: Name8;
|
1004
1039
|
}
|
1005
1040
|
/**
|
@@ -1010,7 +1045,7 @@ export interface SubtaskEvent {
|
|
1010
1045
|
pending: Pending13;
|
1011
1046
|
event: Event13;
|
1012
1047
|
name: Name9;
|
1013
|
-
type:
|
1048
|
+
type: Type12;
|
1014
1049
|
input: Input4;
|
1015
1050
|
result: Result1;
|
1016
1051
|
events: Events2;
|
@@ -1026,7 +1061,7 @@ export interface Attachments {
|
|
1026
1061
|
[k: string]: string;
|
1027
1062
|
}
|
1028
1063
|
export interface EvalSampleLimit {
|
1029
|
-
type:
|
1064
|
+
type: Type13;
|
1030
1065
|
limit: Limit2;
|
1031
1066
|
}
|
1032
1067
|
export interface EvalSampleReductions {
|