inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +3 -1
- inspect_ai/_cli/eval.py +15 -9
- inspect_ai/_display/core/active.py +4 -1
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +0 -5
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +79 -12
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +10 -1
- inspect_ai/_eval/loader.py +79 -19
- inspect_ai/_eval/registry.py +6 -0
- inspect_ai/_eval/score.py +3 -1
- inspect_ai/_eval/task/results.py +51 -22
- inspect_ai/_eval/task/run.py +47 -13
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25498 -2044
- inspect_ai/_view/www/log-schema.json +32 -2
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +14 -16
- inspect_ai/_view/www/src/Types.mjs +1 -2
- inspect_ai/_view/www/src/api/Types.ts +133 -0
- inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
- inspect_ai/_view/www/src/api/api-http.ts +219 -0
- inspect_ai/_view/www/src/api/api-shared.ts +47 -0
- inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
- inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
- inspect_ai/_view/www/src/api/index.ts +51 -0
- inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +77 -4
- inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
- inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
- inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +13 -2
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
- inspect_ai/_view/www/src/utils/vscode.ts +36 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/manager.py +1 -1
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/log/_log.py +1 -1
- inspect_ai/log/_samples.py +16 -0
- inspect_ai/log/_transcript.py +4 -1
- inspect_ai/model/_call_tools.py +59 -0
- inspect_ai/model/_conversation.py +16 -7
- inspect_ai/model/_generate_config.py +12 -12
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +22 -2
- inspect_ai/model/_openai.py +383 -0
- inspect_ai/model/_providers/anthropic.py +152 -55
- inspect_ai/model/_providers/azureai.py +21 -21
- inspect_ai/model/_providers/bedrock.py +37 -40
- inspect_ai/model/_providers/goodfire.py +248 -0
- inspect_ai/model/_providers/google.py +46 -54
- inspect_ai/model/_providers/groq.py +7 -3
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +13 -12
- inspect_ai/model/_providers/openai.py +51 -218
- inspect_ai/model/_providers/openai_o1.py +11 -12
- inspect_ai/model/_providers/providers.py +23 -1
- inspect_ai/model/_providers/together.py +12 -12
- inspect_ai/model/_providers/util/__init__.py +2 -3
- inspect_ai/model/_providers/util/hf_handler.py +1 -1
- inspect_ai/model/_providers/util/llama31.py +1 -1
- inspect_ai/model/_providers/util/util.py +0 -76
- inspect_ai/model/_providers/vertex.py +1 -4
- inspect_ai/scorer/_metric.py +3 -0
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +4 -3
- inspect_ai/solver/__init__.py +4 -5
- inspect_ai/solver/_basic_agent.py +1 -1
- inspect_ai/solver/_bridge/__init__.py +3 -0
- inspect_ai/solver/_bridge/bridge.py +100 -0
- inspect_ai/solver/_bridge/patch.py +170 -0
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_solver.py +6 -0
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +12 -1
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_display.py +5 -0
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/docker/docker.py +64 -1
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
- inspect_ai/util/_sandbox/environment.py +14 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
- inspect_ai/_view/www/src/api/Types.mjs +0 -117
- inspect_ai/_view/www/src/api/api-http.mjs +0 -300
- inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
- inspect_ai/_view/www/src/api/index.mjs +0 -49
- inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
|
|
1
1
|
// @ts-check
|
2
2
|
import { html } from "htm/preact";
|
3
|
+
import { useCallback, useState } from "preact/hooks";
|
3
4
|
import { SampleInitEventView } from "./SampleInitEventView.mjs";
|
4
5
|
import { StateEventView } from "./state/StateEventView.mjs";
|
5
6
|
import { StepEventView } from "./StepEventView.mjs";
|
@@ -15,6 +16,8 @@ import { ApprovalEventView } from "./ApprovalEventView.mjs";
|
|
15
16
|
import { SampleLimitEventView } from "./SampleLimitEventView.mjs";
|
16
17
|
import { FontSize } from "../../appearance/Fonts.mjs";
|
17
18
|
import { EventNode } from "./Types.mjs";
|
19
|
+
// @ts-ignore
|
20
|
+
import { VirtualList } from "../../components/VirtualList.mjs";
|
18
21
|
|
19
22
|
/**
|
20
23
|
* Renders the TranscriptView component.
|
@@ -26,10 +29,61 @@ import { EventNode } from "./Types.mjs";
|
|
26
29
|
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
27
30
|
*/
|
28
31
|
export const TranscriptView = ({ id, events, depth = 0 }) => {
|
32
|
+
const [transcriptState, setTranscriptState] = useState({});
|
33
|
+
const onTranscriptState = useCallback(
|
34
|
+
(state) => {
|
35
|
+
setTranscriptState(state);
|
36
|
+
},
|
37
|
+
[transcriptState, setTranscriptState],
|
38
|
+
);
|
39
|
+
|
29
40
|
// Normalize Events themselves
|
30
41
|
const resolvedEvents = fixupEventStream(events);
|
31
42
|
const eventNodes = treeifyEvents(resolvedEvents, depth);
|
32
|
-
return html`
|
43
|
+
return html`
|
44
|
+
<${TranscriptComponent}
|
45
|
+
id=${id}
|
46
|
+
eventNodes=${eventNodes}
|
47
|
+
transcriptState=${transcriptState}
|
48
|
+
setTranscriptState=${onTranscriptState}
|
49
|
+
/>
|
50
|
+
`;
|
51
|
+
};
|
52
|
+
|
53
|
+
/**
|
54
|
+
* Renders the Transcript component.
|
55
|
+
*
|
56
|
+
* @param {Object} props - The parameters for the component.
|
57
|
+
* @param {string} props.id - The identifier for this view
|
58
|
+
* @param {import("../../types/log").Events} props.events - The transcript events to display.
|
59
|
+
* @param {Object} props.style - The transcript style to display.
|
60
|
+
* @param {number} props.depth - The base depth for this transcript view
|
61
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
|
62
|
+
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
63
|
+
*/
|
64
|
+
export const TranscriptVirtualList = (props) => {
|
65
|
+
let { id, scrollRef, events, depth, style } = props;
|
66
|
+
|
67
|
+
// Normalize Events themselves
|
68
|
+
const resolvedEvents = fixupEventStream(events);
|
69
|
+
const eventNodes = treeifyEvents(resolvedEvents, depth);
|
70
|
+
|
71
|
+
const [transcriptState, setTranscriptState] = useState({});
|
72
|
+
const onTranscriptState = useCallback(
|
73
|
+
(state) => {
|
74
|
+
setTranscriptState(state);
|
75
|
+
},
|
76
|
+
[transcriptState, setTranscriptState],
|
77
|
+
);
|
78
|
+
|
79
|
+
return html`<${TranscriptVirtualListComponent}
|
80
|
+
id=${id}
|
81
|
+
eventNodes=${eventNodes}
|
82
|
+
style=${style}
|
83
|
+
scrollRef=${scrollRef}
|
84
|
+
transcriptState=${transcriptState}
|
85
|
+
setTranscriptState=${onTranscriptState}
|
86
|
+
/>`;
|
33
87
|
};
|
34
88
|
|
35
89
|
/**
|
@@ -39,9 +93,81 @@ export const TranscriptView = ({ id, events, depth = 0 }) => {
|
|
39
93
|
* @param {string} props.id - The identifier for this view
|
40
94
|
* @param {EventNode[]} props.eventNodes - The transcript events nodes to display.
|
41
95
|
* @param {Object} props.style - The transcript style to display.
|
96
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
|
97
|
+
* @param {import("./Types.mjs").TranscriptState} props.transcriptState - The state for this transcript
|
98
|
+
* @param {(state: import("./Types.mjs").TranscriptState) => void} props.setTranscriptState - Set the transcript state for this transcript
|
42
99
|
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
43
100
|
*/
|
44
|
-
export const
|
101
|
+
export const TranscriptVirtualListComponent = ({
|
102
|
+
id,
|
103
|
+
eventNodes,
|
104
|
+
style,
|
105
|
+
scrollRef,
|
106
|
+
transcriptState,
|
107
|
+
setTranscriptState,
|
108
|
+
}) => {
|
109
|
+
const renderRow = (item, index) => {
|
110
|
+
const toggleStyle = {};
|
111
|
+
if (item.depth % 2 == 0) {
|
112
|
+
toggleStyle.backgroundColor = "var(--bs-light-bg-subtle)";
|
113
|
+
} else {
|
114
|
+
toggleStyle.backgroundColor = "var(--bs-body-bg)";
|
115
|
+
}
|
116
|
+
|
117
|
+
let paddingTop = "0";
|
118
|
+
if (index === 0) {
|
119
|
+
paddingTop = ".5em";
|
120
|
+
}
|
121
|
+
const eventId = `${id}-event${index}`;
|
122
|
+
const setEventState = useCallback(
|
123
|
+
(state) => {
|
124
|
+
setTranscriptState({ ...transcriptState, [eventId]: state });
|
125
|
+
},
|
126
|
+
[setTranscriptState, transcriptState],
|
127
|
+
);
|
128
|
+
|
129
|
+
return html`<div style=${{ paddingTop, paddingBottom: ".5em" }}>
|
130
|
+
<${RenderedEventNode}
|
131
|
+
id=${eventId}
|
132
|
+
node=${item}
|
133
|
+
style=${{
|
134
|
+
...toggleStyle,
|
135
|
+
...style,
|
136
|
+
}}
|
137
|
+
scrollRef=${scrollRef}
|
138
|
+
eventState=${transcriptState[eventId] || {}}
|
139
|
+
setEventState=${setEventState}
|
140
|
+
/>
|
141
|
+
</div>`;
|
142
|
+
};
|
143
|
+
|
144
|
+
return html`<${VirtualList}
|
145
|
+
data=${eventNodes}
|
146
|
+
tabIndex="0"
|
147
|
+
renderRow=${renderRow}
|
148
|
+
scrollRef=${scrollRef}
|
149
|
+
style=${{ width: "100%", marginTop: "1em" }}
|
150
|
+
/>`;
|
151
|
+
};
|
152
|
+
|
153
|
+
/**
|
154
|
+
* Renders the Transcript component.
|
155
|
+
*
|
156
|
+
* @param {Object} props - The parameters for the component.
|
157
|
+
* @param {string} props.id - The identifier for this view
|
158
|
+
* @param {EventNode[]} props.eventNodes - The transcript events nodes to display.
|
159
|
+
* @param {Object} props.style - The transcript style to display.
|
160
|
+
* @param {import("./Types.mjs").TranscriptState} props.transcriptState - The state for this transcript
|
161
|
+
* @param {(state: import("./Types.mjs").TranscriptState) => void} props.setTranscriptState - Set the transcript state for this transcript
|
162
|
+
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
163
|
+
*/
|
164
|
+
export const TranscriptComponent = ({
|
165
|
+
id,
|
166
|
+
transcriptState,
|
167
|
+
setTranscriptState,
|
168
|
+
eventNodes,
|
169
|
+
style,
|
170
|
+
}) => {
|
45
171
|
const rows = eventNodes.map((eventNode, index) => {
|
46
172
|
const toggleStyle = {};
|
47
173
|
if (eventNode.depth % 2 == 0) {
|
@@ -55,15 +181,32 @@ export const TranscriptComponent = ({ id, eventNodes, style }) => {
|
|
55
181
|
toggleStyle.marginBottom = "1.5em";
|
56
182
|
}
|
57
183
|
|
184
|
+
let paddingBottom = ".5em";
|
185
|
+
if (index === eventNodes.length - 1) {
|
186
|
+
paddingBottom = "0";
|
187
|
+
}
|
188
|
+
|
189
|
+
const eventId = `${id}-event${index}`;
|
190
|
+
const setEventState = useCallback(
|
191
|
+
(state) => {
|
192
|
+
setTranscriptState({ ...transcriptState, [eventId]: state });
|
193
|
+
},
|
194
|
+
[setTranscriptState, transcriptState],
|
195
|
+
);
|
196
|
+
|
58
197
|
const row = html`
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
198
|
+
<div style=${{ paddingBottom }}>
|
199
|
+
<${RenderedEventNode}
|
200
|
+
id=${eventId}
|
201
|
+
node=${eventNode}
|
202
|
+
style=${{
|
203
|
+
...toggleStyle,
|
204
|
+
...style,
|
205
|
+
}}
|
206
|
+
eventState=${transcriptState[eventId] || {}}
|
207
|
+
setEventState=${setEventState}
|
208
|
+
/>
|
209
|
+
</div>
|
67
210
|
`;
|
68
211
|
return row;
|
69
212
|
});
|
@@ -89,14 +232,26 @@ export const TranscriptComponent = ({ id, eventNodes, style }) => {
|
|
89
232
|
* @param {string} props.id - The id for this event.
|
90
233
|
* @param { EventNode } props.node - This event.
|
91
234
|
* @param { Object } props.style - The style for this node.
|
235
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
|
236
|
+
* @param {import("./Types.mjs").TranscriptEventState} props.eventState - The state for this event
|
237
|
+
* @param {(state: import("./Types.mjs").TranscriptEventState) => void} props.setEventState - Update the state for this event
|
92
238
|
* @returns {import("preact").JSX.Element} The rendered event.
|
93
239
|
*/
|
94
|
-
export const RenderedEventNode = ({
|
240
|
+
export const RenderedEventNode = ({
|
241
|
+
id,
|
242
|
+
node,
|
243
|
+
style,
|
244
|
+
scrollRef,
|
245
|
+
eventState,
|
246
|
+
setEventState,
|
247
|
+
}) => {
|
95
248
|
switch (node.event.event) {
|
96
249
|
case "sample_init":
|
97
250
|
return html`<${SampleInitEventView}
|
98
251
|
id=${id}
|
99
252
|
event=${node.event}
|
253
|
+
eventState=${eventState}
|
254
|
+
setEventState=${setEventState}
|
100
255
|
style=${style}
|
101
256
|
/>`;
|
102
257
|
|
@@ -104,6 +259,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
104
259
|
return html`<${SampleLimitEventView}
|
105
260
|
id=${id}
|
106
261
|
event=${node.event}
|
262
|
+
eventState=${eventState}
|
263
|
+
setEventState=${setEventState}
|
107
264
|
style=${style}
|
108
265
|
/>`;
|
109
266
|
|
@@ -111,6 +268,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
111
268
|
return html`<${InfoEventView}
|
112
269
|
id=${id}
|
113
270
|
event=${node.event}
|
271
|
+
eventState=${eventState}
|
272
|
+
setEventState=${setEventState}
|
114
273
|
style=${style}
|
115
274
|
/>`;
|
116
275
|
|
@@ -118,6 +277,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
118
277
|
return html`<${LoggerEventView}
|
119
278
|
id=${id}
|
120
279
|
event=${node.event}
|
280
|
+
eventState=${eventState}
|
281
|
+
setEventState=${setEventState}
|
121
282
|
style=${style}
|
122
283
|
/>`;
|
123
284
|
|
@@ -125,6 +286,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
125
286
|
return html`<${ModelEventView}
|
126
287
|
id=${id}
|
127
288
|
event=${node.event}
|
289
|
+
eventState=${eventState}
|
290
|
+
setEventState=${setEventState}
|
128
291
|
style=${style}
|
129
292
|
/>`;
|
130
293
|
|
@@ -132,6 +295,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
132
295
|
return html`<${ScoreEventView}
|
133
296
|
id=${id}
|
134
297
|
event=${node.event}
|
298
|
+
eventState=${eventState}
|
299
|
+
setEventState=${setEventState}
|
135
300
|
style=${style}
|
136
301
|
/>`;
|
137
302
|
|
@@ -139,6 +304,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
139
304
|
return html`<${StateEventView}
|
140
305
|
id=${id}
|
141
306
|
event=${node.event}
|
307
|
+
eventState=${eventState}
|
308
|
+
setEventState=${setEventState}
|
142
309
|
style=${style}
|
143
310
|
/>`;
|
144
311
|
|
@@ -146,14 +313,19 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
146
313
|
return html`<${StepEventView}
|
147
314
|
id=${id}
|
148
315
|
event=${node.event}
|
316
|
+
eventState=${eventState}
|
317
|
+
setEventState=${setEventState}
|
149
318
|
children=${node.children}
|
150
319
|
style=${style}
|
320
|
+
scrollRef=${scrollRef}
|
151
321
|
/>`;
|
152
322
|
|
153
323
|
case "store":
|
154
324
|
return html`<${StateEventView}
|
155
325
|
id=${id}
|
156
326
|
event=${node.event}
|
327
|
+
eventState=${eventState}
|
328
|
+
setEventState=${setEventState}
|
157
329
|
style=${style}
|
158
330
|
isStore=${true}
|
159
331
|
/>`;
|
@@ -162,6 +334,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
162
334
|
return html`<${SubtaskEventView}
|
163
335
|
id=${id}
|
164
336
|
event=${node.event}
|
337
|
+
eventState=${eventState}
|
338
|
+
setEventState=${setEventState}
|
165
339
|
style=${style}
|
166
340
|
depth=${node.depth}
|
167
341
|
/>`;
|
@@ -170,6 +344,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
170
344
|
return html`<${ToolEventView}
|
171
345
|
id=${id}
|
172
346
|
event=${node.event}
|
347
|
+
eventState=${eventState}
|
348
|
+
setEventState=${setEventState}
|
173
349
|
style=${style}
|
174
350
|
depth=${node.depth}
|
175
351
|
/>`;
|
@@ -178,6 +354,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
178
354
|
return html`<${InputEventView}
|
179
355
|
id=${id}
|
180
356
|
event=${node.event}
|
357
|
+
eventState=${eventState}
|
358
|
+
setEventState=${setEventState}
|
181
359
|
style=${style}
|
182
360
|
/>`;
|
183
361
|
|
@@ -185,6 +363,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
185
363
|
return html`<${ErrorEventView}
|
186
364
|
id=${id}
|
187
365
|
event=${node.event}
|
366
|
+
eventState=${eventState}
|
367
|
+
setEventState=${setEventState}
|
188
368
|
style=${style}
|
189
369
|
/>`;
|
190
370
|
|
@@ -192,6 +372,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
|
|
192
372
|
return html`<${ApprovalEventView}
|
193
373
|
id=${id}
|
194
374
|
event=${node.event}
|
375
|
+
eventState=${eventState}
|
376
|
+
setEventState=${setEventState}
|
195
377
|
style=${style}
|
196
378
|
/>`;
|
197
379
|
|
@@ -32,3 +32,13 @@ export class EventNode {
|
|
32
32
|
this.depth = depth;
|
33
33
|
}
|
34
34
|
}
|
35
|
+
|
36
|
+
/**
|
37
|
+
* @typedef {Record<string, TranscriptEventState>} TranscriptState
|
38
|
+
*/
|
39
|
+
|
40
|
+
/**
|
41
|
+
* @typedef {Object} TranscriptEventState
|
42
|
+
* @property {string} [selectedNav] - The selected nav for this event
|
43
|
+
* @property {boolean} [collapsed] - The collapse state for this event
|
44
|
+
*/
|
@@ -15,11 +15,20 @@ import { formatDateTime } from "../../../utils/Format.mjs";
|
|
15
15
|
* @param {Object} props - The properties passed to the component.
|
16
16
|
* @param { string } props.id - The id of this event.
|
17
17
|
* @param {import("../../../types/log").StateEvent } props.event - The event object to display.
|
18
|
+
* @param {import("./../Types.mjs").TranscriptEventState} props.eventState - The state for this event
|
19
|
+
* @param {(state: import("./../Types.mjs").TranscriptEventState) => void} props.setEventState - Update the state for this event
|
18
20
|
* @param { boolean } props.isStore - Whether this event view is rendering a storage (rather than a state)
|
19
21
|
* @param { Object } props.style - The style of this event.
|
20
22
|
* @returns {import("preact").JSX.Element} The component.
|
21
23
|
*/
|
22
|
-
export const StateEventView = ({
|
24
|
+
export const StateEventView = ({
|
25
|
+
id,
|
26
|
+
event,
|
27
|
+
eventState,
|
28
|
+
setEventState,
|
29
|
+
isStore,
|
30
|
+
style,
|
31
|
+
}) => {
|
23
32
|
const summary = summarizeChanges(event.changes);
|
24
33
|
|
25
34
|
// Synthesize objects for comparison
|
@@ -53,7 +62,22 @@ export const StateEventView = ({ id, event, isStore, style }) => {
|
|
53
62
|
const title = event.event === "state" ? "State Updated" : "Store Updated";
|
54
63
|
|
55
64
|
return html`
|
56
|
-
<${EventPanel}
|
65
|
+
<${EventPanel}
|
66
|
+
id=${id}
|
67
|
+
title="${title}"
|
68
|
+
subTitle=${formatDateTime(new Date(event.timestamp))}
|
69
|
+
text=${tabs.length === 1 ? summary : undefined}
|
70
|
+
collapse=${changePreview === undefined ? true : undefined}
|
71
|
+
style=${style}
|
72
|
+
selectedNav=${eventState.selectedNav || ""}
|
73
|
+
onSelectedNav=${(selectedNav) => {
|
74
|
+
setEventState({ ...eventState, selectedNav });
|
75
|
+
}}
|
76
|
+
collapsed=${eventState.collapsed}
|
77
|
+
onCollapsed=${(collapsed) => {
|
78
|
+
setEventState({ ...eventState, collapsed });
|
79
|
+
}}
|
80
|
+
>
|
57
81
|
${tabs}
|
58
82
|
</${EventPanel}>`;
|
59
83
|
};
|
@@ -76,6 +76,7 @@ export type NumChoices = number | null;
|
|
76
76
|
export type Logprobs = boolean | null;
|
77
77
|
export type TopLogprobs = number | null;
|
78
78
|
export type ParallelToolCalls = boolean | null;
|
79
|
+
export type InternalTools = boolean | null;
|
79
80
|
export type MaxToolOutput = number | null;
|
80
81
|
export type CachePrompt = "auto" | boolean | null;
|
81
82
|
export type ReasoningEffort = ("low" | "medium" | "high") | null;
|
@@ -231,7 +232,7 @@ export type JsonValue = unknown;
|
|
231
232
|
export type Timestamp1 = string;
|
232
233
|
export type Pending1 = boolean | null;
|
233
234
|
export type Event1 = "sample_limit";
|
234
|
-
export type Type7 = "message" | "time" | "token" | "operator";
|
235
|
+
export type Type7 = "message" | "time" | "token" | "operator" | "custom";
|
235
236
|
export type Message2 = string;
|
236
237
|
export type Limit1 = number | null;
|
237
238
|
export type Timestamp2 = string;
|
@@ -274,6 +275,7 @@ export type Additionalproperties1 = boolean;
|
|
274
275
|
export type Tools1 = ToolInfo[];
|
275
276
|
export type ToolChoice = ("auto" | "any" | "none") | ToolFunction;
|
276
277
|
export type Name6 = string;
|
278
|
+
export type Error1 = string | null;
|
277
279
|
export type Cache = ("read" | "write") | null;
|
278
280
|
export type Timestamp5 = string;
|
279
281
|
export type Pending5 = boolean | null;
|
@@ -394,7 +396,13 @@ export type Events = (
|
|
394
396
|
| StepEvent
|
395
397
|
| SubtaskEvent
|
396
398
|
)[];
|
397
|
-
export type Type13 =
|
399
|
+
export type Type13 =
|
400
|
+
| "context"
|
401
|
+
| "time"
|
402
|
+
| "message"
|
403
|
+
| "token"
|
404
|
+
| "operator"
|
405
|
+
| "custom";
|
398
406
|
export type Limit2 = number;
|
399
407
|
export type Reductions = EvalSampleReductions[] | null;
|
400
408
|
export type Scorer1 = string;
|
@@ -545,6 +553,7 @@ export interface GenerateConfig {
|
|
545
553
|
logprobs: Logprobs;
|
546
554
|
top_logprobs: TopLogprobs;
|
547
555
|
parallel_tool_calls: ParallelToolCalls;
|
556
|
+
internal_tools: InternalTools;
|
548
557
|
max_tool_output: MaxToolOutput;
|
549
558
|
cache_prompt: CachePrompt;
|
550
559
|
reasoning_effort: ReasoningEffort;
|
@@ -808,6 +817,7 @@ export interface ModelEvent {
|
|
808
817
|
tool_choice: ToolChoice;
|
809
818
|
config: GenerateConfig1;
|
810
819
|
output: ModelOutput;
|
820
|
+
error: Error1;
|
811
821
|
cache: Cache;
|
812
822
|
call: ModelCall | null;
|
813
823
|
}
|
@@ -897,6 +907,7 @@ export interface GenerateConfig1 {
|
|
897
907
|
logprobs: Logprobs;
|
898
908
|
top_logprobs: TopLogprobs;
|
899
909
|
parallel_tool_calls: ParallelToolCalls;
|
910
|
+
internal_tools: InternalTools;
|
900
911
|
max_tool_output: MaxToolOutput;
|
901
912
|
cache_prompt: CachePrompt;
|
902
913
|
reasoning_effort: ReasoningEffort;
|
@@ -126,10 +126,17 @@ export const formatTime = (seconds) => {
|
|
126
126
|
return `${seconds} sec`;
|
127
127
|
} else if (seconds < 60 * 60) {
|
128
128
|
return `${Math.floor(seconds / 60)} min ${seconds % 60} sec`;
|
129
|
+
} else if (seconds < 60 * 60 * 24) {
|
130
|
+
const hours = Math.floor(seconds / (60 * 60));
|
131
|
+
const minutes = Math.floor((seconds % (60 * 60)) / 60);
|
132
|
+
const remainingSeconds = seconds % 60;
|
133
|
+
return `${hours} hr ${minutes} min ${remainingSeconds} sec`;
|
129
134
|
} else {
|
130
|
-
|
131
|
-
|
132
|
-
|
135
|
+
const days = Math.floor(seconds / (60 * 60 * 24));
|
136
|
+
const hours = Math.floor((seconds % (60 * 60 * 24)) / (60 * 60));
|
137
|
+
const minutes = Math.floor((seconds % (60 * 60)) / 60);
|
138
|
+
const remainingSeconds = seconds % 60;
|
139
|
+
return `${days} days ${hours} hr ${minutes} min ${remainingSeconds} sec`;
|
133
140
|
}
|
134
141
|
};
|
135
142
|
|
@@ -1,6 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
export const asyncJsonParse = async (text: string): Promise<any> => {
|
2
|
+
const encoder = new TextEncoder();
|
3
|
+
const encodedText = encoder.encode(text);
|
4
4
|
const blob = new Blob([kWorkerCode], { type: "application/javascript" });
|
5
5
|
const blobURL = URL.createObjectURL(blob);
|
6
6
|
const worker = new Worker(blobURL);
|
@@ -17,7 +17,9 @@ export const asyncJsonParse = async (text) => {
|
|
17
17
|
reject(new Error(error.message));
|
18
18
|
};
|
19
19
|
});
|
20
|
-
worker.postMessage({ scriptContent: kJson5ScriptBase64,
|
20
|
+
worker.postMessage({ scriptContent: kJson5ScriptBase64, encodedText }, [
|
21
|
+
encodedText.buffer,
|
22
|
+
]);
|
21
23
|
return await result;
|
22
24
|
} finally {
|
23
25
|
worker.terminate();
|
@@ -28,12 +30,14 @@ export const asyncJsonParse = async (text) => {
|
|
28
30
|
const kWorkerCode = `
|
29
31
|
self.onmessage = function (e) {
|
30
32
|
eval(atob(e.data.scriptContent));
|
31
|
-
const
|
33
|
+
const { encodedText } = e.data;
|
34
|
+
const decoder = new TextDecoder();
|
35
|
+
const text = decoder.decode(encodedText);
|
32
36
|
try {
|
33
|
-
const result =
|
34
|
-
|
35
|
-
} catch (
|
36
|
-
|
37
|
+
const result = JSON.parse(text);
|
38
|
+
postMessage({ success: true, result });
|
39
|
+
} catch (err) {
|
40
|
+
postMessage({ success: false, error: err.message });
|
37
41
|
}
|
38
42
|
};`;
|
39
43
|
|
@@ -0,0 +1,36 @@
|
|
1
|
+
/**
|
2
|
+
* Type definition for the VS Code API object
|
3
|
+
* Note: This is a minimal definition - expand based on your needs
|
4
|
+
*/
|
5
|
+
interface VSCodeApi {
|
6
|
+
postMessage(message: unknown): void;
|
7
|
+
getState(): unknown;
|
8
|
+
setState(state: unknown): void;
|
9
|
+
}
|
10
|
+
|
11
|
+
/**
|
12
|
+
* The cached instance of the VS Code API
|
13
|
+
*/
|
14
|
+
let vscodeApi: VSCodeApi | undefined;
|
15
|
+
|
16
|
+
// Declare the acquireVsCodeApi function on the window object
|
17
|
+
declare global {
|
18
|
+
interface Window {
|
19
|
+
acquireVsCodeApi?: () => VSCodeApi;
|
20
|
+
}
|
21
|
+
}
|
22
|
+
|
23
|
+
/**
|
24
|
+
* Gets or initializes the VS Code API instance
|
25
|
+
* @returns {VSCodeApi | undefined} The VS Code API instance if in VS Code environment, undefined otherwise
|
26
|
+
*/
|
27
|
+
export const getVscodeApi = (): VSCodeApi | undefined => {
|
28
|
+
if (window.acquireVsCodeApi) {
|
29
|
+
if (vscodeApi === undefined) {
|
30
|
+
vscodeApi = window.acquireVsCodeApi();
|
31
|
+
}
|
32
|
+
return vscodeApi;
|
33
|
+
} else {
|
34
|
+
return undefined;
|
35
|
+
}
|
36
|
+
};
|
@@ -43,7 +43,7 @@ import { debounce } from "../utils/sync.mjs";
|
|
43
43
|
* @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats for this eval
|
44
44
|
* @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults for this eval
|
45
45
|
* @param {import("../Types.mjs").CurrentLog} [props.log] - the current log
|
46
|
-
* @param {import("../api/Types.
|
46
|
+
* @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
|
47
47
|
* @param {import("../Types.mjs").SampleMode} props.sampleMode - the mode for displaying samples
|
48
48
|
* @param {string} props.groupBy - what to group by
|
49
49
|
* @param {string} props.groupByOrder - the grouping order
|
@@ -142,6 +142,8 @@ export const WorkSpace = ({
|
|
142
142
|
}
|
143
143
|
}, [divRef, task_id]);
|
144
144
|
|
145
|
+
const sampleTabScrollRef = useRef(/** @type {HTMLElement|null} */ (null));
|
146
|
+
|
145
147
|
const resolvedTabs = useMemo(() => {
|
146
148
|
// Tabs that are available within the app
|
147
149
|
// Include the tab contents as well as any tools that the tab provides
|
@@ -154,6 +156,7 @@ export const WorkSpace = ({
|
|
154
156
|
resolvedTabs.samples = {
|
155
157
|
id: kEvalWorkspaceTabId,
|
156
158
|
scrollable: samples.length === 1,
|
159
|
+
scrollRef: sampleTabScrollRef,
|
157
160
|
label: samples?.length > 1 ? "Samples" : "Sample",
|
158
161
|
content: () => {
|
159
162
|
return html` <${SamplesTab}
|
@@ -178,6 +181,7 @@ export const WorkSpace = ({
|
|
178
181
|
epoch=${epoch}
|
179
182
|
sampleScrollPositionRef=${sampleScrollPositionRef}
|
180
183
|
setSampleScrollPosition=${setSampleScrollPosition}
|
184
|
+
sampleTabScrollRef=${sampleTabScrollRef}
|
181
185
|
/>`;
|
182
186
|
},
|
183
187
|
tools: () => {
|
@@ -368,6 +372,7 @@ export const WorkSpace = ({
|
|
368
372
|
evalResults=${evalResults}
|
369
373
|
evalStats=${evalStats}
|
370
374
|
samples=${samples}
|
375
|
+
evalDescriptor=${samplesDescriptor.evalDescriptor}
|
371
376
|
status=${evalStatus}
|
372
377
|
tabs=${resolvedTabs}
|
373
378
|
selectedTab=${selectedTab}
|
@@ -386,6 +391,7 @@ const WorkspaceDisplay = ({
|
|
386
391
|
evalResults,
|
387
392
|
evalStats,
|
388
393
|
samples,
|
394
|
+
evalDescriptor,
|
389
395
|
status,
|
390
396
|
showToggle,
|
391
397
|
selectedTab,
|
@@ -442,6 +448,7 @@ const WorkspaceDisplay = ({
|
|
442
448
|
onSelected=${onSelected}
|
443
449
|
selected=${selectedTab === tab.id}
|
444
450
|
scrollable=${!!tab.scrollable}
|
451
|
+
scrollRef=${tab.scrollRef}
|
445
452
|
scrollPosition=${workspaceTabScrollPositionRef.current[tab.id]}
|
446
453
|
setScrollPosition=${useCallback(
|
447
454
|
(position) => {
|
@@ -456,20 +463,19 @@ const WorkspaceDisplay = ({
|
|
456
463
|
}, [tabs]);
|
457
464
|
|
458
465
|
return html`
|
459
|
-
|
460
|
-
|
461
466
|
<${Navbar}
|
462
467
|
evalSpec=${evalSpec}
|
463
468
|
evalPlan=${evalPlan}
|
464
469
|
evalResults=${evalResults}
|
465
470
|
evalStats=${evalStats}
|
466
471
|
samples=${samples}
|
472
|
+
evalDescriptor=${evalDescriptor}
|
467
473
|
status=${status}
|
468
474
|
file=${logFileName}
|
469
475
|
showToggle=${showToggle}
|
470
|
-
|
476
|
+
|
471
477
|
offcanvas=${offcanvas}
|
472
|
-
/>
|
478
|
+
/>
|
473
479
|
<div ref=${divRef} class="workspace" style=${{
|
474
480
|
paddingTop: "0rem",
|
475
481
|
overflowY: "hidden",
|