inspect-ai 0.3.56__py3-none-any.whl → 0.3.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_display/core/panel.py +1 -1
- inspect_ai/_eval/run.py +16 -11
- inspect_ai/_util/datetime.py +1 -1
- inspect_ai/_util/deprecation.py +1 -1
- inspect_ai/_util/json.py +11 -1
- inspect_ai/_util/logger.py +2 -1
- inspect_ai/_util/trace.py +39 -3
- inspect_ai/_util/transcript.py +36 -7
- inspect_ai/_view/www/.prettierrc.js +12 -0
- inspect_ai/_view/www/dist/assets/index.js +286 -224
- inspect_ai/_view/www/log-schema.json +124 -125
- inspect_ai/_view/www/src/App.mjs +18 -9
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/api/Types.mjs +15 -4
- inspect_ai/_view/www/src/api/api-http.mjs +2 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
- inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
- inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
- inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
- inspect_ai/_view/www/src/components/Tools.mjs +18 -3
- inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
- inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
- inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
- inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
- inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
- inspect_ai/_view/www/src/types/log.d.ts +2 -8
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
- inspect_ai/log/_log.py +25 -0
- inspect_ai/log/_recorders/eval.py +2 -0
- inspect_ai/model/_call_tools.py +27 -5
- inspect_ai/model/_providers/google.py +24 -6
- inspect_ai/model/_providers/openai.py +17 -3
- inspect_ai/model/_providers/openai_o1.py +10 -12
- inspect_ai/tool/_tool_info.py +2 -1
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -3
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +1 -3
- inspect_ai/util/_sandbox/docker/util.py +2 -1
- inspect_ai/util/_sandbox/self_check.py +18 -18
- inspect_ai/util/_store.py +2 -2
- inspect_ai/util/_subprocess.py +3 -3
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +57 -56
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0
@@ -48,11 +48,13 @@ export const Navbar = ({
|
|
48
48
|
if (status === "success") {
|
49
49
|
statusPanel = html`<${ResultsPanel} results="${results}" />`;
|
50
50
|
} else if (status === "cancelled") {
|
51
|
-
statusPanel = html`<${
|
51
|
+
statusPanel = html`<${CancelledPanel}
|
52
52
|
sampleCount=${samples?.length || 0}
|
53
53
|
/>`;
|
54
54
|
} else if (status === "started") {
|
55
|
-
statusPanel = html`<${RunningPanel} />`;
|
55
|
+
statusPanel = html`<${RunningPanel} sampleCount=${samples?.length || 0} />`;
|
56
|
+
} else if (status === "error") {
|
57
|
+
statusPanel = html`<${ErroredPanel} sampleCount=${samples?.length || 0} />`;
|
56
58
|
}
|
57
59
|
|
58
60
|
// If no logfile is loaded, just show an empty navbar
|
@@ -188,48 +190,54 @@ export const Navbar = ({
|
|
188
190
|
`;
|
189
191
|
};
|
190
192
|
|
191
|
-
const
|
193
|
+
const StatusPanel = ({ icon, status, sampleCount }) => {
|
192
194
|
return html`<div
|
193
195
|
style=${{
|
194
196
|
padding: "1em",
|
195
197
|
marginTop: "0.5em",
|
196
198
|
textTransform: "uppercase",
|
197
199
|
fontSize: FontSize.smaller,
|
200
|
+
display: "grid",
|
201
|
+
gridTemplateColumns: "auto auto",
|
198
202
|
}}
|
199
203
|
>
|
200
204
|
<i
|
201
|
-
class="${
|
202
|
-
style=${{
|
205
|
+
class="${icon}"
|
206
|
+
style=${{
|
207
|
+
fontSize: FontSize.large,
|
208
|
+
marginRight: "0.3em",
|
209
|
+
marginTop: "-0.1em",
|
210
|
+
}}
|
203
211
|
/>
|
204
|
-
|
212
|
+
<div>
|
213
|
+
<div>${status}</div>
|
214
|
+
<div>(${sampleCount} ${sampleCount === 1 ? "sample" : "samples"})</div>
|
215
|
+
</div>
|
205
216
|
</div>`;
|
206
217
|
};
|
207
218
|
|
208
|
-
const
|
209
|
-
return html
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
</div>
|
231
|
-
</div>
|
232
|
-
`;
|
219
|
+
const CancelledPanel = ({ sampleCount }) => {
|
220
|
+
return html`<${StatusPanel}
|
221
|
+
icon=${ApplicationIcons.logging.info}
|
222
|
+
status="Cancelled"
|
223
|
+
sampleCount=${sampleCount}
|
224
|
+
/>`;
|
225
|
+
};
|
226
|
+
|
227
|
+
const ErroredPanel = ({ sampleCount }) => {
|
228
|
+
return html`<${StatusPanel}
|
229
|
+
icon=${ApplicationIcons.logging.error}
|
230
|
+
status="Task Failed"
|
231
|
+
sampleCount=${sampleCount}
|
232
|
+
/>`;
|
233
|
+
};
|
234
|
+
|
235
|
+
const RunningPanel = ({ sampleCount }) => {
|
236
|
+
return html`<${StatusPanel}
|
237
|
+
icon=${ApplicationIcons.running}
|
238
|
+
status="Running"
|
239
|
+
sampleCount=${sampleCount}
|
240
|
+
/>`;
|
233
241
|
};
|
234
242
|
|
235
243
|
const ResultsPanel = ({ results }) => {
|
@@ -298,6 +306,7 @@ const ResultsPanel = ({ results }) => {
|
|
298
306
|
* @returns {import("preact").JSX.Element} The TranscriptView component.
|
299
307
|
*/
|
300
308
|
const VerticalMetric = ({ metric, isFirst }) => {
|
309
|
+
// @ts-expect-error
|
301
310
|
const reducer_component = metric.reducer
|
302
311
|
? html` <div
|
303
312
|
style=${{
|
@@ -309,7 +318,10 @@ const VerticalMetric = ({ metric, isFirst }) => {
|
|
309
318
|
...TextStyle.secondary,
|
310
319
|
}}
|
311
320
|
>
|
312
|
-
${
|
321
|
+
${
|
322
|
+
// @ts-expect-error
|
323
|
+
metric.reducer
|
324
|
+
}
|
313
325
|
</div>`
|
314
326
|
: "";
|
315
327
|
|
@@ -422,8 +422,7 @@ const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => {
|
|
422
422
|
|
423
423
|
const fullAnswer =
|
424
424
|
sample && sampleDescriptor
|
425
|
-
?
|
426
|
-
sampleDescriptor.selectedScorer(sample).answer()
|
425
|
+
? sampleDescriptor.selectedScorerDescriptor(sample).answer()
|
427
426
|
: undefined;
|
428
427
|
if (fullAnswer) {
|
429
428
|
columns.push({
|
@@ -17,7 +17,22 @@ import { inputString } from "../utils/Format.mjs";
|
|
17
17
|
const kSampleHeight = 88;
|
18
18
|
const kSeparatorHeight = 24;
|
19
19
|
|
20
|
-
|
20
|
+
/**
|
21
|
+
* Convert samples to a datastructure which contemplates grouping, etc...
|
22
|
+
*
|
23
|
+
* @param {Object} props - The parameters for the component.
|
24
|
+
* @param {Object} props.listRef - The ref for the list.
|
25
|
+
* @param {import("./SamplesTab.mjs").ListItem[]} props.items - The samples.
|
26
|
+
* @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor.
|
27
|
+
* @param {Object} props.style - The style for the element
|
28
|
+
* @param {number} props.selectedIndex - The index of the selected sample.
|
29
|
+
* @param {(index: number) => void} props.setSelectedIndex - The function to set the selected sample index.
|
30
|
+
* @param {import("../Types.mjs").ScoreLabel} props.selectedScore - The function to get the selected score.
|
31
|
+
* @param {() => void} props.nextSample - The function to move to the next sample.
|
32
|
+
* @param {() => void} props.prevSample - The function to move to the previous sample.
|
33
|
+
* @param {(index: number) => void} props.showSample - The function to show the sample.
|
34
|
+
* @returns {import("preact").JSX.Element} The SampleList component.
|
35
|
+
*/
|
21
36
|
export const SampleList = (props) => {
|
22
37
|
const {
|
23
38
|
listRef,
|
@@ -93,6 +108,7 @@ export const SampleList = (props) => {
|
|
93
108
|
}
|
94
109
|
}, [selectedIndex, rowMap, listRef]);
|
95
110
|
|
111
|
+
/** @param {import("./SamplesTab.mjs").ListItem} item */
|
96
112
|
const renderRow = (item) => {
|
97
113
|
if (item.type === "sample") {
|
98
114
|
return html`
|
@@ -145,7 +161,7 @@ export const SampleList = (props) => {
|
|
145
161
|
);
|
146
162
|
|
147
163
|
const listStyle = { ...style, flex: "1", overflowY: "auto", outline: "none" };
|
148
|
-
const { limit, answer, target } = gridColumns(sampleDescriptor);
|
164
|
+
const { input, limit, answer, target } = gridColumns(sampleDescriptor);
|
149
165
|
|
150
166
|
const headerRow = html`<div
|
151
167
|
style=${{
|
@@ -160,7 +176,7 @@ export const SampleList = (props) => {
|
|
160
176
|
}}
|
161
177
|
>
|
162
178
|
<div>Id</div>
|
163
|
-
<div
|
179
|
+
<div>${input !== "0" ? "Input" : ""}</div>
|
164
180
|
<div>${target !== "0" ? "Target" : ""}</div>
|
165
181
|
<div>${answer !== "0" ? "Answer" : ""}</div>
|
166
182
|
<div>${limit !== "0" ? "Limit" : ""}</div>
|
@@ -192,6 +208,7 @@ export const SampleList = (props) => {
|
|
192
208
|
// Count any sample errors and display a bad alerting the user
|
193
209
|
// to any errors
|
194
210
|
const errorCount = items?.reduce((previous, item) => {
|
211
|
+
// @ts-expect-error
|
195
212
|
if (item.data.error) {
|
196
213
|
return previous + 1;
|
197
214
|
} else {
|
@@ -201,6 +218,7 @@ export const SampleList = (props) => {
|
|
201
218
|
|
202
219
|
// Count limits
|
203
220
|
const limitCount = items?.reduce((previous, item) => {
|
221
|
+
// @ts-expect-error
|
204
222
|
if (item.data.limit) {
|
205
223
|
return previous + 1;
|
206
224
|
} else {
|
@@ -260,6 +278,17 @@ const SeparatorRow = ({ id, title, height }) => {
|
|
260
278
|
</div>`;
|
261
279
|
};
|
262
280
|
|
281
|
+
/**
|
282
|
+
* @param {Object} props - The parameters for the component.
|
283
|
+
* @param {string} props.id - The unique identifier for the sample.
|
284
|
+
* @param {number} props.index - The index of the sample.
|
285
|
+
* @param {import("../api/Types.mjs").SampleSummary} props.sample - The sample.
|
286
|
+
* @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor.
|
287
|
+
* @param {number} props.height - The height of the sample row.
|
288
|
+
* @param {boolean} props.selected - Whether the sample is selected.
|
289
|
+
* @param {(index: number) => void} props.showSample - The function to show the sample.
|
290
|
+
* @returns {import("preact").JSX.Element} The SampleRow component.
|
291
|
+
*/
|
263
292
|
const SampleRow = ({
|
264
293
|
id,
|
265
294
|
index,
|
@@ -339,7 +368,9 @@ const SampleRow = ({
|
|
339
368
|
${sample
|
340
369
|
? html`
|
341
370
|
<${MarkdownDiv}
|
342
|
-
markdown=${sampleDescriptor
|
371
|
+
markdown=${sampleDescriptor
|
372
|
+
?.selectedScorerDescriptor(sample)
|
373
|
+
.answer()}
|
343
374
|
style=${{ paddingLeft: "0" }}
|
344
375
|
class="no-last-para-padding"
|
345
376
|
/>
|
@@ -14,6 +14,14 @@ const labelStyle = {
|
|
14
14
|
...TextStyle.secondary,
|
15
15
|
};
|
16
16
|
|
17
|
+
/**
|
18
|
+
* @param {Object} props - The component props.
|
19
|
+
* @param {import("../types/log").EvalSample} props.sample - The sample.
|
20
|
+
* @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - The sample descriptor.
|
21
|
+
* @param {Object} props.style - The style for the element.
|
22
|
+
* @param {string} props.scorer - The scorer.
|
23
|
+
* @returns {import("preact").JSX.Element} The SampleScoreView component.
|
24
|
+
*/
|
17
25
|
export const SampleScoreView = ({
|
18
26
|
sample,
|
19
27
|
sampleDescriptor,
|
@@ -21,7 +29,7 @@ export const SampleScoreView = ({
|
|
21
29
|
scorer,
|
22
30
|
}) => {
|
23
31
|
if (!sampleDescriptor) {
|
24
|
-
return
|
32
|
+
return html``;
|
25
33
|
}
|
26
34
|
|
27
35
|
const scoreInput = inputString(sample.input);
|
@@ -34,7 +42,10 @@ export const SampleScoreView = ({
|
|
34
42
|
);
|
35
43
|
}
|
36
44
|
|
37
|
-
const scorerDescriptor = sampleDescriptor.
|
45
|
+
const scorerDescriptor = sampleDescriptor.evalDescriptor.scorerDescriptor(
|
46
|
+
sample,
|
47
|
+
{ scorer, name: scorer },
|
48
|
+
);
|
38
49
|
const explanation = scorerDescriptor.explanation() || "(No Explanation)";
|
39
50
|
const answer = scorerDescriptor.answer();
|
40
51
|
const metadata = scorerDescriptor.metadata();
|
@@ -1,9 +1,18 @@
|
|
1
1
|
import { html } from "htm/preact";
|
2
2
|
|
3
|
+
/**
|
4
|
+
* @param {Object} props
|
5
|
+
* @param {import("../api/Types.mjs").SampleSummary} props.sample
|
6
|
+
* @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor
|
7
|
+
* @param {string} props.scorer
|
8
|
+
* @returns {import("preact").JSX.Element}
|
9
|
+
*/
|
3
10
|
export const SampleScores = ({ sample, sampleDescriptor, scorer }) => {
|
4
11
|
const scores = scorer
|
5
|
-
? sampleDescriptor.
|
6
|
-
|
12
|
+
? sampleDescriptor.evalDescriptor
|
13
|
+
.scorerDescriptor(sample, { scorer, name: scorer })
|
14
|
+
.scores()
|
15
|
+
: sampleDescriptor.selectedScorerDescriptor(sample).scores();
|
7
16
|
|
8
17
|
if (scores.length === 1) {
|
9
18
|
return scores[0].rendered();
|