inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/common.py +7 -3
- inspect_ai/_cli/eval.py +17 -2
- inspect_ai/_cli/trace.py +21 -2
- inspect_ai/_display/core/active.py +4 -3
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +4 -9
- inspect_ai/_display/textual/app.py +4 -1
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +119 -16
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +32 -20
- inspect_ai/_eval/evalset.py +7 -5
- inspect_ai/_eval/score.py +1 -0
- inspect_ai/_eval/task/__init__.py +2 -2
- inspect_ai/_eval/task/images.py +40 -25
- inspect_ai/_eval/task/results.py +50 -22
- inspect_ai/_eval/task/run.py +180 -124
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_eval/task/task.py +140 -25
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/content.py +23 -1
- inspect_ai/_util/images.py +20 -17
- inspect_ai/_util/kvstore.py +73 -0
- inspect_ai/_util/notgiven.py +18 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_util/thread.py +5 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25375 -1846
- inspect_ai/_view/www/log-schema.json +129 -15
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +8 -10
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
- inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +75 -2
- inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +62 -27
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/Json.mjs +12 -6
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/util.py +2 -2
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/dataset/_sources/csv.py +2 -1
- inspect_ai/dataset/_sources/json.py +2 -1
- inspect_ai/dataset/_sources/util.py +15 -7
- inspect_ai/log/_condense.py +11 -1
- inspect_ai/log/_log.py +3 -6
- inspect_ai/log/_recorders/eval.py +19 -8
- inspect_ai/log/_samples.py +26 -5
- inspect_ai/log/_transcript.py +32 -2
- inspect_ai/model/__init__.py +10 -2
- inspect_ai/model/_call_tools.py +59 -12
- inspect_ai/model/_chat_message.py +2 -4
- inspect_ai/model/_conversation.py +61 -0
- inspect_ai/model/_generate_config.py +10 -4
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +7 -2
- inspect_ai/model/_providers/anthropic.py +109 -51
- inspect_ai/model/_providers/azureai.py +26 -24
- inspect_ai/model/_providers/bedrock.py +43 -44
- inspect_ai/model/_providers/google.py +121 -58
- inspect_ai/model/_providers/groq.py +7 -5
- inspect_ai/model/_providers/hf.py +11 -6
- inspect_ai/model/_providers/mistral.py +17 -20
- inspect_ai/model/_providers/openai.py +32 -21
- inspect_ai/model/_providers/openai_o1.py +9 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +8 -8
- inspect_ai/model/_providers/vertex.py +18 -8
- inspect_ai/scorer/__init__.py +13 -2
- inspect_ai/scorer/_metrics/__init__.py +2 -2
- inspect_ai/scorer/_metrics/std.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +2 -2
- inspect_ai/solver/__init__.py +2 -5
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +11 -1
- inspect_ai/tool/_tool.py +21 -3
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -3
- inspect_ai/util/{_trace.py → _conversation.py} +3 -17
- inspect_ai/util/_display.py +14 -4
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/context.py +12 -13
- inspect_ai/util/_sandbox/docker/compose.py +24 -11
- inspect_ai/util/_sandbox/docker/docker.py +84 -14
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/environment.py +27 -1
- inspect_ai/util/_sandbox/local.py +1 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/model/_trace.py +0 -48
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
import { html } from "htm/preact";
|
2
|
-
import { useCallback, useState } from "preact/hooks";
|
3
|
-
import { useEffect,
|
2
|
+
import { useCallback, useMemo, useState } from "preact/hooks";
|
3
|
+
import { useEffect, useRef } from "preact/hooks";
|
4
4
|
|
5
5
|
import { ApplicationStyles } from "../appearance/Styles.mjs";
|
6
6
|
import { FontSize } from "../appearance/Fonts.mjs";
|
@@ -56,57 +56,28 @@ export const SampleList = (props) => {
|
|
56
56
|
setHidden(false);
|
57
57
|
}, [items]);
|
58
58
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
values.length > 0 ? values[values.length - 1] : undefined;
|
69
|
-
const start =
|
70
|
-
previous === undefined ? 0 : previous.start + previous.height;
|
71
|
-
values.push({
|
72
|
-
index,
|
73
|
-
height,
|
74
|
-
start,
|
75
|
-
});
|
76
|
-
return values;
|
77
|
-
}, []);
|
59
|
+
// Keep a mapping of the indexes to items (skipping separators)
|
60
|
+
const itemRowMapping = useMemo(() => {
|
61
|
+
const rowIndexes = [];
|
62
|
+
items.forEach((item, index) => {
|
63
|
+
if (item.type === "sample") {
|
64
|
+
rowIndexes.push(index);
|
65
|
+
}
|
66
|
+
});
|
67
|
+
return rowIndexes;
|
78
68
|
}, [items]);
|
79
69
|
|
70
|
+
const prevSelectedIndexRef = useRef(null);
|
80
71
|
useEffect(() => {
|
81
72
|
const listEl = listRef.current;
|
82
73
|
if (listEl) {
|
83
|
-
|
84
|
-
const
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
const scrollTop = listEl.base.scrollTop;
|
90
|
-
const scrollBottom = scrollTop + listEl.base.offsetHeight;
|
91
|
-
|
92
|
-
// It is visible
|
93
|
-
if (itemTop >= scrollTop && itemBottom <= scrollBottom) {
|
94
|
-
return;
|
95
|
-
}
|
96
|
-
|
97
|
-
if (itemTop < scrollTop) {
|
98
|
-
// Top is scrolled off
|
99
|
-
listEl.base.scrollTo({ top: itemTop });
|
100
|
-
return;
|
101
|
-
}
|
102
|
-
|
103
|
-
if (itemBottom > scrollBottom) {
|
104
|
-
listEl.base.scrollTo({ top: itemBottom - listEl.base.offsetHeight });
|
105
|
-
return;
|
106
|
-
}
|
107
|
-
}
|
74
|
+
const actualRowIndex = itemRowMapping[selectedIndex];
|
75
|
+
const direction =
|
76
|
+
actualRowIndex > prevSelectedIndexRef.current ? "down" : "up";
|
77
|
+
listRef.current?.scrollToIndex(actualRowIndex, direction);
|
78
|
+
prevSelectedIndexRef.current = actualRowIndex;
|
108
79
|
}
|
109
|
-
}, [selectedIndex,
|
80
|
+
}, [selectedIndex, listRef, itemRowMapping]);
|
110
81
|
|
111
82
|
/** @param {import("./SamplesTab.mjs").ListItem} item */
|
112
83
|
const renderRow = (item) => {
|
@@ -254,7 +225,6 @@ export const SampleList = (props) => {
|
|
254
225
|
tabIndex="0"
|
255
226
|
renderRow=${renderRow}
|
256
227
|
onkeydown=${onkeydown}
|
257
|
-
rowMap=${rowMap}
|
258
228
|
style=${listStyle}
|
259
229
|
/>
|
260
230
|
${footerRow}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
// @ts-check
|
2
2
|
import { html } from "htm/preact";
|
3
|
-
import {
|
3
|
+
import { TranscriptVirtualList } from "./transcript/TranscriptView.mjs";
|
4
4
|
|
5
5
|
/**
|
6
6
|
* Renders the SampleTranscript component.
|
@@ -8,8 +8,13 @@ import { TranscriptView } from "./transcript/TranscriptView.mjs";
|
|
8
8
|
* @param {Object} props - The parameters for the component.
|
9
9
|
* @param {string} props.id - The id of this component
|
10
10
|
* @param {import("../types/log").Events} props.evalEvents - The transcript to display.
|
11
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
|
11
12
|
* @returns {import("preact").JSX.Element} The SampleTranscript component.
|
12
13
|
*/
|
13
|
-
export const SampleTranscript = ({ id, evalEvents }) => {
|
14
|
-
return html`<${
|
14
|
+
export const SampleTranscript = ({ id, evalEvents, scrollRef }) => {
|
15
|
+
return html`<${TranscriptVirtualList}
|
16
|
+
id=${id}
|
17
|
+
events=${evalEvents}
|
18
|
+
scrollRef=${scrollRef}
|
19
|
+
/>`;
|
15
20
|
};
|
@@ -83,6 +83,29 @@ import {
|
|
83
83
|
* @property {number} normalized.limit - Normalized size of the limit message.
|
84
84
|
*/
|
85
85
|
|
86
|
+
/**
|
87
|
+
* @param {import("../Types.mjs").ScoreLabel | undefined} scoreLabel
|
88
|
+
* @returns {string}
|
89
|
+
*/
|
90
|
+
export const scoreLabelKey = (scoreLabel) => {
|
91
|
+
if (!scoreLabel) {
|
92
|
+
return "No score key";
|
93
|
+
}
|
94
|
+
return `${scoreLabel.scorer}.${scoreLabel.name}`;
|
95
|
+
};
|
96
|
+
|
97
|
+
/**
|
98
|
+
* @param {string} key
|
99
|
+
* @returns {import("../Types.mjs").ScoreLabel | undefined}
|
100
|
+
*/
|
101
|
+
export const parseScoreLabelKey = (key) => {
|
102
|
+
if (key == "No score key") {
|
103
|
+
return undefined;
|
104
|
+
}
|
105
|
+
const [scorer, name] = key.split(".");
|
106
|
+
return { scorer, name };
|
107
|
+
};
|
108
|
+
|
86
109
|
/**
|
87
110
|
* @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
|
88
111
|
* @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
|
@@ -165,17 +188,6 @@ export const createEvalDescriptor = (scores, samples, epochs) => {
|
|
165
188
|
return undefined;
|
166
189
|
};
|
167
190
|
|
168
|
-
/**
|
169
|
-
* @param {import("../Types.mjs").ScoreLabel} [scoreLabel]
|
170
|
-
* @returns {string}
|
171
|
-
*/
|
172
|
-
const scoreLabelKey = (scoreLabel) => {
|
173
|
-
if (!scoreLabel) {
|
174
|
-
return "No score key";
|
175
|
-
}
|
176
|
-
return `${scoreLabel.scorer}.${scoreLabel.name}`;
|
177
|
-
};
|
178
|
-
|
179
191
|
/**
|
180
192
|
* The EvalDescriptor is memoized. Compute all descriptors now to avoid duplicate work.
|
181
193
|
* @type {Map<string, ScoreDescriptor>}
|
@@ -377,7 +389,11 @@ export const createSamplesDescriptor = (evalDescriptor, selectedScore) => {
|
|
377
389
|
(previous, current) => {
|
378
390
|
const text = inputString(current.input).join(" ");
|
379
391
|
const scoreValue = evalDescriptor.score(current, selectedScore).value;
|
380
|
-
const scoreText = scoreValue
|
392
|
+
const scoreText = scoreValue
|
393
|
+
? String(scoreValue)
|
394
|
+
: current.error
|
395
|
+
? String(current.error)
|
396
|
+
: "";
|
381
397
|
previous[0] = Math.min(Math.max(previous[0], text.length), 300);
|
382
398
|
previous[1] = Math.min(
|
383
399
|
Math.max(previous[1], arrayToString(current.target).length),
|
@@ -462,7 +478,7 @@ const scoreCategorizers = [
|
|
462
478
|
* @returns {ScoreDescriptor} a ScoreDescriptor
|
463
479
|
*/
|
464
480
|
describe: (values, types) => {
|
465
|
-
if (
|
481
|
+
if (types.length === 1 && types[0] === "boolean") {
|
466
482
|
return booleanScoreCategorizer();
|
467
483
|
}
|
468
484
|
},
|
@@ -31,6 +31,7 @@ import { EmptyPanel } from "../components/EmptyPanel.mjs";
|
|
31
31
|
* @param {import("../Types.mjs").ScoreFilter} props.filter - the selected filter
|
32
32
|
* @param {import("htm/preact").MutableRef<number>} props.sampleScrollPositionRef - the sample scroll position
|
33
33
|
* @param {(position: number) => void} props.setSampleScrollPosition - sets the sample scroll position
|
34
|
+
* @param {import("htm/preact").MutableRef<HTMLElement>} props.sampleTabScrollRef - the sample scroll element
|
34
35
|
* @param {any} props.sort - the selected sort
|
35
36
|
*
|
36
37
|
* @returns {import("preact").JSX.Element[]} The TranscriptView component.
|
@@ -54,6 +55,7 @@ export const SamplesTab = ({
|
|
54
55
|
setSelectedSampleTab,
|
55
56
|
sampleScrollPositionRef,
|
56
57
|
setSampleScrollPosition,
|
58
|
+
sampleTabScrollRef,
|
57
59
|
}) => {
|
58
60
|
/** @type {[ListItem[], function(ListItem[]): void]} */
|
59
61
|
const [items, setItems] = useState([]);
|
@@ -82,7 +84,7 @@ export const SamplesTab = ({
|
|
82
84
|
setTimeout(() => {
|
83
85
|
if (sampleListRef.current) {
|
84
86
|
// @ts-ignore
|
85
|
-
sampleListRef.current.
|
87
|
+
sampleListRef.current.focus();
|
86
88
|
}
|
87
89
|
}, 0);
|
88
90
|
}
|
@@ -152,6 +154,7 @@ export const SamplesTab = ({
|
|
152
154
|
sampleDescriptor=${sampleDescriptor}
|
153
155
|
selectedTab=${selectedSampleTab}
|
154
156
|
setSelectedTab=${setSelectedSampleTab}
|
157
|
+
scrollRef=${sampleTabScrollRef}
|
155
158
|
/>`,
|
156
159
|
);
|
157
160
|
} else if (sampleMode === "many") {
|
@@ -23,6 +23,14 @@ export const SampleTools = (props) => {
|
|
23
23
|
const hasEpochs = epochs > 1;
|
24
24
|
const tools = [];
|
25
25
|
|
26
|
+
tools.push(
|
27
|
+
html`<${SampleFilter}
|
28
|
+
evalDescriptor=${sampleDescriptor.evalDescriptor}
|
29
|
+
filter=${filter}
|
30
|
+
filterChanged=${filterChanged}
|
31
|
+
/>`,
|
32
|
+
);
|
33
|
+
|
26
34
|
if (scores.length > 1) {
|
27
35
|
tools.push(
|
28
36
|
html`<${SelectScorer}
|
@@ -43,14 +51,6 @@ export const SampleTools = (props) => {
|
|
43
51
|
);
|
44
52
|
}
|
45
53
|
|
46
|
-
tools.push(
|
47
|
-
html`<${SampleFilter}
|
48
|
-
filter=${filter}
|
49
|
-
filterChanged=${filterChanged}
|
50
|
-
descriptor=${sampleDescriptor}
|
51
|
-
/>`,
|
52
|
-
);
|
53
|
-
|
54
54
|
tools.push(
|
55
55
|
html`<${SortFilter}
|
56
56
|
sampleDescriptor=${sampleDescriptor}
|