inspect-ai 0.3.49__py3-none-any.whl → 0.3.51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/info.py +2 -2
- inspect_ai/_cli/log.py +2 -2
- inspect_ai/_cli/score.py +2 -2
- inspect_ai/_display/core/display.py +19 -0
- inspect_ai/_display/core/panel.py +37 -7
- inspect_ai/_display/core/progress.py +29 -2
- inspect_ai/_display/core/results.py +79 -40
- inspect_ai/_display/core/textual.py +21 -0
- inspect_ai/_display/rich/display.py +28 -8
- inspect_ai/_display/textual/app.py +107 -1
- inspect_ai/_display/textual/display.py +1 -1
- inspect_ai/_display/textual/widgets/samples.py +132 -91
- inspect_ai/_display/textual/widgets/task_detail.py +236 -0
- inspect_ai/_display/textual/widgets/tasks.py +74 -6
- inspect_ai/_display/textual/widgets/toggle.py +32 -0
- inspect_ai/_eval/context.py +2 -0
- inspect_ai/_eval/eval.py +4 -3
- inspect_ai/_eval/loader.py +1 -1
- inspect_ai/_eval/run.py +35 -2
- inspect_ai/_eval/task/log.py +13 -11
- inspect_ai/_eval/task/results.py +12 -3
- inspect_ai/_eval/task/run.py +139 -36
- inspect_ai/_eval/task/sandbox.py +2 -1
- inspect_ai/_util/_async.py +30 -1
- inspect_ai/_util/file.py +31 -4
- inspect_ai/_util/html.py +3 -0
- inspect_ai/_util/logger.py +6 -5
- inspect_ai/_util/platform.py +5 -6
- inspect_ai/_util/registry.py +1 -1
- inspect_ai/_view/server.py +9 -9
- inspect_ai/_view/www/App.css +2 -2
- inspect_ai/_view/www/dist/assets/index.css +2 -2
- inspect_ai/_view/www/dist/assets/index.js +352 -294
- inspect_ai/_view/www/log-schema.json +13 -0
- inspect_ai/_view/www/package.json +1 -0
- inspect_ai/_view/www/src/components/MessageBand.mjs +1 -1
- inspect_ai/_view/www/src/components/Tools.mjs +16 -13
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -3
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +52 -77
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -13
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +15 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +4 -2
- inspect_ai/_view/www/src/types/log.d.ts +2 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +2 -0
- inspect_ai/_view/www/yarn.lock +9 -4
- inspect_ai/approval/__init__.py +1 -1
- inspect_ai/approval/_human/approver.py +35 -0
- inspect_ai/approval/_human/console.py +62 -0
- inspect_ai/approval/_human/manager.py +108 -0
- inspect_ai/approval/_human/panel.py +233 -0
- inspect_ai/approval/_human/util.py +51 -0
- inspect_ai/dataset/_sources/hf.py +2 -2
- inspect_ai/dataset/_sources/util.py +1 -1
- inspect_ai/log/_file.py +106 -36
- inspect_ai/log/_recorders/eval.py +226 -158
- inspect_ai/log/_recorders/file.py +9 -6
- inspect_ai/log/_recorders/json.py +35 -12
- inspect_ai/log/_recorders/recorder.py +15 -15
- inspect_ai/log/_samples.py +52 -0
- inspect_ai/model/_model.py +14 -0
- inspect_ai/model/_model_output.py +4 -0
- inspect_ai/model/_providers/azureai.py +1 -1
- inspect_ai/model/_providers/hf.py +106 -4
- inspect_ai/model/_providers/util/__init__.py +2 -0
- inspect_ai/model/_providers/util/hf_handler.py +200 -0
- inspect_ai/scorer/_common.py +1 -1
- inspect_ai/solver/_plan.py +0 -8
- inspect_ai/solver/_task_state.py +18 -1
- inspect_ai/solver/_use_tools.py +9 -1
- inspect_ai/tool/_tool_def.py +2 -2
- inspect_ai/tool/_tool_info.py +14 -2
- inspect_ai/tool/_tool_params.py +2 -1
- inspect_ai/tool/_tools/_execute.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +6 -0
- inspect_ai/util/__init__.py +5 -6
- inspect_ai/util/_panel.py +91 -0
- inspect_ai/util/_sandbox/__init__.py +2 -6
- inspect_ai/util/_sandbox/context.py +4 -3
- inspect_ai/util/_sandbox/docker/compose.py +12 -2
- inspect_ai/util/_sandbox/docker/docker.py +19 -9
- inspect_ai/util/_sandbox/docker/util.py +10 -2
- inspect_ai/util/_sandbox/environment.py +47 -41
- inspect_ai/util/_sandbox/local.py +15 -10
- inspect_ai/util/_subprocess.py +43 -3
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/RECORD +90 -82
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
- inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
- inspect_ai/approval/_human.py +0 -123
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/top_level.txt +0 -0
@@ -2506,6 +2506,18 @@
|
|
2506
2506
|
],
|
2507
2507
|
"default": null
|
2508
2508
|
},
|
2509
|
+
"time": {
|
2510
|
+
"anyOf": [
|
2511
|
+
{
|
2512
|
+
"type": "number"
|
2513
|
+
},
|
2514
|
+
{
|
2515
|
+
"type": "null"
|
2516
|
+
}
|
2517
|
+
],
|
2518
|
+
"default": null,
|
2519
|
+
"title": "Time"
|
2520
|
+
},
|
2509
2521
|
"metadata": {
|
2510
2522
|
"anyOf": [
|
2511
2523
|
{
|
@@ -2537,6 +2549,7 @@
|
|
2537
2549
|
"model",
|
2538
2550
|
"choices",
|
2539
2551
|
"usage",
|
2552
|
+
"time",
|
2540
2553
|
"metadata",
|
2541
2554
|
"error"
|
2542
2555
|
],
|
@@ -8,7 +8,7 @@ export const MessageBand = ({ message, hidden, setHidden, type }) => {
|
|
8
8
|
const bgColor =
|
9
9
|
type === "info" ? "var(--bs-light)" : "var(--bs-" + type + "-bg-subtle)";
|
10
10
|
const color =
|
11
|
-
"var(--bs-" + type === "info" ? "secondary" :
|
11
|
+
"var(--bs-" + (type === "info" ? "secondary" : type) + "-text-emphasis)";
|
12
12
|
|
13
13
|
return html`
|
14
14
|
<div
|
@@ -1,6 +1,8 @@
|
|
1
1
|
// @ts-check
|
2
2
|
/// <reference path="../types/prism.d.ts" />
|
3
3
|
import Prism from "prismjs";
|
4
|
+
import murmurhash from "murmurhash";
|
5
|
+
|
4
6
|
import "prismjs/components/prism-python";
|
5
7
|
import "prismjs/components/prism-bash";
|
6
8
|
import "prismjs/components/prism-json";
|
@@ -105,11 +107,11 @@ export const ToolCallView = ({
|
|
105
107
|
* @param {string} props.type - The function call
|
106
108
|
* @param {string | undefined } props.contents - The main input for this call
|
107
109
|
* @param {Record<string, string>} [props.style] - The style
|
108
|
-
* @param {import("../types/log").ToolCallContent} props.view - The tool call view
|
110
|
+
* @param {import("../types/log").ToolCallContent} [props.view] - The tool call view
|
109
111
|
* @returns {import("preact").JSX.Element | string} The SampleTranscript component.
|
110
112
|
*/
|
111
113
|
export const ToolInput = ({ type, contents, view, style }) => {
|
112
|
-
if (!contents) {
|
114
|
+
if (!contents && !view?.content) {
|
113
115
|
return "";
|
114
116
|
}
|
115
117
|
|
@@ -133,7 +135,7 @@ export const ToolInput = ({ type, contents, view, style }) => {
|
|
133
135
|
}
|
134
136
|
}
|
135
137
|
}
|
136
|
-
}, [
|
138
|
+
}, [contents, view, style]);
|
137
139
|
return html`<${MarkdownDiv}
|
138
140
|
markdown=${view.content}
|
139
141
|
ref=${toolInputRef}
|
@@ -144,14 +146,15 @@ export const ToolInput = ({ type, contents, view, style }) => {
|
|
144
146
|
useEffect(() => {
|
145
147
|
const tokens = Prism.languages[type];
|
146
148
|
if (toolInputRef.current && tokens) {
|
147
|
-
|
148
|
-
if (typeof contents === "object" || Array.isArray(contents)) {
|
149
|
-
resolvedContents = JSON.stringify(contents);
|
150
|
-
}
|
151
|
-
const html = Prism.highlight(resolvedContents, tokens, type);
|
152
|
-
toolInputRef.current.innerHTML = html;
|
149
|
+
Prism.highlightElement(toolInputRef.current);
|
153
150
|
}
|
154
|
-
}, [
|
151
|
+
}, [contents, type, view]);
|
152
|
+
|
153
|
+
contents =
|
154
|
+
typeof contents === "object" || Array.isArray(contents)
|
155
|
+
? JSON.stringify(contents)
|
156
|
+
: contents;
|
157
|
+
const key = murmurhash.v3(contents);
|
155
158
|
|
156
159
|
return html`<pre
|
157
160
|
class="tool-output"
|
@@ -162,9 +165,9 @@ export const ToolInput = ({ type, contents, view, style }) => {
|
|
162
165
|
...style,
|
163
166
|
}}
|
164
167
|
>
|
165
|
-
<code ref=${toolInputRef}
|
166
|
-
|
167
|
-
|
168
|
+
<code ref=${toolInputRef}
|
169
|
+
key=${key}
|
170
|
+
class="sourceCode${type ? ` language-${type}` : ""}" style=${{
|
168
171
|
overflowWrap: "anywhere",
|
169
172
|
whiteSpace: "pre-wrap",
|
170
173
|
}}>
|
@@ -150,7 +150,6 @@ export const SampleDisplay = ({
|
|
150
150
|
sample=${sample}
|
151
151
|
sampleDescriptor=${sampleDescriptor}
|
152
152
|
scorer=${Object.keys(sample.scores)[0]}
|
153
|
-
style=${{ paddingLeft: "0.8em", marginTop: "0.4em" }}
|
154
153
|
/>
|
155
154
|
</${TabPanel}>`);
|
156
155
|
} else {
|
@@ -164,7 +163,6 @@ export const SampleDisplay = ({
|
|
164
163
|
sample=${sample}
|
165
164
|
sampleDescriptor=${sampleDescriptor}
|
166
165
|
scorer=${scorer}
|
167
|
-
style=${{ paddingLeft: "0.8em", marginTop: "0.4em" }}
|
168
166
|
/>
|
169
167
|
</${TabPanel}>`);
|
170
168
|
}
|
@@ -180,7 +178,7 @@ export const SampleDisplay = ({
|
|
180
178
|
title="Metadata"
|
181
179
|
onSelected=${onSelectedTab}
|
182
180
|
selected=${selectedTab === kSampleMetdataTabId}>
|
183
|
-
<div style=${{ display: "flex", flexWrap: "wrap", alignItems: "flex-start", gap: "1em", paddingLeft: "0
|
181
|
+
<div style=${{ display: "flex", flexWrap: "wrap", alignItems: "flex-start", gap: "1em", paddingLeft: "0", marginTop: "0.5em" }}>
|
184
182
|
${sampleMetadatas}
|
185
183
|
</div>
|
186
184
|
</${TabPanel}>`,
|
@@ -3,7 +3,8 @@ import { arrayToString, inputString } from "../utils/Format.mjs";
|
|
3
3
|
import { MarkdownDiv } from "../components/MarkdownDiv.mjs";
|
4
4
|
import { SampleScores } from "./SampleScores.mjs";
|
5
5
|
import { FontSize, TextStyle } from "../appearance/Fonts.mjs";
|
6
|
-
import {
|
6
|
+
import { MetaDataGrid } from "../components/MetaDataGrid.mjs";
|
7
|
+
import { Card, CardHeader, CardBody } from "../components/Card.mjs";
|
7
8
|
|
8
9
|
const labelStyle = {
|
9
10
|
paddingRight: "2em",
|
@@ -22,6 +23,7 @@ export const SampleScoreView = ({
|
|
22
23
|
if (!sampleDescriptor) {
|
23
24
|
return "";
|
24
25
|
}
|
26
|
+
|
25
27
|
const scoreInput = inputString(sample.input);
|
26
28
|
if (sample.choices && sample.choices.length > 0) {
|
27
29
|
scoreInput.push("");
|
@@ -35,17 +37,21 @@ export const SampleScoreView = ({
|
|
35
37
|
const scorerDescriptor = sampleDescriptor.scorer(sample, scorer);
|
36
38
|
const explanation = scorerDescriptor.explanation() || "(No Explanation)";
|
37
39
|
const answer = scorerDescriptor.answer();
|
40
|
+
const metadata = scorerDescriptor.metadata();
|
38
41
|
|
39
42
|
return html`
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
43
|
+
<div
|
44
|
+
class="container-fluid"
|
45
|
+
style=${{
|
46
|
+
marginTop: "0.5em",
|
47
|
+
paddingLeft: "0",
|
48
|
+
fontSize: FontSize.base,
|
49
|
+
...style,
|
50
|
+
}}
|
51
|
+
>
|
52
|
+
<${Card}>
|
53
|
+
<${CardHeader} label="Score"/>
|
54
|
+
<${CardBody}>
|
49
55
|
<div>
|
50
56
|
<div style=${{ ...labelStyle }}>Input</div>
|
51
57
|
<div>
|
@@ -58,7 +64,7 @@ export const SampleScoreView = ({
|
|
58
64
|
|
59
65
|
<table
|
60
66
|
class="table"
|
61
|
-
style=${{ width: "100%", marginBottom: "
|
67
|
+
style=${{ width: "100%", marginBottom: "1em" }}
|
62
68
|
>
|
63
69
|
<thead style=${{ borderBottomColor: "#00000000" }}>
|
64
70
|
<tr>
|
@@ -114,73 +120,42 @@ export const SampleScoreView = ({
|
|
114
120
|
</tr>
|
115
121
|
</tbody>
|
116
122
|
</table>
|
123
|
+
</${CardBody}>
|
124
|
+
</${Card}>
|
125
|
+
|
126
|
+
${
|
127
|
+
explanation && explanation !== answer
|
128
|
+
? html`
|
129
|
+
<${Card}>
|
130
|
+
<${CardHeader} label="Explanation"/>
|
131
|
+
<${CardBody}>
|
132
|
+
<${MarkdownDiv}
|
133
|
+
markdown=${arrayToString(explanation)}
|
134
|
+
style=${{ paddingLeft: "0" }}
|
135
|
+
class="no-last-para-padding"
|
136
|
+
/>
|
137
|
+
|
138
|
+
</${CardBody}>
|
139
|
+
</${Card}>`
|
140
|
+
: ""
|
141
|
+
}
|
117
142
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
</th>
|
135
|
-
</tr>
|
136
|
-
</thead>
|
137
|
-
<tbody>
|
138
|
-
<tr>
|
139
|
-
<td style=${{ paddingLeft: "0" }}>
|
140
|
-
<${MarkdownDiv}
|
141
|
-
markdown=${arrayToString(explanation)}
|
142
|
-
style=${{ paddingLeft: "0" }}
|
143
|
-
class="no-last-para-padding"
|
144
|
-
/>
|
145
|
-
</td>
|
146
|
-
</tr>
|
147
|
-
</tbody>
|
148
|
-
</table>`
|
149
|
-
: ""}
|
150
|
-
${sample?.score?.metadata &&
|
151
|
-
Object.keys(sample?.score?.metadata).length > 0
|
152
|
-
? html` <table
|
153
|
-
class="table"
|
154
|
-
style=${{ width: "100%", marginBottom: "0" }}
|
155
|
-
>
|
156
|
-
<thead>
|
157
|
-
<tr>
|
158
|
-
<th
|
159
|
-
style=${{
|
160
|
-
paddingBottom: "0",
|
161
|
-
paddingLeft: "0",
|
162
|
-
...labelStyle,
|
163
|
-
fontWeight: "400",
|
164
|
-
}}
|
165
|
-
>
|
166
|
-
Metadata
|
167
|
-
</th>
|
168
|
-
</tr>
|
169
|
-
</thead>
|
170
|
-
<tbody>
|
171
|
-
<tr>
|
172
|
-
<td style=${{ paddingLeft: "0" }}>
|
173
|
-
<${MetaDataView}
|
174
|
-
id="task-sample-score-metadata"
|
175
|
-
classes="tab-pane"
|
176
|
-
entries="${sample?.score?.metadata}"
|
177
|
-
style=${{ marginTop: "1em" }}
|
178
|
-
/>
|
179
|
-
</td>
|
180
|
-
</tr>
|
181
|
-
</tbody>
|
182
|
-
</table>`
|
183
|
-
: ""}
|
143
|
+
${
|
144
|
+
metadata && Object.keys(metadata).length > 0
|
145
|
+
? html`
|
146
|
+
<${Card}>
|
147
|
+
<${CardHeader} label="Metadata"/>
|
148
|
+
<${CardBody}>
|
149
|
+
<${MetaDataGrid}
|
150
|
+
id="task-sample-score-metadata"
|
151
|
+
classes="tab-pane"
|
152
|
+
entries="${metadata}"
|
153
|
+
style=${{ marginTop: "0" }}
|
154
|
+
/>
|
155
|
+
</${CardBody}>
|
156
|
+
</${Card}>`
|
157
|
+
: ""
|
158
|
+
}
|
184
159
|
</div>
|
185
160
|
`;
|
186
161
|
};
|
@@ -158,6 +158,23 @@ export const createsSamplesDescriptor = (
|
|
158
158
|
}
|
159
159
|
return undefined;
|
160
160
|
};
|
161
|
+
|
162
|
+
// Retrieve the metadata for a sample
|
163
|
+
/**
|
164
|
+
* @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score
|
165
|
+
* @param {string} scorer - the scorer name
|
166
|
+
* @returns {Object} The explanation
|
167
|
+
*/
|
168
|
+
const scoreMetadata = (sample, scorer) => {
|
169
|
+
if (sample) {
|
170
|
+
const sampleScore = score(sample, scorer);
|
171
|
+
if (sampleScore && sampleScore.metadata) {
|
172
|
+
return sampleScore.metadata;
|
173
|
+
}
|
174
|
+
}
|
175
|
+
return undefined;
|
176
|
+
};
|
177
|
+
|
161
178
|
const uniqScoreValues = [
|
162
179
|
...new Set(
|
163
180
|
samples
|
@@ -280,6 +297,9 @@ export const createsSamplesDescriptor = (
|
|
280
297
|
|
281
298
|
const scorerDescriptor = (sample, scorer) => {
|
282
299
|
return {
|
300
|
+
metadata: () => {
|
301
|
+
return scoreMetadata(sample, scorer);
|
302
|
+
},
|
283
303
|
explanation: () => {
|
284
304
|
return scoreExplanation(sample, scorer);
|
285
305
|
},
|
@@ -300,24 +320,18 @@ export const createsSamplesDescriptor = (
|
|
300
320
|
});
|
301
321
|
const sampleScorer = sample.scores[scorer];
|
302
322
|
const scoreVal = sampleScorer.value;
|
323
|
+
|
303
324
|
if (typeof scoreVal === "object") {
|
304
325
|
const names = Object.keys(scoreVal);
|
326
|
+
|
327
|
+
// See if this is a dictionary of score names
|
328
|
+
// if any of the score names match, treat it
|
329
|
+
// as a scorer dictionary
|
305
330
|
if (
|
306
331
|
names.find((name) => {
|
307
|
-
return
|
332
|
+
return scoreNames.includes(name);
|
308
333
|
})
|
309
334
|
) {
|
310
|
-
// Since this dictionary contains keys which are not scores
|
311
|
-
// we just treat it like an opaque dictionary
|
312
|
-
return [
|
313
|
-
{
|
314
|
-
name: scorer,
|
315
|
-
rendered: () => {
|
316
|
-
return scoreDescriptor.render(scoreVal);
|
317
|
-
},
|
318
|
-
},
|
319
|
-
];
|
320
|
-
} else {
|
321
335
|
// Since this dictionary contains keys which are scores
|
322
336
|
// we actually render the individual scores
|
323
337
|
const scores = names.map((name) => {
|
@@ -329,6 +343,17 @@ export const createsSamplesDescriptor = (
|
|
329
343
|
};
|
330
344
|
});
|
331
345
|
return scores;
|
346
|
+
} else {
|
347
|
+
// Since this dictionary contains keys which are not scores
|
348
|
+
// we just treat it like an opaque dictionary
|
349
|
+
return [
|
350
|
+
{
|
351
|
+
name: scorer,
|
352
|
+
rendered: () => {
|
353
|
+
return scoreDescriptor.render(scoreVal);
|
354
|
+
},
|
355
|
+
},
|
356
|
+
];
|
332
357
|
}
|
333
358
|
} else {
|
334
359
|
return [
|
@@ -389,7 +414,7 @@ const scoreCategorizers = [
|
|
389
414
|
*/
|
390
415
|
describe: (values) => {
|
391
416
|
if (
|
392
|
-
|
417
|
+
values.length === 2 &&
|
393
418
|
values.every((val) => {
|
394
419
|
return val === 1 || val === 0;
|
395
420
|
})
|
@@ -14,7 +14,11 @@ import { ApplicationIcons } from "../../appearance/Icons.mjs";
|
|
14
14
|
import { MetaDataGrid } from "../../components/MetaDataGrid.mjs";
|
15
15
|
import { FontSize, TextStyle } from "../../appearance/Fonts.mjs";
|
16
16
|
import { ModelUsagePanel } from "../../usage/UsageCard.mjs";
|
17
|
-
import {
|
17
|
+
import {
|
18
|
+
formatDateTime,
|
19
|
+
formatNumber,
|
20
|
+
formatPrettyDecimal,
|
21
|
+
} from "../../utils/Format.mjs";
|
18
22
|
|
19
23
|
/**
|
20
24
|
* Renders the StateEventView component.
|
@@ -28,7 +32,16 @@ import { formatDateTime, formatNumber } from "../../utils/Format.mjs";
|
|
28
32
|
*/
|
29
33
|
export const ModelEventView = ({ id, event, style }) => {
|
30
34
|
const totalUsage = event.output.usage?.total_tokens;
|
31
|
-
const
|
35
|
+
const callTime = event.output.time;
|
36
|
+
|
37
|
+
const subItems = [];
|
38
|
+
if (totalUsage) {
|
39
|
+
subItems.push(`${formatNumber(totalUsage)} tokens`);
|
40
|
+
}
|
41
|
+
if (callTime) {
|
42
|
+
subItems.push(`${formatPrettyDecimal(callTime)} sec`);
|
43
|
+
}
|
44
|
+
const subtitle = subItems.length > 0 ? `(${subItems.join(", ")})` : "";
|
32
45
|
|
33
46
|
// Note: despite the type system saying otherwise, this has appeared empircally
|
34
47
|
// to sometimes be undefined
|
@@ -154,7 +154,7 @@ export const RenderableChangeTypes = [
|
|
154
154
|
* @typedef {Object} ToolDefinition
|
155
155
|
* @property {string} name - The name of the tool (e.g., "python").
|
156
156
|
* @property {string} description - A brief description of what the tool does.
|
157
|
-
* @property {ToolParameters} parameters - An object describing the parameters that the tool accepts.
|
157
|
+
* @property {ToolParameters} [parameters] - An object describing the parameters that the tool accepts.
|
158
158
|
*/
|
159
159
|
|
160
160
|
/**
|
@@ -168,7 +168,9 @@ export const RenderableChangeTypes = [
|
|
168
168
|
export const Tools = ({ toolDefinitions }) => {
|
169
169
|
return toolDefinitions.map((toolDefinition) => {
|
170
170
|
const toolName = toolDefinition.name;
|
171
|
-
const toolArgs =
|
171
|
+
const toolArgs = toolDefinition.parameters?.properties
|
172
|
+
? Object.keys(toolDefinition.parameters.properties)
|
173
|
+
: [];
|
172
174
|
return html`<${Tool} toolName=${toolName} toolArgs=${toolArgs} />`;
|
173
175
|
});
|
174
176
|
};
|
@@ -173,6 +173,7 @@ export type Logprob2 = number;
|
|
173
173
|
export type Bytes1 = number[] | null;
|
174
174
|
export type Content5 = Logprob[];
|
175
175
|
export type Choices1 = ChatCompletionChoice[];
|
176
|
+
export type Time = number | null;
|
176
177
|
export type Metadata4 = {} | null;
|
177
178
|
export type Error = string | null;
|
178
179
|
export type Scores1 = {
|
@@ -646,6 +647,7 @@ export interface ModelOutput {
|
|
646
647
|
model: Model1;
|
647
648
|
choices: Choices1;
|
648
649
|
usage: ModelUsage1 | null;
|
650
|
+
time: Time;
|
649
651
|
metadata: Metadata4;
|
650
652
|
error: Error;
|
651
653
|
}
|
inspect_ai/_view/www/yarn.lock
CHANGED
@@ -1120,6 +1120,11 @@ ms@2.1.2:
|
|
1120
1120
|
resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
|
1121
1121
|
integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==
|
1122
1122
|
|
1123
|
+
murmurhash@^2.0.1:
|
1124
|
+
version "2.0.1"
|
1125
|
+
resolved "https://registry.yarnpkg.com/murmurhash/-/murmurhash-2.0.1.tgz#4097720e08cf978872194ad84ea5be2dec9b610f"
|
1126
|
+
integrity sha512-5vQEh3y+DG/lMPM0mCGPDnyV8chYg/g7rl6v3Gd8WMF9S429ox3Xk8qrk174kWhG767KQMqqxLD1WnGd77hiew==
|
1127
|
+
|
1123
1128
|
nanoid@^3.3.7:
|
1124
1129
|
version "3.3.7"
|
1125
1130
|
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.7.tgz#d0c301a691bc8d54efa0a2226ccf3fe2fd656bd8"
|
@@ -1202,10 +1207,10 @@ postcss@^8.4.40:
|
|
1202
1207
|
picocolors "^1.0.1"
|
1203
1208
|
source-map-js "^1.2.0"
|
1204
1209
|
|
1205
|
-
preact@^10.24.
|
1206
|
-
version "10.
|
1207
|
-
resolved "https://registry.yarnpkg.com/preact/-/preact-10.
|
1208
|
-
integrity sha512-
|
1210
|
+
preact@^10.24.3:
|
1211
|
+
version "10.25.1"
|
1212
|
+
resolved "https://registry.yarnpkg.com/preact/-/preact-10.25.1.tgz#1c4b84253c42dee874bfbf6a92bdce45e3662665"
|
1213
|
+
integrity sha512-frxeZV2vhQSohQwJ7FvlqC40ze89+8friponWUFeVEkaCfhC6Eu4V0iND5C9CXz8JLndV07QRDeXzH1+Anz5Og==
|
1209
1214
|
|
1210
1215
|
prelude-ls@^1.2.1:
|
1211
1216
|
version "1.2.1"
|
inspect_ai/approval/__init__.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from ._approval import Approval, ApprovalDecision
|
2
2
|
from ._approver import Approver
|
3
3
|
from ._auto import auto_approver
|
4
|
-
from ._human import human_approver
|
4
|
+
from ._human.approver import human_approver
|
5
5
|
from ._policy import ApprovalPolicy
|
6
6
|
from ._registry import approver
|
7
7
|
|
@@ -0,0 +1,35 @@
|
|
1
|
+
from inspect_ai.solver._task_state import TaskState
|
2
|
+
from inspect_ai.tool._tool_call import ToolCall, ToolCallView
|
3
|
+
|
4
|
+
from .._approval import Approval, ApprovalDecision
|
5
|
+
from .._approver import Approver
|
6
|
+
from .._registry import approver
|
7
|
+
from .console import console_approval
|
8
|
+
from .panel import panel_approval
|
9
|
+
|
10
|
+
|
11
|
+
@approver(name="human")
|
12
|
+
def human_approver(
|
13
|
+
choices: list[ApprovalDecision] = ["approve", "reject", "terminate"],
|
14
|
+
) -> Approver:
|
15
|
+
"""Interactive human approver.
|
16
|
+
|
17
|
+
Returns:
|
18
|
+
Approver: Interactive human approver.
|
19
|
+
"""
|
20
|
+
|
21
|
+
async def approve(
|
22
|
+
message: str,
|
23
|
+
call: ToolCall,
|
24
|
+
view: ToolCallView,
|
25
|
+
state: TaskState | None = None,
|
26
|
+
) -> Approval:
|
27
|
+
# try to use the panel approval (available in fullscreen display)
|
28
|
+
try:
|
29
|
+
return await panel_approval(message, call, view, state, choices)
|
30
|
+
|
31
|
+
# fallback to plain console approval (available in all displays)
|
32
|
+
except NotImplementedError:
|
33
|
+
return console_approval(message, view, choices)
|
34
|
+
|
35
|
+
return approve
|
@@ -0,0 +1,62 @@
|
|
1
|
+
from rich.prompt import Prompt
|
2
|
+
|
3
|
+
from inspect_ai._util.transcript import transcript_panel
|
4
|
+
from inspect_ai.tool._tool_call import ToolCallView
|
5
|
+
from inspect_ai.util._console import input_screen
|
6
|
+
|
7
|
+
from .._approval import Approval, ApprovalDecision
|
8
|
+
from .util import (
|
9
|
+
HUMAN_APPROVED,
|
10
|
+
HUMAN_ESCALATED,
|
11
|
+
HUMAN_REJECTED,
|
12
|
+
HUMAN_TERMINATED,
|
13
|
+
render_tool_approval,
|
14
|
+
)
|
15
|
+
|
16
|
+
|
17
|
+
def console_approval(
|
18
|
+
message: str, view: ToolCallView, choices: list[ApprovalDecision]
|
19
|
+
) -> Approval:
|
20
|
+
with input_screen(width=None) as console:
|
21
|
+
console.print(
|
22
|
+
transcript_panel(
|
23
|
+
title="Approve Tool", content=render_tool_approval(message, view)
|
24
|
+
)
|
25
|
+
)
|
26
|
+
|
27
|
+
# provide choices
|
28
|
+
prompts: dict[str, str] = {}
|
29
|
+
for choice in choices:
|
30
|
+
prompts[choice[0]] = f"{choice.capitalize()} ({choice[0]})"
|
31
|
+
values = list(prompts.values())
|
32
|
+
prompt = ", ".join(values[:-1])
|
33
|
+
prompt = f"{prompt}, or {values[-1]}"
|
34
|
+
|
35
|
+
def render_approval(approval: Approval) -> Approval:
|
36
|
+
console.print(f"Decision: {approval.decision.capitalize()}")
|
37
|
+
return approval
|
38
|
+
|
39
|
+
while True:
|
40
|
+
decision = Prompt.ask(
|
41
|
+
prompt=prompt,
|
42
|
+
console=console,
|
43
|
+
choices=list(prompts.keys()),
|
44
|
+
default="a",
|
45
|
+
).lower()
|
46
|
+
|
47
|
+
if decision == "a":
|
48
|
+
return render_approval(
|
49
|
+
Approval(decision="approve", explanation=HUMAN_APPROVED)
|
50
|
+
)
|
51
|
+
elif decision == "r":
|
52
|
+
return render_approval(
|
53
|
+
Approval(decision="reject", explanation=HUMAN_REJECTED)
|
54
|
+
)
|
55
|
+
elif decision == "t":
|
56
|
+
return render_approval(
|
57
|
+
Approval(decision="terminate", explanation=HUMAN_TERMINATED)
|
58
|
+
)
|
59
|
+
elif decision == "e":
|
60
|
+
return render_approval(
|
61
|
+
Approval(decision="escalate", explanation=HUMAN_ESCALATED)
|
62
|
+
)
|