inspect-ai 0.3.49__py3-none-any.whl → 0.3.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. inspect_ai/_cli/info.py +2 -2
  2. inspect_ai/_cli/log.py +2 -2
  3. inspect_ai/_cli/score.py +2 -2
  4. inspect_ai/_display/core/display.py +19 -0
  5. inspect_ai/_display/core/panel.py +37 -7
  6. inspect_ai/_display/core/progress.py +29 -2
  7. inspect_ai/_display/core/results.py +79 -40
  8. inspect_ai/_display/core/textual.py +21 -0
  9. inspect_ai/_display/rich/display.py +28 -8
  10. inspect_ai/_display/textual/app.py +107 -1
  11. inspect_ai/_display/textual/display.py +1 -1
  12. inspect_ai/_display/textual/widgets/samples.py +132 -91
  13. inspect_ai/_display/textual/widgets/task_detail.py +236 -0
  14. inspect_ai/_display/textual/widgets/tasks.py +74 -6
  15. inspect_ai/_display/textual/widgets/toggle.py +32 -0
  16. inspect_ai/_eval/context.py +2 -0
  17. inspect_ai/_eval/eval.py +4 -3
  18. inspect_ai/_eval/loader.py +1 -1
  19. inspect_ai/_eval/run.py +35 -2
  20. inspect_ai/_eval/task/log.py +13 -11
  21. inspect_ai/_eval/task/results.py +12 -3
  22. inspect_ai/_eval/task/run.py +139 -36
  23. inspect_ai/_eval/task/sandbox.py +2 -1
  24. inspect_ai/_util/_async.py +30 -1
  25. inspect_ai/_util/file.py +31 -4
  26. inspect_ai/_util/html.py +3 -0
  27. inspect_ai/_util/logger.py +6 -5
  28. inspect_ai/_util/platform.py +5 -6
  29. inspect_ai/_util/registry.py +1 -1
  30. inspect_ai/_view/server.py +9 -9
  31. inspect_ai/_view/www/App.css +2 -2
  32. inspect_ai/_view/www/dist/assets/index.css +2 -2
  33. inspect_ai/_view/www/dist/assets/index.js +352 -294
  34. inspect_ai/_view/www/log-schema.json +13 -0
  35. inspect_ai/_view/www/package.json +1 -0
  36. inspect_ai/_view/www/src/components/MessageBand.mjs +1 -1
  37. inspect_ai/_view/www/src/components/Tools.mjs +16 -13
  38. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -3
  39. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +52 -77
  40. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -13
  41. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +15 -2
  42. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +4 -2
  43. inspect_ai/_view/www/src/types/log.d.ts +2 -0
  44. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +2 -0
  45. inspect_ai/_view/www/yarn.lock +9 -4
  46. inspect_ai/approval/__init__.py +1 -1
  47. inspect_ai/approval/_human/approver.py +35 -0
  48. inspect_ai/approval/_human/console.py +62 -0
  49. inspect_ai/approval/_human/manager.py +108 -0
  50. inspect_ai/approval/_human/panel.py +233 -0
  51. inspect_ai/approval/_human/util.py +51 -0
  52. inspect_ai/dataset/_sources/hf.py +2 -2
  53. inspect_ai/dataset/_sources/util.py +1 -1
  54. inspect_ai/log/_file.py +106 -36
  55. inspect_ai/log/_recorders/eval.py +226 -158
  56. inspect_ai/log/_recorders/file.py +9 -6
  57. inspect_ai/log/_recorders/json.py +35 -12
  58. inspect_ai/log/_recorders/recorder.py +15 -15
  59. inspect_ai/log/_samples.py +52 -0
  60. inspect_ai/model/_model.py +14 -0
  61. inspect_ai/model/_model_output.py +4 -0
  62. inspect_ai/model/_providers/azureai.py +1 -1
  63. inspect_ai/model/_providers/hf.py +106 -4
  64. inspect_ai/model/_providers/util/__init__.py +2 -0
  65. inspect_ai/model/_providers/util/hf_handler.py +200 -0
  66. inspect_ai/scorer/_common.py +1 -1
  67. inspect_ai/solver/_plan.py +0 -8
  68. inspect_ai/solver/_task_state.py +18 -1
  69. inspect_ai/solver/_use_tools.py +9 -1
  70. inspect_ai/tool/_tool_def.py +2 -2
  71. inspect_ai/tool/_tool_info.py +14 -2
  72. inspect_ai/tool/_tool_params.py +2 -1
  73. inspect_ai/tool/_tools/_execute.py +1 -1
  74. inspect_ai/tool/_tools/_web_browser/_web_browser.py +6 -0
  75. inspect_ai/util/__init__.py +5 -6
  76. inspect_ai/util/_panel.py +91 -0
  77. inspect_ai/util/_sandbox/__init__.py +2 -6
  78. inspect_ai/util/_sandbox/context.py +4 -3
  79. inspect_ai/util/_sandbox/docker/compose.py +12 -2
  80. inspect_ai/util/_sandbox/docker/docker.py +19 -9
  81. inspect_ai/util/_sandbox/docker/util.py +10 -2
  82. inspect_ai/util/_sandbox/environment.py +47 -41
  83. inspect_ai/util/_sandbox/local.py +15 -10
  84. inspect_ai/util/_subprocess.py +43 -3
  85. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/METADATA +2 -2
  86. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/RECORD +90 -82
  87. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +0 -149
  88. inspect_ai/_view/www/node_modules/flatted/python/test.py +0 -63
  89. inspect_ai/approval/_human.py +0 -123
  90. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/LICENSE +0 -0
  91. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/WHEEL +0 -0
  92. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/entry_points.txt +0 -0
  93. {inspect_ai-0.3.49.dist-info → inspect_ai-0.3.51.dist-info}/top_level.txt +0 -0
@@ -2506,6 +2506,18 @@
2506
2506
  ],
2507
2507
  "default": null
2508
2508
  },
2509
+ "time": {
2510
+ "anyOf": [
2511
+ {
2512
+ "type": "number"
2513
+ },
2514
+ {
2515
+ "type": "null"
2516
+ }
2517
+ ],
2518
+ "default": null,
2519
+ "title": "Time"
2520
+ },
2509
2521
  "metadata": {
2510
2522
  "anyOf": [
2511
2523
  {
@@ -2537,6 +2549,7 @@
2537
2549
  "model",
2538
2550
  "choices",
2539
2551
  "usage",
2552
+ "time",
2540
2553
  "metadata",
2541
2554
  "error"
2542
2555
  ],
@@ -36,6 +36,7 @@
36
36
  "json5": "^2.2.3",
37
37
  "jsondiffpatch": "^0.6.0",
38
38
  "markdown-it": "^14.1.0",
39
+ "murmurhash": "^2.0.1",
39
40
  "postcss-url": "^10.1.3",
40
41
  "preact": "^10.24.3",
41
42
  "prismjs": "^1.29.0"
@@ -8,7 +8,7 @@ export const MessageBand = ({ message, hidden, setHidden, type }) => {
8
8
  const bgColor =
9
9
  type === "info" ? "var(--bs-light)" : "var(--bs-" + type + "-bg-subtle)";
10
10
  const color =
11
- "var(--bs-" + type === "info" ? "secondary" : undefined + "-text-emphasis)";
11
+ "var(--bs-" + (type === "info" ? "secondary" : type) + "-text-emphasis)";
12
12
 
13
13
  return html`
14
14
  <div
@@ -1,6 +1,8 @@
1
1
  // @ts-check
2
2
  /// <reference path="../types/prism.d.ts" />
3
3
  import Prism from "prismjs";
4
+ import murmurhash from "murmurhash";
5
+
4
6
  import "prismjs/components/prism-python";
5
7
  import "prismjs/components/prism-bash";
6
8
  import "prismjs/components/prism-json";
@@ -105,11 +107,11 @@ export const ToolCallView = ({
105
107
  * @param {string} props.type - The function call
106
108
  * @param {string | undefined } props.contents - The main input for this call
107
109
  * @param {Record<string, string>} [props.style] - The style
108
- * @param {import("../types/log").ToolCallContent} props.view - The tool call view
110
+ * @param {import("../types/log").ToolCallContent} [props.view] - The tool call view
109
111
  * @returns {import("preact").JSX.Element | string} The ToolInput component, or an empty string when there is nothing to render.
110
112
  */
111
113
  export const ToolInput = ({ type, contents, view, style }) => {
112
- if (!contents) {
114
+ if (!contents && !view?.content) {
113
115
  return "";
114
116
  }
115
117
 
@@ -133,7 +135,7 @@ export const ToolInput = ({ type, contents, view, style }) => {
133
135
  }
134
136
  }
135
137
  }
136
- }, [toolInputRef.current]);
138
+ }, [contents, view, style]);
137
139
  return html`<${MarkdownDiv}
138
140
  markdown=${view.content}
139
141
  ref=${toolInputRef}
@@ -144,14 +146,15 @@ export const ToolInput = ({ type, contents, view, style }) => {
144
146
  useEffect(() => {
145
147
  const tokens = Prism.languages[type];
146
148
  if (toolInputRef.current && tokens) {
147
- let resolvedContents = contents;
148
- if (typeof contents === "object" || Array.isArray(contents)) {
149
- resolvedContents = JSON.stringify(contents);
150
- }
151
- const html = Prism.highlight(resolvedContents, tokens, type);
152
- toolInputRef.current.innerHTML = html;
149
+ Prism.highlightElement(toolInputRef.current);
153
150
  }
154
- }, [toolInputRef.current, contents, type, view]);
151
+ }, [contents, type, view]);
152
+
153
+ contents =
154
+ typeof contents === "object" || Array.isArray(contents)
155
+ ? JSON.stringify(contents)
156
+ : contents;
157
+ const key = murmurhash.v3(contents);
155
158
 
156
159
  return html`<pre
157
160
  class="tool-output"
@@ -162,9 +165,9 @@ export const ToolInput = ({ type, contents, view, style }) => {
162
165
  ...style,
163
166
  }}
164
167
  >
165
- <code ref=${toolInputRef} class="sourceCode${type
166
- ? ` language-${type}`
167
- : ""}" style=${{
168
+ <code ref=${toolInputRef}
169
+ key=${key}
170
+ class="sourceCode${type ? ` language-${type}` : ""}" style=${{
168
171
  overflowWrap: "anywhere",
169
172
  whiteSpace: "pre-wrap",
170
173
  }}>
@@ -150,7 +150,6 @@ export const SampleDisplay = ({
150
150
  sample=${sample}
151
151
  sampleDescriptor=${sampleDescriptor}
152
152
  scorer=${Object.keys(sample.scores)[0]}
153
- style=${{ paddingLeft: "0.8em", marginTop: "0.4em" }}
154
153
  />
155
154
  </${TabPanel}>`);
156
155
  } else {
@@ -164,7 +163,6 @@ export const SampleDisplay = ({
164
163
  sample=${sample}
165
164
  sampleDescriptor=${sampleDescriptor}
166
165
  scorer=${scorer}
167
- style=${{ paddingLeft: "0.8em", marginTop: "0.4em" }}
168
166
  />
169
167
  </${TabPanel}>`);
170
168
  }
@@ -180,7 +178,7 @@ export const SampleDisplay = ({
180
178
  title="Metadata"
181
179
  onSelected=${onSelectedTab}
182
180
  selected=${selectedTab === kSampleMetdataTabId}>
183
- <div style=${{ display: "flex", flexWrap: "wrap", alignItems: "flex-start", gap: "1em", paddingLeft: "0.8em", marginTop: "1em" }}>
181
+ <div style=${{ display: "flex", flexWrap: "wrap", alignItems: "flex-start", gap: "1em", paddingLeft: "0", marginTop: "0.5em" }}>
184
182
  ${sampleMetadatas}
185
183
  </div>
186
184
  </${TabPanel}>`,
@@ -3,7 +3,8 @@ import { arrayToString, inputString } from "../utils/Format.mjs";
3
3
  import { MarkdownDiv } from "../components/MarkdownDiv.mjs";
4
4
  import { SampleScores } from "./SampleScores.mjs";
5
5
  import { FontSize, TextStyle } from "../appearance/Fonts.mjs";
6
- import { MetaDataView } from "../components/MetaDataView.mjs";
6
+ import { MetaDataGrid } from "../components/MetaDataGrid.mjs";
7
+ import { Card, CardHeader, CardBody } from "../components/Card.mjs";
7
8
 
8
9
  const labelStyle = {
9
10
  paddingRight: "2em",
@@ -22,6 +23,7 @@ export const SampleScoreView = ({
22
23
  if (!sampleDescriptor) {
23
24
  return "";
24
25
  }
26
+
25
27
  const scoreInput = inputString(sample.input);
26
28
  if (sample.choices && sample.choices.length > 0) {
27
29
  scoreInput.push("");
@@ -35,17 +37,21 @@ export const SampleScoreView = ({
35
37
  const scorerDescriptor = sampleDescriptor.scorer(sample, scorer);
36
38
  const explanation = scorerDescriptor.explanation() || "(No Explanation)";
37
39
  const answer = scorerDescriptor.answer();
40
+ const metadata = scorerDescriptor.metadata();
38
41
 
39
42
  return html`
40
- <div
41
- class="container-fluid"
42
- style=${{
43
- paddingTop: "1em",
44
- paddingLeft: "0",
45
- fontSize: FontSize.base,
46
- ...style,
47
- }}
48
- >
43
+ <div
44
+ class="container-fluid"
45
+ style=${{
46
+ marginTop: "0.5em",
47
+ paddingLeft: "0",
48
+ fontSize: FontSize.base,
49
+ ...style,
50
+ }}
51
+ >
52
+ <${Card}>
53
+ <${CardHeader} label="Score"/>
54
+ <${CardBody}>
49
55
  <div>
50
56
  <div style=${{ ...labelStyle }}>Input</div>
51
57
  <div>
@@ -58,7 +64,7 @@ export const SampleScoreView = ({
58
64
 
59
65
  <table
60
66
  class="table"
61
- style=${{ width: "100%", marginBottom: "0", marginTop: "1em" }}
67
+ style=${{ width: "100%", marginBottom: "1em" }}
62
68
  >
63
69
  <thead style=${{ borderBottomColor: "#00000000" }}>
64
70
  <tr>
@@ -114,73 +120,42 @@ export const SampleScoreView = ({
114
120
  </tr>
115
121
  </tbody>
116
122
  </table>
123
+ </${CardBody}>
124
+ </${Card}>
125
+
126
+ ${
127
+ explanation && explanation !== answer
128
+ ? html`
129
+ <${Card}>
130
+ <${CardHeader} label="Explanation"/>
131
+ <${CardBody}>
132
+ <${MarkdownDiv}
133
+ markdown=${arrayToString(explanation)}
134
+ style=${{ paddingLeft: "0" }}
135
+ class="no-last-para-padding"
136
+ />
137
+
138
+ </${CardBody}>
139
+ </${Card}>`
140
+ : ""
141
+ }
117
142
 
118
- ${explanation && explanation !== answer
119
- ? html` <table
120
- class="table"
121
- style=${{ width: "100%", marginBottom: "0" }}
122
- >
123
- <thead>
124
- <tr>
125
- <th
126
- style=${{
127
- paddingBottom: "0",
128
- paddingLeft: "0",
129
- ...labelStyle,
130
- fontWeight: "400",
131
- }}
132
- >
133
- Explanation
134
- </th>
135
- </tr>
136
- </thead>
137
- <tbody>
138
- <tr>
139
- <td style=${{ paddingLeft: "0" }}>
140
- <${MarkdownDiv}
141
- markdown=${arrayToString(explanation)}
142
- style=${{ paddingLeft: "0" }}
143
- class="no-last-para-padding"
144
- />
145
- </td>
146
- </tr>
147
- </tbody>
148
- </table>`
149
- : ""}
150
- ${sample?.score?.metadata &&
151
- Object.keys(sample?.score?.metadata).length > 0
152
- ? html` <table
153
- class="table"
154
- style=${{ width: "100%", marginBottom: "0" }}
155
- >
156
- <thead>
157
- <tr>
158
- <th
159
- style=${{
160
- paddingBottom: "0",
161
- paddingLeft: "0",
162
- ...labelStyle,
163
- fontWeight: "400",
164
- }}
165
- >
166
- Metadata
167
- </th>
168
- </tr>
169
- </thead>
170
- <tbody>
171
- <tr>
172
- <td style=${{ paddingLeft: "0" }}>
173
- <${MetaDataView}
174
- id="task-sample-score-metadata"
175
- classes="tab-pane"
176
- entries="${sample?.score?.metadata}"
177
- style=${{ marginTop: "1em" }}
178
- />
179
- </td>
180
- </tr>
181
- </tbody>
182
- </table>`
183
- : ""}
143
+ ${
144
+ metadata && Object.keys(metadata).length > 0
145
+ ? html`
146
+ <${Card}>
147
+ <${CardHeader} label="Metadata"/>
148
+ <${CardBody}>
149
+ <${MetaDataGrid}
150
+ id="task-sample-score-metadata"
151
+ classes="tab-pane"
152
+ entries="${metadata}"
153
+ style=${{ marginTop: "0" }}
154
+ />
155
+ </${CardBody}>
156
+ </${Card}>`
157
+ : ""
158
+ }
184
159
  </div>
185
160
  `;
186
161
  };
@@ -158,6 +158,23 @@ export const createsSamplesDescriptor = (
158
158
  }
159
159
  return undefined;
160
160
  };
161
+
162
+ // Retrieve the metadata for a sample
163
+ /**
164
+ * @param {import("../api/Types.mjs").SampleSummary} sample - the sample whose score metadata is requested
165
+ * @param {string} scorer - the scorer name
166
+ * @returns {Object} The score's metadata (undefined when the sample has no score or the score has no metadata)
167
+ */
168
+ const scoreMetadata = (sample, scorer) => {
169
+ if (sample) {
170
+ const sampleScore = score(sample, scorer);
171
+ if (sampleScore && sampleScore.metadata) {
172
+ return sampleScore.metadata;
173
+ }
174
+ }
175
+ return undefined;
176
+ };
177
+
161
178
  const uniqScoreValues = [
162
179
  ...new Set(
163
180
  samples
@@ -280,6 +297,9 @@ export const createsSamplesDescriptor = (
280
297
 
281
298
  const scorerDescriptor = (sample, scorer) => {
282
299
  return {
300
+ metadata: () => {
301
+ return scoreMetadata(sample, scorer);
302
+ },
283
303
  explanation: () => {
284
304
  return scoreExplanation(sample, scorer);
285
305
  },
@@ -300,24 +320,18 @@ export const createsSamplesDescriptor = (
300
320
  });
301
321
  const sampleScorer = sample.scores[scorer];
302
322
  const scoreVal = sampleScorer.value;
323
+
303
324
  if (typeof scoreVal === "object") {
304
325
  const names = Object.keys(scoreVal);
326
+
327
+ // See if this is a dictionary of score names
328
+ // if any of the score names match, treat it
329
+ // as a scorer dictionary
305
330
  if (
306
331
  names.find((name) => {
307
- return !scoreNames.includes(name);
332
+ return scoreNames.includes(name);
308
333
  })
309
334
  ) {
310
- // Since this dictionary contains keys which are not scores
311
- // we just treat it like an opaque dictionary
312
- return [
313
- {
314
- name: scorer,
315
- rendered: () => {
316
- return scoreDescriptor.render(scoreVal);
317
- },
318
- },
319
- ];
320
- } else {
321
335
  // Since this dictionary contains keys which are scores
322
336
  // we actually render the individual scores
323
337
  const scores = names.map((name) => {
@@ -329,6 +343,17 @@ export const createsSamplesDescriptor = (
329
343
  };
330
344
  });
331
345
  return scores;
346
+ } else {
347
+ // Since this dictionary contains keys which are not scores
348
+ // we just treat it like an opaque dictionary
349
+ return [
350
+ {
351
+ name: scorer,
352
+ rendered: () => {
353
+ return scoreDescriptor.render(scoreVal);
354
+ },
355
+ },
356
+ ];
332
357
  }
333
358
  } else {
334
359
  return [
@@ -389,7 +414,7 @@ const scoreCategorizers = [
389
414
  */
390
415
  describe: (values) => {
391
416
  if (
392
- (values.length === 1 || values.length === 2) &&
417
+ values.length === 2 &&
393
418
  values.every((val) => {
394
419
  return val === 1 || val === 0;
395
420
  })
@@ -14,7 +14,11 @@ import { ApplicationIcons } from "../../appearance/Icons.mjs";
14
14
  import { MetaDataGrid } from "../../components/MetaDataGrid.mjs";
15
15
  import { FontSize, TextStyle } from "../../appearance/Fonts.mjs";
16
16
  import { ModelUsagePanel } from "../../usage/UsageCard.mjs";
17
- import { formatDateTime, formatNumber } from "../../utils/Format.mjs";
17
+ import {
18
+ formatDateTime,
19
+ formatNumber,
20
+ formatPrettyDecimal,
21
+ } from "../../utils/Format.mjs";
18
22
 
19
23
  /**
20
24
  * Renders the ModelEventView component.
@@ -28,7 +32,16 @@ import { formatDateTime, formatNumber } from "../../utils/Format.mjs";
28
32
  */
29
33
  export const ModelEventView = ({ id, event, style }) => {
30
34
  const totalUsage = event.output.usage?.total_tokens;
31
- const subtitle = totalUsage ? `(${formatNumber(totalUsage)} tokens)` : "";
35
+ const callTime = event.output.time;
36
+
37
+ const subItems = [];
38
+ if (totalUsage) {
39
+ subItems.push(`${formatNumber(totalUsage)} tokens`);
40
+ }
41
+ if (callTime) {
42
+ subItems.push(`${formatPrettyDecimal(callTime)} sec`);
43
+ }
44
+ const subtitle = subItems.length > 0 ? `(${subItems.join(", ")})` : "";
32
45
 
33
46
  // Note: despite the type system saying otherwise, this has appeared empirically
34
47
  // to sometimes be undefined
@@ -154,7 +154,7 @@ export const RenderableChangeTypes = [
154
154
  * @typedef {Object} ToolDefinition
155
155
  * @property {string} name - The name of the tool (e.g., "python").
156
156
  * @property {string} description - A brief description of what the tool does.
157
- * @property {ToolParameters} parameters - An object describing the parameters that the tool accepts.
157
+ * @property {ToolParameters} [parameters] - An object describing the parameters that the tool accepts.
158
158
  */
159
159
 
160
160
  /**
@@ -168,7 +168,9 @@ export const RenderableChangeTypes = [
168
168
  export const Tools = ({ toolDefinitions }) => {
169
169
  return toolDefinitions.map((toolDefinition) => {
170
170
  const toolName = toolDefinition.name;
171
- const toolArgs = Object.keys(toolDefinition.parameters.properties);
171
+ const toolArgs = toolDefinition.parameters?.properties
172
+ ? Object.keys(toolDefinition.parameters.properties)
173
+ : [];
172
174
  return html`<${Tool} toolName=${toolName} toolArgs=${toolArgs} />`;
173
175
  });
174
176
  };
@@ -173,6 +173,7 @@ export type Logprob2 = number;
173
173
  export type Bytes1 = number[] | null;
174
174
  export type Content5 = Logprob[];
175
175
  export type Choices1 = ChatCompletionChoice[];
176
+ export type Time = number | null;
176
177
  export type Metadata4 = {} | null;
177
178
  export type Error = string | null;
178
179
  export type Scores1 = {
@@ -646,6 +647,7 @@ export interface ModelOutput {
646
647
  model: Model1;
647
648
  choices: Choices1;
648
649
  usage: ModelUsage1 | null;
650
+ time: Time;
649
651
  metadata: Metadata4;
650
652
  error: Error;
651
653
  }
@@ -356,6 +356,8 @@ export const WorkSpace = ({
356
356
  logFileName,
357
357
  capabilities,
358
358
  selectedTab,
359
+ setHidden,
360
+ hidden,
359
361
  ]);
360
362
 
361
363
  return html`<${WorkspaceDisplay}
@@ -1120,6 +1120,11 @@ ms@2.1.2:
1120
1120
  resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
1121
1121
  integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==
1122
1122
 
1123
+ murmurhash@^2.0.1:
1124
+ version "2.0.1"
1125
+ resolved "https://registry.yarnpkg.com/murmurhash/-/murmurhash-2.0.1.tgz#4097720e08cf978872194ad84ea5be2dec9b610f"
1126
+ integrity sha512-5vQEh3y+DG/lMPM0mCGPDnyV8chYg/g7rl6v3Gd8WMF9S429ox3Xk8qrk174kWhG767KQMqqxLD1WnGd77hiew==
1127
+
1123
1128
  nanoid@^3.3.7:
1124
1129
  version "3.3.7"
1125
1130
  resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.7.tgz#d0c301a691bc8d54efa0a2226ccf3fe2fd656bd8"
@@ -1202,10 +1207,10 @@ postcss@^8.4.40:
1202
1207
  picocolors "^1.0.1"
1203
1208
  source-map-js "^1.2.0"
1204
1209
 
1205
- preact@^10.24.0:
1206
- version "10.24.2"
1207
- resolved "https://registry.yarnpkg.com/preact/-/preact-10.24.2.tgz#42179771d3b06e7adb884e3f8127ddd3d99b78f6"
1208
- integrity sha512-1cSoF0aCC8uaARATfrlz4VCBqE8LwZwRfLgkxJOQwAlQt6ayTmi0D9OF7nXid1POI5SZidFuG9CnlXbDfLqY/Q==
1210
+ preact@^10.24.3:
1211
+ version "10.25.1"
1212
+ resolved "https://registry.yarnpkg.com/preact/-/preact-10.25.1.tgz#1c4b84253c42dee874bfbf6a92bdce45e3662665"
1213
+ integrity sha512-frxeZV2vhQSohQwJ7FvlqC40ze89+8friponWUFeVEkaCfhC6Eu4V0iND5C9CXz8JLndV07QRDeXzH1+Anz5Og==
1209
1214
 
1210
1215
  prelude-ls@^1.2.1:
1211
1216
  version "1.2.1"
@@ -1,7 +1,7 @@
1
1
  from ._approval import Approval, ApprovalDecision
2
2
  from ._approver import Approver
3
3
  from ._auto import auto_approver
4
- from ._human import human_approver
4
+ from ._human.approver import human_approver
5
5
  from ._policy import ApprovalPolicy
6
6
  from ._registry import approver
7
7
 
@@ -0,0 +1,35 @@
1
+ from inspect_ai.solver._task_state import TaskState
2
+ from inspect_ai.tool._tool_call import ToolCall, ToolCallView
3
+
4
+ from .._approval import Approval, ApprovalDecision
5
+ from .._approver import Approver
6
+ from .._registry import approver
7
+ from .console import console_approval
8
+ from .panel import panel_approval
9
+
10
+
11
+ @approver(name="human")
12
+ def human_approver(
13
+ choices: list[ApprovalDecision] = ["approve", "reject", "terminate"],
14
+ ) -> Approver:
15
+ """Interactive human approver.
16
+
17
+ Returns:
18
+ Approver: Interactive human approver.
19
+ """
20
+
21
+ async def approve(
22
+ message: str,
23
+ call: ToolCall,
24
+ view: ToolCallView,
25
+ state: TaskState | None = None,
26
+ ) -> Approval:
27
+ # try to use the panel approval (available in fullscreen display)
28
+ try:
29
+ return await panel_approval(message, call, view, state, choices)
30
+
31
+ # fallback to plain console approval (available in all displays)
32
+ except NotImplementedError:
33
+ return console_approval(message, view, choices)
34
+
35
+ return approve
@@ -0,0 +1,62 @@
1
+ from rich.prompt import Prompt
2
+
3
+ from inspect_ai._util.transcript import transcript_panel
4
+ from inspect_ai.tool._tool_call import ToolCallView
5
+ from inspect_ai.util._console import input_screen
6
+
7
+ from .._approval import Approval, ApprovalDecision
8
+ from .util import (
9
+ HUMAN_APPROVED,
10
+ HUMAN_ESCALATED,
11
+ HUMAN_REJECTED,
12
+ HUMAN_TERMINATED,
13
+ render_tool_approval,
14
+ )
15
+
16
+
17
+ def console_approval(
18
+ message: str, view: ToolCallView, choices: list[ApprovalDecision]
19
+ ) -> Approval:
20
+ with input_screen(width=None) as console:
21
+ console.print(
22
+ transcript_panel(
23
+ title="Approve Tool", content=render_tool_approval(message, view)
24
+ )
25
+ )
26
+
27
+ # provide choices
28
+ prompts: dict[str, str] = {}
29
+ for choice in choices:
30
+ prompts[choice[0]] = f"{choice.capitalize()} ({choice[0]})"
31
+ values = list(prompts.values())
32
+ prompt = ", ".join(values[:-1])
33
+ prompt = f"{prompt}, or {values[-1]}"
34
+
35
+ def render_approval(approval: Approval) -> Approval:
36
+ console.print(f"Decision: {approval.decision.capitalize()}")
37
+ return approval
38
+
39
+ while True:
40
+ decision = Prompt.ask(
41
+ prompt=prompt,
42
+ console=console,
43
+ choices=list(prompts.keys()),
44
+ default="a",
45
+ ).lower()
46
+
47
+ if decision == "a":
48
+ return render_approval(
49
+ Approval(decision="approve", explanation=HUMAN_APPROVED)
50
+ )
51
+ elif decision == "r":
52
+ return render_approval(
53
+ Approval(decision="reject", explanation=HUMAN_REJECTED)
54
+ )
55
+ elif decision == "t":
56
+ return render_approval(
57
+ Approval(decision="terminate", explanation=HUMAN_TERMINATED)
58
+ )
59
+ elif decision == "e":
60
+ return render_approval(
61
+ Approval(decision="escalate", explanation=HUMAN_ESCALATED)
62
+ )