inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/common.py +7 -3
  3. inspect_ai/_cli/eval.py +17 -2
  4. inspect_ai/_cli/trace.py +21 -2
  5. inspect_ai/_display/core/active.py +4 -3
  6. inspect_ai/_display/core/config.py +3 -3
  7. inspect_ai/_display/core/panel.py +7 -3
  8. inspect_ai/_display/plain/__init__.py +0 -0
  9. inspect_ai/_display/plain/display.py +203 -0
  10. inspect_ai/_display/rich/display.py +4 -9
  11. inspect_ai/_display/textual/app.py +4 -1
  12. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  13. inspect_ai/_display/textual/widgets/samples.py +119 -16
  14. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  15. inspect_ai/_eval/eval.py +32 -20
  16. inspect_ai/_eval/evalset.py +7 -5
  17. inspect_ai/_eval/score.py +1 -0
  18. inspect_ai/_eval/task/__init__.py +2 -2
  19. inspect_ai/_eval/task/images.py +40 -25
  20. inspect_ai/_eval/task/results.py +50 -22
  21. inspect_ai/_eval/task/run.py +180 -124
  22. inspect_ai/_eval/task/sandbox.py +10 -5
  23. inspect_ai/_eval/task/task.py +140 -25
  24. inspect_ai/_util/constants.py +2 -0
  25. inspect_ai/_util/content.py +23 -1
  26. inspect_ai/_util/images.py +20 -17
  27. inspect_ai/_util/kvstore.py +73 -0
  28. inspect_ai/_util/notgiven.py +18 -0
  29. inspect_ai/_util/port_names.py +61 -0
  30. inspect_ai/_util/text.py +23 -0
  31. inspect_ai/_util/thread.py +5 -0
  32. inspect_ai/_view/www/App.css +31 -1
  33. inspect_ai/_view/www/dist/assets/index.css +31 -1
  34. inspect_ai/_view/www/dist/assets/index.js +25375 -1846
  35. inspect_ai/_view/www/log-schema.json +129 -15
  36. inspect_ai/_view/www/package.json +2 -0
  37. inspect_ai/_view/www/src/App.mjs +8 -10
  38. inspect_ai/_view/www/src/Types.mjs +0 -1
  39. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  40. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  41. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  42. inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
  43. inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
  44. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  45. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  46. inspect_ai/_view/www/src/index.js +75 -2
  47. inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
  48. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
  49. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  50. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  51. inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
  52. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  53. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
  54. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
  55. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  56. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +62 -27
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/Json.mjs +12 -6
  76. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
  77. inspect_ai/_view/www/vite.config.js +7 -0
  78. inspect_ai/_view/www/yarn.lock +116 -0
  79. inspect_ai/approval/_human/__init__.py +0 -0
  80. inspect_ai/approval/_human/util.py +2 -2
  81. inspect_ai/approval/_policy.py +12 -6
  82. inspect_ai/dataset/_sources/csv.py +2 -1
  83. inspect_ai/dataset/_sources/json.py +2 -1
  84. inspect_ai/dataset/_sources/util.py +15 -7
  85. inspect_ai/log/_condense.py +11 -1
  86. inspect_ai/log/_log.py +3 -6
  87. inspect_ai/log/_recorders/eval.py +19 -8
  88. inspect_ai/log/_samples.py +26 -5
  89. inspect_ai/log/_transcript.py +32 -2
  90. inspect_ai/model/__init__.py +10 -2
  91. inspect_ai/model/_call_tools.py +59 -12
  92. inspect_ai/model/_chat_message.py +2 -4
  93. inspect_ai/model/_conversation.py +61 -0
  94. inspect_ai/model/_generate_config.py +10 -4
  95. inspect_ai/model/_model.py +117 -18
  96. inspect_ai/model/_model_output.py +7 -2
  97. inspect_ai/model/_providers/anthropic.py +109 -51
  98. inspect_ai/model/_providers/azureai.py +26 -24
  99. inspect_ai/model/_providers/bedrock.py +43 -44
  100. inspect_ai/model/_providers/google.py +121 -58
  101. inspect_ai/model/_providers/groq.py +7 -5
  102. inspect_ai/model/_providers/hf.py +11 -6
  103. inspect_ai/model/_providers/mistral.py +17 -20
  104. inspect_ai/model/_providers/openai.py +32 -21
  105. inspect_ai/model/_providers/openai_o1.py +9 -8
  106. inspect_ai/model/_providers/providers.py +1 -1
  107. inspect_ai/model/_providers/together.py +8 -8
  108. inspect_ai/model/_providers/vertex.py +18 -8
  109. inspect_ai/scorer/__init__.py +13 -2
  110. inspect_ai/scorer/_metrics/__init__.py +2 -2
  111. inspect_ai/scorer/_metrics/std.py +3 -3
  112. inspect_ai/scorer/_reducer/reducer.py +1 -1
  113. inspect_ai/scorer/_scorer.py +2 -2
  114. inspect_ai/solver/__init__.py +2 -5
  115. inspect_ai/solver/_prompt.py +35 -5
  116. inspect_ai/solver/_task_state.py +80 -38
  117. inspect_ai/tool/__init__.py +11 -1
  118. inspect_ai/tool/_tool.py +21 -3
  119. inspect_ai/tool/_tool_call.py +10 -0
  120. inspect_ai/tool/_tool_def.py +16 -5
  121. inspect_ai/tool/_tool_with.py +21 -4
  122. inspect_ai/tool/beta/__init__.py +5 -0
  123. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  124. inspect_ai/tool/beta/_computer/_common.py +133 -0
  125. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  126. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  127. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  128. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  129. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  130. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  131. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  134. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  135. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  136. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  137. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  138. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  139. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  144. inspect_ai/util/__init__.py +2 -3
  145. inspect_ai/util/{_trace.py → _conversation.py} +3 -17
  146. inspect_ai/util/_display.py +14 -4
  147. inspect_ai/util/_limit.py +26 -0
  148. inspect_ai/util/_sandbox/context.py +12 -13
  149. inspect_ai/util/_sandbox/docker/compose.py +24 -11
  150. inspect_ai/util/_sandbox/docker/docker.py +84 -14
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/environment.py +27 -1
  153. inspect_ai/util/_sandbox/local.py +1 -0
  154. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
  155. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
  156. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  157. inspect_ai/model/_trace.py +0 -48
  158. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
  159. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
  160. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
  161. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
@@ -126,10 +126,17 @@ export const formatTime = (seconds) => {
126
126
  return `${seconds} sec`;
127
127
  } else if (seconds < 60 * 60) {
128
128
  return `${Math.floor(seconds / 60)} min ${seconds % 60} sec`;
129
+ } else if (seconds < 60 * 60 * 24) {
130
+ const hours = Math.floor(seconds / (60 * 60));
131
+ const minutes = Math.floor((seconds % (60 * 60)) / 60);
132
+ const remainingSeconds = seconds % 60;
133
+ return `${hours} hr ${minutes} min ${remainingSeconds} sec`;
129
134
  } else {
130
- return `${Math.floor(seconds / (60 * 60 * 24))} days ${Math.floor(
131
- seconds / 60,
132
- )} min ${seconds % 60} sec`;
135
+ const days = Math.floor(seconds / (60 * 60 * 24));
136
+ const hours = Math.floor((seconds % (60 * 60 * 24)) / (60 * 60));
137
+ const minutes = Math.floor((seconds % (60 * 60)) / 60);
138
+ const remainingSeconds = seconds % 60;
139
+ return `${days} days ${hours} hr ${minutes} min ${remainingSeconds} sec`;
133
140
  }
134
141
  };
135
142
 
@@ -1,6 +1,8 @@
1
1
  // @ts-check
2
2
 
3
3
  export const asyncJsonParse = async (text) => {
4
+ const encoder = new TextEncoder();
5
+ const encodedText = encoder.encode(text);
4
6
  const blob = new Blob([kWorkerCode], { type: "application/javascript" });
5
7
  const blobURL = URL.createObjectURL(blob);
6
8
  const worker = new Worker(blobURL);
@@ -17,7 +19,9 @@ export const asyncJsonParse = async (text) => {
17
19
  reject(new Error(error.message));
18
20
  };
19
21
  });
20
- worker.postMessage({ scriptContent: kJson5ScriptBase64, text });
22
+ worker.postMessage({ scriptContent: kJson5ScriptBase64, encodedText }, [
23
+ encodedText.buffer,
24
+ ]);
21
25
  return await result;
22
26
  } finally {
23
27
  worker.terminate();
@@ -28,12 +32,14 @@ export const asyncJsonParse = async (text) => {
28
32
  const kWorkerCode = `
29
33
  self.onmessage = function (e) {
30
34
  eval(atob(e.data.scriptContent));
31
- const text = e.data.text;
35
+ const { encodedText } = e.data;
36
+ const decoder = new TextDecoder();
37
+ const text = decoder.decode(encodedText);
32
38
  try {
33
- const result = JSON5.parse(text);
34
- self.postMessage({ success: true, result });
35
- } catch (error) {
36
- self.postMessage({ success: false, error: error.message });
39
+ const result = JSON.parse(text);
40
+ postMessage({ success: true, result });
41
+ } catch (err) {
42
+ postMessage({ success: false, error: err.message });
37
43
  }
38
44
  };`;
39
45
 
@@ -142,6 +142,8 @@ export const WorkSpace = ({
142
142
  }
143
143
  }, [divRef, task_id]);
144
144
 
145
+ const sampleTabScrollRef = useRef(/** @type {HTMLElement|null} */ (null));
146
+
145
147
  const resolvedTabs = useMemo(() => {
146
148
  // Tabs that are available within the app
147
149
  // Include the tab contents as well as any tools that the tab provides
@@ -154,6 +156,7 @@ export const WorkSpace = ({
154
156
  resolvedTabs.samples = {
155
157
  id: kEvalWorkspaceTabId,
156
158
  scrollable: samples.length === 1,
159
+ scrollRef: sampleTabScrollRef,
157
160
  label: samples?.length > 1 ? "Samples" : "Sample",
158
161
  content: () => {
159
162
  return html` <${SamplesTab}
@@ -178,6 +181,7 @@ export const WorkSpace = ({
178
181
  epoch=${epoch}
179
182
  sampleScrollPositionRef=${sampleScrollPositionRef}
180
183
  setSampleScrollPosition=${setSampleScrollPosition}
184
+ sampleTabScrollRef=${sampleTabScrollRef}
181
185
  />`;
182
186
  },
183
187
  tools: () => {
@@ -368,6 +372,7 @@ export const WorkSpace = ({
368
372
  evalResults=${evalResults}
369
373
  evalStats=${evalStats}
370
374
  samples=${samples}
375
+ evalDescriptor=${samplesDescriptor.evalDescriptor}
371
376
  status=${evalStatus}
372
377
  tabs=${resolvedTabs}
373
378
  selectedTab=${selectedTab}
@@ -386,6 +391,7 @@ const WorkspaceDisplay = ({
386
391
  evalResults,
387
392
  evalStats,
388
393
  samples,
394
+ evalDescriptor,
389
395
  status,
390
396
  showToggle,
391
397
  selectedTab,
@@ -442,6 +448,7 @@ const WorkspaceDisplay = ({
442
448
  onSelected=${onSelected}
443
449
  selected=${selectedTab === tab.id}
444
450
  scrollable=${!!tab.scrollable}
451
+ scrollRef=${tab.scrollRef}
445
452
  scrollPosition=${workspaceTabScrollPositionRef.current[tab.id]}
446
453
  setScrollPosition=${useCallback(
447
454
  (position) => {
@@ -456,20 +463,19 @@ const WorkspaceDisplay = ({
456
463
  }, [tabs]);
457
464
 
458
465
  return html`
459
-
460
-
461
466
  <${Navbar}
462
467
  evalSpec=${evalSpec}
463
468
  evalPlan=${evalPlan}
464
469
  evalResults=${evalResults}
465
470
  evalStats=${evalStats}
466
471
  samples=${samples}
472
+ evalDescriptor=${evalDescriptor}
467
473
  status=${status}
468
474
  file=${logFileName}
469
475
  showToggle=${showToggle}
470
-
476
+
471
477
  offcanvas=${offcanvas}
472
- />
478
+ />
473
479
  <div ref=${divRef} class="workspace" style=${{
474
480
  paddingTop: "0rem",
475
481
  overflowY: "hidden",
@@ -26,4 +26,11 @@ export default defineConfig({
26
26
  css: true,
27
27
  }),
28
28
  ],
29
+ resolve: {
30
+ alias: {
31
+ 'react': 'preact/compat',
32
+ 'react-dom': 'preact/compat',
33
+ 'react-dom/test-utils': 'preact/test-utils'
34
+ }
35
+ }
29
36
  });
@@ -169,6 +169,70 @@
169
169
  "@babel/helper-validator-identifier" "^7.24.7"
170
170
  to-fast-properties "^2.0.0"
171
171
 
172
+ "@codemirror/autocomplete@^6.0.0":
173
+ version "6.18.3"
174
+ resolved "https://registry.yarnpkg.com/@codemirror/autocomplete/-/autocomplete-6.18.3.tgz#f9ea79a2f369662516f71bc0b2f819454d3c8e00"
175
+ integrity sha512-1dNIOmiM0z4BIBwxmxEfA1yoxh1MF/6KPBbh20a5vphGV0ictKlgQsbJs6D6SkR6iJpGbpwRsa6PFMNlg9T9pQ==
176
+ dependencies:
177
+ "@codemirror/language" "^6.0.0"
178
+ "@codemirror/state" "^6.0.0"
179
+ "@codemirror/view" "^6.17.0"
180
+ "@lezer/common" "^1.0.0"
181
+
182
+ "@codemirror/commands@^6.0.0":
183
+ version "6.7.1"
184
+ resolved "https://registry.yarnpkg.com/@codemirror/commands/-/commands-6.7.1.tgz#04561e95bc0779eaa49efd63e916c4efb3bbf6d6"
185
+ integrity sha512-llTrboQYw5H4THfhN4U3qCnSZ1SOJ60ohhz+SzU0ADGtwlc533DtklQP0vSFaQuCPDn3BPpOd1GbbnUtwNjsrw==
186
+ dependencies:
187
+ "@codemirror/language" "^6.0.0"
188
+ "@codemirror/state" "^6.4.0"
189
+ "@codemirror/view" "^6.27.0"
190
+ "@lezer/common" "^1.1.0"
191
+
192
+ "@codemirror/language@^6.0.0":
193
+ version "6.10.6"
194
+ resolved "https://registry.yarnpkg.com/@codemirror/language/-/language-6.10.6.tgz#3770aa55fce575b45b1037b390b576907f0061c7"
195
+ integrity sha512-KrsbdCnxEztLVbB5PycWXFxas4EOyk/fPAfruSOnDDppevQgid2XZ+KbJ9u+fDikP/e7MW7HPBTvTb8JlZK9vA==
196
+ dependencies:
197
+ "@codemirror/state" "^6.0.0"
198
+ "@codemirror/view" "^6.23.0"
199
+ "@lezer/common" "^1.1.0"
200
+ "@lezer/highlight" "^1.0.0"
201
+ "@lezer/lr" "^1.0.0"
202
+ style-mod "^4.0.0"
203
+
204
+ "@codemirror/lint@^6.0.0":
205
+ version "6.8.4"
206
+ resolved "https://registry.yarnpkg.com/@codemirror/lint/-/lint-6.8.4.tgz#7d8aa5d1a6dec89ffcc23ad45ddca2e12e90982d"
207
+ integrity sha512-u4q7PnZlJUojeRe8FJa/njJcMctISGgPQ4PnWsd9268R4ZTtU+tfFYmwkBvgcrK2+QQ8tYFVALVb5fVJykKc5A==
208
+ dependencies:
209
+ "@codemirror/state" "^6.0.0"
210
+ "@codemirror/view" "^6.35.0"
211
+ crelt "^1.0.5"
212
+
213
+ "@codemirror/search@^6.0.0":
214
+ version "6.5.8"
215
+ resolved "https://registry.yarnpkg.com/@codemirror/search/-/search-6.5.8.tgz#b59b3659b46184cc75d6108d7c050a4ca344c3a0"
216
+ integrity sha512-PoWtZvo7c1XFeZWmmyaOp2G0XVbOnm+fJzvghqGAktBW3cufwJUWvSCcNG0ppXiBEM05mZu6RhMtXPv2hpllig==
217
+ dependencies:
218
+ "@codemirror/state" "^6.0.0"
219
+ "@codemirror/view" "^6.0.0"
220
+ crelt "^1.0.5"
221
+
222
+ "@codemirror/state@^6.0.0", "@codemirror/state@^6.4.0":
223
+ version "6.4.1"
224
+ resolved "https://registry.yarnpkg.com/@codemirror/state/-/state-6.4.1.tgz#da57143695c056d9a3c38705ed34136e2b68171b"
225
+ integrity sha512-QkEyUiLhsJoZkbumGZlswmAhA7CBU02Wrz7zvH4SrcifbsqwlXShVXg65f3v/ts57W3dqyamEriMhij1Z3Zz4A==
226
+
227
+ "@codemirror/view@^6.0.0", "@codemirror/view@^6.17.0", "@codemirror/view@^6.23.0", "@codemirror/view@^6.27.0", "@codemirror/view@^6.35.0":
228
+ version "6.35.0"
229
+ resolved "https://registry.yarnpkg.com/@codemirror/view/-/view-6.35.0.tgz#890e8e31a58edf65cdf193049fe9f3fdec20cc82"
230
+ integrity sha512-I0tYy63q5XkaWsJ8QRv5h6ves7kvtrBWjBcnf/bzohFJQc5c14a1AQRdE8QpPF9eMp5Mq2FMm59TCj1gDfE7kw==
231
+ dependencies:
232
+ "@codemirror/state" "^6.4.0"
233
+ style-mod "^4.1.0"
234
+ w3c-keyname "^2.2.4"
235
+
172
236
  "@esbuild/aix-ppc64@0.21.5":
173
237
  version "0.21.5"
174
238
  resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz#c7184a326533fcdf1b8ee0733e21c713b975575f"
@@ -372,6 +436,25 @@
372
436
  "@jridgewell/resolve-uri" "^3.1.0"
373
437
  "@jridgewell/sourcemap-codec" "^1.4.14"
374
438
 
439
+ "@lezer/common@^1.0.0", "@lezer/common@^1.1.0":
440
+ version "1.2.3"
441
+ resolved "https://registry.yarnpkg.com/@lezer/common/-/common-1.2.3.tgz#138fcddab157d83da557554851017c6c1e5667fd"
442
+ integrity sha512-w7ojc8ejBqr2REPsWxJjrMFsA/ysDCFICn8zEOR9mrqzOu2amhITYuLD8ag6XZf0CFXDrhKqw7+tW8cX66NaDA==
443
+
444
+ "@lezer/highlight@^1.0.0":
445
+ version "1.2.1"
446
+ resolved "https://registry.yarnpkg.com/@lezer/highlight/-/highlight-1.2.1.tgz#596fa8f9aeb58a608be0a563e960c373cbf23f8b"
447
+ integrity sha512-Z5duk4RN/3zuVO7Jq0pGLJ3qynpxUVsh7IbUbGj88+uV2ApSAn6kWg2au3iJb+0Zi7kKtqffIESgNcRXWZWmSA==
448
+ dependencies:
449
+ "@lezer/common" "^1.0.0"
450
+
451
+ "@lezer/lr@^1.0.0":
452
+ version "1.4.2"
453
+ resolved "https://registry.yarnpkg.com/@lezer/lr/-/lr-1.4.2.tgz#931ea3dea8e9de84e90781001dae30dea9ff1727"
454
+ integrity sha512-pu0K1jCIdnQ12aWNaAVU5bzi7Bd1w54J3ECgANPmYLtQKP0HBj2cE/5coBD66MT10xbtIuUr7tg0Shbsvk0mDA==
455
+ dependencies:
456
+ "@lezer/common" "^1.0.0"
457
+
375
458
  "@nodelib/fs.scandir@2.1.5":
376
459
  version "2.1.5"
377
460
  resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5"
@@ -619,6 +702,19 @@ clipboard@^2.0.11:
619
702
  select "^1.1.2"
620
703
  tiny-emitter "^2.0.0"
621
704
 
705
+ codemirror@^6.0.1:
706
+ version "6.0.1"
707
+ resolved "https://registry.yarnpkg.com/codemirror/-/codemirror-6.0.1.tgz#62b91142d45904547ee3e0e0e4c1a79158035a29"
708
+ integrity sha512-J8j+nZ+CdWmIeFIGXEFbFPtpiYacFMDR8GlHK3IyHQJMCaVRfGx9NT+Hxivv1ckLWPvNdZqndbr/7lVhrf/Svg==
709
+ dependencies:
710
+ "@codemirror/autocomplete" "^6.0.0"
711
+ "@codemirror/commands" "^6.0.0"
712
+ "@codemirror/language" "^6.0.0"
713
+ "@codemirror/lint" "^6.0.0"
714
+ "@codemirror/search" "^6.0.0"
715
+ "@codemirror/state" "^6.0.0"
716
+ "@codemirror/view" "^6.0.0"
717
+
622
718
  color-convert@^1.9.0:
623
719
  version "1.9.3"
624
720
  resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8"
@@ -653,6 +749,11 @@ convert-source-map@^2.0.0:
653
749
  resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-2.0.0.tgz#4b560f649fc4e918dd0ab75cf4961e8bc882d82a"
654
750
  integrity sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==
655
751
 
752
+ crelt@^1.0.5:
753
+ version "1.0.6"
754
+ resolved "https://registry.yarnpkg.com/crelt/-/crelt-1.0.6.tgz#7cc898ea74e190fb6ef9dae57f8f81cf7302df72"
755
+ integrity sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==
756
+
656
757
  cross-spawn@^7.0.2:
657
758
  version "7.0.3"
658
759
  resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
@@ -885,6 +986,11 @@ file-entry-cache@^8.0.0:
885
986
  dependencies:
886
987
  flat-cache "^4.0.0"
887
988
 
989
+ filtrex@^3.1.0:
990
+ version "3.1.0"
991
+ resolved "https://registry.yarnpkg.com/filtrex/-/filtrex-3.1.0.tgz#5ec00994615ff10e5e09c89bb290c855cb408c21"
992
+ integrity sha512-mHzZ2wUISETF1OaEcNRiGz1ljuIV8c/C9td9qyAZ+wTwigkAk5RO9YrCxQKk5H9v7joDRFIBik9U5RTK9eXZ/A==
993
+
888
994
  find-up@^5.0.0:
889
995
  version "5.0.0"
890
996
  resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc"
@@ -1367,6 +1473,11 @@ strip-json-comments@^3.1.1:
1367
1473
  resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
1368
1474
  integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==
1369
1475
 
1476
+ style-mod@^4.0.0, style-mod@^4.1.0:
1477
+ version "4.1.2"
1478
+ resolved "https://registry.yarnpkg.com/style-mod/-/style-mod-4.1.2.tgz#ca238a1ad4786520f7515a8539d5a63691d7bf67"
1479
+ integrity sha512-wnD1HyVqpJUI2+eKZ+eo1UwghftP6yuFheBqqe+bWCotBjC2K1YnteJILRMs3SM4V/0dLEW1SC27MWP5y+mwmw==
1480
+
1370
1481
  supports-color@^5.3.0:
1371
1482
  version "5.5.0"
1372
1483
  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f"
@@ -1442,6 +1553,11 @@ vite@^5.3.2:
1442
1553
  optionalDependencies:
1443
1554
  fsevents "~2.3.3"
1444
1555
 
1556
+ w3c-keyname@^2.2.4:
1557
+ version "2.2.8"
1558
+ resolved "https://registry.yarnpkg.com/w3c-keyname/-/w3c-keyname-2.2.8.tgz#7b17c8c6883d4e8b86ac8aba79d39e880f8869c5"
1559
+ integrity sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==
1560
+
1445
1561
  which@^2.0.1:
1446
1562
  version "2.0.2"
1447
1563
  resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1"
File without changes
@@ -5,7 +5,7 @@ from rich.text import Text
5
5
 
6
6
  from inspect_ai._util.transcript import transcript_markdown
7
7
  from inspect_ai.tool._tool_call import ToolCallContent, ToolCallView
8
- from inspect_ai.util._trace import trace_enabled
8
+ from inspect_ai.util._display import display_type
9
9
 
10
10
  HUMAN_APPROVED = "Human operator approved tool call."
11
11
  HUMAN_REJECTED = "Human operator rejected the tool call."
@@ -18,7 +18,7 @@ def render_tool_approval(message: str, view: ToolCallView) -> list[RenderableTyp
18
18
  text_highlighter = ReprHighlighter()
19
19
 
20
20
  # ignore content if trace enabled
21
- message = message.strip() if not trace_enabled() else ""
21
+ message = message.strip() if display_type() != "conversation" else ""
22
22
 
23
23
  def add_view_content(view_content: ToolCallContent) -> None:
24
24
  if view_content.title:
@@ -1,13 +1,13 @@
1
1
  import fnmatch
2
- import re
2
+ import sys
3
3
  from dataclasses import dataclass
4
4
  from pathlib import Path
5
- from re import Pattern
6
5
  from typing import Any, Generator, cast
7
6
 
8
7
  from pydantic import BaseModel, Field, model_validator
9
8
 
10
9
  from inspect_ai._util.config import read_config_object
10
+ from inspect_ai._util.format import format_function_call
11
11
  from inspect_ai._util.registry import registry_create, registry_lookup
12
12
  from inspect_ai.solver._task_state import TaskState
13
13
  from inspect_ai.tool._tool_call import ToolCall, ToolCallView
@@ -30,17 +30,23 @@ def policy_approver(policies: str | list[ApprovalPolicy]) -> Approver:
30
30
  policies = approval_policies_from_config(policies)
31
31
 
32
32
  # compile policy into approvers and regexes for matching
33
- policy_matchers: list[tuple[list[Pattern[str]], Approver]] = []
33
+ policy_matchers: list[tuple[list[str], Approver]] = []
34
34
  for policy in policies:
35
35
  tools = [policy.tools] if isinstance(policy.tools, str) else policy.tools
36
- patterns = [re.compile(fnmatch.translate(tool)) for tool in tools]
37
- policy_matchers.append((patterns, policy.approver))
36
+ globs = [f"{tool}*" for tool in tools]
37
+ policy_matchers.append((globs, policy.approver))
38
38
 
39
39
  # generator for policies that match a tool_call
40
40
  def tool_approvers(tool_call: ToolCall) -> Generator[Approver, None, None]:
41
41
  for policy_matcher in iter(policy_matchers):
42
+ function_call = format_function_call(
43
+ tool_call.function, tool_call.arguments, width=sys.maxsize
44
+ )
42
45
  if any(
43
- [pattern.match(tool_call.function) for pattern in policy_matcher[0]]
46
+ [
47
+ fnmatch.fnmatch(function_call, pattern)
48
+ for pattern in policy_matcher[0]
49
+ ]
44
50
  ):
45
51
  yield policy_matcher[1]
46
52
 
@@ -1,4 +1,5 @@
1
1
  import csv
2
+ import os
2
3
  from io import TextIOWrapper
3
4
  from pathlib import Path
4
5
  from typing import Any
@@ -75,7 +76,7 @@ def csv_dataset(
75
76
  dataset = MemoryDataset(
76
77
  samples=data_to_samples(valid_data, data_to_sample, auto_id),
77
78
  name=name,
78
- location=csv_file,
79
+ location=os.path.abspath(csv_file),
79
80
  )
80
81
 
81
82
  # resolve relative file paths
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import os
2
3
  from io import TextIOWrapper
3
4
  from pathlib import Path
4
5
  from typing import Any, cast
@@ -75,7 +76,7 @@ def json_dataset(
75
76
  dataset = MemoryDataset(
76
77
  samples=data_to_samples(dataset_reader(f), data_to_sample, auto_id),
77
78
  name=name,
78
- location=json_file,
79
+ location=os.path.abspath(json_file),
79
80
  )
80
81
 
81
82
  # resolve relative file paths
@@ -1,6 +1,6 @@
1
1
  from typing import Callable
2
2
 
3
- from inspect_ai._util.content import Content, ContentImage
3
+ from inspect_ai._util.content import Content, ContentAudio, ContentImage, ContentVideo
4
4
  from inspect_ai._util.file import filesystem
5
5
  from inspect_ai.model._chat_message import ChatMessage, ChatMessageUser
6
6
  from inspect_ai.util._sandbox.environment import SandboxEnvironmentSpec
@@ -44,24 +44,28 @@ def resolve_sample_files(dataset: Dataset) -> None:
44
44
  for path in sample.files.keys():
45
45
  sample.files[path] = resolve_file(sample.files[path])
46
46
 
47
+ # check for setup script
48
+ if sample.setup is not None:
49
+ sample.setup = resolve_file(sample.setup)
50
+
47
51
  # check for image paths
48
52
  if not isinstance(sample.input, str):
49
- sample.input = messages_with_resolved_images(sample.input, resolve_file)
53
+ sample.input = messages_with_resolved_content(sample.input, resolve_file)
50
54
 
51
55
 
52
- def messages_with_resolved_images(
56
+ def messages_with_resolved_content(
53
57
  messages: list[ChatMessage], resolver: Callable[[str], str]
54
58
  ) -> list[ChatMessage]:
55
- return [message_with_resolved_image(message, resolver) for message in messages]
59
+ return [message_with_resolved_content(message, resolver) for message in messages]
56
60
 
57
61
 
58
- def message_with_resolved_image(
62
+ def message_with_resolved_content(
59
63
  message: ChatMessage, resolver: Callable[[str], str]
60
64
  ) -> ChatMessage:
61
65
  if isinstance(message, ChatMessageUser) and not isinstance(message.content, str):
62
66
  return ChatMessageUser(
63
67
  content=[
64
- chat_content_with_resolved_image(content, resolver)
68
+ chat_content_with_resolved_content(content, resolver)
65
69
  for content in message.content
66
70
  ],
67
71
  source=message.source,
@@ -70,7 +74,7 @@ def message_with_resolved_image(
70
74
  return message
71
75
 
72
76
 
73
- def chat_content_with_resolved_image(
77
+ def chat_content_with_resolved_content(
74
78
  content: Content, resolver: Callable[[str], str]
75
79
  ) -> Content:
76
80
  if isinstance(content, ContentImage):
@@ -78,5 +82,9 @@ def chat_content_with_resolved_image(
78
82
  image=resolver(content.image),
79
83
  detail=content.detail,
80
84
  )
85
+ elif isinstance(content, ContentAudio):
86
+ return ContentAudio(audio=resolver(content.audio), format=content.format)
87
+ elif isinstance(content, ContentVideo):
88
+ return ContentVideo(video=resolver(content.video), format=content.format)
81
89
  else:
82
90
  return content
@@ -6,7 +6,13 @@ from typing import (
6
6
  from pydantic import JsonValue
7
7
 
8
8
  from inspect_ai._util.constants import BASE_64_DATA_REMOVED
9
- from inspect_ai._util.content import Content, ContentImage, ContentText
9
+ from inspect_ai._util.content import (
10
+ Content,
11
+ ContentAudio,
12
+ ContentImage,
13
+ ContentText,
14
+ ContentVideo,
15
+ )
10
16
  from inspect_ai._util.hash import mm3_hash
11
17
  from inspect_ai._util.json import JsonChange
12
18
  from inspect_ai._util.url import is_data_uri
@@ -304,3 +310,7 @@ def walk_content(content: Content, content_fn: Callable[[str], str]) -> Content:
304
310
  return content.model_copy(update=dict(text=content_fn(content.text)))
305
311
  elif isinstance(content, ContentImage):
306
312
  return content.model_copy(update=dict(image=content_fn(content.image)))
313
+ elif isinstance(content, ContentAudio):
314
+ return content.model_copy(update=dict(audio=content_fn(content.audio)))
315
+ elif isinstance(content, ContentVideo):
316
+ return content.model_copy(update=dict(video=content_fn(content.video)))
inspect_ai/log/_log.py CHANGED
@@ -48,9 +48,6 @@ class EvalConfig(BaseModel):
48
48
  epochs_reducer: list[str] | None = Field(default=None)
49
49
  """Reducers for aggregating per-sample scores."""
50
50
 
51
- trace: bool | None = Field(default=None)
52
- """Trace message interactions with evaluated model to terminal."""
53
-
54
51
  approval: ApprovalPolicyConfig | None = Field(default=None)
55
52
  """Approval policy for tool use."""
56
53
 
@@ -117,7 +114,7 @@ class EvalConfig(BaseModel):
117
114
 
118
115
 
119
116
  class EvalSampleLimit(BaseModel):
120
- type: Literal["context", "time", "message", "token", "operator"]
117
+ type: Literal["context", "time", "message", "token", "operator", "custom"]
121
118
  """The type of limit"""
122
119
 
123
120
  limit: int
@@ -355,7 +352,7 @@ class EvalResults(BaseModel):
355
352
  """Scorer used to compute results (deprecated)."""
356
353
  warn_once(
357
354
  logger,
358
- "The 'scorer' field is deprecated. Use 'scorers' instead.",
355
+ "The 'scorer' field is deprecated. Use 'scores' instead.",
359
356
  )
360
357
  return self.scores[0] if self.scores else None
361
358
 
@@ -364,7 +361,7 @@ class EvalResults(BaseModel):
364
361
  """Metrics computed (deprecated)."""
365
362
  warn_once(
366
363
  logger,
367
- "The 'metrics' field is deprecated. Access metrics through 'scorers' instead.",
364
+ "The 'metrics' field is deprecated. Access metrics through 'scores' instead.",
368
365
  )
369
366
  return self.scores[0].metrics if self.scores else {}
370
367
 
@@ -13,7 +13,12 @@ from pydantic_core import to_json
13
13
  from typing_extensions import override
14
14
 
15
15
  from inspect_ai._util.constants import LOG_SCHEMA_VERSION
16
- from inspect_ai._util.content import ContentImage, ContentText
16
+ from inspect_ai._util.content import (
17
+ ContentAudio,
18
+ ContentImage,
19
+ ContentText,
20
+ ContentVideo,
21
+ )
17
22
  from inspect_ai._util.error import EvalError
18
23
  from inspect_ai._util.file import FileSystem, async_fileystem, dirname, file, filesystem
19
24
  from inspect_ai._util.json import jsonable_python
@@ -90,9 +95,11 @@ class EvalRecorder(FileRecorder):
90
95
  self.data: dict[str, ZipLogFile] = {}
91
96
 
92
97
  @override
93
- async def log_init(self, eval: EvalSpec, location: str | None = None) -> str:
98
+ async def log_init(
99
+ self, eval: EvalSpec, location: str | None = None, *, clean: bool = False
100
+ ) -> str:
94
101
  # if the file exists then read summaries
95
- if location is not None and self.fs.exists(location):
102
+ if not clean and location is not None and self.fs.exists(location):
96
103
  with file(location, "rb") as f:
97
104
  with ZipFile(f, "r") as zip:
98
105
  log_start = _read_start(zip)
@@ -229,7 +236,7 @@ class EvalRecorder(FileRecorder):
229
236
  async def write_log(cls, location: str, log: EvalLog) -> None:
230
237
  # write using the recorder (so we get all of the extra streams)
231
238
  recorder = EvalRecorder(dirname(location))
232
- await recorder.log_init(log.eval, location)
239
+ await recorder.log_init(log.eval, location, clean=True)
233
240
  await recorder.log_start(log.eval, log.plan)
234
241
  for sample in log.samples or []:
235
242
  await recorder.log_sample(log.eval, sample)
@@ -244,12 +251,16 @@ def text_inputs(inputs: str | list[ChatMessage]) -> str | list[ChatMessage]:
244
251
  input: list[ChatMessage] = []
245
252
  for message in inputs:
246
253
  if not isinstance(message.content, str):
247
- filtered_content: list[ContentText | ContentImage] = []
254
+ filtered_content: list[
255
+ ContentText | ContentImage | ContentAudio | ContentVideo
256
+ ] = []
248
257
  for content in message.content:
249
- if content.type != "image":
258
+ if content.type == "text":
250
259
  filtered_content.append(content)
251
- if len(filtered_content) == 0:
252
- filtered_content.append(ContentText(text="(Image)"))
260
+ else:
261
+ filtered_content.append(
262
+ ContentText(text=f"({content.type.capitalize()})")
263
+ )
253
264
  message.content = filtered_content
254
265
  input.append(message)
255
266
  else:
@@ -29,7 +29,7 @@ class ActiveSample:
29
29
  sandboxes: dict[str, SandboxConnection],
30
30
  ) -> None:
31
31
  self.id = uuid()
32
- self.started = datetime.now().timestamp()
32
+ self.started: float | None = None
33
33
  self.completed: float | None = None
34
34
  self.task = task
35
35
  self.model = model
@@ -48,10 +48,15 @@ class ActiveSample:
48
48
 
49
49
  @property
50
50
  def execution_time(self) -> float:
51
- completed = (
52
- self.completed if self.completed is not None else datetime.now().timestamp()
53
- )
54
- return completed - self.started
51
+ if self.started is not None:
52
+ completed = (
53
+ self.completed
54
+ if self.completed is not None
55
+ else datetime.now().timestamp()
56
+ )
57
+ return completed - self.started
58
+ else:
59
+ return 0
55
60
 
56
61
  def interrupt(self, action: Literal["score", "error"]) -> None:
57
62
  self._interrupt_action = action
@@ -108,6 +113,14 @@ def sample_active() -> ActiveSample | None:
108
113
  return _sample_active.get(None)
109
114
 
110
115
 
116
+ def active_sample_token_limit() -> int | None:
117
+ active = sample_active()
118
+ if active:
119
+ return active.token_limit
120
+ else:
121
+ return None
122
+
123
+
111
124
  def set_active_sample_token_limit(token_limit: int | None) -> None:
112
125
  active = sample_active()
113
126
  if active:
@@ -120,6 +133,14 @@ def set_active_sample_total_tokens(total_tokens: int) -> None:
120
133
  active.total_tokens = total_tokens
121
134
 
122
135
 
136
+ def active_sample_message_limit() -> int | None:
137
+ active = sample_active()
138
+ if active:
139
+ return active.message_limit
140
+ else:
141
+ return None
142
+
143
+
123
144
  def set_active_sample_message_limit(message_limit: int | None) -> None:
124
145
  active = sample_active()
125
146
  if active: