inspect-ai 0.3.61__py3-none-any.whl → 0.3.63__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (430)
  1. inspect_ai/_cli/eval.py +13 -0
  2. inspect_ai/_cli/main.py +1 -1
  3. inspect_ai/_cli/trace.py +8 -0
  4. inspect_ai/_cli/view.py +4 -0
  5. inspect_ai/_display/core/active.py +2 -3
  6. inspect_ai/_display/textual/widgets/transcript.py +15 -9
  7. inspect_ai/_eval/eval.py +4 -4
  8. inspect_ai/_eval/evalset.py +6 -6
  9. inspect_ai/_eval/task/error.py +10 -14
  10. inspect_ai/_eval/task/run.py +13 -8
  11. inspect_ai/_util/hash.py +1 -1
  12. inspect_ai/_util/transcript.py +11 -0
  13. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  14. inspect_ai/_view/www/.vscode/settings.json +8 -0
  15. inspect_ai/_view/www/App.css +92 -29
  16. inspect_ai/_view/www/dist/assets/index.css +16636 -14674
  17. inspect_ai/_view/www/dist/assets/index.js +43585 -36122
  18. inspect_ai/_view/www/dist/index.html +1 -1
  19. inspect_ai/_view/www/index.html +2 -2
  20. inspect_ai/_view/www/log-schema.json +36 -19
  21. inspect_ai/_view/www/package.json +22 -4
  22. inspect_ai/_view/www/postcss.config.cjs +8 -9
  23. inspect_ai/_view/www/src/{App.mjs → App.tsx} +355 -365
  24. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  25. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  26. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  27. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  28. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  29. inspect_ai/_view/www/src/api/index.ts +4 -4
  30. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  31. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  32. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  33. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  34. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  35. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  36. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  37. inspect_ai/_view/www/src/components/Card.css +60 -0
  38. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  39. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  40. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  41. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  42. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  43. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  45. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  46. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  47. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  48. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  49. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  50. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  51. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  52. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  53. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  54. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  55. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  56. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  57. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  58. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  59. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  60. inspect_ai/_view/www/src/components/LargeModal.tsx +199 -0
  61. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  62. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  63. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  64. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  65. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  66. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  67. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  68. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  69. inspect_ai/_view/www/src/components/NavPills.tsx +99 -0
  70. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  71. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  72. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  73. inspect_ai/_view/www/src/components/TabSet.tsx +200 -0
  74. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  75. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  76. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  77. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  78. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  79. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -7
  80. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  81. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  82. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  83. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  84. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  85. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  86. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  87. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  88. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  89. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  90. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  91. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  92. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  93. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  94. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +309 -0
  95. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  96. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  97. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  98. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  99. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  100. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  101. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  102. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  103. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +326 -0
  104. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  105. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +175 -0
  106. inspect_ai/_view/www/src/samples/SamplesTools.tsx +60 -0
  107. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  108. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  109. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  110. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  111. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  112. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +46 -0
  113. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  114. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  115. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  116. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +143 -0
  117. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  118. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +131 -0
  119. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  120. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +145 -0
  121. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  122. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +86 -0
  123. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  124. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  126. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  127. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +107 -0
  128. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +363 -0
  129. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  130. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  131. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  132. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  133. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  134. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  135. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  136. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  137. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  138. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  139. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  140. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  141. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  142. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  143. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  144. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  145. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  146. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  147. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  148. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  149. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  150. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  151. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  152. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  153. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  154. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  155. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  156. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  157. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  158. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +173 -0
  159. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  160. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +182 -0
  161. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  162. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  163. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  164. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  165. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  166. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  167. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  168. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  169. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  170. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  171. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  172. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  173. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  174. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  175. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  176. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  177. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  179. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  180. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  181. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  182. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +108 -0
  183. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  184. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  185. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  186. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  187. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  188. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  189. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  190. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  191. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +91 -0
  192. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  193. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  194. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  196. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  197. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +38 -0
  198. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  199. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +190 -0
  200. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  201. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  202. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  203. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  204. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  205. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +274 -0
  206. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  207. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  208. inspect_ai/_view/www/src/samples/transcript/state/{StateEventView.mjs → StateEventView.tsx} +148 -110
  209. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  210. inspect_ai/_view/www/src/types/log.d.ts +7 -4
  211. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  212. inspect_ai/_view/www/src/types.ts +71 -0
  213. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +22 -0
  214. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  215. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +95 -0
  216. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  217. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  218. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  219. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  220. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  221. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  222. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  223. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  224. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  225. inspect_ai/_view/www/src/utils/format.ts +194 -0
  226. inspect_ai/_view/www/src/utils/git.ts +7 -0
  227. inspect_ai/_view/www/src/utils/html.ts +6 -0
  228. inspect_ai/_view/www/src/utils/http.ts +14 -0
  229. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  230. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  231. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  232. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  233. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  234. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  235. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  236. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  237. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +160 -0
  238. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  239. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  240. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  241. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  242. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  243. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +113 -0
  244. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +67 -0
  245. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +156 -0
  246. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  247. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +222 -0
  248. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  249. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  250. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  251. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  252. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  253. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  254. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  255. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  256. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  257. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  258. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  259. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +41 -0
  260. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  261. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +61 -0
  262. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +80 -0
  263. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  264. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  265. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  266. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  267. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  268. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  269. inspect_ai/_view/www/tsconfig.json +23 -9
  270. inspect_ai/_view/www/vite.config.js +8 -17
  271. inspect_ai/_view/www/yarn.lock +627 -556
  272. inspect_ai/dataset/_dataset.py +36 -0
  273. inspect_ai/dataset/_sources/csv.py +8 -0
  274. inspect_ai/dataset/_sources/file.py +4 -0
  275. inspect_ai/dataset/_sources/hf.py +11 -1
  276. inspect_ai/dataset/_sources/json.py +8 -0
  277. inspect_ai/log/_log.py +3 -6
  278. inspect_ai/log/_message.py +1 -1
  279. inspect_ai/log/_recorders/eval.py +1 -1
  280. inspect_ai/log/_recorders/json.py +5 -7
  281. inspect_ai/model/_call_tools.py +2 -1
  282. inspect_ai/model/_chat_message.py +27 -0
  283. inspect_ai/model/_conversation.py +10 -3
  284. inspect_ai/model/_generate_config.py +6 -0
  285. inspect_ai/model/_model.py +74 -0
  286. inspect_ai/model/_openai.py +33 -1
  287. inspect_ai/model/_providers/anthropic.py +12 -0
  288. inspect_ai/model/_providers/groq.py +4 -0
  289. inspect_ai/model/_providers/openai.py +21 -9
  290. inspect_ai/model/_providers/openai_o1.py +3 -5
  291. inspect_ai/model/_providers/openrouter.py +86 -0
  292. inspect_ai/model/_providers/providers.py +12 -1
  293. inspect_ai/model/_reasoning.py +17 -0
  294. inspect_ai/scorer/_answer.py +7 -7
  295. inspect_ai/scorer/_classification.py +34 -18
  296. inspect_ai/scorer/_common.py +2 -8
  297. inspect_ai/solver/_basic_agent.py +19 -9
  298. inspect_ai/solver/_multiple_choice.py +24 -9
  299. inspect_ai/tool/__init__.py +2 -0
  300. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +2 -5
  301. inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +4 -0
  302. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  303. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +3 -0
  304. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +61 -0
  305. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +10 -0
  306. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  307. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  308. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  309. inspect_ai/tool/_tools/_execute.py +8 -2
  310. inspect_ai/tool/beta.py +3 -0
  311. inspect_ai/util/_sandbox/docker/docker.py +32 -85
  312. inspect_ai/util/_sandbox/self_check.py +124 -16
  313. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/METADATA +2 -1
  314. inspect_ai-0.3.63.dist-info/RECORD +618 -0
  315. inspect_ai/_view/www/src/Register.mjs +0 -3
  316. inspect_ai/_view/www/src/Types.mjs +0 -38
  317. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  318. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  319. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  320. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  321. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  322. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  323. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  324. inspect_ai/_view/www/src/components/ChatView.mjs +0 -418
  325. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  326. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  327. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  328. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  329. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  330. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  331. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  332. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  333. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  334. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  335. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  336. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  337. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  338. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  339. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  340. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  341. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  342. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  343. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  344. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  345. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  346. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  347. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  348. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  349. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  350. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  351. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  352. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  353. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  354. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  355. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  356. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  357. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  358. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  359. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  360. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  361. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  362. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  363. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  364. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  365. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  366. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  367. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  368. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  369. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  370. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  371. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  372. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  373. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  374. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  375. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  376. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  377. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  378. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  379. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  380. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  381. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  382. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  383. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  384. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  385. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  386. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  387. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  388. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  389. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  390. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  391. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  392. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  393. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  394. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  395. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  396. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  397. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  398. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  399. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  400. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  401. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  402. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  403. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  404. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  405. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  406. inspect_ai/tool/beta/__init__.py +0 -5
  407. inspect_ai-0.3.61.dist-info/RECORD +0 -476
  408. /inspect_ai/{tool/beta/_computer/_resources/tool/__init__.py → _view/www/src/components/MorePopOver.css} +0 -0
  409. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  410. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _view/www/src/workspace/tabs/InfoTab.module.css} +0 -0
  411. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  412. /inspect_ai/tool/{beta → _tools}/_computer/_common.py +0 -0
  413. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  414. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  415. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  416. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  417. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  418. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  419. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  420. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  421. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  422. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  423. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  424. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  425. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  426. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  427. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/LICENSE +0 -0
  428. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/WHEEL +0 -0
  429. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/entry_points.txt +0 -0
  430. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/top_level.txt +0 -0
@@ -27,11 +27,7 @@ from inspect_ai.tool import ToolCall, ToolInfo
27
27
  from .._call_tools import parse_tool_call, tool_parse_error_message
28
28
  from .._model_call import ModelCall
29
29
  from .._model_output import ModelUsage, StopReason, as_stop_reason
30
- from .._providers.util import (
31
- ChatAPIHandler,
32
- ChatAPIMessage,
33
- chat_api_input,
34
- )
30
+ from .._providers.util import ChatAPIHandler, ChatAPIMessage, chat_api_input
35
31
 
36
32
  logger = getLogger(__name__)
37
33
 
@@ -85,6 +81,8 @@ def handle_bad_request(model: str, ex: BadRequestError) -> ModelOutput | Excepti
85
81
  stop_reason: StopReason | None = "model_length"
86
82
  elif ex.code == "invalid_prompt":
87
83
  stop_reason = "content_filter"
84
+ else:
85
+ stop_reason = None
88
86
 
89
87
  if stop_reason:
90
88
  return ModelOutput.from_content(
@@ -0,0 +1,86 @@
1
+ import os
2
+ from typing import Any
3
+
4
+ from typing_extensions import override
5
+
6
+ from inspect_ai._util.error import PrerequisiteError
7
+ from inspect_ai.model._providers.util import model_base_url
8
+ from inspect_ai.model._providers.util.util import environment_prerequisite_error
9
+
10
+ from .._generate_config import GenerateConfig
11
+ from .openai import OpenAIAPI
12
+
13
+ OPENROUTER_API_KEY = "OPENROUTER_API_KEY"
14
+
15
+
16
class OpenRouterAPI(OpenAIAPI):
    """OpenAI-compatible model API that targets OpenRouter.

    In addition to the arguments accepted by ``OpenAIAPI``, three optional
    model args are recognized, removed from ``model_args`` and forwarded in
    the ``extra_body`` of each completion request: ``models`` (list),
    ``provider`` (dict) and ``transforms`` (list).
    """

    def __init__(
        self,
        model_name: str,
        base_url: str | None = None,
        api_key: str | None = None,
        config: GenerateConfig = GenerateConfig(),
        **model_args: Any,
    ) -> None:
        """Resolve credentials and endpoint, collect OpenRouter args, init base.

        Args:
            model_name: Name of the model to use.
            base_url: Service URL. When not provided, ``model_base_url`` is
                consulted with ``OPENROUTER_BASE_URL`` and, failing that,
                ``https://openrouter.ai/api/v1`` is used.
            api_key: API key. Falls back to the ``OPENROUTER_API_KEY``
                environment variable.
            config: Generation config passed through to ``OpenAIAPI``.
            **model_args: Additional model args. ``models``, ``provider`` and
                ``transforms`` are consumed here; the rest go to ``OpenAIAPI``.

        Raises:
            PrerequisiteError: If ``models`` or ``transforms`` is not a list,
                or ``provider`` is not a dict. The error produced by
                ``environment_prerequisite_error`` is raised when no API key
                can be found.
        """
        # api_key
        if not api_key:
            api_key = os.environ.get(OPENROUTER_API_KEY, None)
            if not api_key:
                raise environment_prerequisite_error("OpenRouter", OPENROUTER_API_KEY)

        # base_url
        base_url = model_base_url(base_url, "OPENROUTER_BASE_URL")
        base_url = base_url if base_url else "https://openrouter.ai/api/v1"

        # collect known model args that we forward to generate
        def collect_model_arg(name: str) -> Any | None:
            nonlocal model_args
            value = model_args.get(name, None)
            if value is not None:
                model_args.pop(name)
            return value

        # models arg (only the container type is validated here)
        self.models = collect_model_arg("models")
        if self.models is not None:
            if not isinstance(self.models, list):
                raise PrerequisiteError("models must be a list of strings")

        # provider arg
        self.provider = collect_model_arg("provider")
        if self.provider is not None:
            if not isinstance(self.provider, dict):
                raise PrerequisiteError("provider must be a dict")

        # transforms arg (only the container type is validated here)
        self.transforms = collect_model_arg("transforms")
        if self.transforms is not None:
            if not isinstance(self.transforms, list):
                raise PrerequisiteError("transforms must be a list of strings")

        # call super
        super().__init__(
            model_name=model_name,
            base_url=base_url,
            api_key=api_key,
            config=config,
            **model_args,
        )

    @override
    def completion_params(self, config: GenerateConfig, tools: bool) -> dict[str, Any]:
        """Return the base completion params plus any OpenRouter extras.

        When at least one of ``models``, ``provider`` or ``transforms`` is
        set (truthy), the values are added to ``params["extra_body"]`` under
        those same names, preserving any ``extra_body`` the base class built.
        """
        # default params
        params = super().completion_params(config, tools)

        # pass args if specified
        EXTRA_BODY = "extra_body"
        if self.models or self.provider or self.transforms:
            params[EXTRA_BODY] = params.get(EXTRA_BODY, {})
            if self.models:
                params[EXTRA_BODY]["models"] = self.models
            if self.provider:
                params[EXTRA_BODY]["provider"] = self.provider
            if self.transforms:
                # key must be spelled "transforms" to match the model arg name
                params[EXTRA_BODY]["transforms"] = self.transforms

        return params
@@ -16,7 +16,7 @@ from .._registry import modelapi
16
16
  def groq() -> type[ModelAPI]:
17
17
  FEATURE = "Groq API"
18
18
  PACKAGE = "groq"
19
- MIN_VERSION = "0.11.0"
19
+ MIN_VERSION = "0.16.0"
20
20
 
21
21
  # verify we have the package
22
22
  try:
@@ -198,6 +198,17 @@ def ollama() -> type[ModelAPI]:
198
198
  return OllamaAPI
199
199
 
200
200
 
201
@modelapi(name="openrouter")
def openrouter() -> type[ModelAPI]:
    """Return the ``OpenRouterAPI`` class for the "openrouter" model API."""
    # run the openai client validation before touching the provider module
    validate_openai_client("OpenRouter API")

    # deferred import: the provider module is only loaded after validation
    from .openrouter import OpenRouterAPI as provider_class

    return provider_class
210
+
211
+
201
212
  @modelapi(name="llama-cpp-python")
202
213
  def llama_cpp_python() -> type[ModelAPI]:
203
214
  # validate
@@ -0,0 +1,17 @@
1
+ import re
2
+ from typing import NamedTuple
3
+
4
+
5
+ class ContentWithReasoning(NamedTuple):
6
+ content: str
7
+ reasoning: str
8
+
9
+
10
+ def parse_content_with_reasoning(content: str) -> ContentWithReasoning | None:
11
+ match = re.match(r"\s*<think>(.*?)</think>(.*)", content, re.DOTALL)
12
+ if match:
13
+ return ContentWithReasoning(
14
+ content=match.group(2).strip(), reasoning=match.group(1).strip()
15
+ )
16
+ else:
17
+ return None
@@ -8,7 +8,7 @@ from inspect_ai._util.pattern import (
8
8
  )
9
9
 
10
10
  from ._metrics import accuracy, stderr
11
- from ._pattern import pattern
11
+ from ._pattern import pattern as make_pattern
12
12
  from ._scorer import Scorer, scorer
13
13
 
14
14
 
@@ -33,7 +33,7 @@ class AnswerPattern(str, Enum):
33
33
 
34
34
 
35
35
  @scorer(metrics=[accuracy(), stderr()])
36
- def answer(type: Literal["letter", "word", "line"]) -> Scorer:
36
+ def answer(pattern: Literal["letter", "word", "line"]) -> Scorer:
37
37
  """Scorer for model output that preceded answers with ANSWER:.
38
38
 
39
39
  Some solvers including multiple_choice solicit answers from
@@ -43,7 +43,7 @@ def answer(type: Literal["letter", "word", "line"]) -> Scorer:
43
43
  Note that you must specify a `pattern` for the answer scorer.
44
44
 
45
45
  Args:
46
- type: (Literal["letter", "word", "line"]): Type of answer
46
+ pattern: (Literal["letter", "word", "line"]): Type of answer
47
47
  to extract. "letter" is used with multiple choice and
48
48
  extracts a single letter; "word" will extract the next
49
49
  word (often used for yes/no answers); "line" will take
@@ -53,10 +53,10 @@ def answer(type: Literal["letter", "word", "line"]) -> Scorer:
53
53
  with a separate line at the end.
54
54
 
55
55
  """
56
- match type:
56
+ match pattern:
57
57
  case "letter":
58
- return pattern(AnswerPattern.LETTER)
58
+ return make_pattern(AnswerPattern.LETTER)
59
59
  case "word":
60
- return pattern(AnswerPattern.WORD)
60
+ return make_pattern(AnswerPattern.WORD)
61
61
  case "line":
62
- return pattern(AnswerPattern.LINE)
62
+ return make_pattern(AnswerPattern.LINE)
@@ -12,7 +12,7 @@ from ._target import Target
12
12
 
13
13
  @scorer(metrics=[mean(), stderr()])
14
14
  def f1(
15
- answer_fn: Callable[[str], str] | None = None,
15
+ answer_fn: Callable[[str], str] | None = None, stop_words: list[str] | None = None
16
16
  ) -> Scorer:
17
17
  """Scorer which produces an F1 score
18
18
 
@@ -26,7 +26,7 @@ def f1(
26
26
  )
27
27
  targets = target.target
28
28
 
29
- f1_score = max_f1_score(answer, targets)
29
+ f1_score = max_f1_score(answer, targets, stop_words=stop_words)
30
30
  return Score(
31
31
  value=f1_score,
32
32
  answer=answer,
@@ -53,12 +53,14 @@ def exact() -> Scorer:
53
53
  return score
54
54
 
55
55
 
56
- def max_f1_score(answer: str, targets: List[str]) -> float:
56
+ def max_f1_score(
57
+ answer: str, targets: List[str], stop_words: list[str] | None = None
58
+ ) -> float:
57
59
  # Find the maximum F1 score for this answer
58
60
  max_f1 = 0.0
59
61
  for target in targets:
60
62
  if target[0].strip():
61
- f1_score = compute_f1(answer, target)
63
+ f1_score = compute_f1(answer, target, stop_words)
62
64
  max_f1 = max(max_f1, f1_score)
63
65
  return round(max_f1, 2)
64
66
 
@@ -75,18 +77,16 @@ def max_exact_score(answer: str, targets: List[str]) -> float:
75
77
  return max_exact
76
78
 
77
79
 
78
def compute_f1(answer: str, target: str, stop_words: list[str] | None = None) -> float:
    """Score a predicted answer against one gold answer with ``_f1``.

    Both strings are first turned into word sets by ``_to_words`` (which
    receives ``stop_words``); the float produced by ``_f1`` for those two
    sets is returned.
    """
    return _f1(
        answer_words=_to_words(answer, stop_words),
        target_words=_to_words(target, stop_words),
    )
84
86
 
85
87
 
86
def _to_words(answer: str, stop_words: list[str] | None = None) -> set[str]:
    """Return the distinct whitespace-separated words of the normalized answer."""
    return set(_normalize(answer, stop_words).split())
92
92
 
@@ -147,16 +147,32 @@ def _tokenize(text: str) -> List[str]:
147
147
  return re.split(" |-", text)
148
148
 
149
149
 
150
def _normalize(text: str, stop_words: list[str] | None = None) -> str:
    """Normalize text to remove extraneous characters and words.

    Args:
        text: The text to normalize.
        stop_words: Optional words to drop. Each is passed through
            ``_normalize_token`` first, so matching is done on normalized
            forms.

    Returns:
        The surviving normalized tokens joined by single spaces.
    """
    # A set gives constant-time membership tests; None and [] both yield
    # an empty set, i.e. nothing is filtered.
    folded_stop_words = {_normalize_token(word) for word in stop_words or []}

    tokens = []
    for token in _tokenize(text):
        token = _normalize_token(token)
        # drop stop words as well as tokens that normalized to nothing
        if token.strip() and token not in folded_stop_words:
            tokens.append(token)

    # re-join the tokens into a normalized string
    return " ".join(tokens).strip()
171
+
172
+
173
def _normalize_token(token: str) -> str:
    """Case-fold a token and run it through the normalization helpers in order."""
    normalized = _remove_punc(token.casefold())
    for step in (_normalize_number, _remove_articles, _normalize_whitespace):
        normalized = step(normalized)
    return normalized
@@ -25,19 +25,13 @@ def str_match_scorer(match: Callable[[str, str], tuple[str, bool]]) -> Scorer:
25
25
  for value in target:
26
26
  answer, matched = match(state.output.completion, value)
27
27
  if matched:
28
- explanation = (
29
- state.output.completion
30
- if state.output.completion != answer
31
- else None
32
- )
33
28
  return Score(
34
29
  value=CORRECT, answer=answer, explanation=state.output.completion
35
30
  )
36
31
 
37
- explanation = (
38
- state.output.completion if state.output.completion != answer else None
32
+ return Score(
33
+ value=INCORRECT, answer=answer, explanation=state.output.completion
39
34
  )
40
- return Score(value=INCORRECT, answer=answer, explanation=explanation)
41
35
 
42
36
  return score
43
37
 
@@ -1,8 +1,9 @@
1
1
  from logging import getLogger
2
- from typing import Callable, cast
2
+ from typing import Awaitable, Callable, cast
3
3
 
4
4
  from typing_extensions import TypedDict, Unpack
5
5
 
6
+ from inspect_ai._util._async import is_callable_coroutine
6
7
  from inspect_ai.model._cache import CachePolicy
7
8
  from inspect_ai.model._call_tools import call_tools
8
9
  from inspect_ai.model._chat_message import ChatMessageTool, ChatMessageUser
@@ -58,7 +59,9 @@ def basic_agent(
58
59
  max_tool_output: int | None = None,
59
60
  score_value: ValueToFloat | None = None,
60
61
  incorrect_message: str
61
- | Callable[[TaskState, list[Score]], str] = DEFAULT_INCORRECT_MESSAGE,
62
+ | Callable[
63
+ [TaskState, list[Score]], str | Awaitable[str]
64
+ ] = DEFAULT_INCORRECT_MESSAGE,
62
65
  continue_message: str = DEFAULT_CONTINUE_MESSAGE,
63
66
  submit_name: str = DEFAULT_SUBMIT_NAME,
64
67
  submit_description: str = DEFAULT_SUBMIT_DESCRIPTION,
@@ -93,8 +96,9 @@ def basic_agent(
93
96
  Defaults to max_tool_output from active GenerateConfig.
94
97
  score_value (ValueToFloat): Function used to extract float from scores (defaults
95
98
  to standard value_to_float())
96
- incorrect_message (str | Callable[[TaskState, list[Score]], str]): User message reply for an
97
- incorrect submission from the model. Alternatively, a function which returns a message.
99
+ incorrect_message (str | Callable[[TaskState, list[Score]], str | Awaitable[str]]):
100
+ User message reply for an incorrect submission from the model. Alternatively,
101
+ a function which returns a message (function may optionally be async)
98
102
  continue_message (str): User message to urge the model to continue when it
99
103
  doesn't make a tool call.
100
104
  submit_name (str): Name for tool used to make submissions
@@ -216,11 +220,17 @@ def basic_agent(
216
220
 
217
221
  # otherwise notify the model that it was incorrect and continue
218
222
  else:
219
- response_message = (
220
- incorrect_message(state, answer_scores)
221
- if callable(incorrect_message)
222
- else incorrect_message
223
- )
223
+ if is_callable_coroutine(incorrect_message):
224
+ response_message: str = await incorrect_message(
225
+ state, answer_scores
226
+ ) # type: ignore[misc,operator]
227
+ elif callable(incorrect_message):
228
+ response_message = cast(
229
+ str, incorrect_message(state, answer_scores)
230
+ )
231
+ else:
232
+ response_message = incorrect_message
233
+
224
234
  state.messages.append(
225
235
  ChatMessageUser(content=response_message)
226
236
  )
@@ -1,13 +1,19 @@
1
+ import logging
1
2
  import re
2
3
  from enum import Enum
3
4
  from random import Random
4
- from typing import Match
5
+ from typing import Match, TypedDict
5
6
 
7
+ from typing_extensions import Unpack
8
+
9
+ from inspect_ai._util.logger import warn_once
6
10
  from inspect_ai.util import resource
7
11
 
8
12
  from ._solver import Generate, Solver, solver
9
13
  from ._task_state import Choices, TaskState
10
14
 
15
+ logger = logging.getLogger(__name__)
16
+
11
17
  SINGLE_ANSWER_TEMPLATE = r"""
12
18
  Answer the following multiple choice question. The entire content of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of {letters}.
13
19
 
@@ -201,13 +207,17 @@ class MultipleChoiceTemplate(str, Enum):
201
207
  MULTIPLE_ANSWER_COT = MULTIPLE_ANSWER_TEMPLATE_COT
202
208
 
203
209
 
210
+ class DeprecatedArgs(TypedDict, total=False):
211
+ shuffle: bool | Random
212
+
213
+
204
214
  @solver
205
215
  def multiple_choice(
206
216
  *,
207
217
  template: str | None = None,
208
218
  cot: bool = False,
209
219
  multiple_correct: bool = False,
210
- shuffle: bool | Random = False,
220
+ **kwargs: Unpack[DeprecatedArgs],
211
221
  ) -> Solver:
212
222
  """Multiple choice question solver.
213
223
 
@@ -223,10 +233,7 @@ def multiple_choice(
223
233
 
224
234
  ### Shuffling
225
235
 
226
- If the choices are shuffled, we will unshuffle them in the message history
227
- after the model has been called, essentially rewriting history. It is
228
- something to be aware of if writing custom scorers or solvers that interact
229
- with this scorer.
236
+ You can shuffle choices when you load your dataset by using the `shuffle_choices` method or parameter of the datasets API.
230
237
 
231
238
  Args:
232
239
  template (str | None): Template to use for the multiple choice question.
@@ -243,10 +250,18 @@ def multiple_choice(
243
250
  squares? A) 3, B) 4, C) 9" has multiple correct answers, B and C. Leave
244
251
  as `False` if there's exactly one correct answer from the choices
245
252
  available. NOTE: this has no effect if you provide a custom template.
246
- shuffle (bool | Random): Default `False`. Whether to shuffle the choices
247
- in the multiple. Passing a `Random` instance will use that for shuffling,
248
- if `True` a new `Random` instance will be created.
253
+ **kwargs (Any): Deprecated arguments for backward compatibility.
249
254
  """
255
+ shuffle: bool | Random = False
256
+ if "shuffle" in kwargs:
257
+ shuffle = kwargs["shuffle"]
258
+
259
+ if shuffle:
260
+ warn_once(
261
+ logger,
262
+ "The multiple choice shuffle parameter is deprecated. Please shuffle choices at the time your dataset is read by using the shuffle_choices method/parameter of the datasets API.",
263
+ )
264
+
250
265
  if template and not valid_template(template):
251
266
  raise ValueError(
252
267
  "The template must contain '{question}' and '{choices}' placeholders for string substitution."
@@ -21,12 +21,14 @@ from ._tool_def import ToolDef
21
21
  from ._tool_info import ToolInfo
22
22
  from ._tool_params import ToolParam, ToolParams
23
23
  from ._tool_with import tool_with
24
+ from ._tools._computer import computer
24
25
  from ._tools._execute import bash, python
25
26
  from ._tools._web_browser import web_browser
26
27
  from ._tools._web_search import web_search
27
28
 
28
29
  __all__ = [
29
30
  "bash",
31
+ "computer",
30
32
  "python",
31
33
  "web_browser",
32
34
  "web_search",
@@ -2,10 +2,7 @@ from typing import Awaitable, Callable
2
2
 
3
3
  from inspect_ai._util.content import Content, ContentImage, ContentText
4
4
  from inspect_ai.tool import Tool, ToolResult, tool
5
- from inspect_ai.tool._tool import (
6
- TOOL_INIT_MODEL_INPUT,
7
- ToolParsingError,
8
- )
5
+ from inspect_ai.tool._tool import TOOL_INIT_MODEL_INPUT, ToolParsingError
9
6
  from inspect_ai.tool._tool_call import ToolCallModelInput
10
7
 
11
8
  from . import _common as common
@@ -84,7 +81,7 @@ def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool
84
81
  if coordinate is not None:
85
82
  raise ToolParsingError(f"coordinate is not accepted for {action}")
86
83
  if not isinstance(text, str):
87
- raise ToolParsingError(output=f"{text} must be a string")
84
+ raise ToolParsingError(f"{text} must be a string")
88
85
 
89
86
  if action == "key":
90
87
  return await common.press_key(text, timeout=timeout)
@@ -60,6 +60,10 @@ RUN apt-get install -y \
60
60
  # configure noVNC
61
61
  RUN ln -s /usr/share/novnc/vnc.html /usr/share/novnc/index.html
62
62
 
63
+ # configure python alias
64
+ RUN ln -s /usr/bin/python3 /usr/bin/python
65
+
66
+
63
67
  # We copy requirements.txt by itself so that changes to the scripts will be in a later layer
64
68
  # and we only pip install if requirements.txt changes
65
69
  COPY tool/requirements.txt /opt/inspect/tool/requirements.txt
@@ -0,0 +1,3 @@
1
+ {
2
+ "security.workspace.trust.enabled": false
3
+ }
@@ -0,0 +1,61 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+
3
+ <channel name="xfce4-panel" version="1.0">
4
+ <property name="configver" type="int" value="2"/>
5
+ <property name="panels" type="array">
6
+ <value type="int" value="1"/>
7
+ <property name="dark-mode" type="bool" value="true"/>
8
+ <property name="panel-1" type="empty">
9
+ <property name="position" type="string" value="p=6;x=0;y=0"/>
10
+ <property name="length" type="uint" value="100"/>
11
+ <property name="position-locked" type="bool" value="true"/>
12
+ <property name="icon-size" type="uint" value="16"/>
13
+ <property name="size" type="uint" value="26"/>
14
+ <property name="plugin-ids" type="array">
15
+ <value type="int" value="1"/>
16
+ <value type="int" value="2"/>
17
+ <value type="int" value="3"/>
18
+ <value type="int" value="4"/>
19
+ <value type="int" value="5"/>
20
+ <value type="int" value="6"/>
21
+ <value type="int" value="8"/>
22
+ <value type="int" value="10"/>
23
+ <value type="int" value="11"/>
24
+ <value type="int" value="12"/>
25
+ <value type="int" value="13"/>
26
+ <value type="int" value="14"/>
27
+ </property>
28
+ </property>
29
+ </property>
30
+ <property name="plugins" type="empty">
31
+ <property name="plugin-1" type="string" value="applicationsmenu"/>
32
+ <property name="plugin-2" type="string" value="tasklist">
33
+ <property name="grouping" type="uint" value="1"/>
34
+ </property>
35
+ <property name="plugin-3" type="string" value="separator">
36
+ <property name="expand" type="bool" value="true"/>
37
+ <property name="style" type="uint" value="0"/>
38
+ </property>
39
+ <property name="plugin-4" type="string" value="pager"/>
40
+ <property name="plugin-5" type="string" value="separator">
41
+ <property name="style" type="uint" value="0"/>
42
+ </property>
43
+ <property name="plugin-6" type="string" value="systray">
44
+ <property name="square-icons" type="bool" value="true"/>
45
+ </property>
46
+ <property name="plugin-8" type="string" value="pulseaudio">
47
+ <property name="enable-keyboard-shortcuts" type="bool" value="true"/>
48
+ <property name="show-notifications" type="bool" value="true"/>
49
+ </property>
50
+ <property name="plugin-9" type="string" value="power-manager-plugin"/>
51
+ <property name="plugin-10" type="string" value="notification-plugin"/>
52
+ <property name="plugin-11" type="string" value="separator">
53
+ <property name="style" type="uint" value="0"/>
54
+ </property>
55
+ <property name="plugin-12" type="string" value="clock"/>
56
+ <property name="plugin-13" type="string" value="separator">
57
+ <property name="style" type="uint" value="0"/>
58
+ </property>
59
+ <property name="plugin-14" type="string" value="actions"/>
60
+ </property>
61
+ </channel>
@@ -0,0 +1,10 @@
1
+ [Desktop Entry]
2
+ Version=1.0
3
+ Type=Application
4
+ Name=Terminal
5
+ Comment=Open Terminal
6
+ Exec=/usr/bin/exo-open --launch TerminalEmulator
7
+ Icon=utilities-terminal
8
+ Path=
9
+ Terminal=false
10
+ StartupNotify=false
@@ -138,7 +138,7 @@ class X11Client:
138
138
  if coordinate is not None:
139
139
  raise ToolError(f"coordinate is not accepted for {action}")
140
140
  if not isinstance(text, str):
141
- raise ToolError(output=f"{text} must be a string")
141
+ raise ToolError(f"{text} must be a string")
142
142
 
143
143
  if action == "key":
144
144
  return await self.shell(
@@ -74,8 +74,14 @@ def python(timeout: int | None = None, user: str | None = None) -> Tool:
74
74
  """
75
75
  Use the python function to execute Python code.
76
76
 
77
- The python function will only return you the stdout of the script,
78
- so make sure to use print to see the output.
77
+ The Python tool executes single-run Python scripts. Important notes:
78
+ 1. Each execution is independent - no state is preserved between runs
79
+ 2. You must explicitly use print() statements to see any output
80
+ 3. Simply writing expressions (like in notebooks) will not display results
81
+ 4. The script cannot accept interactive input during execution
82
+ 5. Return statements alone won't produce visible output
83
+ 6. All variables and imports are cleared between executions
84
+ 7. Standard output (via print()) is the only way to see results
79
85
 
80
86
  Args:
81
87
  code (str): The python code to execute.
@@ -0,0 +1,3 @@
1
+ from inspect_ai._util.deprecation import relocated_module_attribute
2
+
3
+ relocated_module_attribute("computer", "inspect_ai.tool.computer", "0.3.62", "0.4")