inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (518)
  1. inspect_ai/_cli/cache.py +8 -7
  2. inspect_ai/_cli/common.py +0 -12
  3. inspect_ai/_cli/eval.py +32 -4
  4. inspect_ai/_cli/info.py +1 -0
  5. inspect_ai/_cli/list.py +1 -1
  6. inspect_ai/_cli/log.py +2 -0
  7. inspect_ai/_cli/main.py +1 -1
  8. inspect_ai/_cli/sandbox.py +4 -1
  9. inspect_ai/_cli/score.py +181 -32
  10. inspect_ai/_cli/trace.py +10 -0
  11. inspect_ai/_cli/view.py +4 -2
  12. inspect_ai/_display/core/active.py +2 -3
  13. inspect_ai/_display/core/config.py +7 -1
  14. inspect_ai/_display/textual/widgets/samples.py +4 -3
  15. inspect_ai/_display/textual/widgets/sandbox.py +6 -0
  16. inspect_ai/_eval/eval.py +104 -101
  17. inspect_ai/_eval/evalset.py +75 -75
  18. inspect_ai/_eval/loader.py +122 -12
  19. inspect_ai/_eval/registry.py +1 -1
  20. inspect_ai/_eval/run.py +14 -0
  21. inspect_ai/_eval/score.py +125 -36
  22. inspect_ai/_eval/task/log.py +105 -4
  23. inspect_ai/_eval/task/results.py +92 -38
  24. inspect_ai/_eval/task/run.py +9 -2
  25. inspect_ai/_eval/task/sandbox.py +35 -2
  26. inspect_ai/_eval/task/task.py +49 -46
  27. inspect_ai/_util/constants.py +1 -1
  28. inspect_ai/_util/content.py +8 -0
  29. inspect_ai/_util/error.py +2 -0
  30. inspect_ai/_util/file.py +15 -1
  31. inspect_ai/_util/hash.py +1 -1
  32. inspect_ai/_util/logger.py +4 -2
  33. inspect_ai/_util/registry.py +7 -1
  34. inspect_ai/_view/view.py +1 -2
  35. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  36. inspect_ai/_view/www/.vscode/settings.json +8 -0
  37. inspect_ai/_view/www/App.css +97 -29
  38. inspect_ai/_view/www/README.md +1 -1
  39. inspect_ai/_view/www/dist/assets/index.css +16663 -14674
  40. inspect_ai/_view/www/dist/assets/index.js +58808 -51348
  41. inspect_ai/_view/www/dist/index.html +1 -1
  42. inspect_ai/_view/www/index.html +2 -2
  43. inspect_ai/_view/www/log-schema.json +87 -73
  44. inspect_ai/_view/www/package.json +22 -4
  45. inspect_ai/_view/www/postcss.config.cjs +8 -9
  46. inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
  47. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  48. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  49. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  50. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  51. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  52. inspect_ai/_view/www/src/api/index.ts +4 -4
  53. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  54. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  55. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  56. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  57. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  58. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  59. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  60. inspect_ai/_view/www/src/components/Card.css +60 -0
  61. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  62. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  63. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  64. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  65. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  66. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  67. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  68. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  69. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  70. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  71. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  72. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  73. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  74. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  75. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  76. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  77. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  78. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  79. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  80. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  81. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  82. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  83. inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
  84. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  85. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  86. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  87. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  88. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  89. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  90. inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
  91. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  92. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  93. inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
  94. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  95. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  96. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  97. inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
  98. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  99. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  100. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  101. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  102. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  103. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
  104. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  105. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  106. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  107. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  108. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  109. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  110. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  111. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  112. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  113. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  114. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  115. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  116. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  117. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  118. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
  119. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  120. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  121. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  122. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  123. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  124. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  126. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  127. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
  128. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  129. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
  130. inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
  131. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  132. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  133. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  134. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  135. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  136. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
  137. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  138. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  139. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  140. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
  141. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  142. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
  143. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  144. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
  145. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  146. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
  147. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  148. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
  149. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  150. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  151. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
  152. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
  153. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  154. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  155. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  156. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  157. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  158. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  159. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  160. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  161. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  162. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  163. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  164. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  165. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  166. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  167. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  168. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  169. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  170. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  171. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  172. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  173. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  174. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  175. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  176. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  177. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  179. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  180. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  181. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  182. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
  183. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  184. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
  185. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  186. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  187. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  188. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  189. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  190. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  191. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  192. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  193. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  194. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  196. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  197. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  198. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  199. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  200. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  201. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  202. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  203. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  204. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  205. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  206. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
  207. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  208. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  209. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  210. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  211. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  212. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  213. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  214. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  215. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
  216. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  217. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  218. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  219. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  220. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  221. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
  222. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  223. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
  224. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  225. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  226. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  227. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  228. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  229. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
  230. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  231. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  232. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
  233. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  234. inspect_ai/_view/www/src/types/log.d.ts +108 -19
  235. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  236. inspect_ai/_view/www/src/types.ts +71 -0
  237. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
  238. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  239. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
  240. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  241. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  242. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  243. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  244. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  245. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  246. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  247. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  248. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  249. inspect_ai/_view/www/src/utils/format.ts +194 -0
  250. inspect_ai/_view/www/src/utils/git.ts +7 -0
  251. inspect_ai/_view/www/src/utils/html.ts +6 -0
  252. inspect_ai/_view/www/src/utils/http.ts +14 -0
  253. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  254. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  255. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  256. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  257. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  258. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  259. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  260. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  261. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
  262. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  263. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  264. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  265. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  266. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  267. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
  268. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
  269. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
  270. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  271. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
  272. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  273. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  274. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  275. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  276. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  277. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  278. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  279. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  280. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  281. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  282. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  283. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
  284. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  285. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
  286. inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
  287. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
  288. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  289. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  290. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  291. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  292. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  293. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  294. inspect_ai/_view/www/src/workspace/utils.ts +34 -0
  295. inspect_ai/_view/www/tsconfig.json +23 -9
  296. inspect_ai/_view/www/vite.config.js +8 -17
  297. inspect_ai/_view/www/yarn.lock +627 -556
  298. inspect_ai/approval/_approval.py +2 -0
  299. inspect_ai/approval/_approver.py +4 -4
  300. inspect_ai/approval/_auto.py +1 -1
  301. inspect_ai/approval/_human/approver.py +3 -0
  302. inspect_ai/approval/_policy.py +5 -0
  303. inspect_ai/approval/_registry.py +2 -2
  304. inspect_ai/dataset/_dataset.py +64 -37
  305. inspect_ai/dataset/_sources/__init__.py +0 -0
  306. inspect_ai/dataset/_sources/csv.py +20 -12
  307. inspect_ai/dataset/_sources/file.py +4 -0
  308. inspect_ai/dataset/_sources/hf.py +39 -29
  309. inspect_ai/dataset/_sources/json.py +17 -9
  310. inspect_ai/log/__init__.py +2 -0
  311. inspect_ai/log/_convert.py +3 -3
  312. inspect_ai/log/_file.py +24 -9
  313. inspect_ai/log/_log.py +101 -13
  314. inspect_ai/log/_message.py +4 -2
  315. inspect_ai/log/_recorders/file.py +4 -0
  316. inspect_ai/log/_recorders/json.py +5 -7
  317. inspect_ai/log/_recorders/recorder.py +3 -0
  318. inspect_ai/log/_transcript.py +19 -8
  319. inspect_ai/model/__init__.py +2 -0
  320. inspect_ai/model/_cache.py +39 -21
  321. inspect_ai/model/_call_tools.py +4 -3
  322. inspect_ai/model/_chat_message.py +14 -4
  323. inspect_ai/model/_generate_config.py +1 -1
  324. inspect_ai/model/_model.py +31 -24
  325. inspect_ai/model/_model_output.py +14 -1
  326. inspect_ai/model/_openai.py +10 -18
  327. inspect_ai/model/_providers/anthropic.py +3 -3
  328. inspect_ai/model/_providers/google.py +9 -5
  329. inspect_ai/model/_providers/openai.py +5 -9
  330. inspect_ai/model/_providers/openai_o1.py +3 -5
  331. inspect_ai/model/_providers/openrouter.py +86 -0
  332. inspect_ai/model/_providers/providers.py +11 -0
  333. inspect_ai/scorer/__init__.py +6 -1
  334. inspect_ai/scorer/_answer.py +7 -7
  335. inspect_ai/scorer/_classification.py +38 -18
  336. inspect_ai/scorer/_common.py +2 -8
  337. inspect_ai/scorer/_match.py +4 -5
  338. inspect_ai/scorer/_metric.py +87 -28
  339. inspect_ai/scorer/_metrics/__init__.py +3 -3
  340. inspect_ai/scorer/_metrics/accuracy.py +8 -10
  341. inspect_ai/scorer/_metrics/mean.py +3 -17
  342. inspect_ai/scorer/_metrics/std.py +111 -30
  343. inspect_ai/scorer/_model.py +12 -12
  344. inspect_ai/scorer/_pattern.py +3 -3
  345. inspect_ai/scorer/_reducer/reducer.py +36 -21
  346. inspect_ai/scorer/_reducer/registry.py +2 -2
  347. inspect_ai/scorer/_reducer/types.py +7 -1
  348. inspect_ai/scorer/_score.py +11 -1
  349. inspect_ai/scorer/_scorer.py +110 -16
  350. inspect_ai/solver/__init__.py +1 -1
  351. inspect_ai/solver/_basic_agent.py +19 -22
  352. inspect_ai/solver/_bridge/__init__.py +0 -3
  353. inspect_ai/solver/_bridge/bridge.py +3 -3
  354. inspect_ai/solver/_chain.py +1 -2
  355. inspect_ai/solver/_critique.py +3 -3
  356. inspect_ai/solver/_fork.py +2 -2
  357. inspect_ai/solver/_human_agent/__init__.py +0 -0
  358. inspect_ai/solver/_human_agent/agent.py +5 -8
  359. inspect_ai/solver/_human_agent/commands/clock.py +14 -10
  360. inspect_ai/solver/_human_agent/commands/note.py +1 -1
  361. inspect_ai/solver/_human_agent/commands/score.py +0 -11
  362. inspect_ai/solver/_multiple_choice.py +38 -26
  363. inspect_ai/solver/_prompt.py +7 -7
  364. inspect_ai/solver/_solver.py +53 -52
  365. inspect_ai/solver/_task_state.py +80 -69
  366. inspect_ai/solver/_use_tools.py +9 -9
  367. inspect_ai/tool/__init__.py +4 -1
  368. inspect_ai/tool/_tool.py +43 -14
  369. inspect_ai/tool/_tool_call.py +6 -2
  370. inspect_ai/tool/_tool_choice.py +3 -1
  371. inspect_ai/tool/_tool_def.py +10 -8
  372. inspect_ai/tool/_tool_params.py +24 -0
  373. inspect_ai/tool/_tool_with.py +7 -7
  374. inspect_ai/tool/_tools/__init__.py +0 -0
  375. inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
  376. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
  377. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  378. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  379. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  380. inspect_ai/tool/_tools/_execute.py +23 -11
  381. inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
  382. inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
  383. inspect_ai/tool/_tools/_web_search.py +7 -5
  384. inspect_ai/tool/beta.py +3 -0
  385. inspect_ai/util/_concurrency.py +3 -3
  386. inspect_ai/util/_panel.py +2 -0
  387. inspect_ai/util/_resource.py +12 -12
  388. inspect_ai/util/_sandbox/docker/compose.py +23 -20
  389. inspect_ai/util/_sandbox/docker/config.py +2 -1
  390. inspect_ai/util/_sandbox/docker/docker.py +42 -86
  391. inspect_ai/util/_sandbox/docker/service.py +100 -0
  392. inspect_ai/util/_sandbox/environment.py +99 -96
  393. inspect_ai/util/_sandbox/self_check.py +124 -16
  394. inspect_ai/util/_subprocess.py +5 -3
  395. inspect_ai/util/_subtask.py +15 -16
  396. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
  397. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
  398. inspect_ai-0.3.64.dist-info/RECORD +625 -0
  399. inspect_ai/_view/www/src/Register.mjs +0 -3
  400. inspect_ai/_view/www/src/Types.mjs +0 -38
  401. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  402. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  403. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  404. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  405. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  406. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  407. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  408. inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
  409. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  410. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  411. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  412. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  413. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  414. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  415. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  416. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  417. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  418. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  419. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  420. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  421. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  422. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  423. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  424. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  425. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  426. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  427. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  428. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  429. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  430. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  431. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  432. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  433. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  434. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  435. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  436. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  437. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  438. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  439. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  440. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  441. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  442. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  443. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  444. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  445. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  446. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  447. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  448. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  449. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  450. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  451. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  452. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  453. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  454. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  455. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  456. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  457. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  458. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  459. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  460. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  461. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  462. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  463. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  464. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  465. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  466. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  467. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  468. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  469. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  470. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  471. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  472. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  473. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  474. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  475. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  476. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  477. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
  478. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  479. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  480. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  481. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  482. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  483. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  484. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  485. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  486. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  487. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  488. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  489. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  490. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  491. inspect_ai/tool/beta/__init__.py +0 -5
  492. inspect_ai-0.3.62.dist-info/RECORD +0 -481
  493. /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
  494. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
  495. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  496. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  497. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  498. /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
  499. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  500. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  501. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  502. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  503. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  504. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  505. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  506. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
  507. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
  508. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  509. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  510. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
  511. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  512. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  513. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  514. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  515. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  516. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
  517. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
  518. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -123,11 +123,11 @@ async def computer_sandbox() -> SandboxEnvironment:
123
123
  else:
124
124
  raise PrerequisiteError(
125
125
  dedent("""
126
- The computer tool service was not found in any of the sandboxes for this sample. Please add the computer tool service to your configuration. For example, the following Docker compose file uses the aisiuk/inspect-computer-tool:latest-beta image as its default sandbox:
126
+ The computer tool service was not found in any of the sandboxes for this sample. Please add the computer tool service to your configuration. For example, the following Docker compose file uses the aisiuk/inspect-computer-tool:latest image as its default sandbox:
127
127
 
128
128
  services:
129
129
  default:
130
- image: "aisiuk/inspect-computer-tool:latest-beta"
130
+ image: "aisiuk/inspect-computer-tool:latest"
131
131
  init: true
132
132
  """).strip()
133
133
  )
@@ -2,10 +2,7 @@ from typing import Awaitable, Callable
2
2
 
3
3
  from inspect_ai._util.content import Content, ContentImage, ContentText
4
4
  from inspect_ai.tool import Tool, ToolResult, tool
5
- from inspect_ai.tool._tool import (
6
- TOOL_INIT_MODEL_INPUT,
7
- ToolParsingError,
8
- )
5
+ from inspect_ai.tool._tool import TOOL_INIT_MODEL_INPUT, ToolParsingError
9
6
  from inspect_ai.tool._tool_call import ToolCallModelInput
10
7
 
11
8
  from . import _common as common
@@ -16,6 +13,17 @@ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
16
13
 
17
14
  @tool
18
15
  def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool:
16
+ """Desktop computer tool.
17
+
18
+ See documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-computer>.
19
+
20
+ Args:
21
+ max_screenshots: The maximum number of screenshots to play
22
+ back to the model as input. Defaults to 1 (set to `None` to have no limit).
23
+ timeout: Timeout in seconds for computer tool actions.
24
+ Defaults to 180 (set to `None` for no timeout).
25
+ """
26
+
19
27
  async def execute(
20
28
  action: Action,
21
29
  text: str | None = None,
@@ -84,7 +92,7 @@ def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool
84
92
  if coordinate is not None:
85
93
  raise ToolParsingError(f"coordinate is not accepted for {action}")
86
94
  if not isinstance(text, str):
87
- raise ToolParsingError(output=f"{text} must be a string")
95
+ raise ToolParsingError(f"{text} must be a string")
88
96
 
89
97
  if action == "key":
90
98
  return await common.press_key(text, timeout=timeout)
@@ -138,7 +138,7 @@ class X11Client:
138
138
  if coordinate is not None:
139
139
  raise ToolError(f"coordinate is not accepted for {action}")
140
140
  if not isinstance(text, str):
141
- raise ToolError(output=f"{text} must be a string")
141
+ raise ToolError(f"{text} must be a string")
142
142
 
143
143
  if action == "key":
144
144
  return await self.shell(
@@ -1,4 +1,4 @@
1
- from inspect_ai.util import sandbox
1
+ from inspect_ai.util import sandbox as sandbox_env
2
2
 
3
3
  from .._tool import Tool, tool
4
4
  from .._tool_call import ToolCall, ToolCallContent, ToolCallView, ToolCallViewer
@@ -20,14 +20,17 @@ def code_viewer(language: str, code_param: str) -> ToolCallViewer:
20
20
 
21
21
 
22
22
  @tool(viewer=code_viewer("bash", "cmd"))
23
- def bash(timeout: int | None = None, user: str | None = None) -> Tool:
23
+ def bash(
24
+ timeout: int | None = None, user: str | None = None, sandbox: str | None = None
25
+ ) -> Tool:
24
26
  """Bash shell command execution tool.
25
27
 
26
28
  Execute bash shell commands using a sandbox environment (e.g. "docker").
27
29
 
28
30
  Args:
29
- timeout (int | None): Timeout (in seconds) for command.
30
- user (str | None): User to execute commands as.
31
+ timeout: Timeout (in seconds) for command.
32
+ user: User to execute commands as.
33
+ sandbox: Optional sandbox environment name.
31
34
 
32
35
  Returns:
33
36
  String with command output (stdout) or command error (stderr).
@@ -44,7 +47,7 @@ def bash(timeout: int | None = None, user: str | None = None) -> Tool:
44
47
  The output of the command.
45
48
  """
46
49
  # execute the command
47
- result = await sandbox().exec(
50
+ result = await sandbox_env(sandbox).exec(
48
51
  cmd=["bash", "--login", "-c", cmd], timeout=timeout, user=user
49
52
  )
50
53
  # return output (including stderr if any)
@@ -57,14 +60,17 @@ def bash(timeout: int | None = None, user: str | None = None) -> Tool:
57
60
 
58
61
 
59
62
  @tool(viewer=code_viewer("python", "code"))
60
- def python(timeout: int | None = None, user: str | None = None) -> Tool:
63
+ def python(
64
+ timeout: int | None = None, user: str | None = None, sandbox: str | None = None
65
+ ) -> Tool:
61
66
  """Python code execution tool.
62
67
 
63
68
  Execute Python code using a sandbox environment (e.g. "docker").
64
69
 
65
70
  Args:
66
- timeout (int | None): Timeout (in seconds) for command.
67
- user (str | None): User to execute commands as.
71
+ timeout: Timeout (in seconds) for command.
72
+ user: User to execute commands as.
73
+ sandbox: Optional sandbox environment name.
68
74
 
69
75
  Returns:
70
76
  String with command output (stdout) or command error (stderr).
@@ -74,8 +80,14 @@ def python(timeout: int | None = None, user: str | None = None) -> Tool:
74
80
  """
75
81
  Use the python function to execute Python code.
76
82
 
77
- The python function will only return you the stdout of the script,
78
- so make sure to use print to see the output.
83
+ The Python tool executes single-run Python scripts. Important notes:
84
+ 1. Each execution is independent - no state is preserved between runs
85
+ 2. You must explicitly use print() statements to see any output
86
+ 3. Simply writing expressions (like in notebooks) will not display results
87
+ 4. The script cannot accept interactive input during execution
88
+ 5. Return statements alone won't produce visible output
89
+ 6. All variables and imports are cleared between executions
90
+ 7. Standard output (via print()) is the only way to see results
79
91
 
80
92
  Args:
81
93
  code (str): The python code to execute.
@@ -83,7 +95,7 @@ def python(timeout: int | None = None, user: str | None = None) -> Tool:
83
95
  Returns:
84
96
  The output of the Python code.
85
97
  """
86
- result = await sandbox().exec(
98
+ result = await sandbox_env(sandbox).exec(
87
99
  cmd=["python3"], input=code, timeout=timeout, user=user
88
100
  )
89
101
  # return output (including stderr if any)
@@ -40,7 +40,7 @@ The result will be printed out in _stdout_ in the following format:
40
40
  error: <an ERROR message if one occurred>
41
41
  info: <general info about the container>
42
42
  web_url: <the URL of the page the browser is currently at>
43
- wen_at: <accessibility tree of the visible elements of the page>
43
+ web_at: <accessibility tree of the visible elements of the page>
44
44
  ```
45
45
 
46
46
 
@@ -57,7 +57,7 @@ The tool consists of the following components:
57
57
  * _web_environment.py_ - an environment which gets instantiated by the servicer and which launches the browser, stores its state and maps client commands to Playwright API.
58
58
  * _playwright_crawler.py_ - a wrapper over the sync Playwright API.
59
59
 
60
- * [WebClient](web_client.py) - a simple stateless client to interract with the server. When launched, the client:
60
+ * [WebClient](web_client.py) - a simple stateless client to interact with the server. When launched, the client:
61
61
  1. creates a connection with the server;
62
62
  2. sends user command to the server;
63
63
  3. receives the response in the form of observations and prints them to stdout;
@@ -16,10 +16,12 @@ from inspect_ai.util._store_model import StoreModel, store_as
16
16
  def web_browser(interactive: bool = True) -> list[Tool]:
17
17
  """Tools used for web browser navigation.
18
18
 
19
+ See documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-web-browser>.
20
+
19
21
  Args:
20
- interactive (bool): Provide interactive tools (enable
21
- clicking, typing, and submitting forms). Defaults
22
- to True.
22
+ interactive: Provide interactive tools (enable
23
+ clicking, typing, and submitting forms). Defaults
24
+ to True.
23
25
 
24
26
  Returns:
25
27
  List of tools used for web browser navigation.
@@ -41,14 +41,16 @@ def web_search(
41
41
  A web search is conducted using the specified provider, the results are parsed for relevance
42
42
  using the specified model, and the top 'num_results' relevant pages are returned.
43
43
 
44
+ See further documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-web-search>.
45
+
44
46
  Args:
45
- provider (Literal["google"]): Search provider (defaults to "google", currently
47
+ provider: Search provider (defaults to "google", currently
46
48
  the only provider). Possible future providers include "brave" and "bing".
47
- num_results (int): Number of web search result pages to return to the model.
48
- max_provider_calls (int): Maximum number of search calls to make to the search provider.
49
- max_connections (int): Maximum number of concurrent connections to API
49
+ num_results: Number of web search result pages to return to the model.
50
+ max_provider_calls: Maximum number of search calls to make to the search provider.
51
+ max_connections: Maximum number of concurrent connections to API
50
52
  endpoint of search provider.
51
- model (str | Model): Model used to parse web pages for relevance.
53
+ model: Model used to parse web pages for relevance.
52
54
 
53
55
  Returns:
54
56
  A tool that can be registered for use by models to search the web.
@@ -0,0 +1,3 @@
1
+ from inspect_ai._util.deprecation import relocated_module_attribute
2
+
3
+ relocated_module_attribute("computer", "inspect_ai.tool.computer", "0.3.62", "0.4")
@@ -23,12 +23,12 @@ def concurrency(
23
23
  for launching subprocesses is handled via the `subprocess` function.
24
24
 
25
25
  Args:
26
- name (str): Name for concurrency context. This serves as the
26
+ name: Name for concurrency context. This serves as the
27
27
  display name for the context, and also the unique context
28
28
  key (if the `key` parameter is omitted)
29
- concurrency (int): Maximum number of coroutines that can
29
+ concurrency: Maximum number of coroutines that can
30
30
  enter the context.
31
- key (str | None): Unique context key for this context. Optional.
31
+ key: Unique context key for this context. Optional.
32
32
  Used if the unique key isn't human readable -- e.g. includes
33
33
  api tokens or account ids so that the more readable `name`
34
34
  can be presented to users, e.g. in the console UI.
inspect_ai/util/_panel.py CHANGED
@@ -5,6 +5,8 @@ from typing_extensions import Self
5
5
 
6
6
 
7
7
  class InputPanel(Container):
8
+ """Base class for Inspect input panels."""
9
+
8
10
  DEFAULT_TITLE = "Panel"
9
11
 
10
12
  DEFAULT_CLASSES = "task-input-panel"
@@ -33,18 +33,18 @@ def resource(
33
33
  `resource("templates/prompt.txt", type="file")`
34
34
 
35
35
  Args:
36
- resource (str): Path to local or remote (e.g. s3://)
37
- resource, or for `type="auto"` (the default),
38
- a string containing the literal resource value.
39
- type (Literal["auto", "file"]): For "auto" (the default),
40
- interpret the resource as a literal string if its not
41
- a valid path. For "file", always interpret it as
42
- a file path.
43
- fs_options (dict[str, Any]): Optional. Additional
44
- arguments to pass through to the `fsspec` filesystem
45
- provider (e.g. `S3FileSystem`). Use `{"anon": True }`
46
- if you are accessing a public S3 bucket with no
47
- credentials.
36
+ resource: Path to local or remote (e.g. s3://)
37
+ resource, or for `type="auto"` (the default),
38
+ a string containing the literal resource value.
39
+ type: For "auto" (the default),
40
+ interpret the resource as a literal string if it's not
41
+ a valid path. For "file", always interpret it as
42
+ a file path.
43
+ fs_options: Optional. Additional
44
+ arguments to pass through to the `fsspec` filesystem
45
+ provider (e.g. `S3FileSystem`). Use `{"anon": True }`
46
+ if you are accessing a public S3 bucket with no
47
+ credentials.
48
48
 
49
49
  Returns:
50
50
  Text content of resource.
@@ -3,12 +3,13 @@ import os
3
3
  import shlex
4
4
  from logging import getLogger
5
5
  from pathlib import Path
6
- from typing import Any, Literal, TypedDict, cast
6
+ from typing import Any, Literal, cast
7
7
 
8
8
  import yaml
9
9
  from pydantic import BaseModel
10
10
 
11
11
  from inspect_ai._util.error import PrerequisiteError
12
+ from inspect_ai._util.trace import trace_message
12
13
  from inspect_ai.util._display import display_type
13
14
  from inspect_ai.util._subprocess import ExecResult, subprocess
14
15
 
@@ -16,26 +17,39 @@ from .prereqs import (
16
17
  DOCKER_COMPOSE_REQUIRED_VERSION_PULL_POLICY,
17
18
  validate_docker_compose,
18
19
  )
20
+ from .service import ComposeService, services_healthcheck_time
19
21
  from .util import ComposeProject, is_inspect_project
20
22
 
21
23
  logger = getLogger(__name__)
22
24
 
23
25
  # How long to wait for compose environment to pass a health check
24
- COMPOSE_WAIT = "120"
26
+ COMPOSE_WAIT = 120
25
27
 
26
28
 
27
async def compose_up(
    project: ComposeProject, services: dict[str, ComposeService]
) -> None:
    """Start the services of a compose project and wait for them to come up.

    Args:
        project: Compose project to start.
        services: Service definitions for the project, inspected for
            healthcheck settings in order to size the wait timeout.
    """
    up_command = ["up", "--detach", "--wait"]

    # are there healthchecks in the service definitions? if so then peg our timeout
    # at the maximum total wait time. otherwise, pick a reasonable default
    healthcheck_time = services_healthcheck_time(services)
    if healthcheck_time > 0:
        timeout: int = healthcheck_time
        trace_message(
            logger, "Docker", f"Docker services healthcheck timeout: {timeout}"
        )
    else:
        timeout = COMPOSE_WAIT

    # align global wait timeout to maximum healthcheck timeout (compose is given
    # one second more than the timeout applied to the command itself below)
    up_command.extend(["--wait-timeout", str(timeout + 1)])

    # Start the environment. Note that we don't check the result because docker will
    # return a non-zero exit code for services that exit (even successfully) when
    # passing the --wait flag (see https://github.com/docker/compose/issues/10596).
    # In practice, we will catch any errors when calling compose_check_running()
    # immediately after we call compose_up().
    await compose_command(up_command, project=project, timeout=timeout)
39
53
 
40
54
 
41
55
  async def compose_down(project: ComposeProject, quiet: bool = True) -> None:
@@ -191,17 +205,6 @@ async def compose_exec(
191
205
  )
192
206
 
193
207
 
194
- ComposeService = TypedDict(
195
- "ComposeService",
196
- {
197
- "image": str | None,
198
- "build": str | None,
199
- "x-default": bool | None,
200
- "x-local": bool | None,
201
- },
202
- )
203
-
204
-
205
208
  async def compose_services(project: ComposeProject) -> dict[str, ComposeService]:
206
209
  result = await compose_command(["config"], project=project, timeout=60)
207
210
  if not result.success:
@@ -42,7 +42,8 @@ def find_compose_file(parent: str = "") -> str | None:
42
42
 
43
43
 
44
44
  def is_dockerfile(file: str) -> bool:
45
- return os.path.basename(file) == DOCKERFILE
45
+ path = Path(file)
46
+ return path.name == DOCKERFILE or path.suffix == f".{DOCKERFILE}"
46
47
 
47
48
 
48
49
  def has_dockerfile(parent: str = "") -> bool:
@@ -1,3 +1,4 @@
1
+ import base64
1
2
  import errno
2
3
  import json
3
4
  import os
@@ -8,6 +9,7 @@ from typing import Literal, Union, cast, overload
8
9
 
9
10
  from typing_extensions import override
10
11
 
12
+ from inspect_ai._util.error import PrerequisiteError
11
13
  from inspect_ai.util._subprocess import ExecResult, subprocess
12
14
 
13
15
  from ..environment import (
@@ -34,7 +36,6 @@ from .compose import (
34
36
  compose_build,
35
37
  compose_check_running,
36
38
  compose_cleanup_images,
37
- compose_command,
38
39
  compose_cp,
39
40
  compose_exec,
40
41
  compose_ps,
@@ -85,6 +86,14 @@ class DockerSandboxEnvironment(SandboxEnvironment):
85
86
 
86
87
  services = await compose_services(project)
87
88
  for name, service in services.items():
89
+ # if the service has an explicit container_name then
90
+ # error (as this won't work w/ epochs > 1)
91
+ container_name = service.get("container_name", None)
92
+ if container_name:
93
+ raise PrerequisiteError(
94
+ f"ERROR: Docker service '{name}' includes an explicitly configured container_name ('{container_name}'). This is not permitted, as container names should be provisioned by Docker compose and an explicit container_name will not work with epochs > 1."
95
+ )
96
+
88
97
  # build internal images
89
98
  image = service.get("image", None)
90
99
  if image and is_internal_image(image):
@@ -139,7 +148,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
139
148
  services = await compose_services(project)
140
149
 
141
150
  # start the services
142
- await compose_up(project)
151
+ await compose_up(project, services)
143
152
 
144
153
  # check to ensure that the services are running
145
154
  running_services = await compose_check_running(
@@ -270,103 +279,50 @@ class DockerSandboxEnvironment(SandboxEnvironment):
270
279
 
271
280
  @override
272
281
  async def write_file(self, file: str, contents: str | bytes) -> None:
273
- # exec function w/ timeout
274
- async def exec(cmd: list[str]) -> ExecResult[str]:
275
- return await self.exec(cmd, timeout=60)
276
-
277
282
  # resolve relative file paths
278
283
  file = self.container_file(file)
279
284
 
280
- # We want to be able to write a file in the container,
281
- # but only if the container's user would be allowed to do that.
282
- # We need to avoid implicitly trusting the provided "file" string.
283
- # For example, it shouldn't be passed as part of a shell command,
284
- # because of the risk of shell injection.
285
-
286
- local_tmpfile = tempfile.NamedTemporaryFile()
285
+ # ensure that the directory exists
286
+ parent = Path(file).parent.as_posix()
287
+ if parent != ".":
288
+ result = await self.exec(["mkdir", "-p", parent])
289
+ if not result.success:
290
+ msg = f"Failed to create container directory {parent}: {result.stderr}"
291
+ raise RuntimeError(msg)
287
292
 
288
- # write contents into a local tmp file (not in the container)
293
+ # write the file
289
294
  if isinstance(contents, str):
290
- local_tmpfile.write(contents.encode("utf-8"))
295
+ result = await self.exec(
296
+ ["sh", "-e", "-c", 'tee -- "$1"', "write_file_script", file],
297
+ input=contents,
298
+ )
291
299
  else:
292
- local_tmpfile.write(contents)
293
-
294
- local_tmpfile.flush()
295
-
296
- # Copy the local tmp file into a tmp file on the container.
297
- # Both tmp files have safe names as we created them ourselves
298
-
299
- # We write the tmp file in the default directory,
300
- # because of strangeness with /tmp on GitHub action runners.
301
-
302
- # We are reusing the generated local tmp file name within
303
- # the sandbox to save on a container roundtrip. There is a very slight
304
- # risk of collision if another write_file call happens
305
- # to get the same local tmp file name. But we assume tmp file
306
- # names have enough randomness for us to ignore that.
307
-
308
- container_tmpfile = (
309
- f".tmp_inspect_sandbox_{os.path.basename(local_tmpfile.name)}"
310
- )
311
-
312
- # compose cp will leave the file owned by root
313
- await compose_cp(
314
- src=local_tmpfile.name,
315
- dest=f"{self._service}:{self.container_file(container_tmpfile)}",
316
- project=self._project,
317
- )
318
-
319
- local_tmpfile.close() # this will also delete the file
320
-
321
- if not hasattr(self, "_docker_user"):
322
- uid = (await exec(["id", "-u"])).stdout.strip()
323
- gid = (await exec(["id", "-g"])).stdout.strip()
324
- self._docker_user = (uid, gid)
325
-
326
- await compose_command(
327
- [
328
- "exec",
329
- "--user",
330
- "root",
331
- self._service,
332
- "chown",
333
- f"{self._docker_user[0]}:{self._docker_user[1]}",
334
- container_tmpfile,
335
- ],
336
- project=self._project,
337
- timeout=60,
338
- )
339
-
340
- parent = PurePosixPath(file).parent
341
-
342
- # We do these steps in a shell script for efficiency to avoid round-trips to docker.
343
- res_cp = await exec(
344
- [
345
- "sh",
346
- "-e",
347
- "-c",
348
- 'mkdir -p -- "$1"; cp -T -- "$2" "$3"; rm -- "$2"',
349
- "copy_script",
350
- str(parent),
351
- container_tmpfile,
352
- file,
353
- ]
354
- )
355
-
356
- if res_cp.returncode != 0:
357
- if "Permission denied" in res_cp.stderr:
358
- ls_result = await exec(["ls", "-la", "."])
359
- error_string = f"Permission was denied. Error details: {res_cp.stderr}; ls -la: {ls_result.stdout}; {self._docker_user=}"
300
+ base64_contents = base64.b64encode(contents).decode("US-ASCII")
301
+ result = await self.exec(
302
+ [
303
+ "sh",
304
+ "-e",
305
+ "-c",
306
+ 'base64 -d | tee -- "$1" > /dev/null',
307
+ "write_file_script",
308
+ file,
309
+ ],
310
+ input=base64_contents,
311
+ )
312
+ if result.returncode != 0:
313
+ if "permission denied" in result.stderr.casefold():
314
+ ls_result = await self.exec(["ls", "-la", "."])
315
+ error_string = f"Permission was denied. Error details: {result.stderr}; ls -la: {ls_result.stdout}"
360
316
  raise PermissionError(error_string)
361
317
  elif (
362
- "cannot overwrite directory" in res_cp.stderr
363
- or "is a directory" in res_cp.stderr
318
+ "cannot overwrite directory" in result.stderr.casefold()
319
+ or "is a directory" in result.stderr.casefold()
364
320
  ):
365
321
  raise IsADirectoryError(
366
322
  f"Failed to write file: {file} because it is a directory already"
367
323
  )
368
324
  else:
369
- raise RuntimeError(f"failed to copy during write_file: {res_cp}")
325
+ raise RuntimeError(f"failed to copy during write_file: {result}")
370
326
 
371
327
  @overload
372
328
  async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
@@ -0,0 +1,100 @@
1
+ import re
2
+ from dataclasses import dataclass
3
+ from typing import TypedDict
4
+
5
+
6
class ComposeServiceHealthcheck(TypedDict, total=False):
    """Healthcheck settings of a compose service.

    All keys are optional (`total=False`). `interval` and `timeout` are
    duration strings (e.g. "30s") that `service_healthcheck_time()` parses
    with `parse_duration()`; `retries` is a plain count.
    """

    start_period: str
    interval: str
    retries: int
    timeout: str
11
+
12
+
13
# Subset of a compose service definition. The functional TypedDict syntax is
# used because keys such as "x-default" are not valid Python identifiers.
# All keys are optional (total=False).
ComposeService = TypedDict(
    "ComposeService",
    {
        "image": str,
        "build": str,
        "container_name": str,
        "x-default": bool,
        "x-local": bool,
        "healthcheck": ComposeServiceHealthcheck,
    },
    total=False,
)
25
+
26
+
27
def services_healthcheck_time(services: dict[str, ComposeService]) -> int:
    """Return the longest healthcheck time (in seconds) among `services`.

    Yields 0 when there are no services or none of them has a healthcheck.
    """
    # seed with 0 so the result is never below zero and an empty dict is fine
    return max(
        [0, *(service_healthcheck_time(service) for service in services.values())]
    )
35
+
36
+
37
def service_healthcheck_time(service: ComposeService) -> int:
    """
    Compute the longest time (in whole seconds) one service's healthcheck may run.

    Formula:
        retries * (interval + timeout)

    Values missing from the healthcheck fall back to the Docker defaults:
    - retries: 3
    - interval: 30s
    - timeout: 30s

    Returns 0 when the service defines no healthcheck.
    """
    config = service.get("healthcheck", None)
    if config is None:
        return 0

    attempts = config.get("retries", 3)

    # seconds spent on a single attempt: the wait interval plus the check timeout
    per_attempt = sum(
        parse_duration(config.get(key, "30s")).seconds
        for key in ("interval", "timeout")
    )

    return int(attempts * per_attempt)
62
+
63
+
64
@dataclass
class Duration:
    """A length of time stored as a whole number of nanoseconds."""

    nanoseconds: int

    @property
    def seconds(self) -> float:
        """The duration expressed in (possibly fractional) seconds."""
        return self.nanoseconds / 1_000_000_000


def parse_duration(duration_str: str) -> Duration:
    """Parse a Docker compose style duration string.

    The string is a sequence of `<number><unit>` terms (e.g. "30s", "1m30s",
    "1.5s"). Whitespace is ignored and numbers may have a fractional part.

    Args:
        duration_str: Duration text; an empty string yields a zero duration.

    Returns:
        The total duration.

    Raises:
        ValueError: If the text contains an unknown unit or anything that is
            not part of a `<number><unit>` term.
    """
    if not duration_str:
        return Duration(0)

    units = {
        "ns": 1,
        "us": 1_000,
        "µs": 1_000,  # U+00B5 micro sign
        "μs": 1_000,  # U+03BC greek small letter mu
        "ms": 1_000_000,
        "s": 1_000_000_000,
        "m": 60_000_000_000,
        "h": 3_600_000_000_000,
    }

    duration_str = "".join(duration_str.split())
    pattern = re.compile(r"(\d+(?:\.\d*)?|\.\d+)([a-zµμ]+)")
    matches = list(pattern.finditer(duration_str))

    if not matches:
        raise ValueError(f"Invalid duration format: {duration_str}")

    total_nanoseconds = 0
    position = 0
    for match in matches:
        # every term must start where the previous one ended, otherwise some
        # text (e.g. the "1." of a mangled number) would be silently dropped
        if match.start() != position:
            raise ValueError(f"Invalid duration format: {duration_str}")
        number, unit = match.groups()
        if unit not in units:
            raise ValueError(f"Invalid unit: {unit}")
        if number.isdigit():
            # keep pure integers exact
            total_nanoseconds += int(number) * units[unit]
        else:
            total_nanoseconds += round(float(number) * units[unit])
        position = match.end()

    # reject trailing text that is not part of any term
    if position != len(duration_str):
        raise ValueError(f"Invalid duration format: {duration_str}")

    return Duration(total_nanoseconds)