inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (518) hide show
  1. inspect_ai/_cli/cache.py +8 -7
  2. inspect_ai/_cli/common.py +0 -12
  3. inspect_ai/_cli/eval.py +32 -4
  4. inspect_ai/_cli/info.py +1 -0
  5. inspect_ai/_cli/list.py +1 -1
  6. inspect_ai/_cli/log.py +2 -0
  7. inspect_ai/_cli/main.py +1 -1
  8. inspect_ai/_cli/sandbox.py +4 -1
  9. inspect_ai/_cli/score.py +181 -32
  10. inspect_ai/_cli/trace.py +10 -0
  11. inspect_ai/_cli/view.py +4 -2
  12. inspect_ai/_display/core/active.py +2 -3
  13. inspect_ai/_display/core/config.py +7 -1
  14. inspect_ai/_display/textual/widgets/samples.py +4 -3
  15. inspect_ai/_display/textual/widgets/sandbox.py +6 -0
  16. inspect_ai/_eval/eval.py +104 -101
  17. inspect_ai/_eval/evalset.py +75 -75
  18. inspect_ai/_eval/loader.py +122 -12
  19. inspect_ai/_eval/registry.py +1 -1
  20. inspect_ai/_eval/run.py +14 -0
  21. inspect_ai/_eval/score.py +125 -36
  22. inspect_ai/_eval/task/log.py +105 -4
  23. inspect_ai/_eval/task/results.py +92 -38
  24. inspect_ai/_eval/task/run.py +9 -2
  25. inspect_ai/_eval/task/sandbox.py +35 -2
  26. inspect_ai/_eval/task/task.py +49 -46
  27. inspect_ai/_util/constants.py +1 -1
  28. inspect_ai/_util/content.py +8 -0
  29. inspect_ai/_util/error.py +2 -0
  30. inspect_ai/_util/file.py +15 -1
  31. inspect_ai/_util/hash.py +1 -1
  32. inspect_ai/_util/logger.py +4 -2
  33. inspect_ai/_util/registry.py +7 -1
  34. inspect_ai/_view/view.py +1 -2
  35. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  36. inspect_ai/_view/www/.vscode/settings.json +8 -0
  37. inspect_ai/_view/www/App.css +97 -29
  38. inspect_ai/_view/www/README.md +1 -1
  39. inspect_ai/_view/www/dist/assets/index.css +16663 -14674
  40. inspect_ai/_view/www/dist/assets/index.js +58808 -51348
  41. inspect_ai/_view/www/dist/index.html +1 -1
  42. inspect_ai/_view/www/index.html +2 -2
  43. inspect_ai/_view/www/log-schema.json +87 -73
  44. inspect_ai/_view/www/package.json +22 -4
  45. inspect_ai/_view/www/postcss.config.cjs +8 -9
  46. inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
  47. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  48. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  49. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  50. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  51. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  52. inspect_ai/_view/www/src/api/index.ts +4 -4
  53. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  54. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  55. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  56. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  57. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  58. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  59. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  60. inspect_ai/_view/www/src/components/Card.css +60 -0
  61. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  62. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  63. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  64. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  65. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  66. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  67. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  68. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  69. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  70. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  71. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  72. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  73. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  74. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  75. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  76. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  77. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  78. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  79. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  80. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  81. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  82. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  83. inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
  84. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  85. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  86. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  87. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  88. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  89. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  90. inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
  91. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  92. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  93. inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
  94. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  95. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  96. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  97. inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
  98. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  99. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  100. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  101. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  102. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  103. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
  104. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  105. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  106. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  107. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  108. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  109. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  110. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  111. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  112. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  113. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  114. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  115. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  116. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  117. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  118. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
  119. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  120. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  121. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  122. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  123. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  124. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  126. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  127. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
  128. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  129. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
  130. inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
  131. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  132. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  133. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  134. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  135. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  136. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
  137. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  138. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  139. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  140. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
  141. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  142. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
  143. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  144. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
  145. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  146. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
  147. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  148. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
  149. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  150. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  151. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
  152. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
  153. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  154. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  155. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  156. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  157. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  158. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  159. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  160. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  161. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  162. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  163. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  164. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  165. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  166. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  167. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  168. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  169. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  170. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  171. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  172. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  173. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  174. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  175. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  176. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  177. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  179. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  180. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  181. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  182. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
  183. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  184. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
  185. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  186. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  187. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  188. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  189. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  190. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  191. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  192. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  193. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  194. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  196. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  197. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  198. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  199. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  200. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  201. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  202. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  203. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  204. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  205. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  206. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
  207. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  208. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  209. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  210. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  211. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  212. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  213. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  214. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  215. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
  216. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  217. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  218. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  219. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  220. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  221. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
  222. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  223. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
  224. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  225. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  226. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  227. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  228. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  229. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
  230. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  231. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  232. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
  233. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  234. inspect_ai/_view/www/src/types/log.d.ts +108 -19
  235. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  236. inspect_ai/_view/www/src/types.ts +71 -0
  237. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
  238. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  239. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
  240. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  241. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  242. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  243. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  244. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  245. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  246. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  247. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  248. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  249. inspect_ai/_view/www/src/utils/format.ts +194 -0
  250. inspect_ai/_view/www/src/utils/git.ts +7 -0
  251. inspect_ai/_view/www/src/utils/html.ts +6 -0
  252. inspect_ai/_view/www/src/utils/http.ts +14 -0
  253. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  254. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  255. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  256. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  257. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  258. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  259. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  260. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  261. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
  262. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  263. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  264. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  265. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  266. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  267. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
  268. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
  269. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
  270. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  271. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
  272. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  273. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  274. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  275. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  276. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  277. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  278. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  279. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  280. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  281. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  282. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  283. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
  284. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  285. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
  286. inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
  287. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
  288. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  289. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  290. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  291. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  292. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  293. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  294. inspect_ai/_view/www/src/workspace/utils.ts +34 -0
  295. inspect_ai/_view/www/tsconfig.json +23 -9
  296. inspect_ai/_view/www/vite.config.js +8 -17
  297. inspect_ai/_view/www/yarn.lock +627 -556
  298. inspect_ai/approval/_approval.py +2 -0
  299. inspect_ai/approval/_approver.py +4 -4
  300. inspect_ai/approval/_auto.py +1 -1
  301. inspect_ai/approval/_human/approver.py +3 -0
  302. inspect_ai/approval/_policy.py +5 -0
  303. inspect_ai/approval/_registry.py +2 -2
  304. inspect_ai/dataset/_dataset.py +64 -37
  305. inspect_ai/dataset/_sources/__init__.py +0 -0
  306. inspect_ai/dataset/_sources/csv.py +20 -12
  307. inspect_ai/dataset/_sources/file.py +4 -0
  308. inspect_ai/dataset/_sources/hf.py +39 -29
  309. inspect_ai/dataset/_sources/json.py +17 -9
  310. inspect_ai/log/__init__.py +2 -0
  311. inspect_ai/log/_convert.py +3 -3
  312. inspect_ai/log/_file.py +24 -9
  313. inspect_ai/log/_log.py +101 -13
  314. inspect_ai/log/_message.py +4 -2
  315. inspect_ai/log/_recorders/file.py +4 -0
  316. inspect_ai/log/_recorders/json.py +5 -7
  317. inspect_ai/log/_recorders/recorder.py +3 -0
  318. inspect_ai/log/_transcript.py +19 -8
  319. inspect_ai/model/__init__.py +2 -0
  320. inspect_ai/model/_cache.py +39 -21
  321. inspect_ai/model/_call_tools.py +4 -3
  322. inspect_ai/model/_chat_message.py +14 -4
  323. inspect_ai/model/_generate_config.py +1 -1
  324. inspect_ai/model/_model.py +31 -24
  325. inspect_ai/model/_model_output.py +14 -1
  326. inspect_ai/model/_openai.py +10 -18
  327. inspect_ai/model/_providers/anthropic.py +3 -3
  328. inspect_ai/model/_providers/google.py +9 -5
  329. inspect_ai/model/_providers/openai.py +5 -9
  330. inspect_ai/model/_providers/openai_o1.py +3 -5
  331. inspect_ai/model/_providers/openrouter.py +86 -0
  332. inspect_ai/model/_providers/providers.py +11 -0
  333. inspect_ai/scorer/__init__.py +6 -1
  334. inspect_ai/scorer/_answer.py +7 -7
  335. inspect_ai/scorer/_classification.py +38 -18
  336. inspect_ai/scorer/_common.py +2 -8
  337. inspect_ai/scorer/_match.py +4 -5
  338. inspect_ai/scorer/_metric.py +87 -28
  339. inspect_ai/scorer/_metrics/__init__.py +3 -3
  340. inspect_ai/scorer/_metrics/accuracy.py +8 -10
  341. inspect_ai/scorer/_metrics/mean.py +3 -17
  342. inspect_ai/scorer/_metrics/std.py +111 -30
  343. inspect_ai/scorer/_model.py +12 -12
  344. inspect_ai/scorer/_pattern.py +3 -3
  345. inspect_ai/scorer/_reducer/reducer.py +36 -21
  346. inspect_ai/scorer/_reducer/registry.py +2 -2
  347. inspect_ai/scorer/_reducer/types.py +7 -1
  348. inspect_ai/scorer/_score.py +11 -1
  349. inspect_ai/scorer/_scorer.py +110 -16
  350. inspect_ai/solver/__init__.py +1 -1
  351. inspect_ai/solver/_basic_agent.py +19 -22
  352. inspect_ai/solver/_bridge/__init__.py +0 -3
  353. inspect_ai/solver/_bridge/bridge.py +3 -3
  354. inspect_ai/solver/_chain.py +1 -2
  355. inspect_ai/solver/_critique.py +3 -3
  356. inspect_ai/solver/_fork.py +2 -2
  357. inspect_ai/solver/_human_agent/__init__.py +0 -0
  358. inspect_ai/solver/_human_agent/agent.py +5 -8
  359. inspect_ai/solver/_human_agent/commands/clock.py +14 -10
  360. inspect_ai/solver/_human_agent/commands/note.py +1 -1
  361. inspect_ai/solver/_human_agent/commands/score.py +0 -11
  362. inspect_ai/solver/_multiple_choice.py +38 -26
  363. inspect_ai/solver/_prompt.py +7 -7
  364. inspect_ai/solver/_solver.py +53 -52
  365. inspect_ai/solver/_task_state.py +80 -69
  366. inspect_ai/solver/_use_tools.py +9 -9
  367. inspect_ai/tool/__init__.py +4 -1
  368. inspect_ai/tool/_tool.py +43 -14
  369. inspect_ai/tool/_tool_call.py +6 -2
  370. inspect_ai/tool/_tool_choice.py +3 -1
  371. inspect_ai/tool/_tool_def.py +10 -8
  372. inspect_ai/tool/_tool_params.py +24 -0
  373. inspect_ai/tool/_tool_with.py +7 -7
  374. inspect_ai/tool/_tools/__init__.py +0 -0
  375. inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
  376. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
  377. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  378. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  379. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  380. inspect_ai/tool/_tools/_execute.py +23 -11
  381. inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
  382. inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
  383. inspect_ai/tool/_tools/_web_search.py +7 -5
  384. inspect_ai/tool/beta.py +3 -0
  385. inspect_ai/util/_concurrency.py +3 -3
  386. inspect_ai/util/_panel.py +2 -0
  387. inspect_ai/util/_resource.py +12 -12
  388. inspect_ai/util/_sandbox/docker/compose.py +23 -20
  389. inspect_ai/util/_sandbox/docker/config.py +2 -1
  390. inspect_ai/util/_sandbox/docker/docker.py +42 -86
  391. inspect_ai/util/_sandbox/docker/service.py +100 -0
  392. inspect_ai/util/_sandbox/environment.py +99 -96
  393. inspect_ai/util/_sandbox/self_check.py +124 -16
  394. inspect_ai/util/_subprocess.py +5 -3
  395. inspect_ai/util/_subtask.py +15 -16
  396. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
  397. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
  398. inspect_ai-0.3.64.dist-info/RECORD +625 -0
  399. inspect_ai/_view/www/src/Register.mjs +0 -3
  400. inspect_ai/_view/www/src/Types.mjs +0 -38
  401. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  402. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  403. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  404. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  405. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  406. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  407. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  408. inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
  409. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  410. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  411. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  412. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  413. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  414. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  415. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  416. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  417. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  418. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  419. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  420. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  421. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  422. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  423. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  424. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  425. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  426. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  427. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  428. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  429. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  430. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  431. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  432. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  433. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  434. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  435. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  436. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  437. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  438. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  439. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  440. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  441. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  442. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  443. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  444. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  445. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  446. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  447. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  448. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  449. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  450. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  451. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  452. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  453. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  454. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  455. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  456. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  457. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  458. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  459. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  460. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  461. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  462. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  463. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  464. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  465. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  466. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  467. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  468. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  469. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  470. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  471. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  472. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  473. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  474. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  475. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  476. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  477. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
  478. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  479. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  480. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  481. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  482. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  483. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  484. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  485. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  486. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  487. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  488. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  489. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  490. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  491. inspect_ai/tool/beta/__init__.py +0 -5
  492. inspect_ai-0.3.62.dist-info/RECORD +0 -481
  493. /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
  494. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
  495. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  496. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  497. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  498. /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
  499. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  500. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  501. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  502. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  503. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  504. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  505. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  506. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
  507. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
  508. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  509. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  510. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
  511. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  512. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  513. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  514. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  515. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  516. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
  517. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
  518. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -65,91 +65,6 @@ class SandboxEnvironment(abc.ABC):
65
65
  filesystem context to copy samples files into and resolve relative paths to.
66
66
  """
67
67
 
68
- @classmethod
69
- def config_files(cls) -> list[str]:
70
- """Standard config files for this provider (used for automatic discovery)"""
71
- return []
72
-
73
- @classmethod
74
- def default_concurrency(cls) -> int | None:
75
- """Default max_sandboxes for this provider (`None` means no maximum)"""
76
- return None
77
-
78
- @classmethod
79
- async def task_init(
80
- cls, task_name: str, config: SandboxEnvironmentConfigType | None
81
- ) -> None:
82
- """Called at task startup initialize resources.
83
-
84
- Args:
85
- task_name (str): Name of task using the sandbox environment.
86
- config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
87
- """
88
- pass
89
-
90
- @classmethod
91
- async def sample_init(
92
- cls,
93
- task_name: str,
94
- config: SandboxEnvironmentConfigType | None,
95
- metadata: dict[str, str],
96
- ) -> dict[str, "SandboxEnvironment"]:
97
- """Initialize sandbox environments for a sample.
98
-
99
- Args:
100
- task_name (str): Name of task using the sandbox environment.
101
- config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
102
- metadata (dict[str,str]): Sample `metadata` field
103
-
104
- Returns:
105
- Dictionary of named sandbox environments. The environment which represents
106
- the default environment (resolved by `sandbox("default")` or `sandbox()`) must
107
- be the first key/value pair in the dictionary.
108
- """
109
- return {}
110
-
111
- @classmethod
112
- @abc.abstractmethod
113
- async def sample_cleanup(
114
- cls,
115
- task_name: str,
116
- config: SandboxEnvironmentConfigType | None,
117
- environments: dict[str, "SandboxEnvironment"],
118
- interrupted: bool,
119
- ) -> None:
120
- """Cleanup sandbox environments.
121
-
122
- Args:
123
- task_name (str): Name of task using the sandbox environment.
124
- config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
125
- environments (dict[str,SandboxEnvironment]): Sandbox environments created for this sample.
126
- interrupted (bool): Was the task interrupted by an error or cancellation
127
- """
128
- ...
129
-
130
- @classmethod
131
- async def task_cleanup(
132
- cls, task_name: str, config: SandboxEnvironmentConfigType | None, cleanup: bool
133
- ) -> None:
134
- """Called at task exit as a last chance to cleanup resources.
135
-
136
- Args:
137
- task_name (str): Name of task using the sandbox environment.
138
- config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
139
- cleanup (bool): Whether to actually cleanup environment resources
140
- (False if `--no-sandbox-cleanup` was specified)
141
- """
142
- pass
143
-
144
- @classmethod
145
- async def cli_cleanup(cls, id: str | None) -> None:
146
- """Handle a cleanup invoked from the CLI (e.g. inspect sandbox cleanup).
147
-
148
- Args:
149
- id (str | None): Optional ID to limit scope of cleanup.
150
- """
151
- pass
152
-
153
68
  @abc.abstractmethod
154
69
  async def exec(
155
70
  self,
@@ -170,13 +85,13 @@ class SandboxEnvironment(abc.ABC):
170
85
  `OutputLimitExceededError` will be raised.
171
86
 
172
87
  Args:
173
- cmd (str | list[str]): Command or command and arguments to execute.
174
- input (str | bytes | None): Standard input (optional).
175
- cwd (str | None): Current working dir (optional). If relative, will be relative to the per-sample filesystem context.
176
- env (dict[str,str]): Environment variables for execution.
177
- user (str | None): Optional username or UID to run the command as.
178
- timeout (int | None): Optional execution timeout (seconds).
179
- timeout_retry (bool): Retry the command in the case that it times out.
88
+ cmd: Command or command and arguments to execute.
89
+ input: Standard input (optional).
90
+ cwd: Current working dir (optional). If relative, will be relative to the per-sample filesystem context.
91
+ env: Environment variables for execution.
92
+ user: Optional username or UID to run the command as.
93
+ timeout: Optional execution timeout (seconds).
94
+ timeout_retry: Retry the command in the case that it times out.
180
95
  Commands will be retried up to twice, with a timeout of no greater
181
96
  than 60 seconds for the first retry and 30 for the second.
182
97
 
@@ -204,9 +119,9 @@ class SandboxEnvironment(abc.ABC):
204
119
  should be automatically created.
205
120
 
206
121
  Args:
207
- file (str): Path to file (relative file paths will resolve to the
122
+ file: Path to file (relative file paths will resolve to the
208
123
  per-sample working directory).
209
- contents (str | bytes): Text or binary file contents.
124
+ contents: Text or binary file contents.
210
125
 
211
126
  Raises:
212
127
  PermissionError: If the current user does not have permission to
@@ -233,9 +148,9 @@ class SandboxEnvironment(abc.ABC):
233
148
  to specifying `newline=""` in a call to the Python `open()` function.
234
149
 
235
150
  Args:
236
- file (str): Path to file (relative file paths will resolve to the
151
+ file: Path to file (relative file paths will resolve to the
237
152
  per-sample working directory).
238
- text (bool): Read as a utf-8 encoded text file.
153
+ text: Read as a utf-8 encoded text file.
239
154
 
240
155
  Returns:
241
156
  Contents of file (as str or bytes for binary files)
@@ -265,6 +180,91 @@ class SandboxEnvironment(abc.ABC):
265
180
  """
266
181
  raise NotImplementedError("connection not implemented")
267
182
 
183
+ @classmethod
184
+ def config_files(cls) -> list[str]:
185
+ """Standard config files for this provider (used for automatic discovery)"""
186
+ return []
187
+
188
+ @classmethod
189
+ def default_concurrency(cls) -> int | None:
190
+ """Default max_sandboxes for this provider (`None` means no maximum)"""
191
+ return None
192
+
193
+ @classmethod
194
+ async def task_init(
195
+ cls, task_name: str, config: SandboxEnvironmentConfigType | None
196
+ ) -> None:
197
+ """Called at task startup initialize resources.
198
+
199
+ Args:
200
+ task_name: Name of task using the sandbox environment.
201
+ config: Implementation defined configuration (optional).
202
+ """
203
+ pass
204
+
205
+ @classmethod
206
+ async def sample_init(
207
+ cls,
208
+ task_name: str,
209
+ config: SandboxEnvironmentConfigType | None,
210
+ metadata: dict[str, str],
211
+ ) -> dict[str, "SandboxEnvironment"]:
212
+ """Initialize sandbox environments for a sample.
213
+
214
+ Args:
215
+ task_name: Name of task using the sandbox environment.
216
+ config: Implementation defined configuration (optional).
217
+ metadata: Sample `metadata` field
218
+
219
+ Returns:
220
+ Dictionary of named sandbox environments. The environment which represents
221
+ the default environment (resolved by `sandbox("default")` or `sandbox()`) must
222
+ be the first key/value pair in the dictionary.
223
+ """
224
+ return {}
225
+
226
+ @classmethod
227
+ @abc.abstractmethod
228
+ async def sample_cleanup(
229
+ cls,
230
+ task_name: str,
231
+ config: SandboxEnvironmentConfigType | None,
232
+ environments: dict[str, "SandboxEnvironment"],
233
+ interrupted: bool,
234
+ ) -> None:
235
+ """Cleanup sandbox environments.
236
+
237
+ Args:
238
+ task_name: Name of task using the sandbox environment.
239
+ config: Implementation defined configuration (optional).
240
+ environments: Sandbox environments created for this sample.
241
+ interrupted: Was the task interrupted by an error or cancellation
242
+ """
243
+ ...
244
+
245
+ @classmethod
246
+ async def task_cleanup(
247
+ cls, task_name: str, config: SandboxEnvironmentConfigType | None, cleanup: bool
248
+ ) -> None:
249
+ """Called at task exit as a last chance to cleanup resources.
250
+
251
+ Args:
252
+ task_name: Name of task using the sandbox environment.
253
+ config: Implementation defined configuration (optional).
254
+ cleanup: Whether to actually cleanup environment resources
255
+ (False if `--no-sandbox-cleanup` was specified)
256
+ """
257
+ pass
258
+
259
+ @classmethod
260
+ async def cli_cleanup(cls, id: str | None) -> None:
261
+ """Handle a cleanup invoked from the CLI (e.g. inspect sandbox cleanup).
262
+
263
+ Args:
264
+ id: Optional ID to limit scope of cleanup.
265
+ """
266
+ pass
267
+
268
268
 
269
269
  @dataclass
270
270
  class SandboxEnvironments:
@@ -284,7 +284,10 @@ class SandboxEnvironmentSpec(NamedTuple):
284
284
  """Specification of a SandboxEnvironment."""
285
285
 
286
286
  type: str
287
+ """Sandbox type (e.g. 'local', 'docker')"""
288
+
287
289
  config: SandboxEnvironmentConfigType | None = None
290
+ """Sandbox configuration (filename or config object)."""
288
291
 
289
292
 
290
293
  SandboxEnvironmentConfigType = BaseModel | str
@@ -32,6 +32,7 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
32
32
  for fn in [
33
33
  test_read_and_write_file_text,
34
34
  test_read_and_write_file_binary,
35
+ test_read_and_write_large_file_binary,
35
36
  test_write_file_text_utf,
36
37
  test_read_and_write_file_including_directory_absolute,
37
38
  test_read_and_write_file_including_directory_relative,
@@ -41,12 +42,19 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
41
42
  test_read_file_is_directory,
42
43
  test_read_file_nonsense_name,
43
44
  test_read_file_limit,
44
- test_write_file_zero_length,
45
- test_write_file_space,
46
- test_write_file_is_directory,
47
- test_write_file_without_permissions,
48
- test_write_file_exists,
45
+ test_write_text_file_zero_length,
46
+ test_write_text_file_space,
47
+ test_write_text_file_is_directory,
48
+ test_write_text_file_without_permissions,
49
+ test_write_text_file_exists,
50
+ test_write_binary_file_zero_length,
51
+ test_write_binary_file_space,
52
+ test_write_binary_file_is_directory,
53
+ test_write_binary_file_without_permissions,
54
+ test_write_binary_file_exists,
49
55
  test_exec_output,
56
+ test_exec_stderr,
57
+ test_exec_returncode,
50
58
  test_exec_timeout,
51
59
  test_exec_permission_error,
52
60
  test_exec_as_user,
@@ -100,6 +108,17 @@ async def test_read_and_write_file_binary(sandbox_env: SandboxEnvironment) -> No
100
108
  await _cleanup_file(sandbox_env, file_name)
101
109
 
102
110
 
111
+ async def test_read_and_write_large_file_binary(
112
+ sandbox_env: SandboxEnvironment,
113
+ ) -> None:
114
+ file_name = "test_read_and_write_large_file_binary.file"
115
+ long_bytes = b"\xc3" * 5_000_000
116
+ await sandbox_env.write_file(file_name, long_bytes)
117
+ written_file_bytes = await sandbox_env.read_file(file_name, text=False)
118
+ assert long_bytes == written_file_bytes
119
+ await _cleanup_file(sandbox_env, file_name)
120
+
121
+
103
122
  async def test_read_and_write_file_including_directory_absolute(
104
123
  sandbox_env: SandboxEnvironment,
105
124
  ) -> None:
@@ -176,7 +195,7 @@ async def test_read_file_limit(sandbox_env: SandboxEnvironment) -> None:
176
195
  await _cleanup_file(sandbox_env, file_name)
177
196
 
178
197
 
179
- async def test_write_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
198
+ async def test_write_text_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
180
199
  file_name = "zero_length_file.file"
181
200
  await sandbox_env.write_file(file_name, "")
182
201
  zero_length = await sandbox_env.read_file(file_name, text=True)
@@ -185,7 +204,7 @@ async def test_write_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
185
204
  await _cleanup_file(sandbox_env, file_name)
186
205
 
187
206
 
188
- async def test_write_file_space(sandbox_env: SandboxEnvironment) -> None:
207
+ async def test_write_text_file_space(sandbox_env: SandboxEnvironment) -> None:
189
208
  space = "to the moon"
190
209
  file_name = "file with space.file"
191
210
  await sandbox_env.write_file(file_name, space)
@@ -195,28 +214,28 @@ async def test_write_file_space(sandbox_env: SandboxEnvironment) -> None:
195
214
  await _cleanup_file(sandbox_env, file_name)
196
215
 
197
216
 
198
- async def test_write_file_is_directory(
217
+ async def test_write_text_file_is_directory(
199
218
  sandbox_env: SandboxEnvironment,
200
219
  ) -> None:
201
220
  # ensure /tmp/directory exists
202
221
  await sandbox_env.write_file(
203
- "/tmp/inspect_ai_test_write_file_is_directory/file", "unused content"
222
+ "/tmp/inspect_ai_test_write_text_file_is_directory/file", "unused content"
204
223
  )
205
224
  with Raises(IsADirectoryError) as e_info:
206
225
  await sandbox_env.write_file(
207
- "/tmp/inspect_ai_test_write_file_is_directory",
226
+ "/tmp/inspect_ai_test_write_text_file_is_directory",
208
227
  "content cannot go in a directory, dummy",
209
228
  )
210
229
  assert "directory" in str(e_info.value)
211
230
  await sandbox_env.exec(
212
- ["rm", "-rf", "/tmp/inspect_ai_test_write_file_is_directory"]
231
+ ["rm", "-rf", "/tmp/inspect_ai_test_write_text_file_is_directory"]
213
232
  )
214
233
 
215
234
 
216
- async def test_write_file_without_permissions(
235
+ async def test_write_text_file_without_permissions(
217
236
  sandbox_env: SandboxEnvironment,
218
237
  ) -> None:
219
- file_name = "test_write_file_without_permissions.file"
238
+ file_name = "test_write_text_file_without_permissions.file"
220
239
  await sandbox_env.write_file(file_name, "impervious #content")
221
240
  await sandbox_env.exec(["chmod", "-w", file_name])
222
241
  with Raises(PermissionError) as e_info:
@@ -226,7 +245,7 @@ async def test_write_file_without_permissions(
226
245
  await _cleanup_file(sandbox_env, file_name)
227
246
 
228
247
 
229
- async def test_write_file_exists(
248
+ async def test_write_text_file_exists(
230
249
  sandbox_env: SandboxEnvironment,
231
250
  ) -> None:
232
251
  file_name = "file_exists.file"
@@ -237,6 +256,67 @@ async def test_write_file_exists(
237
256
  await _cleanup_file(sandbox_env, file_name)
238
257
 
239
258
 
259
+ async def test_write_binary_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
260
+ file_name = "zero_length_file.file"
261
+ await sandbox_env.write_file(file_name, b"")
262
+ zero_length = await sandbox_env.read_file(file_name, text=False)
263
+ assert isinstance(zero_length, bytes)
264
+ assert zero_length == b""
265
+ await _cleanup_file(sandbox_env, file_name)
266
+
267
+
268
+ async def test_write_binary_file_space(sandbox_env: SandboxEnvironment) -> None:
269
+ binary_content = b"\xc3\x28"
270
+ file_name = "file with space.file"
271
+ await sandbox_env.write_file(file_name, binary_content)
272
+ file_with_space = await sandbox_env.read_file(file_name, text=False)
273
+ assert isinstance(file_with_space, bytes)
274
+ assert file_with_space == binary_content
275
+ await _cleanup_file(sandbox_env, file_name)
276
+
277
+
278
+ async def test_write_binary_file_is_directory(
279
+ sandbox_env: SandboxEnvironment,
280
+ ) -> None:
281
+ # ensure /tmp/directory exists
282
+ await sandbox_env.write_file(
283
+ "/tmp/inspect_ai_test_write_binary_file_is_directory/file", "unused content"
284
+ )
285
+ with Raises(IsADirectoryError) as e_info:
286
+ await sandbox_env.write_file(
287
+ "/tmp/inspect_ai_test_write_binary_file_is_directory",
288
+ b"\xc3\x28",
289
+ )
290
+ assert "directory" in str(e_info.value)
291
+ await sandbox_env.exec(
292
+ ["rm", "-rf", "/tmp/inspect_ai_test_write_binary_file_is_directory"]
293
+ )
294
+
295
+
296
+ async def test_write_binary_file_without_permissions(
297
+ sandbox_env: SandboxEnvironment,
298
+ ) -> None:
299
+ file_name = "test_write_binary_file_without_permissions.file"
300
+ await sandbox_env.write_file(file_name, "impervious #content")
301
+ await sandbox_env.exec(["chmod", "-w", file_name])
302
+ with Raises(PermissionError) as e_info:
303
+ await sandbox_env.write_file(file_name, b"\xc3\x28")
304
+ assert file_name in str(e_info.value)
305
+ await sandbox_env.exec(["chmod", "+w", file_name])
306
+ await _cleanup_file(sandbox_env, file_name)
307
+
308
+
309
+ async def test_write_binary_file_exists(
310
+ sandbox_env: SandboxEnvironment,
311
+ ) -> None:
312
+ file_name = "file_exists.file"
313
+ await sandbox_env.write_file(file_name, b"\xc3\x28")
314
+ await sandbox_env.write_file(file_name, b"\xc3\x29")
315
+ altered_content = await sandbox_env.read_file(file_name, text=False)
316
+ assert altered_content == b"\xc3\x29"
317
+ await _cleanup_file(sandbox_env, file_name)
318
+
319
+
240
320
  async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
241
321
  exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; echo bar"])
242
322
  expected = "foo\nbar\n"
@@ -246,9 +326,19 @@ async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
246
326
  )
247
327
 
248
328
 
329
+ async def test_exec_stderr(sandbox_env: SandboxEnvironment) -> None:
330
+ exec_result = await sandbox_env.exec(["sh", "-c", "echo boof; echo baz >&2"])
331
+ assert exec_result.stderr == "baz\n"
332
+
333
+
334
+ async def test_exec_returncode(sandbox_env: SandboxEnvironment) -> None:
335
+ exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; exit 70"])
336
+ assert exec_result.returncode == 70
337
+
338
+
249
339
  async def test_exec_timeout(sandbox_env: SandboxEnvironment) -> None:
250
340
  with Raises(TimeoutError):
251
- await sandbox_env.exec(["sleep", "2"], timeout=1)
341
+ await sandbox_env.exec(["sleep", "4"], timeout=2)
252
342
 
253
343
 
254
344
  async def test_exec_permission_error(sandbox_env: SandboxEnvironment) -> None:
@@ -259,10 +349,28 @@ async def test_exec_permission_error(sandbox_env: SandboxEnvironment) -> None:
259
349
 
260
350
  async def test_exec_as_user(sandbox_env: SandboxEnvironment) -> None:
261
351
  username = "inspect-ai-test-exec-as-user"
352
+
353
+ # Neither adduser nor useradd are part of POSIX, so we need some brittle logic here
354
+ adduser_help_exec_result = await sandbox_env.exec(["adduser", "--help"])
355
+ adduser_help_text = (
356
+ adduser_help_exec_result.stdout + adduser_help_exec_result.stderr
357
+ )
358
+
359
+ if "BusyBox" in adduser_help_text:
360
+ adduser_command = ["adduser", "-D", username]
361
+ else:
362
+ adduser_command = [
363
+ "adduser",
364
+ "--comment",
365
+ "self_check.py",
366
+ "--disabled-password",
367
+ username,
368
+ ]
369
+
262
370
  try:
263
371
  # Create a new user
264
372
  add_user_result = await sandbox_env.exec(
265
- ["adduser", "--comment", "self_check.py", "--disabled-password", username],
373
+ adduser_command,
266
374
  user="root",
267
375
  timeout=10, # in one case adduser decided to ask for input which caused the test to hang indefinitely
268
376
  )
@@ -20,6 +20,8 @@ T = TypeVar("T", str, bytes)
20
20
 
21
21
  @dataclass
22
22
  class ExecResult(Generic[T]):
23
+ """Execution result from call to `subprocess()`."""
24
+
23
25
  success: bool
24
26
  """Did the process exit with success."""
25
27
 
@@ -85,11 +87,11 @@ async def subprocess(
85
87
  cwd (str | Path | None): Switch to directory for execution.
86
88
  env (dict[str, str]): Additional environment variables.
87
89
  capture_output (bool): Capture stderr and stdout into ExecResult
88
- (if False, then output is redirected to parent stderr/stdout)
90
+ (if False, then output is redirected to parent stderr/stdout)
89
91
  output_limit (int | None): Stop reading output if it exceeds
90
- the specified limit (in bytes).
92
+ the specified limit (in bytes).
91
93
  timeout (int | None): Timeout. If the timeout expires then
92
- a `TimeoutError` will be raised.
94
+ a `TimeoutError` will be raised.
93
95
 
94
96
  Returns:
95
97
  Subprocess result (text or binary depending on `text` param)
@@ -27,21 +27,21 @@ logger = getLogger(__name__)
27
27
 
28
28
  @runtime_checkable
29
29
  class Subtask(Protocol):
30
- """Subtask with distinct `Store` and `Transcript`.
31
-
32
- Args:
33
- *args (Any): Arguments for the subtask.
34
- **kwargs (Any): Keyword arguments for the subtask.
35
-
36
- Returns:
37
- Result of subtask.
38
- """
39
-
40
30
  async def __call__(
41
31
  self,
42
32
  *args: Any,
43
33
  **kwargs: Any,
44
- ) -> Any: ...
34
+ ) -> Any:
35
+ """Subtask with distinct `Store` and `Transcript`.
36
+
37
+ Args:
38
+ *args (Any): Arguments for the subtask.
39
+ **kwargs (Any): Keyword arguments for the subtask.
40
+
41
+ Returns:
42
+ Result of subtask.
43
+ """
44
+ ...
45
45
 
46
46
 
47
47
  @overload
@@ -71,11 +71,10 @@ def subtask(
71
71
  r"""Decorator for subtasks.
72
72
 
73
73
  Args:
74
- func (Subtask): Subtask implementation.
75
- name (str | None): Name for subtask (defaults to function name)
76
- store (store | None): Store to use for subtask
77
- type (str | None): Type to use for subtask
78
- input (dict[str, Any] | None): Input to log for subtask
74
+ name: Name for subtask (defaults to function name)
75
+ store: Store to use for subtask
76
+ type: Type to use for subtask
77
+ input: Input to log for subtask
79
78
 
80
79
  Returns:
81
80
  Function which runs the Subtask, providing an isolated
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024 UK AI Safety Institute
3
+ Copyright (c) 2024 UK AI Security Institute
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: inspect_ai
3
- Version: 0.3.62
3
+ Version: 0.3.64
4
4
  Summary: Framework for large language model evaluations
5
- Author: UK AI Safety Institute
5
+ Author: UK AI Security Institute
6
6
  License: MIT License
7
7
  Project-URL: Documentation, https://inspect.ai-safety-institute.org.uk/
8
8
  Project-URL: Source Code, https://github.com/UKGovernmentBEIS/inspect_ai
@@ -55,6 +55,7 @@ Requires-Dist: azure-ai-inference; extra == "dev"
55
55
  Requires-Dist: google-cloud-aiplatform; extra == "dev"
56
56
  Requires-Dist: google-generativeai; extra == "dev"
57
57
  Requires-Dist: goodfire; extra == "dev"
58
+ Requires-Dist: griffe; extra == "dev"
58
59
  Requires-Dist: groq; extra == "dev"
59
60
  Requires-Dist: ipython; extra == "dev"
60
61
  Requires-Dist: mistralai; extra == "dev"
@@ -63,13 +64,15 @@ Requires-Dist: mypy; extra == "dev"
63
64
  Requires-Dist: nbformat; extra == "dev"
64
65
  Requires-Dist: openai; extra == "dev"
65
66
  Requires-Dist: pre-commit; extra == "dev"
67
+ Requires-Dist: pylint; extra == "dev"
66
68
  Requires-Dist: pytest; extra == "dev"
67
69
  Requires-Dist: pytest-asyncio; extra == "dev"
68
70
  Requires-Dist: pytest-cov; extra == "dev"
69
71
  Requires-Dist: pytest-dotenv; extra == "dev"
70
72
  Requires-Dist: pytest-xdist; extra == "dev"
71
- Requires-Dist: ruff==0.9.4; extra == "dev"
73
+ Requires-Dist: ruff==0.9.5; extra == "dev"
72
74
  Requires-Dist: textual-dev>=0.86.2; extra == "dev"
75
+ Requires-Dist: types-Markdown; extra == "dev"
73
76
  Requires-Dist: types-PyYAML; extra == "dev"
74
77
  Requires-Dist: types-beautifulsoup4; extra == "dev"
75
78
  Requires-Dist: types-aioboto3; extra == "dev"
@@ -81,15 +84,17 @@ Requires-Dist: types-protobuf; extra == "dev"
81
84
  Requires-Dist: types-psutil; extra == "dev"
82
85
  Requires-Dist: types-python-dateutil; extra == "dev"
83
86
  Provides-Extra: doc
84
- Requires-Dist: quarto-cli; extra == "doc"
87
+ Requires-Dist: quarto-cli==1.5.57; extra == "doc"
85
88
  Requires-Dist: jupyter; extra == "doc"
89
+ Requires-Dist: panflute; extra == "doc"
90
+ Requires-Dist: markdown; extra == "doc"
86
91
  Provides-Extra: dist
87
92
  Requires-Dist: twine; extra == "dist"
88
93
  Requires-Dist: build; extra == "dist"
89
94
 
90
- [<img width="295" src="https://inspect.ai-safety-institute.org.uk/images/aisi-logo.png" />](https://aisi.gov.uk/)
95
+ [<img width="295" src="https://inspect.ai-safety-institute.org.uk/images/aisi-logo.svg" />](https://aisi.gov.uk/)
91
96
 
92
- Welcome to Inspect, a framework for large language model evaluations created by the [UK AI Safety Institute](https://aisi.gov.uk/).
97
+ Welcome to Inspect, a framework for large language model evaluations created by the [UK AI Security Institute](https://aisi.gov.uk/).
93
98
 
94
99
  Inspect provides many built-in components, including facilities for prompt engineering, tool usage, multi-turn dialog, and model graded evaluations. Extensions to Inspect (e.g. to support new elicitation and scoring techniques) can be provided by other Python packages.
95
100