inspect-ai 0.3.61__py3-none-any.whl → 0.3.63__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (430) hide show
  1. inspect_ai/_cli/eval.py +13 -0
  2. inspect_ai/_cli/main.py +1 -1
  3. inspect_ai/_cli/trace.py +8 -0
  4. inspect_ai/_cli/view.py +4 -0
  5. inspect_ai/_display/core/active.py +2 -3
  6. inspect_ai/_display/textual/widgets/transcript.py +15 -9
  7. inspect_ai/_eval/eval.py +4 -4
  8. inspect_ai/_eval/evalset.py +6 -6
  9. inspect_ai/_eval/task/error.py +10 -14
  10. inspect_ai/_eval/task/run.py +13 -8
  11. inspect_ai/_util/hash.py +1 -1
  12. inspect_ai/_util/transcript.py +11 -0
  13. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  14. inspect_ai/_view/www/.vscode/settings.json +8 -0
  15. inspect_ai/_view/www/App.css +92 -29
  16. inspect_ai/_view/www/dist/assets/index.css +16636 -14674
  17. inspect_ai/_view/www/dist/assets/index.js +43585 -36122
  18. inspect_ai/_view/www/dist/index.html +1 -1
  19. inspect_ai/_view/www/index.html +2 -2
  20. inspect_ai/_view/www/log-schema.json +36 -19
  21. inspect_ai/_view/www/package.json +22 -4
  22. inspect_ai/_view/www/postcss.config.cjs +8 -9
  23. inspect_ai/_view/www/src/{App.mjs → App.tsx} +355 -365
  24. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  25. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  26. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  27. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  28. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  29. inspect_ai/_view/www/src/api/index.ts +4 -4
  30. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  31. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  32. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  33. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  34. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  35. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  36. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  37. inspect_ai/_view/www/src/components/Card.css +60 -0
  38. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  39. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  40. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  41. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  42. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  43. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  45. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  46. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  47. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  48. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  49. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  50. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  51. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  52. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  53. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  54. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  55. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  56. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  57. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  58. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  59. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  60. inspect_ai/_view/www/src/components/LargeModal.tsx +199 -0
  61. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  62. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  63. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  64. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  65. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  66. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  67. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  68. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  69. inspect_ai/_view/www/src/components/NavPills.tsx +99 -0
  70. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  71. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  72. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  73. inspect_ai/_view/www/src/components/TabSet.tsx +200 -0
  74. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  75. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  76. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  77. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  78. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  79. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -7
  80. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  81. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  82. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  83. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  84. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  85. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  86. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  87. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  88. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  89. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  90. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  91. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  92. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  93. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  94. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +309 -0
  95. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  96. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  97. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  98. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  99. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  100. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  101. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  102. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  103. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +326 -0
  104. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  105. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +175 -0
  106. inspect_ai/_view/www/src/samples/SamplesTools.tsx +60 -0
  107. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  108. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  109. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  110. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  111. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  112. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +46 -0
  113. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  114. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  115. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  116. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +143 -0
  117. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  118. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +131 -0
  119. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  120. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +145 -0
  121. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  122. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +86 -0
  123. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  124. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  126. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  127. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +107 -0
  128. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +363 -0
  129. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  130. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  131. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  132. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  133. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  134. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  135. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  136. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  137. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  138. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  139. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  140. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  141. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  142. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  143. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  144. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  145. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  146. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  147. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  148. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  149. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  150. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  151. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  152. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  153. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  154. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  155. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  156. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  157. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  158. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +173 -0
  159. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  160. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +182 -0
  161. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  162. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  163. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  164. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  165. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  166. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  167. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  168. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  169. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  170. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  171. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  172. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  173. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  174. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  175. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  176. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  177. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  179. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  180. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  181. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  182. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +108 -0
  183. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  184. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  185. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  186. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  187. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  188. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  189. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  190. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  191. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +91 -0
  192. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  193. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  194. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  196. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  197. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +38 -0
  198. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  199. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +190 -0
  200. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  201. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  202. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  203. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  204. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  205. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +274 -0
  206. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  207. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  208. inspect_ai/_view/www/src/samples/transcript/state/{StateEventView.mjs → StateEventView.tsx} +148 -110
  209. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  210. inspect_ai/_view/www/src/types/log.d.ts +7 -4
  211. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  212. inspect_ai/_view/www/src/types.ts +71 -0
  213. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +22 -0
  214. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  215. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +95 -0
  216. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  217. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  218. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  219. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  220. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  221. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  222. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  223. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  224. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  225. inspect_ai/_view/www/src/utils/format.ts +194 -0
  226. inspect_ai/_view/www/src/utils/git.ts +7 -0
  227. inspect_ai/_view/www/src/utils/html.ts +6 -0
  228. inspect_ai/_view/www/src/utils/http.ts +14 -0
  229. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  230. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  231. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  232. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  233. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  234. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  235. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  236. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  237. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +160 -0
  238. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  239. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  240. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  241. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  242. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  243. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +113 -0
  244. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +67 -0
  245. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +156 -0
  246. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  247. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +222 -0
  248. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  249. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  250. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  251. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  252. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  253. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  254. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  255. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  256. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  257. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  258. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  259. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +41 -0
  260. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  261. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +61 -0
  262. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +80 -0
  263. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  264. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  265. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  266. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  267. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  268. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  269. inspect_ai/_view/www/tsconfig.json +23 -9
  270. inspect_ai/_view/www/vite.config.js +8 -17
  271. inspect_ai/_view/www/yarn.lock +627 -556
  272. inspect_ai/dataset/_dataset.py +36 -0
  273. inspect_ai/dataset/_sources/csv.py +8 -0
  274. inspect_ai/dataset/_sources/file.py +4 -0
  275. inspect_ai/dataset/_sources/hf.py +11 -1
  276. inspect_ai/dataset/_sources/json.py +8 -0
  277. inspect_ai/log/_log.py +3 -6
  278. inspect_ai/log/_message.py +1 -1
  279. inspect_ai/log/_recorders/eval.py +1 -1
  280. inspect_ai/log/_recorders/json.py +5 -7
  281. inspect_ai/model/_call_tools.py +2 -1
  282. inspect_ai/model/_chat_message.py +27 -0
  283. inspect_ai/model/_conversation.py +10 -3
  284. inspect_ai/model/_generate_config.py +6 -0
  285. inspect_ai/model/_model.py +74 -0
  286. inspect_ai/model/_openai.py +33 -1
  287. inspect_ai/model/_providers/anthropic.py +12 -0
  288. inspect_ai/model/_providers/groq.py +4 -0
  289. inspect_ai/model/_providers/openai.py +21 -9
  290. inspect_ai/model/_providers/openai_o1.py +3 -5
  291. inspect_ai/model/_providers/openrouter.py +86 -0
  292. inspect_ai/model/_providers/providers.py +12 -1
  293. inspect_ai/model/_reasoning.py +17 -0
  294. inspect_ai/scorer/_answer.py +7 -7
  295. inspect_ai/scorer/_classification.py +34 -18
  296. inspect_ai/scorer/_common.py +2 -8
  297. inspect_ai/solver/_basic_agent.py +19 -9
  298. inspect_ai/solver/_multiple_choice.py +24 -9
  299. inspect_ai/tool/__init__.py +2 -0
  300. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +2 -5
  301. inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +4 -0
  302. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  303. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +3 -0
  304. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +61 -0
  305. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +10 -0
  306. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  307. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  308. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  309. inspect_ai/tool/_tools/_execute.py +8 -2
  310. inspect_ai/tool/beta.py +3 -0
  311. inspect_ai/util/_sandbox/docker/docker.py +32 -85
  312. inspect_ai/util/_sandbox/self_check.py +124 -16
  313. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/METADATA +2 -1
  314. inspect_ai-0.3.63.dist-info/RECORD +618 -0
  315. inspect_ai/_view/www/src/Register.mjs +0 -3
  316. inspect_ai/_view/www/src/Types.mjs +0 -38
  317. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  318. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  319. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  320. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  321. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  322. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  323. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  324. inspect_ai/_view/www/src/components/ChatView.mjs +0 -418
  325. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  326. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  327. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  328. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  329. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  330. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  331. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  332. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  333. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  334. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  335. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  336. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  337. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  338. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  339. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  340. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  341. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  342. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  343. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  344. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  345. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  346. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  347. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  348. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  349. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  350. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  351. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  352. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  353. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  354. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  355. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  356. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  357. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  358. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  359. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  360. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  361. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  362. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  363. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  364. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  365. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  366. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  367. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  368. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  369. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  370. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  371. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  372. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  373. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  374. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  375. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  376. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  377. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  378. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  379. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  380. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  381. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  382. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  383. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  384. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  385. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  386. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  387. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  388. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  389. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  390. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  391. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  392. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  393. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  394. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  395. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  396. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  397. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  398. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  399. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  400. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  401. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  402. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  403. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  404. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  405. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  406. inspect_ai/tool/beta/__init__.py +0 -5
  407. inspect_ai-0.3.61.dist-info/RECORD +0 -476
  408. /inspect_ai/{tool/beta/_computer/_resources/tool/__init__.py → _view/www/src/components/MorePopOver.css} +0 -0
  409. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  410. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _view/www/src/workspace/tabs/InfoTab.module.css} +0 -0
  411. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  412. /inspect_ai/tool/{beta → _tools}/_computer/_common.py +0 -0
  413. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  414. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  415. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  416. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  417. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  418. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  419. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  420. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  421. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  422. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  423. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  424. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  425. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  426. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  427. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/LICENSE +0 -0
  428. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/WHEEL +0 -0
  429. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/entry_points.txt +0 -0
  430. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ import base64
1
2
  import errno
2
3
  import json
3
4
  import os
@@ -34,7 +35,6 @@ from .compose import (
34
35
  compose_build,
35
36
  compose_check_running,
36
37
  compose_cleanup_images,
37
- compose_command,
38
38
  compose_cp,
39
39
  compose_exec,
40
40
  compose_ps,
@@ -270,103 +270,50 @@ class DockerSandboxEnvironment(SandboxEnvironment):
270
270
 
271
271
  @override
272
272
  async def write_file(self, file: str, contents: str | bytes) -> None:
273
- # exec function w/ timeout
274
- async def exec(cmd: list[str]) -> ExecResult[str]:
275
- return await self.exec(cmd, timeout=60)
276
-
277
273
  # resolve relative file paths
278
274
  file = self.container_file(file)
279
275
 
280
- # We want to be able to write a file in the container,
281
- # but only if the container's user would be allowed to do that.
282
- # We need to avoid implicitly trusting the provided "file" string.
283
- # For example, it shouldn't be passed as part of a shell command,
284
- # because of the risk of shell injection.
285
-
286
- local_tmpfile = tempfile.NamedTemporaryFile()
276
+ # ensure that the directory exists
277
+ parent = Path(file).parent.as_posix()
278
+ if parent != ".":
279
+ result = await self.exec(["mkdir", "-p", parent])
280
+ if not result.success:
281
+ msg = f"Failed to create container directory {parent}: {result.stderr}"
282
+ raise RuntimeError(msg)
287
283
 
288
- # write contents into a local tmp file (not in the container)
284
+ # write the file
289
285
  if isinstance(contents, str):
290
- local_tmpfile.write(contents.encode("utf-8"))
286
+ result = await self.exec(
287
+ ["sh", "-e", "-c", 'tee -- "$1"', "write_file_script", file],
288
+ input=contents,
289
+ )
291
290
  else:
292
- local_tmpfile.write(contents)
293
-
294
- local_tmpfile.flush()
295
-
296
- # Copy the local tmp file into a tmp file on the container.
297
- # Both tmp files have safe names as we created them ourselves
298
-
299
- # We write the tmp file in the default directory,
300
- # because of strangeness with /tmp on GitHub action runners.
301
-
302
- # We are reusing the generated local tmp file name within
303
- # the sandbox to save on a container roundtrip. There is a very slight
304
- # risk of collision if another write_file call happens
305
- # to get the same local tmp file name. But we assume tmp file
306
- # names have enough randomness for us to ignore that.
307
-
308
- container_tmpfile = (
309
- f".tmp_inspect_sandbox_{os.path.basename(local_tmpfile.name)}"
310
- )
311
-
312
- # compose cp will leave the file owned by root
313
- await compose_cp(
314
- src=local_tmpfile.name,
315
- dest=f"{self._service}:{self.container_file(container_tmpfile)}",
316
- project=self._project,
317
- )
318
-
319
- local_tmpfile.close() # this will also delete the file
320
-
321
- if not hasattr(self, "_docker_user"):
322
- uid = (await exec(["id", "-u"])).stdout.strip()
323
- gid = (await exec(["id", "-g"])).stdout.strip()
324
- self._docker_user = (uid, gid)
325
-
326
- await compose_command(
327
- [
328
- "exec",
329
- "--user",
330
- "root",
331
- self._service,
332
- "chown",
333
- f"{self._docker_user[0]}:{self._docker_user[1]}",
334
- container_tmpfile,
335
- ],
336
- project=self._project,
337
- timeout=60,
338
- )
339
-
340
- parent = PurePosixPath(file).parent
341
-
342
- # We do these steps in a shell script for efficiency to avoid round-trips to docker.
343
- res_cp = await exec(
344
- [
345
- "sh",
346
- "-e",
347
- "-c",
348
- 'mkdir -p -- "$1"; cp -T -- "$2" "$3"; rm -- "$2"',
349
- "copy_script",
350
- str(parent),
351
- container_tmpfile,
352
- file,
353
- ]
354
- )
355
-
356
- if res_cp.returncode != 0:
357
- if "Permission denied" in res_cp.stderr:
358
- ls_result = await exec(["ls", "-la", "."])
359
- error_string = f"Permission was denied. Error details: {res_cp.stderr}; ls -la: {ls_result.stdout}; {self._docker_user=}"
291
+ base64_contents = base64.b64encode(contents).decode("US-ASCII")
292
+ result = await self.exec(
293
+ [
294
+ "sh",
295
+ "-e",
296
+ "-c",
297
+ 'base64 -d | tee -- "$1" > /dev/null',
298
+ "write_file_script",
299
+ file,
300
+ ],
301
+ input=base64_contents,
302
+ )
303
+ if result.returncode != 0:
304
+ if "permission denied" in result.stderr.casefold():
305
+ ls_result = await self.exec(["ls", "-la", "."])
306
+ error_string = f"Permission was denied. Error details: {result.stderr}; ls -la: {ls_result.stdout}"
360
307
  raise PermissionError(error_string)
361
308
  elif (
362
- "cannot overwrite directory" in res_cp.stderr
363
- or "is a directory" in res_cp.stderr
309
+ "cannot overwrite directory" in result.stderr.casefold()
310
+ or "is a directory" in result.stderr.casefold()
364
311
  ):
365
312
  raise IsADirectoryError(
366
313
  f"Failed to write file: {file} because it is a directory already"
367
314
  )
368
315
  else:
369
- raise RuntimeError(f"failed to copy during write_file: {res_cp}")
316
+ raise RuntimeError(f"failed to copy during write_file: {result}")
370
317
 
371
318
  @overload
372
319
  async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
@@ -32,6 +32,7 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
32
32
  for fn in [
33
33
  test_read_and_write_file_text,
34
34
  test_read_and_write_file_binary,
35
+ test_read_and_write_large_file_binary,
35
36
  test_write_file_text_utf,
36
37
  test_read_and_write_file_including_directory_absolute,
37
38
  test_read_and_write_file_including_directory_relative,
@@ -41,12 +42,19 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
41
42
  test_read_file_is_directory,
42
43
  test_read_file_nonsense_name,
43
44
  test_read_file_limit,
44
- test_write_file_zero_length,
45
- test_write_file_space,
46
- test_write_file_is_directory,
47
- test_write_file_without_permissions,
48
- test_write_file_exists,
45
+ test_write_text_file_zero_length,
46
+ test_write_text_file_space,
47
+ test_write_text_file_is_directory,
48
+ test_write_text_file_without_permissions,
49
+ test_write_text_file_exists,
50
+ test_write_binary_file_zero_length,
51
+ test_write_binary_file_space,
52
+ test_write_binary_file_is_directory,
53
+ test_write_binary_file_without_permissions,
54
+ test_write_binary_file_exists,
49
55
  test_exec_output,
56
+ test_exec_stderr,
57
+ test_exec_returncode,
50
58
  test_exec_timeout,
51
59
  test_exec_permission_error,
52
60
  test_exec_as_user,
@@ -100,6 +108,17 @@ async def test_read_and_write_file_binary(sandbox_env: SandboxEnvironment) -> No
100
108
  await _cleanup_file(sandbox_env, file_name)
101
109
 
102
110
 
111
+ async def test_read_and_write_large_file_binary(
112
+ sandbox_env: SandboxEnvironment,
113
+ ) -> None:
114
+ file_name = "test_read_and_write_large_file_binary.file"
115
+ long_bytes = b"\xc3" * 5_000_000
116
+ await sandbox_env.write_file(file_name, long_bytes)
117
+ written_file_bytes = await sandbox_env.read_file(file_name, text=False)
118
+ assert long_bytes == written_file_bytes
119
+ await _cleanup_file(sandbox_env, file_name)
120
+
121
+
103
122
  async def test_read_and_write_file_including_directory_absolute(
104
123
  sandbox_env: SandboxEnvironment,
105
124
  ) -> None:
@@ -176,7 +195,7 @@ async def test_read_file_limit(sandbox_env: SandboxEnvironment) -> None:
176
195
  await _cleanup_file(sandbox_env, file_name)
177
196
 
178
197
 
179
- async def test_write_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
198
+ async def test_write_text_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
180
199
  file_name = "zero_length_file.file"
181
200
  await sandbox_env.write_file(file_name, "")
182
201
  zero_length = await sandbox_env.read_file(file_name, text=True)
@@ -185,7 +204,7 @@ async def test_write_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
185
204
  await _cleanup_file(sandbox_env, file_name)
186
205
 
187
206
 
188
- async def test_write_file_space(sandbox_env: SandboxEnvironment) -> None:
207
+ async def test_write_text_file_space(sandbox_env: SandboxEnvironment) -> None:
189
208
  space = "to the moon"
190
209
  file_name = "file with space.file"
191
210
  await sandbox_env.write_file(file_name, space)
@@ -195,28 +214,28 @@ async def test_write_file_space(sandbox_env: SandboxEnvironment) -> None:
195
214
  await _cleanup_file(sandbox_env, file_name)
196
215
 
197
216
 
198
- async def test_write_file_is_directory(
217
+ async def test_write_text_file_is_directory(
199
218
  sandbox_env: SandboxEnvironment,
200
219
  ) -> None:
201
220
  # ensure /tmp/directory exists
202
221
  await sandbox_env.write_file(
203
- "/tmp/inspect_ai_test_write_file_is_directory/file", "unused content"
222
+ "/tmp/inspect_ai_test_write_text_file_is_directory/file", "unused content"
204
223
  )
205
224
  with Raises(IsADirectoryError) as e_info:
206
225
  await sandbox_env.write_file(
207
- "/tmp/inspect_ai_test_write_file_is_directory",
226
+ "/tmp/inspect_ai_test_write_text_file_is_directory",
208
227
  "content cannot go in a directory, dummy",
209
228
  )
210
229
  assert "directory" in str(e_info.value)
211
230
  await sandbox_env.exec(
212
- ["rm", "-rf", "/tmp/inspect_ai_test_write_file_is_directory"]
231
+ ["rm", "-rf", "/tmp/inspect_ai_test_write_text_file_is_directory"]
213
232
  )
214
233
 
215
234
 
216
- async def test_write_file_without_permissions(
235
+ async def test_write_text_file_without_permissions(
217
236
  sandbox_env: SandboxEnvironment,
218
237
  ) -> None:
219
- file_name = "test_write_file_without_permissions.file"
238
+ file_name = "test_write_text_file_without_permissions.file"
220
239
  await sandbox_env.write_file(file_name, "impervious #content")
221
240
  await sandbox_env.exec(["chmod", "-w", file_name])
222
241
  with Raises(PermissionError) as e_info:
@@ -226,7 +245,7 @@ async def test_write_file_without_permissions(
226
245
  await _cleanup_file(sandbox_env, file_name)
227
246
 
228
247
 
229
- async def test_write_file_exists(
248
+ async def test_write_text_file_exists(
230
249
  sandbox_env: SandboxEnvironment,
231
250
  ) -> None:
232
251
  file_name = "file_exists.file"
@@ -237,6 +256,67 @@ async def test_write_file_exists(
237
256
  await _cleanup_file(sandbox_env, file_name)
238
257
 
239
258
 
259
+ async def test_write_binary_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
260
+ file_name = "zero_length_file.file"
261
+ await sandbox_env.write_file(file_name, b"")
262
+ zero_length = await sandbox_env.read_file(file_name, text=False)
263
+ assert isinstance(zero_length, bytes)
264
+ assert zero_length == b""
265
+ await _cleanup_file(sandbox_env, file_name)
266
+
267
+
268
+ async def test_write_binary_file_space(sandbox_env: SandboxEnvironment) -> None:
269
+ binary_content = b"\xc3\x28"
270
+ file_name = "file with space.file"
271
+ await sandbox_env.write_file(file_name, binary_content)
272
+ file_with_space = await sandbox_env.read_file(file_name, text=False)
273
+ assert isinstance(file_with_space, bytes)
274
+ assert file_with_space == binary_content
275
+ await _cleanup_file(sandbox_env, file_name)
276
+
277
+
278
+ async def test_write_binary_file_is_directory(
279
+ sandbox_env: SandboxEnvironment,
280
+ ) -> None:
281
+ # ensure /tmp/directory exists
282
+ await sandbox_env.write_file(
283
+ "/tmp/inspect_ai_test_write_binary_file_is_directory/file", "unused content"
284
+ )
285
+ with Raises(IsADirectoryError) as e_info:
286
+ await sandbox_env.write_file(
287
+ "/tmp/inspect_ai_test_write_binary_file_is_directory",
288
+ b"\xc3\x28",
289
+ )
290
+ assert "directory" in str(e_info.value)
291
+ await sandbox_env.exec(
292
+ ["rm", "-rf", "/tmp/inspect_ai_test_write_binary_file_is_directory"]
293
+ )
294
+
295
+
296
+ async def test_write_binary_file_without_permissions(
297
+ sandbox_env: SandboxEnvironment,
298
+ ) -> None:
299
+ file_name = "test_write_binary_file_without_permissions.file"
300
+ await sandbox_env.write_file(file_name, "impervious #content")
301
+ await sandbox_env.exec(["chmod", "-w", file_name])
302
+ with Raises(PermissionError) as e_info:
303
+ await sandbox_env.write_file(file_name, b"\xc3\x28")
304
+ assert file_name in str(e_info.value)
305
+ await sandbox_env.exec(["chmod", "+w", file_name])
306
+ await _cleanup_file(sandbox_env, file_name)
307
+
308
+
309
+ async def test_write_binary_file_exists(
310
+ sandbox_env: SandboxEnvironment,
311
+ ) -> None:
312
+ file_name = "file_exists.file"
313
+ await sandbox_env.write_file(file_name, b"\xc3\x28")
314
+ await sandbox_env.write_file(file_name, b"\xc3\x29")
315
+ altered_content = await sandbox_env.read_file(file_name, text=False)
316
+ assert altered_content == b"\xc3\x29"
317
+ await _cleanup_file(sandbox_env, file_name)
318
+
319
+
240
320
  async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
241
321
  exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; echo bar"])
242
322
  expected = "foo\nbar\n"
@@ -246,9 +326,19 @@ async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
246
326
  )
247
327
 
248
328
 
329
+ async def test_exec_stderr(sandbox_env: SandboxEnvironment) -> None:
330
+ exec_result = await sandbox_env.exec(["sh", "-c", "echo boof; echo baz >&2"])
331
+ assert exec_result.stderr == "baz\n"
332
+
333
+
334
+ async def test_exec_returncode(sandbox_env: SandboxEnvironment) -> None:
335
+ exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; exit 70"])
336
+ assert exec_result.returncode == 70
337
+
338
+
249
339
  async def test_exec_timeout(sandbox_env: SandboxEnvironment) -> None:
250
340
  with Raises(TimeoutError):
251
- await sandbox_env.exec(["sleep", "2"], timeout=1)
341
+ await sandbox_env.exec(["sleep", "4"], timeout=2)
252
342
 
253
343
 
254
344
  async def test_exec_permission_error(sandbox_env: SandboxEnvironment) -> None:
@@ -259,10 +349,28 @@ async def test_exec_permission_error(sandbox_env: SandboxEnvironment) -> None:
259
349
 
260
350
  async def test_exec_as_user(sandbox_env: SandboxEnvironment) -> None:
261
351
  username = "inspect-ai-test-exec-as-user"
352
+
353
+ # Neither adduser nor useradd are part of POSIX, so we need some brittle logic here
354
+ adduser_help_exec_result = await sandbox_env.exec(["adduser", "--help"])
355
+ adduser_help_text = (
356
+ adduser_help_exec_result.stdout + adduser_help_exec_result.stderr
357
+ )
358
+
359
+ if "BusyBox" in adduser_help_text:
360
+ adduser_command = ["adduser", "-D", username]
361
+ else:
362
+ adduser_command = [
363
+ "adduser",
364
+ "--comment",
365
+ "self_check.py",
366
+ "--disabled-password",
367
+ username,
368
+ ]
369
+
262
370
  try:
263
371
  # Create a new user
264
372
  add_user_result = await sandbox_env.exec(
265
- ["adduser", "--comment", "self_check.py", "--disabled-password", username],
373
+ adduser_command,
266
374
  user="root",
267
375
  timeout=10, # in one case adduser decided to ask for input which caused the test to hang indefinitely
268
376
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: inspect_ai
3
- Version: 0.3.61
3
+ Version: 0.3.63
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Safety Institute
6
6
  License: MIT License
@@ -63,6 +63,7 @@ Requires-Dist: mypy; extra == "dev"
63
63
  Requires-Dist: nbformat; extra == "dev"
64
64
  Requires-Dist: openai; extra == "dev"
65
65
  Requires-Dist: pre-commit; extra == "dev"
66
+ Requires-Dist: pylint; extra == "dev"
66
67
  Requires-Dist: pytest; extra == "dev"
67
68
  Requires-Dist: pytest-asyncio; extra == "dev"
68
69
  Requires-Dist: pytest-cov; extra == "dev"