inspect-ai 0.3.61__py3-none-any.whl → 0.3.63__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (430)
  1. inspect_ai/_cli/eval.py +13 -0
  2. inspect_ai/_cli/main.py +1 -1
  3. inspect_ai/_cli/trace.py +8 -0
  4. inspect_ai/_cli/view.py +4 -0
  5. inspect_ai/_display/core/active.py +2 -3
  6. inspect_ai/_display/textual/widgets/transcript.py +15 -9
  7. inspect_ai/_eval/eval.py +4 -4
  8. inspect_ai/_eval/evalset.py +6 -6
  9. inspect_ai/_eval/task/error.py +10 -14
  10. inspect_ai/_eval/task/run.py +13 -8
  11. inspect_ai/_util/hash.py +1 -1
  12. inspect_ai/_util/transcript.py +11 -0
  13. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  14. inspect_ai/_view/www/.vscode/settings.json +8 -0
  15. inspect_ai/_view/www/App.css +92 -29
  16. inspect_ai/_view/www/dist/assets/index.css +16636 -14674
  17. inspect_ai/_view/www/dist/assets/index.js +43585 -36122
  18. inspect_ai/_view/www/dist/index.html +1 -1
  19. inspect_ai/_view/www/index.html +2 -2
  20. inspect_ai/_view/www/log-schema.json +36 -19
  21. inspect_ai/_view/www/package.json +22 -4
  22. inspect_ai/_view/www/postcss.config.cjs +8 -9
  23. inspect_ai/_view/www/src/{App.mjs → App.tsx} +355 -365
  24. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  25. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  26. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  27. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  28. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  29. inspect_ai/_view/www/src/api/index.ts +4 -4
  30. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  31. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  32. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  33. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  34. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  35. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  36. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  37. inspect_ai/_view/www/src/components/Card.css +60 -0
  38. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  39. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  40. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  41. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  42. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  43. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  45. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  46. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  47. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  48. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  49. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  50. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  51. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  52. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  53. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  54. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  55. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  56. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  57. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  58. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  59. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  60. inspect_ai/_view/www/src/components/LargeModal.tsx +199 -0
  61. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  62. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  63. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  64. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  65. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  66. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  67. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  68. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  69. inspect_ai/_view/www/src/components/NavPills.tsx +99 -0
  70. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  71. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  72. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  73. inspect_ai/_view/www/src/components/TabSet.tsx +200 -0
  74. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  75. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  76. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  77. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  78. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  79. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -7
  80. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  81. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  82. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  83. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  84. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  85. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  86. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  87. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  88. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  89. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  90. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  91. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  92. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  93. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  94. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +309 -0
  95. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  96. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  97. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  98. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  99. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  100. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  101. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  102. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  103. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +326 -0
  104. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  105. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +175 -0
  106. inspect_ai/_view/www/src/samples/SamplesTools.tsx +60 -0
  107. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  108. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  109. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  110. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  111. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  112. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +46 -0
  113. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  114. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  115. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  116. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +143 -0
  117. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  118. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +131 -0
  119. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  120. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +145 -0
  121. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  122. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +86 -0
  123. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  124. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  126. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  127. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +107 -0
  128. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +363 -0
  129. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  130. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  131. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  132. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  133. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  134. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  135. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  136. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  137. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  138. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  139. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  140. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  141. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  142. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  143. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  144. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  145. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  146. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  147. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  148. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  149. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  150. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  151. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  152. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  153. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  154. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  155. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  156. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  157. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  158. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +173 -0
  159. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  160. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +182 -0
  161. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  162. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  163. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  164. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  165. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  166. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  167. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  168. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  169. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  170. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  171. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  172. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  173. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  174. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  175. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  176. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  177. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  179. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  180. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  181. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  182. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +108 -0
  183. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  184. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  185. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  186. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  187. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  188. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  189. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  190. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  191. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +91 -0
  192. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  193. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  194. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  196. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  197. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +38 -0
  198. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  199. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +190 -0
  200. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  201. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  202. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  203. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  204. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  205. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +274 -0
  206. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  207. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  208. inspect_ai/_view/www/src/samples/transcript/state/{StateEventView.mjs → StateEventView.tsx} +148 -110
  209. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  210. inspect_ai/_view/www/src/types/log.d.ts +7 -4
  211. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  212. inspect_ai/_view/www/src/types.ts +71 -0
  213. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +22 -0
  214. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  215. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +95 -0
  216. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  217. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  218. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  219. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  220. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  221. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  222. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  223. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  224. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  225. inspect_ai/_view/www/src/utils/format.ts +194 -0
  226. inspect_ai/_view/www/src/utils/git.ts +7 -0
  227. inspect_ai/_view/www/src/utils/html.ts +6 -0
  228. inspect_ai/_view/www/src/utils/http.ts +14 -0
  229. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  230. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  231. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  232. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  233. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  234. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  235. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  236. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  237. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +160 -0
  238. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  239. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  240. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  241. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  242. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  243. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +113 -0
  244. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +67 -0
  245. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +156 -0
  246. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  247. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +222 -0
  248. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  249. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  250. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  251. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  252. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  253. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  254. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  255. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  256. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  257. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  258. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  259. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +41 -0
  260. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  261. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +61 -0
  262. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +80 -0
  263. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  264. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  265. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  266. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  267. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  268. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  269. inspect_ai/_view/www/tsconfig.json +23 -9
  270. inspect_ai/_view/www/vite.config.js +8 -17
  271. inspect_ai/_view/www/yarn.lock +627 -556
  272. inspect_ai/dataset/_dataset.py +36 -0
  273. inspect_ai/dataset/_sources/csv.py +8 -0
  274. inspect_ai/dataset/_sources/file.py +4 -0
  275. inspect_ai/dataset/_sources/hf.py +11 -1
  276. inspect_ai/dataset/_sources/json.py +8 -0
  277. inspect_ai/log/_log.py +3 -6
  278. inspect_ai/log/_message.py +1 -1
  279. inspect_ai/log/_recorders/eval.py +1 -1
  280. inspect_ai/log/_recorders/json.py +5 -7
  281. inspect_ai/model/_call_tools.py +2 -1
  282. inspect_ai/model/_chat_message.py +27 -0
  283. inspect_ai/model/_conversation.py +10 -3
  284. inspect_ai/model/_generate_config.py +6 -0
  285. inspect_ai/model/_model.py +74 -0
  286. inspect_ai/model/_openai.py +33 -1
  287. inspect_ai/model/_providers/anthropic.py +12 -0
  288. inspect_ai/model/_providers/groq.py +4 -0
  289. inspect_ai/model/_providers/openai.py +21 -9
  290. inspect_ai/model/_providers/openai_o1.py +3 -5
  291. inspect_ai/model/_providers/openrouter.py +86 -0
  292. inspect_ai/model/_providers/providers.py +12 -1
  293. inspect_ai/model/_reasoning.py +17 -0
  294. inspect_ai/scorer/_answer.py +7 -7
  295. inspect_ai/scorer/_classification.py +34 -18
  296. inspect_ai/scorer/_common.py +2 -8
  297. inspect_ai/solver/_basic_agent.py +19 -9
  298. inspect_ai/solver/_multiple_choice.py +24 -9
  299. inspect_ai/tool/__init__.py +2 -0
  300. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +2 -5
  301. inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +4 -0
  302. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  303. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +3 -0
  304. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +61 -0
  305. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +10 -0
  306. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  307. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  308. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  309. inspect_ai/tool/_tools/_execute.py +8 -2
  310. inspect_ai/tool/beta.py +3 -0
  311. inspect_ai/util/_sandbox/docker/docker.py +32 -85
  312. inspect_ai/util/_sandbox/self_check.py +124 -16
  313. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/METADATA +2 -1
  314. inspect_ai-0.3.63.dist-info/RECORD +618 -0
  315. inspect_ai/_view/www/src/Register.mjs +0 -3
  316. inspect_ai/_view/www/src/Types.mjs +0 -38
  317. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  318. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  319. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  320. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  321. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  322. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  323. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  324. inspect_ai/_view/www/src/components/ChatView.mjs +0 -418
  325. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  326. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  327. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  328. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  329. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  330. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  331. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  332. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  333. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  334. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  335. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  336. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  337. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  338. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  339. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  340. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  341. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  342. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  343. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  344. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  345. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  346. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  347. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  348. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  349. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  350. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  351. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  352. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  353. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  354. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  355. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  356. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  357. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  358. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  359. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  360. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  361. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  362. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  363. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  364. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  365. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  366. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  367. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  368. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  369. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  370. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  371. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  372. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  373. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  374. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  375. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  376. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  377. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  378. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  379. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  380. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  381. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  382. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  383. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  384. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  385. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  386. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  387. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  388. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  389. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  390. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  391. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  392. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  393. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  394. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  395. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  396. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  397. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  398. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  399. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  400. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  401. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  402. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  403. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  404. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  405. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  406. inspect_ai/tool/beta/__init__.py +0 -5
  407. inspect_ai-0.3.61.dist-info/RECORD +0 -476
  408. /inspect_ai/{tool/beta/_computer/_resources/tool/__init__.py → _view/www/src/components/MorePopOver.css} +0 -0
  409. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  410. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _view/www/src/workspace/tabs/InfoTab.module.css} +0 -0
  411. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  412. /inspect_ai/tool/{beta → _tools}/_computer/_common.py +0 -0
  413. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  414. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  415. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  416. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  417. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  418. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  419. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  420. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  421. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  422. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  423. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  424. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  425. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  426. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  427. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/LICENSE +0 -0
  428. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/WHEEL +0 -0
  429. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/entry_points.txt +0 -0
  430. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/eval.py CHANGED
@@ -385,6 +385,14 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
385
385
  help="Constrains effort on reasoning for reasoning models. Open AI o1 models only.",
386
386
  envvar="INSPECT_EVAL_REASONING_EFFORT",
387
387
  )
388
+ @click.option(
389
+ "--reasoning-history/--no-reasoning-history",
390
+ type=bool,
391
+ is_flag=True,
392
+ default=True,
393
+ help="Include reasoning in chat message history sent to generate.",
394
+ envvar="INSPECT_EVAL_REASONING_HISTORY",
395
+ )
388
396
  @click.option(
389
397
  "--log-format",
390
398
  type=click.Choice(["eval", "json"], case_sensitive=False),
@@ -444,6 +452,7 @@ def eval_command(
444
452
  max_tool_output: int | None,
445
453
  cache_prompt: str | None,
446
454
  reasoning_effort: str | None,
455
+ reasoning_history: bool | None,
447
456
  message_limit: int | None,
448
457
  token_limit: int | None,
449
458
  time_limit: int | None,
@@ -603,6 +612,7 @@ def eval_set_command(
603
612
  max_tool_output: int | None,
604
613
  cache_prompt: str | None,
605
614
  reasoning_effort: str | None,
615
+ reasoning_history: bool | None,
606
616
  message_limit: int | None,
607
617
  token_limit: int | None,
608
618
  time_limit: int | None,
@@ -841,6 +851,9 @@ def config_from_locals(locals: dict[str, Any]) -> GenerateConfigArgs:
841
851
  if key == "internal_tools":
842
852
  if value is not False:
843
853
  value = None
854
+ if key == "reasoning_history":
855
+ if value is not False:
856
+ value = None
844
857
  config[key] = value # type: ignore
845
858
  return config
846
859
 
inspect_ai/_cli/main.py CHANGED
@@ -53,7 +53,7 @@ inspect.add_command(trace_command)
53
53
  def main() -> None:
54
54
  set_exception_hook()
55
55
  init_dotenv()
56
- inspect(auto_envvar_prefix="INSPECT")
56
+ inspect(auto_envvar_prefix="INSPECT") # pylint: disable=no-value-for-parameter
57
57
 
58
58
 
59
59
  if __name__ == "__main__":
inspect_ai/_cli/trace.py CHANGED
@@ -109,11 +109,13 @@ def anomolies_command(trace_file: str | None, filter: str | None, all: bool) ->
109
109
  canceled_actions: dict[str, ActionTraceRecord] = {}
110
110
  error_actions: dict[str, ActionTraceRecord] = {}
111
111
  timeout_actions: dict[str, ActionTraceRecord] = {}
112
+ start_trace: ActionTraceRecord | None = None
112
113
 
113
114
  def action_started(trace: ActionTraceRecord) -> None:
114
115
  running_actions[trace.trace_id] = trace
115
116
 
116
117
  def action_completed(trace: ActionTraceRecord) -> ActionTraceRecord:
118
+ nonlocal start_trace
117
119
  start_trace = running_actions.get(trace.trace_id)
118
120
  if start_trace:
119
121
  del running_actions[trace.trace_id]
@@ -122,14 +124,20 @@ def anomolies_command(trace_file: str | None, filter: str | None, all: bool) ->
122
124
  raise RuntimeError(f"Expected {trace.trace_id} in action dictionary.")
123
125
 
124
126
  def action_failed(trace: ActionTraceRecord) -> None:
127
+ nonlocal start_trace
125
128
  if all:
129
+ assert start_trace
126
130
  error_actions[start_trace.trace_id] = trace
127
131
 
128
132
  def action_canceled(trace: ActionTraceRecord) -> None:
133
+ nonlocal start_trace
134
+ assert start_trace
129
135
  canceled_actions[start_trace.trace_id] = trace
130
136
 
131
137
  def action_timeout(trace: ActionTraceRecord) -> None:
138
+ nonlocal start_trace
132
139
  if all:
140
+ assert start_trace
133
141
  timeout_actions[start_trace.trace_id] = trace
134
142
 
135
143
  for trace in traces:
inspect_ai/_cli/view.py CHANGED
@@ -63,6 +63,10 @@ def start(
63
63
  INSPECT_VIEW_AUTHORIZATION_TOKEN = "INSPECT_VIEW_AUTHORIZATION_TOKEN"
64
64
  authorization = os.environ.get(INSPECT_VIEW_AUTHORIZATION_TOKEN, None)
65
65
  if authorization:
66
+ # this indicates we are in vscode -- we want to set the log level to HTTP
67
+ # in vscode, updated versions of the extension do this but we set it
68
+ # manually here as a temporary bridge for running against older versions
69
+ common["log_level"] = "HTTP"
66
70
  del os.environ[INSPECT_VIEW_AUTHORIZATION_TOKEN]
67
71
  os.unsetenv(INSPECT_VIEW_AUTHORIZATION_TOKEN)
68
72
 
inspect_ai/_display/core/active.py CHANGED
@@ -10,6 +10,8 @@ from ..rich.display import RichDisplay
10
10
  from ..textual.display import TextualDisplay
11
11
  from .display import Display, TaskScreen
12
12
 
13
+ _active_display: Display | None = None
14
+
13
15
 
14
16
  def display() -> Display:
15
17
  global _active_display
@@ -28,9 +30,6 @@ def display() -> Display:
28
30
  return _active_display
29
31
 
30
32
 
31
- _active_display: Display | None = None
32
-
33
-
34
33
  def task_screen() -> TaskScreen:
35
34
  screen = _active_task_screen.get(None)
36
35
  if screen is None:
inspect_ai/_display/textual/widgets/transcript.py CHANGED
@@ -15,6 +15,7 @@ from inspect_ai._util.transcript import (
15
15
  set_transcript_markdown_options,
16
16
  transcript_function,
17
17
  transcript_markdown,
18
+ transcript_reasoning,
18
19
  transcript_separator,
19
20
  )
20
21
  from inspect_ai.log._samples import ActiveSample
@@ -33,7 +34,11 @@ from inspect_ai.log._transcript import (
33
34
  SubtaskEvent,
34
35
  ToolEvent,
35
36
  )
36
- from inspect_ai.model._chat_message import ChatMessage, ChatMessageUser
37
+ from inspect_ai.model._chat_message import (
38
+ ChatMessage,
39
+ ChatMessageAssistant,
40
+ ChatMessageUser,
41
+ )
37
42
  from inspect_ai.model._render import messages_preceding_assistant
38
43
  from inspect_ai.tool._tool import ToolResult
39
44
  from inspect_ai.tool._tool_transcript import transcript_tool_call
@@ -171,8 +176,8 @@ def render_model_event(event: ModelEvent) -> EventDisplay:
171
176
  # content
172
177
  content: list[RenderableType] = []
173
178
 
174
- def append_message(message: ChatMessage, text: str | None = None) -> None:
175
- content.extend(render_message(message, text))
179
+ def append_message(message: ChatMessage) -> None:
180
+ content.extend(render_message(message))
176
181
 
177
182
  # render preceding messages
178
183
  preceding = messages_preceding_assistant(event.input)
@@ -309,16 +314,17 @@ def render_as_json(json: Any) -> RenderableType:
309
314
  )
310
315
 
311
316
 
312
- def render_message(
313
- message: ChatMessage, text: str | None = None
314
- ) -> list[RenderableType]:
317
+ def render_message(message: ChatMessage) -> list[RenderableType]:
315
318
  content: list[RenderableType] = [
316
319
  Text(message.role.capitalize(), style="bold"),
317
320
  Text(),
318
321
  ]
319
- text = text or message.text
320
- if text:
321
- content.extend([transcript_markdown(text.strip(), escape=True)])
322
+
323
+ if isinstance(message, ChatMessageAssistant) and message.reasoning:
324
+ content.extend(transcript_reasoning(message.reasoning))
325
+
326
+ if message.text:
327
+ content.extend([transcript_markdown(message.text.strip(), escape=True)])
322
328
  return content
323
329
 
324
330
 
inspect_ai/_eval/eval.py CHANGED
@@ -200,6 +200,10 @@ def eval(
200
200
  )
201
201
 
202
202
 
203
+ # single call to eval_async at a time
204
+ _eval_async_running = False
205
+
206
+
203
207
  async def eval_async(
204
208
  tasks: Tasks,
205
209
  model: str | Model | list[str] | list[Model] | None = None,
@@ -461,10 +465,6 @@ async def eval_async(
461
465
  return logs
462
466
 
463
467
 
464
- # single call to eval_async at a time
465
- _eval_async_running = False
466
-
467
-
468
468
  def eval_retry(
469
469
  tasks: str | EvalLogInfo | EvalLog | list[str] | list[EvalLogInfo] | list[EvalLog],
470
470
  log_level: str | None = None,
inspect_ai/_eval/evalset.py CHANGED
@@ -43,6 +43,12 @@ from .task.task import PreviousTask, Task
43
43
  logger = logging.getLogger(__name__)
44
44
 
45
45
 
46
+ class Log(NamedTuple):
47
+ info: EvalLogInfo
48
+ header: EvalLog
49
+ task_identifier: str
50
+
51
+
46
52
  def eval_set(
47
53
  tasks: Tasks,
48
54
  log_dir: str,
@@ -452,12 +458,6 @@ def return_last_value(retry_state: RetryCallState) -> list[EvalLog]:
452
458
  return []
453
459
 
454
460
 
455
- class Log(NamedTuple):
456
- info: EvalLogInfo
457
- header: EvalLog
458
- task_identifier: str
459
-
460
-
461
461
  # list all eval logs
462
462
  def list_all_eval_logs(log_dir: str) -> list[Log]:
463
463
  log_files = list_eval_logs(log_dir)
inspect_ai/_eval/task/error.py CHANGED
@@ -8,28 +8,24 @@ class SampleErrorHandler:
8
8
  self.fail_on_error = True if fail_on_error is None else fail_on_error
9
9
  self.total_samples = float(total_samples)
10
10
 
11
- def __call__(self, ex: BaseException) -> EvalError:
11
+ def __call__(self, ex: BaseException) -> tuple[EvalError, BaseException | None]:
12
12
  # increment error count
13
13
  self.error_count += 1
14
14
 
15
15
  # create error (we may return it)
16
- def sample_error() -> EvalError:
17
- return eval_error(ex, type(ex), ex, ex.__traceback__)
16
+ def sample_error(
17
+ *, raise_error: bool
18
+ ) -> tuple[EvalError, BaseException | None]:
19
+ return eval_error(
20
+ ex, type(ex), ex, ex.__traceback__
21
+ ), ex if raise_error else None
18
22
 
19
23
  # check against limits
20
24
  if isinstance(self.fail_on_error, bool):
21
- if self.fail_on_error:
22
- raise ex
23
- else:
24
- return sample_error()
25
+ return sample_error(raise_error=self.fail_on_error)
25
26
  else:
26
27
  if self.fail_on_error < 1:
27
28
  max_errors = self.fail_on_error * self.total_samples
28
- if self.error_count >= max_errors:
29
- raise ex
30
- else:
31
- return sample_error()
32
- elif self.error_count >= self.fail_on_error:
33
- raise ex
29
+ return sample_error(raise_error=self.error_count >= max_errors)
34
30
  else:
35
- return sample_error()
31
+ return sample_error(raise_error=self.error_count >= self.fail_on_error)
inspect_ai/_eval/task/run.py CHANGED
@@ -496,7 +496,7 @@ async def task_run_sample(
496
496
  logger: TaskLogger | None,
497
497
  log_images: bool,
498
498
  sample_source: EvalSampleSource | None,
499
- sample_error: Callable[[BaseException], EvalError],
499
+ sample_error: SampleErrorHandler,
500
500
  sample_complete: Callable[[dict[str, SampleScore]], None],
501
501
  fails_on_error: bool,
502
502
  time_limit: int | None,
@@ -548,12 +548,12 @@ async def task_run_sample(
548
548
  )
549
549
 
550
550
  # helper to handle exceptions (will throw if we've exceeded the limit)
551
- def handle_error(ex: BaseException) -> EvalError:
551
+ def handle_error(ex: BaseException) -> tuple[EvalError, BaseException | None]:
552
552
  err = sample_error(ex)
553
553
  py_logger.warning(
554
554
  f"Sample error (id: {sample.id}, epoch: {state.epoch}): {exception_message(ex)})"
555
555
  )
556
- transcript()._event(ErrorEvent(error=err))
556
+ transcript()._event(ErrorEvent(error=err[0]))
557
557
  return err
558
558
 
559
559
  # solver loop
@@ -572,6 +572,7 @@ async def task_run_sample(
572
572
  ) as active,
573
573
  ):
574
574
  error: EvalError | None = None
575
+ raise_error: BaseException | None = None
575
576
  results: dict[str, SampleScore] = {}
576
577
  try:
577
578
  async with sandboxenv_cm:
@@ -640,7 +641,7 @@ async def task_run_sample(
640
641
  state = sample_state() or state
641
642
  case "error":
642
643
  # default error handling
643
- error = handle_error(ex)
644
+ error, raise_error = handle_error(ex)
644
645
 
645
646
  else:
646
647
  raise
@@ -660,7 +661,7 @@ async def task_run_sample(
660
661
  state.completed = True
661
662
 
662
663
  except BaseException as ex:
663
- error = handle_error(ex)
664
+ error, raise_error = handle_error(ex)
664
665
 
665
666
  # set timeout for scoring. if the original timeout was hit we still
666
667
  # want to provide opportunity for scoring, but we don't necessarily
@@ -710,6 +711,9 @@ async def task_run_sample(
710
711
  results[name] = SampleScore(
711
712
  score=score, sample_id=state.sample_id
712
713
  )
714
+ transcript()._event(
715
+ ScoreEvent(score=score, target=sample.target)
716
+ )
713
717
 
714
718
  # propagate results into scores
715
719
  state.scores = {k: v.score for k, v in results.items()}
@@ -737,11 +741,10 @@ async def task_run_sample(
737
741
  )
738
742
 
739
743
  # handle error (this will throw if we've exceeded the limit)
740
- error = handle_error(ex)
744
+ error, raise_error = handle_error(ex)
741
745
 
742
- # handle sandboxenv init errors
743
746
  except Exception as ex:
744
- error = handle_error(ex)
747
+ error, raise_error = handle_error(ex)
745
748
 
746
749
  # complete the sample
747
750
  progress(SAMPLE_TOTAL_PROGRESS_UNITS)
@@ -772,6 +775,8 @@ async def task_run_sample(
772
775
  if results is not None:
773
776
  sample_complete(results)
774
777
  return results
778
+ elif raise_error:
779
+ raise raise_error
775
780
  else:
776
781
  return None
777
782
 
inspect_ai/_util/hash.py CHANGED
@@ -3,7 +3,7 @@ import mmh3
3
3
 
4
4
  def mm3_hash(message: str) -> str:
5
5
  # Generate the 128-bit hash as two 64-bit integers
6
- h1, h2 = mmh3.hash64(message.encode("utf-8"))
6
+ h1, h2 = mmh3.hash64(message.encode("utf-8")) # pylint: disable=E0633
7
7
 
8
8
  # Convert to unsigned integers and then to hexadecimal
9
9
  return f"{h1 & 0xFFFFFFFFFFFFFFFF:016x}{h2 & 0xFFFFFFFFFFFFFFFF:016x}"
inspect_ai/_util/transcript.py CHANGED
@@ -111,6 +111,17 @@ def transcript_panel(
111
111
  )
112
112
 
113
113
 
114
+ def transcript_reasoning(reasoning: str) -> list[RenderableType]:
115
+ content: list[RenderableType] = []
116
+ content.append(
117
+ transcript_markdown(
118
+ f"**<think>** \n{reasoning} \n**</think>**\n\n", escape=True
119
+ )
120
+ )
121
+ content.append(Text())
122
+ return content
123
+
124
+
114
125
  def transcript_separator(title: str, color: str) -> RenderableType:
115
126
  return Rule(title=title, style=f"{color} bold", align="center", end="\n\n")
116
127
 
inspect_ai/_view/www/.vscode/extensions.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "recommendations": ["esbenp.prettier-vscode", "dbaeumer.vscode-eslint"]
3
+ }
inspect_ai/_view/www/.vscode/settings.json ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "editor.formatOnSave": true,
3
+ "editor.defaultFormatter": "esbenp.prettier-vscode",
4
+ "editor.codeActionsOnSave": {
5
+ "source.organizeImports": "explicit",
6
+ "source.fixAll": "explicit"
7
+ }
8
+ }
inspect_ai/_view/www/App.css CHANGED
@@ -9,12 +9,27 @@
9
9
  --inspect-input-border: var(--bs-light-border-subtle);
10
10
  --inspect-diff-add-color: #dafbe1;
11
11
  --inspect-diff-remove-color: #ffebe9;
12
- --inspect-inactive-selection-background: var(--vscode-editor-inactiveSelectionBackground, #d9d9d9);
13
- --inspect-active-selection-background: var(--vscode-editor-selectionBackground, #d7d4f0);
12
+ --inspect-inactive-selection-background: var(
13
+ --vscode-editor-inactiveSelectionBackground,
14
+ #d9d9d9
15
+ );
16
+ --inspect-active-selection-background: var(
17
+ --vscode-editor-selectionBackground,
18
+ #d7d4f0
19
+ );
14
20
  --inspect-focus-border-color: #86b7fe;
15
21
  --inspect-focus-border-shadow: 0 0 0 0.25rem rgba(var(--bs-primary-rgb), 0.25);
16
22
  --inspect-focus-border-gray-color: #808080;
17
23
  --inspect-focus-border-gray-shadow: 0 0 0 0.25rem rgba(48, 48, 48, 0.25);
24
+
25
+ /* Inspect Font Sizes */
26
+ --inspect-font-size-title: 1.5rem;
27
+ --inspect-font-size-title-secondary: 1.3rem;
28
+ --inspect-font-size-larger: 1.1rem;
29
+ --inspect-font-size-large: 1rem;
30
+ --inspect-font-size-base: 0.9rem;
31
+ --inspect-font-size-small: 0.8rem;
32
+ --inspect-font-size-smaller: 0.8rem;
18
33
  }
19
34
 
20
35
  body:not([class^="vscode-"]) button {
@@ -47,6 +62,61 @@ body[class^="vscode-"] .app-main-grid {
47
62
  grid-template-rows: max-content max-content 1fr;
48
63
  }
49
64
 
65
+ /* Inspect Text Styles */
66
+ .text-style-label {
67
+ text-transform: uppercase;
68
+ }
69
+
70
+ .text-style-secondary {
71
+ color: var(--bs-secondary);
72
+ }
73
+
74
+ .text-style-tertiary {
75
+ color: var(--bs-tertiary-color);
76
+ }
77
+
78
+ /* Inspect Font Size Styles */
79
+ .text-size-title {
80
+ font-size: var(--inspect-font-size-title);
81
+ }
82
+
83
+ .text-size-title-secondary {
84
+ font-size: var(--inspect-font-size-title-secondary);
85
+ }
86
+
87
+ .text-size-larger {
88
+ font-size: var(--inspect-font-size-larger);
89
+ }
90
+
91
+ .text-size-large {
92
+ font-size: var(--inspect-font-size-large);
93
+ }
94
+
95
+ .text-size-base {
96
+ font-size: var(--inspect-font-size-base);
97
+ }
98
+
99
+ .text-size-small {
100
+ font-size: var(--inspect-font-size-small);
101
+ }
102
+
103
+ .text-size-smaller {
104
+ font-size: var(--inspect-font-size-smaller);
105
+ }
106
+
107
+ .text-truncate {
108
+ white-space: nowrap;
109
+ text-overflow: ellipsis;
110
+ overflow: hidden;
111
+ }
112
+
113
+ .three-line-clamp {
114
+ display: -webkit-box;
115
+ -webkit-line-clamp: 3;
116
+ -webkit-box-orient: vertical;
117
+ overflow: hidden;
118
+ }
119
+
50
120
  body[class^="vscode-"] {
51
121
  --bs-border-radius: 0;
52
122
  --bs-border-radius-lg: 0;
@@ -87,7 +157,7 @@ html.vscode {
87
157
 
88
158
  html.vscode .sample-input {
89
159
  line-height: 1.3em;
90
- -webkit-line-clamp: 4 !important
160
+ -webkit-line-clamp: 4 !important;
91
161
  }
92
162
 
93
163
  body[class^="vscode-"] .modal-backdrop {
@@ -276,7 +346,7 @@ body {
276
346
  }
277
347
 
278
348
  @media (max-width: 575px) {
279
- .tab-tools select {
349
+ .tab-tools select {
280
350
  width: 50px;
281
351
  }
282
352
  }
@@ -312,12 +382,6 @@ body {
312
382
  font-size: 1.5em;
313
383
  }
314
384
 
315
- .sidebar {
316
- --bs-offcanvas-width: var(--sidebar-width);
317
- width: var(--sidebar-width);
318
- overflow-y: auto;
319
- }
320
-
321
385
  .nav-link.active {
322
386
  border-bottom-width: 0 !important;
323
387
  }
@@ -644,7 +708,7 @@ table.table.table-sm td {
644
708
 
645
709
  .tab-tools .btn {
646
710
  font-size: 0.7rem;
647
- padding: 0.4em 0.8em;
711
+ padding: 0.2em 0.8em;
648
712
  }
649
713
 
650
714
  .tab-tools {
@@ -724,7 +788,7 @@ table.table.table-sm td {
724
788
  }
725
789
 
726
790
  @keyframes moveLeftToRight {
727
- from {
791
+ from {
728
792
  margin-left: 0;
729
793
  }
730
794
  to {
@@ -760,7 +824,6 @@ pre[class*="language-"].tool-output {
760
824
 
761
825
  /* lightbox styles */
762
826
 
763
-
764
827
  .lightbox-overlay .close-button,
765
828
  .lightbox-overlay .nav-button {
766
829
  /* Hide by default */
@@ -868,38 +931,38 @@ ul.jsondiffpatch-delta {
868
931
  vertical-align: top;
869
932
  }
870
933
  .jsondiffpatch-property-name:after {
871
- content: ': ';
934
+ content: ": ";
872
935
  }
873
936
  .jsondiffpatch-child-node-type-array > .jsondiffpatch-property-name:after {
874
- content: ': [';
937
+ content: ": [";
875
938
  }
876
939
  .jsondiffpatch-child-node-type-array:after {
877
- content: '],';
940
+ content: "],";
878
941
  }
879
942
  div.jsondiffpatch-child-node-type-array:before {
880
- content: '[';
943
+ content: "[";
881
944
  }
882
945
  div.jsondiffpatch-child-node-type-array:after {
883
- content: ']';
946
+ content: "]";
884
947
  }
885
948
  .jsondiffpatch-child-node-type-object > .jsondiffpatch-property-name:after {
886
- content: ': {';
949
+ content: ": {";
887
950
  }
888
951
  .jsondiffpatch-child-node-type-object:after {
889
- content: '},';
952
+ content: "},";
890
953
  }
891
954
  div.jsondiffpatch-child-node-type-object:before {
892
- content: '{';
955
+ content: "{";
893
956
  }
894
957
  div.jsondiffpatch-child-node-type-object:after {
895
- content: '}';
958
+ content: "}";
896
959
  }
897
960
  .jsondiffpatch-value pre:after {
898
- content: ',';
961
+ content: ",";
899
962
  }
900
963
  li:last-child > .jsondiffpatch-value pre:after,
901
964
  .jsondiffpatch-modified > .jsondiffpatch-left-value pre:after {
902
- content: '';
965
+ content: "";
903
966
  }
904
967
  .jsondiffpatch-modified .jsondiffpatch-value {
905
968
  display: inline-block;
@@ -916,7 +979,7 @@ li:last-child > .jsondiffpatch-value pre:after,
916
979
  color: #888;
917
980
  }
918
981
  .jsondiffpatch-moved .jsondiffpatch-moved-destination:before {
919
- content: ' => ';
982
+ content: " => ";
920
983
  }
921
984
  ul.jsondiffpatch-textdiff {
922
985
  padding: 0;
@@ -930,7 +993,7 @@ ul.jsondiffpatch-textdiff {
930
993
  display: inline-block;
931
994
  }
932
995
  .jsondiffpatch-textdiff-line-number:after {
933
- content: ',';
996
+ content: ",";
934
997
  }
935
998
  .jsondiffpatch-error {
936
999
  background: red;
@@ -976,14 +1039,14 @@ ul.jsondiffpatch-textdiff {
976
1039
  padding: 1em;
977
1040
  margin: 0.5em 0;
978
1041
  overflow: auto;
979
- border: 0.3em solid #7a6651;
1042
+ /* border: 0.3em solid #7a6651; */
980
1043
  border-radius: 0.5em;
981
1044
  box-shadow: 1px 1px 0.5em #000 inset;
982
1045
  }
983
1046
  .vscode-dark :not(pre) > code[class*="language-"] {
984
1047
  padding: 0.15em 0.2em 0.05em;
985
1048
  border-radius: 0.3em;
986
- border: 0.13em solid #7a6651;
1049
+ /* border: 0.13em solid #7a6651; */
987
1050
  box-shadow: 1px 1px 0.3em -0.1em #000 inset;
988
1051
  white-space: normal;
989
1052
  }
@@ -1045,4 +1108,4 @@ ul.jsondiffpatch-textdiff {
1045
1108
  .vscode-dark .token.deleted {
1046
1109
  color: red;
1047
1110
  }
1048
- /* END PrismJS */
1111
+ /* END PrismJS */