inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (518)
  1. inspect_ai/_cli/cache.py +8 -7
  2. inspect_ai/_cli/common.py +0 -12
  3. inspect_ai/_cli/eval.py +32 -4
  4. inspect_ai/_cli/info.py +1 -0
  5. inspect_ai/_cli/list.py +1 -1
  6. inspect_ai/_cli/log.py +2 -0
  7. inspect_ai/_cli/main.py +1 -1
  8. inspect_ai/_cli/sandbox.py +4 -1
  9. inspect_ai/_cli/score.py +181 -32
  10. inspect_ai/_cli/trace.py +10 -0
  11. inspect_ai/_cli/view.py +4 -2
  12. inspect_ai/_display/core/active.py +2 -3
  13. inspect_ai/_display/core/config.py +7 -1
  14. inspect_ai/_display/textual/widgets/samples.py +4 -3
  15. inspect_ai/_display/textual/widgets/sandbox.py +6 -0
  16. inspect_ai/_eval/eval.py +104 -101
  17. inspect_ai/_eval/evalset.py +75 -75
  18. inspect_ai/_eval/loader.py +122 -12
  19. inspect_ai/_eval/registry.py +1 -1
  20. inspect_ai/_eval/run.py +14 -0
  21. inspect_ai/_eval/score.py +125 -36
  22. inspect_ai/_eval/task/log.py +105 -4
  23. inspect_ai/_eval/task/results.py +92 -38
  24. inspect_ai/_eval/task/run.py +9 -2
  25. inspect_ai/_eval/task/sandbox.py +35 -2
  26. inspect_ai/_eval/task/task.py +49 -46
  27. inspect_ai/_util/constants.py +1 -1
  28. inspect_ai/_util/content.py +8 -0
  29. inspect_ai/_util/error.py +2 -0
  30. inspect_ai/_util/file.py +15 -1
  31. inspect_ai/_util/hash.py +1 -1
  32. inspect_ai/_util/logger.py +4 -2
  33. inspect_ai/_util/registry.py +7 -1
  34. inspect_ai/_view/view.py +1 -2
  35. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  36. inspect_ai/_view/www/.vscode/settings.json +8 -0
  37. inspect_ai/_view/www/App.css +97 -29
  38. inspect_ai/_view/www/README.md +1 -1
  39. inspect_ai/_view/www/dist/assets/index.css +16663 -14674
  40. inspect_ai/_view/www/dist/assets/index.js +58808 -51348
  41. inspect_ai/_view/www/dist/index.html +1 -1
  42. inspect_ai/_view/www/index.html +2 -2
  43. inspect_ai/_view/www/log-schema.json +87 -73
  44. inspect_ai/_view/www/package.json +22 -4
  45. inspect_ai/_view/www/postcss.config.cjs +8 -9
  46. inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
  47. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  48. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  49. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  50. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  51. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  52. inspect_ai/_view/www/src/api/index.ts +4 -4
  53. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  54. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  55. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  56. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  57. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  58. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  59. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  60. inspect_ai/_view/www/src/components/Card.css +60 -0
  61. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  62. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  63. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  64. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  65. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  66. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  67. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  68. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  69. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  70. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  71. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  72. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  73. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  74. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  75. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  76. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  77. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  78. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  79. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  80. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  81. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  82. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  83. inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
  84. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  85. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  86. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  87. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  88. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  89. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  90. inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
  91. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  92. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  93. inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
  94. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  95. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  96. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  97. inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
  98. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  99. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  100. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  101. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  102. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  103. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
  104. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  105. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  106. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  107. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  108. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  109. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  110. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  111. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  112. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  113. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  114. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  115. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  116. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  117. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  118. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
  119. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  120. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  121. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  122. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  123. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  124. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  126. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  127. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
  128. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  129. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
  130. inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
  131. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  132. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  133. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  134. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  135. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  136. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
  137. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  138. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  139. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  140. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
  141. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  142. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
  143. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  144. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
  145. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  146. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
  147. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  148. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
  149. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  150. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  151. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
  152. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
  153. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  154. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  155. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  156. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  157. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  158. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  159. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  160. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  161. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  162. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  163. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  164. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  165. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  166. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  167. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  168. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  169. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  170. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  171. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  172. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  173. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  174. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  175. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  176. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  177. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  179. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  180. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  181. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  182. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
  183. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  184. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
  185. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  186. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  187. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  188. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  189. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  190. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  191. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  192. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  193. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  194. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  196. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  197. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  198. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  199. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  200. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  201. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  202. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  203. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  204. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  205. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  206. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
  207. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  208. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  209. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  210. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  211. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  212. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  213. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  214. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  215. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
  216. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  217. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  218. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  219. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  220. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  221. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
  222. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  223. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
  224. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  225. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  226. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  227. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  228. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  229. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
  230. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  231. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  232. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
  233. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  234. inspect_ai/_view/www/src/types/log.d.ts +108 -19
  235. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  236. inspect_ai/_view/www/src/types.ts +71 -0
  237. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
  238. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  239. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
  240. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  241. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  242. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  243. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  244. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  245. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  246. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  247. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  248. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  249. inspect_ai/_view/www/src/utils/format.ts +194 -0
  250. inspect_ai/_view/www/src/utils/git.ts +7 -0
  251. inspect_ai/_view/www/src/utils/html.ts +6 -0
  252. inspect_ai/_view/www/src/utils/http.ts +14 -0
  253. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  254. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  255. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  256. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  257. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  258. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  259. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  260. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  261. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
  262. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  263. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  264. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  265. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  266. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  267. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
  268. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
  269. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
  270. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  271. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
  272. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  273. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  274. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  275. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  276. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  277. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  278. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  279. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  280. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  281. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  282. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  283. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
  284. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  285. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
  286. inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
  287. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
  288. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  289. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  290. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  291. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  292. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  293. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  294. inspect_ai/_view/www/src/workspace/utils.ts +34 -0
  295. inspect_ai/_view/www/tsconfig.json +23 -9
  296. inspect_ai/_view/www/vite.config.js +8 -17
  297. inspect_ai/_view/www/yarn.lock +627 -556
  298. inspect_ai/approval/_approval.py +2 -0
  299. inspect_ai/approval/_approver.py +4 -4
  300. inspect_ai/approval/_auto.py +1 -1
  301. inspect_ai/approval/_human/approver.py +3 -0
  302. inspect_ai/approval/_policy.py +5 -0
  303. inspect_ai/approval/_registry.py +2 -2
  304. inspect_ai/dataset/_dataset.py +64 -37
  305. inspect_ai/dataset/_sources/__init__.py +0 -0
  306. inspect_ai/dataset/_sources/csv.py +20 -12
  307. inspect_ai/dataset/_sources/file.py +4 -0
  308. inspect_ai/dataset/_sources/hf.py +39 -29
  309. inspect_ai/dataset/_sources/json.py +17 -9
  310. inspect_ai/log/__init__.py +2 -0
  311. inspect_ai/log/_convert.py +3 -3
  312. inspect_ai/log/_file.py +24 -9
  313. inspect_ai/log/_log.py +101 -13
  314. inspect_ai/log/_message.py +4 -2
  315. inspect_ai/log/_recorders/file.py +4 -0
  316. inspect_ai/log/_recorders/json.py +5 -7
  317. inspect_ai/log/_recorders/recorder.py +3 -0
  318. inspect_ai/log/_transcript.py +19 -8
  319. inspect_ai/model/__init__.py +2 -0
  320. inspect_ai/model/_cache.py +39 -21
  321. inspect_ai/model/_call_tools.py +4 -3
  322. inspect_ai/model/_chat_message.py +14 -4
  323. inspect_ai/model/_generate_config.py +1 -1
  324. inspect_ai/model/_model.py +31 -24
  325. inspect_ai/model/_model_output.py +14 -1
  326. inspect_ai/model/_openai.py +10 -18
  327. inspect_ai/model/_providers/anthropic.py +3 -3
  328. inspect_ai/model/_providers/google.py +9 -5
  329. inspect_ai/model/_providers/openai.py +5 -9
  330. inspect_ai/model/_providers/openai_o1.py +3 -5
  331. inspect_ai/model/_providers/openrouter.py +86 -0
  332. inspect_ai/model/_providers/providers.py +11 -0
  333. inspect_ai/scorer/__init__.py +6 -1
  334. inspect_ai/scorer/_answer.py +7 -7
  335. inspect_ai/scorer/_classification.py +38 -18
  336. inspect_ai/scorer/_common.py +2 -8
  337. inspect_ai/scorer/_match.py +4 -5
  338. inspect_ai/scorer/_metric.py +87 -28
  339. inspect_ai/scorer/_metrics/__init__.py +3 -3
  340. inspect_ai/scorer/_metrics/accuracy.py +8 -10
  341. inspect_ai/scorer/_metrics/mean.py +3 -17
  342. inspect_ai/scorer/_metrics/std.py +111 -30
  343. inspect_ai/scorer/_model.py +12 -12
  344. inspect_ai/scorer/_pattern.py +3 -3
  345. inspect_ai/scorer/_reducer/reducer.py +36 -21
  346. inspect_ai/scorer/_reducer/registry.py +2 -2
  347. inspect_ai/scorer/_reducer/types.py +7 -1
  348. inspect_ai/scorer/_score.py +11 -1
  349. inspect_ai/scorer/_scorer.py +110 -16
  350. inspect_ai/solver/__init__.py +1 -1
  351. inspect_ai/solver/_basic_agent.py +19 -22
  352. inspect_ai/solver/_bridge/__init__.py +0 -3
  353. inspect_ai/solver/_bridge/bridge.py +3 -3
  354. inspect_ai/solver/_chain.py +1 -2
  355. inspect_ai/solver/_critique.py +3 -3
  356. inspect_ai/solver/_fork.py +2 -2
  357. inspect_ai/solver/_human_agent/__init__.py +0 -0
  358. inspect_ai/solver/_human_agent/agent.py +5 -8
  359. inspect_ai/solver/_human_agent/commands/clock.py +14 -10
  360. inspect_ai/solver/_human_agent/commands/note.py +1 -1
  361. inspect_ai/solver/_human_agent/commands/score.py +0 -11
  362. inspect_ai/solver/_multiple_choice.py +38 -26
  363. inspect_ai/solver/_prompt.py +7 -7
  364. inspect_ai/solver/_solver.py +53 -52
  365. inspect_ai/solver/_task_state.py +80 -69
  366. inspect_ai/solver/_use_tools.py +9 -9
  367. inspect_ai/tool/__init__.py +4 -1
  368. inspect_ai/tool/_tool.py +43 -14
  369. inspect_ai/tool/_tool_call.py +6 -2
  370. inspect_ai/tool/_tool_choice.py +3 -1
  371. inspect_ai/tool/_tool_def.py +10 -8
  372. inspect_ai/tool/_tool_params.py +24 -0
  373. inspect_ai/tool/_tool_with.py +7 -7
  374. inspect_ai/tool/_tools/__init__.py +0 -0
  375. inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
  376. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
  377. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  378. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  379. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  380. inspect_ai/tool/_tools/_execute.py +23 -11
  381. inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
  382. inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
  383. inspect_ai/tool/_tools/_web_search.py +7 -5
  384. inspect_ai/tool/beta.py +3 -0
  385. inspect_ai/util/_concurrency.py +3 -3
  386. inspect_ai/util/_panel.py +2 -0
  387. inspect_ai/util/_resource.py +12 -12
  388. inspect_ai/util/_sandbox/docker/compose.py +23 -20
  389. inspect_ai/util/_sandbox/docker/config.py +2 -1
  390. inspect_ai/util/_sandbox/docker/docker.py +42 -86
  391. inspect_ai/util/_sandbox/docker/service.py +100 -0
  392. inspect_ai/util/_sandbox/environment.py +99 -96
  393. inspect_ai/util/_sandbox/self_check.py +124 -16
  394. inspect_ai/util/_subprocess.py +5 -3
  395. inspect_ai/util/_subtask.py +15 -16
  396. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
  397. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
  398. inspect_ai-0.3.64.dist-info/RECORD +625 -0
  399. inspect_ai/_view/www/src/Register.mjs +0 -3
  400. inspect_ai/_view/www/src/Types.mjs +0 -38
  401. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  402. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  403. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  404. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  405. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  406. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  407. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  408. inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
  409. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  410. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  411. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  412. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  413. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  414. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  415. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  416. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  417. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  418. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  419. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  420. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  421. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  422. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  423. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  424. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  425. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  426. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  427. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  428. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  429. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  430. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  431. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  432. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  433. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  434. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  435. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  436. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  437. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  438. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  439. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  440. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  441. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  442. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  443. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  444. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  445. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  446. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  447. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  448. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  449. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  450. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  451. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  452. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  453. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  454. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  455. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  456. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  457. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  458. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  459. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  460. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  461. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  462. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  463. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  464. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  465. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  466. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  467. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  468. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  469. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  470. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  471. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  472. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  473. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  474. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  475. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  476. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  477. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
  478. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  479. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  480. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  481. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  482. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  483. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  484. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  485. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  486. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  487. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  488. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  489. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  490. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  491. inspect_ai/tool/beta/__init__.py +0 -5
  492. inspect_ai-0.3.62.dist-info/RECORD +0 -481
  493. /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
  494. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
  495. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  496. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  497. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  498. /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
  499. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  500. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  501. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  502. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  503. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  504. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  505. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  506. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
  507. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
  508. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  509. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  510. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
  511. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  512. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  513. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  514. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  515. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  516. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
  517. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
  518. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/cache.py CHANGED
@@ -42,7 +42,10 @@ def _print_table(title: str, paths: list[tuple[str, int]]) -> None:
42
42
 
43
43
  @click.group("cache")
44
44
  def cache_command() -> None:
45
- """Manage the inspect cache."""
45
+ """Manage the inspect model output cache.
46
+
47
+ Learn more about model output caching at https://inspect.ai-safety-institute.org.uk/caching.html.
48
+ """
46
49
  return None
47
50
 
48
51
 
@@ -62,11 +65,9 @@ def cache_command() -> None:
62
65
  type=str,
63
66
  help="Clear the cache for a specific model (e.g. --model=openai/gpt-4). Can be passed multiple times.",
64
67
  )
65
- def clear(
66
- all: bool, model: tuple[str, ...], log_level: str, log_level_transcript: str
67
- ) -> None:
68
+ def clear(all: bool, model: tuple[str, ...], log_level: str) -> None:
68
69
  """Clear all cache files. Requires either --all or --model flags."""
69
- init_logger(log_level, log_level_transcript)
70
+ init_logger(log_level)
70
71
 
71
72
  if model:
72
73
  _print_table(
@@ -119,14 +120,14 @@ def list_caches(pruneable: bool) -> None:
119
120
  type=str,
120
121
  help="Only prune a specific model (e.g. --model=openai/gpt-4). Can be passed multiple times.",
121
122
  )
122
- def prune(log_level: str, log_level_transcript: str, model: tuple[str, ...]) -> None:
123
+ def prune(log_level: str, model: tuple[str, ...]) -> None:
123
124
  """Prune all expired cache entries
124
125
 
125
126
  Over time the cache directory can grow, but many cache entries will be
126
127
  expired. This command will remove all expired cache entries for ease of
127
128
  maintenance.
128
129
  """
129
- init_logger(log_level, log_level_transcript)
130
+ init_logger(log_level)
130
131
 
131
132
  expired_cache_entries = cache_list_expired(list(model))
132
133
 
inspect_ai/_cli/common.py CHANGED
@@ -9,14 +9,12 @@ from inspect_ai._util.constants import (
9
9
  ALL_LOG_LEVELS,
10
10
  DEFAULT_DISPLAY,
11
11
  DEFAULT_LOG_LEVEL,
12
- DEFAULT_LOG_LEVEL_TRANSCRIPT,
13
12
  )
14
13
  from inspect_ai.util._display import init_display_type
15
14
 
16
15
 
17
16
  class CommonOptions(TypedDict):
18
17
  log_level: str
19
- log_level_transcript: str
20
18
  log_dir: str
21
19
  display: Literal["full", "conversation", "rich", "plain", "none"]
22
20
  no_ansi: bool | None
@@ -36,16 +34,6 @@ def log_level_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
36
34
  envvar="INSPECT_LOG_LEVEL",
37
35
  help=f"Set the log level (defaults to '{DEFAULT_LOG_LEVEL}')",
38
36
  )
39
- @click.option(
40
- "--log-level-transcript",
41
- type=click.Choice(
42
- [level.lower() for level in ALL_LOG_LEVELS],
43
- case_sensitive=False,
44
- ),
45
- default=DEFAULT_LOG_LEVEL_TRANSCRIPT,
46
- envvar="INSPECT_LOG_LEVEL_TRANSCRIPT",
47
- help=f"Set the log level of the transcript (defaults to '{DEFAULT_LOG_LEVEL_TRANSCRIPT}')",
48
- )
49
37
  @functools.wraps(func)
50
38
  def wrapper(*args: Any, **kwargs: Any) -> click.Context:
51
39
  return cast(click.Context, func(*args, **kwargs))
inspect_ai/_cli/eval.py CHANGED
@@ -7,7 +7,9 @@ from typing_extensions import Unpack
7
7
  from inspect_ai import Epochs, eval, eval_retry
8
8
  from inspect_ai._eval.evalset import eval_set
9
9
  from inspect_ai._util.constants import (
10
+ ALL_LOG_LEVELS,
10
11
  DEFAULT_EPOCHS,
12
+ DEFAULT_LOG_LEVEL_TRANSCRIPT,
11
13
  DEFAULT_MAX_CONNECTIONS,
12
14
  DEFAULT_MAX_RETRIES,
13
15
  )
@@ -399,6 +401,16 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
399
401
  envvar=["INSPECT_LOG_FORMAT", "INSPECT_EVAL_LOG_FORMAT"],
400
402
  help="Format for writing log files.",
401
403
  )
404
+ @click.option(
405
+ "--log-level-transcript",
406
+ type=click.Choice(
407
+ [level.lower() for level in ALL_LOG_LEVELS],
408
+ case_sensitive=False,
409
+ ),
410
+ default=DEFAULT_LOG_LEVEL_TRANSCRIPT,
411
+ envvar="INSPECT_LOG_LEVEL_TRANSCRIPT",
412
+ help=f"Set the log level of the transcript (defaults to '{DEFAULT_LOG_LEVEL_TRANSCRIPT}')",
413
+ )
402
414
  @common_options
403
415
  @functools.wraps(func)
404
416
  def wrapper(*args: Any, **kwargs: Any) -> click.Context:
@@ -468,6 +480,7 @@ def eval_command(
468
480
  no_score: bool | None,
469
481
  no_score_display: bool | None,
470
482
  log_format: Literal["eval", "json"] | None,
483
+ log_level_transcript: str,
471
484
  **common: Unpack[CommonOptions],
472
485
  ) -> None:
473
486
  """Evaluate tasks."""
@@ -482,7 +495,7 @@ def eval_command(
482
495
  tasks=tasks,
483
496
  solver=solver,
484
497
  log_level=common["log_level"],
485
- log_level_transcript=common["log_level_transcript"],
498
+ log_level_transcript=log_level_transcript,
486
499
  log_dir=common["log_dir"],
487
500
  log_format=log_format,
488
501
  model=model,
@@ -630,9 +643,13 @@ def eval_set_command(
630
643
  bundle_dir: str | None,
631
644
  bundle_overwrite: bool | None,
632
645
  log_format: Literal["eval", "json"] | None,
646
+ log_level_transcript: str,
633
647
  **common: Unpack[CommonOptions],
634
648
  ) -> int:
635
- """Evaluate a set of tasks."""
649
+ """Evaluate a set of tasks with retries.
650
+
651
+ Learn more about eval sets at https://inspect.ai-safety-institute.org.uk/eval-sets.html.
652
+ """
636
653
  # read config
637
654
  config = config_from_locals(dict(locals()))
638
655
 
@@ -644,7 +661,7 @@ def eval_set_command(
644
661
  tasks=tasks,
645
662
  solver=solver,
646
663
  log_level=common["log_level"],
647
- log_level_transcript=common["log_level_transcript"],
664
+ log_level_transcript=log_level_transcript,
648
665
  log_dir=common["log_dir"],
649
666
  log_format=log_format,
650
667
  model=model,
@@ -967,6 +984,16 @@ def parse_comma_separated(value: str | None) -> list[str] | None:
967
984
  "--max-retries", type=int, help=MAX_RETRIES_HELP, envvar="INSPECT_EVAL_MAX_RETRIES"
968
985
  )
969
986
  @click.option("--timeout", type=int, help=TIMEOUT_HELP, envvar="INSPECT_EVAL_TIMEOUT")
987
+ @click.option(
988
+ "--log-level-transcript",
989
+ type=click.Choice(
990
+ [level.lower() for level in ALL_LOG_LEVELS],
991
+ case_sensitive=False,
992
+ ),
993
+ default=DEFAULT_LOG_LEVEL_TRANSCRIPT,
994
+ envvar="INSPECT_LOG_LEVEL_TRANSCRIPT",
995
+ help=f"Set the log level of the transcript (defaults to '{DEFAULT_LOG_LEVEL_TRANSCRIPT}')",
996
+ )
970
997
  @common_options
971
998
  def eval_retry_command(
972
999
  log_files: tuple[str],
@@ -986,6 +1013,7 @@ def eval_retry_command(
986
1013
  max_connections: int | None,
987
1014
  max_retries: int | None,
988
1015
  timeout: int | None,
1016
+ log_level_transcript: str,
989
1017
  **common: Unpack[CommonOptions],
990
1018
  ) -> None:
991
1019
  """Retry failed evaluation(s)"""
@@ -1014,7 +1042,7 @@ def eval_retry_command(
1014
1042
  eval_retry(
1015
1043
  retry_log_files,
1016
1044
  log_level=common["log_level"],
1017
- log_level_transcript=common["log_level_transcript"],
1045
+ log_level_transcript=log_level_transcript,
1018
1046
  log_dir=common["log_dir"],
1019
1047
  max_samples=max_samples,
1020
1048
  max_tasks=max_tasks,
inspect_ai/_cli/info.py CHANGED
@@ -25,6 +25,7 @@ def info_command() -> None:
25
25
  help="Output version and path info as JSON",
26
26
  )
27
27
  def version(json: bool) -> None:
28
+ """Output version and path info."""
28
29
  if json:
29
30
  print(dumps(dict(version=__version__, path=PKG_PATH.as_posix()), indent=2))
30
31
  else:
inspect_ai/_cli/list.py CHANGED
@@ -14,7 +14,7 @@ from inspect_ai._eval.task import TaskInfo
14
14
 
15
15
  @click.group("list")
16
16
  def list_command() -> None:
17
- """List tasks or eval logs."""
17
+ """List tasks on the filesystem."""
18
18
  return None
19
19
 
20
20
 
inspect_ai/_cli/log.py CHANGED
@@ -29,6 +29,8 @@ def log_command() -> None:
29
29
  The default format is 'eval'. You can change this by setting the INSPECT_LOG_FORMAT environment variable or using the --log-format command line option.
30
30
 
31
31
  The 'log' commands enable you to read Inspect logs uniformly as JSON no matter their physical storage format, and also enable you to read only the headers (everything but the samples) from log files, which is useful for very large logs.
32
+
33
+ Learn more about managing log files at https://inspect.ai-safety-institute.org.uk/eval-logs.html.
32
34
  """
33
35
  return None
34
36
 
inspect_ai/_cli/main.py CHANGED
@@ -53,7 +53,7 @@ inspect.add_command(trace_command)
53
53
  def main() -> None:
54
54
  set_exception_hook()
55
55
  init_dotenv()
56
- inspect(auto_envvar_prefix="INSPECT")
56
+ inspect(auto_envvar_prefix="INSPECT") # pylint: disable=no-value-for-parameter
57
57
 
58
58
 
59
59
  if __name__ == "__main__":
@@ -7,7 +7,10 @@ from inspect_ai.util._sandbox.registry import registry_find_sandboxenv
7
7
 
8
8
  @click.group("sandbox")
9
9
  def sandbox_command() -> None:
10
- """Manage Sandbox Environments."""
10
+ """Manage Sandbox Environments.
11
+
12
+ Learn more about sandboxing at https://inspect.ai-safety-institute.org.uk/sandboxing.html.
13
+ """
11
14
  return None
12
15
 
13
16
 
inspect_ai/_cli/score.py CHANGED
@@ -2,33 +2,61 @@ import asyncio
2
2
  import os
3
3
 
4
4
  import click
5
+ import rich
6
+ from rich.panel import Panel
7
+ from rich.prompt import Prompt
8
+ from rich.table import Table
5
9
  from typing_extensions import Unpack
6
10
 
11
+ from inspect_ai._cli.util import parse_cli_config
7
12
  from inspect_ai._display import display
13
+ from inspect_ai._display.core.rich import rich_theme
8
14
  from inspect_ai._eval.context import init_eval_context, init_task_context
9
- from inspect_ai._eval.loader import load_tasks
10
- from inspect_ai._eval.score import task_score
11
- from inspect_ai._util.constants import SCORED_SUFFIX
15
+ from inspect_ai._eval.score import ScoreAction, task_score
16
+ from inspect_ai._util.file import basename, dirname, exists
17
+ from inspect_ai.log._log import EvalLog
12
18
  from inspect_ai.log._recorders import create_recorder_for_location
13
19
  from inspect_ai.model import get_model
14
20
 
15
21
  from .common import CommonOptions, common_options, process_common_options
16
22
 
23
+ SCORES_PER_ROW = 4
24
+
17
25
 
18
26
  @click.command("score")
19
- @click.argument("task", type=str)
20
27
  @click.argument("log-file", type=str, required=True)
21
28
  @click.option(
22
- "--no-overwrite",
29
+ "--scorer",
30
+ type=str,
31
+ envvar="INSPECT_SCORE_SCORER",
32
+ help="Scorer to use for scoring",
33
+ )
34
+ @click.option(
35
+ "-S",
36
+ multiple=True,
37
+ type=str,
38
+ envvar="INSPECT_SCORE_SCORER_ARGS",
39
+ help="One or more scorer arguments (e.g. -S arg=value)",
40
+ )
41
+ @click.option(
42
+ "--action",
43
+ type=click.Choice(["append", "overwrite"]),
44
+ envvar="INSPECT_SCORE_SCORER_ACTION",
45
+ help="Whether to append or overwrite the existing scores.",
46
+ )
47
+ @click.option(
48
+ "--overwrite",
23
49
  type=bool,
24
50
  is_flag=True,
25
- help="Do not overwrite unscored log_files with the scored version (instead write a new file w/ '-scored' appended)",
51
+ help="Overwrite log file with the scored version",
26
52
  )
27
53
  @common_options
28
54
  def score_command(
29
- task: str,
30
55
  log_file: str,
31
- no_overwrite: bool | None,
56
+ overwrite: bool | None,
57
+ scorer: str | None,
58
+ s: tuple[str] | None,
59
+ action: ScoreAction | None,
32
60
  **common: Unpack[CommonOptions],
33
61
  ) -> None:
34
62
  """Score a previous evaluation run."""
@@ -38,31 +66,43 @@ def score_command(
38
66
  # score
39
67
  asyncio.run(
40
68
  score(
41
- task,
42
- common["log_dir"],
43
- log_file,
44
- False if no_overwrite else True,
45
- common["log_level"],
46
- common["log_level_transcript"],
69
+ log_dir=common["log_dir"],
70
+ log_file=log_file,
71
+ scorer=scorer,
72
+ s=s,
73
+ overwrite=False if overwrite is None else overwrite,
74
+ action=action,
75
+ log_level=common["log_level"],
47
76
  )
48
77
  )
49
78
 
50
79
 
51
80
  async def score(
52
- task: str,
53
81
  log_dir: str,
54
82
  log_file: str,
83
+ scorer: str | None,
84
+ s: tuple[str] | None,
55
85
  overwrite: bool,
86
+ action: ScoreAction | None,
56
87
  log_level: str | None,
57
- log_level_transcript: str | None,
88
+ output_file: str | None = None,
58
89
  ) -> None:
59
90
  # init eval context
60
- init_eval_context(log_level, log_level_transcript)
91
+ init_eval_context(log_level, None)
92
+ scorer_args = parse_cli_config(args=s, config=None)
61
93
 
62
94
  # read the eval log
63
95
  recorder = create_recorder_for_location(log_file, log_dir)
64
96
  eval_log = await recorder.read_log(log_file)
65
97
 
98
+ # resolve the target output file (prompts user)
99
+ output_file = resolve_output_file(
100
+ log_file, output_file=output_file, overwrite=overwrite
101
+ )
102
+
103
+ # resolve action
104
+ action = resolve_action(eval_log, action)
105
+
66
106
  # check that there are samples therein
67
107
  if eval_log.samples is None or len(eval_log.samples) == 0:
68
108
  raise ValueError(f"{log_file} does not include samples to score")
@@ -77,23 +117,132 @@ async def score(
77
117
  # initialize active model
78
118
  init_task_context(model)
79
119
 
80
- # instantiate the task so we can get its scorer and metrics
81
- score_task = load_tasks([task], model)[0]
82
-
83
120
  # re-score the task
84
- eval_log = await task_score(score_task, eval_log)
121
+ eval_log = await task_score(
122
+ log=eval_log, scorer=scorer, scorer_args=scorer_args, action=action
123
+ )
85
124
 
86
- # re-write the log (w/ a -score suffix if requested)
87
- _, ext = os.path.splitext(log_file)
88
- scored = f"{SCORED_SUFFIX}{ext}"
89
- if not overwrite and not log_file.endswith(scored):
90
- log_file = log_file.removesuffix(ext) + scored
91
- await recorder.write_log(log_file, eval_log)
125
+ # re-write the log
126
+ await recorder.write_log(output_file, eval_log)
92
127
 
93
128
  # print results
94
- display().print(f"\n{eval_log.eval.task}")
129
+ print_results(output_file, eval_log)
130
+
131
+
132
+ def print_results(output_file: str, eval_log: EvalLog) -> None:
133
+ # the theme
134
+ theme = rich_theme()
135
+
136
+ # Create results panel
137
+ grid = Table.grid(expand=True)
138
+ grid.add_column()
139
+ grid.add_row("")
140
+
95
141
  if eval_log.results:
96
- for score in eval_log.results.scores:
97
- for name, metric in score.metrics.items():
98
- display().print(f"{name}: {metric.value}")
99
- display().print(f"log: {log_file}\n")
142
+ # Process scores in groups
143
+ for i in range(0, len(eval_log.results.scores), SCORES_PER_ROW):
144
+ # Create a grid for this row of scores
145
+ score_row = Table.grid(
146
+ expand=False,
147
+ padding=(0, 2, 0, 0),
148
+ )
149
+
150
+ # Add columns for each score in this row
151
+ for _ in range(SCORES_PER_ROW):
152
+ score_row.add_column()
153
+
154
+ # Create individual score tables and add them to the row
155
+ score_tables: list[Table | str] = []
156
+ for score in eval_log.results.scores[i : i + SCORES_PER_ROW]:
157
+ table = Table(
158
+ show_header=False, show_lines=False, box=None, show_edge=False
159
+ )
160
+ table.add_column()
161
+ table.add_column()
162
+
163
+ # Add score name and metrics
164
+ table.add_row(f"[bold]{score.name}[/bold]")
165
+ for name, metric in score.metrics.items():
166
+ table.add_row(f"{name}", f"{metric.value:.3f}")
167
+
168
+ score_tables.append(table)
169
+
170
+ # Fill remaining slots with empty tables if needed
171
+ while len(score_tables) < SCORES_PER_ROW:
172
+ score_tables.append("")
173
+
174
+ # Add the score tables to this row
175
+ score_row.add_row(*score_tables)
176
+
177
+ # Add this row of scores to the main grid
178
+ grid.add_row(score_row)
179
+
180
+ grid.add_row("")
181
+ grid.add_row(f" Log: [{theme.link}]{output_file}[/{theme.link}]")
182
+
183
+ p = Panel(
184
+ title=f"[bold][{theme.meta}]Results for {eval_log.eval.task}[/bold][/{theme.meta}]",
185
+ title_align="left",
186
+ renderable=grid,
187
+ )
188
+
189
+ # Print the results panel
190
+ display().print("")
191
+ console = rich.get_console()
192
+ console.print(p)
193
+
194
+
195
+ def resolve_output_file(log_file: str, output_file: str | None, overwrite: bool) -> str:
196
+ # resolve the output file (we may overwrite, use the passed file name, or suggest a new name)
197
+ if output_file is None:
198
+ if overwrite:
199
+ # explicitly asked to overwrite
200
+ return log_file
201
+ else:
202
+ if exists(log_file):
203
+ # Ask if we should overwrite
204
+ file_action = Prompt.ask(
205
+ "Overwrite existing log file or create new log file?",
206
+ choices=["overwrite", "create", "o", "c"],
207
+ default="create",
208
+ )
209
+ if file_action in ["overwrite", "o"]:
210
+ return log_file
211
+ else:
212
+ file_name = basename(log_file)
213
+ base_dir = dirname(log_file)
214
+ _, ext = os.path.splitext(file_name)
215
+
216
+ count = 0
217
+
218
+ def filename() -> str:
219
+ if count > 0:
220
+ return f"{file_name.removesuffix(ext)}-scored-{count}{ext}"
221
+ else:
222
+ return f"{file_name.removesuffix(ext)}-scored{ext}"
223
+
224
+ while exists(f"{os.path.join(base_dir, filename())}"):
225
+ count = count + 1
226
+
227
+ suggested_file = filename()
228
+ user_file = Prompt.ask("Output file name?", default=suggested_file)
229
+ return os.path.join(base_dir, user_file)
230
+ else:
231
+ return log_file
232
+ else:
233
+ return output_file
234
+
235
+
236
+ def resolve_action(eval_log: EvalLog, action: ScoreAction | None) -> ScoreAction:
237
+ if action is not None:
238
+ return action
239
+
240
+ if eval_log.results is not None and len(eval_log.results.scores) > 0:
241
+ user_action = Prompt.ask(
242
+ "Overwrite existing scores or append as additional scores?",
243
+ choices=["overwrite", "append", "o", "a"],
244
+ default="append",
245
+ )
246
+ return "overwrite" if user_action in ["ovewrite", "o"] else "append"
247
+ else:
248
+ return "overwrite"
inspect_ai/_cli/trace.py CHANGED
@@ -26,6 +26,8 @@ def trace_command() -> None:
26
26
  """List and read execution traces.
27
27
 
28
28
  Inspect includes a TRACE log-level which is right below the HTTP and INFO log levels (so not written to the console by default). However, TRACE logs are always recorded to a separate file, and the last 10 TRACE logs are preserved. The 'trace' command provides ways to list and read these traces.
29
+
30
+ Learn more about execution traces at https://inspect.ai-safety-institute.org.uk/tracing.html.
29
31
  """
30
32
  return None
31
33
 
@@ -109,11 +111,13 @@ def anomolies_command(trace_file: str | None, filter: str | None, all: bool) ->
109
111
  canceled_actions: dict[str, ActionTraceRecord] = {}
110
112
  error_actions: dict[str, ActionTraceRecord] = {}
111
113
  timeout_actions: dict[str, ActionTraceRecord] = {}
114
+ start_trace: ActionTraceRecord | None = None
112
115
 
113
116
  def action_started(trace: ActionTraceRecord) -> None:
114
117
  running_actions[trace.trace_id] = trace
115
118
 
116
119
  def action_completed(trace: ActionTraceRecord) -> ActionTraceRecord:
120
+ nonlocal start_trace
117
121
  start_trace = running_actions.get(trace.trace_id)
118
122
  if start_trace:
119
123
  del running_actions[trace.trace_id]
@@ -122,14 +126,20 @@ def anomolies_command(trace_file: str | None, filter: str | None, all: bool) ->
122
126
  raise RuntimeError(f"Expected {trace.trace_id} in action dictionary.")
123
127
 
124
128
  def action_failed(trace: ActionTraceRecord) -> None:
129
+ nonlocal start_trace
125
130
  if all:
131
+ assert start_trace
126
132
  error_actions[start_trace.trace_id] = trace
127
133
 
128
134
  def action_canceled(trace: ActionTraceRecord) -> None:
135
+ nonlocal start_trace
136
+ assert start_trace
129
137
  canceled_actions[start_trace.trace_id] = trace
130
138
 
131
139
  def action_timeout(trace: ActionTraceRecord) -> None:
140
+ nonlocal start_trace
132
141
  if all:
142
+ assert start_trace
133
143
  timeout_actions[start_trace.trace_id] = trace
134
144
 
135
145
  for trace in traces:
inspect_ai/_cli/view.py CHANGED
@@ -39,7 +39,10 @@ def start_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
39
39
  @common_options
40
40
  @click.pass_context
41
41
  def view_command(ctx: click.Context, **kwargs: Unpack[CommonOptions]) -> None:
42
- """View command group."""
42
+ """Inspect log viewer.
43
+
44
+ Learn more about using the log viewer at https://inspect.ai-safety-institute.org.uk/log-viewer.html.
45
+ """
43
46
  if ctx.invoked_subcommand is None:
44
47
  ctx.invoke(start, **kwargs)
45
48
  else:
@@ -78,7 +81,6 @@ def start(
78
81
  port=port,
79
82
  authorization=authorization,
80
83
  log_level=common["log_level"],
81
- log_level_transcript=common["log_level_transcript"],
82
84
  )
83
85
 
84
86
 
@@ -10,6 +10,8 @@ from ..rich.display import RichDisplay
10
10
  from ..textual.display import TextualDisplay
11
11
  from .display import Display, TaskScreen
12
12
 
13
+ _active_display: Display | None = None
14
+
13
15
 
14
16
  def display() -> Display:
15
17
  global _active_display
@@ -28,9 +30,6 @@ def display() -> Display:
28
30
  return _active_display
29
31
 
30
32
 
31
- _active_display: Display | None = None
32
-
33
-
34
33
  def task_screen() -> TaskScreen:
35
34
  screen = _active_task_screen.get(None)
36
35
  if screen is None:
@@ -1,4 +1,5 @@
1
1
  from inspect_ai._util.registry import is_registry_dict
2
+ from inspect_ai.log._log import eval_config_defaults
2
3
 
3
4
  from .display import TaskProfile
4
5
 
@@ -13,7 +14,12 @@ def task_config(
13
14
  value = task_args[key]
14
15
  if is_registry_dict(value):
15
16
  task_args[key] = value["name"]
16
- config = dict(profile.eval_config.model_dump(exclude_none=True)) | task_args
17
+ # get eval_config overrides
18
+ eval_config = dict(profile.eval_config.model_dump(exclude_none=True))
19
+ for name, default_value in eval_config_defaults().items():
20
+ if eval_config.get(name, None) == default_value:
21
+ del eval_config[name]
22
+ config = eval_config | task_args
17
23
  if generate_config:
18
24
  config = dict(profile.generate_config.model_dump(exclude_none=True)) | config
19
25
  if profile.tags:
@@ -347,7 +347,7 @@ class SampleLimits(Widget):
347
347
  class SandboxesView(Vertical):
348
348
  DEFAULT_CSS = """
349
349
  SandboxesView {
350
- padding: 1 0 1 0;
350
+ padding: 1 0 0 0;
351
351
  background: transparent;
352
352
  height: auto;
353
353
  }
@@ -358,6 +358,7 @@ class SandboxesView(Vertical):
358
358
  background: transparent;
359
359
  }
360
360
  .clipboard-message {
361
+ height: auto;
361
362
  margin-top: 1;
362
363
  }
363
364
  """
@@ -372,7 +373,6 @@ class SandboxesView(Vertical):
372
373
  async def sync_sample(self, sample: ActiveSample) -> None:
373
374
  if len(sample.sandboxes) > 0:
374
375
  multiple_sandboxes = len(sample.sandboxes) > 1
375
- self.display = True
376
376
  sandboxes_caption = cast(Static, self.query_one("#sandboxes-caption"))
377
377
  sandboxes_caption.update(
378
378
  f"[bold]sandbox container{'s' if multiple_sandboxes else ''}:[/bold]"
@@ -395,6 +395,7 @@ class SandboxesView(Vertical):
395
395
  markup=True,
396
396
  )
397
397
  )
398
+ self.display = True
398
399
  else:
399
400
  self.display = False
400
401
 
@@ -473,7 +474,7 @@ class SampleToolbar(Horizontal):
473
474
  else None
474
475
  )
475
476
  if isinstance(last_event, ToolEvent):
476
- last_event.cancel()
477
+ last_event._cancel()
477
478
  elif event.button.id == self.CANCEL_SCORE_OUTPUT:
478
479
  self.sample.interrupt("score")
479
480
  elif event.button.id == self.CANCEL_RAISE_ERROR:
@@ -9,6 +9,12 @@ from .port_mappings import PortMappingsView
9
9
 
10
10
  class SandboxView(Vertical):
11
11
  DEFAULT_CSS = """
12
+ SandboxView {
13
+ height: auto;
14
+ }
15
+ SandboxView * {
16
+ height: auto;
17
+ }
12
18
  .indent {
13
19
  width: 2;
14
20
  }