inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (518)
  1. inspect_ai/_cli/cache.py +8 -7
  2. inspect_ai/_cli/common.py +0 -12
  3. inspect_ai/_cli/eval.py +32 -4
  4. inspect_ai/_cli/info.py +1 -0
  5. inspect_ai/_cli/list.py +1 -1
  6. inspect_ai/_cli/log.py +2 -0
  7. inspect_ai/_cli/main.py +1 -1
  8. inspect_ai/_cli/sandbox.py +4 -1
  9. inspect_ai/_cli/score.py +181 -32
  10. inspect_ai/_cli/trace.py +10 -0
  11. inspect_ai/_cli/view.py +4 -2
  12. inspect_ai/_display/core/active.py +2 -3
  13. inspect_ai/_display/core/config.py +7 -1
  14. inspect_ai/_display/textual/widgets/samples.py +4 -3
  15. inspect_ai/_display/textual/widgets/sandbox.py +6 -0
  16. inspect_ai/_eval/eval.py +104 -101
  17. inspect_ai/_eval/evalset.py +75 -75
  18. inspect_ai/_eval/loader.py +122 -12
  19. inspect_ai/_eval/registry.py +1 -1
  20. inspect_ai/_eval/run.py +14 -0
  21. inspect_ai/_eval/score.py +125 -36
  22. inspect_ai/_eval/task/log.py +105 -4
  23. inspect_ai/_eval/task/results.py +92 -38
  24. inspect_ai/_eval/task/run.py +9 -2
  25. inspect_ai/_eval/task/sandbox.py +35 -2
  26. inspect_ai/_eval/task/task.py +49 -46
  27. inspect_ai/_util/constants.py +1 -1
  28. inspect_ai/_util/content.py +8 -0
  29. inspect_ai/_util/error.py +2 -0
  30. inspect_ai/_util/file.py +15 -1
  31. inspect_ai/_util/hash.py +1 -1
  32. inspect_ai/_util/logger.py +4 -2
  33. inspect_ai/_util/registry.py +7 -1
  34. inspect_ai/_view/view.py +1 -2
  35. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  36. inspect_ai/_view/www/.vscode/settings.json +8 -0
  37. inspect_ai/_view/www/App.css +97 -29
  38. inspect_ai/_view/www/README.md +1 -1
  39. inspect_ai/_view/www/dist/assets/index.css +16663 -14674
  40. inspect_ai/_view/www/dist/assets/index.js +58808 -51348
  41. inspect_ai/_view/www/dist/index.html +1 -1
  42. inspect_ai/_view/www/index.html +2 -2
  43. inspect_ai/_view/www/log-schema.json +87 -73
  44. inspect_ai/_view/www/package.json +22 -4
  45. inspect_ai/_view/www/postcss.config.cjs +8 -9
  46. inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
  47. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  48. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  49. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  50. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  51. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  52. inspect_ai/_view/www/src/api/index.ts +4 -4
  53. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  54. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  55. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  56. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  57. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  58. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  59. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  60. inspect_ai/_view/www/src/components/Card.css +60 -0
  61. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  62. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  63. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  64. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  65. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  66. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  67. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  68. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  69. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  70. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  71. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  72. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  73. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  74. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  75. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  76. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  77. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  78. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  79. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  80. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  81. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  82. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  83. inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
  84. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  85. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  86. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  87. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  88. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  89. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  90. inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
  91. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  92. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  93. inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
  94. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  95. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  96. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  97. inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
  98. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  99. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  100. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  101. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  102. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  103. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
  104. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  105. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  106. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  107. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  108. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  109. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  110. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  111. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  112. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  113. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  114. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  115. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  116. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  117. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  118. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
  119. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  120. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  121. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  122. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  123. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  124. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  126. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  127. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
  128. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  129. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
  130. inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
  131. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  132. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  133. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  134. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  135. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  136. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
  137. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  138. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  139. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  140. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
  141. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  142. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
  143. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  144. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
  145. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  146. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
  147. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  148. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
  149. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  150. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  151. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
  152. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
  153. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  154. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  155. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  156. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  157. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  158. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  159. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  160. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  161. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  162. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  163. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  164. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  165. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  166. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  167. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  168. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  169. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  170. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  171. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  172. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  173. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  174. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  175. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  176. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  177. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  179. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  180. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  181. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  182. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
  183. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  184. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
  185. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  186. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  187. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  188. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  189. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  190. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  191. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  192. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  193. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  194. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  196. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  197. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  198. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  199. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  200. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  201. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  202. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  203. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  204. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  205. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  206. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
  207. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  208. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  209. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  210. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  211. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  212. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  213. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  214. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  215. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
  216. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  217. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  218. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  219. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  220. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  221. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
  222. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  223. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
  224. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  225. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  226. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  227. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  228. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  229. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
  230. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  231. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  232. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
  233. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  234. inspect_ai/_view/www/src/types/log.d.ts +108 -19
  235. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  236. inspect_ai/_view/www/src/types.ts +71 -0
  237. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
  238. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  239. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
  240. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  241. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  242. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  243. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  244. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  245. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  246. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  247. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  248. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  249. inspect_ai/_view/www/src/utils/format.ts +194 -0
  250. inspect_ai/_view/www/src/utils/git.ts +7 -0
  251. inspect_ai/_view/www/src/utils/html.ts +6 -0
  252. inspect_ai/_view/www/src/utils/http.ts +14 -0
  253. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  254. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  255. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  256. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  257. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  258. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  259. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  260. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  261. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
  262. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  263. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  264. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  265. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  266. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  267. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
  268. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
  269. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
  270. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  271. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
  272. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  273. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  274. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  275. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  276. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  277. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  278. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  279. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  280. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  281. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  282. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  283. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
  284. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  285. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
  286. inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
  287. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
  288. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  289. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  290. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  291. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  292. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  293. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  294. inspect_ai/_view/www/src/workspace/utils.ts +34 -0
  295. inspect_ai/_view/www/tsconfig.json +23 -9
  296. inspect_ai/_view/www/vite.config.js +8 -17
  297. inspect_ai/_view/www/yarn.lock +627 -556
  298. inspect_ai/approval/_approval.py +2 -0
  299. inspect_ai/approval/_approver.py +4 -4
  300. inspect_ai/approval/_auto.py +1 -1
  301. inspect_ai/approval/_human/approver.py +3 -0
  302. inspect_ai/approval/_policy.py +5 -0
  303. inspect_ai/approval/_registry.py +2 -2
  304. inspect_ai/dataset/_dataset.py +64 -37
  305. inspect_ai/dataset/_sources/__init__.py +0 -0
  306. inspect_ai/dataset/_sources/csv.py +20 -12
  307. inspect_ai/dataset/_sources/file.py +4 -0
  308. inspect_ai/dataset/_sources/hf.py +39 -29
  309. inspect_ai/dataset/_sources/json.py +17 -9
  310. inspect_ai/log/__init__.py +2 -0
  311. inspect_ai/log/_convert.py +3 -3
  312. inspect_ai/log/_file.py +24 -9
  313. inspect_ai/log/_log.py +101 -13
  314. inspect_ai/log/_message.py +4 -2
  315. inspect_ai/log/_recorders/file.py +4 -0
  316. inspect_ai/log/_recorders/json.py +5 -7
  317. inspect_ai/log/_recorders/recorder.py +3 -0
  318. inspect_ai/log/_transcript.py +19 -8
  319. inspect_ai/model/__init__.py +2 -0
  320. inspect_ai/model/_cache.py +39 -21
  321. inspect_ai/model/_call_tools.py +4 -3
  322. inspect_ai/model/_chat_message.py +14 -4
  323. inspect_ai/model/_generate_config.py +1 -1
  324. inspect_ai/model/_model.py +31 -24
  325. inspect_ai/model/_model_output.py +14 -1
  326. inspect_ai/model/_openai.py +10 -18
  327. inspect_ai/model/_providers/anthropic.py +3 -3
  328. inspect_ai/model/_providers/google.py +9 -5
  329. inspect_ai/model/_providers/openai.py +5 -9
  330. inspect_ai/model/_providers/openai_o1.py +3 -5
  331. inspect_ai/model/_providers/openrouter.py +86 -0
  332. inspect_ai/model/_providers/providers.py +11 -0
  333. inspect_ai/scorer/__init__.py +6 -1
  334. inspect_ai/scorer/_answer.py +7 -7
  335. inspect_ai/scorer/_classification.py +38 -18
  336. inspect_ai/scorer/_common.py +2 -8
  337. inspect_ai/scorer/_match.py +4 -5
  338. inspect_ai/scorer/_metric.py +87 -28
  339. inspect_ai/scorer/_metrics/__init__.py +3 -3
  340. inspect_ai/scorer/_metrics/accuracy.py +8 -10
  341. inspect_ai/scorer/_metrics/mean.py +3 -17
  342. inspect_ai/scorer/_metrics/std.py +111 -30
  343. inspect_ai/scorer/_model.py +12 -12
  344. inspect_ai/scorer/_pattern.py +3 -3
  345. inspect_ai/scorer/_reducer/reducer.py +36 -21
  346. inspect_ai/scorer/_reducer/registry.py +2 -2
  347. inspect_ai/scorer/_reducer/types.py +7 -1
  348. inspect_ai/scorer/_score.py +11 -1
  349. inspect_ai/scorer/_scorer.py +110 -16
  350. inspect_ai/solver/__init__.py +1 -1
  351. inspect_ai/solver/_basic_agent.py +19 -22
  352. inspect_ai/solver/_bridge/__init__.py +0 -3
  353. inspect_ai/solver/_bridge/bridge.py +3 -3
  354. inspect_ai/solver/_chain.py +1 -2
  355. inspect_ai/solver/_critique.py +3 -3
  356. inspect_ai/solver/_fork.py +2 -2
  357. inspect_ai/solver/_human_agent/__init__.py +0 -0
  358. inspect_ai/solver/_human_agent/agent.py +5 -8
  359. inspect_ai/solver/_human_agent/commands/clock.py +14 -10
  360. inspect_ai/solver/_human_agent/commands/note.py +1 -1
  361. inspect_ai/solver/_human_agent/commands/score.py +0 -11
  362. inspect_ai/solver/_multiple_choice.py +38 -26
  363. inspect_ai/solver/_prompt.py +7 -7
  364. inspect_ai/solver/_solver.py +53 -52
  365. inspect_ai/solver/_task_state.py +80 -69
  366. inspect_ai/solver/_use_tools.py +9 -9
  367. inspect_ai/tool/__init__.py +4 -1
  368. inspect_ai/tool/_tool.py +43 -14
  369. inspect_ai/tool/_tool_call.py +6 -2
  370. inspect_ai/tool/_tool_choice.py +3 -1
  371. inspect_ai/tool/_tool_def.py +10 -8
  372. inspect_ai/tool/_tool_params.py +24 -0
  373. inspect_ai/tool/_tool_with.py +7 -7
  374. inspect_ai/tool/_tools/__init__.py +0 -0
  375. inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
  376. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
  377. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  378. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  379. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  380. inspect_ai/tool/_tools/_execute.py +23 -11
  381. inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
  382. inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
  383. inspect_ai/tool/_tools/_web_search.py +7 -5
  384. inspect_ai/tool/beta.py +3 -0
  385. inspect_ai/util/_concurrency.py +3 -3
  386. inspect_ai/util/_panel.py +2 -0
  387. inspect_ai/util/_resource.py +12 -12
  388. inspect_ai/util/_sandbox/docker/compose.py +23 -20
  389. inspect_ai/util/_sandbox/docker/config.py +2 -1
  390. inspect_ai/util/_sandbox/docker/docker.py +42 -86
  391. inspect_ai/util/_sandbox/docker/service.py +100 -0
  392. inspect_ai/util/_sandbox/environment.py +99 -96
  393. inspect_ai/util/_sandbox/self_check.py +124 -16
  394. inspect_ai/util/_subprocess.py +5 -3
  395. inspect_ai/util/_subtask.py +15 -16
  396. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
  397. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
  398. inspect_ai-0.3.64.dist-info/RECORD +625 -0
  399. inspect_ai/_view/www/src/Register.mjs +0 -3
  400. inspect_ai/_view/www/src/Types.mjs +0 -38
  401. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  402. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  403. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  404. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  405. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  406. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  407. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  408. inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
  409. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  410. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  411. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  412. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  413. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  414. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  415. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  416. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  417. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  418. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  419. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  420. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  421. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  422. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  423. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  424. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  425. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  426. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  427. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  428. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  429. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  430. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  431. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  432. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  433. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  434. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  435. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  436. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  437. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  438. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  439. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  440. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  441. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  442. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  443. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  444. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  445. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  446. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  447. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  448. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  449. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  450. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  451. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  452. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  453. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  454. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  455. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  456. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  457. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  458. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  459. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  460. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  461. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  462. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  463. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  464. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  465. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  466. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  467. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  468. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  469. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  470. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  471. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  472. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  473. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  474. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  475. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  476. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  477. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
  478. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  479. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  480. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  481. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  482. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  483. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  484. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  485. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  486. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  487. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  488. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  489. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  490. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  491. inspect_ai/tool/beta/__init__.py +0 -5
  492. inspect_ai-0.3.62.dist-info/RECORD +0 -481
  493. /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
  494. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
  495. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  496. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  497. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  498. /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
  499. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  500. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  501. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  502. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  503. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  504. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  505. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  506. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
  507. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
  508. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  509. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  510. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
  511. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  512. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  513. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  514. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  515. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  516. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
  517. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
  518. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -190,7 +190,7 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
190
190
  if task.setup:
191
191
  plan.steps = unroll(task.setup) + plan.steps
192
192
 
193
- # reaolve the scorer
193
+ # resolve the scorer
194
194
  score = score and task.scorer is not None
195
195
  scorers: list[Scorer] | None = task.scorer if (score and task.scorer) else None
196
196
  scorer_profiles = (
@@ -519,6 +519,7 @@ async def task_run_sample(
519
519
  key: SampleScore(
520
520
  score=score,
521
521
  sample_id=previous_sample.id,
522
+ sample_metadata=previous_sample.metadata,
522
523
  )
523
524
  for key, score in previous_sample.scores.items()
524
525
  }
@@ -696,6 +697,7 @@ async def task_run_sample(
696
697
  sample_score = SampleScore(
697
698
  score=score_result,
698
699
  sample_id=sample.id,
700
+ sample_metadata=sample.metadata,
699
701
  scorer=registry_unqualified_name(scorer),
700
702
  )
701
703
  transcript()._event(
@@ -709,7 +711,12 @@ async def task_run_sample(
709
711
  if state.scores is not None:
710
712
  for name, score in state.scores.items():
711
713
  results[name] = SampleScore(
712
- score=score, sample_id=state.sample_id
714
+ score=score,
715
+ sample_id=state.sample_id,
716
+ sample_metadata=state.metadata,
717
+ )
718
+ transcript()._event(
719
+ ScoreEvent(score=score, target=sample.target)
713
720
  )
714
721
 
715
722
  # propagate results into scores
@@ -5,11 +5,20 @@ from random import random
5
5
  from typing import AsyncGenerator, Callable, NamedTuple, cast
6
6
 
7
7
  import httpx
8
+ from tenacity import (
9
+ retry,
10
+ retry_if_exception,
11
+ stop_after_attempt,
12
+ stop_after_delay,
13
+ wait_exponential_jitter,
14
+ )
8
15
 
9
16
  from inspect_ai._eval.task.task import Task
10
17
  from inspect_ai._eval.task.util import task_run_dir
18
+ from inspect_ai._util.constants import DEFAULT_MAX_RETRIES, DEFAULT_TIMEOUT
11
19
  from inspect_ai._util.file import file, filesystem
12
20
  from inspect_ai._util.registry import registry_unqualified_name
21
+ from inspect_ai._util.retry import httpx_should_retry, log_retry_attempt
13
22
  from inspect_ai._util.url import data_uri_to_base64, is_data_uri, is_http_url
14
23
  from inspect_ai.dataset import Sample
15
24
  from inspect_ai.util._concurrency import concurrency
@@ -115,8 +124,7 @@ async def read_sandboxenv_file(contents: str) -> bytes:
115
124
  contents_base64 = data_uri_to_base64(contents)
116
125
  file_bytes = base64.b64decode(contents_base64)
117
126
  elif is_http_url(contents):
118
- client = httpx.AsyncClient()
119
- file_bytes = (await client.get(contents, follow_redirects=True)).content
127
+ file_bytes = await _retrying_httpx_get(contents)
120
128
  else:
121
129
  # try to read as a file (if it doesn't exist or has a path not cool w/
122
130
  # the filesystem then we fall back to contents)
@@ -172,3 +180,28 @@ def resolve_sandbox(
172
180
  return sample.sandbox
173
181
  else:
174
182
  return None
183
+
184
+
185
+ async def _retrying_httpx_get(
186
+ url: str,
187
+ client: httpx.AsyncClient = httpx.AsyncClient(),
188
+ timeout: int = 30, # per-attempt timeout
189
+ max_retries: int = DEFAULT_MAX_RETRIES,
190
+ total_timeout: int = DEFAULT_TIMEOUT, # timeout for the whole retry loop. not for an individual attempt
191
+ ) -> bytes:
192
+ @retry(
193
+ wait=wait_exponential_jitter(),
194
+ stop=(stop_after_attempt(max_retries) | stop_after_delay(total_timeout)),
195
+ retry=retry_if_exception(httpx_should_retry),
196
+ before_sleep=log_retry_attempt(url),
197
+ )
198
+ async def do_get() -> bytes:
199
+ response = await client.get(
200
+ url=url,
201
+ follow_redirects=True,
202
+ timeout=(timeout, timeout, timeout, timeout),
203
+ )
204
+ response.raise_for_status()
205
+ return response.content
206
+
207
+ return await do_get()
@@ -39,38 +39,6 @@ class Task:
39
39
  r"""Evaluation task.
40
40
 
41
41
  Tasks are the basis for defining and running evaluations.
42
-
43
- Args:
44
- dataset (Dataset | Sequence[Sample]): Dataset to evaluate
45
- setup: (Solver | list[Solver] | None): Setup step (always run
46
- even when the main `solver` is replaced).
47
- solver: (Solver | list[Solver]): Solver or list of solvers.
48
- Defaults to generate(), a normal call to the model.
49
- scorer: (Scorer | list[Scorer] | None): Scorer used to evaluate model output.
50
- metrics (list[Metric] | dict[str, list[Metric]] | None):
51
- Alternative metrics (overrides the metrics provided by the specified scorer).
52
- config (GenerateConfig): Model generation config.
53
- sandbox (SandboxEnvironmentType | None): Sandbox environment type
54
- (or optionally a str or tuple with a shorthand spec)
55
- approval: (str | list[ApprovalPolicy] | None): Tool use approval policies.
56
- Either a path to an approval policy config file or a list of approval policies.
57
- Defaults to no approval policy.
58
- epochs (int | Epochs | None): Epochs to repeat samples for and optional score
59
- reducer function(s) used to combine sample scores (defaults to "mean")
60
- fail_on_error (bool | float | None): `True` to fail on first sample error
61
- (default); `False` to never fail on sample errors; Value between 0 and 1
62
- to fail if a proportion of total samples fails. Value greater than 1 to fail
63
- eval if a count of samples fails.
64
- message_limit (int | None): Limit on total messages used for each sample.
65
- token_limit (int | None): Limit on total tokens used for each sample.
66
- time_limit (int | None): Limit on time (in seconds) for execution of each sample.
67
- name: (str | None): Task name. If not specified is automatically
68
- determined based on the name of the task directory (or "task")
69
- if its anonymous task (e.g. created in a notebook and passed to
70
- eval() directly)
71
- version: (int): Version of task (to distinguish evolutions
72
- of the task spec or breaking changes to it)
73
- metadata: (dict[str, Any] | None): Additional metadata to associate with the task.
74
42
  """
75
43
 
76
44
  def __init__(
@@ -93,6 +61,41 @@ class Task:
93
61
  metadata: dict[str, Any] | None = None,
94
62
  **kwargs: Unpack[TaskDeprecatedArgs],
95
63
  ) -> None:
64
+ """Create a task.
65
+
66
+ Args:
67
+ dataset (Dataset | Sequence[Sample]): Dataset to evaluate
68
+ setup: (Solver | list[Solver] | None): Setup step (always run
69
+ even when the main `solver` is replaced).
70
+ solver: (Solver | list[Solver]): Solver or list of solvers.
71
+ Defaults to generate(), a normal call to the model.
72
+ scorer: (Scorer | list[Scorer] | None): Scorer used to evaluate model output.
73
+ metrics (list[Metric] | dict[str, list[Metric]] | None):
74
+ Alternative metrics (overrides the metrics provided by the specified scorer).
75
+ config (GenerateConfig): Model generation config.
76
+ sandbox (SandboxEnvironmentType | None): Sandbox environment type
77
+ (or optionally a str or tuple with a shorthand spec)
78
+ approval: (str | list[ApprovalPolicy] | None): Tool use approval policies.
79
+ Either a path to an approval policy config file or a list of approval policies.
80
+ Defaults to no approval policy.
81
+ epochs (int | Epochs | None): Epochs to repeat samples for and optional score
82
+ reducer function(s) used to combine sample scores (defaults to "mean")
83
+ fail_on_error (bool | float | None): `True` to fail on first sample error
84
+ (default); `False` to never fail on sample errors; Value between 0 and 1
85
+ to fail if a proportion of total samples fails. Value greater than 1 to fail
86
+ eval if a count of samples fails.
87
+ message_limit (int | None): Limit on total messages used for each sample.
88
+ token_limit (int | None): Limit on total tokens used for each sample.
89
+ time_limit (int | None): Limit on time (in seconds) for execution of each sample.
90
+ name: (str | None): Task name. If not specified is automatically
91
+ determined based on the name of the task directory (or "task")
92
+ if its anonymous task (e.g. created in a notebook and passed to
93
+ eval() directly)
94
+ version: (int): Version of task (to distinguish evolutions
95
+ of the task spec or breaking changes to it)
96
+ metadata: (dict[str, Any] | None): Additional metadata to associate with the task.
97
+ **kwargs: Deprecated arguments.
98
+ """
96
99
  # handle deprecated args
97
100
  for arg, value in kwargs.items():
98
101
  newarg = ""
@@ -179,33 +182,33 @@ def task_with(
179
182
  task (Task): Task to adapt (it is deep copied prior to mutating options)
180
183
  dataset (Dataset | Sequence[Sample]): Dataset to evaluate
181
184
  setup: (Solver | list[Solver] | None): Setup step (always run
182
- even when the main `solver` is replaced).
185
+ even when the main `solver` is replaced).
183
186
  solver: (Solver | list[Solver]): Solver or list of solvers.
184
- Defaults to generate(), a normal call to the model.
187
+ Defaults to generate(), a normal call to the model.
185
188
  scorer: (Scorer | list[Scorer] | None): Scorer used to evaluate model output.
186
189
  metrics (list[Metric] | dict[str, list[Metric]] | None):
187
- Alternative metrics (overrides the metrics provided by the specified scorer).
190
+ Alternative metrics (overrides the metrics provided by the specified scorer).
188
191
  config (GenerateConfig): Model generation config.
189
192
  sandbox (SandboxEnvironmentType | None): Sandbox environment type
190
- (or optionally a str or tuple with a shorthand spec)
193
+ (or optionally a str or tuple with a shorthand spec)
191
194
  approval: (str | list[ApprovalPolicy] | None): Tool use approval policies.
192
- Either a path to an approval policy config file or a list of approval policies.
193
- Defaults to no approval policy.
195
+ Either a path to an approval policy config file or a list of approval policies.
196
+ Defaults to no approval policy.
194
197
  epochs (int | Epochs | None): Epochs to repeat samples for and optional score
195
- reducer function(s) used to combine sample scores (defaults to "mean")
198
+ reducer function(s) used to combine sample scores (defaults to "mean")
196
199
  fail_on_error (bool | float | None): `True` to fail on first sample error
197
- (default); `False` to never fail on sample errors; Value between 0 and 1
198
- to fail if a proportion of total samples fails. Value greater than 1 to fail
199
- eval if a count of samples fails.
200
+ (default); `False` to never fail on sample errors; Value between 0 and 1
201
+ to fail if a proportion of total samples fails. Value greater than 1 to fail
202
+ eval if a count of samples fails.
200
203
  message_limit (int | None): Limit on total messages used for each sample.
201
204
  token_limit (int | None): Limit on total tokens used for each sample.
202
205
  time_limit (int | None): Limit on time (in seconds) for execution of each sample.
203
206
  name: (str | None): Task name. If not specified is automatically
204
- determined based on the name of the task directory (or "task")
205
- if its anonymous task (e.g. created in a notebook and passed to
206
- eval() directly)
207
+ determined based on the name of the task directory (or "task")
208
+ if its anonymous task (e.g. created in a notebook and passed to
209
+ eval() directly)
207
210
  version: (int): Version of task (to distinguish evolutions
208
- of the task spec or breaking changes to it)
211
+ of the task spec or breaking changes to it)
209
212
  metadata: (dict[str, Any] | None): Additional metadata to associate with the task.
210
213
 
211
214
  Returns:
@@ -1,7 +1,7 @@
1
1
  from pathlib import Path
2
2
  from typing import Literal
3
3
 
4
- PKG_AUTHOR = "UK AI Safety Institute"
4
+ PKG_AUTHOR = "UK AI Security Institute"
5
5
  PKG_AUTHOR_DIR = "UK-AISI"
6
6
  PKG_NAME = Path(__file__).parent.parent.stem
7
7
  PKG_PATH = Path(__file__).parent.parent
@@ -4,6 +4,8 @@ from pydantic import BaseModel, Field
4
4
 
5
5
 
6
6
  class ContentText(BaseModel):
7
+ """Text content."""
8
+
7
9
  type: Literal["text"] = Field(default="text")
8
10
  """Type."""
9
11
 
@@ -12,6 +14,8 @@ class ContentText(BaseModel):
12
14
 
13
15
 
14
16
  class ContentImage(BaseModel):
17
+ """Image content."""
18
+
15
19
  type: Literal["image"] = Field(default="image")
16
20
  """Type."""
17
21
 
@@ -26,6 +30,8 @@ class ContentImage(BaseModel):
26
30
 
27
31
 
28
32
  class ContentAudio(BaseModel):
33
+ """Audio content."""
34
+
29
35
  type: Literal["audio"] = Field(default="audio")
30
36
  """Type."""
31
37
 
@@ -37,6 +43,8 @@ class ContentAudio(BaseModel):
37
43
 
38
44
 
39
45
  class ContentVideo(BaseModel):
46
+ """Video content."""
47
+
40
48
  type: Literal["video"] = Field(default="video")
41
49
  """Type."""
42
50
 
inspect_ai/_util/error.py CHANGED
@@ -9,6 +9,8 @@ from rich.console import RenderableType
9
9
 
10
10
 
11
11
  class EvalError(BaseModel):
12
+ """Eval error details."""
13
+
12
14
  message: str
13
15
  """Error message."""
14
16
 
inspect_ai/_util/file.py CHANGED
@@ -18,6 +18,7 @@ from fsspec.core import split_protocol # type: ignore # type: ignore
18
18
  from fsspec.implementations.local import make_path_posix # type: ignore
19
19
  from pydantic import BaseModel
20
20
  from s3fs import S3FileSystem # type: ignore
21
+ from shortuuid import uuid
21
22
 
22
23
  # https://filesystem-spec.readthedocs.io/en/latest/_modules/fsspec/spec.html#AbstractFileSystem
23
24
  # https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.generic.GenericFileSystem
@@ -169,6 +170,9 @@ class FileSystem:
169
170
  def exists(self, path: str) -> bool:
170
171
  return self.fs.exists(path) is True
171
172
 
173
+ def touch(self, path: str) -> None:
174
+ self.fs.touch(path)
175
+
172
176
  def rm(
173
177
  self, path: str, recursive: bool = False, maxdepth: int | None = None
174
178
  ) -> None:
@@ -218,6 +222,16 @@ class FileSystem:
218
222
  def is_local(self) -> bool:
219
223
  return isinstance(self.fs, fsspec.implementations.local.LocalFileSystem)
220
224
 
225
+ def is_writeable(self, path: str) -> bool:
226
+ try:
227
+ path = path.rstrip("/\\")
228
+ touch_file = f"{path}{self.fs.sep}{uuid()}"
229
+ self.touch(touch_file)
230
+ self.rm(touch_file)
231
+ return True
232
+ except PermissionError:
233
+ return False
234
+
221
235
  def is_async(self) -> bool:
222
236
  return isinstance(self.fs, fsspec.asyn.AsyncFileSystem)
223
237
 
@@ -354,7 +368,7 @@ def safe_filename(s: str, max_length: int = 255) -> str:
354
368
  Returns:
355
369
  str: A safe filename string
356
370
 
357
- Example:
371
+ Examples:
358
372
  >>> safe_filename("Hello/World?.txt")
359
373
  'Hello_World.txt'
360
374
  """
inspect_ai/_util/hash.py CHANGED
@@ -3,7 +3,7 @@ import mmh3
3
3
 
4
4
  def mm3_hash(message: str) -> str:
5
5
  # Generate the 128-bit hash as two 64-bit integers
6
- h1, h2 = mmh3.hash64(message.encode("utf-8"))
6
+ h1, h2 = mmh3.hash64(message.encode("utf-8")) # pylint: disable=E0633
7
7
 
8
8
  # Convert to unsigned integers and then to hexadecimal
9
9
  return f"{h1 & 0xFFFFFFFFFFFFFFFF:016x}{h2 & 0xFFFFFFFFFFFFFFFF:016x}"
@@ -161,7 +161,7 @@ def init_logger(
161
161
  getLogger().addHandler(_logHandler)
162
162
 
163
163
  # establish default capture level
164
- capture_level = min(TRACE, levelno)
164
+ capture_level = min(TRACE, levelno, transcript_levelno)
165
165
 
166
166
  # see all the messages (we won't actually display/write all of them)
167
167
  getLogger().setLevel(capture_level)
@@ -181,7 +181,9 @@ def notify_logger_record(record: LogRecord, write: bool) -> None:
181
181
  from inspect_ai.log._transcript import LoggerEvent, transcript
182
182
 
183
183
  if write:
184
- transcript()._event(LoggerEvent(message=LoggingMessage.from_log_record(record)))
184
+ transcript()._event(
185
+ LoggerEvent(message=LoggingMessage._from_log_record(record))
186
+ )
185
187
  global _rate_limit_count
186
188
  if (record.levelno <= INFO and re.search(r"\b429\b", record.getMessage())) or (
187
189
  record.levelno == DEBUG
@@ -209,7 +209,13 @@ def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object:
209
209
  if isclass(obj):
210
210
  return with_registry_info(obj(**kwargs))
211
211
  elif callable(obj):
212
- return_type = getattr(get_annotations(obj)["return"], "__name__", None)
212
+ return_type = get_annotations(obj).get("return")
213
+ # Until we remove the MetricDeprecated symbol we need this extra
214
+ # bit to map the Metric union back to Metric
215
+ if "_metric.Metric" in str(return_type):
216
+ return_type = "Metric"
217
+ else:
218
+ return_type = getattr(return_type, "__name__", None)
213
219
  if return_type and return_type.lower() == type:
214
220
  return with_registry_info(obj(**kwargs))
215
221
  else:
inspect_ai/_view/view.py CHANGED
@@ -28,11 +28,10 @@ def view(
28
28
  port: int = DEFAULT_VIEW_PORT,
29
29
  authorization: str | None = None,
30
30
  log_level: str | None = None,
31
- log_level_transcript: str | None = None,
32
31
  fs_options: dict[str, Any] = {},
33
32
  ) -> None:
34
33
  init_dotenv()
35
- init_logger(log_level, log_level_transcript)
34
+ init_logger(log_level)
36
35
 
37
36
  # initialize the log_dir
38
37
  log_dir = log_dir if log_dir else os.getenv("INSPECT_LOG_DIR", "./logs")
@@ -0,0 +1,3 @@
1
+ {
2
+ "recommendations": ["esbenp.prettier-vscode", "dbaeumer.vscode-eslint"]
3
+ }
@@ -0,0 +1,8 @@
1
+ {
2
+ "editor.formatOnSave": true,
3
+ "editor.defaultFormatter": "esbenp.prettier-vscode",
4
+ "editor.codeActionsOnSave": {
5
+ "source.organizeImports": "explicit",
6
+ "source.fixAll": "explicit"
7
+ }
8
+ }
@@ -9,12 +9,28 @@
9
9
  --inspect-input-border: var(--bs-light-border-subtle);
10
10
  --inspect-diff-add-color: #dafbe1;
11
11
  --inspect-diff-remove-color: #ffebe9;
12
- --inspect-inactive-selection-background: var(--vscode-editor-inactiveSelectionBackground, #d9d9d9);
13
- --inspect-active-selection-background: var(--vscode-editor-selectionBackground, #d7d4f0);
12
+ --inspect-inactive-selection-background: var(
13
+ --vscode-editor-inactiveSelectionBackground,
14
+ #d9d9d9
15
+ );
16
+ --inspect-active-selection-background: var(
17
+ --vscode-editor-selectionBackground,
18
+ #d7d4f0
19
+ );
14
20
  --inspect-focus-border-color: #86b7fe;
15
21
  --inspect-focus-border-shadow: 0 0 0 0.25rem rgba(var(--bs-primary-rgb), 0.25);
16
22
  --inspect-focus-border-gray-color: #808080;
17
23
  --inspect-focus-border-gray-shadow: 0 0 0 0.25rem rgba(48, 48, 48, 0.25);
24
+
25
+ /* Inspect Font Sizes */
26
+ --inspect-font-size-title: 1.5rem;
27
+ --inspect-font-size-title-secondary: 1.3rem;
28
+ --inspect-font-size-largest: 1.2rem;
29
+ --inspect-font-size-larger: 1.1rem;
30
+ --inspect-font-size-large: 1rem;
31
+ --inspect-font-size-base: 0.9rem;
32
+ --inspect-font-size-small: 0.8rem;
33
+ --inspect-font-size-smaller: 0.8rem;
18
34
  }
19
35
 
20
36
  body:not([class^="vscode-"]) button {
@@ -47,6 +63,65 @@ body[class^="vscode-"] .app-main-grid {
47
63
  grid-template-rows: max-content max-content 1fr;
48
64
  }
49
65
 
66
+ /* Inspect Text Styles */
67
+ .text-style-label {
68
+ text-transform: uppercase;
69
+ }
70
+
71
+ .text-style-secondary {
72
+ color: var(--bs-secondary);
73
+ }
74
+
75
+ .text-style-tertiary {
76
+ color: var(--bs-tertiary-color);
77
+ }
78
+
79
+ /* Inspect Font Size Styles */
80
+ .text-size-title {
81
+ font-size: var(--inspect-font-size-title);
82
+ }
83
+
84
+ .text-size-title-secondary {
85
+ font-size: var(--inspect-font-size-title-secondary);
86
+ }
87
+
88
+ .text-size-largest {
89
+ font-size: var(--inspect-font-size-largest);
90
+ }
91
+
92
+ .text-size-larger {
93
+ font-size: var(--inspect-font-size-larger);
94
+ }
95
+
96
+ .text-size-large {
97
+ font-size: var(--inspect-font-size-large);
98
+ }
99
+
100
+ .text-size-base {
101
+ font-size: var(--inspect-font-size-base);
102
+ }
103
+
104
+ .text-size-small {
105
+ font-size: var(--inspect-font-size-small);
106
+ }
107
+
108
+ .text-size-smaller {
109
+ font-size: var(--inspect-font-size-smaller);
110
+ }
111
+
112
+ .text-truncate {
113
+ white-space: nowrap;
114
+ text-overflow: ellipsis;
115
+ overflow: hidden;
116
+ }
117
+
118
+ .three-line-clamp {
119
+ display: -webkit-box;
120
+ -webkit-line-clamp: 3;
121
+ -webkit-box-orient: vertical;
122
+ overflow: hidden;
123
+ }
124
+
50
125
  body[class^="vscode-"] {
51
126
  --bs-border-radius: 0;
52
127
  --bs-border-radius-lg: 0;
@@ -87,7 +162,7 @@ html.vscode {
87
162
 
88
163
  html.vscode .sample-input {
89
164
  line-height: 1.3em;
90
- -webkit-line-clamp: 4 !important
165
+ -webkit-line-clamp: 4 !important;
91
166
  }
92
167
 
93
168
  body[class^="vscode-"] .modal-backdrop {
@@ -276,7 +351,7 @@ body {
276
351
  }
277
352
 
278
353
  @media (max-width: 575px) {
279
- .tab-tools select {
354
+ .tab-tools select {
280
355
  width: 50px;
281
356
  }
282
357
  }
@@ -312,12 +387,6 @@ body {
312
387
  font-size: 1.5em;
313
388
  }
314
389
 
315
- .sidebar {
316
- --bs-offcanvas-width: var(--sidebar-width);
317
- width: var(--sidebar-width);
318
- overflow-y: auto;
319
- }
320
-
321
390
  .nav-link.active {
322
391
  border-bottom-width: 0 !important;
323
392
  }
@@ -644,7 +713,7 @@ table.table.table-sm td {
644
713
 
645
714
  .tab-tools .btn {
646
715
  font-size: 0.7rem;
647
- padding: 0.4em 0.8em;
716
+ padding: 0.2em 0.8em;
648
717
  }
649
718
 
650
719
  .tab-tools {
@@ -724,7 +793,7 @@ table.table.table-sm td {
724
793
  }
725
794
 
726
795
  @keyframes moveLeftToRight {
727
- from {
796
+ from {
728
797
  margin-left: 0;
729
798
  }
730
799
  to {
@@ -760,7 +829,6 @@ pre[class*="language-"].tool-output {
760
829
 
761
830
  /* lightbox styles */
762
831
 
763
-
764
832
  .lightbox-overlay .close-button,
765
833
  .lightbox-overlay .nav-button {
766
834
  /* Hide by default */
@@ -868,38 +936,38 @@ ul.jsondiffpatch-delta {
868
936
  vertical-align: top;
869
937
  }
870
938
  .jsondiffpatch-property-name:after {
871
- content: ': ';
939
+ content: ": ";
872
940
  }
873
941
  .jsondiffpatch-child-node-type-array > .jsondiffpatch-property-name:after {
874
- content: ': [';
942
+ content: ": [";
875
943
  }
876
944
  .jsondiffpatch-child-node-type-array:after {
877
- content: '],';
945
+ content: "],";
878
946
  }
879
947
  div.jsondiffpatch-child-node-type-array:before {
880
- content: '[';
948
+ content: "[";
881
949
  }
882
950
  div.jsondiffpatch-child-node-type-array:after {
883
- content: ']';
951
+ content: "]";
884
952
  }
885
953
  .jsondiffpatch-child-node-type-object > .jsondiffpatch-property-name:after {
886
- content: ': {';
954
+ content: ": {";
887
955
  }
888
956
  .jsondiffpatch-child-node-type-object:after {
889
- content: '},';
957
+ content: "},";
890
958
  }
891
959
  div.jsondiffpatch-child-node-type-object:before {
892
- content: '{';
960
+ content: "{";
893
961
  }
894
962
  div.jsondiffpatch-child-node-type-object:after {
895
- content: '}';
963
+ content: "}";
896
964
  }
897
965
  .jsondiffpatch-value pre:after {
898
- content: ',';
966
+ content: ",";
899
967
  }
900
968
  li:last-child > .jsondiffpatch-value pre:after,
901
969
  .jsondiffpatch-modified > .jsondiffpatch-left-value pre:after {
902
- content: '';
970
+ content: "";
903
971
  }
904
972
  .jsondiffpatch-modified .jsondiffpatch-value {
905
973
  display: inline-block;
@@ -916,7 +984,7 @@ li:last-child > .jsondiffpatch-value pre:after,
916
984
  color: #888;
917
985
  }
918
986
  .jsondiffpatch-moved .jsondiffpatch-moved-destination:before {
919
- content: ' => ';
987
+ content: " => ";
920
988
  }
921
989
  ul.jsondiffpatch-textdiff {
922
990
  padding: 0;
@@ -930,7 +998,7 @@ ul.jsondiffpatch-textdiff {
930
998
  display: inline-block;
931
999
  }
932
1000
  .jsondiffpatch-textdiff-line-number:after {
933
- content: ',';
1001
+ content: ",";
934
1002
  }
935
1003
  .jsondiffpatch-error {
936
1004
  background: red;
@@ -976,14 +1044,14 @@ ul.jsondiffpatch-textdiff {
976
1044
  padding: 1em;
977
1045
  margin: 0.5em 0;
978
1046
  overflow: auto;
979
- border: 0.3em solid #7a6651;
1047
+ /* border: 0.3em solid #7a6651; */
980
1048
  border-radius: 0.5em;
981
1049
  box-shadow: 1px 1px 0.5em #000 inset;
982
1050
  }
983
1051
  .vscode-dark :not(pre) > code[class*="language-"] {
984
1052
  padding: 0.15em 0.2em 0.05em;
985
1053
  border-radius: 0.3em;
986
- border: 0.13em solid #7a6651;
1054
+ /* border: 0.13em solid #7a6651; */
987
1055
  box-shadow: 1px 1px 0.3em -0.1em #000 inset;
988
1056
  white-space: normal;
989
1057
  }
@@ -1045,4 +1113,4 @@ ul.jsondiffpatch-textdiff {
1045
1113
  .vscode-dark .token.deleted {
1046
1114
  color: red;
1047
1115
  }
1048
- /* END PrismJS */
1116
+ /* END PrismJS */
@@ -19,7 +19,7 @@ Use the following commands (run in the `src/inspect_ai/_view/www` dir) to ensure
19
19
  yarn prettier:write
20
20
  ```
21
21
 
22
- 3. Build the bundled output to `dist`
22
+ 3. Build the bundled output into the `dist` directory.
23
23
 
24
24
  ```bash
25
25
  yarn build