inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (518)
  1. inspect_ai/_cli/cache.py +8 -7
  2. inspect_ai/_cli/common.py +0 -12
  3. inspect_ai/_cli/eval.py +32 -4
  4. inspect_ai/_cli/info.py +1 -0
  5. inspect_ai/_cli/list.py +1 -1
  6. inspect_ai/_cli/log.py +2 -0
  7. inspect_ai/_cli/main.py +1 -1
  8. inspect_ai/_cli/sandbox.py +4 -1
  9. inspect_ai/_cli/score.py +181 -32
  10. inspect_ai/_cli/trace.py +10 -0
  11. inspect_ai/_cli/view.py +4 -2
  12. inspect_ai/_display/core/active.py +2 -3
  13. inspect_ai/_display/core/config.py +7 -1
  14. inspect_ai/_display/textual/widgets/samples.py +4 -3
  15. inspect_ai/_display/textual/widgets/sandbox.py +6 -0
  16. inspect_ai/_eval/eval.py +104 -101
  17. inspect_ai/_eval/evalset.py +75 -75
  18. inspect_ai/_eval/loader.py +122 -12
  19. inspect_ai/_eval/registry.py +1 -1
  20. inspect_ai/_eval/run.py +14 -0
  21. inspect_ai/_eval/score.py +125 -36
  22. inspect_ai/_eval/task/log.py +105 -4
  23. inspect_ai/_eval/task/results.py +92 -38
  24. inspect_ai/_eval/task/run.py +9 -2
  25. inspect_ai/_eval/task/sandbox.py +35 -2
  26. inspect_ai/_eval/task/task.py +49 -46
  27. inspect_ai/_util/constants.py +1 -1
  28. inspect_ai/_util/content.py +8 -0
  29. inspect_ai/_util/error.py +2 -0
  30. inspect_ai/_util/file.py +15 -1
  31. inspect_ai/_util/hash.py +1 -1
  32. inspect_ai/_util/logger.py +4 -2
  33. inspect_ai/_util/registry.py +7 -1
  34. inspect_ai/_view/view.py +1 -2
  35. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  36. inspect_ai/_view/www/.vscode/settings.json +8 -0
  37. inspect_ai/_view/www/App.css +97 -29
  38. inspect_ai/_view/www/README.md +1 -1
  39. inspect_ai/_view/www/dist/assets/index.css +16663 -14674
  40. inspect_ai/_view/www/dist/assets/index.js +58808 -51348
  41. inspect_ai/_view/www/dist/index.html +1 -1
  42. inspect_ai/_view/www/index.html +2 -2
  43. inspect_ai/_view/www/log-schema.json +87 -73
  44. inspect_ai/_view/www/package.json +22 -4
  45. inspect_ai/_view/www/postcss.config.cjs +8 -9
  46. inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
  47. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  48. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  49. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  50. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  51. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  52. inspect_ai/_view/www/src/api/index.ts +4 -4
  53. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  54. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  55. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  56. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  57. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  58. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  59. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  60. inspect_ai/_view/www/src/components/Card.css +60 -0
  61. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  62. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  63. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  64. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  65. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  66. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  67. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  68. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  69. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  70. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  71. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  72. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  73. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  74. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  75. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  76. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  77. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  78. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  79. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  80. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  81. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  82. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  83. inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
  84. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  85. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  86. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  87. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  88. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  89. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  90. inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
  91. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  92. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  93. inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
  94. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  95. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  96. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  97. inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
  98. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  99. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  100. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  101. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  102. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  103. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
  104. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  105. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  106. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  107. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  108. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  109. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  110. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  111. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  112. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  113. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  114. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  115. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  116. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  117. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  118. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
  119. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  120. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  121. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  122. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  123. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  124. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  126. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  127. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
  128. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  129. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
  130. inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
  131. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  132. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  133. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  134. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  135. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  136. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
  137. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  138. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  139. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  140. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
  141. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  142. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
  143. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  144. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
  145. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  146. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
  147. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  148. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
  149. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  150. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  151. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
  152. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
  153. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  154. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  155. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  156. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  157. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  158. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  159. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  160. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  161. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  162. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  163. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  164. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  165. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  166. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  167. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  168. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  169. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  170. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  171. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  172. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  173. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  174. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  175. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  176. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  177. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  179. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  180. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  181. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  182. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
  183. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  184. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
  185. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  186. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  187. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  188. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  189. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  190. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  191. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  192. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  193. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  194. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  196. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  197. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  198. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  199. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  200. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  201. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  202. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  203. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  204. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  205. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  206. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
  207. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  208. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  209. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  210. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  211. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  212. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  213. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  214. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  215. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
  216. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  217. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  218. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  219. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  220. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  221. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
  222. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  223. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
  224. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  225. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  226. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  227. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  228. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  229. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
  230. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  231. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  232. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
  233. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  234. inspect_ai/_view/www/src/types/log.d.ts +108 -19
  235. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  236. inspect_ai/_view/www/src/types.ts +71 -0
  237. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
  238. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  239. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
  240. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  241. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  242. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  243. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  244. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  245. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  246. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  247. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  248. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  249. inspect_ai/_view/www/src/utils/format.ts +194 -0
  250. inspect_ai/_view/www/src/utils/git.ts +7 -0
  251. inspect_ai/_view/www/src/utils/html.ts +6 -0
  252. inspect_ai/_view/www/src/utils/http.ts +14 -0
  253. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  254. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  255. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  256. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  257. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  258. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  259. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  260. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  261. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
  262. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  263. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  264. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  265. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  266. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  267. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
  268. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
  269. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
  270. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  271. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
  272. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  273. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  274. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  275. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  276. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  277. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  278. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  279. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  280. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  281. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  282. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  283. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
  284. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  285. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
  286. inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
  287. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
  288. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  289. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  290. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  291. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  292. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  293. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  294. inspect_ai/_view/www/src/workspace/utils.ts +34 -0
  295. inspect_ai/_view/www/tsconfig.json +23 -9
  296. inspect_ai/_view/www/vite.config.js +8 -17
  297. inspect_ai/_view/www/yarn.lock +627 -556
  298. inspect_ai/approval/_approval.py +2 -0
  299. inspect_ai/approval/_approver.py +4 -4
  300. inspect_ai/approval/_auto.py +1 -1
  301. inspect_ai/approval/_human/approver.py +3 -0
  302. inspect_ai/approval/_policy.py +5 -0
  303. inspect_ai/approval/_registry.py +2 -2
  304. inspect_ai/dataset/_dataset.py +64 -37
  305. inspect_ai/dataset/_sources/__init__.py +0 -0
  306. inspect_ai/dataset/_sources/csv.py +20 -12
  307. inspect_ai/dataset/_sources/file.py +4 -0
  308. inspect_ai/dataset/_sources/hf.py +39 -29
  309. inspect_ai/dataset/_sources/json.py +17 -9
  310. inspect_ai/log/__init__.py +2 -0
  311. inspect_ai/log/_convert.py +3 -3
  312. inspect_ai/log/_file.py +24 -9
  313. inspect_ai/log/_log.py +101 -13
  314. inspect_ai/log/_message.py +4 -2
  315. inspect_ai/log/_recorders/file.py +4 -0
  316. inspect_ai/log/_recorders/json.py +5 -7
  317. inspect_ai/log/_recorders/recorder.py +3 -0
  318. inspect_ai/log/_transcript.py +19 -8
  319. inspect_ai/model/__init__.py +2 -0
  320. inspect_ai/model/_cache.py +39 -21
  321. inspect_ai/model/_call_tools.py +4 -3
  322. inspect_ai/model/_chat_message.py +14 -4
  323. inspect_ai/model/_generate_config.py +1 -1
  324. inspect_ai/model/_model.py +31 -24
  325. inspect_ai/model/_model_output.py +14 -1
  326. inspect_ai/model/_openai.py +10 -18
  327. inspect_ai/model/_providers/anthropic.py +3 -3
  328. inspect_ai/model/_providers/google.py +9 -5
  329. inspect_ai/model/_providers/openai.py +5 -9
  330. inspect_ai/model/_providers/openai_o1.py +3 -5
  331. inspect_ai/model/_providers/openrouter.py +86 -0
  332. inspect_ai/model/_providers/providers.py +11 -0
  333. inspect_ai/scorer/__init__.py +6 -1
  334. inspect_ai/scorer/_answer.py +7 -7
  335. inspect_ai/scorer/_classification.py +38 -18
  336. inspect_ai/scorer/_common.py +2 -8
  337. inspect_ai/scorer/_match.py +4 -5
  338. inspect_ai/scorer/_metric.py +87 -28
  339. inspect_ai/scorer/_metrics/__init__.py +3 -3
  340. inspect_ai/scorer/_metrics/accuracy.py +8 -10
  341. inspect_ai/scorer/_metrics/mean.py +3 -17
  342. inspect_ai/scorer/_metrics/std.py +111 -30
  343. inspect_ai/scorer/_model.py +12 -12
  344. inspect_ai/scorer/_pattern.py +3 -3
  345. inspect_ai/scorer/_reducer/reducer.py +36 -21
  346. inspect_ai/scorer/_reducer/registry.py +2 -2
  347. inspect_ai/scorer/_reducer/types.py +7 -1
  348. inspect_ai/scorer/_score.py +11 -1
  349. inspect_ai/scorer/_scorer.py +110 -16
  350. inspect_ai/solver/__init__.py +1 -1
  351. inspect_ai/solver/_basic_agent.py +19 -22
  352. inspect_ai/solver/_bridge/__init__.py +0 -3
  353. inspect_ai/solver/_bridge/bridge.py +3 -3
  354. inspect_ai/solver/_chain.py +1 -2
  355. inspect_ai/solver/_critique.py +3 -3
  356. inspect_ai/solver/_fork.py +2 -2
  357. inspect_ai/solver/_human_agent/__init__.py +0 -0
  358. inspect_ai/solver/_human_agent/agent.py +5 -8
  359. inspect_ai/solver/_human_agent/commands/clock.py +14 -10
  360. inspect_ai/solver/_human_agent/commands/note.py +1 -1
  361. inspect_ai/solver/_human_agent/commands/score.py +0 -11
  362. inspect_ai/solver/_multiple_choice.py +38 -26
  363. inspect_ai/solver/_prompt.py +7 -7
  364. inspect_ai/solver/_solver.py +53 -52
  365. inspect_ai/solver/_task_state.py +80 -69
  366. inspect_ai/solver/_use_tools.py +9 -9
  367. inspect_ai/tool/__init__.py +4 -1
  368. inspect_ai/tool/_tool.py +43 -14
  369. inspect_ai/tool/_tool_call.py +6 -2
  370. inspect_ai/tool/_tool_choice.py +3 -1
  371. inspect_ai/tool/_tool_def.py +10 -8
  372. inspect_ai/tool/_tool_params.py +24 -0
  373. inspect_ai/tool/_tool_with.py +7 -7
  374. inspect_ai/tool/_tools/__init__.py +0 -0
  375. inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
  376. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
  377. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  378. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  379. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  380. inspect_ai/tool/_tools/_execute.py +23 -11
  381. inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
  382. inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
  383. inspect_ai/tool/_tools/_web_search.py +7 -5
  384. inspect_ai/tool/beta.py +3 -0
  385. inspect_ai/util/_concurrency.py +3 -3
  386. inspect_ai/util/_panel.py +2 -0
  387. inspect_ai/util/_resource.py +12 -12
  388. inspect_ai/util/_sandbox/docker/compose.py +23 -20
  389. inspect_ai/util/_sandbox/docker/config.py +2 -1
  390. inspect_ai/util/_sandbox/docker/docker.py +42 -86
  391. inspect_ai/util/_sandbox/docker/service.py +100 -0
  392. inspect_ai/util/_sandbox/environment.py +99 -96
  393. inspect_ai/util/_sandbox/self_check.py +124 -16
  394. inspect_ai/util/_subprocess.py +5 -3
  395. inspect_ai/util/_subtask.py +15 -16
  396. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
  397. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
  398. inspect_ai-0.3.64.dist-info/RECORD +625 -0
  399. inspect_ai/_view/www/src/Register.mjs +0 -3
  400. inspect_ai/_view/www/src/Types.mjs +0 -38
  401. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  402. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  403. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  404. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  405. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  406. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  407. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  408. inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
  409. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  410. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  411. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  412. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  413. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  414. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  415. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  416. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  417. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  418. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  419. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  420. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  421. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  422. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  423. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  424. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  425. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  426. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  427. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  428. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  429. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  430. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  431. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  432. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  433. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  434. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  435. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  436. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  437. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  438. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  439. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  440. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  441. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  442. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  443. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  444. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  445. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  446. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  447. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  448. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  449. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  450. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  451. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  452. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  453. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  454. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  455. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  456. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  457. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  458. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  459. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  460. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  461. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  462. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  463. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  464. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  465. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  466. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  467. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  468. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  469. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  470. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  471. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  472. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  473. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  474. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  475. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  476. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  477. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
  478. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  479. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  480. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  481. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  482. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  483. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  484. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  485. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  486. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  487. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  488. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  489. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  490. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  491. inspect_ai/tool/beta/__init__.py +0 -5
  492. inspect_ai-0.3.62.dist-info/RECORD +0 -481
  493. /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
  494. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
  495. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  496. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  497. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  498. /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
  499. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  500. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  501. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  502. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  503. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  504. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  505. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  506. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
  507. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
  508. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  509. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  510. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
  511. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  512. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  513. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  514. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  515. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  516. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
  517. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
  518. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -112,6 +112,7 @@ export type Input =
112
112
  | ChatMessageAssistant
113
113
  | ChatMessageTool
114
114
  )[];
115
+ export type Role = "system";
115
116
  export type Content =
116
117
  | string
117
118
  | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
@@ -127,18 +128,17 @@ export type Type4 = "video";
127
128
  export type Video = string;
128
129
  export type Format1 = "mp4" | "mpeg" | "mov";
129
130
  export type Source = ("input" | "generate") | null;
130
- export type Role = "system";
131
+ export type Role1 = "user";
131
132
  export type Content1 =
132
133
  | string
133
134
  | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
134
135
  export type Source1 = ("input" | "generate") | null;
135
- export type Role1 = "user";
136
136
  export type ToolCallId = string[] | null;
137
+ export type Role2 = "assistant";
137
138
  export type Content2 =
138
139
  | string
139
140
  | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
140
141
  export type Source2 = ("input" | "generate") | null;
141
- export type Role2 = "assistant";
142
142
  export type ToolCalls = ToolCall[] | null;
143
143
  export type Id1 = string;
144
144
  export type Function = string;
@@ -148,11 +148,11 @@ export type Title = string | null;
148
148
  export type Format2 = "text" | "markdown";
149
149
  export type Content3 = string;
150
150
  export type Reasoning = string | null;
151
+ export type Role3 = "tool";
151
152
  export type Content4 =
152
153
  | string
153
154
  | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
154
155
  export type Source3 = ("input" | "generate") | null;
155
- export type Role3 = "tool";
156
156
  export type ToolCallId1 = string | null;
157
157
  export type Function1 = string | null;
158
158
  export type Type6 =
@@ -315,6 +315,7 @@ export type Timestamp8 = string;
315
315
  export type Pending8 = boolean | null;
316
316
  export type Event8 = "score";
317
317
  export type Target2 = string | string[] | null;
318
+ export type Intermediate = boolean;
318
319
  export type Timestamp9 = string;
319
320
  export type Pending9 = boolean | null;
320
321
  export type Event9 = "error";
@@ -324,6 +325,7 @@ export type Event10 = "logger";
324
325
  export type Name7 = string | null;
325
326
  export type Level =
326
327
  | "debug"
328
+ | "trace"
327
329
  | "http"
328
330
  | "sandbox"
329
331
  | "info"
@@ -338,6 +340,7 @@ export type Lineno = number;
338
340
  export type Timestamp11 = string;
339
341
  export type Pending11 = boolean | null;
340
342
  export type Event11 = "info";
343
+ export type Source4 = string | null;
341
344
  export type Timestamp12 = string;
342
345
  export type Pending12 = boolean | null;
343
346
  export type Event12 = "step";
@@ -423,6 +426,9 @@ export type SampleId1 = string | number | null;
423
426
  export type Samples2 = EvalSampleScore[];
424
427
  export type Location1 = string;
425
428
 
429
+ /**
430
+ * Evaluation log.
431
+ */
426
432
  export interface EvalLog {
427
433
  version?: Version;
428
434
  status?: Status;
@@ -435,6 +441,9 @@ export interface EvalLog {
435
441
  reductions?: Reductions;
436
442
  location?: Location1;
437
443
  }
444
+ /**
445
+ * Eval target and configuration.
446
+ */
438
447
  export interface EvalSpec {
439
448
  run_id: RunId;
440
449
  created: Created;
@@ -459,6 +468,9 @@ export interface EvalSpec {
459
468
  }
460
469
  export interface TaskAttribs {}
461
470
  export interface TaskArgs {}
471
+ /**
472
+ * Dataset used for evaluation.
473
+ */
462
474
  export interface EvalDataset {
463
475
  name: Name;
464
476
  location: Location;
@@ -467,6 +479,9 @@ export interface EvalDataset {
467
479
  shuffled: Shuffled;
468
480
  }
469
481
  export interface ModelArgs {}
482
+ /**
483
+ * Configuration used for evaluation.
484
+ */
470
485
  export interface EvalConfig {
471
486
  limit: Limit;
472
487
  sample_id: SampleId;
@@ -512,6 +527,9 @@ export interface ApproverPolicyConfig {
512
527
  params: Params;
513
528
  }
514
529
  export interface Params {}
530
+ /**
531
+ * Git revision for evaluation.
532
+ */
515
533
  export interface EvalRevision {
516
534
  type: Type;
517
535
  origin: Origin;
@@ -520,19 +538,25 @@ export interface EvalRevision {
520
538
  export interface Packages {
521
539
  [k: string]: string;
522
540
  }
541
+ /**
542
+ * Plan (solvers) used in evaluation.
543
+ */
523
544
  export interface EvalPlan {
524
545
  name: Name2;
525
546
  steps: Steps;
526
547
  finish: EvalPlanStep | null;
527
548
  config: GenerateConfig;
528
549
  }
550
+ /**
551
+ * Solver step.
552
+ */
529
553
  export interface EvalPlanStep {
530
554
  solver: Solver1;
531
555
  params: Params1;
532
556
  }
533
557
  export interface Params1 {}
534
558
  /**
535
- * Base class for model generation configs.
559
+ * Model generation options.
536
560
  */
537
561
  export interface GenerateConfig {
538
562
  max_retries: MaxRetries;
@@ -559,12 +583,18 @@ export interface GenerateConfig {
559
583
  reasoning_effort: ReasoningEffort;
560
584
  reasoning_history: ReasoningHistory;
561
585
  }
586
+ /**
587
+ * Scoring results from evaluation.
588
+ */
562
589
  export interface EvalResults {
563
590
  total_samples: TotalSamples;
564
591
  completed_samples: CompletedSamples;
565
592
  scores: Scores;
566
593
  metadata: Metadata3;
567
594
  }
595
+ /**
596
+ * Score for evaluation task.
597
+ */
568
598
  export interface EvalScore {
569
599
  name: Name3;
570
600
  scorer: Scorer;
@@ -577,13 +607,19 @@ export interface Params2 {}
577
607
  export interface Metrics {
578
608
  [k: string]: EvalMetric;
579
609
  }
610
+ /**
611
+ * Metric for evaluation score.
612
+ */
580
613
  export interface EvalMetric {
581
614
  name: Name4;
582
615
  value: Value;
583
- options: Options;
616
+ params: Params3;
584
617
  metadata: Metadata1;
585
618
  }
586
- export interface Options {}
619
+ export interface Params3 {}
620
+ /**
621
+ * Timing and usage statistics.
622
+ */
587
623
  export interface EvalStats {
588
624
  started_at: StartedAt;
589
625
  completed_at: CompletedAt;
@@ -592,6 +628,9 @@ export interface EvalStats {
592
628
  export interface ModelUsage {
593
629
  [k: string]: ModelUsage1;
594
630
  }
631
+ /**
632
+ * Token usage for completion.
633
+ */
595
634
  export interface ModelUsage1 {
596
635
  input_tokens: InputTokens;
597
636
  output_tokens: OutputTokens;
@@ -599,11 +638,17 @@ export interface ModelUsage1 {
599
638
  input_tokens_cache_write: InputTokensCacheWrite;
600
639
  input_tokens_cache_read: InputTokensCacheRead;
601
640
  }
641
+ /**
642
+ * Eval error details.
643
+ */
602
644
  export interface EvalError {
603
645
  message: Message;
604
646
  traceback: Traceback;
605
647
  traceback_ansi: TracebackAnsi;
606
648
  }
649
+ /**
650
+ * Sample from evaluation task.
651
+ */
607
652
  export interface EvalSample {
608
653
  id: Id;
609
654
  epoch: Epoch;
@@ -624,40 +669,61 @@ export interface EvalSample {
624
669
  attachments: Attachments;
625
670
  limit: EvalSampleLimit | null;
626
671
  }
672
+ /**
673
+ * System chat message.
674
+ */
627
675
  export interface ChatMessageSystem {
676
+ role: Role;
628
677
  content: Content;
629
678
  source: Source;
630
- role: Role;
631
679
  }
680
+ /**
681
+ * Text content.
682
+ */
632
683
  export interface ContentText {
633
684
  type: Type1;
634
685
  text: Text;
635
686
  }
687
+ /**
688
+ * Image content.
689
+ */
636
690
  export interface ContentImage {
637
691
  type: Type2;
638
692
  image: Image;
639
693
  detail: Detail;
640
694
  }
695
+ /**
696
+ * Audio content.
697
+ */
641
698
  export interface ContentAudio {
642
699
  type: Type3;
643
700
  audio: Audio;
644
701
  format: Format;
645
702
  }
703
+ /**
704
+ * Video content.
705
+ */
646
706
  export interface ContentVideo {
647
707
  type: Type4;
648
708
  video: Video;
649
709
  format: Format1;
650
710
  }
711
+ /**
712
+ * User chat message.
713
+ */
651
714
  export interface ChatMessageUser {
715
+ role: Role1;
652
716
  content: Content1;
653
717
  source: Source1;
654
- role: Role1;
655
718
  tool_call_id: ToolCallId;
656
719
  }
720
+ /**
721
+ * Assistant chat message.
722
+ */
657
723
  export interface ChatMessageAssistant {
724
+ role: Role2;
658
725
  content: Content2;
659
726
  source: Source2;
660
- role: Role2;
661
727
  tool_calls: ToolCalls;
662
728
  reasoning: Reasoning;
663
729
  }
@@ -678,10 +744,13 @@ export interface ToolCallContent {
678
744
  format: Format2;
679
745
  content: Content3;
680
746
  }
747
+ /**
748
+ * Tool chat message.
749
+ */
681
750
  export interface ChatMessageTool {
751
+ role: Role3;
682
752
  content: Content4;
683
753
  source: Source3;
684
- role: Role3;
685
754
  tool_call_id: ToolCallId1;
686
755
  function: Function1;
687
756
  error: ToolCallError | null;
@@ -690,6 +759,9 @@ export interface ToolCallError {
690
759
  type: Type6;
691
760
  message: Message1;
692
761
  }
762
+ /**
763
+ * Output from model generation.
764
+ */
693
765
  export interface ModelOutput {
694
766
  model: Model1;
695
767
  choices: Choices1;
@@ -698,6 +770,9 @@ export interface ModelOutput {
698
770
  metadata: Metadata4;
699
771
  error: Error;
700
772
  }
773
+ /**
774
+ * Choice generated for completion.
775
+ */
701
776
  export interface ChatCompletionChoice {
702
777
  message: ChatMessageAssistant;
703
778
  stop_reason: StopReason;
@@ -728,12 +803,6 @@ export interface TopLogprob {
728
803
  }
729
804
  /**
730
805
  * Score generated by a scorer.
731
- *
732
- * Args:
733
- * value (Value): Score value.
734
- * answer (str | None): Answer extracted from model output (optional).
735
- * explanation (str | None): Explanation of score (optional).
736
- * metadata (dict[str,Any]): Additional metadata related to the score.
737
806
  */
738
807
  export interface Score {
739
808
  value: Value1;
@@ -753,6 +822,9 @@ export interface SampleInitEvent {
753
822
  sample: Sample;
754
823
  state: JsonValue;
755
824
  }
825
+ /**
826
+ * Sample for an evaluation task.
827
+ */
756
828
  export interface Sample {
757
829
  input: Input1;
758
830
  choices: Choices2;
@@ -887,7 +959,7 @@ export interface ToolFunction {
887
959
  name: Name6;
888
960
  }
889
961
  /**
890
- * Base class for model generation configs.
962
+ * Model generation options.
891
963
  */
892
964
  export interface GenerateConfig1 {
893
965
  max_retries: MaxRetries;
@@ -983,7 +1055,10 @@ export interface InputEvent {
983
1055
  input_ansi: InputAnsi;
984
1056
  }
985
1057
  /**
986
- * Event with sample score.
1058
+ * Event with score.
1059
+ *
1060
+ * Can be the final score for a `Sample`, or can be an intermediate score
1061
+ * resulting from a call to `score`.
987
1062
  */
988
1063
  export interface ScoreEvent {
989
1064
  timestamp: Timestamp8;
@@ -991,6 +1066,7 @@ export interface ScoreEvent {
991
1066
  event: Event8;
992
1067
  score: Score;
993
1068
  target: Target2;
1069
+ intermediate: Intermediate;
994
1070
  }
995
1071
  /**
996
1072
  * Event with sample error.
@@ -1010,6 +1086,9 @@ export interface LoggerEvent {
1010
1086
  event: Event10;
1011
1087
  message: LoggingMessage;
1012
1088
  }
1089
+ /**
1090
+ * Message written to Python log.
1091
+ */
1013
1092
  export interface LoggingMessage {
1014
1093
  name: Name7;
1015
1094
  level: Level;
@@ -1026,6 +1105,7 @@ export interface InfoEvent {
1026
1105
  timestamp: Timestamp11;
1027
1106
  pending: Pending11;
1028
1107
  event: Event11;
1108
+ source: Source4;
1029
1109
  data: JsonValue;
1030
1110
  }
1031
1111
  /**
@@ -1062,15 +1142,24 @@ export interface ModelUsage2 {
1062
1142
  export interface Attachments {
1063
1143
  [k: string]: string;
1064
1144
  }
1145
+ /**
1146
+ * Limit encontered by sample.
1147
+ */
1065
1148
  export interface EvalSampleLimit {
1066
1149
  type: Type13;
1067
1150
  limit: Limit2;
1068
1151
  }
1152
+ /**
1153
+ * Score reductions.
1154
+ */
1069
1155
  export interface EvalSampleReductions {
1070
1156
  scorer: Scorer1;
1071
1157
  reducer: Reducer1;
1072
1158
  samples: Samples2;
1073
1159
  }
1160
+ /**
1161
+ * Score and sample_id scored.
1162
+ */
1074
1163
  export interface EvalSampleScore {
1075
1164
  value: Value2;
1076
1165
  answer: Answer1;
@@ -1,4 +1,15 @@
1
1
  declare var Prism: {
2
2
  languages: any;
3
3
  highlight(contents: any, tokens: any, type: any): string;
4
+ highlightElement(
5
+ element: HTMLElement,
6
+ async?: boolean,
7
+ callback?: (element: HTMLElement) => void,
8
+ );
9
+
10
+ highlightAllUnder(
11
+ element: HTMLElement,
12
+ async?: boolean,
13
+ callback?: (element: HTMLElement) => void,
14
+ );
4
15
  };
@@ -0,0 +1,71 @@
1
+ import {
2
+ EvalLogHeader,
3
+ EvalSummary,
4
+ LogFiles,
5
+ SampleSummary,
6
+ } from "./api/types";
7
+ import { ContentImage, ContentText, EvalSample } from "./types/log";
8
+
9
+ export interface ApplicationState {
10
+ logs?: LogFiles;
11
+ selectedLogIndex?: number;
12
+ logHeaders?: Record<string, EvalLogHeader>;
13
+ headersLoading?: boolean;
14
+ selectedLog?: CurrentLog;
15
+ selectedWorkspaceTab?: string;
16
+ selectedSampleIndex?: number;
17
+ selectedSample?: EvalSample;
18
+ sampleStatus?: "loading" | "ok" | "error";
19
+ sampleError?: Error;
20
+ selectedSampleTab?: string;
21
+ sampleScrollPosition?: number;
22
+ showingSampleDialog?: boolean;
23
+ status?: AppStatus;
24
+ offcanvas?: boolean;
25
+ showFind?: boolean;
26
+ filter?: ScoreFilter;
27
+ epoch?: string;
28
+ sort?: string;
29
+ scores?: ScoreLabel[];
30
+ score?: ScoreLabel;
31
+ filteredSamples?: SampleSummary[];
32
+ groupBy?: "none" | "epoch" | "sample";
33
+ groupByOrder?: "asc" | "desc";
34
+ workspaceTabScrollPosition?: Record<string, number>;
35
+ }
36
+
37
+ export interface AppStatus {
38
+ loading: boolean;
39
+ error?: Error;
40
+ }
41
+
42
+ export interface Capabilities {
43
+ downloadFiles: boolean;
44
+ webWorkers: boolean;
45
+ }
46
+
47
+ export interface CurrentLog {
48
+ name: string;
49
+ contents: EvalSummary;
50
+ }
51
+
52
+ export interface Logs {
53
+ log_dir: string;
54
+ files: string[];
55
+ }
56
+
57
+ export interface ScoreLabel {
58
+ name: string;
59
+ scorer: string;
60
+ }
61
+
62
+ export interface ScoreFilter {
63
+ value?: string;
64
+ }
65
+
66
+ export type SampleMode = "none" | "single" | "many";
67
+
68
+ export interface ContentTool {
69
+ type: "tool";
70
+ content: (ContentImage | ContentText)[];
71
+ }
@@ -0,0 +1,28 @@
1
+ import { TokenHeader, TokenRow, TokenTable } from "./TokenTable";
2
+
3
+ interface ModelTokenTable {
4
+ model_usage: any;
5
+ className?: string | string[];
6
+ }
7
+
8
+ export const ModelTokenTable: React.FC<ModelTokenTable> = ({
9
+ model_usage,
10
+ className,
11
+ }) => {
12
+ return (
13
+ <TokenTable className={className}>
14
+ <TokenHeader />
15
+ <tbody>
16
+ {Object.keys(model_usage).map((key) => {
17
+ return (
18
+ <TokenRow
19
+ key={key}
20
+ model={`${key}-token-row`}
21
+ usage={model_usage[key]}
22
+ />
23
+ );
24
+ })}
25
+ </tbody>
26
+ </TokenTable>
27
+ );
28
+ };
@@ -0,0 +1,24 @@
1
+ .wrapper {
2
+ display: grid;
3
+ grid-template-columns: 0 auto auto;
4
+ column-gap: 1.5em;
5
+ row-gap: 0.2em;
6
+ }
7
+
8
+ .col2 {
9
+ grid-column: 2;
10
+ }
11
+
12
+ .col1_3 {
13
+ grid-column: 1/3;
14
+ }
15
+
16
+ .col3 {
17
+ grid-column: 3;
18
+ }
19
+
20
+ .separator {
21
+ grid-column: -1/1;
22
+ height: 1px;
23
+ background-color: var(--bs-light-border-subtle);
24
+ }
@@ -0,0 +1,97 @@
1
+ import clsx from "clsx";
2
+ import { Fragment } from "react";
3
+ import { ModelUsage1 } from "../types/log";
4
+ import { formatNumber } from "../utils/format";
5
+ import styles from "./ModelUsagePanel.module.css";
6
+
7
+ interface ModelUsageProps {
8
+ usage: ModelUsage1;
9
+ }
10
+
11
+ interface ModelUsageRow {
12
+ label: string | "---";
13
+ value?: number;
14
+ secondary?: boolean;
15
+ bordered?: boolean;
16
+ }
17
+
18
+ /**
19
+ * Renders the ModelUsagePanel component.
20
+ */
21
+ export const ModelUsagePanel: React.FC<ModelUsageProps> = ({ usage }) => {
22
+ if (!usage) {
23
+ return null;
24
+ }
25
+
26
+ const rows: ModelUsageRow[] = [
27
+ {
28
+ label: "input",
29
+ value: usage.input_tokens,
30
+ secondary: false,
31
+ },
32
+ ];
33
+
34
+ if (usage.input_tokens_cache_read) {
35
+ rows.push({
36
+ label: "cache_read",
37
+ value: usage.input_tokens_cache_read,
38
+ secondary: true,
39
+ });
40
+ }
41
+
42
+ if (usage.input_tokens_cache_write) {
43
+ rows.push({
44
+ label: "cache_write",
45
+ value: usage.input_tokens_cache_write,
46
+ secondary: true,
47
+ });
48
+ }
49
+
50
+ rows.push({
51
+ label: "Output",
52
+ value: usage.output_tokens,
53
+ secondary: false,
54
+ bordered: true,
55
+ });
56
+
57
+ rows.push({
58
+ label: "---",
59
+ value: undefined,
60
+ secondary: false,
61
+ });
62
+
63
+ rows.push({
64
+ label: "Total",
65
+ value: usage.total_tokens,
66
+ secondary: false,
67
+ });
68
+
69
+ return (
70
+ <div className={clsx("text-size-small", styles.wrapper)}>
71
+ {rows.map((row, idx) => {
72
+ if (row.label === "---") {
73
+ return (
74
+ <div key={`$usage-sep-${idx}`} className={styles.separator}></div>
75
+ );
76
+ } else {
77
+ return (
78
+ <Fragment key={`$usage-row-${idx}`}>
79
+ <div
80
+ className={clsx(
81
+ "text-style-label",
82
+ "text-style-secondary",
83
+ row.secondary ? styles.col2 : styles.col1_3,
84
+ )}
85
+ >
86
+ {row.label}
87
+ </div>
88
+ <div className={styles.col3}>
89
+ {row.value ? formatNumber(row.value) : ""}
90
+ </div>
91
+ </Fragment>
92
+ );
93
+ }
94
+ })}
95
+ </div>
96
+ );
97
+ };
@@ -0,0 +1,17 @@
1
+ .table {
2
+ width: 100%;
3
+ margin-top: 0.7rem;
4
+ }
5
+
6
+ .tableTokens {
7
+ padding-bottom: 0.7rem;
8
+ }
9
+
10
+ .tableH {
11
+ padding: 0;
12
+ font-weight: 300;
13
+ }
14
+
15
+ .model {
16
+ padding-right: 1em;
17
+ }