inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (518) hide show
  1. inspect_ai/_cli/cache.py +8 -7
  2. inspect_ai/_cli/common.py +0 -12
  3. inspect_ai/_cli/eval.py +32 -4
  4. inspect_ai/_cli/info.py +1 -0
  5. inspect_ai/_cli/list.py +1 -1
  6. inspect_ai/_cli/log.py +2 -0
  7. inspect_ai/_cli/main.py +1 -1
  8. inspect_ai/_cli/sandbox.py +4 -1
  9. inspect_ai/_cli/score.py +181 -32
  10. inspect_ai/_cli/trace.py +10 -0
  11. inspect_ai/_cli/view.py +4 -2
  12. inspect_ai/_display/core/active.py +2 -3
  13. inspect_ai/_display/core/config.py +7 -1
  14. inspect_ai/_display/textual/widgets/samples.py +4 -3
  15. inspect_ai/_display/textual/widgets/sandbox.py +6 -0
  16. inspect_ai/_eval/eval.py +104 -101
  17. inspect_ai/_eval/evalset.py +75 -75
  18. inspect_ai/_eval/loader.py +122 -12
  19. inspect_ai/_eval/registry.py +1 -1
  20. inspect_ai/_eval/run.py +14 -0
  21. inspect_ai/_eval/score.py +125 -36
  22. inspect_ai/_eval/task/log.py +105 -4
  23. inspect_ai/_eval/task/results.py +92 -38
  24. inspect_ai/_eval/task/run.py +9 -2
  25. inspect_ai/_eval/task/sandbox.py +35 -2
  26. inspect_ai/_eval/task/task.py +49 -46
  27. inspect_ai/_util/constants.py +1 -1
  28. inspect_ai/_util/content.py +8 -0
  29. inspect_ai/_util/error.py +2 -0
  30. inspect_ai/_util/file.py +15 -1
  31. inspect_ai/_util/hash.py +1 -1
  32. inspect_ai/_util/logger.py +4 -2
  33. inspect_ai/_util/registry.py +7 -1
  34. inspect_ai/_view/view.py +1 -2
  35. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  36. inspect_ai/_view/www/.vscode/settings.json +8 -0
  37. inspect_ai/_view/www/App.css +97 -29
  38. inspect_ai/_view/www/README.md +1 -1
  39. inspect_ai/_view/www/dist/assets/index.css +16663 -14674
  40. inspect_ai/_view/www/dist/assets/index.js +58808 -51348
  41. inspect_ai/_view/www/dist/index.html +1 -1
  42. inspect_ai/_view/www/index.html +2 -2
  43. inspect_ai/_view/www/log-schema.json +87 -73
  44. inspect_ai/_view/www/package.json +22 -4
  45. inspect_ai/_view/www/postcss.config.cjs +8 -9
  46. inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
  47. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  48. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  49. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  50. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  51. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  52. inspect_ai/_view/www/src/api/index.ts +4 -4
  53. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  54. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  55. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  56. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  57. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  58. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  59. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  60. inspect_ai/_view/www/src/components/Card.css +60 -0
  61. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  62. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  63. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  64. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  65. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  66. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  67. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  68. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  69. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  70. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  71. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  72. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  73. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  74. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  75. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  76. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  77. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  78. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  79. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  80. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  81. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  82. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  83. inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
  84. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  85. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  86. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  87. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  88. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  89. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  90. inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
  91. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  92. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  93. inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
  94. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  95. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  96. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  97. inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
  98. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  99. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  100. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  101. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  102. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  103. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
  104. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  105. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  106. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  107. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  108. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  109. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  110. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  111. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  112. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  113. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  114. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  115. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  116. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  117. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  118. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
  119. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  120. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  121. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  122. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  123. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  124. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  126. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  127. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
  128. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  129. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
  130. inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
  131. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  132. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  133. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  134. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  135. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  136. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
  137. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  138. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  139. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  140. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
  141. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  142. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
  143. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  144. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
  145. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  146. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
  147. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  148. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
  149. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  150. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  151. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
  152. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
  153. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  154. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  155. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  156. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  157. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  158. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  159. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  160. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  161. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  162. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  163. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  164. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  165. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  166. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  167. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  168. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  169. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  170. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  171. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  172. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  173. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  174. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  175. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  176. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  177. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  179. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  180. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  181. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  182. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
  183. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  184. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
  185. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  186. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  187. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  188. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  189. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  190. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  191. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  192. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  193. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  194. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  196. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  197. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  198. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  199. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  200. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  201. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  202. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  203. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  204. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  205. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  206. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
  207. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  208. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  209. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  210. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  211. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  212. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  213. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  214. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  215. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
  216. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  217. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  218. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  219. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  220. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  221. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
  222. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  223. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
  224. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  225. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  226. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  227. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  228. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  229. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
  230. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  231. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  232. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
  233. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  234. inspect_ai/_view/www/src/types/log.d.ts +108 -19
  235. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  236. inspect_ai/_view/www/src/types.ts +71 -0
  237. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
  238. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  239. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
  240. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  241. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  242. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  243. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  244. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  245. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  246. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  247. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  248. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  249. inspect_ai/_view/www/src/utils/format.ts +194 -0
  250. inspect_ai/_view/www/src/utils/git.ts +7 -0
  251. inspect_ai/_view/www/src/utils/html.ts +6 -0
  252. inspect_ai/_view/www/src/utils/http.ts +14 -0
  253. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  254. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  255. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  256. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  257. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  258. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  259. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  260. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  261. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
  262. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  263. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  264. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  265. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  266. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  267. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
  268. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
  269. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
  270. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  271. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
  272. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  273. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  274. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  275. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  276. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  277. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  278. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  279. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  280. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  281. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  282. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  283. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
  284. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  285. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
  286. inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
  287. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
  288. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  289. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  290. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  291. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  292. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  293. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  294. inspect_ai/_view/www/src/workspace/utils.ts +34 -0
  295. inspect_ai/_view/www/tsconfig.json +23 -9
  296. inspect_ai/_view/www/vite.config.js +8 -17
  297. inspect_ai/_view/www/yarn.lock +627 -556
  298. inspect_ai/approval/_approval.py +2 -0
  299. inspect_ai/approval/_approver.py +4 -4
  300. inspect_ai/approval/_auto.py +1 -1
  301. inspect_ai/approval/_human/approver.py +3 -0
  302. inspect_ai/approval/_policy.py +5 -0
  303. inspect_ai/approval/_registry.py +2 -2
  304. inspect_ai/dataset/_dataset.py +64 -37
  305. inspect_ai/dataset/_sources/__init__.py +0 -0
  306. inspect_ai/dataset/_sources/csv.py +20 -12
  307. inspect_ai/dataset/_sources/file.py +4 -0
  308. inspect_ai/dataset/_sources/hf.py +39 -29
  309. inspect_ai/dataset/_sources/json.py +17 -9
  310. inspect_ai/log/__init__.py +2 -0
  311. inspect_ai/log/_convert.py +3 -3
  312. inspect_ai/log/_file.py +24 -9
  313. inspect_ai/log/_log.py +101 -13
  314. inspect_ai/log/_message.py +4 -2
  315. inspect_ai/log/_recorders/file.py +4 -0
  316. inspect_ai/log/_recorders/json.py +5 -7
  317. inspect_ai/log/_recorders/recorder.py +3 -0
  318. inspect_ai/log/_transcript.py +19 -8
  319. inspect_ai/model/__init__.py +2 -0
  320. inspect_ai/model/_cache.py +39 -21
  321. inspect_ai/model/_call_tools.py +4 -3
  322. inspect_ai/model/_chat_message.py +14 -4
  323. inspect_ai/model/_generate_config.py +1 -1
  324. inspect_ai/model/_model.py +31 -24
  325. inspect_ai/model/_model_output.py +14 -1
  326. inspect_ai/model/_openai.py +10 -18
  327. inspect_ai/model/_providers/anthropic.py +3 -3
  328. inspect_ai/model/_providers/google.py +9 -5
  329. inspect_ai/model/_providers/openai.py +5 -9
  330. inspect_ai/model/_providers/openai_o1.py +3 -5
  331. inspect_ai/model/_providers/openrouter.py +86 -0
  332. inspect_ai/model/_providers/providers.py +11 -0
  333. inspect_ai/scorer/__init__.py +6 -1
  334. inspect_ai/scorer/_answer.py +7 -7
  335. inspect_ai/scorer/_classification.py +38 -18
  336. inspect_ai/scorer/_common.py +2 -8
  337. inspect_ai/scorer/_match.py +4 -5
  338. inspect_ai/scorer/_metric.py +87 -28
  339. inspect_ai/scorer/_metrics/__init__.py +3 -3
  340. inspect_ai/scorer/_metrics/accuracy.py +8 -10
  341. inspect_ai/scorer/_metrics/mean.py +3 -17
  342. inspect_ai/scorer/_metrics/std.py +111 -30
  343. inspect_ai/scorer/_model.py +12 -12
  344. inspect_ai/scorer/_pattern.py +3 -3
  345. inspect_ai/scorer/_reducer/reducer.py +36 -21
  346. inspect_ai/scorer/_reducer/registry.py +2 -2
  347. inspect_ai/scorer/_reducer/types.py +7 -1
  348. inspect_ai/scorer/_score.py +11 -1
  349. inspect_ai/scorer/_scorer.py +110 -16
  350. inspect_ai/solver/__init__.py +1 -1
  351. inspect_ai/solver/_basic_agent.py +19 -22
  352. inspect_ai/solver/_bridge/__init__.py +0 -3
  353. inspect_ai/solver/_bridge/bridge.py +3 -3
  354. inspect_ai/solver/_chain.py +1 -2
  355. inspect_ai/solver/_critique.py +3 -3
  356. inspect_ai/solver/_fork.py +2 -2
  357. inspect_ai/solver/_human_agent/__init__.py +0 -0
  358. inspect_ai/solver/_human_agent/agent.py +5 -8
  359. inspect_ai/solver/_human_agent/commands/clock.py +14 -10
  360. inspect_ai/solver/_human_agent/commands/note.py +1 -1
  361. inspect_ai/solver/_human_agent/commands/score.py +0 -11
  362. inspect_ai/solver/_multiple_choice.py +38 -26
  363. inspect_ai/solver/_prompt.py +7 -7
  364. inspect_ai/solver/_solver.py +53 -52
  365. inspect_ai/solver/_task_state.py +80 -69
  366. inspect_ai/solver/_use_tools.py +9 -9
  367. inspect_ai/tool/__init__.py +4 -1
  368. inspect_ai/tool/_tool.py +43 -14
  369. inspect_ai/tool/_tool_call.py +6 -2
  370. inspect_ai/tool/_tool_choice.py +3 -1
  371. inspect_ai/tool/_tool_def.py +10 -8
  372. inspect_ai/tool/_tool_params.py +24 -0
  373. inspect_ai/tool/_tool_with.py +7 -7
  374. inspect_ai/tool/_tools/__init__.py +0 -0
  375. inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
  376. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
  377. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  378. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  379. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  380. inspect_ai/tool/_tools/_execute.py +23 -11
  381. inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
  382. inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
  383. inspect_ai/tool/_tools/_web_search.py +7 -5
  384. inspect_ai/tool/beta.py +3 -0
  385. inspect_ai/util/_concurrency.py +3 -3
  386. inspect_ai/util/_panel.py +2 -0
  387. inspect_ai/util/_resource.py +12 -12
  388. inspect_ai/util/_sandbox/docker/compose.py +23 -20
  389. inspect_ai/util/_sandbox/docker/config.py +2 -1
  390. inspect_ai/util/_sandbox/docker/docker.py +42 -86
  391. inspect_ai/util/_sandbox/docker/service.py +100 -0
  392. inspect_ai/util/_sandbox/environment.py +99 -96
  393. inspect_ai/util/_sandbox/self_check.py +124 -16
  394. inspect_ai/util/_subprocess.py +5 -3
  395. inspect_ai/util/_subtask.py +15 -16
  396. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
  397. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
  398. inspect_ai-0.3.64.dist-info/RECORD +625 -0
  399. inspect_ai/_view/www/src/Register.mjs +0 -3
  400. inspect_ai/_view/www/src/Types.mjs +0 -38
  401. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  402. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  403. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  404. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  405. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  406. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  407. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  408. inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
  409. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  410. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  411. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  412. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  413. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  414. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  415. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  416. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  417. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  418. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  419. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  420. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  421. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  422. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  423. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  424. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  425. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  426. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  427. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  428. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  429. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  430. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  431. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  432. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  433. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  434. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  435. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  436. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  437. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  438. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  439. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  440. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  441. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  442. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  443. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  444. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  445. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  446. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  447. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  448. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  449. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  450. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  451. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  452. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  453. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  454. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  455. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  456. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  457. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  458. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  459. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  460. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  461. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  462. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  463. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  464. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  465. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  466. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  467. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  468. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  469. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  470. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  471. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  472. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  473. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  474. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  475. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  476. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  477. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
  478. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  479. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  480. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  481. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  482. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  483. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  484. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  485. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  486. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  487. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  488. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  489. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  490. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  491. inspect_ai/tool/beta/__init__.py +0 -5
  492. inspect_ai-0.3.62.dist-info/RECORD +0 -481
  493. /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
  494. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
  495. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  496. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  497. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  498. /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
  499. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  500. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  501. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  502. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  503. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  504. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  505. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  506. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
  507. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
  508. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  509. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  510. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
  511. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  512. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  513. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  514. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  515. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  516. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
  517. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
  518. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -21,7 +21,7 @@
21
21
  <link rel="stylesheet" crossorigin href="./assets/index.css">
22
22
  </head>
23
23
 
24
- <body style="min-width: 450px;">
24
+ <body style="min-width: 450px">
25
25
  <div id="app"></div>
26
26
  </body>
27
27
  </html>
@@ -19,8 +19,8 @@
19
19
  </script>
20
20
  </head>
21
21
 
22
- <body style="min-width: 450px;">
22
+ <body style="min-width: 450px">
23
23
  <div id="app"></div>
24
- <script type="module" src="./src/index.js"></script>
24
+ <script type="module" src="./src/index.tsx"></script>
25
25
  </body>
26
26
  </html>
@@ -157,6 +157,7 @@
157
157
  "type": "object"
158
158
  },
159
159
  "ChatCompletionChoice": {
160
+ "description": "Choice generated for completion.",
160
161
  "properties": {
161
162
  "message": {
162
163
  "$ref": "#/$defs/ChatMessageAssistant"
@@ -196,7 +197,14 @@
196
197
  "additionalProperties": false
197
198
  },
198
199
  "ChatMessageAssistant": {
200
+ "description": "Assistant chat message.",
199
201
  "properties": {
202
+ "role": {
203
+ "const": "assistant",
204
+ "default": "assistant",
205
+ "title": "Role",
206
+ "type": "string"
207
+ },
200
208
  "content": {
201
209
  "anyOf": [
202
210
  {
@@ -240,12 +248,6 @@
240
248
  "default": null,
241
249
  "title": "Source"
242
250
  },
243
- "role": {
244
- "const": "assistant",
245
- "default": "assistant",
246
- "title": "Role",
247
- "type": "string"
248
- },
249
251
  "tool_calls": {
250
252
  "anyOf": [
251
253
  {
@@ -275,9 +277,9 @@
275
277
  }
276
278
  },
277
279
  "required": [
280
+ "role",
278
281
  "content",
279
282
  "source",
280
- "role",
281
283
  "tool_calls",
282
284
  "reasoning"
283
285
  ],
@@ -286,7 +288,14 @@
286
288
  "additionalProperties": false
287
289
  },
288
290
  "ChatMessageSystem": {
291
+ "description": "System chat message.",
289
292
  "properties": {
293
+ "role": {
294
+ "const": "system",
295
+ "default": "system",
296
+ "title": "Role",
297
+ "type": "string"
298
+ },
290
299
  "content": {
291
300
  "anyOf": [
292
301
  {
@@ -329,25 +338,26 @@
329
338
  ],
330
339
  "default": null,
331
340
  "title": "Source"
332
- },
333
- "role": {
334
- "const": "system",
335
- "default": "system",
336
- "title": "Role",
337
- "type": "string"
338
341
  }
339
342
  },
340
343
  "required": [
344
+ "role",
341
345
  "content",
342
- "source",
343
- "role"
346
+ "source"
344
347
  ],
345
348
  "title": "ChatMessageSystem",
346
349
  "type": "object",
347
350
  "additionalProperties": false
348
351
  },
349
352
  "ChatMessageTool": {
353
+ "description": "Tool chat message.",
350
354
  "properties": {
355
+ "role": {
356
+ "const": "tool",
357
+ "default": "tool",
358
+ "title": "Role",
359
+ "type": "string"
360
+ },
351
361
  "content": {
352
362
  "anyOf": [
353
363
  {
@@ -391,12 +401,6 @@
391
401
  "default": null,
392
402
  "title": "Source"
393
403
  },
394
- "role": {
395
- "const": "tool",
396
- "default": "tool",
397
- "title": "Role",
398
- "type": "string"
399
- },
400
404
  "tool_call_id": {
401
405
  "anyOf": [
402
406
  {
@@ -434,9 +438,9 @@
434
438
  }
435
439
  },
436
440
  "required": [
441
+ "role",
437
442
  "content",
438
443
  "source",
439
- "role",
440
444
  "tool_call_id",
441
445
  "function",
442
446
  "error"
@@ -446,7 +450,14 @@
446
450
  "additionalProperties": false
447
451
  },
448
452
  "ChatMessageUser": {
453
+ "description": "User chat message.",
449
454
  "properties": {
455
+ "role": {
456
+ "const": "user",
457
+ "default": "user",
458
+ "title": "Role",
459
+ "type": "string"
460
+ },
450
461
  "content": {
451
462
  "anyOf": [
452
463
  {
@@ -490,12 +501,6 @@
490
501
  "default": null,
491
502
  "title": "Source"
492
503
  },
493
- "role": {
494
- "const": "user",
495
- "default": "user",
496
- "title": "Role",
497
- "type": "string"
498
- },
499
504
  "tool_call_id": {
500
505
  "anyOf": [
501
506
  {
@@ -513,9 +518,9 @@
513
518
  }
514
519
  },
515
520
  "required": [
521
+ "role",
516
522
  "content",
517
523
  "source",
518
- "role",
519
524
  "tool_call_id"
520
525
  ],
521
526
  "title": "ChatMessageUser",
@@ -523,6 +528,7 @@
523
528
  "additionalProperties": false
524
529
  },
525
530
  "ContentAudio": {
531
+ "description": "Audio content.",
526
532
  "properties": {
527
533
  "type": {
528
534
  "const": "audio",
@@ -553,6 +559,7 @@
553
559
  "additionalProperties": false
554
560
  },
555
561
  "ContentImage": {
562
+ "description": "Image content.",
556
563
  "properties": {
557
564
  "type": {
558
565
  "const": "image",
@@ -585,6 +592,7 @@
585
592
  "additionalProperties": false
586
593
  },
587
594
  "ContentText": {
595
+ "description": "Text content.",
588
596
  "properties": {
589
597
  "type": {
590
598
  "const": "text",
@@ -606,6 +614,7 @@
606
614
  "additionalProperties": false
607
615
  },
608
616
  "ContentVideo": {
617
+ "description": "Video content.",
609
618
  "properties": {
610
619
  "type": {
611
620
  "const": "video",
@@ -677,6 +686,7 @@
677
686
  "additionalProperties": false
678
687
  },
679
688
  "EvalConfig": {
689
+ "description": "Configuration used for evaluation.",
680
690
  "properties": {
681
691
  "limit": {
682
692
  "anyOf": [
@@ -954,6 +964,7 @@
954
964
  "additionalProperties": false
955
965
  },
956
966
  "EvalDataset": {
967
+ "description": "Dataset used for evaluation.",
957
968
  "properties": {
958
969
  "name": {
959
970
  "anyOf": [
@@ -1038,6 +1049,7 @@
1038
1049
  "additionalProperties": false
1039
1050
  },
1040
1051
  "EvalError": {
1052
+ "description": "Eval error details.",
1041
1053
  "properties": {
1042
1054
  "message": {
1043
1055
  "title": "Message",
@@ -1062,6 +1074,7 @@
1062
1074
  "additionalProperties": false
1063
1075
  },
1064
1076
  "EvalMetric": {
1077
+ "description": "Metric for evaluation score.",
1065
1078
  "properties": {
1066
1079
  "name": {
1067
1080
  "title": "Name",
@@ -1078,8 +1091,8 @@
1078
1091
  ],
1079
1092
  "title": "Value"
1080
1093
  },
1081
- "options": {
1082
- "title": "Options",
1094
+ "params": {
1095
+ "title": "Params",
1083
1096
  "type": "object"
1084
1097
  },
1085
1098
  "metadata": {
@@ -1098,7 +1111,7 @@
1098
1111
  "required": [
1099
1112
  "name",
1100
1113
  "value",
1101
- "options",
1114
+ "params",
1102
1115
  "metadata"
1103
1116
  ],
1104
1117
  "title": "EvalMetric",
@@ -1106,6 +1119,7 @@
1106
1119
  "additionalProperties": false
1107
1120
  },
1108
1121
  "EvalPlan": {
1122
+ "description": "Plan (solvers) used in evaluation.",
1109
1123
  "properties": {
1110
1124
  "name": {
1111
1125
  "default": "plan",
@@ -1171,6 +1185,7 @@
1171
1185
  "additionalProperties": false
1172
1186
  },
1173
1187
  "EvalPlanStep": {
1188
+ "description": "Solver step.",
1174
1189
  "properties": {
1175
1190
  "solver": {
1176
1191
  "title": "Solver",
@@ -1190,6 +1205,7 @@
1190
1205
  "additionalProperties": false
1191
1206
  },
1192
1207
  "EvalResults": {
1208
+ "description": "Scoring results from evaluation.",
1193
1209
  "properties": {
1194
1210
  "total_samples": {
1195
1211
  "default": 0,
@@ -1233,6 +1249,7 @@
1233
1249
  "additionalProperties": false
1234
1250
  },
1235
1251
  "EvalRevision": {
1252
+ "description": "Git revision for evaluation.",
1236
1253
  "properties": {
1237
1254
  "type": {
1238
1255
  "const": "git",
@@ -1258,6 +1275,7 @@
1258
1275
  "additionalProperties": false
1259
1276
  },
1260
1277
  "EvalSample": {
1278
+ "description": "Sample from evaluation task.",
1261
1279
  "properties": {
1262
1280
  "id": {
1263
1281
  "anyOf": [
@@ -1526,6 +1544,7 @@
1526
1544
  "additionalProperties": false
1527
1545
  },
1528
1546
  "EvalSampleLimit": {
1547
+ "description": "Limit encontered by sample.",
1529
1548
  "properties": {
1530
1549
  "type": {
1531
1550
  "enum": [
@@ -1553,6 +1572,7 @@
1553
1572
  "additionalProperties": false
1554
1573
  },
1555
1574
  "EvalSampleReductions": {
1575
+ "description": "Score reductions.",
1556
1576
  "properties": {
1557
1577
  "scorer": {
1558
1578
  "title": "Scorer",
@@ -1588,6 +1608,7 @@
1588
1608
  "additionalProperties": false
1589
1609
  },
1590
1610
  "EvalSampleScore": {
1611
+ "description": "Score and sample_id scored.",
1591
1612
  "properties": {
1592
1613
  "value": {
1593
1614
  "anyOf": [
@@ -1711,6 +1732,7 @@
1711
1732
  "additionalProperties": false
1712
1733
  },
1713
1734
  "EvalScore": {
1735
+ "description": "Score for evaluation task.",
1714
1736
  "properties": {
1715
1737
  "name": {
1716
1738
  "title": "Name",
@@ -1769,6 +1791,7 @@
1769
1791
  "additionalProperties": false
1770
1792
  },
1771
1793
  "EvalSpec": {
1794
+ "description": "Eval target and configuration.",
1772
1795
  "properties": {
1773
1796
  "run_id": {
1774
1797
  "title": "Run Id",
@@ -1945,6 +1968,7 @@
1945
1968
  "additionalProperties": false
1946
1969
  },
1947
1970
  "EvalStats": {
1971
+ "description": "Timing and usage statistics.",
1948
1972
  "properties": {
1949
1973
  "started_at": {
1950
1974
  "title": "Started At",
@@ -1972,7 +1996,7 @@
1972
1996
  "additionalProperties": false
1973
1997
  },
1974
1998
  "GenerateConfig": {
1975
- "description": "Base class for model generation configs.",
1999
+ "description": "Model generation options.",
1976
2000
  "properties": {
1977
2001
  "max_retries": {
1978
2002
  "anyOf": [
@@ -2321,6 +2345,18 @@
2321
2345
  "title": "Event",
2322
2346
  "type": "string"
2323
2347
  },
2348
+ "source": {
2349
+ "anyOf": [
2350
+ {
2351
+ "type": "string"
2352
+ },
2353
+ {
2354
+ "type": "null"
2355
+ }
2356
+ ],
2357
+ "default": null,
2358
+ "title": "Source"
2359
+ },
2324
2360
  "data": {
2325
2361
  "$ref": "#/$defs/JsonValue"
2326
2362
  }
@@ -2329,6 +2365,7 @@
2329
2365
  "timestamp",
2330
2366
  "pending",
2331
2367
  "event",
2368
+ "source",
2332
2369
  "data"
2333
2370
  ],
2334
2371
  "title": "InfoEvent",
@@ -2474,6 +2511,7 @@
2474
2511
  "additionalProperties": false
2475
2512
  },
2476
2513
  "LoggingMessage": {
2514
+ "description": "Message written to Python log.",
2477
2515
  "properties": {
2478
2516
  "name": {
2479
2517
  "anyOf": [
@@ -2490,6 +2528,7 @@
2490
2528
  "level": {
2491
2529
  "enum": [
2492
2530
  "debug",
2531
+ "trace",
2493
2532
  "http",
2494
2533
  "sandbox",
2495
2534
  "info",
@@ -2771,6 +2810,7 @@
2771
2810
  "additionalProperties": false
2772
2811
  },
2773
2812
  "ModelOutput": {
2813
+ "description": "Output from model generation.",
2774
2814
  "properties": {
2775
2815
  "model": {
2776
2816
  "title": "Model",
@@ -2845,6 +2885,7 @@
2845
2885
  "additionalProperties": false
2846
2886
  },
2847
2887
  "ModelUsage": {
2888
+ "description": "Token usage for completion.",
2848
2889
  "properties": {
2849
2890
  "input_tokens": {
2850
2891
  "default": 0,
@@ -2898,6 +2939,7 @@
2898
2939
  "additionalProperties": false
2899
2940
  },
2900
2941
  "Sample": {
2942
+ "description": "Sample for an evaluation task.",
2901
2943
  "properties": {
2902
2944
  "input": {
2903
2945
  "anyOf": [
@@ -3172,7 +3214,7 @@
3172
3214
  "type": "array"
3173
3215
  },
3174
3216
  "Score": {
3175
- "description": "Score generated by a scorer.\n\nArgs:\n value (Value): Score value.\n answer (str | None): Answer extracted from model output (optional).\n explanation (str | None): Explanation of score (optional).\n metadata (dict[str,Any]): Additional metadata related to the score.",
3217
+ "description": "Score generated by a scorer.",
3176
3218
  "properties": {
3177
3219
  "value": {
3178
3220
  "anyOf": [
@@ -3280,7 +3322,7 @@
3280
3322
  "additionalProperties": false
3281
3323
  },
3282
3324
  "ScoreEvent": {
3283
- "description": "Event with sample score.",
3325
+ "description": "Event with score.\n\nCan be the final score for a `Sample`, or can be an intermediate score\nresulting from a call to `score`.",
3284
3326
  "properties": {
3285
3327
  "timestamp": {
3286
3328
  "format": "date-time",
@@ -3325,6 +3367,11 @@
3325
3367
  ],
3326
3368
  "default": null,
3327
3369
  "title": "Target"
3370
+ },
3371
+ "intermediate": {
3372
+ "default": false,
3373
+ "title": "Intermediate",
3374
+ "type": "boolean"
3328
3375
  }
3329
3376
  },
3330
3377
  "required": [
@@ -3332,7 +3379,8 @@
3332
3379
  "pending",
3333
3380
  "event",
3334
3381
  "score",
3335
- "target"
3382
+ "target",
3383
+ "intermediate"
3336
3384
  ],
3337
3385
  "title": "ScoreEvent",
3338
3386
  "type": "object",
@@ -4222,6 +4270,7 @@
4222
4270
  "additionalProperties": false
4223
4271
  }
4224
4272
  },
4273
+ "description": "Evaluation log.",
4225
4274
  "properties": {
4226
4275
  "version": {
4227
4276
  "default": 2,
@@ -4243,37 +4292,7 @@
4243
4292
  "$ref": "#/$defs/EvalSpec"
4244
4293
  },
4245
4294
  "plan": {
4246
- "$ref": "#/$defs/EvalPlan",
4247
- "default": {
4248
- "name": "plan",
4249
- "steps": [],
4250
- "finish": null,
4251
- "config": {
4252
- "best_of": null,
4253
- "cache_prompt": null,
4254
- "frequency_penalty": null,
4255
- "internal_tools": null,
4256
- "logit_bias": null,
4257
- "logprobs": null,
4258
- "max_connections": null,
4259
- "max_retries": null,
4260
- "max_tokens": null,
4261
- "max_tool_output": null,
4262
- "num_choices": null,
4263
- "parallel_tool_calls": null,
4264
- "presence_penalty": null,
4265
- "reasoning_effort": null,
4266
- "reasoning_history": null,
4267
- "seed": null,
4268
- "stop_seqs": null,
4269
- "system_message": null,
4270
- "temperature": null,
4271
- "timeout": null,
4272
- "top_k": null,
4273
- "top_logprobs": null,
4274
- "top_p": null
4275
- }
4276
- }
4295
+ "$ref": "#/$defs/EvalPlan"
4277
4296
  },
4278
4297
  "results": {
4279
4298
  "anyOf": [
@@ -4287,12 +4306,7 @@
4287
4306
  "default": null
4288
4307
  },
4289
4308
  "stats": {
4290
- "$ref": "#/$defs/EvalStats",
4291
- "default": {
4292
- "started_at": "",
4293
- "completed_at": "",
4294
- "model_usage": {}
4295
- }
4309
+ "$ref": "#/$defs/EvalStats"
4296
4310
  },
4297
4311
  "error": {
4298
4312
  "anyOf": [
@@ -8,6 +8,7 @@
8
8
  "scripts": {
9
9
  "build": "vite build",
10
10
  "watch": "vite build --watch",
11
+ "dev-watch": "NODE_ENV=development vite build --mode development --watch",
11
12
  "dev": "vite",
12
13
  "prettier:check": "prettier --check src",
13
14
  "prettier:write": "prettier --write src",
@@ -18,18 +19,34 @@
18
19
  },
19
20
  "devDependencies": {
20
21
  "@eslint/js": "^9.5.0",
22
+ "@types/bootstrap": "^5.2.10",
23
+ "@types/clipboard": "^2.0.10",
24
+ "@types/codemirror": "^5.60.15",
25
+ "@types/css-modules": "^1.0.5",
26
+ "@types/markdown-it": "^14.1.2",
27
+ "@types/prismjs": "^1.26.5",
28
+ "@types/react": "^19.0.7",
29
+ "@types/react-dom": "^19.0.3",
30
+ "@vitejs/plugin-react": "^4.3.4",
21
31
  "eslint": "9.x",
22
32
  "globals": "^15.6.0",
23
33
  "prettier": "^3.3.3",
24
- "vite": "^5.3.2",
25
- "vite-plugin-prismjs": "^0.0.11"
34
+ "typescript": "^5.7.3",
35
+ "vite": "^5.3.2"
26
36
  },
27
37
  "dependencies": {
38
+ "@codemirror/autocomplete": "^6.18.4",
39
+ "@codemirror/language": "^6.10.8",
40
+ "@codemirror/lint": "^6.8.4",
41
+ "@codemirror/state": "^6.5.1",
42
+ "@lezer/highlight": "^1.2.1",
28
43
  "@popperjs/core": "^2.11.8",
29
44
  "asciinema-player": "^3.8.2",
45
+ "ansi-output": "^0.0.9",
30
46
  "bootstrap": "^5.3.3",
31
47
  "bootstrap-icons": "^1.11.3",
32
48
  "clipboard": "^2.0.11",
49
+ "clsx": "^2.1.1",
33
50
  "codemirror": "^6.0.1",
34
51
  "fast-json-patch": "^3.1.1",
35
52
  "fflate": "^0.8.2",
@@ -41,7 +58,8 @@
41
58
  "markdown-it": "^14.1.0",
42
59
  "murmurhash": "^2.0.1",
43
60
  "postcss-url": "^10.1.3",
44
- "preact": "^10.24.3",
45
- "prismjs": "^1.29.0"
61
+ "prismjs": "^1.29.0",
62
+ "react": "^19.0.0",
63
+ "react-dom": "^19.0.0"
46
64
  }
47
65
  }
@@ -1,11 +1,10 @@
1
-
2
1
  // postcss.config.js
3
2
  module.exports = {
4
- plugins: [
5
- require('postcss-url')({
6
- url: 'inline', // Inline all assets
7
- maxSize: Infinity, // Maximum file size to inline (in kilobytes). Adjust as needed.
8
- fallback: 'copy', // Copy files to output directory if they are larger than the maxSize
9
- }),
10
- ],
11
- };
3
+ plugins: [
4
+ require("postcss-url")({
5
+ url: "inline", // Inline all assets
6
+ maxSize: Infinity, // Maximum file size to inline (in kilobytes). Adjust as needed.
7
+ fallback: "copy", // Copy files to output directory if they are larger than the maxSize
8
+ }),
9
+ ],
10
+ };