inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (518) hide show
  1. inspect_ai/_cli/cache.py +8 -7
  2. inspect_ai/_cli/common.py +0 -12
  3. inspect_ai/_cli/eval.py +32 -4
  4. inspect_ai/_cli/info.py +1 -0
  5. inspect_ai/_cli/list.py +1 -1
  6. inspect_ai/_cli/log.py +2 -0
  7. inspect_ai/_cli/main.py +1 -1
  8. inspect_ai/_cli/sandbox.py +4 -1
  9. inspect_ai/_cli/score.py +181 -32
  10. inspect_ai/_cli/trace.py +10 -0
  11. inspect_ai/_cli/view.py +4 -2
  12. inspect_ai/_display/core/active.py +2 -3
  13. inspect_ai/_display/core/config.py +7 -1
  14. inspect_ai/_display/textual/widgets/samples.py +4 -3
  15. inspect_ai/_display/textual/widgets/sandbox.py +6 -0
  16. inspect_ai/_eval/eval.py +104 -101
  17. inspect_ai/_eval/evalset.py +75 -75
  18. inspect_ai/_eval/loader.py +122 -12
  19. inspect_ai/_eval/registry.py +1 -1
  20. inspect_ai/_eval/run.py +14 -0
  21. inspect_ai/_eval/score.py +125 -36
  22. inspect_ai/_eval/task/log.py +105 -4
  23. inspect_ai/_eval/task/results.py +92 -38
  24. inspect_ai/_eval/task/run.py +9 -2
  25. inspect_ai/_eval/task/sandbox.py +35 -2
  26. inspect_ai/_eval/task/task.py +49 -46
  27. inspect_ai/_util/constants.py +1 -1
  28. inspect_ai/_util/content.py +8 -0
  29. inspect_ai/_util/error.py +2 -0
  30. inspect_ai/_util/file.py +15 -1
  31. inspect_ai/_util/hash.py +1 -1
  32. inspect_ai/_util/logger.py +4 -2
  33. inspect_ai/_util/registry.py +7 -1
  34. inspect_ai/_view/view.py +1 -2
  35. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  36. inspect_ai/_view/www/.vscode/settings.json +8 -0
  37. inspect_ai/_view/www/App.css +97 -29
  38. inspect_ai/_view/www/README.md +1 -1
  39. inspect_ai/_view/www/dist/assets/index.css +16663 -14674
  40. inspect_ai/_view/www/dist/assets/index.js +58808 -51348
  41. inspect_ai/_view/www/dist/index.html +1 -1
  42. inspect_ai/_view/www/index.html +2 -2
  43. inspect_ai/_view/www/log-schema.json +87 -73
  44. inspect_ai/_view/www/package.json +22 -4
  45. inspect_ai/_view/www/postcss.config.cjs +8 -9
  46. inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
  47. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  48. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  49. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  50. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  51. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  52. inspect_ai/_view/www/src/api/index.ts +4 -4
  53. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  54. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  55. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  56. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  57. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  58. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  59. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  60. inspect_ai/_view/www/src/components/Card.css +60 -0
  61. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  62. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  63. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  64. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  65. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  66. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  67. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  68. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  69. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  70. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  71. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  72. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  73. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  74. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  75. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  76. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  77. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  78. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  79. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  80. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  81. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  82. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  83. inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
  84. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  85. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  86. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  87. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  88. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  89. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  90. inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
  91. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  92. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  93. inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
  94. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  95. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  96. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  97. inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
  98. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  99. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  100. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  101. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  102. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  103. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
  104. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  105. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  106. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  107. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  108. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  109. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  110. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  111. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  112. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  113. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  114. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  115. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  116. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  117. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  118. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
  119. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  120. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  121. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  122. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  123. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  124. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  126. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  127. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
  128. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  129. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
  130. inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
  131. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  132. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  133. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  134. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  135. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  136. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
  137. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  138. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  139. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  140. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
  141. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  142. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
  143. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  144. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
  145. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  146. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
  147. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  148. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
  149. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  150. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  151. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
  152. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
  153. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  154. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  155. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  156. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  157. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  158. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  159. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  160. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  161. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  162. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  163. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  164. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  165. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  166. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  167. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  168. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  169. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  170. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  171. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  172. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  173. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  174. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  175. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  176. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  177. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  179. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  180. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  181. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  182. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
  183. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  184. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
  185. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  186. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  187. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  188. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  189. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  190. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  191. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  192. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  193. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  194. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  196. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  197. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  198. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  199. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  200. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  201. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  202. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  203. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  204. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  205. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  206. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
  207. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  208. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  209. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  210. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  211. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  212. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  213. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  214. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  215. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
  216. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  217. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  218. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  219. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  220. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  221. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
  222. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  223. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
  224. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  225. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  226. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  227. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  228. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  229. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
  230. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  231. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  232. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
  233. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  234. inspect_ai/_view/www/src/types/log.d.ts +108 -19
  235. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  236. inspect_ai/_view/www/src/types.ts +71 -0
  237. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
  238. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  239. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
  240. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  241. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  242. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  243. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  244. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  245. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  246. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  247. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  248. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  249. inspect_ai/_view/www/src/utils/format.ts +194 -0
  250. inspect_ai/_view/www/src/utils/git.ts +7 -0
  251. inspect_ai/_view/www/src/utils/html.ts +6 -0
  252. inspect_ai/_view/www/src/utils/http.ts +14 -0
  253. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  254. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  255. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  256. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  257. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  258. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  259. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  260. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  261. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
  262. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  263. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  264. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  265. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  266. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  267. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
  268. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
  269. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
  270. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  271. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
  272. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  273. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  274. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  275. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  276. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  277. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  278. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  279. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  280. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  281. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  282. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  283. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
  284. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  285. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
  286. inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
  287. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
  288. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  289. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  290. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  291. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  292. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  293. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  294. inspect_ai/_view/www/src/workspace/utils.ts +34 -0
  295. inspect_ai/_view/www/tsconfig.json +23 -9
  296. inspect_ai/_view/www/vite.config.js +8 -17
  297. inspect_ai/_view/www/yarn.lock +627 -556
  298. inspect_ai/approval/_approval.py +2 -0
  299. inspect_ai/approval/_approver.py +4 -4
  300. inspect_ai/approval/_auto.py +1 -1
  301. inspect_ai/approval/_human/approver.py +3 -0
  302. inspect_ai/approval/_policy.py +5 -0
  303. inspect_ai/approval/_registry.py +2 -2
  304. inspect_ai/dataset/_dataset.py +64 -37
  305. inspect_ai/dataset/_sources/__init__.py +0 -0
  306. inspect_ai/dataset/_sources/csv.py +20 -12
  307. inspect_ai/dataset/_sources/file.py +4 -0
  308. inspect_ai/dataset/_sources/hf.py +39 -29
  309. inspect_ai/dataset/_sources/json.py +17 -9
  310. inspect_ai/log/__init__.py +2 -0
  311. inspect_ai/log/_convert.py +3 -3
  312. inspect_ai/log/_file.py +24 -9
  313. inspect_ai/log/_log.py +101 -13
  314. inspect_ai/log/_message.py +4 -2
  315. inspect_ai/log/_recorders/file.py +4 -0
  316. inspect_ai/log/_recorders/json.py +5 -7
  317. inspect_ai/log/_recorders/recorder.py +3 -0
  318. inspect_ai/log/_transcript.py +19 -8
  319. inspect_ai/model/__init__.py +2 -0
  320. inspect_ai/model/_cache.py +39 -21
  321. inspect_ai/model/_call_tools.py +4 -3
  322. inspect_ai/model/_chat_message.py +14 -4
  323. inspect_ai/model/_generate_config.py +1 -1
  324. inspect_ai/model/_model.py +31 -24
  325. inspect_ai/model/_model_output.py +14 -1
  326. inspect_ai/model/_openai.py +10 -18
  327. inspect_ai/model/_providers/anthropic.py +3 -3
  328. inspect_ai/model/_providers/google.py +9 -5
  329. inspect_ai/model/_providers/openai.py +5 -9
  330. inspect_ai/model/_providers/openai_o1.py +3 -5
  331. inspect_ai/model/_providers/openrouter.py +86 -0
  332. inspect_ai/model/_providers/providers.py +11 -0
  333. inspect_ai/scorer/__init__.py +6 -1
  334. inspect_ai/scorer/_answer.py +7 -7
  335. inspect_ai/scorer/_classification.py +38 -18
  336. inspect_ai/scorer/_common.py +2 -8
  337. inspect_ai/scorer/_match.py +4 -5
  338. inspect_ai/scorer/_metric.py +87 -28
  339. inspect_ai/scorer/_metrics/__init__.py +3 -3
  340. inspect_ai/scorer/_metrics/accuracy.py +8 -10
  341. inspect_ai/scorer/_metrics/mean.py +3 -17
  342. inspect_ai/scorer/_metrics/std.py +111 -30
  343. inspect_ai/scorer/_model.py +12 -12
  344. inspect_ai/scorer/_pattern.py +3 -3
  345. inspect_ai/scorer/_reducer/reducer.py +36 -21
  346. inspect_ai/scorer/_reducer/registry.py +2 -2
  347. inspect_ai/scorer/_reducer/types.py +7 -1
  348. inspect_ai/scorer/_score.py +11 -1
  349. inspect_ai/scorer/_scorer.py +110 -16
  350. inspect_ai/solver/__init__.py +1 -1
  351. inspect_ai/solver/_basic_agent.py +19 -22
  352. inspect_ai/solver/_bridge/__init__.py +0 -3
  353. inspect_ai/solver/_bridge/bridge.py +3 -3
  354. inspect_ai/solver/_chain.py +1 -2
  355. inspect_ai/solver/_critique.py +3 -3
  356. inspect_ai/solver/_fork.py +2 -2
  357. inspect_ai/solver/_human_agent/__init__.py +0 -0
  358. inspect_ai/solver/_human_agent/agent.py +5 -8
  359. inspect_ai/solver/_human_agent/commands/clock.py +14 -10
  360. inspect_ai/solver/_human_agent/commands/note.py +1 -1
  361. inspect_ai/solver/_human_agent/commands/score.py +0 -11
  362. inspect_ai/solver/_multiple_choice.py +38 -26
  363. inspect_ai/solver/_prompt.py +7 -7
  364. inspect_ai/solver/_solver.py +53 -52
  365. inspect_ai/solver/_task_state.py +80 -69
  366. inspect_ai/solver/_use_tools.py +9 -9
  367. inspect_ai/tool/__init__.py +4 -1
  368. inspect_ai/tool/_tool.py +43 -14
  369. inspect_ai/tool/_tool_call.py +6 -2
  370. inspect_ai/tool/_tool_choice.py +3 -1
  371. inspect_ai/tool/_tool_def.py +10 -8
  372. inspect_ai/tool/_tool_params.py +24 -0
  373. inspect_ai/tool/_tool_with.py +7 -7
  374. inspect_ai/tool/_tools/__init__.py +0 -0
  375. inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
  376. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
  377. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  378. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  379. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  380. inspect_ai/tool/_tools/_execute.py +23 -11
  381. inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
  382. inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
  383. inspect_ai/tool/_tools/_web_search.py +7 -5
  384. inspect_ai/tool/beta.py +3 -0
  385. inspect_ai/util/_concurrency.py +3 -3
  386. inspect_ai/util/_panel.py +2 -0
  387. inspect_ai/util/_resource.py +12 -12
  388. inspect_ai/util/_sandbox/docker/compose.py +23 -20
  389. inspect_ai/util/_sandbox/docker/config.py +2 -1
  390. inspect_ai/util/_sandbox/docker/docker.py +42 -86
  391. inspect_ai/util/_sandbox/docker/service.py +100 -0
  392. inspect_ai/util/_sandbox/environment.py +99 -96
  393. inspect_ai/util/_sandbox/self_check.py +124 -16
  394. inspect_ai/util/_subprocess.py +5 -3
  395. inspect_ai/util/_subtask.py +15 -16
  396. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
  397. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
  398. inspect_ai-0.3.64.dist-info/RECORD +625 -0
  399. inspect_ai/_view/www/src/Register.mjs +0 -3
  400. inspect_ai/_view/www/src/Types.mjs +0 -38
  401. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  402. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  403. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  404. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  405. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  406. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  407. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  408. inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
  409. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  410. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  411. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  412. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  413. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  414. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  415. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  416. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  417. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  418. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  419. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  420. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  421. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  422. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  423. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  424. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  425. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  426. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  427. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  428. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  429. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  430. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  431. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  432. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  433. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  434. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  435. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  436. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  437. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  438. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  439. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  440. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  441. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  442. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  443. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  444. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  445. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  446. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  447. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  448. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  449. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  450. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  451. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  452. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  453. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  454. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  455. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  456. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  457. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  458. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  459. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  460. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  461. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  462. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  463. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  464. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  465. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  466. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  467. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  468. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  469. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  470. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  471. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  472. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  473. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  474. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  475. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  476. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  477. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
  478. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  479. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  480. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  481. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  482. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  483. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  484. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  485. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  486. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  487. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  488. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  489. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  490. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  491. inspect_ai/tool/beta/__init__.py +0 -5
  492. inspect_ai-0.3.62.dist-info/RECORD +0 -481
  493. /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
  494. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
  495. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  496. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  497. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  498. /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
  499. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  500. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  501. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  502. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  503. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  504. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  505. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  506. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
  507. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
  508. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  509. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  510. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
  511. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  512. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  513. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  514. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  515. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  516. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
  517. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
  518. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,346 @@
1
+ import {
2
+ Completion,
3
+ CompletionContext,
4
+ CompletionResult,
5
+ CompletionSection,
6
+ } from "@codemirror/autocomplete";
7
+ import { EditorView } from "codemirror";
8
+ import {
9
+ kScoreTypeBoolean,
10
+ kScoreTypeCategorical,
11
+ kScoreTypeNumeric,
12
+ kScoreTypeOther,
13
+ kScoreTypePassFail,
14
+ } from "../../../constants";
15
+ import { ScoreFilterItem } from "../filters";
16
+ import { KEYWORDS, MATH_FUNCTIONS, SAMPLE_FUNCTIONS } from "./language";
17
+ import { Token, tokenize } from "./tokenize";
18
+
19
/**
 * Options accepted by the `makeCompletions` helper inside `getCompletions`.
 * All flags are optional; `makeCompletions` supplies the defaults.
 */
interface CompletionOptions {
  // When false (the default), no result is returned unless the cursor is at
  // the end of the document or completion was requested explicitly.
  autocompleteInTheMiddle?: boolean;
  // When true, each priority completion gets `boost = -index`, so earlier
  // entries receive a higher boost than later ones.
  enforceOrder?: boolean;
  // When true, a trailing space is appended to the label of every priority
  // completion that has no custom `apply` and does not already end in a space.
  autoSpaceAfter?: boolean;
  // When true (the default), the default completion items are appended after
  // the priority ones in a separate "misc" section.
  includeDefault?: boolean;
}
25
+
26
/**
 * Options for `makeCanonicalNameCompletion`.
 */
interface CanonicalNameCompletionProps {
  // Predicate evaluated per filter item; when it returns true the completion
  // label gets a trailing space. Defaults to "never" in the consumer.
  autoSpaceIf?: (item: ScoreFilterItem) => boolean;
}
29
+
30
/** True when the token is a string (terminated or not) or a number. */
const isLiteral = (token: Token): boolean => {
  const kind = token?.type;
  return (
    kind === "string" || kind === "unterminatedString" || kind === "number"
  );
};
32
+
33
/** True when the token's text is one of the logical connectors and/or/not. */
const isLogicalOp = (token: Token): boolean => {
  const word = token?.text;
  return word === "and" || word === "or" || word === "not";
};
35
+
36
/**
 * True for tokens ("(" and ".") after which a completion starts a brand new
 * token even though the cursor sits directly at the token's end.
 */
const autocompleteImmediatelyAfter = (token: Token): boolean => {
  const symbol = token?.text;
  return symbol === "(" || symbol === ".";
};
38
+
39
/**
 * Completion `apply` handler for functions: replaces the range [from, to)
 * with `label()` and puts the cursor between the parentheses.
 */
const applyWithCall = (
  view: EditorView,
  completion: Completion,
  from: number,
  to: number,
): void => {
  const name = completion.label;
  // One past the opening parenthesis of the inserted call.
  const caret = from + name.length + 1;
  view.dispatch({
    changes: { from, to, insert: `${name}()` },
    selection: { anchor: caret },
  });
};
50
+
51
/** Builds a low-priority (boost -20) completion entry for a keyword. */
const makeKeywordCompletion = (k: string): Completion => {
  const entry: Completion = { label: k, type: "keyword", boost: -20 };
  return entry;
};
56
+
57
/**
 * Builds a completion entry for a math function from a `[label, info]` pair.
 * Accepting the entry inserts a call via `applyWithCall`.
 */
const makeMathFunctionCompletion = (entry: [string, string]): Completion => {
  const [label, info] = entry;
  return {
    label,
    type: "function",
    info,
    apply: applyWithCall,
    boost: -10,
  };
};
67
+
68
/**
 * Builds a completion entry for a sample function from a `[label, info]`
 * pair. Accepting the entry inserts a call via `applyWithCall`.
 */
const makeSampleFunctionCompletion = (entry: [string, string]): Completion => {
  const [label, info] = entry;
  return {
    label,
    type: "function",
    info,
    apply: applyWithCall,
    boost: 0,
  };
};
78
+
79
/** Builds a completion entry (boost 10) for a literal value. */
const makeLiteralCompletion = (k: string): Completion => {
  const entry: Completion = { label: k, type: "text", boost: 10 };
  return entry;
};
84
+
85
/**
 * Builds a variable completion (boost 20) labelled with the item's canonical
 * name. A trailing space is added when `autoSpaceIf(item)` returns true.
 */
const makeCanonicalNameCompletion = (
  item: ScoreFilterItem,
  { autoSpaceIf = () => false }: CanonicalNameCompletionProps = {},
): Completion => {
  const suffix = autoSpaceIf(item) ? " " : "";
  return {
    label: item.canonicalName + suffix,
    type: "variable",
    info: item.tooltip,
    boost: 20,
  };
};
94
+
95
/**
 * Builds a variable completion (boost 20) labelled with the second
 * dot-separated segment of the item's qualified name, or "" when there is
 * none.
 */
const makeMemberAccessCompletion = (item: ScoreFilterItem): Completion => {
  const segments = item.qualifiedName?.split(".");
  const member = segments?.[1] || "";
  return {
    label: member,
    type: "variable",
    info: item.tooltip,
    boost: 20,
  };
};
101
+
102
/** Returns the filter items whose qualified name starts with `<scorer>.`. */
const getMemberScoreItems = (
  filterItems: ScoreFilterItem[],
  scorer: string,
): ScoreFilterItem[] => {
  const prefix = `${scorer}.`;
  return filterItems.filter((candidate) =>
    candidate?.qualifiedName?.startsWith(prefix),
  );
};
107
+
108
/**
 * Generates completions for the filter expression. The main goal is to make the
 * sample filter intuitive for beginners and to provide a smooth experience for
 * simple cases. To this end, we proactively try to suggest the next step of the
 * expression, in a wizard-style fashion. This logic is primarily intended to
 * support unsophisticated expressions of the form
 *   SUBEXPR and/or SUBEXPR or/not SUBEXPR ...
 * where each SUBEXPR is
 *   VARIABLE ==/!=/</>/in/... VALUE
 * and VALUE is a literal (string, number, etc.)
 * It does support some expressions more complex than that, but the completion
 * algorithm is not intended to be comprehensive. This is why we usually add
 * default completions to the list in case our guess was off.
 *
 * @param context - CodeMirror completion context; only the text before
 *   `context.pos` is tokenized.
 * @param filterItems - Score items that can be referenced in the expression.
 * @returns The completion result, or null when nothing should be suggested.
 */
export function getCompletions(
  context: CompletionContext,
  filterItems: ScoreFilterItem[],
): CompletionResult | null {
  const keywordCompletionItems = KEYWORDS.map(makeKeywordCompletion);
  const mathFunctionCompletionItems = MATH_FUNCTIONS.map(
    makeMathFunctionCompletion,
  );
  const sampleFunctionCompletionItems = SAMPLE_FUNCTIONS.map(
    makeSampleFunctionCompletion,
  );
  const variableCompletionItems = filterItems.map((item) =>
    makeCanonicalNameCompletion(item),
  );

  const defaultCompletionItems = [
    ...keywordCompletionItems,
    ...mathFunctionCompletionItems,
    ...sampleFunctionCompletionItems,
    ...variableCompletionItems,
  ];

  const doc = context.state.doc;
  const input = doc.toString().slice(0, context.pos);
  const tokens = tokenize(input);
  const lastToken = tokens[tokens.length - 1];
  // The cursor is "inside" the last token when it sits directly at its end,
  // unless that token is "(" or ".", after which a fresh token begins.
  const isCompletionInsideToken =
    lastToken &&
    context.pos === lastToken.to &&
    !autocompleteImmediatelyAfter(lastToken);
  const currentTokenIndex = isCompletionInsideToken
    ? tokens.length - 1
    : tokens.length;

  // prevToken(0) is the token being completed (undefined when a new token is
  // starting); prevToken(n) is the n-th token before it.
  const prevToken = (index: number): Token => tokens[currentTokenIndex - index];
  const currentToken = prevToken(0);
  const completionStart = currentToken ? currentToken.from : context.pos;
  const completingAtEnd = context.pos === doc.length;

  // Resolves the (possibly dotted) variable ending at prevToken(endIndex) to a
  // filter item by walking backwards over `name . name` token triples.
  const findFilterItem = (endIndex: number): ScoreFilterItem | undefined => {
    if (prevToken(endIndex)?.type !== "variable") return undefined;

    let name = prevToken(endIndex).text;
    let i = endIndex;

    while (prevToken(i + 1)?.text === ".") {
      if (prevToken(i + 2)?.type === "variable") {
        name = `${prevToken(i + 2).text}.${name}`;
        i += 2;
      } else {
        break;
      }
    }

    return filterItems.find((item) => item.canonicalName === name);
  };

  // Assembles the final result from the "priority" guesses plus, optionally,
  // the default completions in a separate section.
  const makeCompletions = (
    priorityCompletions: Completion[],
    {
      autocompleteInTheMiddle = false,
      enforceOrder = false,
      autoSpaceAfter = false,
      includeDefault = true,
    }: CompletionOptions = {},
  ): CompletionResult | null => {
    if (!autocompleteInTheMiddle && !completingAtEnd && !context.explicit) {
      return null;
    }

    const priorityCompletionsOrdered = enforceOrder
      ? priorityCompletions.map((c, idx) => ({ ...c, boost: -idx }))
      : priorityCompletions;

    const priorityCompletionsAdjusted = autoSpaceAfter
      ? priorityCompletionsOrdered.map((c) =>
          !c.apply && !c.label.endsWith(" ")
            ? { ...c, label: `${c.label} ` }
            : c,
        )
      : priorityCompletionsOrdered;

    if (!includeDefault) {
      return {
        from: completionStart,
        options: priorityCompletionsAdjusted,
      };
    }

    const miscSection: CompletionSection = {
      name: "misc",
      header: () => {
        const element = document.createElement("hr");
        element.style.display = "list-item";
        element.style.margin = "2px 0";
        return element;
      },
    };

    // Priority labels may already carry a trailing auto-space (e.g. "score ");
    // compare on the trimmed label so the same entry is not listed a second
    // time in the default section.
    const priorityLabels = new Set(
      priorityCompletions.map((c) => c.label.trimEnd()),
    );
    const defaultCompletionsAdjusted = defaultCompletionItems
      .filter((c) => !priorityLabels.has(c.label))
      .map((c) => ({ ...c, section: miscSection }));

    return {
      from: completionStart,
      options: [...priorityCompletionsAdjusted, ...defaultCompletionsAdjusted],
    };
  };

  const defaultCompletions = () => makeCompletions([]);
  const noCompletions = () => (context.explicit ? defaultCompletions() : null);

  const newExpressionCompletions = () =>
    makeCompletions([
      ...filterItems.map((item) =>
        makeCanonicalNameCompletion(item, {
          autoSpaceIf: (item) =>
            completingAtEnd && item.scoreType !== kScoreTypeBoolean,
        }),
      ),
      ...sampleFunctionCompletionItems,
    ]);

  const variableCompletions = () => makeCompletions(variableCompletionItems);

  const memberAccessCompletions = (items: ScoreFilterItem[]) =>
    makeCompletions(items.map(makeMemberAccessCompletion), {
      autocompleteInTheMiddle: true,
      includeDefault: false,
    });

  const logicalOpCompletions = () =>
    makeCompletions(["and", "or"].map(makeKeywordCompletion), {
      enforceOrder: true,
      autoSpaceAfter: completingAtEnd,
    });

  const discreteRelationCompletions = () =>
    makeCompletions(["==", "!=", "in", "not in"].map(makeKeywordCompletion), {
      enforceOrder: true,
      autoSpaceAfter: completingAtEnd,
    });

  const continuousRelationCompletions = () =>
    makeCompletions(
      ["<", "<=", ">", ">=", "==", "!="].map(makeKeywordCompletion),
      { enforceOrder: true, autoSpaceAfter: completingAtEnd },
    );

  const customRelationCompletions = () =>
    makeCompletions(
      ["<", "<=", ">", ">=", "==", "!=", "~="].map(makeKeywordCompletion),
      { enforceOrder: true, autoSpaceAfter: completingAtEnd },
    );

  const rhsCompletions = (options: string[]) =>
    makeCompletions(options.map(makeLiteralCompletion));

  // Handle specific completion scenarios
  if (!prevToken(1)) return newExpressionCompletions();

  // Member access
  if (prevToken(1)?.text === ".") {
    const scorer = prevToken(2)?.text;
    if (scorer) {
      return memberAccessCompletions(getMemberScoreItems(filterItems, scorer));
    }
  }

  // Function call or bracketed expression start
  if (prevToken(1)?.text === "(") {
    if (prevToken(2)?.type === "mathFunction") return variableCompletions();
    if (prevToken(2)?.type === "sampleFunction") return noCompletions();
    return newExpressionCompletions();
  }

  // Function call or bracketed expression end
  // Don't try to guess: too unpredictable. Could continue with an arithmetic
  // operator (if constructing a complex expression), with a comparison (if
  // comparing function call result to something) or with a logical connector
  // (if a new subexpression is starting). Very hard to figure out what is
  // going on without an AST, which we don't have here.
  if (prevToken(1)?.text === ")") return noCompletions();

  // Variable type-based relation suggestions
  if (prevToken(1)?.type === "variable") {
    const scoreType = findFilterItem(1)?.scoreType || "";

    switch (scoreType) {
      case kScoreTypePassFail:
      case kScoreTypeCategorical:
        return discreteRelationCompletions();
      case kScoreTypeNumeric:
        return continuousRelationCompletions();
      case kScoreTypeOther:
        return customRelationCompletions();
      case kScoreTypeBoolean:
        return logicalOpCompletions();
      default:
        return noCompletions();
    }
  }

  // RHS comparison suggestions
  if (prevToken(1)?.type === "relation") {
    const item = findFilterItem(2);
    if (item?.categories?.length) {
      return rhsCompletions(item.categories);
    }
    return variableCompletions();
  }

  // Post-subexpression connector suggestions
  if (isLiteral(prevToken(1)) && prevToken(2)?.type === "relation") {
    return logicalOpCompletions();
  }

  // New subexpression after logical connector
  if (isLogicalOp(prevToken(1))) return newExpressionCompletions();

  // Something unusual is going on. We don't have any good guesses, but the user
  // can trigger completion manually with Ctrl+Space if they want.
  return noCompletions();
}
@@ -0,0 +1,19 @@
1
// Keywords of the filter expression language. Entries may contain a space
// (e.g. "not in").
export const KEYWORDS: string[] = ["and", "or", "not", "in", "not in", "mod"];

// Math functions as [name, description] pairs.
export const MATH_FUNCTIONS: [string, string][] = [
  ["min", "Minimum of two or more values"],
  ["max", "Maximum of two or more values"],
  ["abs", "Absolute value"],
  ["round", "Round to the nearest integer"],
  ["floor", "Round down to the nearest integer"],
  ["ceil", "Round up to the nearest integer"],
  ["sqrt", "Square root"],
  ["log", "Natural logarithm"],
  ["log2", "Base 2 logarithm"],
  ["log10", "Base 10 logarithm"],
];

// Sample-level functions as [name, description] pairs.
export const SAMPLE_FUNCTIONS: [string, string][] = [
  ["input_contains", "Checks if input contains a regular expression"],
  ["target_contains", "Checks if target contains a regular expression"],
];
@@ -0,0 +1,97 @@
1
+ import { StreamLanguage, StringStream } from "@codemirror/language";
2
+ import { tags } from "@lezer/highlight";
3
+ import { KEYWORDS, MATH_FUNCTIONS, SAMPLE_FUNCTIONS } from "./language";
4
+
5
+ // Types
6
/** A single lexical token produced by `tokenize`. */
export interface Token {
  // Token kind as returned by `nextToken` (e.g. "string", "number", "keyword").
  type: string;
  // The exact slice of the input covered by the token.
  text: string;
  // Start offset of the token in the input (inclusive).
  from: number;
  // End offset of the token in the input (exclusive).
  to: number;
}
12
+
13
+ // Constants
14
// Anchored patterns for the non-word token classes; each is matched against
// the stream at the current position.
const TOKEN_PATTERNS = {
  // Complete double-quoted string; no escaped or embedded double quotes.
  STRING: /^"[^"]*"/,
  // Opening double quote with no closing quote yet.
  UNTERMINATED_STRING: /^"[^"]*/,
  // Optionally signed integer with an optional decimal fraction.
  NUMBER: /^(-|\+)?\d+(\.\d+)?/,
  // Comparison operators.
  RELATION: /^(==|!=|<=|>=|<|>|~=)/,
  // Lone "=", "!" or "~" that did not form a relation.
  MISC_OPERATOR: /^(=|!|~)/,
  // Arithmetic operators, parentheses, comma and dot.
  OPERATOR: /^(\+|-|\*|\/|\^|\(|\)|,|\.)/,
  // Identifier: letter or underscore followed by letters, digits, underscores.
  VARIABLE: /^[a-zA-Z_][a-zA-Z0-9_]*/,
};
23
+
24
+ // Utilities
25
/**
 * Builds an anchored regex matching any of the given words followed by a
 * word boundary. Words are inserted verbatim; earlier words win on ties.
 */
const createWordRegex = (words: string[]): RegExp => {
  const alternation = words.join("|");
  return new RegExp("^(" + alternation + ")\\b");
};
27
+
28
/** Counts the space characters in `word`. */
const countSpaces = (word: string): number => {
  let spaces = 0;
  for (const ch of word) {
    if (ch === " ") spaces += 1;
  }
  return spaces;
};
29
+
30
+ // Regular expressions for functions and keywords
31
// Matches any math function name at the current position.
const mathFunctionsRegex = createWordRegex(
  MATH_FUNCTIONS.map(([label]) => label),
);
// Matches any sample function name at the current position.
const sampleFunctionsRegex = createWordRegex(
  SAMPLE_FUNCTIONS.map(([label]) => label),
);
// Matches any keyword at the current position.
const keywordsRegex = createWordRegex(
  // Ensure 'not in' matches first. Sort a copy: Array.prototype.sort works in
  // place, and KEYWORDS is a shared export that other modules read.
  [...KEYWORDS].sort((a, b) => countSpaces(b) - countSpaces(a)),
);
41
+
42
+ // Token recognition
43
// Recognition rules, tried top to bottom; the first pattern that matches at
// the current stream position decides the token type.
const TOKEN_RULES: ReadonlyArray<readonly [RegExp, string]> = [
  [TOKEN_PATTERNS.STRING, "string"],
  [TOKEN_PATTERNS.UNTERMINATED_STRING, "unterminatedString"],
  [TOKEN_PATTERNS.NUMBER, "number"],
  [keywordsRegex, "keyword"],
  [mathFunctionsRegex, "mathFunction"],
  [sampleFunctionsRegex, "sampleFunction"],
  [TOKEN_PATTERNS.VARIABLE, "variable"],
  [TOKEN_PATTERNS.RELATION, "relation"],
  [TOKEN_PATTERNS.MISC_OPERATOR, "miscOperator"],
  [TOKEN_PATTERNS.OPERATOR, "miscOperator"],
];

/**
 * Consumes one token from the stream and returns its type. When nothing
 * matches, a single character is skipped and null is returned.
 */
function nextToken(stream: StringStream): string | null {
  for (const [pattern, tokenType] of TOKEN_RULES) {
    if (stream.match(pattern)) return tokenType;
  }

  // Unrecognized character (e.g. whitespace): skip it.
  stream.next();
  return null;
}
60
+
61
+ // Main tokenizer function
62
/**
 * Splits `input` into tokens, recording each token's type, text and
 * [from, to) offsets. Characters that `nextToken` does not recognize are
 * skipped and produce no token.
 */
export function tokenize(input: string): Token[] {
  const stream = new StringStream(input, 0, 0);
  const result: Token[] = [];

  for (let start = stream.pos; start < input.length; start = stream.pos) {
    const kind = nextToken(stream);
    if (!kind) continue;

    const end = stream.pos;
    result.push({
      type: kind,
      text: input.slice(start, end),
      from: start,
      to: end,
    });
  }

  return result;
}
82
+
83
+ // Language definition
84
// CodeMirror stream language for filter expressions. It reuses `nextToken`
// as the tokenizer and maps each token type it returns to a highlight tag.
export const language = StreamLanguage.define({
  token: nextToken,
  tokenTable: {
    string: tags.string,
    // Unterminated strings are highlighted like complete ones.
    unterminatedString: tags.string,
    number: tags.number,
    keyword: tags.keyword,
    mathFunction: tags.function(tags.variableName),
    sampleFunction: tags.function(tags.variableName),
    variable: tags.variableName,
    relation: tags.operator,
    miscOperator: tags.operator,
  },
});
@@ -1,12 +1,9 @@
1
- //
1
+ import { Type11 } from "../types/log";
2
2
 
3
3
  /**
4
4
  * Formats a limit message
5
- *
6
- * @param {import("../types/log").Type11} [type] - The limit type
7
- * @returns {string} The limit message
8
5
  */
9
- export const sampleLimitMessage = (type) => {
6
+ export const sampleLimitMessage = (type: Type11): string => {
10
7
  switch (type) {
11
8
  case "operator":
12
9
  return "Sample terminated due to operator limit.";
@@ -19,6 +16,6 @@ export const sampleLimitMessage = (type) => {
19
16
  case "context":
20
17
  return "Sample terminated due to context limit.";
21
18
  default:
22
- return undefined;
19
+ return "An unknown limit terminated this sample.";
23
20
  }
24
21
  };
@@ -0,0 +1,53 @@
1
/* Outer wrapper */
.container {
  margin-top: 0.5em;
  padding-left: 0;
}

/* Field labels and table header cells */
.label {
  padding-right: 2em;
  padding-left: 0;
  padding-bottom: 0;
  font-weight: 400;
}

.wordBreak {
  word-break: break-all;
}

.scoreTable {
  width: 100%;
  margin-bottom: 1em;
}

/* Fully transparent bottom border (alpha channel 00) */
.bottomBorder {
  border-bottom-color: #00000000;
}

.headerScore {
  padding-left: 2em;
}

.targetValue {
  padding-right: 2em;
  padding-left: 0;
  padding-top: 0;
}

.answerValue {
  padding-left: 0;
  padding-top: 0;
}

.scoreValue {
  padding-left: 2em;
  padding-top: 0;
}

.noLeft {
  padding-left: 0;
}

.noTop {
  margin-top: 0;
}
@@ -0,0 +1,168 @@
1
+ import clsx from "clsx";
2
+ import { Card, CardBody, CardHeader } from "../../components/Card";
3
+ import { MarkdownDiv } from "../../components/MarkdownDiv";
4
+ import { MetaDataGrid } from "../../metadata/MetaDataGrid";
5
+ import { EvalSample } from "../../types/log";
6
+ import { arrayToString, inputString } from "../../utils/format";
7
+ import { SamplesDescriptor } from "../descriptor/samplesDescriptor";
8
+ import { SampleScores } from "./SampleScores";
9
+
10
+ import { SampleSummary } from "../../api/types";
11
+ import styles from "./SampleScoreView.module.css";
12
+
13
/** Props for `SampleScoreView`. */
interface SampleScoreViewProps {
  // The sample whose input, choices, target and score are displayed.
  sample: EvalSample;
  // Descriptor used to look up the scorer's answer, explanation and metadata;
  // the component renders nothing when it is missing.
  sampleDescriptor: SamplesDescriptor;
  // Scorer name; passed as both `scorer` and `name` to the scorer descriptor.
  scorer: string;
  // Extra class name(s) added to the outer container.
  className?: string | string[];
}
19
+
20
/**
 * Renders the score details of one sample for a given scorer: a "Score" card
 * with input, target, answer and score, followed by optional "Explanation"
 * and "Metadata" cards. Renders nothing when no sample descriptor is given.
 */
export const SampleScoreView: React.FC<SampleScoreViewProps> = ({
  sample,
  sampleDescriptor,
  className,
  scorer,
}) => {
  if (!sampleDescriptor) {
    return null;
  }

  // Input text, followed by a blank line and lettered choices (A, B, ...).
  const scoreInput = inputString(sample.input);
  const choices = sample.choices;
  if (choices && choices.length > 0) {
    const lettered = choices.map(
      (choice, index) => `${String.fromCharCode(65 + index)}) ${choice}`,
    );
    scoreInput.push("", ...lettered);
  }

  const scorerDescriptor = sampleDescriptor.evalDescriptor.scorerDescriptor(
    sample,
    { scorer, name: scorer },
  );
  const explanation = scorerDescriptor.explanation() || "(No Explanation)";
  const answer = scorerDescriptor.answer();
  const metadata = scorerDescriptor.metadata();

  // Class strings shared by several elements below.
  const labelClasses = clsx(
    styles.label,
    "text-style-label",
    "text-style-secondary",
  );
  const markdownClasses = clsx("no-last-para-padding", styles.noLeft);

  // The explanation is hidden when it merely repeats the answer.
  const showExplanation = explanation && explanation !== answer;
  const showMetadata = metadata && Object.keys(metadata).length > 0;

  return (
    <div
      className={clsx(
        "container-fluid",
        className,
        "font-size-base",
        styles.container,
      )}
    >
      <Card>
        <CardHeader label="Score" />
        <CardBody>
          <div>
            <div className={labelClasses}>Input</div>
            <div>
              <MarkdownDiv
                markdown={scoreInput.join("\n")}
                className={styles.wordBreak}
              />
            </div>
          </div>

          <table className={clsx("table", styles.scoreTable)}>
            <thead className={styles.bottomBorder}>
              <tr>
                <th className={labelClasses}>Target</th>
                <th className={labelClasses}>Answer</th>
                <th className={clsx(labelClasses, styles.headerScore)}>
                  Score
                </th>
              </tr>
            </thead>
            <tbody className={styles.bottomBorder}>
              <tr>
                <td className={styles.targetValue}>
                  <MarkdownDiv
                    markdown={arrayToString(
                      arrayToString(sample?.target || "none"),
                    )}
                    className={markdownClasses}
                  />
                </td>
                <td className={clsx(styles.answerValue)}>
                  <MarkdownDiv className={markdownClasses} markdown={answer} />
                </td>
                <td className={clsx(styles.scoreValue)}>
                  <SampleScores
                    sample={sample as any as SampleSummary}
                    sampleDescriptor={sampleDescriptor}
                    scorer={scorer}
                  />
                </td>
              </tr>
            </tbody>
          </table>
        </CardBody>
      </Card>
      {showExplanation ? (
        <Card>
          <CardHeader label="Explanation" />
          <CardBody>
            <MarkdownDiv
              markdown={arrayToString(explanation)}
              className={markdownClasses}
            />
          </CardBody>
        </Card>
      ) : (
        ""
      )}
      {showMetadata ? (
        <Card>
          <CardHeader label="Metadata" />
          <CardBody>
            <MetaDataGrid
              id="task-sample-score-metadata"
              className={clsx("tab-pane", styles.noTop)}
              entries={metadata}
            />
          </CardBody>
        </Card>
      ) : (
        ""
      )}
    </div>
  );
};