inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (518) hide show
  1. inspect_ai/_cli/cache.py +8 -7
  2. inspect_ai/_cli/common.py +0 -12
  3. inspect_ai/_cli/eval.py +32 -4
  4. inspect_ai/_cli/info.py +1 -0
  5. inspect_ai/_cli/list.py +1 -1
  6. inspect_ai/_cli/log.py +2 -0
  7. inspect_ai/_cli/main.py +1 -1
  8. inspect_ai/_cli/sandbox.py +4 -1
  9. inspect_ai/_cli/score.py +181 -32
  10. inspect_ai/_cli/trace.py +10 -0
  11. inspect_ai/_cli/view.py +4 -2
  12. inspect_ai/_display/core/active.py +2 -3
  13. inspect_ai/_display/core/config.py +7 -1
  14. inspect_ai/_display/textual/widgets/samples.py +4 -3
  15. inspect_ai/_display/textual/widgets/sandbox.py +6 -0
  16. inspect_ai/_eval/eval.py +104 -101
  17. inspect_ai/_eval/evalset.py +75 -75
  18. inspect_ai/_eval/loader.py +122 -12
  19. inspect_ai/_eval/registry.py +1 -1
  20. inspect_ai/_eval/run.py +14 -0
  21. inspect_ai/_eval/score.py +125 -36
  22. inspect_ai/_eval/task/log.py +105 -4
  23. inspect_ai/_eval/task/results.py +92 -38
  24. inspect_ai/_eval/task/run.py +9 -2
  25. inspect_ai/_eval/task/sandbox.py +35 -2
  26. inspect_ai/_eval/task/task.py +49 -46
  27. inspect_ai/_util/constants.py +1 -1
  28. inspect_ai/_util/content.py +8 -0
  29. inspect_ai/_util/error.py +2 -0
  30. inspect_ai/_util/file.py +15 -1
  31. inspect_ai/_util/hash.py +1 -1
  32. inspect_ai/_util/logger.py +4 -2
  33. inspect_ai/_util/registry.py +7 -1
  34. inspect_ai/_view/view.py +1 -2
  35. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  36. inspect_ai/_view/www/.vscode/settings.json +8 -0
  37. inspect_ai/_view/www/App.css +97 -29
  38. inspect_ai/_view/www/README.md +1 -1
  39. inspect_ai/_view/www/dist/assets/index.css +16663 -14674
  40. inspect_ai/_view/www/dist/assets/index.js +58808 -51348
  41. inspect_ai/_view/www/dist/index.html +1 -1
  42. inspect_ai/_view/www/index.html +2 -2
  43. inspect_ai/_view/www/log-schema.json +87 -73
  44. inspect_ai/_view/www/package.json +22 -4
  45. inspect_ai/_view/www/postcss.config.cjs +8 -9
  46. inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
  47. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  48. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  49. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  50. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  51. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  52. inspect_ai/_view/www/src/api/index.ts +4 -4
  53. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  54. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  55. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  56. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  57. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  58. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  59. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  60. inspect_ai/_view/www/src/components/Card.css +60 -0
  61. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  62. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  63. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  64. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  65. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  66. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  67. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  68. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  69. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  70. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  71. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  72. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  73. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  74. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  75. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  76. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  77. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  78. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  79. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  80. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  81. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  82. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  83. inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
  84. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  85. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  86. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  87. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  88. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  89. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  90. inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
  91. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  92. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  93. inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
  94. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  95. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  96. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  97. inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
  98. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  99. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  100. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  101. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  102. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  103. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
  104. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  105. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  106. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  107. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  108. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  109. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  110. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  111. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  112. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  113. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  114. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  115. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  116. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  117. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  118. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
  119. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  120. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  121. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  122. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  123. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  124. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  126. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  127. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
  128. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  129. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
  130. inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
  131. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  132. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  133. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  134. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  135. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  136. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
  137. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  138. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  139. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  140. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
  141. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  142. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
  143. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  144. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
  145. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  146. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
  147. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  148. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
  149. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  150. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  151. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
  152. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
  153. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  154. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  155. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  156. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  157. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  158. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  159. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  160. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  161. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  162. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  163. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  164. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  165. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  166. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  167. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  168. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  169. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  170. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  171. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  172. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  173. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  174. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  175. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  176. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  177. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  179. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  180. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  181. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  182. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
  183. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  184. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
  185. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  186. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  187. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  188. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  189. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  190. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  191. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  192. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  193. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  194. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  196. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  197. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  198. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  199. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  200. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  201. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  202. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  203. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  204. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  205. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  206. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
  207. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  208. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  209. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  210. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  211. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  212. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  213. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  214. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  215. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
  216. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  217. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  218. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  219. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  220. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  221. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
  222. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  223. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
  224. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  225. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  226. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  227. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  228. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  229. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
  230. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  231. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  232. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
  233. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  234. inspect_ai/_view/www/src/types/log.d.ts +108 -19
  235. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  236. inspect_ai/_view/www/src/types.ts +71 -0
  237. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
  238. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  239. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
  240. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  241. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  242. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  243. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  244. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  245. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  246. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  247. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  248. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  249. inspect_ai/_view/www/src/utils/format.ts +194 -0
  250. inspect_ai/_view/www/src/utils/git.ts +7 -0
  251. inspect_ai/_view/www/src/utils/html.ts +6 -0
  252. inspect_ai/_view/www/src/utils/http.ts +14 -0
  253. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  254. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  255. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  256. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  257. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  258. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  259. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  260. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  261. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
  262. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  263. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  264. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  265. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  266. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  267. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
  268. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
  269. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
  270. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  271. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
  272. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  273. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  274. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  275. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  276. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  277. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  278. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  279. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  280. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  281. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  282. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  283. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
  284. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  285. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
  286. inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
  287. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
  288. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  289. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  290. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  291. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  292. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  293. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  294. inspect_ai/_view/www/src/workspace/utils.ts +34 -0
  295. inspect_ai/_view/www/tsconfig.json +23 -9
  296. inspect_ai/_view/www/vite.config.js +8 -17
  297. inspect_ai/_view/www/yarn.lock +627 -556
  298. inspect_ai/approval/_approval.py +2 -0
  299. inspect_ai/approval/_approver.py +4 -4
  300. inspect_ai/approval/_auto.py +1 -1
  301. inspect_ai/approval/_human/approver.py +3 -0
  302. inspect_ai/approval/_policy.py +5 -0
  303. inspect_ai/approval/_registry.py +2 -2
  304. inspect_ai/dataset/_dataset.py +64 -37
  305. inspect_ai/dataset/_sources/__init__.py +0 -0
  306. inspect_ai/dataset/_sources/csv.py +20 -12
  307. inspect_ai/dataset/_sources/file.py +4 -0
  308. inspect_ai/dataset/_sources/hf.py +39 -29
  309. inspect_ai/dataset/_sources/json.py +17 -9
  310. inspect_ai/log/__init__.py +2 -0
  311. inspect_ai/log/_convert.py +3 -3
  312. inspect_ai/log/_file.py +24 -9
  313. inspect_ai/log/_log.py +101 -13
  314. inspect_ai/log/_message.py +4 -2
  315. inspect_ai/log/_recorders/file.py +4 -0
  316. inspect_ai/log/_recorders/json.py +5 -7
  317. inspect_ai/log/_recorders/recorder.py +3 -0
  318. inspect_ai/log/_transcript.py +19 -8
  319. inspect_ai/model/__init__.py +2 -0
  320. inspect_ai/model/_cache.py +39 -21
  321. inspect_ai/model/_call_tools.py +4 -3
  322. inspect_ai/model/_chat_message.py +14 -4
  323. inspect_ai/model/_generate_config.py +1 -1
  324. inspect_ai/model/_model.py +31 -24
  325. inspect_ai/model/_model_output.py +14 -1
  326. inspect_ai/model/_openai.py +10 -18
  327. inspect_ai/model/_providers/anthropic.py +3 -3
  328. inspect_ai/model/_providers/google.py +9 -5
  329. inspect_ai/model/_providers/openai.py +5 -9
  330. inspect_ai/model/_providers/openai_o1.py +3 -5
  331. inspect_ai/model/_providers/openrouter.py +86 -0
  332. inspect_ai/model/_providers/providers.py +11 -0
  333. inspect_ai/scorer/__init__.py +6 -1
  334. inspect_ai/scorer/_answer.py +7 -7
  335. inspect_ai/scorer/_classification.py +38 -18
  336. inspect_ai/scorer/_common.py +2 -8
  337. inspect_ai/scorer/_match.py +4 -5
  338. inspect_ai/scorer/_metric.py +87 -28
  339. inspect_ai/scorer/_metrics/__init__.py +3 -3
  340. inspect_ai/scorer/_metrics/accuracy.py +8 -10
  341. inspect_ai/scorer/_metrics/mean.py +3 -17
  342. inspect_ai/scorer/_metrics/std.py +111 -30
  343. inspect_ai/scorer/_model.py +12 -12
  344. inspect_ai/scorer/_pattern.py +3 -3
  345. inspect_ai/scorer/_reducer/reducer.py +36 -21
  346. inspect_ai/scorer/_reducer/registry.py +2 -2
  347. inspect_ai/scorer/_reducer/types.py +7 -1
  348. inspect_ai/scorer/_score.py +11 -1
  349. inspect_ai/scorer/_scorer.py +110 -16
  350. inspect_ai/solver/__init__.py +1 -1
  351. inspect_ai/solver/_basic_agent.py +19 -22
  352. inspect_ai/solver/_bridge/__init__.py +0 -3
  353. inspect_ai/solver/_bridge/bridge.py +3 -3
  354. inspect_ai/solver/_chain.py +1 -2
  355. inspect_ai/solver/_critique.py +3 -3
  356. inspect_ai/solver/_fork.py +2 -2
  357. inspect_ai/solver/_human_agent/__init__.py +0 -0
  358. inspect_ai/solver/_human_agent/agent.py +5 -8
  359. inspect_ai/solver/_human_agent/commands/clock.py +14 -10
  360. inspect_ai/solver/_human_agent/commands/note.py +1 -1
  361. inspect_ai/solver/_human_agent/commands/score.py +0 -11
  362. inspect_ai/solver/_multiple_choice.py +38 -26
  363. inspect_ai/solver/_prompt.py +7 -7
  364. inspect_ai/solver/_solver.py +53 -52
  365. inspect_ai/solver/_task_state.py +80 -69
  366. inspect_ai/solver/_use_tools.py +9 -9
  367. inspect_ai/tool/__init__.py +4 -1
  368. inspect_ai/tool/_tool.py +43 -14
  369. inspect_ai/tool/_tool_call.py +6 -2
  370. inspect_ai/tool/_tool_choice.py +3 -1
  371. inspect_ai/tool/_tool_def.py +10 -8
  372. inspect_ai/tool/_tool_params.py +24 -0
  373. inspect_ai/tool/_tool_with.py +7 -7
  374. inspect_ai/tool/_tools/__init__.py +0 -0
  375. inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
  376. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
  377. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  378. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  379. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  380. inspect_ai/tool/_tools/_execute.py +23 -11
  381. inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
  382. inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
  383. inspect_ai/tool/_tools/_web_search.py +7 -5
  384. inspect_ai/tool/beta.py +3 -0
  385. inspect_ai/util/_concurrency.py +3 -3
  386. inspect_ai/util/_panel.py +2 -0
  387. inspect_ai/util/_resource.py +12 -12
  388. inspect_ai/util/_sandbox/docker/compose.py +23 -20
  389. inspect_ai/util/_sandbox/docker/config.py +2 -1
  390. inspect_ai/util/_sandbox/docker/docker.py +42 -86
  391. inspect_ai/util/_sandbox/docker/service.py +100 -0
  392. inspect_ai/util/_sandbox/environment.py +99 -96
  393. inspect_ai/util/_sandbox/self_check.py +124 -16
  394. inspect_ai/util/_subprocess.py +5 -3
  395. inspect_ai/util/_subtask.py +15 -16
  396. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
  397. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
  398. inspect_ai-0.3.64.dist-info/RECORD +625 -0
  399. inspect_ai/_view/www/src/Register.mjs +0 -3
  400. inspect_ai/_view/www/src/Types.mjs +0 -38
  401. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  402. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  403. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  404. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  405. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  406. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  407. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  408. inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
  409. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  410. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  411. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  412. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  413. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  414. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  415. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  416. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  417. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  418. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  419. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  420. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  421. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  422. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  423. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  424. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  425. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  426. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  427. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  428. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  429. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  430. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  431. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  432. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  433. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  434. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  435. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  436. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  437. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  438. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  439. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  440. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  441. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  442. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  443. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  444. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  445. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  446. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  447. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  448. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  449. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  450. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  451. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  452. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  453. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  454. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  455. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  456. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  457. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  458. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  459. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  460. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  461. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  462. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  463. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  464. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  465. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  466. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  467. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  468. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  469. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  470. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  471. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  472. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  473. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  474. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  475. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  476. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  477. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
  478. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  479. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  480. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  481. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  482. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  483. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  484. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  485. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  486. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  487. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  488. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  489. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  490. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  491. inspect_ai/tool/beta/__init__.py +0 -5
  492. inspect_ai-0.3.62.dist-info/RECORD +0 -481
  493. /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
  494. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
  495. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  496. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  497. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  498. /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
  499. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  500. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  501. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  502. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  503. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  504. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  505. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  506. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
  507. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
  508. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  509. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  510. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
  511. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  512. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  513. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  514. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  515. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  516. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
  517. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
  518. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -1,23 +1,31 @@
1
1
  import { compileExpression } from "filtrex";
2
- import { kScoreTypeBoolean } from "../../constants.mjs";
3
- import { inputString } from "../../utils/Format.mjs";
2
+ import { SampleSummary } from "../../api/types";
3
+ import { kScoreTypeBoolean } from "../../constants";
4
+ import { ScoreLabel } from "../../types";
5
+ import { Scores1 } from "../../types/log";
6
+ import { inputString } from "../../utils/format";
7
+ import { EvalDescriptor, ScoreDescriptor } from "../descriptor/types";
4
8
 
5
- /**
6
- * @typedef {Object} FilterError
7
- * @property {number=} from - The start of the error.
8
- * @property {number=} to - The end of the error.
9
- * @property {string} message - The error message.
10
- * @property {"warning" | "error"} severity - The severity of the error.
11
- */
9
+ export interface FilterError {
10
+ from: number;
11
+ to: number;
12
+ message: string;
13
+ severity: "warning" | "error";
14
+ }
15
+
16
+ export interface ScoreFilterItem {
17
+ shortName?: string;
18
+ qualifiedName?: string;
19
+ canonicalName: string;
20
+ tooltip?: string;
21
+ categories: string[];
22
+ scoreType: string;
23
+ }
12
24
 
13
25
  /**
14
26
  * Coerces a value to the type expected by the score.
15
- *
16
- * @param {any} value
17
- * @param {import("../../samples/SamplesDescriptor.mjs").ScoreDescriptor} descriptor
18
- * @returns {any}
19
27
  */
20
- const coerceValue = (value, descriptor) => {
28
+ const coerceValue = (value: unknown, descriptor: ScoreDescriptor): unknown => {
21
29
  if (descriptor && descriptor.scoreType === kScoreTypeBoolean) {
22
30
  return Boolean(value);
23
31
  } else {
@@ -25,24 +33,18 @@ const coerceValue = (value, descriptor) => {
25
33
  }
26
34
  };
27
35
 
28
- /**
29
- * @param {any} value
30
- * @returns {boolean}
31
- */
32
- const isFilteringSupportedForValue = (value) =>
36
+ // Whether a particular value is filter-able
37
+ const isFilteringSupportedForValue = (value: unknown): boolean =>
33
38
  ["string", "number", "boolean"].includes(typeof value);
34
39
 
35
40
  /**
36
41
  * Returns the names of scores that are not allowed to be used as short names in
37
42
  * filter expressions because they are not unique. This should be applied only to
38
43
  * the nested scores, not to the top-level scorer names.
39
- *
40
- * @param {import("../../Types.mjs").ScoreLabel[]} scores
41
- * @returns {Set<string>}
42
44
  */
43
- const bannedShortScoreNames = (scores) => {
44
- const used = new Set();
45
- const banned = new Set();
45
+ const bannedShortScoreNames = (scores: ScoreLabel[]): Set<string> => {
46
+ const used: Set<string> = new Set();
47
+ const banned: Set<string> = new Set();
46
48
  for (const { scorer, name } of scores) {
47
49
  banned.add(scorer);
48
50
  if (used.has(name)) {
@@ -60,19 +62,22 @@ const bannedShortScoreNames = (scores) => {
60
62
  * Child metrics are accessed using dot notation (e.g. `scorer_name.score_name`) or
61
63
  * directly by name when it is unique.
62
64
  *
63
- * @param {import("../../samples/SamplesDescriptor.mjs").EvalDescriptor} evalDescriptor
65
+ * @param {import("../../samples/descriptor/samplesDescriptor").EvalDescriptor} evalDescriptor
64
66
  * @param {import("../../types/log").Scores1} sampleScores
65
67
  * @returns {Object<string, any>}
66
68
  */
67
- const scoreVariables = (evalDescriptor, sampleScores) => {
69
+ const scoreVariables = (
70
+ evalDescriptor: EvalDescriptor,
71
+ sampleScores: Scores1,
72
+ ) => {
68
73
  const bannedShortNames = bannedShortScoreNames(evalDescriptor.scores);
69
- const variables = {};
74
+ const variables: Record<string, unknown> = {};
70
75
 
71
- /**
72
- * @param {import("../../Types.mjs").ScoreLabel} scoreLabel
73
- * @param {any} value
74
- */
75
- const addScore = (variableName, scoreLabel, value) => {
76
+ const addScore = (
77
+ variableName: string,
78
+ scoreLabel: ScoreLabel,
79
+ value: unknown,
80
+ ) => {
76
81
  const coercedValue = coerceValue(
77
82
  value,
78
83
  evalDescriptor.scoreDescriptor(scoreLabel),
@@ -82,7 +87,7 @@ const scoreVariables = (evalDescriptor, sampleScores) => {
82
87
  }
83
88
  };
84
89
 
85
- for (const [scorer, score] of Object.entries(sampleScores)) {
90
+ for (const [scorer, score] of Object.entries(sampleScores || {})) {
86
91
  addScore(scorer, { scorer, name: scorer }, score.value);
87
92
  if (typeof score.value === "object") {
88
93
  for (const [name, value] of Object.entries(score.value)) {
@@ -96,39 +101,34 @@ const scoreVariables = (evalDescriptor, sampleScores) => {
96
101
  return variables;
97
102
  };
98
103
 
99
- /**
100
- * @typedef {Object} ScoreFilterItem
101
- * @property {string | undefined} shortName - The short name of the score, if doesn't conflict with other short names.
102
- * @property {string | undefined} qualifiedName - The `scorer.score` name for children of complex scorers.
103
- * @property {string} canonicalName - The canonical name: either `shortName` or `qualifiedName` (at least one must exist).
104
- * @property {string} tooltip - The informational tooltip for the score.
105
- * @property {string[]} categories - Category values for categorical scores.
106
- * @property {string} scoreType - The type of the score (e.g., 'numeric', 'categorical', 'boolean').
107
- */
108
-
109
104
  /**
110
105
  * Generates a dictionary of variables that can be used in the filter expression.
111
106
  * High-level scorer metrics can be accessed by name directly.
112
107
  * Child metrics are accessed using dot notation (e.g. `scorer_name.score_name`) or
113
108
  * directly by name when it is unique.
114
- *
115
- * @param {import("../../samples/SamplesDescriptor.mjs").EvalDescriptor} evalDescriptor
116
- * @returns {ScoreFilterItem[]}
117
109
  */
118
- export const scoreFilterItems = (evalDescriptor) => {
119
- /** @type {ScoreFilterItem[]} */
120
- const items = [];
110
+ export const scoreFilterItems = (
111
+ evalDescriptor: EvalDescriptor,
112
+ ): ScoreFilterItem[] => {
113
+ const items: ScoreFilterItem[] = [];
121
114
  const bannedShortNames = bannedShortScoreNames(evalDescriptor.scores);
122
- const valueToString = (value) =>
115
+ const valueToString = (value: unknown) =>
123
116
  typeof value === "string" ? `"${value}"` : String(value);
124
117
 
125
118
  /**
126
119
  * @param {string | undefined} shortName
127
120
  * @param {string | undefined} qualifiedName
128
- * @param {import("../../Types.mjs").ScoreLabel} scoreLabel
121
+ * @param {import("../../types").ScoreLabel} scoreLabel
129
122
  */
130
- const addScore = (shortName, qualifiedName, scoreLabel) => {
123
+ const addScore = (
124
+ scoreLabel: ScoreLabel,
125
+ shortName?: string,
126
+ qualifiedName?: string,
127
+ ) => {
131
128
  const canonicalName = shortName || qualifiedName;
129
+ if (!canonicalName) {
130
+ throw new Error("Unable to create a canonical name for a score");
131
+ }
132
132
  const descriptor = evalDescriptor.scoreDescriptor(scoreLabel);
133
133
  const scoreType = descriptor?.scoreType;
134
134
  if (!descriptor) {
@@ -143,17 +143,20 @@ export const scoreFilterItems = (evalDescriptor) => {
143
143
  return;
144
144
  }
145
145
  var tooltip = `${canonicalName}: ${descriptor.scoreType}`;
146
- var categories = [];
146
+ var categories: string[] = [];
147
147
  if (descriptor.min !== undefined || descriptor.max !== undefined) {
148
- const rounded = (num) => {
148
+ const rounded = (num: number) => {
149
149
  // Additional round-trip to remove trailing zeros.
150
150
  return parseFloat(num.toPrecision(3)).toString();
151
151
  };
152
- tooltip += `\nrange: ${rounded(descriptor.min)} to ${rounded(descriptor.max)}`;
152
+ tooltip += `\nrange: ${rounded(descriptor.min || 0)} to ${rounded(descriptor.max || 0)}`;
153
153
  }
154
154
  if (descriptor.categories) {
155
- tooltip += `\ncategories: ${descriptor.categories.map((cat) => cat.val).join(", ")}`;
156
- categories = descriptor.categories.map((cat) => valueToString(cat.val));
155
+ categories = descriptor.categories.map((cat) => {
156
+ const val = (cat as Record<string, unknown>).val;
157
+ return valueToString(val);
158
+ });
159
+ tooltip += `\ncategories: ${categories.join(" ")}`;
157
160
  }
158
161
  items.push({
159
162
  shortName,
@@ -170,29 +173,24 @@ export const scoreFilterItems = (evalDescriptor) => {
170
173
  const hasQualifiedName = name !== scorer;
171
174
  const shortName = hasShortName ? name : undefined;
172
175
  const qualifiedName = hasQualifiedName ? `${scorer}.${name}` : undefined;
173
- addScore(shortName, qualifiedName, { name, scorer });
176
+ addScore({ name, scorer }, shortName, qualifiedName);
174
177
  }
175
178
  return items;
176
179
  };
177
180
 
178
- /**
179
- * TODO: Add case-insensitive string comparison.
180
- *
181
- * @param {import("../../samples/SamplesDescriptor.mjs").EvalDescriptor} evalDescriptor
182
- * @param {import("../../api/Types.mjs").SampleSummary} sample
183
- * @param {string} filterValue
184
- * @returns {{matches: boolean, error: FilterError | undefined}}
185
- */
186
- export const filterExpression = (evalDescriptor, sample, filterValue) => {
181
+ // TODO: Add case-insensitive string comparison.
182
+ export const filterExpression = (
183
+ evalDescriptor: EvalDescriptor,
184
+ sample: SampleSummary,
185
+ filterValue: string,
186
+ ) => {
187
187
  try {
188
- /** @type {(regex: string) => boolean} */
189
- const inputContains = (regex) => {
188
+ const inputContains = (regex: string): boolean => {
190
189
  return inputString(sample.input).some((msg) =>
191
190
  msg.match(new RegExp(regex, "i")),
192
191
  );
193
192
  };
194
- /** @type {(regex: string) => boolean} */
195
- const targetContains = (regex) => {
193
+ const targetContains = (regex: string): boolean => {
196
194
  let targets = Array.isArray(sample.target)
197
195
  ? sample.target
198
196
  : [sample.target];
@@ -217,7 +215,8 @@ export const filterExpression = (evalDescriptor, sample, filterValue) => {
217
215
  }
218
216
  } catch (error) {
219
217
  if (error instanceof ReferenceError) {
220
- const propertyName = error["propertyName"];
218
+ const errorObj = error as any as Record<string, unknown>;
219
+ const propertyName: string = (errorObj["propertyName"] as string) || "";
221
220
  if (propertyName) {
222
221
  const regex = new RegExp(`\\b${propertyName}\\b`);
223
222
  const match = regex.exec(filterValue);
@@ -234,14 +233,16 @@ export const filterExpression = (evalDescriptor, sample, filterValue) => {
234
233
  }
235
234
  }
236
235
  }
236
+
237
+ const message = error instanceof Error ? error.message : "";
237
238
  if (
238
- error.message.startsWith("Parse error") ||
239
- error.message.startsWith("Lexical error")
239
+ message.startsWith("Parse error") ||
240
+ message.startsWith("Lexical error")
240
241
  ) {
241
242
  // Filterex uses formatting like this:
242
243
  // foo and
243
244
  // ----^
244
- const from = error.message.match(/^(-*)\^$/m)?.[1]?.length;
245
+ const from = message.match(/^(-*)\^$/m)?.[1]?.length;
245
246
  return {
246
247
  matches: false,
247
248
  error: {
@@ -255,7 +256,7 @@ export const filterExpression = (evalDescriptor, sample, filterValue) => {
255
256
  return {
256
257
  matches: false,
257
258
  error: {
258
- message: error.message,
259
+ message: message,
259
260
  severity: "error",
260
261
  },
261
262
  };
@@ -263,12 +264,16 @@ export const filterExpression = (evalDescriptor, sample, filterValue) => {
263
264
  };
264
265
 
265
266
  /**
266
- * @param {import("../../samples/SamplesDescriptor.mjs").EvalDescriptor} evalDescriptor
267
- * @param {import("../../api/Types.mjs").SampleSummary[]} samples
267
+ * @param {import("../../samples/descriptor/samplesDescriptor").EvalDescriptor} evalDescriptor
268
+ * @param {import("../../api/types").SampleSummary[]} samples
268
269
  * @param {string} filterValue
269
- * @returns {{result: import("../../api/Types.mjs").SampleSummary[], error: FilterError | undefined}}
270
+ * @returns {}
270
271
  */
271
- export const filterSamples = (evalDescriptor, samples, filterValue) => {
272
+ export const filterSamples = (
273
+ evalDescriptor: EvalDescriptor,
274
+ samples: SampleSummary[],
275
+ filterValue: string,
276
+ ): { result: SampleSummary[]; error: FilterError | undefined } => {
272
277
  var error = undefined;
273
278
  const result = samples.filter((sample) => {
274
279
  if (filterValue) {
@@ -0,0 +1,16 @@
1
+ .label {
2
+ align-self: center;
3
+ margin-right: 0.3em;
4
+ margin-left: 0.2em;
5
+ }
6
+
7
+ .input {
8
+ width: 300px;
9
+ }
10
+
11
+ .help {
12
+ position: relative;
13
+ margin-left: 0.5em;
14
+ cursor: help;
15
+ align-self: center;
16
+ }
@@ -0,0 +1,288 @@
1
+ import { autocompletion, startCompletion } from "@codemirror/autocomplete";
2
+ import {
3
+ bracketMatching,
4
+ HighlightStyle,
5
+ syntaxHighlighting,
6
+ } from "@codemirror/language";
7
+ import { Diagnostic, linter } from "@codemirror/lint";
8
+ import {
9
+ Compartment,
10
+ EditorState,
11
+ Transaction,
12
+ TransactionSpec,
13
+ } from "@codemirror/state";
14
+ import { tags } from "@lezer/highlight";
15
+ import clsx from "clsx";
16
+ import { EditorView, minimalSetup } from "codemirror";
17
+ import { useEffect, useMemo, useRef, useState } from "react";
18
+
19
+ import { ScoreFilter } from "../../../types";
20
+ import { EvalDescriptor } from "../../descriptor/types";
21
+ import { FilterError, filterSamples, scoreFilterItems } from "../filters";
22
+ import { getCompletions } from "./completions";
23
+ import styles from "./SampleFilter.module.css";
24
+ import { language } from "./tokenize";
25
+
26
+ // Types
27
+ interface FilteringResult {
28
+ numSamples: number;
29
+ error?: FilterError;
30
+ }
31
+
32
+ interface SampleFilterProps {
33
+ evalDescriptor: EvalDescriptor;
34
+ scoreFilter: ScoreFilter;
35
+ setScoreFilter: (filter: ScoreFilter) => void;
36
+ }
37
+
38
+ // Constants
39
+ const FILTER_TOOLTIP = `
40
+ Filter samples by:
41
+ • Scores
42
+ • Input and target regex search: input_contains, target_contains
43
+
44
+ Supported expressions:
45
+ • Arithmetic: +, -, *, /, mod, ^
46
+ • Comparison: <, <=, >, >=, ==, !=, including chain comparisons, e.g. "10 <= x < 20"
47
+ • Boolean: and, or, not
48
+ • Regex matching: ~= (case-sensitive)
49
+ • Set operations: in, not in; e.g. "x in (2, 3, 5)"
50
+ • Functions: min, max, abs, round, floor, ceil, sqrt, log, log2, log10
51
+ `.trim();
52
+
53
+ // Styles
54
+ const highlightStyle = HighlightStyle.define([
55
+ { tag: tags.string, class: "token string" },
56
+ { tag: tags.number, class: "token number" },
57
+ { tag: tags.keyword, class: "token keyword" },
58
+ ]);
59
+
60
+ const editorTheme = EditorView.theme({
61
+ "&": {
62
+ fontSize: "inherit",
63
+ color: "var(--inspect-input-foreground)",
64
+ backgroundColor: "var(--inspect-input-background)",
65
+ border: "1px solid var(--inspect-input-border)",
66
+ borderRadius: "var(--bs-border-radius)",
67
+ },
68
+ ".cm-cursor.cm-cursor-primary": {
69
+ borderLeftColor: "var(--bs-body-color)",
70
+ },
71
+ ".cm-selectionBackground": {
72
+ backgroundColor: "var(--inspect-inactive-selection-background)",
73
+ },
74
+ "&.cm-focused > .cm-scroller > .cm-selectionLayer > .cm-selectionBackground":
75
+ {
76
+ backgroundColor: "var(--inspect-active-selection-background)",
77
+ },
78
+ "&.cm-focused": {
79
+ outline: "none",
80
+ borderColor: "var(--inspect-focus-border-color)",
81
+ boxShadow: "var(--inspect-focus-border-shadow)",
82
+ },
83
+ ".filter-pending > &.cm-focused": {
84
+ borderColor: "var(--inspect-focus-border-gray-color)",
85
+ boxShadow: "var(--inspect-focus-border-gray-shadow)",
86
+ },
87
+ ".cm-tooltip": {
88
+ backgroundColor: "var(--bs-light)",
89
+ border: "1px solid var(--bs-border-color)",
90
+ color: "var(--bs-body-color)",
91
+ },
92
+ ".cm-tooltip.cm-tooltip-autocomplete > ul > li": {
93
+ color: "var(--bs-body-color)",
94
+ },
95
+ ".cm-tooltip.cm-tooltip-autocomplete > ul > li[aria-selected]": {
96
+ backgroundColor: "var(--inspect-active-selection-background)",
97
+ color: "var(--bs-body-color)",
98
+ },
99
+ ".cm-scroller": {
100
+ overflow: "hidden",
101
+ },
102
+ });
103
+
104
+ // Helper functions
105
+ const getFilteringResult = (
106
+ evalDescriptor: EvalDescriptor,
107
+ filterValue: string,
108
+ ): FilteringResult => {
109
+ const { result, error } = filterSamples(
110
+ evalDescriptor,
111
+ evalDescriptor.samples,
112
+ filterValue,
113
+ );
114
+ return { numSamples: result.length, error };
115
+ };
116
+
117
+ const ensureOneLine = (tr: Transaction): TransactionSpec => {
118
+ const newDoc = tr.newDoc.toString();
119
+ if (!newDoc.includes("\n")) return tr;
120
+
121
+ if (tr.isUserEvent("input.paste")) {
122
+ return {
123
+ changes: {
124
+ from: 0,
125
+ to: tr.startState.doc.length,
126
+ insert: newDoc.replace(/\n/g, " ").trim(),
127
+ },
128
+ };
129
+ }
130
+ return {};
131
+ };
132
+
133
+ const getLints = (
134
+ view: EditorView,
135
+ filterError?: FilterError,
136
+ ): Diagnostic[] => {
137
+ if (!filterError) return [];
138
+ return [
139
+ {
140
+ from: filterError.from || 0,
141
+ to: filterError.to || view.state.doc.length,
142
+ severity: filterError.severity,
143
+ message: filterError.message,
144
+ },
145
+ ];
146
+ };
147
+
148
+ // Main component
149
+ export const SampleFilter: React.FC<SampleFilterProps> = ({
150
+ evalDescriptor,
151
+ scoreFilter,
152
+ setScoreFilter,
153
+ }) => {
154
+ const editorRef = useRef<HTMLDivElement>(null);
155
+ const editorViewRef = useRef<EditorView>(null);
156
+ const linterCompartment = useRef<Compartment>(new Compartment());
157
+ const autocompletionCompartment = useRef<Compartment>(new Compartment());
158
+ const updateListenerCompartment = useRef<Compartment>(new Compartment());
159
+
160
+ const filterItems = useMemo(
161
+ () => scoreFilterItems(evalDescriptor),
162
+ [evalDescriptor],
163
+ );
164
+
165
+ const [filteringResultInstant, setFilteringResultInstant] =
166
+ useState<FilteringResult | null>(null);
167
+
168
+ const handleFocus = (event: FocusEvent, view: EditorView) => {
169
+ if (event.isTrusted && view.state.doc.toString() === "") {
170
+ setTimeout(() => startCompletion(view), 0);
171
+ }
172
+ };
173
+
174
+ const makeAutocompletion = () =>
175
+ autocompletion({
176
+ override: [(context) => getCompletions(context, filterItems)],
177
+ activateOnCompletion: (c) => c.label.endsWith(" "),
178
+ });
179
+
180
+ const makeLinter = () =>
181
+ linter((view) => getLints(view, filteringResultInstant?.error));
182
+
183
+ const makeUpdateListener = () =>
184
+ EditorView.updateListener.of((update) => {
185
+ if (update.docChanged) {
186
+ const newValue = update.state.doc.toString();
187
+ const filteringResult = getFilteringResult(evalDescriptor, newValue);
188
+ if (!filteringResult.error) {
189
+ setScoreFilter({ value: newValue });
190
+ }
191
+ setFilteringResultInstant(filteringResult);
192
+ }
193
+ });
194
+
195
+ // Initialize editor
196
+ useEffect(() => {
197
+ editorViewRef.current?.destroy();
198
+
199
+ editorViewRef.current = new EditorView({
200
+ parent: editorRef.current ?? undefined,
201
+ state: EditorState.create({
202
+ doc: scoreFilter.value || "",
203
+ extensions: [
204
+ minimalSetup,
205
+ bracketMatching(),
206
+ editorTheme,
207
+ EditorState.transactionFilter.of(ensureOneLine),
208
+ updateListenerCompartment.current.of(makeUpdateListener()),
209
+ EditorView.domEventHandlers({ focus: handleFocus }),
210
+ language,
211
+ syntaxHighlighting(highlightStyle),
212
+ autocompletionCompartment.current.of(makeAutocompletion()),
213
+ linterCompartment.current.of(makeLinter()),
214
+ ],
215
+ }),
216
+ });
217
+
218
+ return () => editorViewRef.current?.destroy();
219
+ }, []);
220
+
221
+ // Handle filter value changes
222
+ useEffect(() => {
223
+ if (!editorViewRef.current) return;
224
+
225
+ const currentValue = editorViewRef.current.state.doc.toString();
226
+ if (scoreFilter.value === currentValue) return;
227
+
228
+ setFilteringResultInstant(
229
+ getFilteringResult(evalDescriptor, scoreFilter.value || ""),
230
+ );
231
+ editorViewRef.current.dispatch({
232
+ changes: {
233
+ from: 0,
234
+ to: currentValue.length,
235
+ insert: scoreFilter.value || "",
236
+ },
237
+ });
238
+ }, [evalDescriptor, scoreFilter.value]);
239
+
240
+ // Update compartments when dependencies change
241
+ useEffect(() => {
242
+ editorViewRef.current?.dispatch({
243
+ effects:
244
+ updateListenerCompartment.current.reconfigure(makeUpdateListener()),
245
+ });
246
+ }, [evalDescriptor]);
247
+
248
+ useEffect(() => {
249
+ editorViewRef.current?.dispatch({
250
+ effects:
251
+ autocompletionCompartment.current.reconfigure(makeAutocompletion()),
252
+ });
253
+ }, [filterItems]);
254
+
255
+ useEffect(() => {
256
+ editorViewRef.current?.dispatch({
257
+ effects: linterCompartment.current.reconfigure(makeLinter()),
258
+ });
259
+ }, [filteringResultInstant?.error]);
260
+
261
+ return (
262
+ <div style={{ display: "flex" }}>
263
+ <span
264
+ className={clsx(
265
+ "sample-filter-label",
266
+ "text-size-smaller",
267
+ "text-style-label",
268
+ "text-style-secondary",
269
+ styles.label,
270
+ )}
271
+ >
272
+ Filter:
273
+ </span>
274
+ <div
275
+ ref={editorRef}
276
+ className={clsx(
277
+ filteringResultInstant?.error && "filter-pending",
278
+ styles.input,
279
+ )}
280
+ />
281
+ <span
282
+ className={clsx("bi", "bi-question-circle", styles.help)}
283
+ data-tooltip={FILTER_TOOLTIP}
284
+ data-tooltip-position="bottom-left"
285
+ />
286
+ </div>
287
+ );
288
+ };