inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (518) hide show
  1. inspect_ai/_cli/cache.py +8 -7
  2. inspect_ai/_cli/common.py +0 -12
  3. inspect_ai/_cli/eval.py +32 -4
  4. inspect_ai/_cli/info.py +1 -0
  5. inspect_ai/_cli/list.py +1 -1
  6. inspect_ai/_cli/log.py +2 -0
  7. inspect_ai/_cli/main.py +1 -1
  8. inspect_ai/_cli/sandbox.py +4 -1
  9. inspect_ai/_cli/score.py +181 -32
  10. inspect_ai/_cli/trace.py +10 -0
  11. inspect_ai/_cli/view.py +4 -2
  12. inspect_ai/_display/core/active.py +2 -3
  13. inspect_ai/_display/core/config.py +7 -1
  14. inspect_ai/_display/textual/widgets/samples.py +4 -3
  15. inspect_ai/_display/textual/widgets/sandbox.py +6 -0
  16. inspect_ai/_eval/eval.py +104 -101
  17. inspect_ai/_eval/evalset.py +75 -75
  18. inspect_ai/_eval/loader.py +122 -12
  19. inspect_ai/_eval/registry.py +1 -1
  20. inspect_ai/_eval/run.py +14 -0
  21. inspect_ai/_eval/score.py +125 -36
  22. inspect_ai/_eval/task/log.py +105 -4
  23. inspect_ai/_eval/task/results.py +92 -38
  24. inspect_ai/_eval/task/run.py +9 -2
  25. inspect_ai/_eval/task/sandbox.py +35 -2
  26. inspect_ai/_eval/task/task.py +49 -46
  27. inspect_ai/_util/constants.py +1 -1
  28. inspect_ai/_util/content.py +8 -0
  29. inspect_ai/_util/error.py +2 -0
  30. inspect_ai/_util/file.py +15 -1
  31. inspect_ai/_util/hash.py +1 -1
  32. inspect_ai/_util/logger.py +4 -2
  33. inspect_ai/_util/registry.py +7 -1
  34. inspect_ai/_view/view.py +1 -2
  35. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  36. inspect_ai/_view/www/.vscode/settings.json +8 -0
  37. inspect_ai/_view/www/App.css +97 -29
  38. inspect_ai/_view/www/README.md +1 -1
  39. inspect_ai/_view/www/dist/assets/index.css +16663 -14674
  40. inspect_ai/_view/www/dist/assets/index.js +58808 -51348
  41. inspect_ai/_view/www/dist/index.html +1 -1
  42. inspect_ai/_view/www/index.html +2 -2
  43. inspect_ai/_view/www/log-schema.json +87 -73
  44. inspect_ai/_view/www/package.json +22 -4
  45. inspect_ai/_view/www/postcss.config.cjs +8 -9
  46. inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
  47. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  48. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  49. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  50. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  51. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  52. inspect_ai/_view/www/src/api/index.ts +4 -4
  53. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  54. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  55. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  56. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  57. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  58. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  59. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  60. inspect_ai/_view/www/src/components/Card.css +60 -0
  61. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  62. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  63. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  64. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  65. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  66. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  67. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  68. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  69. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  70. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  71. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  72. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  73. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  74. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  75. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  76. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  77. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  78. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  79. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  80. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  81. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  82. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  83. inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
  84. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  85. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  86. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  87. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  88. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  89. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  90. inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
  91. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  92. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  93. inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
  94. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  95. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  96. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  97. inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
  98. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  99. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  100. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  101. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  102. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  103. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
  104. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  105. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  106. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  107. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  108. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  109. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  110. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  111. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  112. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  113. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  114. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  115. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  116. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  117. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  118. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
  119. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  120. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  121. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  122. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  123. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  124. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  126. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  127. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
  128. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  129. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
  130. inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
  131. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  132. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  133. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  134. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  135. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  136. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
  137. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  138. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  139. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  140. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
  141. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  142. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
  143. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  144. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
  145. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  146. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
  147. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  148. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
  149. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  150. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  151. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
  152. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
  153. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  154. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  155. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  156. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  157. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  158. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  159. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  160. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  161. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  162. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  163. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  164. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  165. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  166. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  167. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  168. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  169. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  170. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  171. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  172. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  173. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  174. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  175. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  176. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  177. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  179. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  180. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  181. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  182. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
  183. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  184. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
  185. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  186. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  187. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  188. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  189. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  190. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  191. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  192. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  193. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  194. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  196. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  197. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  198. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  199. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  200. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  201. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  202. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  203. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  204. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  205. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  206. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
  207. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  208. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  209. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  210. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  211. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  212. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  213. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  214. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  215. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
  216. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  217. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  218. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  219. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  220. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  221. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
  222. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  223. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
  224. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  225. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  226. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  227. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  228. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  229. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
  230. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  231. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  232. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
  233. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  234. inspect_ai/_view/www/src/types/log.d.ts +108 -19
  235. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  236. inspect_ai/_view/www/src/types.ts +71 -0
  237. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
  238. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  239. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
  240. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  241. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  242. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  243. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  244. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  245. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  246. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  247. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  248. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  249. inspect_ai/_view/www/src/utils/format.ts +194 -0
  250. inspect_ai/_view/www/src/utils/git.ts +7 -0
  251. inspect_ai/_view/www/src/utils/html.ts +6 -0
  252. inspect_ai/_view/www/src/utils/http.ts +14 -0
  253. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  254. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  255. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  256. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  257. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  258. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  259. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  260. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  261. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
  262. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  263. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  264. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  265. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  266. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  267. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
  268. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
  269. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
  270. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  271. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
  272. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  273. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  274. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  275. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  276. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  277. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  278. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  279. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  280. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  281. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  282. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  283. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
  284. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  285. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
  286. inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
  287. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
  288. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  289. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  290. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  291. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  292. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  293. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  294. inspect_ai/_view/www/src/workspace/utils.ts +34 -0
  295. inspect_ai/_view/www/tsconfig.json +23 -9
  296. inspect_ai/_view/www/vite.config.js +8 -17
  297. inspect_ai/_view/www/yarn.lock +627 -556
  298. inspect_ai/approval/_approval.py +2 -0
  299. inspect_ai/approval/_approver.py +4 -4
  300. inspect_ai/approval/_auto.py +1 -1
  301. inspect_ai/approval/_human/approver.py +3 -0
  302. inspect_ai/approval/_policy.py +5 -0
  303. inspect_ai/approval/_registry.py +2 -2
  304. inspect_ai/dataset/_dataset.py +64 -37
  305. inspect_ai/dataset/_sources/__init__.py +0 -0
  306. inspect_ai/dataset/_sources/csv.py +20 -12
  307. inspect_ai/dataset/_sources/file.py +4 -0
  308. inspect_ai/dataset/_sources/hf.py +39 -29
  309. inspect_ai/dataset/_sources/json.py +17 -9
  310. inspect_ai/log/__init__.py +2 -0
  311. inspect_ai/log/_convert.py +3 -3
  312. inspect_ai/log/_file.py +24 -9
  313. inspect_ai/log/_log.py +101 -13
  314. inspect_ai/log/_message.py +4 -2
  315. inspect_ai/log/_recorders/file.py +4 -0
  316. inspect_ai/log/_recorders/json.py +5 -7
  317. inspect_ai/log/_recorders/recorder.py +3 -0
  318. inspect_ai/log/_transcript.py +19 -8
  319. inspect_ai/model/__init__.py +2 -0
  320. inspect_ai/model/_cache.py +39 -21
  321. inspect_ai/model/_call_tools.py +4 -3
  322. inspect_ai/model/_chat_message.py +14 -4
  323. inspect_ai/model/_generate_config.py +1 -1
  324. inspect_ai/model/_model.py +31 -24
  325. inspect_ai/model/_model_output.py +14 -1
  326. inspect_ai/model/_openai.py +10 -18
  327. inspect_ai/model/_providers/anthropic.py +3 -3
  328. inspect_ai/model/_providers/google.py +9 -5
  329. inspect_ai/model/_providers/openai.py +5 -9
  330. inspect_ai/model/_providers/openai_o1.py +3 -5
  331. inspect_ai/model/_providers/openrouter.py +86 -0
  332. inspect_ai/model/_providers/providers.py +11 -0
  333. inspect_ai/scorer/__init__.py +6 -1
  334. inspect_ai/scorer/_answer.py +7 -7
  335. inspect_ai/scorer/_classification.py +38 -18
  336. inspect_ai/scorer/_common.py +2 -8
  337. inspect_ai/scorer/_match.py +4 -5
  338. inspect_ai/scorer/_metric.py +87 -28
  339. inspect_ai/scorer/_metrics/__init__.py +3 -3
  340. inspect_ai/scorer/_metrics/accuracy.py +8 -10
  341. inspect_ai/scorer/_metrics/mean.py +3 -17
  342. inspect_ai/scorer/_metrics/std.py +111 -30
  343. inspect_ai/scorer/_model.py +12 -12
  344. inspect_ai/scorer/_pattern.py +3 -3
  345. inspect_ai/scorer/_reducer/reducer.py +36 -21
  346. inspect_ai/scorer/_reducer/registry.py +2 -2
  347. inspect_ai/scorer/_reducer/types.py +7 -1
  348. inspect_ai/scorer/_score.py +11 -1
  349. inspect_ai/scorer/_scorer.py +110 -16
  350. inspect_ai/solver/__init__.py +1 -1
  351. inspect_ai/solver/_basic_agent.py +19 -22
  352. inspect_ai/solver/_bridge/__init__.py +0 -3
  353. inspect_ai/solver/_bridge/bridge.py +3 -3
  354. inspect_ai/solver/_chain.py +1 -2
  355. inspect_ai/solver/_critique.py +3 -3
  356. inspect_ai/solver/_fork.py +2 -2
  357. inspect_ai/solver/_human_agent/__init__.py +0 -0
  358. inspect_ai/solver/_human_agent/agent.py +5 -8
  359. inspect_ai/solver/_human_agent/commands/clock.py +14 -10
  360. inspect_ai/solver/_human_agent/commands/note.py +1 -1
  361. inspect_ai/solver/_human_agent/commands/score.py +0 -11
  362. inspect_ai/solver/_multiple_choice.py +38 -26
  363. inspect_ai/solver/_prompt.py +7 -7
  364. inspect_ai/solver/_solver.py +53 -52
  365. inspect_ai/solver/_task_state.py +80 -69
  366. inspect_ai/solver/_use_tools.py +9 -9
  367. inspect_ai/tool/__init__.py +4 -1
  368. inspect_ai/tool/_tool.py +43 -14
  369. inspect_ai/tool/_tool_call.py +6 -2
  370. inspect_ai/tool/_tool_choice.py +3 -1
  371. inspect_ai/tool/_tool_def.py +10 -8
  372. inspect_ai/tool/_tool_params.py +24 -0
  373. inspect_ai/tool/_tool_with.py +7 -7
  374. inspect_ai/tool/_tools/__init__.py +0 -0
  375. inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
  376. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
  377. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  378. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  379. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  380. inspect_ai/tool/_tools/_execute.py +23 -11
  381. inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
  382. inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
  383. inspect_ai/tool/_tools/_web_search.py +7 -5
  384. inspect_ai/tool/beta.py +3 -0
  385. inspect_ai/util/_concurrency.py +3 -3
  386. inspect_ai/util/_panel.py +2 -0
  387. inspect_ai/util/_resource.py +12 -12
  388. inspect_ai/util/_sandbox/docker/compose.py +23 -20
  389. inspect_ai/util/_sandbox/docker/config.py +2 -1
  390. inspect_ai/util/_sandbox/docker/docker.py +42 -86
  391. inspect_ai/util/_sandbox/docker/service.py +100 -0
  392. inspect_ai/util/_sandbox/environment.py +99 -96
  393. inspect_ai/util/_sandbox/self_check.py +124 -16
  394. inspect_ai/util/_subprocess.py +5 -3
  395. inspect_ai/util/_subtask.py +15 -16
  396. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
  397. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
  398. inspect_ai-0.3.64.dist-info/RECORD +625 -0
  399. inspect_ai/_view/www/src/Register.mjs +0 -3
  400. inspect_ai/_view/www/src/Types.mjs +0 -38
  401. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  402. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  403. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  404. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  405. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  406. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  407. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  408. inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
  409. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  410. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  411. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  412. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  413. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  414. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  415. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  416. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  417. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  418. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  419. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  420. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  421. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  422. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  423. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  424. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  425. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  426. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  427. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  428. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  429. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  430. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  431. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  432. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  433. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  434. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  435. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  436. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  437. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  438. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  439. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  440. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  441. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  442. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  443. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  444. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  445. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  446. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  447. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  448. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  449. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  450. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  451. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  452. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  453. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  454. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  455. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  456. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  457. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  458. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  459. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  460. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  461. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  462. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  463. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  464. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  465. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  466. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  467. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  468. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  469. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  470. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  471. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  472. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  473. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  474. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  475. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  476. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  477. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
  478. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  479. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  480. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  481. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  482. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  483. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  484. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  485. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  486. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  487. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  488. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  489. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  490. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  491. inspect_ai/tool/beta/__init__.py +0 -5
  492. inspect_ai-0.3.62.dist-info/RECORD +0 -481
  493. /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
  494. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
  495. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  496. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  497. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  498. /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
  499. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  500. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  501. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  502. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  503. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  504. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  505. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  506. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
  507. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
  508. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  509. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  510. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
  511. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  512. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  513. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  514. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  515. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  516. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
  517. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
  518. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -1,771 +0,0 @@
1
- import { html } from "htm/preact";
2
- import { FontSize } from "../appearance/Fonts.mjs";
3
- import { ApplicationStyles } from "../appearance/Styles.mjs";
4
- import {
5
- formatPrettyDecimal,
6
- formatDecimalNoTrailingZeroes,
7
- inputString,
8
- arrayToString,
9
- } from "../utils/Format.mjs";
10
- import { RenderedContent } from "../components/RenderedContent/RenderedContent.mjs";
11
- import { isNumeric } from "../utils/Type.mjs";
12
- import {
13
- kScoreTypeCategorical,
14
- kScoreTypeNumeric,
15
- kScoreTypeObject,
16
- kScoreTypeOther,
17
- kScoreTypePassFail,
18
- } from "../constants.mjs";
19
-
20
- /**
21
- * Represents a utility summary of the samples that doesn't change with the selected score.
22
- * @typedef {Object} EvalDescriptor
23
- * @property {number} epochs - The number of epochs.
24
- * @property {import("../api/Types.ts").SampleSummary[]} samples - The list of sample summaries.
25
- * @property {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
26
- * @property {(sample: import("../api/Types.ts").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => ScorerDescriptor} scorerDescriptor - Returns the scorer descriptor for a sample and a specified scorer.
27
- * @property {(scoreLabel: import("../Types.mjs").ScoreLabel) => ScoreDescriptor} scoreDescriptor - Provides information about the score types and how to render them.
28
- * @property {(sample: import("../api/Types.ts").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => SelectedScore} score - Returns information about a score for a sample.
29
- * @property {(sample: import("../api/Types.ts").BasicSampleData, scorer: string) => string} scoreAnswer - Returns the answer for a sample and a specified scorer.
30
- */
31
-
32
- /**
33
- * Represents a utility summary of the samples.
34
- * @typedef {Object} SamplesDescriptor
35
- * @property {EvalDescriptor} evalDescriptor - The EvalDescriptor.
36
- * @property {MessageShape} messageShape - The normalized sizes of input, target, and answer messages.
37
- * @property {ScoreDescriptor} selectedScoreDescriptor - Provides information about the score types and how to render them.
38
- * @property {(sample: import("../api/Types.ts").BasicSampleData) => SelectedScore} selectedScore - Returns the selected score for a sample.
39
- * @property {(sample: import("../api/Types.ts").BasicSampleData) => ScorerDescriptor} selectedScorerDescriptor - Returns the scorer descriptor for a sample using the selected scorer.
40
- */
41
-
42
- /**
43
- * Provides information about the score types and rendering functions.
44
- * @typedef {Object} ScoreDescriptor
45
- * @property {string} scoreType - The type of the score (e.g., 'numeric', 'categorical', 'boolean').
46
- * @property {Array<Object>} [categories] - The categories for categorical scores.
47
- * @property {number} [min] - The minimum value for numeric scores.
48
- * @property {number} [max] - The maximum value for numeric scores.
49
- * @property {(a: import("../types/log").Value2, b: import("../types/log").Value2) => number} compare - Function to compare two score values.
50
- * @property {(score: import("../types/log").Value2) => any} render - Function to render the score value.
51
- */
52
-
53
- /**
54
- * Provides descriptor functions for a scorer.
55
- * @typedef {Object} ScorerDescriptor
56
- * @property {() => string} metadata - Function to retrieve the metadata of the score.
57
- * @property {() => string} explanation - Function to retrieve the explanation of the score.
58
- * @property {() => string} answer - Function to retrieve the answer associated with the score.
59
- * @property {function(): Array<{name: string, rendered: function(): any}>} scores - Function to retrieve scores with their render functions.
60
- */
61
-
62
- /**
63
- * Represents a score for a sample, including its value and render function.
64
- * @typedef {Object} SelectedScore
65
- * @property {import("../types/log").Value2} value - The value of the selected score.
66
- * @property {function(): any} render - Function to render the selected score.
67
- */
68
-
69
- /**
70
- * Describes the shape of the messages based on their sizes.
71
- * @typedef {Object} MessageShape
72
- * @property {Object} raw
73
- * @property {number} raw.id - Capped character length of the id (not normalized)
74
- * @property {number} raw.input - Capped character length of the input message (not normalized).
75
- * @property {number} raw.target - Capped character length of the target message (not normalized).
76
- * @property {number} raw.answer - Capped character length of the answer message (not normalized).
77
- * @property {number} raw.limit - Capped character length of the limit message (not normalized).
78
- * @property {Object} normalized
79
- * @property {number} normalized.id - Normalized size of the id
80
- * @property {number} normalized.input - Normalized size of the input message.
81
- * @property {number} normalized.target - Normalized size of the target message.
82
- * @property {number} normalized.answer - Normalized size of the answer message.
83
- * @property {number} normalized.limit - Normalized size of the limit message.
84
- */
85
-
86
/**
 * Builds the string key that identifies a score label.
 *
 * @param {import("../Types.mjs").ScoreLabel | undefined} scoreLabel - The label to encode.
 * @returns {string} `"<scorer>.<name>"`, or the sentinel `"No score key"`
 *   when no label is supplied.
 */
export const scoreLabelKey = (scoreLabel) =>
  scoreLabel ? `${scoreLabel.scorer}.${scoreLabel.name}` : "No score key";
96
-
97
/**
 * Parses a key of the form `"<scorer>.<name>"` back into a score label.
 *
 * The key is split on the FIRST dot only, so a score name that itself
 * contains dots (e.g. `"match.f1.macro"`) is returned intact instead of
 * being truncated to its first segment.
 *
 * @param {string} key - The key to parse.
 * @returns {import("../Types.mjs").ScoreLabel | undefined} The parsed label,
 *   or `undefined` for the sentinel `"No score key"`. When the key contains
 *   no dot, `name` is `undefined`.
 */
export const parseScoreLabelKey = (key) => {
  if (key === "No score key") {
    return undefined;
  }

  const separator = key.indexOf(".");
  if (separator === -1) {
    // No separator at all: everything is the scorer, there is no name.
    return { scorer: key, name: undefined };
  }

  return {
    scorer: key.slice(0, separator),
    name: key.slice(separator + 1),
  };
};
108
-
109
- /**
110
- * @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
111
- * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
112
- * @param {number} epochs - The number of epochs
113
- * @returns {EvalDescriptor} The EvalDescriptor
114
- */
115
- export const createEvalDescriptor = (scores, samples, epochs) => {
116
- if (!samples) {
117
- return undefined;
118
- }
119
-
120
- /**
121
- * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
122
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel - the score label
123
- * @returns {import("../types/log").Value2} The Score
124
- */
125
- const scoreValue = (sample, scoreLabel) => {
126
- // no scores, no value
127
- if (Object.keys(sample.scores).length === 0 || !scoreLabel) {
128
- return undefined;
129
- }
130
-
131
- if (
132
- scoreLabel.scorer !== scoreLabel.name &&
133
- sample.scores[scoreLabel.scorer] &&
134
- sample.scores[scoreLabel.scorer].value
135
- ) {
136
- return sample.scores[scoreLabel.scorer].value[scoreLabel.name];
137
- } else if (sample.scores[scoreLabel.name]) {
138
- return sample.scores[scoreLabel.name].value;
139
- } else {
140
- return undefined;
141
- }
142
- };
143
-
144
- /**
145
- * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
146
- * @param {string} scorer - the scorer name
147
- * @returns {string} The answer
148
- */
149
- const scoreAnswer = (sample, scorer) => {
150
- if (sample) {
151
- const sampleScore = sample.scores[scorer];
152
- if (sampleScore && sampleScore.answer) {
153
- return sampleScore.answer;
154
- }
155
- } else {
156
- return undefined;
157
- }
158
- };
159
-
160
- /**
161
- * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
162
- * @param {string} scorer - the scorer name
163
- * @returns {string} The explanation
164
- */
165
- const scoreExplanation = (sample, scorer) => {
166
- if (sample) {
167
- const sampleScore = sample.scores[scorer];
168
- if (sampleScore && sampleScore.explanation) {
169
- return sampleScore.explanation;
170
- }
171
- }
172
- return undefined;
173
- };
174
-
175
- // Retrieve the metadata for a sample
176
- /**
177
- * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
178
- * @param {string} scorer - the scorer name
179
- * @returns {Object} The explanation
180
- */
181
- const scoreMetadata = (sample, scorer) => {
182
- if (sample) {
183
- const sampleScore = sample.scores[scorer];
184
- if (sampleScore && sampleScore.metadata) {
185
- return sampleScore.metadata;
186
- }
187
- }
188
- return undefined;
189
- };
190
-
191
- /**
192
- * The EvalDescriptor is memoized. Compute all descriptors now to avoid duplicate work.
193
- * @type {Map<string, ScoreDescriptor>}
194
- */
195
- const scoreDescriptorMap = new Map();
196
- for (const scoreLabel of scores) {
197
- const uniqScoreValues = [
198
- ...new Set(
199
- samples
200
- .filter((sample) => !!sample.scores)
201
- .filter((sample) => {
202
- // There is no selected scorer, so include this value
203
- if (!scoreLabel) {
204
- return true;
205
- }
206
-
207
- if (scoreLabel.scorer !== scoreLabel.name) {
208
- return (
209
- Object.keys(sample.scores).includes(scoreLabel.scorer) &&
210
- Object.keys(sample.scores[scoreLabel.scorer].value).includes(
211
- scoreLabel.name,
212
- )
213
- );
214
- } else {
215
- return Object.keys(sample.scores).includes(scoreLabel.name);
216
- }
217
- })
218
- .map((sample) => {
219
- return scoreValue(sample, scoreLabel);
220
- })
221
- .filter((value) => {
222
- return value !== null;
223
- }),
224
- ),
225
- ];
226
- const uniqScoreTypes = [
227
- ...new Set(uniqScoreValues.map((scoreValue) => typeof scoreValue)),
228
- ];
229
-
230
- for (const categorizer of scoreCategorizers) {
231
- const scoreDescriptor = categorizer.describe(
232
- uniqScoreValues,
233
- uniqScoreTypes,
234
- );
235
- if (scoreDescriptor) {
236
- scoreDescriptorMap.set(scoreLabelKey(scoreLabel), scoreDescriptor);
237
- break;
238
- }
239
- }
240
- }
241
-
242
- /**
243
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
244
- * @returns {ScoreDescriptor | undefined}
245
- */
246
- const scoreDescriptor = (scoreLabel) => {
247
- return scoreDescriptorMap.get(scoreLabelKey(scoreLabel));
248
- };
249
-
250
- /**
251
- * @param {import("../api/Types.ts").BasicSampleData} sample
252
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
253
- * @returns {any}
254
- */
255
- const scoreRendered = (sample, scoreLabel) => {
256
- const descriptor = scoreDescriptor(scoreLabel);
257
- const score = scoreValue(sample, scoreLabel);
258
- if (score === null || score === "undefined") {
259
- return "null";
260
- } else if (descriptor && descriptor.render) {
261
- return descriptor.render(score);
262
- } else {
263
- return score;
264
- }
265
- };
266
-
267
- /**
268
- * @param {import("../api/Types.ts").BasicSampleData} sample
269
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
270
- * @returns {ScorerDescriptor}
271
- */
272
- const scorerDescriptor = (sample, scoreLabel) => {
273
- return {
274
- metadata: () => {
275
- return scoreMetadata(sample, scoreLabel.scorer);
276
- },
277
- explanation: () => {
278
- return scoreExplanation(sample, scoreLabel.scorer);
279
- },
280
- answer: () => {
281
- return scoreAnswer(sample, scoreLabel.scorer);
282
- },
283
- scores: () => {
284
- if (!sample || !sample.scores) {
285
- return [];
286
- }
287
- const myScoreDescriptor = scoreDescriptor(scoreLabel);
288
- if (!myScoreDescriptor) {
289
- return [];
290
- }
291
-
292
- // Make a list of all the valid score names (this is
293
- // used to distinguish between dictionaries that contain
294
- // scores that should be treated as standlone scores and
295
- // dictionaries that just contain random values, which is allowed)
296
- const scoreNames = scores.map((score) => {
297
- return score.name;
298
- });
299
- const sampleScorer = sample.scores[scoreLabel.scorer];
300
- const scoreVal = sampleScorer.value;
301
-
302
- if (typeof scoreVal === "object") {
303
- const names = Object.keys(scoreVal);
304
-
305
- // See if this is a dictionary of score names
306
- // if any of the score names match, treat it
307
- // as a scorer dictionary
308
- if (
309
- names.find((name) => {
310
- return scoreNames.includes(name);
311
- })
312
- ) {
313
- // Since this dictionary contains keys which are scores
314
- // we actually render the individual scores
315
- const scores = names.map((name) => {
316
- return {
317
- name,
318
- rendered: () => {
319
- return myScoreDescriptor.render(scoreVal[name]);
320
- },
321
- };
322
- });
323
- return scores;
324
- } else {
325
- // Since this dictionary contains keys which are not scores
326
- // we just treat it like an opaque dictionary
327
- return [
328
- {
329
- name: scoreLabel.scorer,
330
- rendered: () => {
331
- return myScoreDescriptor.render(scoreVal);
332
- },
333
- },
334
- ];
335
- }
336
- } else {
337
- return [
338
- {
339
- name: scoreLabel.scorer,
340
- rendered: () => {
341
- return myScoreDescriptor.render(scoreVal);
342
- },
343
- },
344
- ];
345
- }
346
- },
347
- };
348
- };
349
-
350
- /**
351
- * @param {import("../api/Types.ts").BasicSampleData} sample
352
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
353
- * @returns {SelectedScore}
354
- */
355
- const score = (sample, scoreLabel) => {
356
- return {
357
- value: scoreValue(sample, scoreLabel),
358
- render: () => {
359
- return scoreRendered(sample, scoreLabel);
360
- },
361
- };
362
- };
363
-
364
- return {
365
- epochs,
366
- samples,
367
- scores,
368
- scorerDescriptor,
369
- scoreDescriptor,
370
- score,
371
- scoreAnswer,
372
- };
373
- };
374
-
375
/**
 * Provides a utility summary of the samples for the selected score.
 *
 * Measures, across all samples, the widest input, target, answer, limit,
 * id and score text (each capped), and exposes both those raw widths and
 * their share of the total as `messageShape`.
 *
 * @param {EvalDescriptor} evalDescriptor - The EvalDescriptor.
 * @param {import("../Types.mjs").ScoreLabel} selectedScore - Selected score.
 * @returns {SamplesDescriptor} - The SamplesDescriptor, or `undefined` when
 *   no EvalDescriptor is supplied.
 */
export const createSamplesDescriptor = (evalDescriptor, selectedScore) => {
  if (!evalDescriptor) {
    return undefined;
  }

  // Per-column caps on the measured character length.
  const kMaxInput = 300;
  const kMaxTarget = 300;
  const kMaxAnswer = 300;
  const kMaxLimit = 50;
  const kMaxId = 10;
  const kMaxScore = 30;

  // Largest length seen so far, never exceeding the cap.
  const widest = (soFar, length, cap) => Math.min(Math.max(soFar, length), cap);

  // Find the widest value of each column.
  // Index layout: [input, target, answer, limit, id, score]
  const sizes = evalDescriptor.samples.reduce(
    (previous, current) => {
      const text = inputString(current.input).join(" ");
      const scoreValue = evalDescriptor.score(current, selectedScore).value;

      // 0 and false are real scores and must contribute their text width;
      // only genuinely missing/empty scores fall back to the error text.
      const hasScore =
        scoreValue === 0 || scoreValue === false || Boolean(scoreValue);
      const scoreText = hasScore
        ? String(scoreValue)
        : current.error
          ? String(current.error)
          : "";

      // NOTE(review): scoreAnswer's second parameter is a scorer name but the
      // score *name* is passed here — confirm which is intended.
      const answerLength =
        evalDescriptor.scoreAnswer(current, selectedScore?.name)?.length || 0;

      previous[0] = widest(previous[0], text.length, kMaxInput);
      previous[1] = widest(
        previous[1],
        arrayToString(current.target).length,
        kMaxTarget,
      );
      previous[2] = widest(previous[2], answerLength, kMaxAnswer);
      previous[3] = widest(
        previous[3],
        current.limit ? current.limit.length : 0,
        kMaxLimit,
      );
      previous[4] = widest(previous[4], String(current.id).length, kMaxId);
      previous[5] = widest(previous[5], scoreText.length, kMaxScore);

      return previous;
    },
    [0, 0, 0, 0, 0, 0],
  );

  // normalize to base 1
  const maxSizes = {
    input: Math.min(sizes[0], kMaxInput),
    target: Math.min(sizes[1], kMaxTarget),
    answer: Math.min(sizes[2], kMaxAnswer),
    limit: Math.min(sizes[3], kMaxLimit),
    id: Math.min(sizes[4], kMaxId),
    // The score width lives at index 5 (index 4 is the id width).
    score: Math.min(sizes[5], kMaxScore),
  };
  // Fall back to 1 so an all-empty table does not divide by zero.
  const base =
    maxSizes.input +
      maxSizes.target +
      maxSizes.answer +
      maxSizes.limit +
      maxSizes.id +
      maxSizes.score || 1;
  const messageShape = {
    raw: {
      input: sizes[0],
      target: sizes[1],
      answer: sizes[2],
      limit: sizes[3],
      id: sizes[4],
      score: sizes[5],
    },
    normalized: {
      input: maxSizes.input / base,
      target: maxSizes.target / base,
      answer: maxSizes.answer / base,
      limit: maxSizes.limit / base,
      id: maxSizes.id / base,
      score: maxSizes.score / base,
    },
  };

  return {
    evalDescriptor,
    messageShape,
    selectedScoreDescriptor: evalDescriptor.scoreDescriptor(selectedScore),
    selectedScore: (sample) => evalDescriptor.score(sample, selectedScore),
    selectedScorerDescriptor: (sample) =>
      evalDescriptor.scorerDescriptor(sample, selectedScore),
  };
};
468
-
469
/**
 * @typedef {Object} ScoreCategorizer
 * @property {(values: import("../types/log").Value2[], types?: ("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]) => ScoreDescriptor} describe
 */

/**
 * Ordered list of categorizers. Each `describe` receives the distinct score
 * values and the distinct `typeof` results for those values, and returns a
 * ScoreDescriptor when it recognises them or `undefined` otherwise. The last
 * entry always answers, so it acts as the fallback.
 *
 * @type {ScoreCategorizer[]}
 */
const scoreCategorizers = [
  {
    /**
     * Boolean scores: every value is a boolean.
     *
     * @param {import("../types/log").Value2[]} values - the distinct score values
     * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the distinct value types
     * @returns {ScoreDescriptor} a ScoreDescriptor
     */
    describe: (values, types) => {
      if (types.length === 1 && types[0] === "boolean") {
        return booleanScoreCategorizer();
      }
    },
  },
  {
    /**
     * Boolean-like scores: exactly two distinct values, each 0 or 1.
     *
     * @param {import("../types/log").Value2[]} values - the distinct score values
     * @returns {ScoreDescriptor} a ScoreDescriptor
     */
    describe: (values) => {
      if (values.length === 2 && values.every((val) => val === 1 || val === 0)) {
        return booleanScoreCategorizer();
      }
    },
  },
  {
    /**
     * Pass/fail scores: fewer than five string values, all drawn from
     * "C", "P", "I" and "N".
     *
     * @param {import("../types/log").Value2[]} values - the distinct score values
     * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the distinct value types
     * @returns {ScoreDescriptor} a ScoreDescriptor
     */
    describe: (values, types) => {
      // `every` rather than `!find(...)`: find returns the offending value,
      // which is falsy for "" and would wrongly pass the check.
      if (
        types[0] === "string" &&
        types.length === 1 &&
        values.length < 5 &&
        values.every((val) => {
          return val === "I" || val === "C" || val === "P" || val === "N";
        })
      ) {
        return passFailScoreCategorizer(values);
      }
    },
  },
  {
    /**
     * Categorical scores: fewer than ten distinct string values.
     *
     * @param {import("../types/log").Value2[]} values - the distinct score values
     * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the distinct value types
     * @returns {ScoreDescriptor} a ScoreDescriptor
     */
    describe: (values, types) => {
      if (values.length < 10 && types.length === 1 && types[0] === "string") {
        return {
          scoreType: kScoreTypeCategorical,
          categories: values,
          compare: (a, b) => {
            return String(a).localeCompare(String(b));
          },
          render: (score) => {
            return score;
          },
        };
      }
    },
  },
  {
    /**
     * Numeric scores: the first detected type is "number". Min and max are
     * computed over the numeric values only.
     *
     * @param {import("../types/log").Value2[]} values - the distinct score values
     * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the distinct value types
     * @returns {ScoreDescriptor} a ScoreDescriptor
     */
    describe: (values, types) => {
      if (types.length !== 0 && types[0] === "number") {
        const onlyNumeric = values.filter((val) => {
          return typeof val === "number";
        });

        return {
          scoreType: kScoreTypeNumeric,
          min: Math.min(...onlyNumeric),
          max: Math.max(...onlyNumeric),
          compare: (a, b) => {
            if (typeof a === "number" && typeof b === "number") {
              return a - b;
            } else {
              console.warn(
                "Comparing non-numerics using a numeric score descriptor",
              );
              return 0;
            }
          },
          render: (score) => {
            return formatDecimalNoTrailingZeroes(Number(score));
          },
        };
      }
    },
  },
  {
    /**
     * Object scores: the first detected type is "object". Categories are
     * the distinct JSON serialisations, offered only when there are fewer
     * than ten of them.
     *
     * @param {import("../types/log").Value2[]} values - the distinct score values
     * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the distinct value types
     * @returns {ScoreDescriptor} a ScoreDescriptor
     */
    describe: (values, types) => {
      if (types.length !== 0 && types[0] === "object") {
        const buckets = values.map((val) => {
          return JSON.stringify(val);
        });
        const vals = new Set(buckets);
        let categories = undefined;
        if (vals.size < 10) {
          categories = Array.from(vals).map((val) => {
            return {
              val,
              text: val,
            };
          });
        }

        return {
          scoreType: kScoreTypeObject,
          categories,
          compare: () => {
            return 0;
          },
          render: (score) => {
            if (score === null || score === undefined) {
              return "[null]";
            }

            // One stacked label/value cell per key of the score object.
            const scores = [];
            const keys = Object.keys(score);
            keys.forEach((key, index) => {
              const value = score[key];
              const formattedValue = isNumeric(value)
                ? formatPrettyDecimal(parseFloat(value))
                : value;
              const style = {
                display: "flex",
                flexDirection: "column",
                alignItems: "center",
                marginLeft: "0.5rem",
              };
              // Every cell except the last gets bottom padding.
              if (index + 1 < keys.length) {
                style["paddingBottom"] = "1em";
              }
              scores.push(html`
                <div style=${style}>
                  <div style=${{ fontSize: FontSize.smaller, fontWeight: 300 }}>
                    ${key}
                  </div>
                  <div style=${{ fontSize: FontSize.title, fontWeight: 600 }}>
                    ${formattedValue}
                  </div>
                </div>
              `);
            });

            return scores;
          },
        };
      }
    },
  },
  {
    /**
     * Fallback: always answers, rendering the value through RenderedContent.
     *
     * @returns {ScoreDescriptor} a ScoreDescriptor
     */
    // @ts-ignore
    describe: () => {
      return {
        scoreType: kScoreTypeOther,
        compare: () => {
          return 0;
        },
        render: (score) => {
          return html`<${RenderedContent}
            id="other-score-value"
            entry=${{ value: score }}
          />`;
        },
      };
    },
  },
];
663
-
664
// Inline style for a 20px circular badge with centred content. It is spread
// into the style of the score <span> elements rendered elsewhere in this file,
// after the fill colour, so its keys win on any overlap.
// NOTE(review): "Consola Regular" may be intended as "Consolas" — confirm.
const filledCircleStyle = {
  fontSize: FontSize.small,
  fontFamily: "Consola Regular",
  width: "20px",
  height: "20px",
  display: "inline-flex",
  justifyContent: "center",
  alignItems: "center",
  borderRadius: "50%", // makes the square box a circle
  paddingTop: "1px",
};
675
-
676
/**
 * Creates the descriptor used for boolean scores.
 *
 * `compare` orders two items by the numeric value of their `value`
 * property; `render` draws the score inside a filled circle, green when the
 * score is truthy and red otherwise.
 *
 * @returns {ScoreDescriptor} a ScoreDescriptor with scoreType "boolean"
 */
const booleanScoreCategorizer = () => {
  const compare = (a, b) => Number(a.value) - Number(b.value);

  const render = (score) => {
    const { green, red } = ApplicationStyles.scoreFills;
    const fill = score ? green : red;
    // filledCircleStyle is spread last so it overrides the fill on overlap.
    const style = { ...fill, ...filledCircleStyle };
    return html`<span style=${style}>${score}</span>`;
  };

  return { scoreType: "boolean", compare, render };
};
697
-
698
/**
 * Creates the descriptor used for pass/fail scores ("C", "P", "I", "N").
 *
 * Categories are listed only for the grades present in `values`, always in
 * the order C, P, I, N. `render` draws a known grade inside a filled circle
 * (green for C, orange for P, red for I and N) and returns any other score
 * unchanged. `compare` orders two items by the position of their `value`
 * property in C, P, I, N.
 *
 * @param {import("../types/log").Value2[]} values - the distinct score values
 * @returns {ScoreDescriptor} a ScoreDescriptor
 */
const passFailScoreCategorizer = (values) => {
  const order = ["C", "P", "I", "N"];
  const labels = { C: "Correct", P: "Partial", I: "Incorrect", N: "Refusal" };

  const categories = order
    .filter((grade) => values.includes(grade))
    .map((grade) => ({ val: grade, text: labels[grade] }));

  // Fill colour for each known grade; looked up lazily at render time.
  const fillFor = (grade) => {
    switch (grade) {
      case "C":
        return ApplicationStyles.scoreFills.green;
      case "P":
        return ApplicationStyles.scoreFills.orange;
      default:
        // "I" and "N" share the red fill.
        return ApplicationStyles.scoreFills.red;
    }
  };

  return {
    scoreType: kScoreTypePassFail,
    categories,
    render: (score) => {
      // Anything that is not one of the four grades is passed through.
      if (!order.includes(score)) {
        return score;
      }
      const style = { ...fillFor(score), ...filledCircleStyle };
      return html`<span style=${style}>${score}</span>`;
    },
    compare: (a, b) => order.indexOf(a.value) - order.indexOf(b.value),
  };
};