inspect-ai 0.3.62__py3-none-any.whl → 0.3.63__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (415) hide show
  1. inspect_ai/_cli/main.py +1 -1
  2. inspect_ai/_cli/trace.py +8 -0
  3. inspect_ai/_display/core/active.py +2 -3
  4. inspect_ai/_eval/eval.py +4 -4
  5. inspect_ai/_eval/evalset.py +6 -6
  6. inspect_ai/_eval/task/run.py +3 -0
  7. inspect_ai/_util/hash.py +1 -1
  8. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  9. inspect_ai/_view/www/.vscode/settings.json +8 -0
  10. inspect_ai/_view/www/App.css +92 -29
  11. inspect_ai/_view/www/dist/assets/index.css +16637 -14676
  12. inspect_ai/_view/www/dist/assets/index.js +58897 -51440
  13. inspect_ai/_view/www/dist/index.html +1 -1
  14. inspect_ai/_view/www/index.html +2 -2
  15. inspect_ai/_view/www/log-schema.json +1 -0
  16. inspect_ai/_view/www/package.json +22 -4
  17. inspect_ai/_view/www/postcss.config.cjs +8 -9
  18. inspect_ai/_view/www/src/{App.mjs → App.tsx} +355 -365
  19. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  20. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  21. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  22. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  23. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  24. inspect_ai/_view/www/src/api/index.ts +4 -4
  25. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  26. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  27. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  28. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  29. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  30. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  31. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  32. inspect_ai/_view/www/src/components/Card.css +60 -0
  33. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  34. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  35. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  36. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  37. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  38. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  39. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  40. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  41. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  42. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  43. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  44. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  46. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  47. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  48. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  49. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  50. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  51. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  52. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  53. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  54. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  55. inspect_ai/_view/www/src/components/LargeModal.tsx +199 -0
  56. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  57. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  58. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  59. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  60. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  61. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  62. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  63. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  64. inspect_ai/_view/www/src/components/NavPills.tsx +99 -0
  65. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  66. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  67. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  68. inspect_ai/_view/www/src/components/TabSet.tsx +200 -0
  69. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  70. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  71. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  72. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  73. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  74. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -7
  75. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  76. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  77. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  78. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  79. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  80. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  81. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  82. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  83. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  84. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  85. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  86. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  87. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  88. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  89. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +309 -0
  90. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  91. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  92. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  93. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  94. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  95. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  96. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  97. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  98. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +326 -0
  99. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  100. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +175 -0
  101. inspect_ai/_view/www/src/samples/SamplesTools.tsx +60 -0
  102. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  103. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  104. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  105. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  106. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  107. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +46 -0
  108. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  109. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  110. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  111. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +143 -0
  112. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  113. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +131 -0
  114. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  115. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +145 -0
  116. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  117. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +86 -0
  118. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  119. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +53 -0
  120. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  121. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  122. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +107 -0
  123. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +363 -0
  124. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  125. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  126. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  127. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  128. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  129. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  130. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  131. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  132. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  133. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  134. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  135. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  136. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  137. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  138. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  139. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  140. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  141. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  142. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  143. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  144. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  145. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  146. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  147. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  148. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  149. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  150. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  151. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  152. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  153. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +173 -0
  154. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  155. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +182 -0
  156. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  157. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  158. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  159. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  160. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  161. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  162. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  163. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  164. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  165. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  166. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  167. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  168. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  169. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  170. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  171. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  172. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  173. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  174. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  175. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  176. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  177. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +108 -0
  178. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  179. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  180. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  181. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  182. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  183. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  184. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  185. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  186. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +91 -0
  187. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  188. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  189. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  190. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  191. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  192. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +38 -0
  193. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  194. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +190 -0
  195. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  196. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  197. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  198. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  199. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  200. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +274 -0
  201. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  202. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  203. inspect_ai/_view/www/src/samples/transcript/state/{StateEventView.mjs → StateEventView.tsx} +148 -110
  204. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  205. inspect_ai/_view/www/src/types/log.d.ts +1 -0
  206. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  207. inspect_ai/_view/www/src/types.ts +71 -0
  208. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +22 -0
  209. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  210. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +95 -0
  211. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  212. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  213. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  214. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  215. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  216. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  217. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  218. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  219. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  220. inspect_ai/_view/www/src/utils/format.ts +194 -0
  221. inspect_ai/_view/www/src/utils/git.ts +7 -0
  222. inspect_ai/_view/www/src/utils/html.ts +6 -0
  223. inspect_ai/_view/www/src/utils/http.ts +14 -0
  224. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  225. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  226. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  227. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  228. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  229. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  230. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  231. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  232. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +160 -0
  233. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  234. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  235. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  236. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  237. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  238. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +113 -0
  239. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +67 -0
  240. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +156 -0
  241. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  242. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +222 -0
  243. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  244. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  245. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  246. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  247. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  248. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  249. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  250. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  251. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  252. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  253. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  254. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +41 -0
  255. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  256. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +61 -0
  257. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +80 -0
  258. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  259. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  260. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  261. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  262. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  263. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  264. inspect_ai/_view/www/tsconfig.json +23 -9
  265. inspect_ai/_view/www/vite.config.js +8 -17
  266. inspect_ai/_view/www/yarn.lock +627 -556
  267. inspect_ai/dataset/_dataset.py +36 -0
  268. inspect_ai/dataset/_sources/csv.py +8 -0
  269. inspect_ai/dataset/_sources/file.py +4 -0
  270. inspect_ai/dataset/_sources/hf.py +11 -1
  271. inspect_ai/dataset/_sources/json.py +8 -0
  272. inspect_ai/log/_log.py +3 -6
  273. inspect_ai/log/_message.py +1 -1
  274. inspect_ai/log/_recorders/json.py +5 -7
  275. inspect_ai/model/_call_tools.py +2 -1
  276. inspect_ai/model/_providers/anthropic.py +3 -3
  277. inspect_ai/model/_providers/openai_o1.py +3 -5
  278. inspect_ai/model/_providers/openrouter.py +86 -0
  279. inspect_ai/model/_providers/providers.py +11 -0
  280. inspect_ai/scorer/_answer.py +7 -7
  281. inspect_ai/scorer/_classification.py +34 -18
  282. inspect_ai/scorer/_common.py +2 -8
  283. inspect_ai/solver/_multiple_choice.py +24 -9
  284. inspect_ai/tool/__init__.py +2 -0
  285. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +2 -5
  286. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  287. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  288. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  289. inspect_ai/tool/_tools/_execute.py +8 -2
  290. inspect_ai/tool/beta.py +3 -0
  291. inspect_ai/util/_sandbox/docker/docker.py +32 -85
  292. inspect_ai/util/_sandbox/self_check.py +124 -16
  293. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.63.dist-info}/METADATA +2 -1
  294. inspect_ai-0.3.63.dist-info/RECORD +618 -0
  295. inspect_ai/_view/www/src/Register.mjs +0 -3
  296. inspect_ai/_view/www/src/Types.mjs +0 -38
  297. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  298. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  299. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  300. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  301. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  302. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  303. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  304. inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
  305. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  306. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  307. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  308. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  309. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  310. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  311. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  312. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  313. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  314. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  315. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  316. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  317. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  318. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  319. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  320. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  321. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  322. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  323. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  324. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  325. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  326. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  327. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  328. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  329. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  330. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  331. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  332. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  333. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  334. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  335. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  336. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  337. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  338. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  339. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  340. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  341. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  342. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  343. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  344. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  345. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  346. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  347. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  348. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  349. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  350. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  351. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  352. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  353. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  354. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  355. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  356. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  357. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  358. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  359. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  360. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  361. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  362. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  363. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  364. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  365. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  366. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  367. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  368. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  369. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  370. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  371. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  372. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  373. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  374. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  375. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  376. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  377. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  378. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  379. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  380. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  381. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  382. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  383. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  384. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  385. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  386. inspect_ai/tool/beta/__init__.py +0 -5
  387. inspect_ai-0.3.62.dist-info/RECORD +0 -481
  388. /inspect_ai/{tool/beta/_computer/_resources/tool/__init__.py → _view/www/src/components/MorePopOver.css} +0 -0
  389. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  390. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _view/www/src/workspace/tabs/InfoTab.module.css} +0 -0
  391. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  392. /inspect_ai/tool/{beta → _tools}/_computer/_common.py +0 -0
  393. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  394. /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
  395. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  396. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  397. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  398. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  399. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  400. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  401. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  402. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
  403. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
  404. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  405. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  406. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
  407. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  408. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  409. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  410. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  411. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  412. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.63.dist-info}/LICENSE +0 -0
  413. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.63.dist-info}/WHEEL +0 -0
  414. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.63.dist-info}/entry_points.txt +0 -0
  415. {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.63.dist-info}/top_level.txt +0 -0
@@ -1,771 +0,0 @@
1
- import { html } from "htm/preact";
2
- import { FontSize } from "../appearance/Fonts.mjs";
3
- import { ApplicationStyles } from "../appearance/Styles.mjs";
4
- import {
5
- formatPrettyDecimal,
6
- formatDecimalNoTrailingZeroes,
7
- inputString,
8
- arrayToString,
9
- } from "../utils/Format.mjs";
10
- import { RenderedContent } from "../components/RenderedContent/RenderedContent.mjs";
11
- import { isNumeric } from "../utils/Type.mjs";
12
- import {
13
- kScoreTypeCategorical,
14
- kScoreTypeNumeric,
15
- kScoreTypeObject,
16
- kScoreTypeOther,
17
- kScoreTypePassFail,
18
- } from "../constants.mjs";
19
-
20
- /**
21
- * Represents a utility summary of the samples that doesn't change with the selected score.
22
- * @typedef {Object} EvalDescriptor
23
- * @property {number} epochs - The number of epochs.
24
- * @property {import("../api/Types.ts").SampleSummary[]} samples - The list of sample summaries.
25
- * @property {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
26
- * @property {(sample: import("../api/Types.ts").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => ScorerDescriptor} scorerDescriptor - Returns the scorer descriptor for a sample and a specified scorer.
27
- * @property {(scoreLabel: import("../Types.mjs").ScoreLabel) => ScoreDescriptor} scoreDescriptor - Provides information about the score types and how to render them.
28
- * @property {(sample: import("../api/Types.ts").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => SelectedScore} score - Returns information about a score for a sample.
29
- * @property {(sample: import("../api/Types.ts").BasicSampleData, scorer: string) => string} scoreAnswer - Returns the answer for a sample and a specified scorer.
30
- */
31
-
32
- /**
33
- * Represents a utility summary of the samples.
34
- * @typedef {Object} SamplesDescriptor
35
- * @property {EvalDescriptor} evalDescriptor - The EvalDescriptor.
36
- * @property {MessageShape} messageShape - The normalized sizes of input, target, and answer messages.
37
- * @property {ScoreDescriptor} selectedScoreDescriptor - Provides information about the score types and how to render them.
38
- * @property {(sample: import("../api/Types.ts").BasicSampleData) => SelectedScore} selectedScore - Returns the selected score for a sample.
39
- * @property {(sample: import("../api/Types.ts").BasicSampleData) => ScorerDescriptor} selectedScorerDescriptor - Returns the scorer descriptor for a sample using the selected scorer.
40
- */
41
-
42
- /**
43
- * Provides information about the score types and rendering functions.
44
- * @typedef {Object} ScoreDescriptor
45
- * @property {string} scoreType - The type of the score (e.g., 'numeric', 'categorical', 'boolean').
46
- * @property {Array<Object>} [categories] - The categories for categorical scores.
47
- * @property {number} [min] - The minimum value for numeric scores.
48
- * @property {number} [max] - The maximum value for numeric scores.
49
- * @property {(a: import("../types/log").Value2, b: import("../types/log").Value2) => number} compare - Function to compare two score values.
50
- * @property {(score: import("../types/log").Value2) => any} render - Function to render the score value.
51
- */
52
-
53
- /**
54
- * Provides descriptor functions for a scorer.
55
- * @typedef {Object} ScorerDescriptor
56
- * @property {() => string} metadata - Function to retrieve the metadata of the score.
57
- * @property {() => string} explanation - Function to retrieve the explanation of the score.
58
- * @property {() => string} answer - Function to retrieve the answer associated with the score.
59
- * @property {function(): Array<{name: string, rendered: function(): any}>} scores - Function to retrieve scores with their render functions.
60
- */
61
-
62
- /**
63
- * Represents a score for a sample, including its value and render function.
64
- * @typedef {Object} SelectedScore
65
- * @property {import("../types/log").Value2} value - The value of the selected score.
66
- * @property {function(): any} render - Function to render the selected score.
67
- */
68
-
69
- /**
70
- * Describes the shape of the messages based on their sizes.
71
- * @typedef {Object} MessageShape
72
- * @property {Object} raw
73
- * @property {number} raw.id - Raw (un-normalized) size of the id
74
- * @property {number} raw.input - Raw (un-normalized) size of the input message.
75
- * @property {number} raw.target - Raw (un-normalized) size of the target message.
76
- * @property {number} raw.answer - Raw (un-normalized) size of the answer message.
77
- * @property {number} raw.limit - Raw (un-normalized) size of the limit message.
78
- * @property {Object} normalized
79
- * @property {number} normalized.id - Normalized size of the id
80
- * @property {number} normalized.input - Normalized size of the input message.
81
- * @property {number} normalized.target - Normalized size of the target message.
82
- * @property {number} normalized.answer - Normalized size of the answer message.
83
- * @property {number} normalized.limit - Normalized size of the limit message.
84
- */
85
-
86
- /**
87
- * @param {import("../Types.mjs").ScoreLabel | undefined} scoreLabel
88
- * @returns {string}
89
- */
90
- export const scoreLabelKey = (scoreLabel) => {
91
- if (!scoreLabel) {
92
- return "No score key";
93
- }
94
- return `${scoreLabel.scorer}.${scoreLabel.name}`;
95
- };
96
-
97
- /**
98
- * @param {string} key
99
- * @returns {import("../Types.mjs").ScoreLabel | undefined}
100
- */
101
- export const parseScoreLabelKey = (key) => {
102
- if (key == "No score key") {
103
- return undefined;
104
- }
105
- const [scorer, name] = key.split(".");
106
- return { scorer, name };
107
- };
108
-
109
- /**
110
- * @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
111
- * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
112
- * @param {number} epochs - The number of epochs
113
- * @returns {EvalDescriptor} The EvalDescriptor
114
- */
115
- export const createEvalDescriptor = (scores, samples, epochs) => {
116
- if (!samples) {
117
- return undefined;
118
- }
119
-
120
- /**
121
- * @param {import("../api/Types.ts").BasicSampleData} sample - the sample whose score value is read
122
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel - the score label
123
- * @returns {import("../types/log").Value2} The Score
124
- */
125
- const scoreValue = (sample, scoreLabel) => {
126
- // no scores, no value
127
- if (Object.keys(sample.scores).length === 0 || !scoreLabel) {
128
- return undefined;
129
- }
130
-
131
- if (
132
- scoreLabel.scorer !== scoreLabel.name &&
133
- sample.scores[scoreLabel.scorer] &&
134
- sample.scores[scoreLabel.scorer].value
135
- ) {
136
- return sample.scores[scoreLabel.scorer].value[scoreLabel.name];
137
- } else if (sample.scores[scoreLabel.name]) {
138
- return sample.scores[scoreLabel.name].value;
139
- } else {
140
- return undefined;
141
- }
142
- };
143
-
144
- /**
145
- * @param {import("../api/Types.ts").BasicSampleData} sample - the sample whose answer is read
146
- * @param {string} scorer - the scorer name
147
- * @returns {string} The answer
148
- */
149
- const scoreAnswer = (sample, scorer) => {
150
- if (sample) {
151
- const sampleScore = sample.scores[scorer];
152
- if (sampleScore && sampleScore.answer) {
153
- return sampleScore.answer;
154
- }
155
- } else {
156
- return undefined;
157
- }
158
- };
159
-
160
- /**
161
- * @param {import("../api/Types.ts").BasicSampleData} sample - the sample whose explanation is read
162
- * @param {string} scorer - the scorer name
163
- * @returns {string} The explanation
164
- */
165
- const scoreExplanation = (sample, scorer) => {
166
- if (sample) {
167
- const sampleScore = sample.scores[scorer];
168
- if (sampleScore && sampleScore.explanation) {
169
- return sampleScore.explanation;
170
- }
171
- }
172
- return undefined;
173
- };
174
-
175
- // Retrieve the metadata for a sample
176
- /**
177
- * @param {import("../api/Types.ts").BasicSampleData} sample - the sample whose score metadata is read
178
- * @param {string} scorer - the scorer name
179
- * @returns {Object} The metadata
180
- */
181
- const scoreMetadata = (sample, scorer) => {
182
- if (sample) {
183
- const sampleScore = sample.scores[scorer];
184
- if (sampleScore && sampleScore.metadata) {
185
- return sampleScore.metadata;
186
- }
187
- }
188
- return undefined;
189
- };
190
-
191
- /**
192
- * The EvalDescriptor is memoized. Compute all descriptors now to avoid duplicate work.
193
- * @type {Map<string, ScoreDescriptor>}
194
- */
195
- const scoreDescriptorMap = new Map();
196
- for (const scoreLabel of scores) {
197
- const uniqScoreValues = [
198
- ...new Set(
199
- samples
200
- .filter((sample) => !!sample.scores)
201
- .filter((sample) => {
202
- // There is no selected scorer, so include this value
203
- if (!scoreLabel) {
204
- return true;
205
- }
206
-
207
- if (scoreLabel.scorer !== scoreLabel.name) {
208
- return (
209
- Object.keys(sample.scores).includes(scoreLabel.scorer) &&
210
- Object.keys(sample.scores[scoreLabel.scorer].value).includes(
211
- scoreLabel.name,
212
- )
213
- );
214
- } else {
215
- return Object.keys(sample.scores).includes(scoreLabel.name);
216
- }
217
- })
218
- .map((sample) => {
219
- return scoreValue(sample, scoreLabel);
220
- })
221
- .filter((value) => {
222
- return value !== null;
223
- }),
224
- ),
225
- ];
226
- const uniqScoreTypes = [
227
- ...new Set(uniqScoreValues.map((scoreValue) => typeof scoreValue)),
228
- ];
229
-
230
- for (const categorizer of scoreCategorizers) {
231
- const scoreDescriptor = categorizer.describe(
232
- uniqScoreValues,
233
- uniqScoreTypes,
234
- );
235
- if (scoreDescriptor) {
236
- scoreDescriptorMap.set(scoreLabelKey(scoreLabel), scoreDescriptor);
237
- break;
238
- }
239
- }
240
- }
241
-
242
- /**
243
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
244
- * @returns {ScoreDescriptor | undefined}
245
- */
246
- const scoreDescriptor = (scoreLabel) => {
247
- return scoreDescriptorMap.get(scoreLabelKey(scoreLabel));
248
- };
249
-
250
- /**
251
- * @param {import("../api/Types.ts").BasicSampleData} sample
252
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
253
- * @returns {any}
254
- */
255
- const scoreRendered = (sample, scoreLabel) => {
256
- const descriptor = scoreDescriptor(scoreLabel);
257
- const score = scoreValue(sample, scoreLabel);
258
- if (score === null || score === "undefined") {
259
- return "null";
260
- } else if (descriptor && descriptor.render) {
261
- return descriptor.render(score);
262
- } else {
263
- return score;
264
- }
265
- };
266
-
267
- /**
268
- * @param {import("../api/Types.ts").BasicSampleData} sample
269
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
270
- * @returns {ScorerDescriptor}
271
- */
272
- const scorerDescriptor = (sample, scoreLabel) => {
273
- return {
274
- metadata: () => {
275
- return scoreMetadata(sample, scoreLabel.scorer);
276
- },
277
- explanation: () => {
278
- return scoreExplanation(sample, scoreLabel.scorer);
279
- },
280
- answer: () => {
281
- return scoreAnswer(sample, scoreLabel.scorer);
282
- },
283
- scores: () => {
284
- if (!sample || !sample.scores) {
285
- return [];
286
- }
287
- const myScoreDescriptor = scoreDescriptor(scoreLabel);
288
- if (!myScoreDescriptor) {
289
- return [];
290
- }
291
-
292
- // Make a list of all the valid score names (this is
293
- // used to distinguish between dictionaries that contain
294
- // scores that should be treated as standalone scores and
295
- // dictionaries that just contain random values, which is allowed)
296
- const scoreNames = scores.map((score) => {
297
- return score.name;
298
- });
299
- const sampleScorer = sample.scores[scoreLabel.scorer];
300
- const scoreVal = sampleScorer.value;
301
-
302
- if (typeof scoreVal === "object") {
303
- const names = Object.keys(scoreVal);
304
-
305
- // See if this is a dictionary of score names
306
- // if any of the score names match, treat it
307
- // as a scorer dictionary
308
- if (
309
- names.find((name) => {
310
- return scoreNames.includes(name);
311
- })
312
- ) {
313
- // Since this dictionary contains keys which are scores
314
- // we actually render the individual scores
315
- const scores = names.map((name) => {
316
- return {
317
- name,
318
- rendered: () => {
319
- return myScoreDescriptor.render(scoreVal[name]);
320
- },
321
- };
322
- });
323
- return scores;
324
- } else {
325
- // Since this dictionary contains keys which are not scores
326
- // we just treat it like an opaque dictionary
327
- return [
328
- {
329
- name: scoreLabel.scorer,
330
- rendered: () => {
331
- return myScoreDescriptor.render(scoreVal);
332
- },
333
- },
334
- ];
335
- }
336
- } else {
337
- return [
338
- {
339
- name: scoreLabel.scorer,
340
- rendered: () => {
341
- return myScoreDescriptor.render(scoreVal);
342
- },
343
- },
344
- ];
345
- }
346
- },
347
- };
348
- };
349
-
350
- /**
351
- * @param {import("../api/Types.ts").BasicSampleData} sample
352
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
353
- * @returns {SelectedScore}
354
- */
355
- const score = (sample, scoreLabel) => {
356
- return {
357
- value: scoreValue(sample, scoreLabel),
358
- render: () => {
359
- return scoreRendered(sample, scoreLabel);
360
- },
361
- };
362
- };
363
-
364
- return {
365
- epochs,
366
- samples,
367
- scores,
368
- scorerDescriptor,
369
- scoreDescriptor,
370
- score,
371
- scoreAnswer,
372
- };
373
- };
374
-
375
- /**
376
- * Provides a utility summary of the samples
377
- *
378
- * @param {EvalDescriptor} evalDescriptor - The EvalDescriptor.
379
- * @param {import("../Types.mjs").ScoreLabel} selectedScore - Selected score.
380
- * @returns {SamplesDescriptor} - The SamplesDescriptor.
381
- */
382
- export const createSamplesDescriptor = (evalDescriptor, selectedScore) => {
383
- if (!evalDescriptor) {
384
- return undefined;
385
- }
386
-
387
- // Find the maximum (capped) text length of each column so the sizes can be normalized
388
- const sizes = evalDescriptor.samples.reduce(
389
- (previous, current) => {
390
- const text = inputString(current.input).join(" ");
391
- const scoreValue = evalDescriptor.score(current, selectedScore).value;
392
- const scoreText = scoreValue
393
- ? String(scoreValue)
394
- : current.error
395
- ? String(current.error)
396
- : "";
397
- previous[0] = Math.min(Math.max(previous[0], text.length), 300);
398
- previous[1] = Math.min(
399
- Math.max(previous[1], arrayToString(current.target).length),
400
- 300,
401
- );
402
- previous[2] = Math.min(
403
- Math.max(
404
- previous[2],
405
- evalDescriptor.scoreAnswer(current, selectedScore?.name)?.length || 0,
406
- ),
407
- 300,
408
- );
409
- previous[3] = Math.min(
410
- Math.max(previous[3], current.limit ? current.limit.length : 0),
411
- 50,
412
- );
413
- previous[4] = Math.min(
414
- Math.max(previous[4], String(current.id).length),
415
- 10,
416
- );
417
- previous[5] = Math.min(Math.max(previous[5], scoreText.length), 30);
418
-
419
- return previous;
420
- },
421
- [0, 0, 0, 0, 0, 0],
422
- );
423
-
424
- // normalize to base 1
425
- const maxSizes = {
426
- input: Math.min(sizes[0], 300),
427
- target: Math.min(sizes[1], 300),
428
- answer: Math.min(sizes[2], 300),
429
- limit: Math.min(sizes[3], 50),
430
- id: Math.min(sizes[4], 10),
431
- score: Math.min(sizes[4], 30),
432
- };
433
- const base =
434
- maxSizes.input +
435
- maxSizes.target +
436
- maxSizes.answer +
437
- maxSizes.limit +
438
- maxSizes.id +
439
- maxSizes.score || 1;
440
- const messageShape = {
441
- raw: {
442
- input: sizes[0],
443
- target: sizes[1],
444
- answer: sizes[2],
445
- limit: sizes[3],
446
- id: sizes[4],
447
- score: sizes[5],
448
- },
449
- normalized: {
450
- input: maxSizes.input / base,
451
- target: maxSizes.target / base,
452
- answer: maxSizes.answer / base,
453
- limit: maxSizes.limit / base,
454
- id: maxSizes.id / base,
455
- score: maxSizes.score / base,
456
- },
457
- };
458
-
459
- return {
460
- evalDescriptor,
461
- messageShape,
462
- selectedScoreDescriptor: evalDescriptor.scoreDescriptor(selectedScore),
463
- selectedScore: (sample) => evalDescriptor.score(sample, selectedScore),
464
- selectedScorerDescriptor: (sample) =>
465
- evalDescriptor.scorerDescriptor(sample, selectedScore),
466
- };
467
- };
468
-
469
- /**
470
- * @typedef {Object} ScoreCategorizer
471
- * @property {(values: import("../types/log").Value2[], types?: ("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]) => ScoreDescriptor} describe
472
- */
473
- const scoreCategorizers = [
474
- {
475
- /**
476
- * @param {import("../types/log").Value2[]} values - the unique score values
477
- * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the unique `typeof` results of the values
478
- * @returns {ScoreDescriptor} a ScoreDescriptor
479
- */
480
- describe: (values, types) => {
481
- if (types.length === 1 && types[0] === "boolean") {
482
- return booleanScoreCategorizer();
483
- }
484
- },
485
- },
486
- {
487
- /**
488
- * @param {import("../types/log").Value2[]} values - the unique score values
489
- * @returns {ScoreDescriptor} a ScoreDescriptor
490
- */
491
- describe: (values) => {
492
- if (
493
- values.length === 2 &&
494
- values.every((val) => {
495
- return val === 1 || val === 0;
496
- })
497
- ) {
498
- return booleanScoreCategorizer();
499
- }
500
- },
501
- },
502
- {
503
- /**
504
- * @param {import("../types/log").Value2[]} values - the unique score values
505
- * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the unique `typeof` results of the values
506
- * @returns {ScoreDescriptor} a ScoreDescriptor
507
- */
508
- describe: (values, types) => {
509
- if (
510
- types[0] === "string" &&
511
- types.length === 1 &&
512
- values.length < 5 &&
513
- !values.find((val) => {
514
- return val !== "I" && val !== "C" && val !== "P" && val !== "N";
515
- })
516
- ) {
517
- return passFailScoreCategorizer(values);
518
- }
519
- },
520
- },
521
- {
522
- /**
523
- * @param {import("../types/log").Value2[]} values - the unique score values
524
- * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the unique `typeof` results of the values
525
- * @returns {ScoreDescriptor} a ScoreDescriptor
526
- */
527
- describe: (values, types) => {
528
- if (values.length < 10 && types.length === 1 && types[0] === "string") {
529
- return {
530
- scoreType: kScoreTypeCategorical,
531
- categories: values,
532
- compare: (a, b) => {
533
- return String(a).localeCompare(String(b));
534
- },
535
- render: (score) => {
536
- return score;
537
- },
538
- };
539
- }
540
- },
541
- },
542
- {
543
- /**
544
- * @param {import("../types/log").Value2[]} values - the unique score values
545
- * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the unique `typeof` results of the values
546
- * @returns {ScoreDescriptor} a ScoreDescriptor
547
- */
548
- describe: (values, types) => {
549
- if (types.length !== 0 && types[0] === "number") {
550
- const onlyNumeric = values.filter((val) => {
551
- return typeof val === "number";
552
- });
553
-
554
- return {
555
- scoreType: kScoreTypeNumeric,
556
- min: Math.min(...onlyNumeric),
557
- max: Math.max(...onlyNumeric),
558
- compare: (a, b) => {
559
- if (typeof a === "number" && typeof b === "number") {
560
- return a - b;
561
- } else {
562
- console.warn(
563
- "Comparing non-numerics using a nuermic score descriptor",
564
- );
565
- return 0;
566
- }
567
- },
568
- render: (score) => {
569
- return formatDecimalNoTrailingZeroes(Number(score));
570
- },
571
- };
572
- }
573
- },
574
- },
575
- {
576
- /**
577
- * @param {import("../types/log").Value2[]} values - the unique score values
578
- * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the unique `typeof` results of the values
579
- * @returns {ScoreDescriptor} a ScoreDescriptor
580
- */
581
- describe: (values, types) => {
582
- if (types.length !== 0 && types[0] === "object") {
583
- const buckets = values.map((val) => {
584
- return JSON.stringify(val);
585
- });
586
- const vals = new Set(buckets);
587
- let categories = undefined;
588
- if (vals.size < 10) {
589
- categories = Array.from(vals).map((val) => {
590
- return {
591
- val,
592
- text: val,
593
- };
594
- });
595
- }
596
-
597
- return {
598
- scoreType: kScoreTypeObject,
599
- categories,
600
- compare: () => {
601
- return 0;
602
- },
603
- render: (score) => {
604
- if (score === null || score === undefined) {
605
- return "[null]";
606
- }
607
-
608
- const scores = [];
609
- const keys = Object.keys(score);
610
- keys.forEach((key, index) => {
611
- const value = score[key];
612
- const formattedValue = isNumeric(value)
613
- ? formatPrettyDecimal(parseFloat(value))
614
- : value;
615
- const style = {
616
- display: "flex",
617
- flexDirection: "column",
618
- alignItems: "center",
619
- marginLeft: "0.5rem",
620
- };
621
- if (index + 1 < keys.length) {
622
- style["paddingBottom"] = "1em";
623
- }
624
- scores.push(html`
625
- <div style=${style}>
626
- <div style=${{ fontSize: FontSize.smaller, fontWeight: 300 }}>
627
- ${key}
628
- </div>
629
- <div style=${{ fontSize: FontSize.title, fontWeight: 600 }}>
630
- ${formattedValue}
631
- </div>
632
- </div>
633
- `);
634
- });
635
-
636
- return scores;
637
- },
638
- };
639
- }
640
- },
641
- },
642
- {
643
- /**
644
- * @returns {ScoreDescriptor} a ScoreDescriptor
645
- */
646
- // @ts-ignore
647
- describe: () => {
648
- return {
649
- scoreType: kScoreTypeOther,
650
- compare: () => {
651
- return 0;
652
- },
653
- render: (score) => {
654
- return html`<${RenderedContent}
655
- id="other-score-value"
656
- entry=${{ value: score }}
657
- />`;
658
- },
659
- };
660
- },
661
- },
662
- ];
663
-
664
- const filledCircleStyle = {
665
- fontSize: FontSize.small,
666
- fontFamily: "Consola Regular",
667
- width: "20px",
668
- height: "20px",
669
- display: "inline-flex",
670
- justifyContent: "center",
671
- alignItems: "center",
672
- borderRadius: "50%",
673
- paddingTop: "1px",
674
- };
675
-
676
- const booleanScoreCategorizer = () => {
677
- return {
678
- scoreType: "boolean",
679
- compare: (a, b) => {
680
- return Number(a.value) - Number(b.value);
681
- },
682
- render: (score) => {
683
- const scoreColorStyle = score
684
- ? ApplicationStyles.scoreFills.green
685
- : ApplicationStyles.scoreFills.red;
686
-
687
- return html`<span
688
- style=${{
689
- ...scoreColorStyle,
690
- ...filledCircleStyle,
691
- }}
692
- >${score}</span
693
- >`;
694
- },
695
- };
696
- };
697
-
698
- const passFailScoreCategorizer = (values) => {
699
- const categories = [];
700
- if (values.includes("C")) {
701
- categories.push({
702
- val: "C",
703
- text: "Correct",
704
- });
705
- }
706
- if (values.includes("P")) {
707
- categories.push({
708
- val: "P",
709
- text: "Partial",
710
- });
711
- }
712
- if (values.includes("I")) {
713
- categories.push({
714
- val: "I",
715
- text: "Incorrect",
716
- });
717
- }
718
- if (values.includes("N")) {
719
- categories.push({
720
- val: "N",
721
- text: "Refusal",
722
- });
723
- }
724
- const order = ["C", "P", "I", "N"];
725
-
726
- return {
727
- scoreType: kScoreTypePassFail,
728
- categories,
729
- render: (score) => {
730
- if (score === "C") {
731
- return html`<span
732
- style=${{
733
- ...ApplicationStyles.scoreFills.green,
734
- ...filledCircleStyle,
735
- }}
736
- >C</span
737
- >`;
738
- } else if (score === "I") {
739
- return html`<span
740
- style=${{
741
- ...ApplicationStyles.scoreFills.red,
742
- ...filledCircleStyle,
743
- }}
744
- >I</span
745
- >`;
746
- } else if (score === "P") {
747
- return html`<span
748
- style=${{
749
- ...ApplicationStyles.scoreFills.orange,
750
- ...filledCircleStyle,
751
- }}
752
- >P</span
753
- >`;
754
- } else if (score === "N") {
755
- return html`<span
756
- style=${{
757
- ...ApplicationStyles.scoreFills.red,
758
- ...filledCircleStyle,
759
- }}
760
- >N</span
761
- >`;
762
- } else {
763
- return score;
764
- }
765
- },
766
- compare: (a, b) => {
767
- const sort = order.indexOf(a.value) - order.indexOf(b.value);
768
- return sort;
769
- },
770
- };
771
- };