inspect-ai 0.3.61__py3-none-any.whl → 0.3.63__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (430) hide show
  1. inspect_ai/_cli/eval.py +13 -0
  2. inspect_ai/_cli/main.py +1 -1
  3. inspect_ai/_cli/trace.py +8 -0
  4. inspect_ai/_cli/view.py +4 -0
  5. inspect_ai/_display/core/active.py +2 -3
  6. inspect_ai/_display/textual/widgets/transcript.py +15 -9
  7. inspect_ai/_eval/eval.py +4 -4
  8. inspect_ai/_eval/evalset.py +6 -6
  9. inspect_ai/_eval/task/error.py +10 -14
  10. inspect_ai/_eval/task/run.py +13 -8
  11. inspect_ai/_util/hash.py +1 -1
  12. inspect_ai/_util/transcript.py +11 -0
  13. inspect_ai/_view/www/.vscode/extensions.json +3 -0
  14. inspect_ai/_view/www/.vscode/settings.json +8 -0
  15. inspect_ai/_view/www/App.css +92 -29
  16. inspect_ai/_view/www/dist/assets/index.css +16636 -14674
  17. inspect_ai/_view/www/dist/assets/index.js +43585 -36122
  18. inspect_ai/_view/www/dist/index.html +1 -1
  19. inspect_ai/_view/www/index.html +2 -2
  20. inspect_ai/_view/www/log-schema.json +36 -19
  21. inspect_ai/_view/www/package.json +22 -4
  22. inspect_ai/_view/www/postcss.config.cjs +8 -9
  23. inspect_ai/_view/www/src/{App.mjs → App.tsx} +355 -365
  24. inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
  25. inspect_ai/_view/www/src/api/api-browser.ts +2 -2
  26. inspect_ai/_view/www/src/api/api-http.ts +3 -5
  27. inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
  28. inspect_ai/_view/www/src/api/client-api.ts +4 -4
  29. inspect_ai/_view/www/src/api/index.ts +4 -4
  30. inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
  31. inspect_ai/_view/www/src/appearance/colors.ts +9 -0
  32. inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
  33. inspect_ai/_view/www/src/appearance/icons.ts +100 -0
  34. inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
  35. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
  36. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
  37. inspect_ai/_view/www/src/components/Card.css +60 -0
  38. inspect_ai/_view/www/src/components/Card.tsx +109 -0
  39. inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
  40. inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
  41. inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
  42. inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
  43. inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
  45. inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
  46. inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
  47. inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
  48. inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
  49. inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
  50. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
  51. inspect_ai/_view/www/src/components/FindBand.css +49 -0
  52. inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
  53. inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
  54. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
  55. inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
  56. inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
  57. inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
  58. inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
  59. inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
  60. inspect_ai/_view/www/src/components/LargeModal.tsx +199 -0
  61. inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
  62. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
  63. inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
  64. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
  65. inspect_ai/_view/www/src/components/MessageBand.css +43 -0
  66. inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
  67. inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
  68. inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
  69. inspect_ai/_view/www/src/components/NavPills.tsx +99 -0
  70. inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
  71. inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
  72. inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
  73. inspect_ai/_view/www/src/components/TabSet.tsx +200 -0
  74. inspect_ai/_view/www/src/components/ToolButton.css +3 -0
  75. inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
  76. inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
  77. inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
  78. inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
  79. inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -7
  80. inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
  81. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
  82. inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
  83. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
  84. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
  85. inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
  86. inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
  87. inspect_ai/_view/www/src/metadata/types.ts +18 -0
  88. inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
  89. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
  90. inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
  91. inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
  92. inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
  93. inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
  94. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +309 -0
  95. inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
  96. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
  97. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
  98. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
  99. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
  100. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
  101. inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
  102. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
  103. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +326 -0
  104. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
  105. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +175 -0
  106. inspect_ai/_view/www/src/samples/SamplesTools.tsx +60 -0
  107. inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
  108. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
  109. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
  110. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
  111. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
  112. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +46 -0
  113. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
  114. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
  115. inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
  116. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +143 -0
  117. inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
  118. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +131 -0
  119. inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
  120. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +145 -0
  121. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
  122. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +86 -0
  123. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
  124. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +53 -0
  125. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
  126. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
  127. inspect_ai/_view/www/src/samples/chat/tools/tool.ts +107 -0
  128. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +363 -0
  129. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
  130. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
  131. inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
  132. inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
  133. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
  134. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
  135. inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
  136. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
  137. inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
  138. inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
  139. inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
  140. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
  141. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
  142. inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
  143. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
  144. inspect_ai/_view/www/src/samples/error/error.ts +15 -0
  145. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
  146. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
  147. inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
  148. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
  149. inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
  150. inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
  151. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
  152. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
  153. inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
  154. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
  155. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
  156. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
  157. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
  158. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +173 -0
  159. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
  160. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +182 -0
  161. inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
  162. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
  163. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
  164. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
  165. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
  166. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
  167. inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
  168. inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
  169. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
  170. inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
  171. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
  172. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
  173. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
  174. inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
  175. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
  176. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
  177. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
  178. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
  179. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
  180. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
  181. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
  182. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +108 -0
  183. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
  184. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
  185. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
  186. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
  187. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
  188. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
  189. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
  190. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
  191. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +91 -0
  192. inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
  193. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
  194. inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
  195. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
  196. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
  197. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +38 -0
  198. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
  199. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +190 -0
  200. inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
  201. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
  202. inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
  203. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
  204. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
  205. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +274 -0
  206. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
  207. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
  208. inspect_ai/_view/www/src/samples/transcript/state/{StateEventView.mjs → StateEventView.tsx} +148 -110
  209. inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
  210. inspect_ai/_view/www/src/types/log.d.ts +7 -4
  211. inspect_ai/_view/www/src/types/prism.d.ts +11 -0
  212. inspect_ai/_view/www/src/types.ts +71 -0
  213. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +22 -0
  214. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
  215. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +95 -0
  216. inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
  217. inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
  218. inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
  219. inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
  220. inspect_ai/_view/www/src/utils/attachments.ts +42 -0
  221. inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
  222. inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
  223. inspect_ai/_view/www/src/utils/debugging.ts +28 -0
  224. inspect_ai/_view/www/src/utils/dom.ts +30 -0
  225. inspect_ai/_view/www/src/utils/format.ts +194 -0
  226. inspect_ai/_view/www/src/utils/git.ts +7 -0
  227. inspect_ai/_view/www/src/utils/html.ts +6 -0
  228. inspect_ai/_view/www/src/utils/http.ts +14 -0
  229. inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
  230. inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
  231. inspect_ai/_view/www/src/utils/queue.ts +51 -0
  232. inspect_ai/_view/www/src/utils/sync.ts +114 -0
  233. inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
  234. inspect_ai/_view/www/src/utils/vscode.ts +13 -0
  235. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
  236. inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
  237. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +160 -0
  238. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
  239. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
  240. inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
  241. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
  242. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
  243. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +113 -0
  244. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +67 -0
  245. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +156 -0
  246. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
  247. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +222 -0
  248. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
  249. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
  250. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
  251. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
  252. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
  253. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
  254. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
  255. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
  256. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
  257. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
  258. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
  259. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +41 -0
  260. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
  261. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +61 -0
  262. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +80 -0
  263. inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
  264. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
  265. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
  266. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
  267. inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
  268. inspect_ai/_view/www/src/workspace/types.ts +10 -0
  269. inspect_ai/_view/www/tsconfig.json +23 -9
  270. inspect_ai/_view/www/vite.config.js +8 -17
  271. inspect_ai/_view/www/yarn.lock +627 -556
  272. inspect_ai/dataset/_dataset.py +36 -0
  273. inspect_ai/dataset/_sources/csv.py +8 -0
  274. inspect_ai/dataset/_sources/file.py +4 -0
  275. inspect_ai/dataset/_sources/hf.py +11 -1
  276. inspect_ai/dataset/_sources/json.py +8 -0
  277. inspect_ai/log/_log.py +3 -6
  278. inspect_ai/log/_message.py +1 -1
  279. inspect_ai/log/_recorders/eval.py +1 -1
  280. inspect_ai/log/_recorders/json.py +5 -7
  281. inspect_ai/model/_call_tools.py +2 -1
  282. inspect_ai/model/_chat_message.py +27 -0
  283. inspect_ai/model/_conversation.py +10 -3
  284. inspect_ai/model/_generate_config.py +6 -0
  285. inspect_ai/model/_model.py +74 -0
  286. inspect_ai/model/_openai.py +33 -1
  287. inspect_ai/model/_providers/anthropic.py +12 -0
  288. inspect_ai/model/_providers/groq.py +4 -0
  289. inspect_ai/model/_providers/openai.py +21 -9
  290. inspect_ai/model/_providers/openai_o1.py +3 -5
  291. inspect_ai/model/_providers/openrouter.py +86 -0
  292. inspect_ai/model/_providers/providers.py +12 -1
  293. inspect_ai/model/_reasoning.py +17 -0
  294. inspect_ai/scorer/_answer.py +7 -7
  295. inspect_ai/scorer/_classification.py +34 -18
  296. inspect_ai/scorer/_common.py +2 -8
  297. inspect_ai/solver/_basic_agent.py +19 -9
  298. inspect_ai/solver/_multiple_choice.py +24 -9
  299. inspect_ai/tool/__init__.py +2 -0
  300. inspect_ai/tool/{beta → _tools}/_computer/_computer.py +2 -5
  301. inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +4 -0
  302. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  303. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +3 -0
  304. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +61 -0
  305. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +10 -0
  306. inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
  307. inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
  308. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  309. inspect_ai/tool/_tools/_execute.py +8 -2
  310. inspect_ai/tool/beta.py +3 -0
  311. inspect_ai/util/_sandbox/docker/docker.py +32 -85
  312. inspect_ai/util/_sandbox/self_check.py +124 -16
  313. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/METADATA +2 -1
  314. inspect_ai-0.3.63.dist-info/RECORD +618 -0
  315. inspect_ai/_view/www/src/Register.mjs +0 -3
  316. inspect_ai/_view/www/src/Types.mjs +0 -38
  317. inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
  318. inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
  319. inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
  320. inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
  321. inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
  322. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
  323. inspect_ai/_view/www/src/components/Card.mjs +0 -126
  324. inspect_ai/_view/www/src/components/ChatView.mjs +0 -418
  325. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
  326. inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
  327. inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
  328. inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
  329. inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
  330. inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
  331. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
  332. inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
  333. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
  334. inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
  335. inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
  336. inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
  337. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
  338. inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
  339. inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
  340. inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
  341. inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
  342. inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
  343. inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
  344. inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
  345. inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
  346. inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
  347. inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
  348. inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
  349. inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
  350. inspect_ai/_view/www/src/components/Tools.mjs +0 -376
  351. inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
  352. inspect_ai/_view/www/src/components/ansi-output.js +0 -932
  353. inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
  354. inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
  355. inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
  356. inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
  357. inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
  358. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
  359. inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
  360. inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
  361. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
  362. inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
  363. inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
  364. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
  365. inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
  366. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
  367. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
  368. inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
  369. inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
  370. inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
  371. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
  372. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
  373. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
  374. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
  375. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
  376. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
  377. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
  378. inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
  379. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
  380. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
  381. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
  382. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
  383. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
  384. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
  385. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
  386. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
  387. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
  388. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
  389. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
  390. inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
  391. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
  392. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
  393. inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
  394. inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
  395. inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
  396. inspect_ai/_view/www/src/utils/Format.mjs +0 -260
  397. inspect_ai/_view/www/src/utils/Git.mjs +0 -12
  398. inspect_ai/_view/www/src/utils/Html.mjs +0 -21
  399. inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
  400. inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
  401. inspect_ai/_view/www/src/utils/http.mjs +0 -18
  402. inspect_ai/_view/www/src/utils/queue.mjs +0 -67
  403. inspect_ai/_view/www/src/utils/sync.mjs +0 -101
  404. inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
  405. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
  406. inspect_ai/tool/beta/__init__.py +0 -5
  407. inspect_ai-0.3.61.dist-info/RECORD +0 -476
  408. /inspect_ai/{tool/beta/_computer/_resources/tool/__init__.py → _view/www/src/components/MorePopOver.css} +0 -0
  409. /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
  410. /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _view/www/src/workspace/tabs/InfoTab.module.css} +0 -0
  411. /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
  412. /inspect_ai/tool/{beta → _tools}/_computer/_common.py +0 -0
  413. /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
  414. /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
  415. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
  416. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
  417. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
  418. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
  419. /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
  420. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
  421. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
  422. /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
  423. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
  424. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
  425. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
  426. /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
  427. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/LICENSE +0 -0
  428. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/WHEEL +0 -0
  429. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/entry_points.txt +0 -0
  430. {inspect_ai-0.3.61.dist-info → inspect_ai-0.3.63.dist-info}/top_level.txt +0 -0
@@ -1,771 +0,0 @@
1
- import { html } from "htm/preact";
2
- import { FontSize } from "../appearance/Fonts.mjs";
3
- import { ApplicationStyles } from "../appearance/Styles.mjs";
4
- import {
5
- formatPrettyDecimal,
6
- formatDecimalNoTrailingZeroes,
7
- inputString,
8
- arrayToString,
9
- } from "../utils/Format.mjs";
10
- import { RenderedContent } from "../components/RenderedContent/RenderedContent.mjs";
11
- import { isNumeric } from "../utils/Type.mjs";
12
- import {
13
- kScoreTypeCategorical,
14
- kScoreTypeNumeric,
15
- kScoreTypeObject,
16
- kScoreTypeOther,
17
- kScoreTypePassFail,
18
- } from "../constants.mjs";
19
-
20
- /**
21
- * Represents a utility summary of the samples that doesn't change with the selected score.
22
- * @typedef {Object} EvalDescriptor
23
- * @property {number} epochs - The number of epochs.
24
- * @property {import("../api/Types.ts").SampleSummary[]} samples - The list of sample summaries.
25
- * @property {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
26
- * @property {(sample: import("../api/Types.ts").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => ScorerDescriptor} scorerDescriptor - Returns the scorer descriptor for a sample and a specified scorer.
27
- * @property {(scoreLabel: import("../Types.mjs").ScoreLabel) => ScoreDescriptor} scoreDescriptor - Provides information about the score types and how to render them.
28
- * @property {(sample: import("../api/Types.ts").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => SelectedScore} score - Returns information about a score for a sample.
29
- * @property {(sample: import("../api/Types.ts").BasicSampleData, scorer: string) => string} scoreAnswer - Returns the answer for a sample and a specified scorer.
30
- */
31
-
32
- /**
33
- * Represents a utility summary of the samples.
34
- * @typedef {Object} SamplesDescriptor
35
- * @property {EvalDescriptor} evalDescriptor - The EvalDescriptor.
36
- * @property {MessageShape} messageShape - The normalized sizes of input, target, and answer messages.
37
- * @property {ScoreDescriptor} selectedScoreDescriptor - Provides information about the score types and how to render them.
38
- * @property {(sample: import("../api/Types.ts").BasicSampleData) => SelectedScore} selectedScore - Returns the selected score for a sample.
39
- * @property {(sample: import("../api/Types.ts").BasicSampleData) => ScorerDescriptor} selectedScorerDescriptor - Returns the scorer descriptor for a sample using the selected scorer.
40
- */
41
-
42
- /**
43
- * Provides information about the score types and rendering functions.
44
- * @typedef {Object} ScoreDescriptor
45
- * @property {string} scoreType - The type of the score (e.g., 'numeric', 'categorical', 'boolean').
46
- * @property {Array<Object>} [categories] - The categories for categorical scores.
47
- * @property {number} [min] - The minimum value for numeric scores.
48
- * @property {number} [max] - The maximum value for numeric scores.
49
- * @property {(a: import("../types/log").Value2, b: import("../types/log").Value2) => number} compare - Function to compare two score values.
50
- * @property {(score: import("../types/log").Value2) => any} render - Function to render the score value.
51
- */
52
-
53
- /**
54
- * Provides descriptor functions for a scorer.
55
- * @typedef {Object} ScorerDescriptor
56
- * @property {() => string} metadata - Function to retrieve the metadata of the score.
57
- * @property {() => string} explanation - Function to retrieve the explanation of the score.
58
- * @property {() => string} answer - Function to retrieve the answer associated with the score.
59
- * @property {function(): Array<{name: string, rendered: function(): any}>} scores - Function to retrieve scores with their render functions.
60
- */
61
-
62
- /**
63
- * Represents a score for a sample, including its value and render function.
64
- * @typedef {Object} SelectedScore
65
- * @property {import("../types/log").Value2} value - The value of the selected score.
66
- * @property {function(): any} render - Function to render the selected score.
67
- */
68
-
69
- /**
70
- * Describes the shape of the messages based on their sizes.
71
- * @typedef {Object} MessageShape
72
- * @property {Object} raw
73
- * @property {number} raw.id - Normalized size of the id
74
- * @property {number} raw.input - Normalized size of the input message.
75
- * @property {number} raw.target - Normalized size of the target message.
76
- * @property {number} raw.answer - Normalized size of the answer message.
77
- * @property {number} raw.limit - Normalized size of the limit message.
78
- * @property {Object} normalized
79
- * @property {number} normalized.id - Normalized size of the id
80
- * @property {number} normalized.input - Normalized size of the input message.
81
- * @property {number} normalized.target - Normalized size of the target message.
82
- * @property {number} normalized.answer - Normalized size of the answer message.
83
- * @property {number} normalized.limit - Normalized size of the limit message.
84
- */
85
-
86
- /**
87
- * @param {import("../Types.mjs").ScoreLabel | undefined} scoreLabel
88
- * @returns {string}
89
- */
90
- export const scoreLabelKey = (scoreLabel) => {
91
- if (!scoreLabel) {
92
- return "No score key";
93
- }
94
- return `${scoreLabel.scorer}.${scoreLabel.name}`;
95
- };
96
-
97
- /**
98
- * @param {string} key
99
- * @returns {import("../Types.mjs").ScoreLabel | undefined}
100
- */
101
- export const parseScoreLabelKey = (key) => {
102
- if (key == "No score key") {
103
- return undefined;
104
- }
105
- const [scorer, name] = key.split(".");
106
- return { scorer, name };
107
- };
108
-
109
- /**
110
- * @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
111
- * @param {import("../api/Types.ts").SampleSummary[]} samples - the list of sample summaries
112
- * @param {number} epochs - The number of epochs
113
- * @returns {EvalDescriptor} The EvalDescriptor
114
- */
115
- export const createEvalDescriptor = (scores, samples, epochs) => {
116
- if (!samples) {
117
- return undefined;
118
- }
119
-
120
- /**
121
- * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
122
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel - the score label
123
- * @returns {import("../types/log").Value2} The Score
124
- */
125
- const scoreValue = (sample, scoreLabel) => {
126
- // no scores, no value
127
- if (Object.keys(sample.scores).length === 0 || !scoreLabel) {
128
- return undefined;
129
- }
130
-
131
- if (
132
- scoreLabel.scorer !== scoreLabel.name &&
133
- sample.scores[scoreLabel.scorer] &&
134
- sample.scores[scoreLabel.scorer].value
135
- ) {
136
- return sample.scores[scoreLabel.scorer].value[scoreLabel.name];
137
- } else if (sample.scores[scoreLabel.name]) {
138
- return sample.scores[scoreLabel.name].value;
139
- } else {
140
- return undefined;
141
- }
142
- };
143
-
144
- /**
145
- * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
146
- * @param {string} scorer - the scorer name
147
- * @returns {string} The answer
148
- */
149
- const scoreAnswer = (sample, scorer) => {
150
- if (sample) {
151
- const sampleScore = sample.scores[scorer];
152
- if (sampleScore && sampleScore.answer) {
153
- return sampleScore.answer;
154
- }
155
- } else {
156
- return undefined;
157
- }
158
- };
159
-
160
- /**
161
- * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
162
- * @param {string} scorer - the scorer name
163
- * @returns {string} The explanation
164
- */
165
- const scoreExplanation = (sample, scorer) => {
166
- if (sample) {
167
- const sampleScore = sample.scores[scorer];
168
- if (sampleScore && sampleScore.explanation) {
169
- return sampleScore.explanation;
170
- }
171
- }
172
- return undefined;
173
- };
174
-
175
- // Retrieve the metadata for a sample
176
- /**
177
- * @param {import("../api/Types.ts").BasicSampleData} sample - the currently selected score
178
- * @param {string} scorer - the scorer name
179
- * @returns {Object} The explanation
180
- */
181
- const scoreMetadata = (sample, scorer) => {
182
- if (sample) {
183
- const sampleScore = sample.scores[scorer];
184
- if (sampleScore && sampleScore.metadata) {
185
- return sampleScore.metadata;
186
- }
187
- }
188
- return undefined;
189
- };
190
-
191
- /**
192
- * The EvalDescriptor is memoized. Compute all descriptors now to avoid duplicate work.
193
- * @type {Map<string, ScoreDescriptor>}
194
- */
195
- const scoreDescriptorMap = new Map();
196
- for (const scoreLabel of scores) {
197
- const uniqScoreValues = [
198
- ...new Set(
199
- samples
200
- .filter((sample) => !!sample.scores)
201
- .filter((sample) => {
202
- // There is no selected scorer, so include this value
203
- if (!scoreLabel) {
204
- return true;
205
- }
206
-
207
- if (scoreLabel.scorer !== scoreLabel.name) {
208
- return (
209
- Object.keys(sample.scores).includes(scoreLabel.scorer) &&
210
- Object.keys(sample.scores[scoreLabel.scorer].value).includes(
211
- scoreLabel.name,
212
- )
213
- );
214
- } else {
215
- return Object.keys(sample.scores).includes(scoreLabel.name);
216
- }
217
- })
218
- .map((sample) => {
219
- return scoreValue(sample, scoreLabel);
220
- })
221
- .filter((value) => {
222
- return value !== null;
223
- }),
224
- ),
225
- ];
226
- const uniqScoreTypes = [
227
- ...new Set(uniqScoreValues.map((scoreValue) => typeof scoreValue)),
228
- ];
229
-
230
- for (const categorizer of scoreCategorizers) {
231
- const scoreDescriptor = categorizer.describe(
232
- uniqScoreValues,
233
- uniqScoreTypes,
234
- );
235
- if (scoreDescriptor) {
236
- scoreDescriptorMap.set(scoreLabelKey(scoreLabel), scoreDescriptor);
237
- break;
238
- }
239
- }
240
- }
241
-
242
- /**
243
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
244
- * @returns {ScoreDescriptor | undefined}
245
- */
246
- const scoreDescriptor = (scoreLabel) => {
247
- return scoreDescriptorMap.get(scoreLabelKey(scoreLabel));
248
- };
249
-
250
- /**
251
- * @param {import("../api/Types.ts").BasicSampleData} sample
252
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
253
- * @returns {any}
254
- */
255
- const scoreRendered = (sample, scoreLabel) => {
256
- const descriptor = scoreDescriptor(scoreLabel);
257
- const score = scoreValue(sample, scoreLabel);
258
- if (score === null || score === "undefined") {
259
- return "null";
260
- } else if (descriptor && descriptor.render) {
261
- return descriptor.render(score);
262
- } else {
263
- return score;
264
- }
265
- };
266
-
267
- /**
268
- * @param {import("../api/Types.ts").BasicSampleData} sample
269
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
270
- * @returns {ScorerDescriptor}
271
- */
272
- const scorerDescriptor = (sample, scoreLabel) => {
273
- return {
274
- metadata: () => {
275
- return scoreMetadata(sample, scoreLabel.scorer);
276
- },
277
- explanation: () => {
278
- return scoreExplanation(sample, scoreLabel.scorer);
279
- },
280
- answer: () => {
281
- return scoreAnswer(sample, scoreLabel.scorer);
282
- },
283
- scores: () => {
284
- if (!sample || !sample.scores) {
285
- return [];
286
- }
287
- const myScoreDescriptor = scoreDescriptor(scoreLabel);
288
- if (!myScoreDescriptor) {
289
- return [];
290
- }
291
-
292
- // Make a list of all the valid score names (this is
293
- // used to distinguish between dictionaries that contain
294
- // scores that should be treated as standlone scores and
295
- // dictionaries that just contain random values, which is allowed)
296
- const scoreNames = scores.map((score) => {
297
- return score.name;
298
- });
299
- const sampleScorer = sample.scores[scoreLabel.scorer];
300
- const scoreVal = sampleScorer.value;
301
-
302
- if (typeof scoreVal === "object") {
303
- const names = Object.keys(scoreVal);
304
-
305
- // See if this is a dictionary of score names
306
- // if any of the score names match, treat it
307
- // as a scorer dictionary
308
- if (
309
- names.find((name) => {
310
- return scoreNames.includes(name);
311
- })
312
- ) {
313
- // Since this dictionary contains keys which are scores
314
- // we actually render the individual scores
315
- const scores = names.map((name) => {
316
- return {
317
- name,
318
- rendered: () => {
319
- return myScoreDescriptor.render(scoreVal[name]);
320
- },
321
- };
322
- });
323
- return scores;
324
- } else {
325
- // Since this dictionary contains keys which are not scores
326
- // we just treat it like an opaque dictionary
327
- return [
328
- {
329
- name: scoreLabel.scorer,
330
- rendered: () => {
331
- return myScoreDescriptor.render(scoreVal);
332
- },
333
- },
334
- ];
335
- }
336
- } else {
337
- return [
338
- {
339
- name: scoreLabel.scorer,
340
- rendered: () => {
341
- return myScoreDescriptor.render(scoreVal);
342
- },
343
- },
344
- ];
345
- }
346
- },
347
- };
348
- };
349
-
350
- /**
351
- * @param {import("../api/Types.ts").BasicSampleData} sample
352
- * @param {import("../Types.mjs").ScoreLabel} scoreLabel
353
- * @returns {SelectedScore}
354
- */
355
- const score = (sample, scoreLabel) => {
356
- return {
357
- value: scoreValue(sample, scoreLabel),
358
- render: () => {
359
- return scoreRendered(sample, scoreLabel);
360
- },
361
- };
362
- };
363
-
364
- return {
365
- epochs,
366
- samples,
367
- scores,
368
- scorerDescriptor,
369
- scoreDescriptor,
370
- score,
371
- scoreAnswer,
372
- };
373
- };
374
-
375
- /**
376
- * Provides a utility summary of the samples
377
- *
378
- * @param {EvalDescriptor} evalDescriptor - The EvalDescriptor.
379
- * @param {import("../Types.mjs").ScoreLabel} selectedScore - Selected score.
380
- * @returns {SamplesDescriptor} - The SamplesDescriptor.
381
- */
382
- export const createSamplesDescriptor = (evalDescriptor, selectedScore) => {
383
- if (!evalDescriptor) {
384
- return undefined;
385
- }
386
-
387
- // Find the total length of the value so we can compute an average
388
- const sizes = evalDescriptor.samples.reduce(
389
- (previous, current) => {
390
- const text = inputString(current.input).join(" ");
391
- const scoreValue = evalDescriptor.score(current, selectedScore).value;
392
- const scoreText = scoreValue
393
- ? String(scoreValue)
394
- : current.error
395
- ? String(current.error)
396
- : "";
397
- previous[0] = Math.min(Math.max(previous[0], text.length), 300);
398
- previous[1] = Math.min(
399
- Math.max(previous[1], arrayToString(current.target).length),
400
- 300,
401
- );
402
- previous[2] = Math.min(
403
- Math.max(
404
- previous[2],
405
- evalDescriptor.scoreAnswer(current, selectedScore?.name)?.length || 0,
406
- ),
407
- 300,
408
- );
409
- previous[3] = Math.min(
410
- Math.max(previous[3], current.limit ? current.limit.length : 0),
411
- 50,
412
- );
413
- previous[4] = Math.min(
414
- Math.max(previous[4], String(current.id).length),
415
- 10,
416
- );
417
- previous[5] = Math.min(Math.max(previous[5], scoreText.length), 30);
418
-
419
- return previous;
420
- },
421
- [0, 0, 0, 0, 0, 0],
422
- );
423
-
424
- // normalize to base 1
425
- const maxSizes = {
426
- input: Math.min(sizes[0], 300),
427
- target: Math.min(sizes[1], 300),
428
- answer: Math.min(sizes[2], 300),
429
- limit: Math.min(sizes[3], 50),
430
- id: Math.min(sizes[4], 10),
431
- score: Math.min(sizes[4], 30),
432
- };
433
- const base =
434
- maxSizes.input +
435
- maxSizes.target +
436
- maxSizes.answer +
437
- maxSizes.limit +
438
- maxSizes.id +
439
- maxSizes.score || 1;
440
- const messageShape = {
441
- raw: {
442
- input: sizes[0],
443
- target: sizes[1],
444
- answer: sizes[2],
445
- limit: sizes[3],
446
- id: sizes[4],
447
- score: sizes[5],
448
- },
449
- normalized: {
450
- input: maxSizes.input / base,
451
- target: maxSizes.target / base,
452
- answer: maxSizes.answer / base,
453
- limit: maxSizes.limit / base,
454
- id: maxSizes.id / base,
455
- score: maxSizes.score / base,
456
- },
457
- };
458
-
459
- return {
460
- evalDescriptor,
461
- messageShape,
462
- selectedScoreDescriptor: evalDescriptor.scoreDescriptor(selectedScore),
463
- selectedScore: (sample) => evalDescriptor.score(sample, selectedScore),
464
- selectedScorerDescriptor: (sample) =>
465
- evalDescriptor.scorerDescriptor(sample, selectedScore),
466
- };
467
- };
468
-
469
- /**
470
- * @typedef {Object} ScoreCategorizer
471
- * @property {(values: import("../types/log").Value2[], types?: ("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]) => ScoreDescriptor} describe
472
- */
473
- const scoreCategorizers = [
474
- {
475
- /**
476
- * @param {import("../types/log").Value2[]} values - the currently selected score
477
- * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the scorer name
478
- * @returns {ScoreDescriptor} a ScoreDescriptor
479
- */
480
- describe: (values, types) => {
481
- if (types.length === 1 && types[0] === "boolean") {
482
- return booleanScoreCategorizer();
483
- }
484
- },
485
- },
486
- {
487
- /**
488
- * @param {import("../types/log").Value2[]} values - the currently selected score
489
- * @returns {ScoreDescriptor} a ScoreDescriptor
490
- */
491
- describe: (values) => {
492
- if (
493
- values.length === 2 &&
494
- values.every((val) => {
495
- return val === 1 || val === 0;
496
- })
497
- ) {
498
- return booleanScoreCategorizer();
499
- }
500
- },
501
- },
502
- {
503
- /**
504
- * @param {import("../types/log").Value2[]} values - the currently selected score
505
- * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the scorer name
506
- * @returns {ScoreDescriptor} a ScoreDescriptor
507
- */
508
- describe: (values, types) => {
509
- if (
510
- types[0] === "string" &&
511
- types.length === 1 &&
512
- values.length < 5 &&
513
- !values.find((val) => {
514
- return val !== "I" && val !== "C" && val !== "P" && val !== "N";
515
- })
516
- ) {
517
- return passFailScoreCategorizer(values);
518
- }
519
- },
520
- },
521
- {
522
- /**
523
- * @param {import("../types/log").Value2[]} values - the currently selected score
524
- * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the scorer name
525
- * @returns {ScoreDescriptor} a ScoreDescriptor
526
- */
527
- describe: (values, types) => {
528
- if (values.length < 10 && types.length === 1 && types[0] === "string") {
529
- return {
530
- scoreType: kScoreTypeCategorical,
531
- categories: values,
532
- compare: (a, b) => {
533
- return String(a).localeCompare(String(b));
534
- },
535
- render: (score) => {
536
- return score;
537
- },
538
- };
539
- }
540
- },
541
- },
542
- {
543
- /**
544
- * @param {import("../types/log").Value2[]} values - the currently selected score
545
- * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the scorer name
546
- * @returns {ScoreDescriptor} a ScoreDescriptor
547
- */
548
- describe: (values, types) => {
549
- if (types.length !== 0 && types[0] === "number") {
550
- const onlyNumeric = values.filter((val) => {
551
- return typeof val === "number";
552
- });
553
-
554
- return {
555
- scoreType: kScoreTypeNumeric,
556
- min: Math.min(...onlyNumeric),
557
- max: Math.max(...onlyNumeric),
558
- compare: (a, b) => {
559
- if (typeof a === "number" && typeof b === "number") {
560
- return a - b;
561
- } else {
562
- console.warn(
563
- "Comparing non-numerics using a nuermic score descriptor",
564
- );
565
- return 0;
566
- }
567
- },
568
- render: (score) => {
569
- return formatDecimalNoTrailingZeroes(Number(score));
570
- },
571
- };
572
- }
573
- },
574
- },
575
- {
576
- /**
577
- * @param {import("../types/log").Value2[]} values - the currently selected score
578
- * @param {("string" | "number" | "bigint" | "boolean" | "symbol" | "undefined" | "object" | "function")[]} [types] - the scorer name
579
- * @returns {ScoreDescriptor} a ScoreDescriptor
580
- */
581
- describe: (values, types) => {
582
- if (types.length !== 0 && types[0] === "object") {
583
- const buckets = values.map((val) => {
584
- return JSON.stringify(val);
585
- });
586
- const vals = new Set(buckets);
587
- let categories = undefined;
588
- if (vals.size < 10) {
589
- categories = Array.from(vals).map((val) => {
590
- return {
591
- val,
592
- text: val,
593
- };
594
- });
595
- }
596
-
597
- return {
598
- scoreType: kScoreTypeObject,
599
- categories,
600
- compare: () => {
601
- return 0;
602
- },
603
- render: (score) => {
604
- if (score === null || score === undefined) {
605
- return "[null]";
606
- }
607
-
608
- const scores = [];
609
- const keys = Object.keys(score);
610
- keys.forEach((key, index) => {
611
- const value = score[key];
612
- const formattedValue = isNumeric(value)
613
- ? formatPrettyDecimal(parseFloat(value))
614
- : value;
615
- const style = {
616
- display: "flex",
617
- flexDirection: "column",
618
- alignItems: "center",
619
- marginLeft: "0.5rem",
620
- };
621
- if (index + 1 < keys.length) {
622
- style["paddingBottom"] = "1em";
623
- }
624
- scores.push(html`
625
- <div style=${style}>
626
- <div style=${{ fontSize: FontSize.smaller, fontWeight: 300 }}>
627
- ${key}
628
- </div>
629
- <div style=${{ fontSize: FontSize.title, fontWeight: 600 }}>
630
- ${formattedValue}
631
- </div>
632
- </div>
633
- `);
634
- });
635
-
636
- return scores;
637
- },
638
- };
639
- }
640
- },
641
- },
642
- {
643
- /**
644
- * @returns {ScoreDescriptor} a ScoreDescriptor
645
- */
646
- // @ts-ignore
647
- describe: () => {
648
- return {
649
- scoreType: kScoreTypeOther,
650
- compare: () => {
651
- return 0;
652
- },
653
- render: (score) => {
654
- return html`<${RenderedContent}
655
- id="other-score-value"
656
- entry=${{ value: score }}
657
- />`;
658
- },
659
- };
660
- },
661
- },
662
- ];
663
-
664
- const filledCircleStyle = {
665
- fontSize: FontSize.small,
666
- fontFamily: "Consola Regular",
667
- width: "20px",
668
- height: "20px",
669
- display: "inline-flex",
670
- justifyContent: "center",
671
- alignItems: "center",
672
- borderRadius: "50%",
673
- paddingTop: "1px",
674
- };
675
-
676
- const booleanScoreCategorizer = () => {
677
- return {
678
- scoreType: "boolean",
679
- compare: (a, b) => {
680
- return Number(a.value) - Number(b.value);
681
- },
682
- render: (score) => {
683
- const scoreColorStyle = score
684
- ? ApplicationStyles.scoreFills.green
685
- : ApplicationStyles.scoreFills.red;
686
-
687
- return html`<span
688
- style=${{
689
- ...scoreColorStyle,
690
- ...filledCircleStyle,
691
- }}
692
- >${score}</span
693
- >`;
694
- },
695
- };
696
- };
697
-
698
- const passFailScoreCategorizer = (values) => {
699
- const categories = [];
700
- if (values.includes("C")) {
701
- categories.push({
702
- val: "C",
703
- text: "Correct",
704
- });
705
- }
706
- if (values.includes("P")) {
707
- categories.push({
708
- val: "P",
709
- text: "Partial",
710
- });
711
- }
712
- if (values.includes("I")) {
713
- categories.push({
714
- val: "I",
715
- text: "Incorrect",
716
- });
717
- }
718
- if (values.includes("N")) {
719
- categories.push({
720
- val: "N",
721
- text: "Refusal",
722
- });
723
- }
724
- const order = ["C", "P", "I", "N"];
725
-
726
- return {
727
- scoreType: kScoreTypePassFail,
728
- categories,
729
- render: (score) => {
730
- if (score === "C") {
731
- return html`<span
732
- style=${{
733
- ...ApplicationStyles.scoreFills.green,
734
- ...filledCircleStyle,
735
- }}
736
- >C</span
737
- >`;
738
- } else if (score === "I") {
739
- return html`<span
740
- style=${{
741
- ...ApplicationStyles.scoreFills.red,
742
- ...filledCircleStyle,
743
- }}
744
- >I</span
745
- >`;
746
- } else if (score === "P") {
747
- return html`<span
748
- style=${{
749
- ...ApplicationStyles.scoreFills.orange,
750
- ...filledCircleStyle,
751
- }}
752
- >P</span
753
- >`;
754
- } else if (score === "N") {
755
- return html`<span
756
- style=${{
757
- ...ApplicationStyles.scoreFills.red,
758
- ...filledCircleStyle,
759
- }}
760
- >N</span
761
- >`;
762
- } else {
763
- return score;
764
- }
765
- },
766
- compare: (a, b) => {
767
- const sort = order.indexOf(a.value) - order.indexOf(b.value);
768
- return sort;
769
- },
770
- };
771
- };