inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -1,46 +1,19 @@
1
1
  import clsx from "clsx";
2
- import { highlightElement } from "prismjs";
3
- import { FC, memo, useEffect, useRef } from "react";
2
+ import { FC } from "react";
4
3
  import { MarkdownDiv } from "../../../components/MarkdownDiv";
5
4
 
5
+ import { usePrismHighlight } from "../../../state/hooks";
6
6
  import styles from "./ToolInput.module.css";
7
7
 
8
- export const useCodeHighlight = (language?: string) => {
9
- const codeRef = useRef<HTMLElement>(null);
10
-
11
- useEffect(() => {
12
- if (codeRef.current && language) {
13
- highlightElement(codeRef.current);
14
- }
15
- }, [language]);
16
-
17
- return codeRef;
18
- };
19
-
20
8
  interface ToolInputProps {
21
9
  highlightLanguage?: string;
22
10
  contents?: string | object;
23
11
  toolCallView?: { content: string };
24
12
  }
25
- export const ToolInput: FC<ToolInputProps> = memo((props) => {
13
+ export const ToolInput: FC<ToolInputProps> = (props) => {
26
14
  const { highlightLanguage, contents, toolCallView } = props;
27
15
 
28
- const codeRef = useCodeHighlight(highlightLanguage);
29
- const toolViewRef = useRef<HTMLDivElement>(null);
30
-
31
- useEffect(() => {
32
- if (toolCallView?.content && toolViewRef.current) {
33
- requestAnimationFrame(() => {
34
- const codeBlocks = toolViewRef.current!.querySelectorAll("pre code");
35
- codeBlocks.forEach((block) => {
36
- if (block.className.includes("language-")) {
37
- block.classList.add("sourceCode");
38
- highlightElement(block as HTMLElement);
39
- }
40
- });
41
- });
42
- }
43
- }, [toolCallView?.content]);
16
+ const prismParentRef = usePrismHighlight(toolCallView?.content);
44
17
 
45
18
  if (!contents && !toolCallView?.content) return null;
46
19
 
@@ -48,8 +21,8 @@ export const ToolInput: FC<ToolInputProps> = memo((props) => {
48
21
  return (
49
22
  <MarkdownDiv
50
23
  markdown={toolCallView.content}
51
- ref={toolViewRef}
52
- className={clsx("text-size-small", "tool-output")}
24
+ ref={prismParentRef}
25
+ className={clsx(styles.bottomPadding, "text-size-small", "tool-output")}
53
26
  />
54
27
  );
55
28
  }
@@ -58,18 +31,21 @@ export const ToolInput: FC<ToolInputProps> = memo((props) => {
58
31
  typeof contents === "object" ? JSON.stringify(contents) : contents;
59
32
 
60
33
  return (
61
- <pre className={clsx("tool-output", styles.outputPre, styles.bottomMargin)}>
62
- <code
63
- ref={codeRef}
64
- className={clsx(
65
- "source-code",
66
- "sourceCode",
67
- highlightLanguage ? `language-${highlightLanguage}` : undefined,
68
- styles.outputCode,
69
- )}
34
+ <div ref={prismParentRef}>
35
+ <pre
36
+ className={clsx("tool-output", styles.outputPre, styles.bottomMargin)}
70
37
  >
71
- {formattedContent}
72
- </code>
73
- </pre>
38
+ <code
39
+ className={clsx(
40
+ "source-code",
41
+ "sourceCode",
42
+ highlightLanguage ? `language-${highlightLanguage}` : undefined,
43
+ styles.outputCode,
44
+ )}
45
+ >
46
+ {formattedContent}
47
+ </code>
48
+ </pre>
49
+ </div>
74
50
  );
75
- });
51
+ };
@@ -15,14 +15,14 @@ import {
15
15
  export interface SamplesDescriptor {
16
16
  evalDescriptor: EvalDescriptor;
17
17
  messageShape: MessageShape;
18
- selectedScoreDescriptor?: ScoreDescriptor;
19
18
  selectedScore: (sample: BasicSampleData) => SelectedScore | undefined;
20
- selectedScorerDescriptor: (sample: BasicSampleData) => ScorerDescriptor;
19
+ selectedScorerDescriptor: (
20
+ sample: BasicSampleData,
21
+ ) => ScorerDescriptor | undefined;
21
22
  }
22
23
 
23
24
  export const createEvalDescriptor = (
24
25
  scores: ScoreLabel[],
25
- epochs: number,
26
26
  samples?: SampleSummary[],
27
27
  ): EvalDescriptor | undefined => {
28
28
  if (!samples) {
@@ -47,7 +47,13 @@ export const createEvalDescriptor = (
47
47
  sample.scores[scoreLabel.scorer] &&
48
48
  sample.scores[scoreLabel.scorer].value
49
49
  ) {
50
- return sample.scores[scoreLabel.scorer].value;
50
+ if (typeof sample.scores[scoreLabel.scorer].value === "object") {
51
+ return (
52
+ sample.scores[scoreLabel.scorer].value as Record<string, Value2>
53
+ )[scoreLabel.name];
54
+ } else {
55
+ return sample.scores[scoreLabel.scorer].value;
56
+ }
51
57
  } else if (sample.scores[scoreLabel.name]) {
52
58
  return sample.scores[scoreLabel.name].value;
53
59
  } else {
@@ -57,10 +63,10 @@ export const createEvalDescriptor = (
57
63
 
58
64
  const scoreAnswer = (
59
65
  sample: BasicSampleData,
60
- scorer: string,
66
+ scorer: ScoreLabel,
61
67
  ): string | undefined => {
62
68
  if (sample && sample.scores) {
63
- const sampleScore = sample.scores[scorer];
69
+ const sampleScore = sample.scores[scorer.name];
64
70
  if (sampleScore && sampleScore.answer) {
65
71
  return sampleScore.answer;
66
72
  }
@@ -162,7 +168,7 @@ export const createEvalDescriptor = (
162
168
  return "null";
163
169
  } else if (score === undefined) {
164
170
  return "";
165
- } else if (score && descriptor && descriptor.render) {
171
+ } else if (descriptor && descriptor.render) {
166
172
  return descriptor.render(score);
167
173
  } else {
168
174
  return <span>{String(score)}</span>;
@@ -181,7 +187,7 @@ export const createEvalDescriptor = (
181
187
  return scoreExplanation(sample, scoreLabel.scorer) || "";
182
188
  },
183
189
  answer: () => {
184
- return scoreAnswer(sample, scoreLabel.scorer) || "";
190
+ return scoreAnswer(sample, scoreLabel) || "";
185
191
  },
186
192
  scores: () => {
187
193
  if (!sample || !sample.scores) {
@@ -252,8 +258,11 @@ export const createEvalDescriptor = (
252
258
 
253
259
  const score = (
254
260
  sample: BasicSampleData,
255
- scoreLabel: ScoreLabel,
256
- ): SelectedScore => {
261
+ scoreLabel?: ScoreLabel,
262
+ ): SelectedScore | undefined => {
263
+ if (!scoreLabel) {
264
+ return undefined;
265
+ }
257
266
  return {
258
267
  value: scoreValue(sample, scoreLabel),
259
268
  render: () => {
@@ -263,8 +272,6 @@ export const createEvalDescriptor = (
263
272
  };
264
273
 
265
274
  return {
266
- epochs,
267
- samples,
268
275
  scores,
269
276
  scorerDescriptor,
270
277
  scoreDescriptor,
@@ -274,14 +281,17 @@ export const createEvalDescriptor = (
274
281
  };
275
282
 
276
283
  export const createSamplesDescriptor = (
284
+ samples: SampleSummary[],
277
285
  evalDescriptor: EvalDescriptor,
278
- selectedScore: ScoreLabel,
286
+ selectedScore?: ScoreLabel,
279
287
  ): SamplesDescriptor | undefined => {
280
288
  // Find the total length of the value so we can compute an average
281
- const sizes = evalDescriptor.samples.reduce(
289
+ const sizes = samples.reduce(
282
290
  (previous, current) => {
283
291
  const text = inputString(current.input).join(" ");
284
- const score = evalDescriptor.score(current, selectedScore);
292
+ const score = selectedScore
293
+ ? evalDescriptor.score(current, selectedScore)
294
+ : undefined;
285
295
  const scoreValue = score?.value;
286
296
  const scoreText = scoreValue
287
297
  ? String(scoreValue)
@@ -296,7 +306,9 @@ export const createSamplesDescriptor = (
296
306
  previous[2] = Math.min(
297
307
  Math.max(
298
308
  previous[2],
299
- evalDescriptor.scoreAnswer(current, selectedScore?.name)?.length || 0,
309
+ selectedScore
310
+ ? evalDescriptor.scoreAnswer(current, selectedScore)?.length || 0
311
+ : 0,
300
312
  ),
301
313
  300,
302
314
  );
@@ -322,7 +334,7 @@ export const createSamplesDescriptor = (
322
334
  answer: Math.min(sizes[2], 300),
323
335
  limit: Math.min(sizes[3], 50),
324
336
  id: Math.min(sizes[4], 10),
325
- score: Math.min(sizes[4], 30),
337
+ score: Math.min(sizes[5], 30),
326
338
  };
327
339
  const base =
328
340
  maxSizes.input +
@@ -353,10 +365,12 @@ export const createSamplesDescriptor = (
353
365
  return {
354
366
  evalDescriptor,
355
367
  messageShape,
356
- selectedScoreDescriptor: evalDescriptor.scoreDescriptor(selectedScore),
357
- selectedScore: (sample) => evalDescriptor.score(sample, selectedScore),
368
+ selectedScore: (sample) =>
369
+ selectedScore ? evalDescriptor.score(sample, selectedScore) : undefined,
358
370
  selectedScorerDescriptor: (sample) =>
359
- evalDescriptor.scorerDescriptor(sample, selectedScore),
371
+ selectedScore
372
+ ? evalDescriptor.scorerDescriptor(sample, selectedScore)
373
+ : undefined,
360
374
  };
361
375
  };
362
376
 
@@ -1,11 +1,11 @@
1
1
  .circle {
2
2
  font-family: "Consola Regular";
3
- width: 20px;
4
- height: 20px;
3
+ width: 40px;
4
+ height: 30px;
5
5
  display: inline-flex;
6
6
  justify-content: center;
7
7
  align-items: center;
8
- border-radius: 50%;
8
+ border-radius: 15px;
9
9
  padding-top: 1px;
10
10
  }
11
11
 
@@ -15,7 +15,7 @@ export const booleanScoreDescriptor = (): ScoreDescriptor => {
15
15
  className={clsx(
16
16
  styles.circle,
17
17
  "text-size-small",
18
- score ? "green" : "red",
18
+ score ? styles.green : styles.red,
19
19
  )}
20
20
  >
21
21
  {String(score)}
@@ -1,8 +1,8 @@
1
1
  .container {
2
- display: flex;
3
- flex-direction: column;
4
- align-items: center;
5
- margin-left: 0.5rem;
2
+ display: grid;
3
+ grid-template-columns: auto auto;
4
+ grid-auto-rows: auto;
5
+ column-gap: 1rem;
6
6
  }
7
7
 
8
8
  .padded {
@@ -35,7 +35,7 @@ export const objectScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
35
35
 
36
36
  const scores: JSX.Element[] = [];
37
37
  const keys = Object.keys(score);
38
- keys.forEach((key, index) => {
38
+ keys.forEach((key) => {
39
39
  if (typeof score !== "object" || Array.isArray(score)) {
40
40
  throw new Error(
41
41
  "Unexpected us of object score descriptor for non-score object",
@@ -50,22 +50,22 @@ export const objectScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
50
50
  : parseFloat(value === true ? "1" : value),
51
51
  )
52
52
  : String(value);
53
+
53
54
  scores.push(
54
- <div
55
- className={clsx(
56
- styles.container,
57
- index + 1 < keys.length ? styles.padded : undefined,
58
- )}
59
- >
55
+ <>
60
56
  <div className={clsx(styles.key, "text-size-smaller")}>{key}</div>
61
- <div className={clsx(styles.value, "text-size-title")}>
57
+ <div className={clsx(styles.value, "text-size-base")}>
62
58
  {formattedValue}
63
59
  </div>
64
- </div>,
60
+ </>,
65
61
  );
66
62
  });
67
63
 
68
- return scores;
64
+ return (
65
+ <div key={`score-value`} className={clsx(styles.container)}>
66
+ {scores}
67
+ </div>
68
+ );
69
69
  },
70
70
  };
71
71
  };
@@ -1,11 +1,9 @@
1
1
  import { ReactNode } from "react";
2
- import { BasicSampleData, SampleSummary } from "../../api/types";
2
+ import { BasicSampleData } from "../../api/types";
3
3
  import { ScoreLabel } from "../../types";
4
4
  import { Value2 } from "../../types/log";
5
5
 
6
6
  export interface EvalDescriptor {
7
- epochs: number;
8
- samples: SampleSummary[];
9
7
  scores: ScoreLabel[];
10
8
  scoreDescriptor: (scoreLabel: ScoreLabel) => ScoreDescriptor;
11
9
  scorerDescriptor: (
@@ -14,9 +12,12 @@ export interface EvalDescriptor {
14
12
  ) => ScorerDescriptor;
15
13
  score: (
16
14
  sample: BasicSampleData,
17
- scoreLabel: ScoreLabel,
15
+ scoreLabel?: ScoreLabel,
18
16
  ) => SelectedScore | undefined;
19
- scoreAnswer: (sample: BasicSampleData, scorer: string) => string | undefined;
17
+ scoreAnswer: (
18
+ sample: BasicSampleData,
19
+ scorer: ScoreLabel,
20
+ ) => string | undefined;
20
21
  }
21
22
 
22
23
  export interface ScorerDescriptor {
@@ -2,8 +2,27 @@
2
2
  border-top: solid var(--bs-light-border-subtle) 1px;
3
3
  background: var(--bs-light-bg-subtle);
4
4
  display: grid;
5
- grid-template-columns: max-content;
6
- justify-content: end;
7
- align-content: end;
5
+ grid-template-columns: max-content max-content;
6
+ justify-content: space-between;
7
+
8
8
  padding: 0.2em 1em;
9
9
  }
10
+
11
+ .spinnerContainer {
12
+ display: grid;
13
+ grid-template-columns: max-content max-content;
14
+ column-gap: 0.3em;
15
+ padding-top: 0.2em;
16
+ }
17
+
18
+ .spinner {
19
+ height: 11px;
20
+ width: 11px;
21
+ color: var(--bs-secondary);
22
+ border-width: 1px;
23
+ }
24
+
25
+ .label {
26
+ margin-left: 0.1em;
27
+ margin-top: -3px;
28
+ }
@@ -1,15 +1,40 @@
1
1
  interface SampleFooterProps {
2
2
  sampleCount: number;
3
+ totalSampleCount: number;
4
+ running: boolean;
3
5
  }
4
6
 
5
7
  import clsx from "clsx";
6
8
  import { FC } from "react";
7
9
  import styles from "./SampleFooter.module.css";
8
10
 
9
- export const SampleFooter: FC<SampleFooterProps> = ({ sampleCount }) => {
11
+ export const SampleFooter: FC<SampleFooterProps> = ({
12
+ sampleCount,
13
+ totalSampleCount,
14
+ running,
15
+ }) => {
10
16
  return (
11
17
  <div className={clsx("text-size-smaller", styles.footer)}>
12
- <div>{sampleCount} Samples</div>
18
+ <div>
19
+ {running ? (
20
+ <div className={clsx(styles.spinnerContainer)}>
21
+ <div
22
+ className={clsx("spinner-border", styles.spinner)}
23
+ role="status"
24
+ >
25
+ <span className={clsx("visually-hidden")}>Running...</span>
26
+ </div>
27
+ <div className={clsx("text-style-secondary", styles.label)}>
28
+ running...
29
+ </div>
30
+ </div>
31
+ ) : undefined}
32
+ </div>
33
+ <div>
34
+ {sampleCount < totalSampleCount
35
+ ? `${sampleCount} / ${totalSampleCount} Samples`
36
+ : `${sampleCount} Samples`}
37
+ </div>
13
38
  </div>
14
39
  );
15
40
  };