inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,18 @@
1
1
  import clsx from "clsx";
2
2
  import { MarkdownDiv } from "../components/MarkdownDiv";
3
- import { EvalSample, WorkingTime } from "../types/log";
3
+ import { EvalSample, Target, TotalTime, WorkingTime } from "../types/log";
4
4
  import { arrayToString, formatTime, inputString } from "../utils/format";
5
- import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
6
5
  import { FlatSampleError } from "./error/FlatSampleErrorView";
7
6
 
8
7
  import { FC, ReactNode } from "react";
8
+ import { SampleSummary } from "../api/types";
9
+ import { useSampleDescriptor, useScore } from "../state/hooks";
9
10
  import styles from "./SampleSummaryView.module.css";
11
+ import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
10
12
 
11
13
  interface SampleSummaryViewProps {
12
14
  parent_id: string;
13
- sample: EvalSample;
14
- sampleDescriptor: SamplesDescriptor;
15
+ sample: SampleSummary | EvalSample;
15
16
  }
16
17
 
17
18
  interface SummaryColumn {
@@ -23,14 +24,73 @@ interface SummaryColumn {
23
24
  title?: string;
24
25
  }
25
26
 
27
+ interface SampleFields {
28
+ id: string | number;
29
+ input: string[];
30
+ target: Target;
31
+ answer?: string;
32
+ limit?: string;
33
+ working_time?: WorkingTime;
34
+ total_time?: TotalTime;
35
+ error?: string;
36
+ }
37
+
38
+ function isEvalSample(
39
+ sample: SampleSummary | EvalSample,
40
+ ): sample is EvalSample {
41
+ return "choices" in sample && Array.isArray((sample as EvalSample).choices);
42
+ }
43
+
44
+ const resolveSample = (
45
+ sample: SampleSummary | EvalSample,
46
+ sampleDescriptor: SamplesDescriptor,
47
+ ): SampleFields => {
48
+ const input = inputString(sample.input);
49
+ if (isEvalSample(sample) && sample.choices && sample.choices.length > 0) {
50
+ input.push("");
51
+ input.push(
52
+ ...sample.choices.map((choice, index) => {
53
+ return `${String.fromCharCode(65 + index)}) ${choice}`;
54
+ }),
55
+ );
56
+ }
57
+
58
+ const target = sample.target;
59
+ const answer =
60
+ sample && sampleDescriptor
61
+ ? sampleDescriptor.selectedScorerDescriptor(sample)?.answer()
62
+ : undefined;
63
+ const limit = isEvalSample(sample) ? sample.limit?.type : undefined;
64
+ const working_time = isEvalSample(sample) ? sample.working_time : undefined;
65
+ const total_time = isEvalSample(sample) ? sample.total_time : undefined;
66
+ const error = isEvalSample(sample) ? sample.error?.message : undefined;
67
+
68
+ return {
69
+ id: sample.id,
70
+ input,
71
+ target,
72
+ answer,
73
+ limit,
74
+ working_time,
75
+ total_time,
76
+ error,
77
+ };
78
+ };
79
+
26
80
  /**
27
81
  * Component to display a sample with relevant context and visibility control.
28
82
  */
29
83
  export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
30
84
  parent_id,
31
85
  sample,
32
- sampleDescriptor,
33
86
  }) => {
87
+ const sampleDescriptor = useSampleDescriptor();
88
+ const currentScore = useScore();
89
+ if (!sampleDescriptor) {
90
+ return undefined;
91
+ }
92
+ const fields = resolveSample(sample, sampleDescriptor);
93
+
34
94
  const input =
35
95
  sampleDescriptor?.messageShape.normalized.input > 0
36
96
  ? Math.max(0.15, sampleDescriptor.messageShape.normalized.input)
@@ -47,43 +107,33 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
47
107
  sampleDescriptor?.messageShape.normalized.limit > 0
48
108
  ? Math.max(0.15, sampleDescriptor.messageShape.normalized.limit)
49
109
  : 0;
50
- const timeSize = sample.working_time || sample.total_time ? 0.15 : 0;
110
+ const timeSize = fields.working_time || fields.total_time ? 0.15 : 0;
51
111
  const idSize = Math.max(
52
112
  2,
53
113
  Math.min(10, sampleDescriptor?.messageShape.raw.id),
54
114
  );
55
115
 
56
- const scoreInput = inputString(sample.input);
57
- if (sample.choices && sample.choices.length > 0) {
58
- scoreInput.push("");
59
- scoreInput.push(
60
- ...sample.choices.map((choice, index) => {
61
- return `${String.fromCharCode(65 + index)}) ${choice}`;
62
- }),
63
- );
64
- }
65
-
66
116
  // The columns for the sample
67
117
  const columns: SummaryColumn[] = [];
68
118
  columns.push({
69
119
  label: "Id",
70
- value: sample.id,
120
+ value: fields.id,
71
121
  size: `${idSize}em`,
72
122
  });
73
123
 
74
124
  columns.push({
75
125
  label: "Input",
76
- value: scoreInput,
126
+ value: <MarkdownDiv markdown={fields.input.join(" ")} />,
77
127
  size: `${input}fr`,
78
128
  clamp: true,
79
129
  });
80
130
 
81
- if (sample.target) {
131
+ if (fields.target) {
82
132
  columns.push({
83
133
  label: "Target",
84
134
  value: (
85
135
  <MarkdownDiv
86
- markdown={arrayToString(arrayToString(sample?.target || "none"))}
136
+ markdown={arrayToString(fields?.target || "none")}
87
137
  className={clsx("no-last-para-padding", styles.target)}
88
138
  />
89
139
  ),
@@ -92,16 +142,12 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
92
142
  });
93
143
  }
94
144
 
95
- const fullAnswer =
96
- sample && sampleDescriptor
97
- ? sampleDescriptor.selectedScorerDescriptor(sample).answer()
98
- : undefined;
99
- if (fullAnswer) {
145
+ if (fields.answer) {
100
146
  columns.push({
101
147
  label: "Answer",
102
148
  value: sample ? (
103
149
  <MarkdownDiv
104
- markdown={fullAnswer}
150
+ markdown={fields.answer}
105
151
  className={clsx("no-last-para-padding", styles.answer)}
106
152
  />
107
153
  ) : (
@@ -119,20 +165,20 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
119
165
  return `Working time: ${formatTime(working_time)}`;
120
166
  };
121
167
 
122
- if (sample.total_time) {
168
+ if (fields.total_time) {
123
169
  columns.push({
124
170
  label: "Time",
125
- value: formatTime(sample.total_time),
171
+ value: formatTime(fields.total_time),
126
172
  size: `${timeSize}fr`,
127
173
  center: true,
128
- title: toolTip(sample.working_time),
174
+ title: toolTip(fields.working_time),
129
175
  });
130
176
  }
131
177
 
132
- if (sample?.limit && limitSize > 0) {
178
+ if (fields?.limit && limitSize > 0) {
133
179
  columns.push({
134
180
  label: "Limit",
135
- value: sample.limit.type,
181
+ value: fields.limit,
136
182
  size: `${limitSize}fr`,
137
183
  center: true,
138
184
  });
@@ -140,11 +186,11 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
140
186
 
141
187
  columns.push({
142
188
  label: "Score",
143
- value: sample.error ? (
144
- <FlatSampleError message={sample.error.message} />
189
+ value: fields.error ? (
190
+ <FlatSampleError message={fields.error} />
145
191
  ) : (
146
- // TODO: Cleanup once the PR lands which makes sample / sample summary share common interface
147
- sampleDescriptor?.selectedScore(sample)?.render() || ""
192
+ sampleDescriptor?.evalDescriptor.score(sample, currentScore)?.render() ||
193
+ ""
148
194
  ),
149
195
  size: "minmax(2em, 30em)",
150
196
  center: true,
@@ -184,9 +230,10 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
184
230
  <div
185
231
  key={`sample-summ-val-${idx}`}
186
232
  className={clsx(
233
+ styles.value,
187
234
  styles.wrap,
188
235
  col.clamp ? "three-line-clamp" : undefined,
189
- col.center ? styles.centerLabel : undefined,
236
+ col.center ? styles.centerValue : undefined,
190
237
  )}
191
238
  >
192
239
  {col.value}
@@ -1,47 +1,40 @@
1
1
  import { FC } from "react";
2
2
  import { Fragment } from "react/jsx-runtime";
3
- import { ScoreFilter, ScoreLabel } from "../types";
4
- import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
3
+ import { SampleSummary } from "../api/types";
4
+ import { useScore, useScores } from "../state/hooks";
5
+ import { useStore } from "../state/store";
5
6
  import { EpochFilter } from "./sample-tools/EpochFilter";
6
7
  import { SampleFilter } from "./sample-tools/sample-filter/SampleFilter";
7
8
  import { SelectScorer } from "./sample-tools/SelectScorer";
8
9
  import { SortFilter } from "./sample-tools/SortFilter";
9
10
 
10
11
  interface SampleToolsProps {
11
- epoch: string;
12
- setEpoch: (epoch: string) => void;
13
- epochs: number;
14
- scoreFilter: ScoreFilter;
15
- setScoreFilter: (filter: ScoreFilter) => void;
16
- sort: string;
17
- setSort: (sort: string) => void;
18
- score?: ScoreLabel;
19
- setScore: (score: ScoreLabel) => void;
20
- scores: ScoreLabel[];
21
- sampleDescriptor: SamplesDescriptor;
12
+ samples: SampleSummary[];
22
13
  }
23
14
 
24
- export const SampleTools: FC<SampleToolsProps> = ({
25
- epoch,
26
- setEpoch,
27
- epochs,
28
- scoreFilter,
29
- setScoreFilter,
30
- sort,
31
- setSort,
32
- score,
33
- setScore,
34
- scores,
35
- sampleDescriptor,
36
- }) => {
15
+ export const SampleTools: FC<SampleToolsProps> = ({ samples }) => {
16
+ const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
17
+
18
+ const filter = useStore((state) => state.log.filter);
19
+ const setFilter = useStore((state) => state.logActions.setFilter);
20
+
21
+ const scores = useScores();
22
+ const score = useScore();
23
+ const setScore = useStore((state) => state.logActions.setScore);
24
+ const epoch = useStore((state) => state.log.epoch);
25
+ const setEpoch = useStore((state) => state.logActions.setEpoch);
26
+ const sort = useStore((state) => state.log.sort);
27
+ const setSort = useStore((state) => state.logActions.setSort);
28
+
29
+ const epochs = selectedLogSummary?.eval.config.epochs || 1;
37
30
  return (
38
31
  <Fragment>
39
32
  <SampleFilter
40
- evalDescriptor={sampleDescriptor.evalDescriptor}
41
- scoreFilter={scoreFilter}
42
- setScoreFilter={setScoreFilter}
33
+ samples={samples}
34
+ scoreFilter={filter}
35
+ setScoreFilter={setFilter}
43
36
  />
44
- {scores.length > 1 ? (
37
+ {scores?.length > 1 ? (
45
38
  <SelectScorer scores={scores} score={score} setScore={setScore} />
46
39
  ) : undefined}
47
40
  {epochs > 1 ? (
@@ -51,3 +44,15 @@ export const SampleTools: FC<SampleToolsProps> = ({
51
44
  </Fragment>
52
45
  );
53
46
  };
47
+
48
+ interface ScoreFilterToolsProps {}
49
+
50
+ export const ScoreFilterTools: FC<ScoreFilterToolsProps> = () => {
51
+ const scores = useScores();
52
+ const score = useScore();
53
+ const setScore = useStore((state) => state.logActions.setScore);
54
+ if (scores.length <= 1) {
55
+ return undefined;
56
+ }
57
+ return <SelectScorer scores={scores} score={score} setScore={setScore} />;
58
+ };
@@ -46,8 +46,9 @@ export const ChatMessage: FC<ChatMessageProps> = ({
46
46
  indented ? styles.indented : undefined,
47
47
  )}
48
48
  >
49
- <ExpandablePanel collapse={collapse} lines={30}>
49
+ <ExpandablePanel id={`${id}-message`} collapse={collapse} lines={30}>
50
50
  <MessageContents
51
+ id={`${id}-contents`}
51
52
  key={`${id}-contents`}
52
53
  message={message}
53
54
  toolMessages={toolMessages}
@@ -26,7 +26,7 @@ export const ChatMessageRenderer: ContentRenderer = {
26
26
  render: (id, entry) => {
27
27
  return {
28
28
  rendered: (
29
- <NavPills>
29
+ <NavPills id={`${id}-navpills`}>
30
30
  <ChatSummary title="Last Turn" id={id} messages={entry.value} />
31
31
  <ChatView title="All" id={id} messages={entry.value} />
32
32
  </NavPills>
@@ -1,73 +1,65 @@
1
- import { FC, RefObject, useState } from "react";
1
+ import { FC, memo, ReactNode, RefObject, useMemo } from "react";
2
2
  import { Messages } from "../../types/log";
3
3
 
4
- import clsx from "clsx";
5
- import { Virtuoso } from "react-virtuoso";
6
4
  import { ChatMessageRow } from "./ChatMessageRow";
7
5
  import { ResolvedMessage, resolveMessages } from "./messages";
8
6
 
9
- import styles from "./ChatViewVirtualList.module.css";
7
+ import { LiveVirtualList } from "../../components/LiveVirtualList";
10
8
 
11
9
  interface ChatViewVirtualListProps {
12
- id?: string;
10
+ id: string;
11
+ className?: string | string[];
13
12
  messages: Messages;
14
13
  toolCallStyle: "compact" | "complete";
15
- className?: string | string[];
16
14
  indented: boolean;
17
15
  numbered?: boolean;
18
- scrollRef?: RefObject<HTMLElement | null>;
16
+ scrollRef?: RefObject<HTMLDivElement | null>;
17
+ running?: boolean;
19
18
  }
20
19
 
21
20
  /**
22
21
  * Renders the ChatViewVirtualList component.
23
22
  */
24
- export const ChatViewVirtualList: FC<ChatViewVirtualListProps> = ({
25
- id,
26
- messages,
27
- toolCallStyle,
28
- className,
29
- indented,
30
- numbered = true,
31
- scrollRef,
32
- }) => {
33
- const collapsedMessages = resolveMessages(messages);
34
- const [followOutput, setFollowOutput] = useState(false);
23
+ export const ChatViewVirtualList: FC<ChatViewVirtualListProps> = memo(
24
+ ({
25
+ id,
26
+ messages,
27
+ className,
28
+ toolCallStyle,
29
+ indented,
30
+ numbered = true,
31
+ scrollRef,
32
+ running,
33
+ }) => {
34
+ const collapsedMessages = useMemo(() => {
35
+ return resolveMessages(messages);
36
+ }, [messages]);
37
+
38
+ const renderRow = (index: number, item: ResolvedMessage): ReactNode => {
39
+ const number =
40
+ collapsedMessages.length > 1 && numbered ? index + 1 : undefined;
41
+
42
+ return (
43
+ <ChatMessageRow
44
+ parentName={id || "chat-virtual-list"}
45
+ number={number}
46
+ resolvedMessage={item}
47
+ indented={indented}
48
+ toolCallStyle={toolCallStyle}
49
+ />
50
+ );
51
+ };
35
52
 
36
- const renderRow = (item: ResolvedMessage, index: number) => {
37
- const number =
38
- collapsedMessages.length > 1 && numbered ? index + 1 : undefined;
39
53
  return (
40
- <ChatMessageRow
41
- parentName={id || "chat-virtual-list"}
42
- number={number}
43
- resolvedMessage={item}
44
- indented={indented}
45
- toolCallStyle={toolCallStyle}
54
+ <LiveVirtualList<ResolvedMessage>
55
+ id="chat-virtual-list"
56
+ className={className}
57
+ scrollRef={scrollRef}
58
+ data={collapsedMessages}
59
+ renderRow={renderRow}
60
+ live={running}
61
+ showProgress={running}
46
62
  />
47
63
  );
48
- };
49
-
50
- const result = (
51
- <Virtuoso
52
- customScrollParent={scrollRef?.current ? scrollRef.current : undefined}
53
- style={{ height: "100%", width: "100%" }}
54
- data={collapsedMessages}
55
- itemContent={(index: number, data: ResolvedMessage) => {
56
- return renderRow(data, index);
57
- }}
58
- increaseViewportBy={{ top: 1000, bottom: 1000 }}
59
- overscan={{
60
- main: 10,
61
- reverse: 10,
62
- }}
63
- followOutput={followOutput}
64
- atBottomStateChange={(atBottom: boolean) => {
65
- setFollowOutput(atBottom);
66
- }}
67
- skipAnimationFrameInResizeObserver={true}
68
- className={clsx(styles.list, className)}
69
- />
70
- );
71
-
72
- return result;
73
- };
64
+ },
65
+ );
@@ -52,6 +52,7 @@ export const MessageContent: FC<MessageContentProps> = ({ contents }) => {
52
52
  {
53
53
  type: "text",
54
54
  text: content,
55
+ refusal: null,
55
56
  },
56
57
  index === contents.length - 1,
57
58
  );
@@ -75,6 +76,7 @@ export const MessageContent: FC<MessageContentProps> = ({ contents }) => {
75
76
  const contentText: ContentText = {
76
77
  type: "text",
77
78
  text: contents,
79
+ refusal: null,
78
80
  };
79
81
  return messageRenderers["text"].render(
80
82
  "text-message-content",
@@ -104,6 +106,9 @@ const messageRenderers: Record<string, MessageRenderer> = {
104
106
  reasoning: {
105
107
  render: (key, content, isLast) => {
106
108
  const r = content as ContentReasoning;
109
+ if (!r.reasoning && !r.redacted) {
110
+ return undefined;
111
+ }
107
112
  return (
108
113
  <Fragment key={key}>
109
114
  <div
@@ -115,7 +120,7 @@ const messageRenderers: Record<string, MessageRenderer> = {
115
120
  >
116
121
  Reasoning
117
122
  </div>
118
- <ExpandablePanel collapse={true}>
123
+ <ExpandablePanel id={`${key}-reasoning`} collapse={true}>
119
124
  <MarkdownDiv
120
125
  markdown={
121
126
  r.redacted
@@ -13,12 +13,14 @@ import { ContentTool } from "../../types";
13
13
  import styles from "./MessageContents.module.css";
14
14
 
15
15
  interface MessageContentsProps {
16
+ id: string;
16
17
  message: ChatMessageAssistant | ChatMessageSystem | ChatMessageUser;
17
18
  toolMessages: ChatMessageTool[];
18
19
  toolCallStyle: "compact" | "complete";
19
20
  }
20
21
 
21
22
  export const MessageContents: FC<MessageContentsProps> = ({
23
+ id,
22
24
  message,
23
25
  toolMessages,
24
26
  toolCallStyle,
@@ -56,6 +58,7 @@ export const MessageContents: FC<MessageContentsProps> = ({
56
58
  } else {
57
59
  return (
58
60
  <ToolCallView
61
+ id={`${id}-tool-call`}
59
62
  key={`tool-call-${idx}`}
60
63
  functionCall={functionCall}
61
64
  input={input}
@@ -98,6 +101,7 @@ const resolveToolMessage = (toolMessage?: ChatMessageTool): ContentTool[] => {
98
101
  {
99
102
  type: "text",
100
103
  text: content,
104
+ refusal: null,
101
105
  },
102
106
  ],
103
107
  },
@@ -112,6 +116,7 @@ const resolveToolMessage = (toolMessage?: ChatMessageTool): ContentTool[] => {
112
116
  {
113
117
  type: "text",
114
118
  text: con,
119
+ refusal: null,
115
120
  },
116
121
  ],
117
122
  } as ContentTool;
@@ -9,6 +9,7 @@ import {
9
9
  ContentReasoning,
10
10
  ContentText,
11
11
  ContentVideo,
12
+ Events,
12
13
  Messages,
13
14
  } from "../../types/log";
14
15
 
@@ -65,9 +66,11 @@ export const resolveMessages = (messages: Messages) => {
65
66
  }
66
67
 
67
68
  const systemMessage: ChatMessageSystem = {
69
+ id: "sys-message-6815A84B062A",
68
70
  role: "system",
69
71
  content: systemContent,
70
72
  source: "input",
73
+ internal: null,
71
74
  };
72
75
 
73
76
  // Converge them
@@ -118,8 +121,41 @@ const normalizeContent = (
118
121
  return {
119
122
  type: "text",
120
123
  text: content,
124
+ refusal: null,
121
125
  };
122
126
  } else {
123
127
  return content;
124
128
  }
125
129
  };
130
+
131
/**
 * Builds a de-duplicated, ordered list of chat messages from a list of events.
 *
 * Only events whose `event` discriminator is "model" are considered. For each
 * such event, every input message and the message of the first output choice
 * are collected, keyed by message `id`:
 *
 * - Input messages are only recorded the first time their id is seen.
 * - The output message always (re)writes the entry for its id; because a Map
 *   keeps the position of the first insertion, an update does not reorder it.
 * - Messages without an id cannot be de-duplicated and are skipped.
 *
 * @param runningEvents - The events to scan.
 * @returns The collected messages in first-seen order, or an empty array when
 *   no model event contributed a message with an id.
 */
export const messagesFromEvents = (runningEvents: Events): Messages => {
  const messages: Map<
    string,
    ChatMessageSystem | ChatMessageUser | ChatMessageAssistant | ChatMessageTool
  > = new Map();

  for (const e of runningEvents) {
    if (e.event !== "model") {
      continue;
    }

    for (const m of e.input) {
      const inputMessage = m as
        | ChatMessageSystem
        | ChatMessageUser
        | ChatMessageAssistant
        | ChatMessageTool;
      if (inputMessage.id && !messages.has(inputMessage.id)) {
        messages.set(inputMessage.id, inputMessage);
      }
    }

    // A model event may not have produced any choice (for example while the
    // generation is still in flight), so read the first choice defensively
    // instead of assuming `choices[0]` exists.
    const outputMessage = e.output?.choices?.[0]?.message;
    if (outputMessage?.id) {
      messages.set(outputMessage.id, outputMessage);
    }
  }

  // Array.from works in every runtime; Iterator.prototype.toArray() is an
  // ES2025 iterator helper that older browsers and webviews do not provide.
  return Array.from(messages.values());
};
@@ -0,0 +1,3 @@
1
+ .output { /* 1em of space above the element; ToolCallView passes styles.output to its ExpandablePanel (presumably from this module; confirm) */
2
+ padding-top: 1em;
3
+ }
@@ -10,10 +10,12 @@ import {
10
10
  ToolCallContent,
11
11
  } from "../../../types/log";
12
12
  import { MessageContent } from "../MessageContent";
13
+ import styles from "./ToolCallView.module.css";
13
14
  import { ToolInput } from "./ToolInput";
14
15
  import { ToolTitle } from "./ToolTitle";
15
16
 
16
17
  interface ToolCallViewProps {
18
+ id: string;
17
19
  functionCall: string;
18
20
  input?: string;
19
21
  highlightLanguage?: string;
@@ -43,6 +45,7 @@ interface ToolCallViewProps {
43
45
  * Renders the ToolCallView component.
44
46
  */
45
47
  export const ToolCallView: FC<ToolCallViewProps> = ({
48
+ id,
46
49
  functionCall,
47
50
  input,
48
51
  highlightLanguage,
@@ -116,7 +119,13 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
116
119
  toolCallView={view}
117
120
  />
118
121
  {hasContent ? (
119
- <ExpandablePanel collapse={collapse} border={true} lines={15}>
122
+ <ExpandablePanel
123
+ id={`${id}-tool-input`}
124
+ collapse={collapse}
125
+ border={true}
126
+ lines={15}
127
+ className={styles.output}
128
+ >
120
129
  <MessageContent contents={normalizedContent} />
121
130
  </ExpandablePanel>
122
131
  ) : undefined}
@@ -166,6 +175,7 @@ const normalizeContent = (
166
175
  {
167
176
  type: "text",
168
177
  text: String(output),
178
+ refusal: null,
169
179
  },
170
180
  ],
171
181
  },