inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,12 @@
1
- import { Capabilities } from "../types";
2
1
  import { asyncJsonParse } from "../utils/json-worker";
3
2
  import { download_file } from "./api-shared";
4
- import { LogContents, LogViewAPI } from "./types";
3
+ import {
4
+ Capabilities,
5
+ LogContents,
6
+ LogViewAPI,
7
+ PendingSampleResponse,
8
+ SampleDataResponse,
9
+ } from "./types";
5
10
 
6
11
  const loaded_time = Date.now();
7
12
  let last_eval_time = 0;
@@ -49,24 +54,188 @@ async function eval_log_headers(files: string[]) {
49
54
  return (await api("GET", `/api/log-headers?${params.toString()}`)).parsed;
50
55
  }
51
56
 
57
+ async function eval_pending_samples(
58
+ log_file: string,
59
+ etag?: string,
60
+ ): Promise<PendingSampleResponse> {
61
+ // Attach the log file
62
+ const params = new URLSearchParams();
63
+ params.append("log", log_file);
64
+
65
+ // Send the etag along
66
+ const headers: Record<string, string> = {};
67
+ if (etag) {
68
+ headers["If-None-Match"] = etag;
69
+ }
70
+
71
+ // Build up the request
72
+ const request: Request<PendingSampleResponse> = {
73
+ headers,
74
+ parse: async (text: string) => {
75
+ const pendingSamples = await asyncJsonParse(text);
76
+ return {
77
+ status: "OK",
78
+ pendingSamples,
79
+ };
80
+ },
81
+ handleError: (status: number) => {
82
+ if (status === 404) {
83
+ return {
84
+ status: "NotFound",
85
+ };
86
+ } else if (status === 304) {
87
+ return {
88
+ status: "NotModified",
89
+ };
90
+ }
91
+ },
92
+ };
93
+ // Fetch the result
94
+ const result = (
95
+ await apiRequest<PendingSampleResponse>(
96
+ "GET",
97
+ `/api/pending-samples?${params.toString()}`,
98
+ request,
99
+ )
100
+ ).parsed;
101
+
102
+ return result;
103
+ }
104
+
105
+ async function eval_log_sample_data(
106
+ log_file: string,
107
+ id: string | number,
108
+ epoch: number,
109
+ last_event?: number,
110
+ last_attachment?: number,
111
+ ): Promise<SampleDataResponse | undefined> {
112
+ const params = new URLSearchParams();
113
+ params.append("log", log_file);
114
+ params.append("id", String(id));
115
+ params.append("epoch", String(epoch));
116
+ if (last_event) {
117
+ params.append("last-event-id", String(last_event));
118
+ }
119
+
120
+ if (last_attachment) {
121
+ params.append("after-attachment-id", String(last_attachment));
122
+ }
123
+
124
+ // Build up the request
125
+ const request: Request<SampleDataResponse> = {
126
+ headers: {},
127
+ parse: async (text: string) => {
128
+ const pendingSamples = await asyncJsonParse(text);
129
+ return {
130
+ status: "OK",
131
+ sampleData: pendingSamples,
132
+ };
133
+ },
134
+ handleError: (status: number) => {
135
+ if (status === 404) {
136
+ return {
137
+ status: "NotFound",
138
+ };
139
+ } else if (status === 304) {
140
+ return {
141
+ status: "NotModified",
142
+ };
143
+ }
144
+ },
145
+ };
146
+ // Fetch the result
147
+ const result = (
148
+ await apiRequest<SampleDataResponse>(
149
+ "GET",
150
+ `/api/pending-sample-data?${params.toString()}`,
151
+ request,
152
+ )
153
+ ).parsed;
154
+
155
+ return result;
156
+ }
157
+
158
+ interface Request<T> {
159
+ headers?: Record<string, string>;
160
+ body?: string;
161
+ parse?: (text: string) => Promise<T>;
162
+ handleError?: (status: number) => T | undefined;
163
+ }
164
+
165
+ async function apiRequest<T>(
166
+ method: "GET" | "POST" | "PUT" | "DELETE",
167
+ path: string,
168
+ request: Request<T>,
169
+ ): Promise<{ raw: string; parsed: T }> {
170
+ // build headers
171
+ const responseHeaders: HeadersInit = {
172
+ Accept: "application/json",
173
+ Pragma: "no-cache",
174
+ Expires: "0",
175
+ ["Cache-Control"]: "no-cache",
176
+ ...request.headers,
177
+ };
178
+ if (request.body) {
179
+ responseHeaders["Content-Type"] = "application/json";
180
+ }
181
+
182
+ // make request
183
+ const response = await fetch(`${path}`, {
184
+ method,
185
+ headers: responseHeaders,
186
+ body: request.body,
187
+ });
188
+ if (response.ok) {
189
+ const text = await response.text();
190
+ const parse = request.parse || asyncJsonParse;
191
+ return {
192
+ parsed: (await parse(text)) as T,
193
+ raw: text,
194
+ };
195
+ } else if (response.status !== 200) {
196
+ // See if the request handler wants to handle this
197
+ const errorResponse = request.handleError
198
+ ? request.handleError(response.status)
199
+ : undefined;
200
+ if (errorResponse) {
201
+ return {
202
+ raw: response.statusText,
203
+ parsed: errorResponse,
204
+ };
205
+ }
206
+
207
+ const message = (await response.text()) || response.statusText;
208
+ const error = new Error(`Error: ${response.status}: ${message})`);
209
+ throw error;
210
+ } else {
211
+ throw new Error(`${response.status} - ${response.statusText} `);
212
+ }
213
+ }
214
+
52
215
  async function api(
53
216
  method: "GET" | "POST" | "PUT" | "DELETE",
54
217
  path: string,
218
+ headers?: Record<string, string>,
55
219
  body?: string,
56
220
  ) {
57
221
  // build headers
58
- const headers: HeadersInit = {
222
+ const responseHeaders: HeadersInit = {
59
223
  Accept: "application/json",
60
224
  Pragma: "no-cache",
61
225
  Expires: "0",
62
226
  ["Cache-Control"]: "no-cache",
227
+ ...headers,
63
228
  };
64
229
  if (body) {
65
- headers["Content-Type"] = "application/json";
230
+ responseHeaders["Content-Type"] = "application/json";
66
231
  }
67
232
 
68
233
  // make request
69
- const response = await fetch(`${path}`, { method, headers, body });
234
+ const response = await fetch(`${path}`, {
235
+ method,
236
+ headers: responseHeaders,
237
+ body,
238
+ });
70
239
  if (response.ok) {
71
240
  const text = await response.text();
72
241
  return {
@@ -121,5 +290,7 @@ const browserApi: LogViewAPI = {
121
290
  eval_log_headers,
122
291
  download_file,
123
292
  open_log_file,
293
+ eval_pending_samples,
294
+ eval_log_sample_data,
124
295
  };
125
296
  export default browserApi;
@@ -8,9 +8,20 @@ import {
8
8
  kMethodEvalLogHeaders,
9
9
  kMethodEvalLogs,
10
10
  kMethodEvalLogSize,
11
+ kMethodPendingSamples,
12
+ kMethodSampleData,
11
13
  webViewJsonRpcClient,
12
14
  } from "./jsonrpc";
13
- import { Capabilities, LogContents, LogViewAPI } from "./types";
15
+ import {
16
+ Capabilities,
17
+ LogContents,
18
+ LogViewAPI,
19
+ PendingSampleResponse,
20
+ SampleDataResponse,
21
+ } from "./types";
22
+
23
+ const kNotFoundSignal = "NotFound";
24
+ const kNotModifiedSignal = "NotModified";
14
25
 
15
26
  const vscodeClient = webViewJsonRpcClient(getVscodeApi());
16
27
 
@@ -75,6 +86,67 @@ async function eval_log_headers(files: string[]) {
75
86
  }
76
87
  }
77
88
 
89
+ async function eval_pending_samples(
90
+ log_file: string,
91
+ etag?: string,
92
+ ): Promise<PendingSampleResponse> {
93
+ // TODO: use web worked to parse when possible
94
+ const response = await vscodeClient(kMethodPendingSamples, [log_file, etag]);
95
+ if (response) {
96
+ if (response === kNotModifiedSignal) {
97
+ return {
98
+ status: "NotModified",
99
+ };
100
+ } else if (response === kNotFoundSignal) {
101
+ return {
102
+ status: "NotFound",
103
+ };
104
+ }
105
+
106
+ const json = await asyncJsonParse(response);
107
+ return {
108
+ status: "OK",
109
+ pendingSamples: json,
110
+ };
111
+ } else {
112
+ throw new Error(`Unable to load pending samples ${log_file}.`);
113
+ }
114
+ }
115
+
116
+ async function eval_log_sample_data(
117
+ log_file: string,
118
+ id: string | number,
119
+ epoch: number,
120
+ last_event?: number,
121
+ last_attachment?: number,
122
+ ): Promise<SampleDataResponse | undefined> {
123
+ const response = await vscodeClient(kMethodSampleData, [
124
+ log_file,
125
+ id,
126
+ epoch,
127
+ last_event,
128
+ last_attachment,
129
+ ]);
130
+ if (response) {
131
+ if (response === kNotModifiedSignal) {
132
+ return {
133
+ status: "NotModified",
134
+ };
135
+ } else if (response === kNotFoundSignal) {
136
+ return {
137
+ status: "NotFound",
138
+ };
139
+ }
140
+ const json = await asyncJsonParse(response);
141
+ return {
142
+ status: "OK",
143
+ sampleData: json,
144
+ };
145
+ } else {
146
+ throw new Error(`Unable to load live sample data ${log_file}.`);
147
+ }
148
+ }
149
+
78
150
  async function download_file() {
79
151
  throw Error("Downloading files is not supported in VS Code");
80
152
  }
@@ -97,6 +169,8 @@ const api: LogViewAPI = {
97
169
  eval_log_headers,
98
170
  download_file,
99
171
  open_log_file,
172
+ eval_pending_samples,
173
+ eval_log_sample_data,
100
174
  };
101
175
 
102
176
  export default api;
@@ -1,4 +1,8 @@
1
- import { openRemoteLogFile, RemoteLogFile } from "../logfile/remoteLogFile";
1
+ import {
2
+ openRemoteLogFile,
3
+ RemoteLogFile,
4
+ SampleNotFoundError,
5
+ } from "../logfile/remoteLogFile";
2
6
  import { FileSizeLimitError } from "../logfile/remoteZipFile";
3
7
  import { EvalLog, EvalSample } from "../types/log";
4
8
  import { encodePathParts } from "./api-shared";
@@ -8,6 +12,8 @@ import {
8
12
  LogContents,
9
13
  LogFiles,
10
14
  LogViewAPI,
15
+ PendingSampleResponse,
16
+ SampleDataResponse,
11
17
  } from "./types";
12
18
 
13
19
  const isEvalFile = (file: string) => {
@@ -156,19 +162,34 @@ export const clientApi = (api: LogViewAPI, log_file?: string): ClientAPI => {
156
162
  epoch: number,
157
163
  ): Promise<EvalSample | undefined> => {
158
164
  if (isEvalFile(log_file)) {
159
- const remoteLogFile = await remoteEvalFile(log_file, true);
160
- try {
161
- if (remoteLogFile) {
162
- const sample = await remoteLogFile.readSample(String(id), epoch);
163
- return sample;
164
- } else {
165
- throw new Error(`Unable to read remove eval file ${log_file}`);
165
+ async function fetchSample(useCache: boolean) {
166
+ const remoteLogFile = await remoteEvalFile(log_file, useCache);
167
+ if (!remoteLogFile) {
168
+ throw new Error(`Unable to read remote eval file ${log_file}`);
166
169
  }
167
- } catch (error) {
170
+ return await remoteLogFile.readSample(String(id), epoch);
171
+ }
172
+
173
+ function handleError(error: unknown) {
168
174
  if (error instanceof FileSizeLimitError) {
169
175
  throw new SampleSizeLimitedExceededError(id, epoch, error.maxBytes);
176
+ }
177
+ throw error;
178
+ }
179
+
180
+ try {
181
+ // First attempt with cache
182
+ return await fetchSample(true);
183
+ } catch (error) {
184
+ if (error instanceof SampleNotFoundError) {
185
+ try {
186
+ // Retry without cache
187
+ return await fetchSample(false);
188
+ } catch (retryError) {
189
+ handleError(retryError);
190
+ }
170
191
  } else {
171
- throw error;
192
+ handleError(error);
172
193
  }
173
194
  }
174
195
  } else {
@@ -263,6 +284,35 @@ export const clientApi = (api: LogViewAPI, log_file?: string): ClientAPI => {
263
284
  throw new Error("Unable to determine log paths.");
264
285
  };
265
286
 
287
+ const get_log_pending_samples = (
288
+ log_file: string,
289
+ etag?: string,
290
+ ): Promise<PendingSampleResponse> => {
291
+ if (!api.eval_pending_samples) {
292
+ throw new Error("API doesn't support streamed samples");
293
+ }
294
+ return api.eval_pending_samples(log_file, etag);
295
+ };
296
+
297
+ const get_log_sample_data = (
298
+ log_file: string,
299
+ id: string | number,
300
+ epoch: number,
301
+ last_event?: number,
302
+ last_attachment?: number,
303
+ ): Promise<SampleDataResponse | undefined> => {
304
+ if (!api.eval_log_sample_data) {
305
+ throw new Error("API doesn't supported streamed sample data");
306
+ }
307
+ return api.eval_log_sample_data(
308
+ log_file,
309
+ id,
310
+ epoch,
311
+ last_event,
312
+ last_attachment,
313
+ );
314
+ };
315
+
266
316
  return {
267
317
  client_events: () => {
268
318
  return api.client_events();
@@ -284,5 +334,11 @@ export const clientApi = (api: LogViewAPI, log_file?: string): ClientAPI => {
284
334
  ) => {
285
335
  return api.download_file(download_file, file_contents);
286
336
  },
337
+ get_log_pending_samples: api.eval_pending_samples
338
+ ? get_log_pending_samples
339
+ : undefined,
340
+ get_log_sample_data: api.eval_log_sample_data
341
+ ? get_log_sample_data
342
+ : undefined,
287
343
  };
288
344
  };
@@ -39,6 +39,8 @@ export const kMethodEvalLog = "eval_log";
39
39
  export const kMethodEvalLogSize = "eval_log_size";
40
40
  export const kMethodEvalLogBytes = "eval_log_bytes";
41
41
  export const kMethodEvalLogHeaders = "eval_log_headers";
42
+ export const kMethodPendingSamples = "eval_log_pending_samples";
43
+ export const kMethodSampleData = "eval_log_sample_data";
42
44
 
43
45
  export const kJsonRpcParseError = -32700;
44
46
  export const kJsonRpcInvalidRequest = -32600;
@@ -1,4 +1,5 @@
1
1
  import {
2
+ ApprovalEvent,
2
3
  EvalError,
3
4
  EvalLog,
4
5
  EvalPlan,
@@ -6,11 +7,22 @@ import {
6
7
  EvalSample,
7
8
  EvalSpec,
8
9
  EvalStats,
10
+ InfoEvent,
9
11
  Input,
12
+ LoggerEvent,
13
+ ModelEvent,
14
+ SampleInitEvent,
15
+ SampleLimitEvent,
16
+ SandboxEvent,
17
+ ScoreEvent,
10
18
  Scores1,
19
+ StateEvent,
11
20
  Status,
21
+ StepEvent,
22
+ StoreEvent,
23
+ SubtaskEvent,
12
24
  Target,
13
- Type11,
25
+ ToolEvent,
14
26
  Version,
15
27
  } from "../types/log";
16
28
 
@@ -25,6 +37,66 @@ export interface EvalSummary {
25
37
  sampleSummaries: SampleSummary[];
26
38
  }
27
39
 
40
+ export interface PendingSampleResponse {
41
+ pendingSamples?: PendingSamples;
42
+ status: "NotModified" | "NotFound" | "OK";
43
+ }
44
+
45
+ export interface SampleDataResponse {
46
+ sampleData?: SampleData;
47
+ status: "NotModified" | "NotFound" | "OK";
48
+ }
49
+
50
+ export interface RunningMetric {
51
+ scorer: string;
52
+ name: string;
53
+ value?: number;
54
+ reducer?: string;
55
+ }
56
+
57
+ export interface PendingSamples {
58
+ metrics?: RunningMetric[];
59
+ samples: SampleSummary[];
60
+ refresh: number;
61
+ etag?: string;
62
+ }
63
+
64
+ export interface SampleData {
65
+ events: EventData[];
66
+ attachments: AttachmentData[];
67
+ }
68
+
69
+ export interface EventData {
70
+ id: number;
71
+ event_id: string;
72
+ sample_id: string;
73
+ epoch: number;
74
+ event:
75
+ | SampleInitEvent
76
+ | SampleLimitEvent
77
+ | SandboxEvent
78
+ | StateEvent
79
+ | StoreEvent
80
+ | ModelEvent
81
+ | ToolEvent
82
+ | ApprovalEvent
83
+ | InputEvent
84
+ | ScoreEvent
85
+ | ErrorEvent
86
+ | LoggerEvent
87
+ | InfoEvent
88
+ | StepEvent
89
+ | SubtaskEvent;
90
+ }
91
+
92
+ export interface AttachmentData {
93
+ id: number;
94
+ sample_id: string;
95
+ epoch: number;
96
+ hash: string;
97
+ content: string;
98
+ }
99
+
28
100
  export interface EvalLogHeader {
29
101
  version?: Version;
30
102
  status?: Status;
@@ -42,7 +114,8 @@ export interface SampleSummary {
42
114
  target: Target;
43
115
  scores: Scores1;
44
116
  error?: string;
45
- limit?: Type11;
117
+ limit?: string;
118
+ completed?: boolean;
46
119
  }
47
120
 
48
121
  export interface BasicSampleData {
@@ -55,6 +128,9 @@ export interface BasicSampleData {
55
128
  export interface Capabilities {
56
129
  downloadFiles: boolean;
57
130
  webWorkers: boolean;
131
+ streamSamples: boolean;
132
+ streamSampleData: boolean;
133
+ nativeFind: boolean;
58
134
  }
59
135
 
60
136
  export interface LogViewAPI {
@@ -77,6 +153,17 @@ export interface LogViewAPI {
77
153
  filecontents: string | Blob | ArrayBuffer | ArrayBufferView,
78
154
  ) => Promise<void>;
79
155
  open_log_file: (logFile: string, log_dir: string) => Promise<void>;
156
+ eval_pending_samples?: (
157
+ log_file: string,
158
+ etag?: string,
159
+ ) => Promise<PendingSampleResponse>;
160
+ eval_log_sample_data?: (
161
+ log_file: string,
162
+ id: string | number,
163
+ epoch: number,
164
+ last_event?: number,
165
+ last_attachment?: number,
166
+ ) => Promise<SampleDataResponse | undefined>;
80
167
  }
81
168
 
82
169
  export interface ClientAPI {
@@ -94,6 +181,24 @@ export interface ClientAPI {
94
181
  file_contents: string | Blob | ArrayBuffer | ArrayBufferView,
95
182
  ) => Promise<void>;
96
183
  open_log_file: (log_file: string, log_dir: string) => Promise<void>;
184
+
185
+ get_log_pending_samples?: (
186
+ log_file: string,
187
+ etag?: string,
188
+ ) => Promise<PendingSampleResponse>;
189
+ get_log_sample_data?: (
190
+ log_file: string,
191
+ id: string | number,
192
+ epoch: number,
193
+ last_event?: number,
194
+ last_attachment?: number,
195
+ ) => Promise<SampleDataResponse | undefined>;
196
+ }
197
+
198
+ export interface ClientStorage {
199
+ getItem: (name: string) => unknown;
200
+ setItem: (name: string, value: unknown) => void;
201
+ removeItem: (name: string) => void;
97
202
  }
98
203
 
99
204
  export interface FetchResponse {
@@ -66,11 +66,13 @@ export const ApplicationIcons = {
66
66
  menu: "bi bi-list",
67
67
  messages: "bi bi-chat-right-text",
68
68
  metadata: "bi bi-table",
69
+ metrics: "bi bi-clipboard-data",
69
70
  model: "bi bi-grid-3x3-gap",
70
71
  "toggle-right": "bi bi-chevron-right",
71
72
  more: "bi bi-zoom-in",
72
73
  "multiple-choice": "bi bi-card-list",
73
74
  next: "bi bi-chevron-right",
75
+ noSamples: "bi bi-ban",
74
76
  play: "bi bi-play-fill",
75
77
  previous: "bi bi-chevron-left",
76
78
  refresh: "bi bi-arrow-clockwise",
@@ -1,6 +1,6 @@
1
1
  import * as AsciicinemaPlayerJS from "asciinema-player";
2
2
  import "asciinema-player/dist/bundle/asciinema-player.css";
3
- import React, { useEffect, useRef } from "react";
3
+ import { CSSProperties, FC, useEffect, useRef } from "react";
4
4
 
5
5
  interface AsciinemaPlayerProps {
6
6
  id?: string;
@@ -10,7 +10,7 @@ interface AsciinemaPlayerProps {
10
10
  rows?: number;
11
11
  cols?: number;
12
12
  fit?: string;
13
- style?: React.CSSProperties;
13
+ style?: CSSProperties;
14
14
  speed?: number;
15
15
  autoPlay?: boolean;
16
16
  loop?: boolean;
@@ -19,7 +19,7 @@ interface AsciinemaPlayerProps {
19
19
  className?: string;
20
20
  }
21
21
 
22
- export const AsciinemaPlayer: React.FC<AsciinemaPlayerProps> = ({
22
+ export const AsciinemaPlayer: FC<AsciinemaPlayerProps> = ({
23
23
  id,
24
24
  rows,
25
25
  cols,
@@ -14,11 +14,13 @@ interface CardHeaderProps {
14
14
  interface CardBodyProps {
15
15
  id?: string;
16
16
  children?: ReactNode;
17
+ className?: string | string[];
17
18
  }
18
19
 
19
20
  interface CardProps {
20
21
  id?: string;
21
22
  children?: ReactNode;
23
+ className?: string | string[];
22
24
  }
23
25
 
24
26
  interface CardCollapsingHeaderProps {
@@ -51,17 +53,17 @@ export const CardHeader: FC<CardHeaderProps> = ({
51
53
  );
52
54
  };
53
55
 
54
- export const CardBody: FC<CardBodyProps> = ({ id, children }) => {
56
+ export const CardBody: FC<CardBodyProps> = ({ id, children, className }) => {
55
57
  return (
56
- <div className={"card-body"} id={id || ""}>
58
+ <div className={clsx("card-body", className)} id={id || ""}>
57
59
  {children}
58
60
  </div>
59
61
  );
60
62
  };
61
63
 
62
- export const Card: FC<CardProps> = ({ id, children }) => {
64
+ export const Card: FC<CardProps> = ({ id, children, className }) => {
63
65
  return (
64
- <div className={"card"} id={id}>
66
+ <div className={clsx("card", className)} id={id}>
65
67
  {children}
66
68
  </div>
67
69
  );
@@ -1,4 +1,4 @@
1
- import React from "react";
1
+ import { FC } from "react";
2
2
  import { DownloadButton } from "../components/DownloadButton";
3
3
  import "./DownloadPanel.css";
4
4
 
@@ -9,7 +9,7 @@ interface DownloadPanelProps {
9
9
  fileContents: string | Blob | ArrayBuffer | ArrayBufferView;
10
10
  }
11
11
 
12
- export const DownloadPanel: React.FC<DownloadPanelProps> = ({
12
+ export const DownloadPanel: FC<DownloadPanelProps> = ({
13
13
  message,
14
14
  buttonLabel,
15
15
  fileName,