inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -12,683 +12,133 @@ import "prismjs/themes/prism.css";
12
12
 
13
13
  import "../App.css";
14
14
 
15
- import { AppErrorBoundary } from "./AppErrorBoundary";
16
15
  import { ErrorPanel } from "./components/ErrorPanel";
17
16
  import { ProgressBar } from "./components/ProgressBar";
18
- import { clearDocumentSelection } from "./utils/browser";
19
- import { debounce, sleep } from "./utils/sync";
20
17
 
21
18
  import { FindBand } from "./components/FindBand";
22
- import { kDefaultSort } from "./constants";
23
- import {
24
- createEvalDescriptor,
25
- createSamplesDescriptor,
26
- } from "./samples/descriptor/samplesDescriptor";
27
- import { filterSamples } from "./samples/sample-tools/filters";
28
- import {
29
- byEpoch,
30
- bySample,
31
- sortSamples,
32
- } from "./samples/sample-tools/SortFilter";
33
- import { resolveAttachments } from "./utils/attachments";
34
- import { getVscodeApi } from "./utils/vscode";
35
19
  import { Sidebar } from "./workspace/sidebar/Sidebar.tsx";
36
20
  import { WorkSpace } from "./workspace/WorkSpace";
37
21
 
38
22
  import ClipboardJS from "clipboard";
39
23
  import clsx from "clsx";
40
- import { FC, useCallback, useEffect, useMemo, useRef, useState } from "react";
41
- import {
42
- ClientAPI,
43
- EvalLogHeader,
44
- EvalSummary,
45
- HostMessage,
46
- LogFiles,
47
- SampleSummary,
48
- } from "./api/types.ts";
49
- import {
50
- kEvalWorkspaceTabId,
51
- kInfoWorkspaceTabId,
52
- kSampleMessagesTabId,
53
- kSampleTranscriptTabId,
54
- } from "./constants";
55
- import {
56
- ApplicationState,
57
- AppStatus,
58
- Capabilities,
59
- CurrentLog,
60
- ScoreFilter,
61
- ScoreLabel,
62
- } from "./types.ts";
63
- import { EvalSample } from "./types/log";
24
+ import { FC, KeyboardEvent, useCallback, useEffect, useRef } from "react";
25
+ import { ClientAPI, HostMessage } from "./api/types.ts";
26
+ import { useSetSelectedLogIndex } from "./state/hooks.ts";
27
+ import { useStore } from "./state/store.ts";
64
28
 
65
29
  interface AppProps {
66
30
  api: ClientAPI;
67
- applicationState?: ApplicationState;
68
- saveApplicationState?: (state: ApplicationState) => void;
69
- pollForLogs: boolean;
70
- capabilities: Capabilities;
71
31
  }
72
32
 
73
33
  /**
74
34
  * Renders the Main Application
75
35
  */
76
- export const App: FC<AppProps> = ({
77
- api,
78
- applicationState,
79
- saveApplicationState,
80
- pollForLogs = true,
81
- capabilities,
82
- }) => {
83
- // List of Logs
84
- const [logs, setLogs] = useState<LogFiles>(
85
- applicationState?.logs || { log_dir: "", files: [] },
36
+ export const App: FC<AppProps> = ({ api }) => {
37
+ // App layout and state
38
+ const appStatus = useStore((state) => state.app.status);
39
+ const setAppStatus = useStore((state) => state.appActions.setStatus);
40
+ const offCanvas = useStore((state) => state.app.offcanvas);
41
+ const setOffCanvas = useStore((state) => state.appActions.setOffcanvas);
42
+ const clearWorkspaceTab = useStore(
43
+ (state) => state.appActions.clearWorkspaceTab,
86
44
  );
87
- const [selectedLogIndex, setSelectedLogIndex] = useState<number>(
88
- applicationState?.selectedLogIndex !== undefined
89
- ? applicationState.selectedLogIndex
90
- : -1,
45
+ const clearSampleTab = useStore((state) => state.appActions.clearSampleTab);
46
+
47
+ // Find
48
+ const nativeFind = useStore((state) => state.capabilities.nativeFind);
49
+ const showFind = useStore((state) => state.app.showFind);
50
+ const setShowFind = useStore((state) => state.appActions.setShowFind);
51
+ const hideFind = useStore((state) => state.appActions.hideFind);
52
+
53
+ // Logs Data
54
+ const logs = useStore((state) => state.logs.logs);
55
+ const selectedLogIndex = useStore((state) => state.logs.selectedLogIndex);
56
+ const logHeaders = useStore((state) => state.logs.logHeaders);
57
+ const headersLoading = useStore((state) => state.logs.headersLoading);
58
+ const setLogs = useStore((state) => state.logsActions.setLogs);
59
+ const selectedLogFile = useStore((state) =>
60
+ state.logsActions.getSelectedLogFile(),
91
61
  );
62
+ const setSelectedLogIndex = useSetSelectedLogIndex();
92
63
 
93
- // Log Headers
94
- const [logHeaders, setLogHeaders] = useState<Record<string, EvalLogHeader>>(
95
- applicationState?.logHeaders || {},
96
- );
97
- const [headersLoading, setHeadersLoading] = useState<boolean>(
98
- applicationState?.headersLoading || false,
99
- );
100
-
101
- const [selectedLog, setSelectedLog] = useState<CurrentLog | undefined>(
102
- applicationState?.selectedLog,
103
- );
104
-
105
- // Workspace (the selected tab)
106
- const [selectedWorkspaceTab, setSelectedWorkspaceTab] = useState<string>(
107
- applicationState?.selectedWorkspaceTab || kEvalWorkspaceTabId,
108
- );
109
- const [selectedSampleIndex, setSelectedSampleIndex] = useState<number>(
110
- applicationState?.selectedSampleIndex !== undefined
111
- ? applicationState.selectedSampleIndex
112
- : -1,
113
- );
114
- const [selectedSample, setSelectedSample] = useState<EvalSample | undefined>(
115
- applicationState?.selectedSample,
116
- );
117
- const [sampleStatus, setSampleStatus] = useState<"loading" | "ok" | "error">(
118
- applicationState?.sampleStatus || "loading",
119
- );
120
- const [sampleError, setSampleError] = useState<Error | undefined>(
121
- applicationState?.sampleError,
122
- );
123
- const [selectedSampleTab, setSelectedSampleTab] = useState<
124
- string | undefined
125
- >(applicationState?.selectedSampleTab);
126
- const sampleScrollPosition = useRef<number>(
127
- applicationState?.sampleScrollPosition || 0,
128
- );
129
- const loadingSampleIndexRef = useRef<number | null>(null);
130
- const workspaceTabScrollPosition = useRef<Record<string, number>>(
131
- applicationState?.workspaceTabScrollPosition || {},
132
- );
133
-
134
- const [showingSampleDialog, setShowingSampleDialog] = useState<boolean>(
135
- !!applicationState?.showingSampleDialog,
136
- );
137
-
138
- // App loading status
139
- const [status, setStatus] = useState<AppStatus>(
140
- applicationState?.status || { loading: false },
141
- );
142
-
143
- // Other application state
144
- const [offcanvas, setOffcanvas] = useState<boolean>(
145
- applicationState?.offcanvas || false,
146
- );
147
- const [showFind, setShowFind] = useState<boolean>(
148
- applicationState?.showFind || false,
149
- );
150
-
151
- // Filtering and sorting
152
- const [filter, setFilter] = useState<ScoreFilter>(
153
- applicationState?.filter || {},
154
- );
155
-
156
- const [epoch, setEpoch] = useState<string>(applicationState?.epoch || "all");
157
- const [sort, setSort] = useState<string>(
158
- applicationState?.sort || kDefaultSort,
159
- );
160
-
161
- const [scores, setScores] = useState<ScoreLabel[]>(
162
- applicationState?.scores || [],
163
- );
164
-
165
- const [score, setScore] = useState<ScoreLabel | undefined>(
166
- applicationState?.score,
167
- );
168
-
169
- // Re-filter the samples
170
- const [filteredSamples, setFilteredSamples] = useState<SampleSummary[]>(
171
- applicationState?.filteredSamples || [],
172
- );
173
- const [groupBy, setGroupBy] = useState<"none" | "epoch" | "sample">(
174
- applicationState?.groupBy || "none",
175
- );
176
- const [groupByOrder, setGroupByOrder] = useState<"asc" | "desc">(
177
- applicationState?.groupByOrder || "asc",
178
- );
179
-
180
- const saveState = useCallback(() => {
181
- const state = {
182
- logs,
183
- selectedLogIndex,
184
- logHeaders,
185
- headersLoading,
186
- selectedLog,
187
- selectedSampleIndex,
188
- selectedWorkspaceTab,
189
- selectedSample,
190
- sampleStatus,
191
- sampleError,
192
- selectedSampleTab,
193
- showingSampleDialog,
194
- status,
195
- offcanvas,
196
- showFind,
197
- filter,
198
- epoch,
199
- sort,
200
- scores,
201
- score,
202
- filteredSamples,
203
- groupBy,
204
- groupByOrder,
205
- sampleScrollPosition: sampleScrollPosition.current,
206
- workspaceTabScrollPosition: workspaceTabScrollPosition.current,
207
- };
208
- if (saveApplicationState) {
209
- saveApplicationState(state);
210
- }
211
- }, [
212
- logs,
213
- selectedLogIndex,
214
- logHeaders,
215
- headersLoading,
216
- selectedLog,
217
- selectedSampleIndex,
218
- selectedWorkspaceTab,
219
- selectedSample,
220
- sampleStatus,
221
- sampleError,
222
- selectedSampleTab,
223
- showingSampleDialog,
224
- status,
225
- offcanvas,
226
- showFind,
227
- filter,
228
- epoch,
229
- sort,
230
- scores,
231
- score,
232
- filteredSamples,
233
- groupBy,
234
- groupByOrder,
235
- ]);
236
-
237
- const saveStateRef = useRef(saveState);
238
- // Update the ref whenever saveState changes
239
- useEffect(() => {
240
- saveStateRef.current = saveState;
241
- }, [saveState]);
242
-
243
- const setSampleScrollPosition = useCallback(
244
- debounce((position) => {
245
- sampleScrollPosition.current = position;
246
- saveStateRef.current();
247
- }, 1000),
248
- [],
249
- );
250
-
251
- const setWorkspaceTabScrollPosition = useCallback(
252
- debounce((tab, position) => {
253
- if (workspaceTabScrollPosition.current[tab] !== position) {
254
- workspaceTabScrollPosition.current = {
255
- ...workspaceTabScrollPosition.current,
256
- [tab]: position,
257
- };
258
- saveStateRef.current();
259
- }
260
- }, 1000),
261
- [],
262
- );
64
+ const refreshLogs = useStore((state) => state.logsActions.refreshLogs);
65
+ const selectLogFile = useStore((state) => state.logsActions.selectLogFile);
263
66
 
264
- // Save state when it changes, so that we can restore it later
265
- //
266
- useEffect(() => {
267
- saveStateRef.current();
268
- }, [
269
- logs,
270
- selectedLogIndex,
271
- logHeaders,
272
- headersLoading,
273
- selectedLog,
274
- selectedSampleIndex,
275
- selectedWorkspaceTab,
276
- selectedSample,
277
- sampleStatus,
278
- sampleError,
279
- selectedSampleTab,
280
- showingSampleDialog,
281
- status,
282
- offcanvas,
283
- showFind,
284
- filter,
285
- epoch,
286
- sort,
287
- scores,
288
- score,
289
- filteredSamples,
290
- groupBy,
291
- groupByOrder,
292
- ]);
293
-
294
- const handleSampleShowingDialog = useCallback(
295
- (show: boolean) => {
296
- setShowingSampleDialog(show);
297
- if (!show) {
298
- setSelectedSample(undefined);
299
- setSelectedSampleTab(undefined);
300
- }
301
- },
302
- [
303
- setShowingSampleDialog,
304
- setSelectedSample,
305
- setSelectedSampleTab,
306
- selectedSample,
307
- ],
67
+ // Log Data
68
+ const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
69
+ const loadedLogFile = useStore((state) => state.log.loadedLog);
70
+ const runningMetrics = useStore(
71
+ (state) => state.log.pendingSampleSummaries?.metrics,
308
72
  );
309
-
310
- useEffect(() => {
311
- const samples = selectedLog?.contents?.sampleSummaries || [];
312
- const { result: prefiltered } =
313
- evalDescriptor && filter?.value
314
- ? filterSamples(evalDescriptor, samples, filter.value)
315
- : { result: samples };
316
-
317
- const filtered = prefiltered.filter((sample) => {
318
- // Filter by epoch if specified
319
- if (epoch && epoch !== "all") {
320
- if (epoch !== String(sample.epoch)) {
321
- return false;
322
- }
323
- }
324
- return true;
325
- });
326
-
327
- // Sort the samples
328
- if (samplesDescriptor) {
329
- const { sorted, order } = sortSamples(sort, filtered, samplesDescriptor);
330
- setFilteredSamples(sorted);
331
- setGroupByOrder(order);
332
- }
333
-
334
- // Set the grouping
335
- let grouping: "none" | "epoch" | "sample" = "none";
336
- if (
337
- samplesDescriptor?.evalDescriptor?.epochs &&
338
- samplesDescriptor.evalDescriptor.epochs > 1
339
- ) {
340
- if (byEpoch(sort) || epoch !== "all") {
341
- grouping = "epoch";
342
- } else if (bySample(sort)) {
343
- grouping = "sample";
344
- }
345
- }
346
- setGroupBy(grouping);
347
- }, [selectedLog, filter, sort, epoch]);
348
-
349
- const evalDescriptor = useMemo(() => {
350
- return createEvalDescriptor(
351
- scores,
352
- selectedLog?.contents?.eval?.config?.epochs || 1,
353
- selectedLog?.contents?.sampleSummaries,
354
- );
355
- }, [selectedLog, scores]);
356
-
357
- const samplesDescriptor = useMemo(() => {
358
- return evalDescriptor && score
359
- ? createSamplesDescriptor(evalDescriptor, score)
360
- : undefined;
361
- }, [evalDescriptor, score]);
362
-
363
- useEffect(() => {
364
- if (selectedSampleTab === undefined && selectedSample) {
365
- setSelectedSampleTab(
366
- selectedSample.events && selectedSample.events.length > 0
367
- ? kSampleTranscriptTabId
368
- : kSampleMessagesTabId,
369
- );
370
- }
371
- }, [selectedSample, selectedSampleTab]);
73
+ const resetFiltering = useStore((state) => state.logActions.resetFiltering);
74
+ const loadLog = useStore((state) => state.logActions.loadLog);
75
+ const pollLog = useStore((state) => state.logActions.pollLog);
76
+ const refreshLog = useStore((state) => state.logActions.refreshLog);
77
+ const selectSample = useStore((state) => state.logActions.selectSample);
372
78
 
373
79
  // The main application reference
374
80
  const mainAppRef = useRef<HTMLDivElement>(null);
375
81
 
376
- // Loads a sample
377
- useEffect(() => {
378
- // Clear the selected sample
379
- if (!selectedLog || selectedSampleIndex === -1) {
380
- setSelectedSample(undefined);
381
- return;
382
- }
383
-
384
- // If already loading the selected sample, do nothing
385
- if (loadingSampleIndexRef.current === selectedSampleIndex) {
386
- return;
387
- }
388
-
389
- if (
390
- !showingSampleDialog &&
391
- selectedLog.contents.sampleSummaries.length > 1
392
- ) {
393
- return;
394
- }
395
-
396
- if (selectedSampleIndex < filteredSamples.length) {
397
- const summary = filteredSamples[selectedSampleIndex];
398
- // If this sample is already loaded, don't bother
399
- if (
400
- selectedSample &&
401
- selectedSample.id === summary.id &&
402
- selectedSample.epoch === summary.epoch
403
- ) {
404
- return;
405
- }
406
-
407
- // Load the selected sample (if not already loaded)
408
- loadingSampleIndexRef.current = selectedSampleIndex;
409
- setSampleStatus("loading");
410
- setSampleError(undefined);
411
-
412
- api
413
- .get_log_sample(selectedLog.name, summary.id, summary.epoch)
414
- .then((sample) => {
415
- if (sample) {
416
- // This migrates old samples (with raw transcript element)
417
- // to the new structure (hence the type bypass).
418
- const anySample = sample as any;
419
- if (anySample.transcript) {
420
- sample.events = anySample.transcript.events;
421
- sample.attachments = anySample.transcript.content;
422
- }
423
- sample.attachments = sample.attachments || {};
424
- sample.input = resolveAttachments(sample.input, sample.attachments);
425
- sample.messages = resolveAttachments(
426
- sample.messages,
427
- sample.attachments,
428
- );
429
- sample.events = resolveAttachments(
430
- sample.events,
431
- sample.attachments,
432
- );
433
- sample.attachments = {};
434
-
435
- sampleScrollPosition.current = 0;
436
- setSelectedSample(sample);
437
-
438
- setSampleStatus("ok");
439
- loadingSampleIndexRef.current = null;
440
- } else {
441
- throw Error("Unable to load sample - an unknown error occurred.");
442
- }
443
- })
444
- .catch((e) => {
445
- setSampleStatus("error");
446
- setSampleError(e);
447
-
448
- sampleScrollPosition.current = 0;
449
- setSelectedSample(undefined);
450
-
451
- loadingSampleIndexRef.current = null;
452
- });
453
- }
454
- }, [
455
- selectedSample,
456
- selectedSampleIndex,
457
- showingSampleDialog,
458
- selectedLog,
459
- filteredSamples,
460
- setSelectedSample,
461
- setSampleStatus,
462
- setSampleError,
463
- ]);
464
-
465
- // Read header information for the logs
466
- // and then update
467
- useEffect(() => {
468
- const loadHeaders = async () => {
469
- setHeadersLoading(true);
470
-
471
- // Group into chunks
472
- const chunkSize = 8;
473
- const fileLists = [];
474
- for (let i = 0; i < logs.files.length; i += chunkSize) {
475
- let chunk = logs.files.slice(i, i + chunkSize).map((log) => log.name);
476
- fileLists.push(chunk);
477
- }
478
-
479
- // Chunk by chunk, read the header information
480
- try {
481
- for (const fileList of fileLists) {
482
- const headers = await api.get_log_headers(fileList);
483
- setLogHeaders((prev) => {
484
- const updatedHeaders: Record<string, EvalLogHeader> = {};
485
- headers.forEach((header, index) => {
486
- const logFile = fileList[index];
487
- updatedHeaders[logFile] = header as EvalLogHeader;
488
- });
489
- return { ...prev, ...updatedHeaders };
490
- });
491
-
492
- if (headers.length === chunkSize) {
493
- await sleep(5000); // Pause between chunks
494
- }
495
- }
496
- } catch (e: unknown) {
497
- if (
498
- e instanceof Error &&
499
- (e.message === "Load failed" || e.message === "Failed to fetch")
500
- ) {
501
- // This will happen if the server disappears (e.g. inspect view is terminated)
502
- setStatus({ loading: false });
503
- } else {
504
- console.log(e);
505
- setStatus({ loading: false, error: e as Error });
506
- }
507
- }
508
- setHeadersLoading(false);
509
- };
510
-
511
- loadHeaders();
512
- }, [logs, setStatus, setLogHeaders, setHeadersLoading]);
513
-
514
- /**
515
- * Resets the workspace tab based on the provided log's state.
516
- *
517
- * Determines whether the workspace tab should display samples or info,
518
- * depending on the presence of samples and the log status.
519
- */
520
- const resetWorkspace = useCallback(
521
- (log: EvalSummary) => {
522
- // Reset the workspace tab
523
- const hasSamples =
524
- !!log.sampleSummaries && log.sampleSummaries.length > 0;
525
- const showSamples = hasSamples;
526
- setSelectedWorkspaceTab(
527
- log.status !== "error" && hasSamples
528
- ? kEvalWorkspaceTabId
529
- : kInfoWorkspaceTabId,
530
- );
531
-
532
- // Select the default scorer to use
533
- const scorer = defaultScorer(log);
534
- const scorers = defaultScorers(log);
535
-
536
- // Reset state
537
- setScores(scorers);
538
- setScore(scorer);
539
-
540
- setEpoch("all");
541
- setFilter({});
542
- setSort(kDefaultSort);
543
-
544
- // Reset the sample tab
545
- setSelectedSampleTab(undefined);
546
- setSelectedSample(undefined);
547
- if (showSamples) {
548
- setSelectedSampleIndex(0);
549
- } else {
550
- setSelectedSampleIndex(-1);
551
- }
552
-
553
- workspaceTabScrollPosition.current = {};
554
- },
555
- [setSelectedWorkspaceTab],
556
- );
557
-
558
82
  // Load a specific log
559
83
  useEffect(() => {
560
84
  const loadSpecificLog = async () => {
561
- const targetLog = logs.files[selectedLogIndex];
562
- if (targetLog && (!selectedLog || selectedLog.name !== targetLog.name)) {
85
+ if (selectedLogFile && selectedLogFile !== loadedLogFile) {
563
86
  try {
564
- setStatus({ loading: true, error: undefined });
565
- const logContents = await loadLog(targetLog.name);
566
- if (logContents) {
567
- const log = logContents;
568
- setSelectedLog({
569
- contents: log,
570
- name: targetLog.name,
571
- });
572
-
573
- // Reset the workspace tab
574
- resetWorkspace(log);
575
-
576
- setStatus({ loading: false, error: undefined });
577
- }
87
+ // Set loading first and wait for it to update
88
+ setAppStatus({ loading: true, error: undefined });
89
+
90
+ // Then load the log
91
+ await loadLog(selectedLogFile);
92
+ selectSample(0);
93
+
94
+ // Finally set loading to false
95
+ setAppStatus({ loading: false, error: undefined });
578
96
  } catch (e) {
579
97
  console.log(e);
580
- setStatus({ loading: false, error: e as Error });
98
+ setAppStatus({ loading: false, error: e as Error });
581
99
  }
582
- } else if (logs.log_dir && logs.files.length === 0) {
583
- setStatus({
584
- loading: false,
585
- error: new Error(
586
- `No log files to display in the directory ${logs.log_dir}. Are you sure this is the correct log directory?`,
587
- ),
588
- });
589
100
  }
590
101
  };
591
102
 
592
103
  loadSpecificLog();
593
- }, [selectedLogIndex, logs, selectedLog, setSelectedLog, setStatus]);
104
+ }, [selectedLogFile, loadedLogFile, loadLog, setAppStatus]);
594
105
 
595
- // Load the list of logs
596
- const loadLogs = async (): Promise<LogFiles> => {
597
- try {
598
- const result = await api.get_log_paths();
599
-
600
- return result;
601
- } catch (e) {
602
- // Show an error
603
- console.log(e);
604
- setStatus({ loading: false, error: e as Error });
605
- return { log_dir: "", files: [] };
106
+ useEffect(() => {
107
+ // If the component re-mounts and there is a running load loaded
108
+ // start up polling
109
+ const doPoll = async () => {
110
+ await pollLog();
111
+ };
112
+ if (selectedLogSummary?.status === "started") {
113
+ doPoll();
606
114
  }
607
- };
115
+ }, []);
608
116
 
609
- // Load a specific log file
610
- const loadLog = async (logFileName: string) => {
611
- try {
612
- const logContents = await api.get_log_summary(logFileName);
613
- return logContents;
614
- } catch (e) {
615
- // Show an error
616
- console.log(e);
617
- setStatus({ loading: false, error: e as Error });
117
+ useEffect(() => {
118
+ if (logs.log_dir && logs.files.length === 0) {
119
+ setAppStatus({
120
+ loading: false,
121
+ error: new Error(
122
+ `No log files to display in the directory ${logs.log_dir}. Are you sure this is the correct log directory?`,
123
+ ),
124
+ });
618
125
  }
619
- };
126
+ }, [logs.log_dir, logs.files.length]);
620
127
 
621
- const refreshLog = useCallback(async () => {
128
+ const appRefreshLog = useCallback(() => {
622
129
  try {
623
- setStatus({ loading: true, error: undefined });
624
- const targetLog = logs.files[selectedLogIndex];
625
- const logContents = await loadLog(targetLog.name);
626
- if (logContents) {
627
- const log = logContents;
628
- if (log.status !== "started") {
629
- setLogHeaders((prev) => {
630
- const updatedState = { ...prev };
631
- const freshHeaders: EvalLogHeader = {
632
- eval: log.eval,
633
- plan: log.plan,
634
- results: log.results !== null ? log.results : undefined,
635
- stats: log.stats,
636
- status: log.status,
637
- version: log.version,
638
- };
639
- updatedState[targetLog.name] = freshHeaders;
640
- return updatedState;
641
- });
642
- }
643
-
644
- setSelectedLog({
645
- contents: log,
646
- name: targetLog.name,
647
- });
130
+ setAppStatus({ loading: true, error: undefined });
648
131
 
649
- // Reset the workspace tab
650
- resetWorkspace(log);
132
+ refreshLog();
133
+ resetFiltering();
651
134
 
652
- setStatus({ loading: false, error: undefined });
653
- }
135
+ setAppStatus({ loading: false, error: undefined });
654
136
  } catch (e) {
655
137
  // Show an error
656
138
  console.log(e);
657
- setStatus({ loading: false, error: e as Error });
139
+ setAppStatus({ loading: false, error: e as Error });
658
140
  }
659
- }, [logs, selectedLogIndex, setStatus, setSelectedLog, setLogHeaders]);
660
-
661
- const showLogFile = useCallback(
662
- async (logUrl: string) => {
663
- const index = logs.files.findIndex((val) => {
664
- return logUrl.endsWith(val.name);
665
- });
666
- if (index > -1) {
667
- setSelectedLogIndex(index);
668
- } else {
669
- const result = await loadLogs();
670
- const idx = result?.files.findIndex((file) => {
671
- return logUrl.endsWith(file.name);
672
- });
673
- setLogs(result || { log_dir: "", files: [] });
674
- setSelectedLogIndex(idx && idx > -1 ? idx : 0);
675
- }
676
- },
677
- [logs, setSelectedLogIndex, setLogs],
678
- );
679
-
680
- const refreshLogList = useCallback(async () => {
681
- const currentLog = logs.files[selectedLogIndex > -1 ? selectedLogIndex : 0];
682
- const refreshedLogs = await loadLogs();
683
- setLogs(refreshedLogs || { log_dir: "", files: [] });
684
-
685
- const newIndex = refreshedLogs?.files.findIndex((file) => {
686
- return currentLog.name.endsWith(file.name);
687
- });
688
- if (newIndex !== undefined) {
689
- setSelectedLogIndex(newIndex);
690
- }
691
- }, [logs, selectedLogIndex, setSelectedLogIndex, setLogs]);
141
+ }, [refreshLog, resetFiltering, setAppStatus]);
692
142
 
693
143
  const onMessage = useCallback(
694
144
  async (e: HostMessage) => {
@@ -696,7 +146,7 @@ export const App: FC<AppProps> = ({
696
146
  case "updateState": {
697
147
  if (e.data.url) {
698
148
  const decodedUrl = decodeURIComponent(e.data.url);
699
- showLogFile(decodedUrl);
149
+ selectLogFile(decodedUrl);
700
150
  }
701
151
  break;
702
152
  }
@@ -706,18 +156,18 @@ export const App: FC<AppProps> = ({
706
156
  const isFocused = document.hasFocus();
707
157
  if (!isFocused) {
708
158
  if (log_dir === logs.log_dir) {
709
- showLogFile(decodedUrl);
159
+ selectLogFile(decodedUrl);
710
160
  } else {
711
161
  api.open_log_file(e.data.url, e.data.log_dir);
712
162
  }
713
163
  } else {
714
- refreshLogList();
164
+ refreshLogs();
715
165
  }
716
166
  break;
717
167
  }
718
168
  }
719
169
  },
720
- [logs, showLogFile, refreshLogList],
170
+ [logs, selectLogFile, refreshLogs],
721
171
  );
722
172
 
723
173
  // listen for updateState messages from vscode
@@ -730,123 +180,95 @@ export const App: FC<AppProps> = ({
730
180
 
731
181
  useEffect(() => {
732
182
  const loadLogsAndState = async () => {
733
- // See whether a specific task_file has been passed.
734
- const urlParams = new URLSearchParams(window.location.search);
735
-
736
- // If the URL provides a task file, load that
737
- const logPath = urlParams.get("task_file");
738
-
739
- // Replace spaces with a '+' sign:
740
- const resolvedLogPath = logPath ? logPath.replace(" ", "+") : logPath;
741
- const load = resolvedLogPath
742
- ? async (): Promise<LogFiles> => {
743
- return {
744
- log_dir: "",
745
- files: [{ name: resolvedLogPath }],
746
- };
747
- }
748
- : loadLogs;
749
-
183
+ // First see if there is embedded state and if so, use that
750
184
  const embeddedState = document.getElementById("logview-state");
751
185
  if (embeddedState) {
752
186
  const state = JSON5.parse(embeddedState.textContent || "");
753
187
  onMessage({ data: state });
754
188
  } else {
755
- const result = await load();
756
- setLogs(result);
757
-
758
- // If a log file was passed, select it
759
- const log_file = urlParams.get("log_file");
760
- if (log_file) {
761
- const index = result.files.findIndex((val) => {
762
- return log_file.endsWith(val.name);
189
+ // See whether a specific task_file has been passed.
190
+ const urlParams = new URLSearchParams(window.location.search);
191
+
192
+ // If the URL provides a task file, load that
193
+ const logPath = urlParams.get("task_file");
194
+
195
+ // Replace spaces with a '+' sign:
196
+ const resolvedLogPath = logPath ? logPath.replace(" ", "+") : logPath;
197
+
198
+ if (resolvedLogPath) {
199
+ // Load only this file
200
+ setLogs({
201
+ log_dir: "",
202
+ files: [{ name: resolvedLogPath }],
763
203
  });
764
- if (index > -1) {
765
- setSelectedLogIndex(index);
204
+ } else {
205
+ // If a log file was passed, select it
206
+ const log_file = urlParams.get("log_file");
207
+ if (log_file) {
208
+ await selectLogFile(log_file);
209
+ } else {
210
+ // Load all logs
211
+ await refreshLogs();
766
212
  }
767
- } else if (selectedLogIndex === -1) {
768
- setSelectedLogIndex(0);
769
213
  }
770
214
  }
771
215
 
772
216
  new ClipboardJS(".clipboard-button,.copy-button");
773
-
774
- if (pollForLogs) {
775
- let retryDelay = 1000;
776
- const maxRetryDelay = 60000;
777
-
778
- const pollEvents = async () => {
779
- try {
780
- const events = await api.client_events();
781
-
782
- if (events.includes("reload")) {
783
- window.location.reload();
784
- }
785
-
786
- if (events.includes("refresh-evals")) {
787
- const logs = await load();
788
- setLogs(logs);
789
- setSelectedLogIndex(0);
790
- }
791
-
792
- // Reset delay after a successful call
793
- retryDelay = 1000;
794
- } catch (error) {
795
- console.error("Error fetching client events:", error);
796
-
797
- // Exponential backoff with capping
798
- retryDelay = Math.min(retryDelay * 2, maxRetryDelay);
799
- } finally {
800
- // Schedule the next poll
801
- setTimeout(pollEvents, retryDelay);
802
- }
803
- };
804
-
805
- // Start polling
806
- pollEvents();
807
- }
808
217
  };
809
218
 
810
219
  loadLogsAndState();
811
- }, []);
220
+ }, [setLogs, selectLogFile, refreshLogs]);
812
221
 
813
222
  // Configure an app envelope specific to the current state
814
223
  // if there are no log files, then don't show sidebar
815
224
  const fullScreen = logs.files.length === 1 && !logs.log_dir;
816
225
 
817
- const hideFind = useCallback(() => {
818
- clearDocumentSelection();
819
- if (showFind) {
820
- setShowFind(false);
821
- }
822
- }, [showFind, setShowFind]);
823
-
824
226
  const showToggle = logs.files.length > 1 || !!logs.log_dir || false;
825
227
 
826
- /**
827
- * Determines the sample mode based on the selected log's contents.
828
- */
829
- const sampleMode =
830
- selectedLog?.contents?.sampleSummaries === undefined ||
831
- selectedLog.contents.sampleSummaries.length === 0
832
- ? "none"
833
- : selectedLog.contents.sampleSummaries.length === 1
834
- ? "single"
835
- : "many";
228
+ const handleSelectedIndexChanged = useCallback(
229
+ (index: number) => {
230
+ setSelectedLogIndex(index);
231
+ setOffCanvas(false);
232
+ resetFiltering();
233
+ clearSampleTab();
234
+ clearWorkspaceTab();
235
+ selectSample(0);
236
+ },
237
+ [
238
+ setSelectedLogIndex,
239
+ setOffCanvas,
240
+ resetFiltering,
241
+ clearSampleTab,
242
+ clearWorkspaceTab,
243
+ selectSample,
244
+ ],
245
+ );
246
+
247
+ const handleKeyboard = useCallback(
248
+ (e: KeyboardEvent) => {
249
+ // Add keyboard shortcuts for find, if needed
250
+ if (nativeFind || !setShowFind) {
251
+ return;
252
+ }
253
+
254
+ if ((e.ctrlKey || e.metaKey) && e.key === "f") {
255
+ setShowFind(true);
256
+ } else if (e.key === "Escape") {
257
+ hideFind();
258
+ }
259
+ },
260
+ [nativeFind, setShowFind, hideFind],
261
+ );
262
+
836
263
  return (
837
- <AppErrorBoundary>
838
- {!fullScreen && selectedLog?.contents ? (
264
+ <>
265
+ {!fullScreen && selectedLogSummary ? (
839
266
  <Sidebar
840
267
  logs={logs}
841
268
  logHeaders={logHeaders}
842
269
  loading={headersLoading}
843
- offcanvas={offcanvas}
844
- setOffcanvas={setOffcanvas}
845
270
  selectedIndex={selectedLogIndex}
846
- onSelectedIndexChanged={(index) => {
847
- setSelectedLogIndex(index);
848
- setOffcanvas(false);
849
- }}
271
+ onSelectedIndexChanged={handleSelectedIndexChanged}
850
272
  />
851
273
  ) : undefined}
852
274
  <div
@@ -854,79 +276,35 @@ export const App: FC<AppProps> = ({
854
276
  className={clsx(
855
277
  "app-main-grid",
856
278
  fullScreen ? "full-screen" : undefined,
857
- offcanvas ? "off-canvas" : undefined,
279
+ offCanvas ? "off-canvas" : undefined,
858
280
  )}
859
281
  tabIndex={0}
860
- onKeyDown={(e) => {
861
- // regular browsers user their own find
862
- if (!getVscodeApi()) {
863
- return;
864
- }
865
-
866
- if ((e.ctrlKey || e.metaKey) && e.key === "f") {
867
- setShowFind(true);
868
- } else if (e.key === "Escape") {
869
- hideFind();
870
- }
871
- }}
282
+ onKeyDown={handleKeyboard}
872
283
  >
873
- {showFind ? <FindBand hideBand={hideFind} /> : ""}
874
- <ProgressBar animating={status?.loading} />
875
- {status?.error ? (
284
+ {!nativeFind && showFind ? <FindBand /> : ""}
285
+ <ProgressBar animating={appStatus.loading} />
286
+ {appStatus.error ? (
876
287
  <ErrorPanel
877
288
  title="An error occurred while loading this task."
878
- error={status.error}
289
+ error={appStatus.error}
879
290
  />
880
291
  ) : (
881
292
  <WorkSpace
882
- task_id={selectedLog?.contents?.eval?.task_id}
883
- logFileName={selectedLog?.name}
884
- evalStatus={selectedLog?.contents?.status}
885
- evalError={filterNull(selectedLog?.contents?.error)}
886
- evalVersion={selectedLog?.contents?.version}
887
- evalSpec={selectedLog?.contents?.eval}
888
- evalPlan={selectedLog?.contents?.plan}
889
- evalStats={selectedLog?.contents?.stats}
890
- evalResults={filterNull(selectedLog?.contents?.results)}
293
+ task_id={selectedLogSummary?.eval?.task_id}
294
+ evalStatus={selectedLogSummary?.status}
295
+ evalError={filterNull(selectedLogSummary?.error)}
296
+ evalVersion={selectedLogSummary?.version}
297
+ evalSpec={selectedLogSummary?.eval}
298
+ evalPlan={selectedLogSummary?.plan}
299
+ evalStats={selectedLogSummary?.stats}
300
+ evalResults={filterNull(selectedLogSummary?.results)}
301
+ runningMetrics={runningMetrics}
891
302
  showToggle={showToggle}
892
- samples={filteredSamples}
893
- sampleMode={sampleMode}
894
- groupBy={groupBy}
895
- groupByOrder={groupByOrder}
896
- sampleStatus={sampleStatus}
897
- sampleError={sampleError}
898
- samplesDescriptor={samplesDescriptor}
899
- refreshLog={refreshLog}
900
- offcanvas={offcanvas}
901
- setOffcanvas={setOffcanvas}
902
- capabilities={capabilities}
903
- selectedSample={selectedSample}
904
- selectedSampleIndex={selectedSampleIndex}
905
- setSelectedSampleIndex={setSelectedSampleIndex}
906
- showingSampleDialog={showingSampleDialog}
907
- setShowingSampleDialog={handleSampleShowingDialog}
908
- selectedTab={selectedWorkspaceTab}
909
- setSelectedTab={setSelectedWorkspaceTab}
910
- selectedSampleTab={selectedSampleTab}
911
- setSelectedSampleTab={setSelectedSampleTab}
912
- sort={sort}
913
- setSort={setSort}
914
- epochs={selectedLog?.contents?.eval?.config?.epochs}
915
- epoch={epoch}
916
- setEpoch={setEpoch}
917
- filter={filter}
918
- setFilter={setFilter}
919
- score={score}
920
- setScore={setScore}
921
- scores={scores}
922
- sampleScrollPositionRef={sampleScrollPosition}
923
- setSampleScrollPosition={setSampleScrollPosition}
924
- workspaceTabScrollPositionRef={workspaceTabScrollPosition}
925
- setWorkspaceTabScrollPosition={setWorkspaceTabScrollPosition}
303
+ refreshLog={appRefreshLog}
926
304
  />
927
305
  )}
928
306
  </div>
929
- </AppErrorBoundary>
307
+ </>
930
308
  );
931
309
  };
932
310
 
@@ -936,73 +314,3 @@ const filterNull = <T,>(obj: T | null): T | undefined => {
936
314
  }
937
315
  return obj;
938
316
  };
939
-
940
- interface ScorerInfo {
941
- name: string;
942
- scorer: string;
943
- }
944
-
945
- /**
946
- * Determines the default scorer for a log
947
- */
948
- const defaultScorer = (log: EvalSummary): ScorerInfo | undefined => {
949
- if (log.sampleSummaries.length === 0) {
950
- return undefined;
951
- }
952
-
953
- // Select the default scorer to use
954
- const scores = log.sampleSummaries[0].scores;
955
-
956
- const scorer = log.results?.scores[0]
957
- ? {
958
- name: log.results?.scores[0].name,
959
- scorer: log.results?.scores[0].scorer,
960
- }
961
- : log.sampleSummaries.length > 0 && scores !== null
962
- ? {
963
- name: Object.keys(scores)[0],
964
- scorer: Object.keys(scores)[0],
965
- }
966
- : undefined;
967
- return scorer;
968
- };
969
-
970
- /**
971
- * Determines the default scorers for a log
972
- */
973
- const defaultScorers = (log: EvalSummary): Array<ScorerInfo> => {
974
- if (log.results?.scores) {
975
- return (log.results?.scores || [])
976
- .map((score): ScorerInfo => {
977
- return {
978
- name: score.name,
979
- scorer: score.scorer,
980
- };
981
- })
982
- .reduce((accum, scorer) => {
983
- if (
984
- !accum.find((sc) => {
985
- return scorer.scorer === sc.scorer && scorer.name === sc.name;
986
- })
987
- ) {
988
- accum.push(scorer);
989
- }
990
- return accum;
991
- }, [] as Array<ScorerInfo>);
992
- } else if (log.sampleSummaries && log.sampleSummaries.length > 0) {
993
- const scores = log.sampleSummaries[0].scores;
994
-
995
- if (scores !== null) {
996
- return Object.keys(scores).map((key) => {
997
- return {
998
- name: key,
999
- scorer: key,
1000
- };
1001
- });
1002
- } else {
1003
- return [];
1004
- }
1005
- } else {
1006
- return [];
1007
- }
1008
- };