inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,181 @@
1
+ import { EvalLogHeader, LogFiles } from "../api/types";
2
+ import { LogsState } from "../types";
3
+ import { createLogger } from "../utils/logger";
4
+ import { createLogsPolling } from "./logsPolling";
5
+ import { StoreState } from "./store";
6
+
7
+ const log = createLogger("Log Slice");
8
+
9
/**
 * Portion of the store that tracks the available log files, their headers,
 * and which log file is currently selected.
 */
export interface LogsSlice {
  /** The logs state held in the store. */
  logs: LogsState;

  /** Operations that read or modify the logs state. */
  logsActions: {
    // ---- Direct state updates ----

    /** Replace the log file listing. */
    setLogs: (logs: LogFiles) => void;
    /** Replace the full set of log headers. */
    setLogHeaders: (headers: Record<string, EvalLogHeader>) => void;
    /** Set the flag indicating whether headers are being loaded. */
    setHeadersLoading: (loading: boolean) => void;
    /** Select a log by its position in the file listing. */
    setSelectedLogIndex: (index: number) => void;
    /** Select a log by URL, if it is present in the current listing. */
    setSelectedLogFile: (logUrl: string) => void;
    /** Merge the given headers into the existing headers. */
    updateLogHeaders: (headers: Record<string, EvalLogHeader>) => void;

    // ---- Fetching / refreshing ----

    /** Reload the log listing and store it. */
    refreshLogs: () => Promise<void>;
    /** Select a log by URL, reloading the listing when it is not yet known. */
    selectLogFile: (logUrl: string) => Promise<void>;
    /** Fetch the log listing without storing it. */
    loadLogs: () => Promise<LogFiles>;

    // ---- Derived values ----

    /** Name of the currently selected log file, if any. */
    getSelectedLogFile: () => string | undefined;
  };
}
29
+
30
/**
 * Starting value of the logs state: an empty listing, no headers,
 * nothing loading, and no log selected (index -1).
 */
const initialState: LogsState = {
  logs: {
    log_dir: "",
    files: [],
  },
  logHeaders: {},
  headersLoading: false,
  selectedLogIndex: -1,
};
36
+
37
/**
 * Creates the logs slice of the store: the log file listing, the headers for
 * those logs, and the current selection, together with the actions that
 * operate on them.
 *
 * @param set - applies a mutating updater function to the store state
 * @param get - returns the current store state
 * @param _store - unused
 * @returns a tuple of the slice (state and actions) and a cleanup callback
 */
export const createLogsSlice = (
  set: (fn: (state: StoreState) => void) => void,
  get: () => StoreState,
  _store: any,
): [LogsSlice, () => void] => {
  const logsPolling = createLogsPolling(get, set);

  // Listing used whenever no log listing could be obtained
  const emptyLogs = (): LogFiles => ({ log_dir: "", files: [] });

  const slice = {
    // State
    logs: initialState,

    // Actions
    logsActions: {
      setLogs: (logs: LogFiles) => {
        set((state) => {
          state.logs.logs = logs;
        });

        // If we have files in the logs, load the headers
        if (logs.files.length > 0) {
          // Deferred so that the state update above is applied first
          setTimeout(() => {
            const currentState = get();
            if (!currentState.logs.headersLoading) {
              logsPolling.startPolling(logs);
            }
          }, 100);
        }
      },
      setLogHeaders: (headers: Record<string, EvalLogHeader>) =>
        set((state) => {
          state.logs.logHeaders = headers;
        }),
      setHeadersLoading: (loading: boolean) =>
        set((state) => {
          state.logs.headersLoading = loading;
        }),
      setSelectedLogIndex: (selectedLogIndex: number) => {
        set((state) => {
          state.logs.selectedLogIndex = selectedLogIndex;
        });
      },
      updateLogHeaders: (headers: Record<string, EvalLogHeader>) =>
        set((state) => {
          state.logs.logHeaders = { ...get().logs.logHeaders, ...headers };
        }),

      setSelectedLogFile: (logUrl: string) => {
        const state = get();
        const index = state.logs.logs.files.findIndex((val) =>
          logUrl.endsWith(val.name),
        );

        // Leave the selection untouched when the url is not in the listing
        if (index > -1) {
          state.logsActions.setSelectedLogIndex(index);
        }
      },

      // Helper function to load logs. Never rejects: failures are reported
      // through the app status and an empty listing is returned instead.
      loadLogs: async () => {
        const api = get().api;
        if (!api) {
          console.error("API not initialized in LogsStore");
          return emptyLogs();
        }

        try {
          log.debug("LOADING LOG FILES");
          return await api.get_log_paths();
        } catch (e) {
          console.log(e);
          get().appActions.setStatus({ loading: false, error: e as Error });
          return emptyLogs();
        }
      },
      refreshLogs: async () => {
        log.debug("REFRESH LOGS");
        const actions = get().logsActions;
        const refreshedLogs = (await actions.loadLogs()) || emptyLogs();

        // Capture the selection from the listing that is about to be
        // replaced. The selected file must be looked up in the OLD listing;
        // looking it up by index in the refreshed listing would always
        // resolve to the same index and lose the selection whenever new
        // files shift positions.
        const previousLogs = get().logs;
        const previousIndex = previousLogs.selectedLogIndex;
        const previousFile = previousLogs.logs.files[previousIndex];

        // Set the logs first
        actions.setLogs(refreshedLogs);

        // Preserve the selected log even if new logs appear
        if (previousFile) {
          const newIndex = refreshedLogs.files.findIndex(
            (file) => file.name === previousFile.name,
          );
          if (newIndex !== -1) {
            actions.setSelectedLogIndex(newIndex);
          }
        } else if (previousIndex < 0 && refreshedLogs.files.length > 0) {
          // Nothing was selected yet: default to the first log
          actions.setSelectedLogIndex(0);
        }
      },
      // Select a specific log file
      selectLogFile: async (logUrl: string) => {
        const state = get();
        const index = state.logs.logs.files.findIndex((val) =>
          val.name.endsWith(logUrl),
        );

        if (index > -1) {
          // It is already loaded
          state.logsActions.setSelectedLogIndex(index);
        } else {
          // It isn't yet loaded, so refresh the logs and try to load it from there
          const result = (await state.logsActions.loadLogs()) || emptyLogs();
          const idx = result.files.findIndex((file) =>
            logUrl.endsWith(file.name),
          );

          state.logsActions.setLogs(result);
          // Fall back to the first log when the url still cannot be found
          state.logsActions.setSelectedLogIndex(idx > -1 ? idx : 0);
        }
      },

      getSelectedLogFile: () => {
        const state = get();
        const file = state.logs.logs.files[state.logs.selectedLogIndex];
        return file !== undefined ? file.name : undefined;
      },
    },
  } as const;

  // Nothing to release here at present
  const cleanup = () => {};

  return [slice, cleanup];
};
172
+
173
/**
 * Ensures the store has a logs state, installing the initial state
 * when none is present. An existing logs state is left untouched.
 *
 * @param set - applies a mutating updater function to the store state
 */
export const initializeLogsSlice = <T extends LogsSlice>(
  set: (fn: (state: T) => void) => void,
) => {
  set((draft) => {
    if (draft.logs) {
      // Already initialized
      return;
    }
    draft.logs = initialState;
  });
};
@@ -0,0 +1,314 @@
1
+ import {
2
+ AttachmentData,
3
+ EventData,
4
+ SampleData,
5
+ SampleSummary,
6
+ } from "../api/types";
7
+ import { Event } from "../types";
8
+ import { resolveAttachments } from "../utils/attachments";
9
+ import { createLogger } from "../utils/logger";
10
+ import { createPolling } from "../utils/polling";
11
+ import { resolveSample } from "./sampleUtils"; // Import the shared utility
12
+ import { StoreState } from "./store";
13
+
14
// Logger for this module
const log = createLogger("samplePolling");

// Sentinel id meaning "no event / attachment has been received yet"
const kNoId = -1;

// Interval handed to createPolling (presumably seconds - confirm against utils/polling)
const kPollingInterval = 2;

// Retry limit handed to createPolling
const kPollingMaxRetries = 10;
19
+
20
/**
 * Bookkeeping carried across polls of a single sample.
 */
interface PollingState {
  /** Highest event id received so far (sent back on the next poll). */
  eventId: number;
  /** Highest attachment id received so far (sent back on the next poll). */
  attachmentId: number;

  /** Attachment content keyed by attachment hash. */
  attachments: Record<string, string>;

  /** Maps an event's event_id to its index in `events`, enabling replacement. */
  eventMapping: Record<string, number>;
  /** The resolved events received so far, in arrival order. */
  events: Event[];
}
33
+
34
/**
 * Creates a poller that streams the events of a running sample into the
 * store (`state.sample.runningEvents`). When the server reports the sample
 * as "NotFound", polling stops and the completed sample is loaded instead.
 *
 * @param get - returns the current store state
 * @param set - applies a mutating updater function to the store state
 * @returns `startPolling`, `stopPolling` and `cleanup` functions
 */
export function createSamplePolling(
  get: () => StoreState,
  set: (fn: (state: StoreState) => void) => void,
) {
  // The active polling instance (if any)
  let currentPolling: ReturnType<typeof createPolling> | null = null;

  // Abort controller of the most recent polling session. It stays undefined
  // until the first startPolling call, so it must be checked before use.
  let abortController: AbortController | undefined;

  // Identifier of the session whose data currently lives in pollingState
  let lastPollingId: string | undefined;

  // The initial polling state
  const pollingState: PollingState = {
    eventId: kNoId,
    attachmentId: kNoId,

    eventMapping: {},
    attachments: {},
    events: [],
  };

  // Function to start polling for a specific log file
  const startPolling = (logFile: string, summary: SampleSummary) => {
    // Create a unique identifier for this polling session
    const pollingId = `${logFile}:${summary.id}-${summary.epoch}`;
    log.debug(`Start Polling ${pollingId}`);

    // If we're already polling this resource, don't restart
    if (currentPolling && currentPolling.name === pollingId) {
      log.debug(`Aleady polling, ignoring start`);
      return;
    }

    if (currentPolling) {
      // Stop any existing polling first
      log.debug(`Resetting existing polling`);
      currentPolling.stop();

      // Clear any current running events
      set((state) => {
        state.sample.runningEvents = [];
      });

      // Reset the current polling state
      resetPollingState(pollingState);
    } else if (lastPollingId !== pollingId) {
      // The previous session was already stopped, but its cursor and cached
      // events belong to a different sample. Drop them so they do not leak
      // into this session.
      resetPollingState(pollingState);
    }
    lastPollingId = pollingId;

    // Cancel whatever the previous session still has in flight, then give
    // this session its own controller. The callback below captures the
    // session's controller rather than the shared variable, so a stale
    // request can never mistake itself for the current session.
    abortController?.abort();
    const sessionAbort = new AbortController();
    abortController = sessionAbort;

    // Create the polling callback
    log.debug(`Polling sample: ${summary.id}-${summary.epoch}`);
    const pollCallback = async () => {
      const state = get();

      // Get the api
      const api = state.api;
      if (!api) {
        throw new Error("Required API is missing");
      }

      if (!api.get_log_sample_data) {
        throw new Error("Required API get_log_sample_data is undefined.");
      }

      if (sessionAbort.signal.aborted) {
        return false;
      }

      // Fetch sample data newer than what has already been received
      const eventId = pollingState.eventId;
      const attachmentId = pollingState.attachmentId;
      const sampleDataResponse = await api.get_log_sample_data(
        logFile,
        summary.id,
        summary.epoch,
        eventId,
        attachmentId,
      );

      if (sessionAbort.signal.aborted) {
        return false;
      }

      if (sampleDataResponse?.status === "NotFound") {
        // A 404 from the server means that this sample
        // has been flushed to the main eval file, no events
        // are available and we should retrieve the data from the
        // sample file itself.

        // Stop polling since we now have the complete sample
        stopPolling();

        // Also fetch a fresh sample and clear the running events
        // (if there were ever running events)
        if (
          state.sample.runningEvents.length > 0 ||
          state.sample.sampleStatus === "streaming"
        ) {
          try {
            log.debug(
              `LOADING COMPLETED SAMPLE AFTER FLUSH: ${summary.id}-${summary.epoch}`,
            );
            const sample = await api.get_log_sample(
              logFile,
              summary.id,
              summary.epoch,
            );

            // Another session started (or cleanup ran) while the sample
            // was loading; its result must not overwrite newer state.
            if (sessionAbort.signal.aborted) {
              return false;
            }

            if (sample) {
              const migratedSample = resolveSample(sample);

              // Update the store with the completed sample
              set((draft) => {
                draft.sample.selectedSample = migratedSample;
                draft.sample.sampleStatus = "ok";
                draft.sample.runningEvents = [];
              });
            } else {
              set((draft) => {
                draft.sample.sampleStatus = "error";
                draft.sample.sampleError = new Error(
                  "Unable to load sample - an unknown error occurred",
                );
                draft.sample.runningEvents = [];
              });
            }
          } catch (e) {
            set((draft) => {
              draft.sample.sampleError = e as Error;
              draft.sample.sampleStatus = "error";
              draft.sample.runningEvents = [];
            });
          }
        }

        return false;
      }

      if (
        sampleDataResponse?.status === "OK" &&
        sampleDataResponse.sampleData
      ) {
        const sampleData = sampleDataResponse.sampleData;

        // Process attachments first so that events can resolve against them
        processAttachments(sampleData, pollingState);

        // Process events
        const processedEvents = processEvents(sampleData, pollingState);

        // update max attachment id
        if (sampleData.attachments.length > 0) {
          const maxAttachment = findMaxId(
            sampleData.attachments,
            pollingState.attachmentId,
          );
          log.debug(`New max attachment ${maxAttachment}`);
          pollingState.attachmentId = maxAttachment;
        }

        // update max event id
        if (sampleData.events.length > 0) {
          const maxEvent = findMaxId(sampleData.events, pollingState.eventId);
          log.debug(`New max event ${maxEvent}`);
          pollingState.eventId = maxEvent;
        }

        // Update the running events (ensure identity of runningEvents fails equality)
        if (processedEvents) {
          set((draft) => {
            draft.sample.runningEvents = [...pollingState.events];
          });
        }
      }

      // Continue polling
      return true;
    };

    // Create the polling instance
    const polling = createPolling(pollingId, pollCallback, {
      maxRetries: kPollingMaxRetries,
      interval: kPollingInterval,
    });

    // Store the polling instance and start it
    currentPolling = polling;
    polling.start();
  };

  // Stop polling
  const stopPolling = () => {
    if (currentPolling) {
      currentPolling.stop();
      currentPolling = null;
    }
  };

  // Abort any in-flight request and stop polling. Safe to call even when
  // polling was never started.
  const cleanup = () => {
    log.debug(`CLEANUP`);
    abortController?.abort();
    stopPolling();
  };

  return {
    startPolling,
    stopPolling,
    cleanup,
  };
}
252
+
253
/**
 * Returns the given polling state (in place) to its initial, empty condition:
 * no event or attachment received, and no cached attachments or events.
 *
 * @param state - the polling state to reset
 */
const resetPollingState = (state: PollingState) => {
  // Use the shared sentinel so this always agrees with the initial state
  state.eventId = kNoId;
  state.attachmentId = kNoId;
  state.eventMapping = {};
  state.attachments = {};
  state.events = [];
};
260
+
261
/**
 * Records the content of every attachment in `sampleData` into
 * `pollingState.attachments`, keyed by the attachment's hash.
 *
 * @param sampleData - the sample data returned by a poll
 * @param pollingState - the polling state to update in place
 */
function processAttachments(
  sampleData: SampleData,
  pollingState: PollingState,
) {
  const incoming = sampleData.attachments;
  log.debug(`Processing ${incoming.length} attachments`);

  for (const { hash, content } of Object.values(incoming)) {
    pollingState.attachments[hash] = content;
  }
}
270
+
271
/**
 * Folds the events in `sampleData` into `pollingState.events`. An event
 * whose `event_id` has been seen before replaces the earlier copy at the
 * same position; any other event is appended and its position recorded in
 * `pollingState.eventMapping`. Attachments referenced by each event are
 * resolved against `pollingState.attachments`.
 *
 * @param sampleData - the sample data returned by a poll
 * @param pollingState - the polling state to update in place
 * @returns false when there were no events to process, true otherwise
 */
function processEvents(sampleData: SampleData, pollingState: PollingState) {
  // Go through each event and resolve it, either appending or replacing
  log.debug(`Processing ${sampleData.events.length} events`);
  if (sampleData.events.length === 0) {
    return false;
  }

  for (const eventData of sampleData.events) {
    // Identify if this event id already has an event in the event list
    const existingIndex = pollingState.eventMapping[eventData.event_id];

    // Resolve attachments within this event
    const resolvedEvent = resolveAttachments<Event>(
      eventData.event,
      pollingState.attachments,
    );

    // Compare against undefined explicitly: index 0 is a valid position,
    // and a truthiness check would append a duplicate of the first event
    // instead of replacing it.
    if (existingIndex !== undefined) {
      // There is an existing event in the stream, replace it
      log.debug(`Replace event ${existingIndex}`);
      pollingState.events[existingIndex] = resolvedEvent;
    } else {
      // This is a new event, add to the event list and note
      // its position
      log.debug(`New event ${pollingState.events.length}`);

      const currentIndex = pollingState.events.length;
      pollingState.eventMapping[eventData.event_id] = currentIndex;
      pollingState.events.push(resolvedEvent);
    }
  }
  return true;
}
304
+
305
/**
 * Returns the largest `id` among `items`, or `currentMax` when that is
 * larger (or when `items` is empty).
 *
 * @param items - events or attachments carrying a numeric `id`
 * @param currentMax - the largest id known so far
 * @returns the new maximum id
 */
const findMaxId = (
  items: EventData[] | AttachmentData[],
  currentMax: number,
) => {
  // Scan linearly rather than spreading every id into Math.max(...), which
  // passes each element as a call argument and can exceed the engine's
  // argument limit on large batches.
  let max = currentMax;
  for (const item of items) {
    if (item.id > max) {
      max = item.id;
    }
  }
  return max;
};
@@ -0,0 +1,140 @@
1
+ import { SampleSummary } from "../api/types";
2
+ import { kSampleMessagesTabId } from "../constants";
3
+ import { SampleState, SampleStatus } from "../types";
4
+ import { EvalSample } from "../types/log";
5
+ import { createLogger } from "../utils/logger";
6
+ import { createSamplePolling } from "./samplePolling";
7
+ import { resolveSample } from "./sampleUtils"; // Import the shared utility
8
+ import { StoreState } from "./store";
9
+
10
+ const log = createLogger("sampleSlice");
11
+
12
/**
 * Shape of the store slice that tracks the currently selected sample and the
 * actions that operate on it.
 */
export interface SampleSlice {
  /** State for the currently selected sample. */
  sample: SampleState;

  sampleActions: {
    // --- The actual sample data ---

    /** Stores the given sample as the selected sample. */
    setSelectedSample: (sample: EvalSample) => void;
    /** Clears the selected sample. */
    clearSelectedSample: () => void;
    /** Sets the sample status. */
    setSampleStatus: (status: SampleStatus) => void;
    /** Sets (or clears, with `undefined`) the sample error. */
    setSampleError: (error: Error | undefined) => void;

    // --- Loading ---

    /** Loads the sample identified by `sampleSummary` from `logFile`. */
    loadSample: (
      logFile: string,
      sampleSummary: SampleSummary,
    ) => Promise<void>;

    /** Polls the sample identified by `sampleSummary` in `logFile`. */
    pollSample: (
      logFile: string,
      sampleSummary: SampleSummary,
    ) => Promise<void>;
  };
}
33
+
34
/**
 * Default sample state: no selected sample, status "ok", no error, and an
 * empty list of running events.
 */
const initialState: SampleState = {
  // Nothing is selected until a sample is loaded
  selectedSample: undefined,
  sampleStatus: "ok",
  sampleError: undefined,

  // The resolved events
  runningEvents: [],
};
42
+
43
/**
 * Creates the sample slice of the store.
 *
 * @param set - Store updater; receives a function that mutates the store state.
 * @param get - Returns the current store state.
 * @param _store - Unused.
 * @returns A tuple of the slice (initial sample state plus its actions) and a
 *   cleanup function that cleans up the sample poller.
 */
export const createSampleSlice = (
  set: (fn: (state: StoreState) => void) => void,
  get: () => StoreState,
  _store: any,
): [SampleSlice, () => void] => {
  // The sample poller
  const samplePolling = createSamplePolling(get, set);

  const slice = {
    // State
    sample: initialState,

    // Actions
    sampleActions: {
      setSelectedSample: (sample: EvalSample) => {
        set((state) => {
          state.sample.selectedSample = sample;
        });
        if (sample.events.length < 1) {
          // If there are no events, use the messages tab as the default
          get().appActions.setSampleTab(kSampleMessagesTabId);
        }
      },
      clearSelectedSample: () =>
        set((state) => {
          state.sample.selectedSample = undefined;
        }),
      setSampleStatus: (status: SampleStatus) =>
        set((state) => {
          state.sample.sampleStatus = status;
        }),
      setSampleError: (error: Error | undefined) =>
        set((state) => {
          state.sample.sampleError = error;
        }),
      pollSample: async (logFile: string, sampleSummary: SampleSummary) => {
        // Poll running sample (only when a log is loaded and a sample is selected)
        const state = get();
        if (state.log.loadedLog && state.sample.selectedSample) {
          samplePolling.startPolling(logFile, sampleSummary);
          state.sampleActions.setSampleStatus("streaming");
        }
      },
      loadSample: async (logFile: string, sampleSummary: SampleSummary) => {
        const sampleActions = get().sampleActions;

        sampleActions.setSampleError(undefined);
        sampleActions.setSampleStatus("loading");
        try {
          if (sampleSummary.completed !== false) {
            log.debug(
              `LOADING COMPLETED SAMPLE: ${sampleSummary.id}-${sampleSummary.epoch}`,
            );
            const sample = await get().api?.get_log_sample(
              logFile,
              sampleSummary.id,
              sampleSummary.epoch,
            );
            if (!sample) {
              // The catch below records the error and sets the "error" status
              throw new Error(
                "Unable to load sample - an unknown error occurred",
              );
            }
            const migratedSample = resolveSample(sample);
            sampleActions.setSelectedSample(migratedSample);
            sampleActions.setSampleStatus("ok");
          } else {
            log.debug(
              `POLLING RUNNING SAMPLE: ${sampleSummary.id}-${sampleSummary.epoch}`,
            );

            // Poll running sample
            samplePolling.startPolling(logFile, sampleSummary);
            sampleActions.setSampleStatus("streaming");
          }
        } catch (e: unknown) {
          // Anything can be thrown; make sure the stored error is a real Error
          const error = e instanceof Error ? e : new Error(String(e));
          sampleActions.setSampleError(error);
          sampleActions.setSampleStatus("error");
        }
      },
    },
  } as const;

  const cleanup = () => {
    samplePolling.cleanup();
  };
  return [slice, cleanup];
};
131
+
132
/**
 * Ensures the store has sample state: when `state.sample` is falsy it is set
 * to the initial sample state; an existing value is left untouched.
 *
 * @param set - Store updater; receives a function that mutates the store state.
 */
export const initializeSampleSlice = (
  set: (fn: (state: StoreState) => void) => void,
) => {
  set((state) => {
    if (state.sample) {
      // Already initialized, nothing to do
      return;
    }
    state.sample = initialState;
  });
};
@@ -0,0 +1,21 @@
1
+ import { EvalSample } from "../types/log";
2
+ import { resolveAttachments } from "../utils/attachments";
3
+
4
/**
 * Migrates a sample to the current structure and resolves its attachments.
 *
 * Works on a shallow copy, so the object passed in is not modified. When the
 * sample has a `transcript`, its `events` and `content` are used as the
 * sample's events and attachments. `input`, `messages` and `events` are then
 * resolved against the attachments, and the returned sample's `attachments`
 * is reset to an empty object.
 *
 * @param sample - The raw sample (old or current structure).
 * @returns The migrated sample with attachments resolved.
 */
export const resolveSample = (sample: any): EvalSample => {
  const resolved = { ...sample };

  // Migrates old versions of samples to the new structure
  const legacyTranscript = resolved.transcript;
  if (legacyTranscript) {
    resolved.events = legacyTranscript.events;
    resolved.attachments = legacyTranscript.content;
  }

  // Keep the lookup table locally; the result itself carries no attachments
  const attachments = resolved.attachments || {};
  resolved.attachments = {};

  for (const field of ["input", "messages", "events"]) {
    resolved[field] = resolveAttachments(resolved[field], attachments);
  }
  return resolved;
};