inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297)
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,244 @@
1
+ import { StateSnapshot } from "react-virtuoso";
2
+ import { Capabilities } from "../api/types";
3
+ import { kEvalWorkspaceTabId, kSampleTranscriptTabId } from "../constants";
4
+ import { AppState, AppStatus } from "../types";
5
+ import { clearDocumentSelection } from "../utils/browser";
6
+ import { StoreState } from "./store";
7
+
8
// The app portion of the store: the `app` state object, the `capabilities`
// value, and the `appActions` used to read and update that state.
export interface AppSlice {
  app: AppState;
  capabilities: Capabilities;
  appActions: {
    // Status and top-level visibility flags
    setStatus: (status: AppStatus) => void;
    setOffcanvas: (show: boolean) => void;
    setShowFind: (show: boolean) => void;
    // Hides find and also clears the current document selection
    hideFind: () => void;

    // Sample dialog visibility and the selected workspace tab
    // (clearing a tab restores its default)
    setShowingSampleDialog: (showing: boolean) => void;
    setWorkspaceTab: (tab: string) => void;
    clearWorkspaceTab: () => void;

    // Selected sample tab (clearing restores the default)
    setSampleTab: (tab: string) => void;
    clearSampleTab: () => void;

    // Scroll positions, stored by name
    getScrollPosition: (name: string) => number | undefined;
    setScrollPosition: (name: string, value: number) => void;

    // List state snapshots, stored by name
    getListPosition: (name: string) => StateSnapshot | undefined;
    setListPosition: (name: string, state: StateSnapshot) => void;
    clearListPosition: (name: string) => void;

    // Collapsed flags, stored by name
    getCollapsed: (name: string, defaultValue?: boolean) => boolean;
    setCollapsed: (name: string, value: boolean) => void;

    // Message visibility flags, stored by name
    getMessageVisible: (name: string, defaultValue?: boolean) => boolean;
    setMessageVisible: (name: string, value: boolean) => void;
    clearMessageVisible: (name: string) => void;

    // Arbitrary values stored by key inside named property bags
    getPropertyValue: <T>(bagName: string, key: string, defaultValue?: T) => T;
    setPropertyValue: <T>(bagName: string, key: string, value: T) => void;
    removePropertyValue: (bagName: string, key: string) => void;
  };
}
43
+
44
// Tabs used when no explicit selection has been made; the clear*Tab
// actions reset the selection back to these values.
const kDefaultWorkspaceTab = kEvalWorkspaceTabId;
const kDefaultSampleTab = kSampleTranscriptTabId;

// App state a new store starts with: not loading, nothing showing,
// default tabs selected, and every per-name record empty.
const initialState: AppState = {
  status: { loading: false },
  offcanvas: false,
  showFind: false,
  dialogs: { sample: false },
  tabs: {
    workspace: kDefaultWorkspaceTab,
    sample: kDefaultSampleTab,
  },
  scrollPositions: {},
  listPositions: {},
  collapsed: {},
  messages: {},
  propertyBags: {},
};
64
+
65
// Builds the app slice of the store.
//
// `set` applies an updater that mutates the store state it is handed, and
// `get` returns the current store state. The third argument is accepted but
// not used. Returns a tuple of the slice (initial state plus actions) and a
// cleanup function, which currently does nothing.
export const createAppSlice = (
  set: (fn: (state: StoreState) => void) => void,
  get: () => StoreState,
  _store: any,
): [AppSlice, () => void] => {
  // True only for keys stored directly on the record, so inherited names
  // such as "toString" or "constructor" are never mistaken for entries.
  const hasOwn = (record: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(record, key);

  // Looks up a named flag, falling back to `defaultValue` (or false when no
  // default is given) if the name has never been stored.
  const getBoolRecord = (
    record: Record<string, boolean>,
    name: string,
    defaultValue?: boolean,
  ) => (hasOwn(record, name) ? record[name] : (defaultValue ?? false));

  const slice = {
    // State
    app: initialState,
    capabilities: {} as Capabilities,

    // Actions
    appActions: {
      setStatus: (status: AppStatus) =>
        set((state) => {
          state.app.status = status;
        }),

      setOffcanvas: (show: boolean) =>
        set((state) => {
          state.app.offcanvas = show;
        }),

      setShowFind: (show: boolean) =>
        set((state) => {
          state.app.showFind = show;
        }),

      // Clears the document selection before hiding find
      hideFind: () => {
        clearDocumentSelection();
        set((state) => {
          state.app.showFind = false;
        });
      },

      // Closing the dialog also resets the sample tab to its default and
      // clears the selected sample
      setShowingSampleDialog: (showing: boolean) => {
        set((state) => {
          state.app.dialogs.sample = showing;
        });
        if (!showing) {
          const state = get();
          state.appActions.clearSampleTab();
          state.sampleActions.clearSelectedSample();
        }
      },

      setWorkspaceTab: (tab: string) => {
        set((state) => {
          state.app.tabs.workspace = tab;
        });
      },
      clearWorkspaceTab: () => {
        set((state) => {
          state.app.tabs.workspace = kDefaultWorkspaceTab;
        });
      },

      setSampleTab: (tab: string) => {
        set((state) => {
          state.app.tabs.sample = tab;
        });
      },
      clearSampleTab: () => {
        set((state) => {
          state.app.tabs.sample = kDefaultSampleTab;
        });
      },

      getScrollPosition: (name: string) => {
        const positions = get().app.scrollPositions;
        return hasOwn(positions, name) ? positions[name] : undefined;
      },
      setScrollPosition: (name: string, position: number) => {
        set((state) => {
          state.app.scrollPositions[name] = position;
        });
      },

      getListPosition: (name: string) => {
        const positions = get().app.listPositions;
        return hasOwn(positions, name) ? positions[name] : undefined;
      },
      setListPosition: (name: string, position: StateSnapshot) => {
        set((state) => {
          state.app.listPositions[name] = position;
        });
      },
      clearListPosition: (name: string) => {
        set((state) => {
          // Mutate in place like the other actions rather than returning a
          // replacement object from the updater
          delete state.app.listPositions[name];
        });
      },

      getCollapsed: (name: string, defaultValue?: boolean) => {
        return getBoolRecord(get().app.collapsed, name, defaultValue);
      },
      setCollapsed: (name: string, value: boolean) => {
        set((state) => {
          state.app.collapsed[name] = value;
        });
      },

      getMessageVisible: (name: string, defaultValue?: boolean) => {
        return getBoolRecord(get().app.messages, name, defaultValue);
      },
      setMessageVisible: (name: string, value: boolean) => {
        set((state) => {
          state.app.messages[name] = value;
        });
      },
      clearMessageVisible: (name: string) => {
        set((state) => {
          delete state.app.messages[name];
        });
      },

      // Returns the value stored under `key` in the named bag, or
      // `defaultValue` when the bag or the key does not exist
      getPropertyValue: <T>(
        bagName: string,
        key: string,
        defaultValue?: T,
      ): T => {
        const bag = get().app.propertyBags[bagName] || {};
        return (hasOwn(bag, key) ? bag[key] : defaultValue) as T;
      },

      setPropertyValue: <T>(bagName: string, key: string, value: T) => {
        set((state) => {
          // Create the bag if it doesn't exist
          if (!state.app.propertyBags[bagName]) {
            state.app.propertyBags[bagName] = {};
          }
          // Only update the specific key
          state.app.propertyBags[bagName][key] = value;
        });
      },

      removePropertyValue: (bagName: string, key: string) => {
        set((state) => {
          const bag = state.app.propertyBags[bagName];
          if (bag) {
            delete bag[key];
          }
        });
      },
    },
  } as const;

  const cleanup = () => {};

  return [slice, cleanup];
};
233
+
234
// Records the supplied capabilities on the store and, when the store has no
// `app` state yet, installs the initial app state.
export const initializeAppSlice = (
  set: (fn: (state: StoreState) => void) => void,
  capabilities: Capabilities,
) => {
  set((draft) => {
    draft.capabilities = capabilities;
    if (draft.app) {
      return;
    }
    draft.app = initialState;
  });
};
@@ -0,0 +1,399 @@
1
+ import { highlightElement } from "prismjs";
2
+ import { useCallback, useEffect, useMemo, useRef } from "react";
3
+ import { SampleSummary } from "../api/types";
4
+ import { kEpochAscVal, kSampleAscVal, kScoreAscVal } from "../constants";
5
+ import {
6
+ createEvalDescriptor,
7
+ createSamplesDescriptor,
8
+ } from "../samples/descriptor/samplesDescriptor";
9
+ import { filterSamples } from "../samples/sample-tools/filters";
10
+ import {
11
+ byEpoch,
12
+ bySample,
13
+ sortSamples,
14
+ } from "../samples/sample-tools/SortFilter";
15
+ import { getAvailableScorers, getDefaultScorer } from "../scoring/utils";
16
+ import { Events } from "../types/log";
17
+ import { createLogger } from "../utils/logger";
18
+ import { useStore } from "./store";
19
+ import { mergeSampleSummaries } from "./utils";
20
+
21
+ const log = createLogger("hooks");
22
+
23
// Returns the `eval` entry of the currently selected log summary
// (undefined when no log summary is selected)
export const useEvalSpec = () =>
  useStore((state) => state.log.selectedLogSummary)?.eval;
27
+
28
// Returns all sample summaries (both completed and incomplete) by merging
// the selected log's summaries with any pending ones. No filtering is applied.
export const useSampleSummaries = () => {
  const logSummary = useStore((state) => state.log.selectedLogSummary);
  const pending = useStore((state) => state.log.pendingSampleSummaries);

  return useMemo(() => {
    const fromLog = logSummary?.sampleSummaries || [];
    const fromPending = pending?.samples || [];
    return mergeSampleSummaries(fromLog, fromPending);
  }, [logSummary, pending]);
};
43
+
44
// Returns how many sample summaries exist before any filtering
// (both complete and incomplete)
export const useTotalSampleCount = () => {
  const allSummaries = useSampleSummaries();
  return useMemo(() => allSummaries.length, [allSummaries]);
};
51
+
52
// Returns the score currently selected for this eval. When none has been
// selected, falls back to the default scorer derived from the selected log
// summary and the sample summaries; undefined when no log is selected.
export const useScore = () => {
  const logSummary = useStore((state) => state.log.selectedLogSummary);
  const summaries = useSampleSummaries();
  const chosen = useStore((state) => state.log.score);
  return useMemo(() => {
    if (chosen) {
      return chosen;
    }
    if (logSummary) {
      return getDefaultScorer(logSummary, summaries);
    }
    return undefined;
  }, [logSummary, summaries, chosen]);
};
69
+
70
// Returns the available scorers, determined from the eval or its samples
// (so in-progress evals without final metrics are covered too). Yields an
// empty list when no log summary is selected.
export const useScores = () => {
  const logSummary = useStore((state) => state.log.selectedLogSummary);
  const summaries = useSampleSummaries();
  return useMemo(() => {
    if (logSummary) {
      return getAvailableScorers(logSummary, summaries) || [];
    }
    return [];
  }, [logSummary, summaries]);
};
86
+
87
// Returns the eval descriptor built from the available scores and the
// sample summaries (null when there are no scores to build from)
export const useEvalDescriptor = () => {
  const scores = useScores();
  const summaries = useSampleSummaries();
  return useMemo(() => {
    if (!scores) {
      return null;
    }
    return createEvalDescriptor(scores, summaries);
  }, [scores, summaries]);
};
95
+
96
// Returns the samples descriptor for the current summaries, eval descriptor
// and score (undefined when there is no eval descriptor)
export const useSampleDescriptor = () => {
  const descriptor = useEvalDescriptor();
  const summaries = useSampleSummaries();
  const currentScore = useScore();
  return useMemo(() => {
    if (!descriptor) {
      return undefined;
    }
    return createSamplesDescriptor(summaries, descriptor, currentScore);
  }, [descriptor, summaries, currentScore]);
};
107
+
108
// Returns the sample summaries after applying, in order, the filter
// expression, the epoch selection, and the current sort. The result is
// always a fresh array.
export const useFilteredSamples = () => {
  const evalDescriptor = useEvalDescriptor();
  const sampleSummaries = useSampleSummaries();
  const filter = useStore((state) => state.log.filter);
  const epoch = useStore((state) => state.log.epoch);
  const sort = useStore((state) => state.log.sort);
  const samplesDescriptor = useSampleDescriptor();
  const score = useScore();

  return useMemo(() => {
    // Step 1: filter expression (needs both a descriptor and a filter value)
    const matching =
      evalDescriptor && filter.value
        ? filterSamples(evalDescriptor, sampleSummaries, filter.value).result
        : sampleSummaries;

    // Step 2: keep a single epoch unless "all" (or nothing) is selected
    const inEpoch =
      epoch && epoch !== "all"
        ? matching.filter((summary) => String(summary.epoch) === epoch)
        : matching;

    // Step 3: sort when a samples descriptor is available
    const ordered = samplesDescriptor
      ? sortSamples(sort, inEpoch, samplesDescriptor, score)
      : inEpoch;

    return [...ordered];
  }, [
    evalDescriptor,
    sampleSummaries,
    filter,
    epoch,
    sort,
    samplesDescriptor,
    score,
  ]);
};
148
+
149
// Decides how samples are grouped for the current sort. Grouping applies
// only when the eval is configured with more than one epoch: by "epoch" when
// sorting by epoch or when the epoch selection is anything other than "all",
// by "sample" when sorting by sample, otherwise "none".
export const useGroupBy = () => {
  const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
  const sort = useStore((state) => state.log.sort);
  const epoch = useStore((state) => state.log.epoch);
  return useMemo(() => {
    const epochCount = selectedLogSummary?.eval?.config?.epochs || 1;
    if (!(epochCount > 1)) {
      return "none";
    }
    if (byEpoch(sort) || epoch !== "all") {
      return "epoch";
    }
    if (bySample(sort)) {
      return "sample";
    }
    return "none";
  }, [selectedLogSummary, sort, epoch]);
};
167
+
168
// Returns the group ordering implied by the current sort: "asc" for the
// ascending sample, epoch and score sorts, "desc" for everything else
export const useGroupByOrder = () => {
  const sort = useStore((state) => state.log.sort);
  return useMemo(() => {
    const ascending =
      sort === kSampleAscVal ||
      sort === kEpochAscVal ||
      sort === kScoreAscVal;
    return ascending ? "asc" : "desc";
  }, [sort]);
};
179
+
180
// Looks up the sample summary at the selected index within the filtered
// sample list. The result is undefined when nothing sits at that index.
export const useSelectedSampleSummary = (): SampleSummary | undefined => {
  const visibleSamples = useFilteredSamples();
  const index = useStore((state) => state.log.selectedSampleIndex);

  return useMemo(() => visibleSamples[index], [visibleSamples, index]);
};
188
+
189
// Bundles the sample slice of the store (load status, error, the selected
// sample and its running events) into one memoized object, so consumers get
// a stable reference until one of those values changes.
export const useSampleData = () => {
  const status = useStore((state) => state.sample.sampleStatus);
  const error = useStore((state) => state.sample.sampleError);
  const sample = useStore((state) => state.sample.selectedSample);
  const running = useStore((state) => state.sample.runningEvents) as Events;

  return useMemo(
    () => ({ status, error, sample, running }),
    [status, error, sample, running],
  );
};
205
+
206
// Pairs the currently selected log file with the currently selected sample
// summary in a single memoized object.
export const useLogSelection = () => {
  const sample = useSelectedSampleSummary();
  const logFile = useStore((state) => state.logsActions.getSelectedLogFile());

  return useMemo(() => ({ logFile, sample }), [logFile, sample]);
};
219
+
220
// Reads and writes the collapsed flag stored under `id` in the app state.
//
// id:           key identifying the collapsible element in the store
// defaultValue: passed through to `getCollapsed` as the fallback for `id`
//
// Returns a `[collapsed, setCollapsed]` tuple. The tuple (and the setter in
// it) is memoized and is rebuilt whenever the stored value, the store action
// or `id` changes.
export const useCollapsedState = (
  id: string,
  defaultValue?: boolean,
): [boolean, (value: boolean) => void] => {
  const collapsed = useStore((state) =>
    state.appActions.getCollapsed(id, defaultValue),
  );
  const setCollapsed = useStore((state) => state.appActions.setCollapsed);

  return useMemo(() => {
    const set = (value: boolean) => {
      log.debug("Set collapsed", id, value);
      setCollapsed(id, value);
    };
    return [collapsed, set];
    // `id` must be a dependency: the setter closes over it, and without it a
    // change of `id` alone would leave callers holding a setter that still
    // writes to the previous id.
  }, [collapsed, setCollapsed, id]);
};
236
+
237
// Reads and writes the visibility flag stored under `id`, and clears the
// stored flag when the selection it is scoped to changes.
//
// id:    key identifying the message in the store
// scope: "eval"   - the flag is cleared only when the selected log file changes
//        "sample" - the flag is additionally cleared when the selected sample
//                   index changes
//
// Returns a `[visible, setVisible]` tuple. Nothing is cleared on the initial
// mount, so a value already present in the store at that point is kept.
export const useMessageVisibility = (
  id: string,
  scope: "sample" | "eval",
): [boolean, (visible: boolean) => void] => {
  const visible = useStore((state) =>
    state.appActions.getMessageVisible(id, true),
  );
  const setVisible = useStore((state) => state.appActions.setMessageVisible);
  const clearVisible = useStore(
    (state) => state.appActions.clearMessageVisible,
  );

  // Each effect tracks its own first run. Effects execute in declaration
  // order within a commit, so a single shared flag would already have been
  // flipped by the eval effect by the time the sample effect checks it, and
  // the sample effect would then clear the state on mount.
  const isFirstEvalRun = useRef(true);
  const isFirstSampleRun = useRef(true);

  // Reset state if the eval changes, but not during initialization
  const selectedLogFile = useStore((state) =>
    state.logsActions.getSelectedLogFile(),
  );
  useEffect(() => {
    // Skip the first effect run
    if (isFirstEvalRun.current) {
      isFirstEvalRun.current = false;
      return;
    }

    log.debug("clear message (eval)", id);
    clearVisible(id);
  }, [selectedLogFile, clearVisible, id]);

  // Maybe reset state if sample changes
  const selectedSampleIndex = useStore(
    (state) => state.log.selectedSampleIndex,
  );
  useEffect(() => {
    // Skip the first effect run for sample changes too
    if (isFirstSampleRun.current) {
      isFirstSampleRun.current = false;
      return;
    }

    // Only sample-scoped messages react to a change of sample
    if (scope === "sample") {
      log.debug("clear message (sample)", id);
      clearVisible(id);
    }
  }, [selectedSampleIndex, clearVisible, id, scope]);

  return useMemo(() => {
    log.debug("visibility", id, visible);
    const set = (value: boolean) => {
      log.debug("set visiblity", id);
      setVisible(id, value);
    };
    return [visible, set];
  }, [visible, setVisible, id]);
};
292
+
293
// Reads and writes a named property stored under `id` in the app state.
//
// id:           key identifying the owner of the property
// propertyName: name of the property under that id
// options:      optional settings
//   defaultValue - passed to `getPropertyValue` as the fallback value
//   cleanup      - when truthy, the property is removed from the store as
//                  part of this hook's effect cleanup (see below)
//
// NOTE: `cleanup` only defaults to true when `options` is omitted entirely.
// Passing an options object without a `cleanup` field leaves it undefined,
// which disables the removal.
//
// Returns `[value, setValue, removeValue]`.
export function useProperty<T>(
  id: string,
  propertyName: string,
  options?: {
    defaultValue?: T;
    cleanup?: boolean;
  },
): [T, (value: T) => void, () => void] {
  // Fall back to the default settings only when no options were supplied
  const settings = options || { cleanup: true };

  const writeProperty = useStore((state) => state.appActions.setPropertyValue);
  const deleteProperty = useStore(
    (state) => state.appActions.removePropertyValue,
  );

  // The selector is memoized so it is only rebuilt when its inputs change
  const selectProperty = useCallback(
    (state) =>
      state.appActions.getPropertyValue(
        id,
        propertyName,
        settings.defaultValue,
      ),
    [id, propertyName, settings.defaultValue],
  );
  const propertyValue = useStore(selectProperty);

  const setValue = useCallback(
    (value: T) => {
      writeProperty(id, propertyName, value);
    },
    [id, propertyName, writeProperty],
  );

  const removeValue = useCallback(() => {
    deleteProperty(id, propertyName);
  }, [id, propertyName, deleteProperty]);

  // Clean up on unmount. The cleanup also runs whenever id, propertyName or
  // the remove action changes, and it sees the settings captured when this
  // effect was last set up.
  useEffect(() => {
    return () => {
      if (settings.cleanup) {
        deleteProperty(id, propertyName);
      }
    };
  }, [id, propertyName, deleteProperty]);

  return [propertyValue, setValue, removeValue];
}
342
+
343
// Returns the value this hook held in the ref at render time: undefined on
// the first render, and afterwards whatever an earlier effect run stored.
export const usePrevious = <T>(value: T) => {
  const previousRef = useRef<T | undefined>(undefined);

  // The ref is written in an effect, i.e. after the render has read it below,
  // so the render that introduces a new value still sees the old one.
  useEffect(() => {
    previousRef.current = value;
  }, [value]);

  const previous = previousRef.current;
  return previous;
};
352
+
353
// Upper bound (in characters) on content that gets syntax highlighted;
// highlighting strings larger than this is too slow
const kPrismRenderMaxSize = 250000;

// Provides a ref to attach to a container element. Whenever `toolCallContent`
// changes, the `pre code` blocks inside that container whose class name
// contains "language-" are tagged with "sourceCode" and passed to
// `highlightElement`. Nothing happens for empty content, content longer than
// kPrismRenderMaxSize, or while the ref is not attached.
export const usePrismHighlight = (toolCallContent?: string) => {
  const toolViewRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const nothingToHighlight =
      !toolCallContent ||
      !toolViewRef.current ||
      toolCallContent.length > kPrismRenderMaxSize;
    if (nothingToHighlight) {
      return;
    }

    // Highlighting is scheduled for the next animation frame; the ref is
    // re-read there because the element may be gone by then
    requestAnimationFrame(() => {
      const container = toolViewRef.current;
      const codeBlocks = container?.querySelectorAll("pre code");
      codeBlocks?.forEach((codeBlock) => {
        if (!codeBlock.className.includes("language-")) {
          return;
        }
        codeBlock.classList.add("sourceCode");
        highlightElement(codeBlock as HTMLElement);
      });
    });
  }, [toolCallContent]);

  return toolViewRef;
};
379
+
380
// Returns a stable callback that switches the selected log. It first clears
// the selected sample and the selected log summary, then stores the new index.
export const useSetSelectedLogIndex = () => {
  const selectLog = useStore((state) => state.logsActions.setSelectedLogIndex);
  const resetSample = useStore(
    (state) => state.sampleActions.clearSelectedSample,
  );
  const resetLogSummary = useStore(
    (state) => state.logActions.clearSelectedLogSummary,
  );

  const selectLogAtIndex = useCallback(
    (index: number) => {
      // Clear the previous selection before pointing at the new log
      resetSample();
      resetLogSummary();
      selectLog(index);
    },
    [selectLog, resetLogSummary, resetSample],
  );

  return selectLogAtIndex;
};