inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -1,135 +1,153 @@
1
1
  import {
2
2
  FC,
3
3
  Fragment,
4
- RefObject,
5
4
  useCallback,
6
5
  useEffect,
6
+ useMemo,
7
7
  useRef,
8
8
  useState,
9
9
  } from "react";
10
10
  import { VirtuosoHandle } from "react-virtuoso";
11
- import { SampleSummary } from "../../api/types.ts";
12
- import { EmptyPanel } from "../../components/EmptyPanel.tsx";
13
- import { InlineSampleDisplay } from "../../samples/InlineSampleDisplay";
14
- import { SampleDialog } from "../../samples/SampleDialog";
15
- import { SamplesDescriptor } from "../../samples/descriptor/samplesDescriptor.tsx";
16
- import { SampleList } from "../../samples/list/SampleList";
17
- import { SampleMode, ScoreFilter } from "../../types.ts";
18
- import { EvalSample } from "../../types/log";
11
+ import { NoContentsPanel } from "../../components/NoContentsPanel.tsx";
12
+ import { InlineSampleDisplay } from "../../samples/InlineSampleDisplay.tsx";
13
+ import { SampleDialog } from "../../samples/SampleDialog.tsx";
14
+ import { SampleList } from "../../samples/list/SampleList.tsx";
15
+ import {
16
+ useFilteredSamples,
17
+ useGroupBy,
18
+ useGroupByOrder,
19
+ useSampleDescriptor,
20
+ useScore,
21
+ useTotalSampleCount,
22
+ } from "../../state/hooks.ts";
23
+ import { useStore } from "../../state/store.ts";
24
+ import { RunningNoSamples } from "./RunningNoSamples.tsx";
19
25
  import { getSampleProcessor } from "./grouping.ts";
20
26
  import { ListItem } from "./types.ts";
21
27
 
22
28
  interface SamplesTabProps {
23
- // Optional props
24
- sample?: EvalSample;
25
- samples?: SampleSummary[];
26
- sampleDescriptor?: SamplesDescriptor;
27
- sampleError?: Error;
28
-
29
29
  // Required props
30
- sampleMode: SampleMode;
31
- groupBy: "epoch" | "sample" | "none";
32
- groupByOrder: "asc" | "desc";
33
- sampleStatus: string;
34
- selectedSampleIndex: number;
35
- setSelectedSampleIndex: (index: number) => void;
36
- showingSampleDialog: boolean;
37
- setShowingSampleDialog: (showing: boolean) => void;
38
- selectedSampleTab?: string;
39
- setSelectedSampleTab: (tab: string) => void;
40
- epoch: string;
41
- filter: ScoreFilter;
42
- sampleScrollPositionRef: RefObject<number>;
43
- setSampleScrollPosition: (position: number) => void;
44
- sampleTabScrollRef: RefObject<HTMLDivElement | null>;
30
+ running: boolean;
45
31
  }
46
32
 
47
- export const SamplesTab: FC<SamplesTabProps> = ({
48
- sample,
49
- samples,
50
- sampleMode,
51
- groupBy,
52
- groupByOrder,
53
- sampleDescriptor,
54
- sampleStatus,
55
- sampleError,
56
- selectedSampleIndex,
57
- setSelectedSampleIndex,
58
- showingSampleDialog,
59
- setShowingSampleDialog,
60
- selectedSampleTab,
61
- setSelectedSampleTab,
62
- sampleScrollPositionRef,
63
- setSampleScrollPosition,
64
- sampleTabScrollRef,
65
- }) => {
33
+ export const SamplesTab: FC<SamplesTabProps> = ({ running }) => {
34
+ const selectSample = useStore((state) => state.logActions.selectSample);
35
+ const selectedSampleIndex = useStore(
36
+ (state) => state.log.selectedSampleIndex,
37
+ );
38
+
39
+ const sampleSummaries = useFilteredSamples();
40
+ const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
41
+
42
+ // Compute the limit to apply to the sample count (this is so)
43
+ // we can provide a total expected sample count for this evaluation
44
+ const evalSampleCount = useMemo(() => {
45
+ const limit = selectedLogSummary?.eval.config.limit;
46
+ const limitCount =
47
+ limit === null || limit === undefined
48
+ ? undefined
49
+ : typeof limit === "number"
50
+ ? limit
51
+ : (limit[1] as number) - (limit[0] as number);
52
+ return (
53
+ (limitCount || selectedLogSummary?.eval.dataset.samples || 0) *
54
+ (selectedLogSummary?.eval.config.epochs || 0)
55
+ );
56
+ }, [selectedLogSummary?.eval.config.limit]);
57
+
58
+ const totalSampleCount = useTotalSampleCount();
59
+
60
+ const samplesDescriptor = useSampleDescriptor();
61
+ const groupBy = useGroupBy();
62
+ const groupByOrder = useGroupByOrder();
63
+ const currentScore = useScore();
64
+
65
+ const selectedSample = useStore((state) => state.sample.selectedSample);
66
+
66
67
  const [items, setItems] = useState<ListItem[]>([]);
67
68
  const [sampleItems, setSampleItems] = useState<ListItem[]>([]);
68
69
 
69
70
  const sampleListHandle = useRef<VirtuosoHandle | null>(null);
70
71
  const sampleDialogRef = useRef<HTMLDivElement>(null);
71
72
 
73
+ const selectedSampleTab = useStore((state) => state.app.tabs.sample);
74
+ const setSelectedSampleTab = useStore(
75
+ (state) => state.appActions.setSampleTab,
76
+ );
77
+ const showingSampleDialog = useStore((state) => state.app.dialogs.sample);
78
+ const setShowingSampleDialog = useStore(
79
+ (state) => state.appActions.setShowingSampleDialog,
80
+ );
81
+
72
82
  // Shows the sample dialog
73
83
  const showSample = useCallback(
74
84
  (index: number) => {
75
- setSelectedSampleIndex(index);
85
+ selectSample(index);
76
86
  setShowingSampleDialog(true);
77
87
  },
78
- [setSelectedSampleIndex, setShowingSampleDialog],
88
+ [selectSample, setShowingSampleDialog],
79
89
  );
80
90
 
91
+ // Keep the selected item scrolled into view
92
+ useEffect(() => {
93
+ setTimeout(() => {
94
+ if (sampleListHandle.current) {
95
+ sampleListHandle.current.scrollIntoView({ index: selectedSampleIndex });
96
+ }
97
+ }, 0);
98
+ }, [selectedSampleIndex]);
99
+
100
+ // Focus the dialog when it is shown
81
101
  useEffect(() => {
82
102
  if (showingSampleDialog) {
83
103
  setTimeout(() => {
84
104
  sampleDialogRef.current?.focus();
85
105
  }, 0);
86
- } else {
87
- setTimeout(() => {
88
- if (sampleListHandle.current) {
89
- sampleListHandle.current.scrollToIndex(0);
90
- }
91
- }, 0);
92
106
  }
93
107
  }, [showingSampleDialog]);
94
108
 
109
+ const sampleProcessor = useMemo(() => {
110
+ if (!samplesDescriptor) return undefined;
111
+
112
+ return getSampleProcessor(
113
+ sampleSummaries || [],
114
+ selectedLogSummary?.eval?.config?.epochs || 1,
115
+ groupBy,
116
+ groupByOrder,
117
+ samplesDescriptor,
118
+ currentScore,
119
+ );
120
+ }, [
121
+ samplesDescriptor,
122
+ sampleSummaries,
123
+ selectedLogSummary?.eval?.config?.epochs,
124
+ groupBy,
125
+ groupByOrder,
126
+ currentScore,
127
+ ]);
128
+
95
129
  useEffect(() => {
96
- const sampleProcessor = sampleDescriptor
97
- ? getSampleProcessor(
98
- samples || [],
99
- groupBy,
100
- groupByOrder,
101
- sampleDescriptor,
102
- )
103
- : undefined;
104
-
105
- // Process the samples into the proper data structure
106
- const items = samples?.flatMap((sample, index) => {
130
+ const resolvedSamples = sampleSummaries?.flatMap((sample, index) => {
107
131
  const results: ListItem[] = [];
108
- const previousSample = index !== 0 ? samples[index - 1] : undefined;
132
+ const previousSample =
133
+ index !== 0 ? sampleSummaries[index - 1] : undefined;
109
134
  const items = sampleProcessor
110
135
  ? sampleProcessor(sample, index, previousSample)
111
136
  : [];
137
+
112
138
  results.push(...items);
113
139
  return results;
114
140
  });
115
141
 
116
- setItems(items || []);
142
+ setItems(resolvedSamples || []);
117
143
  setSampleItems(
118
- items
119
- ? items.filter((item) => {
144
+ resolvedSamples
145
+ ? resolvedSamples.filter((item) => {
120
146
  return item.type === "sample";
121
147
  })
122
148
  : [],
123
149
  );
124
- }, [samples, groupBy, groupByOrder, sampleDescriptor]);
125
-
126
- const nextSampleIndex = useCallback(() => {
127
- if (selectedSampleIndex < sampleItems.length - 1) {
128
- return selectedSampleIndex + 1;
129
- } else {
130
- return -1;
131
- }
132
- }, [selectedSampleIndex, sampleItems.length]);
150
+ }, [sampleSummaries, sampleProcessor]);
133
151
 
134
152
  const previousSampleIndex = useCallback(() => {
135
153
  return selectedSampleIndex > 0 ? selectedSampleIndex - 1 : -1;
@@ -137,68 +155,63 @@ export const SamplesTab: FC<SamplesTabProps> = ({
137
155
 
138
156
  // Manage the next / previous state the selected sample
139
157
  const nextSample = useCallback(() => {
140
- const next = nextSampleIndex();
141
- if (sampleStatus !== "loading" && next > -1) {
142
- setSelectedSampleIndex(next);
158
+ const next = Math.min(selectedSampleIndex + 1, sampleItems.length - 1);
159
+ if (next > -1) {
160
+ selectSample(next);
143
161
  }
144
- }, [nextSampleIndex, sampleStatus, setSelectedSampleIndex]);
162
+ }, [selectedSampleIndex, sampleItems, selectSample]);
145
163
 
146
164
  const previousSample = useCallback(() => {
147
165
  const prev = previousSampleIndex();
148
- if (sampleStatus !== "loading" && prev > -1) {
149
- setSelectedSampleIndex(prev);
166
+ if (prev > -1) {
167
+ selectSample(prev);
150
168
  }
151
- }, [previousSampleIndex, sampleStatus, setSelectedSampleIndex]);
169
+ }, [previousSampleIndex, selectSample]);
152
170
 
153
171
  const title =
154
172
  selectedSampleIndex > -1 && sampleItems.length > selectedSampleIndex
155
173
  ? sampleItems[selectedSampleIndex].label
156
174
  : "";
157
175
 
158
- if (!sampleDescriptor) {
159
- return <EmptyPanel />;
176
+ if (totalSampleCount === 0) {
177
+ if (running) {
178
+ return <RunningNoSamples />;
179
+ } else {
180
+ return <NoContentsPanel text="No samples" />;
181
+ }
160
182
  } else {
161
183
  return (
162
184
  <Fragment>
163
- {sampleDescriptor && sampleMode === "single" ? (
185
+ {samplesDescriptor && totalSampleCount === 1 ? (
164
186
  <InlineSampleDisplay
165
187
  id="sample-display"
166
- sample={sample}
167
- sampleStatus={sampleStatus}
168
- sampleError={sampleError}
169
- sampleDescriptor={sampleDescriptor}
170
188
  selectedTab={selectedSampleTab}
171
189
  setSelectedTab={setSelectedSampleTab}
172
- scrollRef={sampleTabScrollRef}
173
190
  />
174
191
  ) : undefined}
175
- {sampleDescriptor && sampleMode === "many" ? (
192
+ {samplesDescriptor && totalSampleCount > 1 ? (
176
193
  <SampleList
177
194
  listHandle={sampleListHandle}
178
195
  items={items}
179
- sampleDescriptor={sampleDescriptor}
180
- selectedIndex={selectedSampleIndex}
196
+ totalItemCount={evalSampleCount}
197
+ running={running}
181
198
  nextSample={nextSample}
182
199
  prevSample={previousSample}
183
200
  showSample={showSample}
184
201
  />
185
202
  ) : undefined}
186
- <SampleDialog
187
- id={String(sample?.id || "")}
188
- title={title}
189
- sample={sample}
190
- sampleStatus={sampleStatus}
191
- sampleError={sampleError}
192
- sampleDescriptor={sampleDescriptor}
193
- showingSampleDialog={showingSampleDialog}
194
- setShowingSampleDialog={setShowingSampleDialog}
195
- selectedTab={selectedSampleTab}
196
- setSelectedTab={setSelectedSampleTab}
197
- nextSample={nextSample}
198
- prevSample={previousSample}
199
- sampleScrollPositionRef={sampleScrollPositionRef}
200
- setSampleScrollPosition={setSampleScrollPosition}
201
- />
203
+ {showingSampleDialog ? (
204
+ <SampleDialog
205
+ id={String(selectedSample?.id || "")}
206
+ title={title}
207
+ showingSampleDialog={showingSampleDialog}
208
+ setShowingSampleDialog={setShowingSampleDialog}
209
+ selectedTab={selectedSampleTab}
210
+ setSelectedTab={setSelectedSampleTab}
211
+ nextSample={nextSample}
212
+ prevSample={previousSample}
213
+ />
214
+ ) : undefined}
202
215
  </Fragment>
203
216
  );
204
217
  }
@@ -1,12 +1,16 @@
1
1
  import { SampleSummary } from "../../api/types";
2
2
  import { SamplesDescriptor } from "../../samples/descriptor/samplesDescriptor";
3
+ import { ScoreLabel } from "../../types";
4
+ import { Epochs } from "../../types/log";
3
5
  import { ListItem, SampleListItem, SeparatorListItem } from "./types";
4
6
 
5
7
  export const getSampleProcessor = (
6
8
  samples: SampleSummary[],
9
+ epochs: Epochs,
7
10
  groupBy: "sample" | "epoch" | "none",
8
11
  groupByOrder: "asc" | "desc",
9
12
  sampleDescriptor: SamplesDescriptor,
13
+ score?: ScoreLabel,
10
14
  ): ((
11
15
  sample: SampleSummary,
12
16
  index: number,
@@ -14,11 +18,17 @@ export const getSampleProcessor = (
14
18
  ) => ListItem[]) => {
15
19
  // Perform grouping if there are epochs
16
20
  if (groupBy == "epoch") {
17
- return groupByEpoch(samples, sampleDescriptor, groupByOrder);
21
+ return groupByEpoch(samples, epochs, sampleDescriptor, groupByOrder, score);
18
22
  } else if (groupBy === "sample") {
19
- return groupBySample(samples, sampleDescriptor, groupByOrder);
23
+ return groupBySample(
24
+ samples,
25
+ epochs,
26
+ sampleDescriptor,
27
+ groupByOrder,
28
+ score,
29
+ );
20
30
  } else {
21
- return noGrouping(samples, groupByOrder);
31
+ return noGrouping(samples, groupByOrder, sampleDescriptor, score);
22
32
  }
23
33
  };
24
34
 
@@ -28,6 +38,8 @@ export const getSampleProcessor = (
28
38
  const noGrouping = (
29
39
  samples: SampleSummary[],
30
40
  order: "asc" | "desc",
41
+ sampleDescriptor: SamplesDescriptor,
42
+ score?: ScoreLabel,
31
43
  ): ((sample: SampleSummary, index: number) => ListItem[]) => {
32
44
  const counter = getCounter(samples.length, 1, order);
33
45
  return (sample: SampleSummary, index: number) => {
@@ -40,6 +52,12 @@ const noGrouping = (
40
52
  index: index,
41
53
  data: sample,
42
54
  type: "sample",
55
+ answer:
56
+ sampleDescriptor.selectedScorerDescriptor(sample)?.answer() || "",
57
+ scoreRendered: sampleDescriptor.evalDescriptor
58
+ .score(sample, score)
59
+ ?.render(),
60
+ completed: sample.completed !== undefined ? sample.completed : true,
43
61
  },
44
62
  ];
45
63
  };
@@ -50,8 +68,10 @@ const noGrouping = (
50
68
  */
51
69
  const groupBySample = (
52
70
  samples: SampleSummary[],
71
+ epochs: Epochs,
53
72
  sampleDescriptor: SamplesDescriptor,
54
73
  order: "asc" | "desc",
74
+ score?: ScoreLabel,
55
75
  ): ((
56
76
  sample: SampleSummary,
57
77
  index: number,
@@ -73,7 +93,7 @@ const groupBySample = (
73
93
  }
74
94
  }
75
95
  });
76
- const groupCount = samples.length / sampleDescriptor.evalDescriptor.epochs;
96
+ const groupCount = samples.length / (epochs || 1);
77
97
  const itemCount = samples.length / groupCount;
78
98
  const counter = getCounter(itemCount, groupCount, order);
79
99
  return (
@@ -103,6 +123,11 @@ const groupBySample = (
103
123
  index: index,
104
124
  data: sample,
105
125
  type: "sample",
126
+ answer: sampleDescriptor.selectedScorerDescriptor(sample)?.answer() || "",
127
+ scoreRendered: sampleDescriptor.evalDescriptor
128
+ .score(sample, score)
129
+ ?.render(),
130
+ completed: sample.completed !== undefined ? sample.completed : true,
106
131
  } as SampleListItem);
107
132
 
108
133
  return results;
@@ -114,14 +139,16 @@ const groupBySample = (
114
139
  */
115
140
  const groupByEpoch = (
116
141
  samples: SampleSummary[],
142
+ epochs: Epochs,
117
143
  sampleDescriptor: SamplesDescriptor,
118
144
  order: "asc" | "desc",
145
+ score?: ScoreLabel,
119
146
  ): ((
120
147
  sample: SampleSummary,
121
148
  index: number,
122
149
  previousSample?: SampleSummary,
123
150
  ) => ListItem[]) => {
124
- const groupCount = sampleDescriptor.evalDescriptor.epochs;
151
+ const groupCount = epochs || 1;
125
152
  const itemCount = samples.length / groupCount;
126
153
  const counter = getCounter(itemCount, groupCount, order);
127
154
 
@@ -153,6 +180,11 @@ const groupByEpoch = (
153
180
  index: index,
154
181
  data: sample,
155
182
  type: "sample",
183
+ answer: sampleDescriptor.selectedScorerDescriptor(sample)?.answer() || "",
184
+ scoreRendered: sampleDescriptor.evalDescriptor
185
+ .score(sample, score)
186
+ ?.render(),
187
+ completed: sample.completed !== undefined ? sample.completed : true,
156
188
  } as SampleListItem);
157
189
 
158
190
  return results;
@@ -1,11 +1,15 @@
1
+ import { ReactNode } from "react";
1
2
  import { SampleSummary } from "../../api/types";
2
3
 
3
4
  export interface SampleListItem {
4
5
  label: string;
5
6
  index: number;
6
7
  number: number;
8
+ answer: string;
9
+ scoreRendered: ReactNode;
7
10
  data: SampleSummary;
8
11
  type: "sample";
12
+ completed: boolean;
9
13
  }
10
14
 
11
15
  export interface SeparatorListItem {
@@ -1,10 +1,11 @@
1
- import { ReactNode, RefObject } from "react";
1
+ import { ComponentType, ReactNode, RefObject } from "react";
2
2
 
3
- export interface TabDescriptor {
3
+ export interface TabDescriptor<P> {
4
4
  id: string;
5
5
  scrollable: boolean;
6
6
  scrollRef?: RefObject<HTMLDivElement | null>;
7
7
  label: string;
8
- content: () => ReactNode;
8
+ component: ComponentType<P>;
9
+ componentProps: P;
9
10
  tools?: () => ReactNode[] | undefined;
10
11
  }
@@ -1,6 +1,6 @@
1
- import { EvalMetric } from "../types/log";
1
+ import { ResultsMetric } from "./navbar/ResultsPanel";
2
2
 
3
- export const metricDisplayName = (metric: EvalMetric): string => {
3
+ export const metricDisplayName = (metric: ResultsMetric): string => {
4
4
  let modifier = undefined;
5
5
  for (const metricModifier of metricModifiers) {
6
6
  modifier = metricModifier(metric);
@@ -13,10 +13,10 @@ export const metricDisplayName = (metric: EvalMetric): string => {
13
13
  return metricName;
14
14
  };
15
15
 
16
- type MetricModifier = (metric: EvalMetric) => string | undefined;
16
+ type MetricModifier = (metric: ResultsMetric) => string | undefined;
17
17
 
18
18
  const clusterMetricModifier: MetricModifier = (
19
- metric: EvalMetric,
19
+ metric: ResultsMetric,
20
20
  ): string | undefined => {
21
21
  if (metric.name !== "stderr") {
22
22
  return undefined;
@@ -24,4 +24,10 @@ export default defineConfig({
24
24
  resolve: {
25
25
  dedupe: ["react", "react-dom"],
26
26
  },
27
+ define: {
28
+ __DEV_WATCH__: JSON.stringify(process.env.DEV_LOGGING === "true"),
29
+ __LOGGING_FILTER__: JSON.stringify(
30
+ process.env.DEV_LOGGING_NAMESPACES || "*",
31
+ ),
32
+ },
27
33
  });