inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,15 @@
1
1
  import clsx from "clsx";
2
2
  import {
3
+ createElement,
3
4
  FC,
4
5
  Fragment,
5
6
  MouseEvent,
6
7
  RefObject,
7
8
  useCallback,
8
- useMemo,
9
9
  } from "react";
10
- import { SampleSummary } from "../api/types";
10
+ import { RunningMetric } from "../api/types";
11
11
  import { EmptyPanel } from "../components/EmptyPanel";
12
12
  import { TabPanel, TabSet } from "../components/TabSet";
13
- import { EvalDescriptor } from "../samples/descriptor/types";
14
13
  import {
15
14
  EvalPlan,
16
15
  EvalResults,
@@ -18,63 +17,37 @@ import {
18
17
  EvalStats,
19
18
  Status,
20
19
  } from "../types/log";
21
- import { debounce } from "../utils/sync";
22
20
  import { Navbar } from "./navbar/Navbar";
23
21
  import { TabDescriptor } from "./types";
24
22
 
23
+ import { useStore } from "../state/store";
25
24
  import styles from "./WorkSpaceView.module.css";
26
25
 
27
26
  interface WorkSpaceViewProps {
28
- logFileName?: string;
29
27
  evalSpec: EvalSpec;
30
28
  evalPlan?: EvalPlan;
31
29
  evalResults?: EvalResults;
30
+ runningMetrics?: RunningMetric[];
32
31
  evalStats?: EvalStats;
33
- samples?: SampleSummary[];
34
- evalDescriptor?: EvalDescriptor;
35
32
  status?: Status;
36
33
  showToggle: boolean;
37
- tabs: Record<string, TabDescriptor>;
38
- selectedTab: string;
39
- setSelectedTab: (tab: string) => void;
34
+ tabs: Record<string, TabDescriptor<any>>;
40
35
  divRef: RefObject<HTMLDivElement | null>;
41
- offcanvas: boolean;
42
- setOffcanvas: (offcanvas: boolean) => void;
43
- workspaceTabScrollPositionRef: RefObject<Record<string, number>>;
44
- setWorkspaceTabScrollPosition: (tab: string, pos: number) => void;
45
36
  }
46
37
 
47
38
  export const WorkSpaceView: FC<WorkSpaceViewProps> = ({
48
- logFileName,
49
39
  evalSpec,
50
40
  evalPlan,
51
41
  evalResults,
42
+ runningMetrics,
52
43
  evalStats,
53
- samples,
54
- evalDescriptor,
55
44
  status,
56
45
  showToggle,
57
- selectedTab,
58
46
  tabs,
59
- setSelectedTab,
60
47
  divRef,
61
- offcanvas,
62
- setOffcanvas,
63
- workspaceTabScrollPositionRef,
64
- setWorkspaceTabScrollPosition,
65
48
  }) => {
66
- const debouncedScroll = useMemo(() => {
67
- return debounce((id, position) => {
68
- setWorkspaceTabScrollPosition(id, position);
69
- }, 100);
70
- }, [setWorkspaceTabScrollPosition]);
71
-
72
- const onScroll = useCallback(
73
- (id: string, position: number) => {
74
- debouncedScroll(id, position);
75
- },
76
- [debouncedScroll],
77
- );
49
+ const selectedTab = useStore((state) => state.app.tabs.workspace);
50
+ const setSelectedTab = useStore((state) => state.appActions.setWorkspaceTab);
78
51
 
79
52
  const onSelected = useCallback(
80
53
  (e: MouseEvent<HTMLElement>) => {
@@ -85,12 +58,6 @@ export const WorkSpaceView: FC<WorkSpaceViewProps> = ({
85
58
  },
86
59
  [setSelectedTab],
87
60
  );
88
- const handleScroll = useCallback(
89
- (tabid: string, position: number) => {
90
- onScroll(tabid, position);
91
- },
92
- [onScroll],
93
- );
94
61
 
95
62
  if (evalSpec === undefined) {
96
63
  return <EmptyPanel />;
@@ -118,14 +85,10 @@ export const WorkSpaceView: FC<WorkSpaceViewProps> = ({
118
85
  evalSpec={evalSpec}
119
86
  evalPlan={evalPlan}
120
87
  evalResults={evalResults}
88
+ runningMetrics={runningMetrics}
121
89
  evalStats={evalStats}
122
- samples={samples}
123
- evalDescriptor={evalDescriptor}
124
90
  status={status}
125
- file={logFileName}
126
91
  showToggle={showToggle}
127
- offcanvas={offcanvas}
128
- setOffcanvas={setOffcanvas}
129
92
  />
130
93
  <div ref={divRef} className={clsx("workspace", styles.workspace)}>
131
94
  <div className={clsx("log-detail", styles.tabContainer)}>
@@ -148,14 +111,9 @@ export const WorkSpaceView: FC<WorkSpaceViewProps> = ({
148
111
  selected={selectedTab === tab.id}
149
112
  scrollable={!!tab.scrollable}
150
113
  scrollRef={tab.scrollRef}
151
- scrollPosition={
152
- workspaceTabScrollPositionRef.current?.[tab.id]
153
- }
154
- setScrollPosition={(position: number) => {
155
- handleScroll(tab.id, position);
156
- }}
114
+ style={{ height: tab.scrollable ? "100%" : undefined }}
157
115
  >
158
- {tab.content()}
116
+ {createElement(tab.component, tab.componentProps)}
159
117
  </TabPanel>
160
118
  );
161
119
  })}
@@ -1,7 +1,7 @@
1
1
  import clsx from "clsx";
2
2
  import { FC } from "react";
3
- import { SampleSummary } from "../../api/types";
4
- import { EvalDescriptor } from "../../samples/descriptor/types";
3
+ import { RunningMetric } from "../../api/types";
4
+ import { useTotalSampleCount } from "../../state/hooks";
5
5
  import {
6
6
  EvalPlan,
7
7
  EvalResults,
@@ -14,16 +14,12 @@ import { PrimaryBar } from "./PrimaryBar";
14
14
  import { SecondaryBar } from "./SecondaryBar";
15
15
 
16
16
  interface NavBarProps {
17
- file?: string;
18
17
  evalSpec?: EvalSpec;
19
18
  evalResults?: EvalResults;
19
+ runningMetrics?: RunningMetric[];
20
20
  evalPlan?: EvalPlan;
21
21
  evalStats?: EvalStats;
22
- evalDescriptor?: EvalDescriptor;
23
- samples?: SampleSummary[];
24
22
  status?: Status;
25
- offcanvas: boolean;
26
- setOffcanvas: (offcanvas: boolean) => void;
27
23
  showToggle: boolean;
28
24
  }
29
25
 
@@ -31,38 +27,32 @@ interface NavBarProps {
31
27
  * Renders the Navbar
32
28
  */
33
29
  export const Navbar: FC<NavBarProps> = ({
34
- file,
35
30
  evalSpec,
36
31
  evalPlan,
37
32
  evalResults,
38
33
  evalStats,
39
- samples,
40
- evalDescriptor,
41
34
  showToggle,
42
- offcanvas,
43
- setOffcanvas,
44
35
  status,
36
+ runningMetrics,
45
37
  }) => {
38
+ const totalSampleCount = useTotalSampleCount();
46
39
  return (
47
40
  <nav className={clsx("navbar", "sticky-top", styles.navbarWrapper)}>
48
41
  <PrimaryBar
49
- file={file}
50
42
  evalSpec={evalSpec}
51
43
  evalResults={evalResults}
52
- samples={samples}
53
44
  showToggle={showToggle}
54
- offcanvas={offcanvas}
55
- setOffcanvas={setOffcanvas}
56
45
  status={status}
46
+ runningMetrics={runningMetrics}
47
+ sampleCount={totalSampleCount}
57
48
  />
58
49
  <SecondaryBar
59
50
  evalSpec={evalSpec}
60
51
  evalPlan={evalPlan}
61
52
  evalResults={evalResults}
62
53
  evalStats={evalStats}
63
- samples={samples}
64
- evalDescriptor={evalDescriptor}
65
54
  status={status}
55
+ sampleCount={totalSampleCount}
66
56
  />
67
57
  </nav>
68
58
  );
@@ -38,6 +38,7 @@
38
38
  }
39
39
 
40
40
  .taskStatus {
41
+ display: flex;
41
42
  justify-content: end;
42
43
  margin-right: 1em;
43
44
  margin-bottom: 0;
@@ -1,41 +1,52 @@
1
1
  import clsx from "clsx";
2
2
  import { FC, useCallback } from "react";
3
- import { SampleSummary } from "../../api/types";
3
+ import { RunningMetric } from "../../api/types";
4
4
  import { ApplicationIcons } from "../../appearance/icons";
5
5
  import { CopyButton } from "../../components/CopyButton";
6
6
  import { kModelNone } from "../../constants";
7
+ import { useStore } from "../../state/store";
7
8
  import { EvalResults, EvalSpec, Status } from "../../types/log";
8
9
  import { filename } from "../../utils/path";
9
10
  import styles from "./PrimaryBar.module.css";
10
- import { ResultsPanel } from "./ResultsPanel";
11
- import { CancelledPanel, ErroredPanel, RunningPanel } from "./StatusPanel";
11
+ import {
12
+ displayScorersFromRunningMetrics,
13
+ ResultsPanel,
14
+ toDisplayScorers,
15
+ } from "./ResultsPanel";
16
+ import { RunningStatusPanel } from "./RunningStatusPanel";
17
+ import { CancelledPanel, ErroredPanel } from "./StatusPanel";
12
18
 
13
19
  interface PrimaryBarProps {
14
20
  showToggle: boolean;
15
- offcanvas: boolean;
16
- setOffcanvas: (offcanvas: boolean) => void;
17
21
  status?: Status;
18
22
  evalResults?: EvalResults;
19
- samples?: SampleSummary[];
20
- file?: string;
23
+ runningMetrics?: RunningMetric[];
21
24
  evalSpec?: EvalSpec;
25
+ sampleCount?: number;
22
26
  }
23
27
 
24
28
  export const PrimaryBar: FC<PrimaryBarProps> = ({
25
29
  showToggle,
26
- offcanvas,
27
30
  status,
28
31
  evalResults,
29
- samples,
30
- file,
32
+ runningMetrics,
31
33
  evalSpec,
32
- setOffcanvas,
34
+ sampleCount,
33
35
  }) => {
34
- const logFileName = file ? filename(file) : "";
36
+ const offCanvas = useStore((state) => state.app.offcanvas);
37
+ const setOffCanvas = useStore((state) => state.appActions.setOffcanvas);
38
+ const streamSamples = useStore((state) => state.capabilities.streamSamples);
39
+ const selectedLogFile = useStore((state) =>
40
+ state.logsActions.getSelectedLogFile(),
41
+ );
42
+
43
+ const logFileName = selectedLogFile ? filename(selectedLogFile) : "";
35
44
 
36
45
  const handleToggle = useCallback(() => {
37
- setOffcanvas(!offcanvas);
38
- }, [setOffcanvas, offcanvas]);
46
+ setOffCanvas(!offCanvas);
47
+ }, [offCanvas, setOffCanvas]);
48
+
49
+ const hasRunningMetrics = runningMetrics && runningMetrics.length > 0;
39
50
 
40
51
  return (
41
52
  <div className={clsx(styles.wrapper)}>
@@ -53,7 +64,7 @@ export const PrimaryBar: FC<PrimaryBarProps> = ({
53
64
  onClick={handleToggle}
54
65
  className={clsx(
55
66
  "btn",
56
- offcanvas ? "d-md-none" : undefined,
67
+ offCanvas ? "d-md-none" : undefined,
57
68
  styles.toggle,
58
69
  )}
59
70
  type="button"
@@ -93,22 +104,29 @@ export const PrimaryBar: FC<PrimaryBarProps> = ({
93
104
  <div className={clsx("navbar-secondary-text", "text-truncate")}>
94
105
  {logFileName}
95
106
  </div>
96
- {file ? <CopyButton value={file} /> : ""}
107
+ {selectedLogFile ? <CopyButton value={selectedLogFile} /> : ""}
97
108
  </div>
98
109
  </div>
99
110
  </div>
100
111
  <div className={clsx(styles.taskStatus, "navbar-text")}>
101
- {status === "success" ? (
102
- <ResultsPanel results={evalResults} />
112
+ {status === "success" ||
113
+ (status === "started" && streamSamples && hasRunningMetrics) ? (
114
+ <ResultsPanel
115
+ scorers={
116
+ runningMetrics
117
+ ? displayScorersFromRunningMetrics(runningMetrics)
118
+ : toDisplayScorers(evalResults?.scores)
119
+ }
120
+ />
103
121
  ) : undefined}
104
122
  {status === "cancelled" ? (
105
- <CancelledPanel sampleCount={samples?.length || 0} />
123
+ <CancelledPanel sampleCount={sampleCount || 0} />
106
124
  ) : undefined}
107
- {status === "started" ? (
108
- <RunningPanel sampleCount={samples?.length || 0} />
125
+ {status === "started" && (!streamSamples || !hasRunningMetrics) ? (
126
+ <RunningStatusPanel sampleCount={sampleCount || 0} />
109
127
  ) : undefined}
110
128
  {status === "error" ? (
111
- <ErroredPanel sampleCount={samples?.length || 0} />
129
+ <ErroredPanel sampleCount={sampleCount || 0} />
112
130
  ) : undefined}
113
131
  </div>
114
132
  <div id="task-created" style={{ display: "none" }}>
@@ -45,7 +45,6 @@
45
45
 
46
46
  .multiScorer {
47
47
  padding-left: 0;
48
- height: 100%;
49
48
  display: flex;
50
49
  flex-direction: column;
51
50
  padding: 0.5em 1em;
@@ -88,3 +87,19 @@
88
87
  padding: 0 0.2em;
89
88
  justify-content: center;
90
89
  }
90
+
91
+ .moreButton {
92
+ margin-top: 0.5em;
93
+ margin-bottom: 0.5em;
94
+ padding-right: 0;
95
+ }
96
+
97
+ .metricsSummary {
98
+ display: flex;
99
+ flex-direction: column;
100
+ align-items: flex-end;
101
+ }
102
+
103
+ .modalScores {
104
+ padding-bottom: 4em;
105
+ }
@@ -1,46 +1,115 @@
1
1
  import clsx from "clsx";
2
2
  import { FC } from "react";
3
- import { EvalMetric, EvalResults, EvalScore, Reducer } from "../../types/log";
3
+ import { RunningMetric } from "../../api/types";
4
+ import { LinkButton } from "../../components/LinkButton";
5
+ import { Modal } from "../../components/Modal";
6
+ import { useProperty } from "../../state/hooks";
7
+ import { Scores } from "../../types/log";
4
8
  import { formatPrettyDecimal } from "../../utils/format";
5
9
  import { metricDisplayName } from "../utils";
6
10
  import styles from "./ResultsPanel.module.css";
11
+ import { ScoreGrid } from "./ScoreGrid";
7
12
 
8
- interface ResultsPanelProps {
9
- results?: EvalResults;
13
+ export interface ResultsMetric {
14
+ name: string;
15
+ params?: {};
16
+ value: number;
10
17
  }
11
18
 
12
- interface MetricSummary {
13
- reducer: Reducer;
14
- metric: EvalMetric;
19
+ export interface ResultsScorer {
20
+ scorer: string;
21
+ reducer?: string;
22
+ metrics: ResultsMetric[];
15
23
  }
16
24
 
17
- export const ResultsPanel: FC<ResultsPanelProps> = ({ results }) => {
18
- // Map the scores into a list of key/values
19
- if (results?.scores?.length === 1) {
20
- const scorers: Record<string, MetricSummary[]> = {};
21
- results.scores.map((score) => {
22
- scorers[score.name] = Object.keys(score.metrics).map((key) => {
25
+ export const displayScorersFromRunningMetrics = (metrics?: RunningMetric[]) => {
26
+ if (!metrics) {
27
+ return [];
28
+ }
29
+
30
+ const getKey = (metric: RunningMetric) => {
31
+ return metric.reducer
32
+ ? `${metric.scorer}-${metric.reducer}`
33
+ : metric.scorer;
34
+ };
35
+
36
+ const scorers: Record<string, ResultsScorer> = {};
37
+ metrics.forEach((metric) => {
38
+ if (metric.value !== undefined) {
39
+ const key = getKey(metric);
40
+ if (scorers[key]) {
41
+ scorers[key].metrics.push({
42
+ name: metric.name,
43
+ value: metric.value,
44
+ });
45
+ } else {
46
+ scorers[key] = {
47
+ scorer: metric.scorer,
48
+ reducer: metric.reducer,
49
+ metrics: [
50
+ {
51
+ name: metric.name,
52
+ value: metric.value,
53
+ },
54
+ ],
55
+ };
56
+ }
57
+ }
58
+ });
59
+
60
+ return Object.values(scorers);
61
+ };
62
+
63
+ export const toDisplayScorers = (scores?: Scores): ResultsScorer[] => {
64
+ if (!scores) {
65
+ return [];
66
+ }
67
+
68
+ return scores.map((score) => {
69
+ return {
70
+ scorer: score.name,
71
+ reducer: score.reducer === null ? undefined : score.reducer,
72
+ metrics: Object.keys(score.metrics).map((key) => {
73
+ const metric = score.metrics[key];
23
74
  return {
24
- reducer: score.reducer,
25
- metric: {
26
- name: key,
27
- value: score.metrics[key].value,
28
- params: score.metrics[key].params,
29
- metadata: {},
30
- },
75
+ name: metric.name,
76
+ value: metric.value,
77
+ params: metric.params,
31
78
  };
32
- });
33
- });
79
+ }),
80
+ };
81
+ });
82
+ };
83
+
84
+ interface ResultsPanelProps {
85
+ scorers?: ResultsScorer[];
86
+ }
87
+
88
+ export const ResultsPanel: FC<ResultsPanelProps> = ({ scorers }) => {
89
+ const [showing, setShowing] = useProperty(
90
+ "results-panel-metrics",
91
+ "modal-showing",
92
+ {
93
+ defaultValue: false,
94
+ },
95
+ );
96
+
97
+ if (!scorers || scorers.length === 0) {
98
+ return undefined;
99
+ }
34
100
 
35
- const metrics = Object.values(scorers)[0];
36
- const showReducer = metrics && metrics.length > 0 && !!metrics[0].reducer;
101
+ // Get the display scorers
102
+ if (scorers.length === 1) {
103
+ const showReducer = !!scorers[0].reducer;
104
+ const metrics = scorers[0].metrics;
37
105
  return (
38
106
  <div className={styles.simpleMetricsRows}>
39
107
  {metrics.map((metric, i) => {
40
108
  return (
41
109
  <VerticalMetric
42
110
  key={`simple-metric-${i}`}
43
- metricSummary={metric}
111
+ reducer={scorers[0].reducer}
112
+ metric={metric}
44
113
  isFirst={i === 0}
45
114
  showReducer={showReducer}
46
115
  />
@@ -49,27 +118,73 @@ export const ResultsPanel: FC<ResultsPanelProps> = ({ results }) => {
49
118
  </div>
50
119
  );
51
120
  } else {
52
- const showReducer =
53
- results?.scores.findIndex((score) => !!score.reducer) !== -1;
121
+ const showReducer = scorers.findIndex((score) => !!score.reducer) !== -1;
122
+ const grouped = groupMetrics(scorers);
123
+
124
+ // Try to select metrics with a group size 5 or less, if possible
125
+ let primaryResults = grouped[0];
126
+ if (primaryResults.length > 5) {
127
+ const shorterResults = grouped.find((g) => {
128
+ return g.length <= 5;
129
+ });
130
+ if (shorterResults) {
131
+ primaryResults = shorterResults;
132
+ }
133
+ }
134
+
54
135
  return (
55
- <div className={styles.multiMetricsRows}>
56
- {results?.scores?.map((score, index) => {
57
- return (
58
- <MultiScorerMetric
59
- key={`multi-metric-${index}`}
60
- scorer={score}
61
- isFirst={index === 0}
62
- showReducer={showReducer}
136
+ <div className={clsx(styles.metricsSummary)}>
137
+ <ScoreGrid scoreGroups={[primaryResults]} showReducer={showReducer} />
138
+ {grouped.length > 1 ? (
139
+ <>
140
+ <Modal
141
+ id="results-metrics"
142
+ showing={showing}
143
+ setShowing={setShowing}
144
+ title={"Scoring Detail"}
145
+ >
146
+ <ScoreGrid
147
+ scoreGroups={grouped}
148
+ showReducer={showReducer}
149
+ className={styles.modalScores}
150
+ striped={false}
151
+ />
152
+ </Modal>
153
+ <LinkButton
154
+ className={styles.moreButton}
155
+ text={"All scoring..."}
156
+ onClick={() => {
157
+ setShowing(true);
158
+ }}
63
159
  />
64
- );
65
- })}
160
+ </>
161
+ ) : undefined}
66
162
  </div>
67
163
  );
68
164
  }
69
165
  };
70
166
 
167
+ const metricsKey = (metrics: ResultsMetric[]): string => {
168
+ const metricKey = metrics.map((m) => m.name).join("");
169
+ return metricKey;
170
+ };
171
+
172
+ const groupMetrics = (scorers: ResultsScorer[]): ResultsScorer[][] => {
173
+ const results: Record<string, ResultsScorer[]> = {};
174
+ scorers.forEach((scorer) => {
175
+ if (scorer.metrics.length > 0) {
176
+ const key = metricsKey(scorer.metrics);
177
+ results[key] = results[key] || [];
178
+
179
+ results[key].push(scorer);
180
+ }
181
+ });
182
+ return Object.values(results);
183
+ };
184
+
71
185
  interface VerticalMetricProps {
72
- metricSummary: MetricSummary;
186
+ metric: ResultsMetric;
187
+ reducer?: string;
73
188
  isFirst: boolean;
74
189
  showReducer: boolean;
75
190
  }
@@ -77,7 +192,8 @@ interface VerticalMetricProps {
77
192
  /** Renders a Vertical Metric
78
193
  */
79
194
  const VerticalMetric: FC<VerticalMetricProps> = ({
80
- metricSummary,
195
+ metric,
196
+ reducer,
81
197
  isFirst,
82
198
  showReducer,
83
199
  }) => {
@@ -91,7 +207,7 @@ const VerticalMetric: FC<VerticalMetricProps> = ({
91
207
  styles.verticalMetricName,
92
208
  )}
93
209
  >
94
- {metricDisplayName(metricSummary.metric)}
210
+ {metricDisplayName(metric)}
95
211
  </div>
96
212
  {showReducer ? (
97
213
  <div
@@ -101,7 +217,7 @@ const VerticalMetric: FC<VerticalMetricProps> = ({
101
217
  styles.verticalMetricReducer,
102
218
  )}
103
219
  >
104
- {metricSummary.reducer || "default"}
220
+ {reducer || "default"}
105
221
  </div>
106
222
  ) : undefined}
107
223
 
@@ -112,69 +228,9 @@ const VerticalMetric: FC<VerticalMetricProps> = ({
112
228
  styles.verticalMetricValue,
113
229
  )}
114
230
  >
115
- {formatPrettyDecimal(metricSummary.metric.value)}
116
- </div>
117
- </div>
118
- );
119
- };
120
-
121
- interface MultiScorerMetricProps {
122
- scorer: EvalScore;
123
- isFirst: boolean;
124
- showReducer: boolean;
125
- }
126
-
127
- const MultiScorerMetric: FC<MultiScorerMetricProps> = ({
128
- scorer,
129
- isFirst,
130
- showReducer,
131
- }) => {
132
- const titleFontClz = "text-size-base";
133
- const reducerFontClz = "text-size-smaller";
134
- const valueFontClz = "text-size-base";
135
-
136
- return (
137
- <div
138
- className={clsx(
139
- styles.multiScorer,
140
- isFirst ? styles.multiScorerIndent : undefined,
141
- )}
142
- >
143
- <div
144
- className={clsx(
145
- titleFontClz,
146
- "text-style-label",
147
- "text-style-secondary",
148
- "multi-score-label",
149
- styles.multiScorerLabel,
150
- )}
151
- >
152
- {scorer.name}
153
- </div>
154
- {showReducer ? (
155
- <div
156
- className={clsx(
157
- reducerFontClz,
158
- "text-style-label",
159
- "text-style-secondary",
160
- styles.multiScorerReducer,
161
- )}
162
- >
163
- {scorer.reducer || "default"}
164
- </div>
165
- ) : undefined}
166
- <div className={clsx(valueFontClz, styles.multiScorerValue)}>
167
- {Object.keys(scorer.metrics).map((key) => {
168
- const metric = scorer.metrics[key];
169
- return (
170
- <div className={styles.multiScoreMetricGrid} key={key}>
171
- <div>{metricDisplayName(metric)}</div>
172
- <div className={styles.multiScorerValueContent}>
173
- {formatPrettyDecimal(metric.value)}
174
- </div>
175
- </div>
176
- );
177
- })}
231
+ {metric.value !== undefined && metric.value !== null
232
+ ? formatPrettyDecimal(metric.value)
233
+ : "n/a"}
178
234
  </div>
179
235
  </div>
180
236
  );