inspect-ai 0.3.81__py3-none-any.whl → 0.3.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (297) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/eval.py +35 -2
  3. inspect_ai/_cli/util.py +44 -1
  4. inspect_ai/_display/core/config.py +1 -1
  5. inspect_ai/_display/core/display.py +13 -4
  6. inspect_ai/_display/core/results.py +1 -1
  7. inspect_ai/_display/textual/app.py +14 -3
  8. inspect_ai/_display/textual/display.py +4 -0
  9. inspect_ai/_display/textual/widgets/samples.py +9 -3
  10. inspect_ai/_display/textual/widgets/task_detail.py +8 -8
  11. inspect_ai/_display/textual/widgets/tasks.py +17 -1
  12. inspect_ai/_display/textual/widgets/vscode.py +44 -0
  13. inspect_ai/_eval/eval.py +74 -25
  14. inspect_ai/_eval/evalset.py +22 -18
  15. inspect_ai/_eval/loader.py +34 -11
  16. inspect_ai/_eval/run.py +13 -15
  17. inspect_ai/_eval/score.py +13 -3
  18. inspect_ai/_eval/task/generate.py +8 -9
  19. inspect_ai/_eval/task/log.py +55 -6
  20. inspect_ai/_eval/task/run.py +51 -10
  21. inspect_ai/_eval/task/task.py +23 -9
  22. inspect_ai/_util/constants.py +2 -0
  23. inspect_ai/_util/file.py +30 -1
  24. inspect_ai/_util/json.py +37 -1
  25. inspect_ai/_util/registry.py +1 -0
  26. inspect_ai/_util/vscode.py +37 -0
  27. inspect_ai/_view/server.py +113 -1
  28. inspect_ai/_view/www/App.css +7 -1
  29. inspect_ai/_view/www/dist/assets/index.css +813 -415
  30. inspect_ai/_view/www/dist/assets/index.js +54475 -32003
  31. inspect_ai/_view/www/eslint.config.mjs +1 -1
  32. inspect_ai/_view/www/log-schema.json +137 -31
  33. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  34. inspect_ai/_view/www/package.json +11 -2
  35. inspect_ai/_view/www/src/App.tsx +161 -853
  36. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  37. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  38. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  39. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  40. inspect_ai/_view/www/src/api/types.ts +107 -2
  41. inspect_ai/_view/www/src/appearance/icons.ts +2 -0
  42. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  43. inspect_ai/_view/www/src/components/Card.tsx +6 -4
  44. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  45. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  46. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  47. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  48. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  49. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  50. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  51. inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
  52. inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
  53. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  54. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  55. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +116 -26
  56. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  57. inspect_ai/_view/www/src/components/Modal.module.css +38 -0
  58. inspect_ai/_view/www/src/components/Modal.tsx +77 -0
  59. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  60. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  61. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  62. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  63. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  64. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  65. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  66. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  67. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  68. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  69. inspect_ai/_view/www/src/index.tsx +26 -94
  70. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  71. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  72. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  73. inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
  74. inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
  75. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  76. inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
  77. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  78. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +74 -28
  79. inspect_ai/_view/www/src/samples/SampleDialog.tsx +58 -22
  80. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  81. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +135 -104
  82. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +10 -0
  83. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +83 -36
  84. inspect_ai/_view/www/src/samples/SamplesTools.tsx +35 -30
  85. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  86. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  87. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  88. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +6 -1
  89. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +5 -0
  90. inspect_ai/_view/www/src/samples/chat/messages.ts +36 -0
  91. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  92. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +11 -1
  93. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  94. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +34 -20
  95. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
  96. inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
  97. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
  98. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -10
  99. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  100. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +22 -3
  101. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +27 -2
  102. inspect_ai/_view/www/src/samples/list/SampleList.tsx +122 -85
  103. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  104. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +28 -15
  105. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  106. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  107. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  108. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  109. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  110. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +12 -27
  111. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
  112. inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
  113. inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
  114. inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
  115. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  116. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  117. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  118. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  119. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  120. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  121. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  122. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  123. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  124. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  125. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  126. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  127. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +33 -17
  128. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  129. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  130. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  131. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  132. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +81 -60
  133. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  134. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  135. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +29 -1
  136. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  137. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  138. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  139. inspect_ai/_view/www/src/state/hooks.ts +399 -0
  140. inspect_ai/_view/www/src/state/logPolling.ts +200 -0
  141. inspect_ai/_view/www/src/state/logSlice.ts +224 -0
  142. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  143. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  144. inspect_ai/_view/www/src/state/samplePolling.ts +314 -0
  145. inspect_ai/_view/www/src/state/sampleSlice.ts +140 -0
  146. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  147. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  148. inspect_ai/_view/www/src/state/store.ts +168 -0
  149. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  150. inspect_ai/_view/www/src/state/utils.ts +23 -0
  151. inspect_ai/_view/www/src/storage/index.ts +26 -0
  152. inspect_ai/_view/www/src/types/log.d.ts +36 -26
  153. inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
  154. inspect_ai/_view/www/src/types.ts +94 -32
  155. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  156. inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
  157. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  158. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  159. inspect_ai/_view/www/src/utils/react.ts +30 -0
  160. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  161. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +184 -217
  162. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  163. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  164. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  165. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  166. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -1
  167. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +159 -103
  168. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  169. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  170. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
  171. inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
  172. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +12 -14
  173. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  174. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  175. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
  176. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  177. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  178. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  179. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  180. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  181. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +128 -115
  182. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  183. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  184. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  185. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  186. inspect_ai/_view/www/vite.config.js +6 -0
  187. inspect_ai/_view/www/yarn.lock +464 -355
  188. inspect_ai/agent/__init__.py +36 -0
  189. inspect_ai/agent/_agent.py +268 -0
  190. inspect_ai/agent/_as_solver.py +72 -0
  191. inspect_ai/agent/_as_tool.py +122 -0
  192. inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
  193. inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
  194. inspect_ai/agent/_filter.py +46 -0
  195. inspect_ai/agent/_handoff.py +93 -0
  196. inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
  197. inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
  198. inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
  199. inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
  200. inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
  201. inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
  202. inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
  203. inspect_ai/agent/_react.py +241 -0
  204. inspect_ai/agent/_run.py +36 -0
  205. inspect_ai/agent/_types.py +81 -0
  206. inspect_ai/log/_condense.py +26 -0
  207. inspect_ai/log/_log.py +17 -5
  208. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  209. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  210. inspect_ai/log/_recorders/buffer/database.py +685 -0
  211. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  212. inspect_ai/log/_recorders/buffer/types.py +84 -0
  213. inspect_ai/log/_recorders/eval.py +2 -11
  214. inspect_ai/log/_recorders/types.py +30 -0
  215. inspect_ai/log/_transcript.py +32 -2
  216. inspect_ai/model/__init__.py +7 -1
  217. inspect_ai/model/_call_tools.py +257 -52
  218. inspect_ai/model/_chat_message.py +7 -4
  219. inspect_ai/model/_conversation.py +13 -62
  220. inspect_ai/model/_display.py +85 -0
  221. inspect_ai/model/_generate_config.py +2 -2
  222. inspect_ai/model/_model.py +114 -14
  223. inspect_ai/model/_model_output.py +14 -9
  224. inspect_ai/model/_openai.py +16 -4
  225. inspect_ai/model/_openai_computer_use.py +162 -0
  226. inspect_ai/model/_openai_responses.py +319 -165
  227. inspect_ai/model/_providers/anthropic.py +20 -21
  228. inspect_ai/model/_providers/azureai.py +24 -13
  229. inspect_ai/model/_providers/bedrock.py +1 -7
  230. inspect_ai/model/_providers/cloudflare.py +3 -3
  231. inspect_ai/model/_providers/goodfire.py +2 -6
  232. inspect_ai/model/_providers/google.py +11 -10
  233. inspect_ai/model/_providers/groq.py +6 -3
  234. inspect_ai/model/_providers/hf.py +7 -3
  235. inspect_ai/model/_providers/mistral.py +7 -10
  236. inspect_ai/model/_providers/openai.py +47 -17
  237. inspect_ai/model/_providers/openai_o1.py +11 -4
  238. inspect_ai/model/_providers/openai_responses.py +12 -14
  239. inspect_ai/model/_providers/providers.py +2 -2
  240. inspect_ai/model/_providers/together.py +12 -2
  241. inspect_ai/model/_providers/util/chatapi.py +7 -2
  242. inspect_ai/model/_providers/util/hf_handler.py +4 -2
  243. inspect_ai/model/_providers/util/llama31.py +4 -2
  244. inspect_ai/model/_providers/vertex.py +11 -9
  245. inspect_ai/model/_providers/vllm.py +4 -4
  246. inspect_ai/scorer/__init__.py +2 -0
  247. inspect_ai/scorer/_metrics/__init__.py +2 -0
  248. inspect_ai/scorer/_metrics/grouped.py +84 -0
  249. inspect_ai/scorer/_score.py +26 -6
  250. inspect_ai/solver/__init__.py +2 -2
  251. inspect_ai/solver/_basic_agent.py +22 -9
  252. inspect_ai/solver/_bridge.py +31 -0
  253. inspect_ai/solver/_chain.py +20 -12
  254. inspect_ai/solver/_fork.py +5 -1
  255. inspect_ai/solver/_human_agent.py +52 -0
  256. inspect_ai/solver/_prompt.py +3 -1
  257. inspect_ai/solver/_run.py +59 -0
  258. inspect_ai/solver/_solver.py +14 -4
  259. inspect_ai/solver/_task_state.py +5 -3
  260. inspect_ai/tool/_tool_call.py +15 -8
  261. inspect_ai/tool/_tool_def.py +17 -12
  262. inspect_ai/tool/_tool_support_helpers.py +4 -4
  263. inspect_ai/tool/_tool_with.py +14 -11
  264. inspect_ai/tool/_tools/_bash_session.py +11 -2
  265. inspect_ai/tool/_tools/_computer/_common.py +18 -2
  266. inspect_ai/tool/_tools/_computer/_computer.py +18 -2
  267. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
  268. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
  269. inspect_ai/tool/_tools/_think.py +1 -1
  270. inspect_ai/tool/_tools/_web_browser/_web_browser.py +103 -62
  271. inspect_ai/util/__init__.py +2 -0
  272. inspect_ai/util/_anyio.py +27 -0
  273. inspect_ai/util/_sandbox/__init__.py +2 -1
  274. inspect_ai/util/_sandbox/context.py +32 -7
  275. inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
  276. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  277. inspect_ai/util/_sandbox/docker/docker.py +12 -1
  278. inspect_ai/util/_store_model.py +30 -7
  279. inspect_ai/util/_subprocess.py +13 -3
  280. inspect_ai/util/_subtask.py +1 -0
  281. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
  282. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +295 -229
  283. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -169
  284. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  285. /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
  286. /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
  287. /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
  288. /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
  289. /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
  290. /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
  291. /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
  292. /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
  293. /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
  294. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
  295. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
  296. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
  297. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
@@ -16,6 +16,8 @@ import clsx from "clsx";
16
16
  import { EditorView, minimalSetup } from "codemirror";
17
17
  import { FC, useEffect, useMemo, useRef, useState } from "react";
18
18
 
19
+ import { SampleSummary } from "../../../api/types";
20
+ import { useEvalDescriptor } from "../../../state/hooks";
19
21
  import { ScoreFilter } from "../../../types";
20
22
  import { EvalDescriptor } from "../../descriptor/types";
21
23
  import { FilterError, filterSamples, scoreFilterItems } from "../filters";
@@ -30,7 +32,7 @@ interface FilteringResult {
30
32
  }
31
33
 
32
34
  interface SampleFilterProps {
33
- evalDescriptor: EvalDescriptor;
35
+ samples: SampleSummary[];
34
36
  scoreFilter: ScoreFilter;
35
37
  setScoreFilter: (filter: ScoreFilter) => void;
36
38
  }
@@ -105,11 +107,12 @@ const editorTheme = EditorView.theme({
105
107
  // Helper functions
106
108
  const getFilteringResult = (
107
109
  evalDescriptor: EvalDescriptor,
110
+ sampleSummaries: SampleSummary[],
108
111
  filterValue: string,
109
112
  ): FilteringResult => {
110
113
  const { result, error } = filterSamples(
111
114
  evalDescriptor,
112
- evalDescriptor.samples,
115
+ sampleSummaries,
113
116
  filterValue,
114
117
  );
115
118
  return { numSamples: result.length, error };
@@ -151,7 +154,7 @@ const getLints = (
151
154
 
152
155
  // Main component
153
156
  export const SampleFilter: FC<SampleFilterProps> = ({
154
- evalDescriptor,
157
+ samples,
155
158
  scoreFilter,
156
159
  setScoreFilter,
157
160
  }) => {
@@ -160,9 +163,10 @@ export const SampleFilter: FC<SampleFilterProps> = ({
160
163
  const linterCompartment = useRef<Compartment>(new Compartment());
161
164
  const autocompletionCompartment = useRef<Compartment>(new Compartment());
162
165
  const updateListenerCompartment = useRef<Compartment>(new Compartment());
166
+ const evalDescriptor = useEvalDescriptor();
163
167
 
164
168
  const filterItems = useMemo(
165
- () => scoreFilterItems(evalDescriptor),
169
+ () => (evalDescriptor ? scoreFilterItems(evalDescriptor) : []),
166
170
  [evalDescriptor],
167
171
  );
168
172
 
@@ -186,9 +190,13 @@ export const SampleFilter: FC<SampleFilterProps> = ({
186
190
 
187
191
  const makeUpdateListener = () =>
188
192
  EditorView.updateListener.of((update) => {
189
- if (update.docChanged) {
193
+ if (update.docChanged && evalDescriptor) {
190
194
  const newValue = update.state.doc.toString();
191
- const filteringResult = getFilteringResult(evalDescriptor, newValue);
195
+ const filteringResult = getFilteringResult(
196
+ evalDescriptor,
197
+ samples,
198
+ newValue,
199
+ );
192
200
  if (!filteringResult.error) {
193
201
  setScoreFilter({ value: newValue });
194
202
  }
@@ -229,9 +237,11 @@ export const SampleFilter: FC<SampleFilterProps> = ({
229
237
  const currentValue = editorViewRef.current.state.doc.toString();
230
238
  if (scoreFilter.value === currentValue) return;
231
239
 
232
- setFilteringResultInstant(
233
- getFilteringResult(evalDescriptor, scoreFilter.value || ""),
234
- );
240
+ if (evalDescriptor) {
241
+ setFilteringResultInstant(
242
+ getFilteringResult(evalDescriptor, samples, scoreFilter.value || ""),
243
+ );
244
+ }
235
245
  editorViewRef.current.dispatch({
236
246
  changes: {
237
247
  from: 0,
@@ -0,0 +1,33 @@
1
+ import { EventData, SampleData } from "../api/types";
2
+ import { Events } from "../types/log";
3
+ import { resolveAttachments } from "../utils/attachments";
4
+
5
+ export const sampleDataAdapter = () => {
6
+ const attachments: Record<string, string> = {};
7
+ const events: Record<string, EventData> = {};
8
+
9
+ return {
10
+ addData: (data: SampleData) => {
11
+ data.attachments.forEach((a) => {
12
+ if (attachments[a.hash] === undefined) {
13
+ attachments[a.hash] = a.content;
14
+ }
15
+ });
16
+
17
+ data.events.forEach((e) => {
18
+ if (events[e.event_id] === undefined) {
19
+ events[e.event_id] = e;
20
+ }
21
+ });
22
+ },
23
+ resolvedEvents: (): Events => {
24
+ const eventDatas = Object.values(events);
25
+
26
+ const resolvedEvents = eventDatas.map((ed: EventData) => {
27
+ return ed.event;
28
+ }) as Events;
29
+
30
+ return resolveAttachments<Events>(resolvedEvents, attachments);
31
+ },
32
+ };
33
+ };
@@ -1,9 +1,9 @@
1
- import { Type11 } from "../types/log";
1
+ import { Type14 } from "../types/log";
2
2
 
3
3
  /**
4
4
  * Formats a limit message
5
5
  */
6
- export const sampleLimitMessage = (type: Type11): string => {
6
+ export const sampleLimitMessage = (type: Type14): string => {
7
7
  switch (type) {
8
8
  case "operator":
9
9
  return "Sample terminated due to operator limit.";
@@ -1,37 +1,22 @@
1
- import { FC, Fragment } from "react";
1
+ import { FC } from "react";
2
2
  import { SampleSummary } from "../../api/types";
3
- import { SamplesDescriptor } from "../descriptor/samplesDescriptor";
4
3
 
5
- import styles from "./SampleScores.module.css";
4
+ import { getScoreDescriptorForValues } from "../descriptor/score/ScoreDescriptor";
6
5
 
7
6
  interface SampleScoresProps {
8
7
  sample: SampleSummary;
9
- sampleDescriptor: SamplesDescriptor;
10
8
  scorer: string;
11
9
  }
12
10
 
13
- export const SampleScores: FC<SampleScoresProps> = ({
14
- sample,
15
- sampleDescriptor,
16
- scorer,
17
- }) => {
18
- const scores = scorer
19
- ? sampleDescriptor.evalDescriptor
20
- .scorerDescriptor(sample, { scorer, name: scorer })
21
- .scores()
22
- : sampleDescriptor.selectedScorerDescriptor(sample).scores();
23
-
24
- if (scores.length === 1) {
25
- return scores[0].rendered();
26
- } else {
27
- const rows = scores.map((score) => {
28
- return (
29
- <Fragment>
30
- <div style={{ opacity: "0.7" }}>{score.name}</div>
31
- <div>{score.rendered()}</div>
32
- </Fragment>
33
- );
34
- });
35
- return <div className={styles.grid}>{rows}</div>;
11
+ export const SampleScores: FC<SampleScoresProps> = ({ sample, scorer }) => {
12
+ const scoreData = sample.scores?.[scorer];
13
+ if (!scoreData) {
14
+ return undefined;
36
15
  }
16
+
17
+ const scorerDescriptor = getScoreDescriptorForValues(
18
+ [scoreData.value],
19
+ [typeof scoreData.value],
20
+ );
21
+ return scorerDescriptor?.render(scoreData.value);
37
22
  };
@@ -0,0 +1,38 @@
1
+ .container {
2
+ display: grid;
3
+ grid-template-columns:
4
+ minmax(auto, 1fr) minmax(auto, 1fr) minmax(auto, 1fr)
5
+ 2fr;
6
+ column-gap: 0.75em;
7
+ }
8
+
9
+ .container .cell {
10
+ margin-bottom: 0.5em;
11
+ }
12
+
13
+ .fullWidth {
14
+ grid-column: 1 / -1;
15
+ }
16
+
17
+ .heading {
18
+ font-weight: 600;
19
+ }
20
+
21
+ .padded {
22
+ padding-bottom: 3em;
23
+ }
24
+
25
+ .separator {
26
+ height: 1px;
27
+ background-color: var(--bs-light-border-subtle);
28
+ }
29
+
30
+ .separatorPadded {
31
+ margin-top: 0.5em;
32
+ margin-bottom: 0.5em;
33
+ }
34
+
35
+ .headerSep {
36
+ margin-top: 0.1em;
37
+ margin-bottom: 0.2em;
38
+ }
@@ -0,0 +1,118 @@
1
+ import clsx from "clsx";
2
+ import { FC } from "react";
3
+ import { SampleSummary } from "../../api/types";
4
+ import { EmptyPanel } from "../../components/EmptyPanel";
5
+ import { MetaDataGrid } from "../../metadata/MetaDataGrid";
6
+ import { useEvalDescriptor } from "../../state/hooks";
7
+ import { EvalSample } from "../../types/log";
8
+ import { SampleScores } from "./SampleScores";
9
+ import styles from "./SampleScoresGrid.module.css";
10
+
11
+ interface SampleScoresGridProps {
12
+ evalSample: EvalSample;
13
+ className?: string | string[];
14
+ }
15
+
16
+ export const SampleScoresGrid: FC<SampleScoresGridProps> = ({
17
+ evalSample,
18
+ className,
19
+ }) => {
20
+ const evalDescriptor = useEvalDescriptor();
21
+ if (!evalDescriptor) {
22
+ return <EmptyPanel>No Sample Selected</EmptyPanel>;
23
+ }
24
+ return (
25
+ <div className={clsx(className, styles.container)}>
26
+ <div
27
+ className={clsx(
28
+ "text-size-smaller",
29
+ "text-style-label",
30
+ "text-style-secondary",
31
+ )}
32
+ >
33
+ Scorer
34
+ </div>
35
+ <div
36
+ className={clsx(
37
+ "text-size-smaller",
38
+ "text-style-label",
39
+ "text-style-secondary",
40
+ )}
41
+ >
42
+ Answer
43
+ </div>
44
+ <div
45
+ className={clsx(
46
+ "text-size-smaller",
47
+ "text-style-label",
48
+ "text-style-secondary",
49
+ )}
50
+ >
51
+ Score
52
+ </div>
53
+ <div
54
+ className={clsx(
55
+ "text-size-smaller",
56
+ "text-style-label",
57
+ "text-style-secondary",
58
+ )}
59
+ >
60
+ Explanation
61
+ </div>
62
+ <div
63
+ className={clsx(styles.separator, styles.fullWidth, styles.headerSep)}
64
+ ></div>
65
+
66
+ {Object.keys(evalSample.scores || {}).map((scorer) => {
67
+ if (!evalSample.scores) {
68
+ return undefined;
69
+ }
70
+ const scoreData = evalSample.scores[scorer];
71
+ const explanation = scoreData.explanation || "(No Explanation)";
72
+ const answer = scoreData.answer;
73
+ let metadata = scoreData.metadata || {};
74
+
75
+ return (
76
+ <>
77
+ <div className={clsx("text-size-base", styles.cell)}>{scorer}</div>
78
+ <div className={clsx(styles.cell, "text-size-base")}>{answer}</div>
79
+ <div className={clsx(styles.cell, "text-size-base")}>
80
+ <SampleScores
81
+ sample={evalSample as any as SampleSummary}
82
+ scorer={scorer}
83
+ />
84
+ </div>
85
+ <div className={clsx("text-size-base", styles.cell)}>
86
+ {explanation}
87
+ </div>
88
+
89
+ {Object.keys(metadata).length > 0 ? (
90
+ <>
91
+ <div
92
+ className={clsx(
93
+ "text-size-smaller",
94
+ "text-style-label",
95
+ "text-style-secondary",
96
+ styles.fullWidth,
97
+ )}
98
+ >
99
+ Metadata
100
+ </div>
101
+ <div className={clsx(styles.fullWidth)}>
102
+ <MetaDataGrid entries={metadata} />
103
+ </div>
104
+ <div
105
+ className={clsx(
106
+ styles.separator,
107
+ styles.separatorPadded,
108
+ styles.fullWidth,
109
+ )}
110
+ ></div>
111
+ </>
112
+ ) : undefined}
113
+ </>
114
+ );
115
+ })}
116
+ </div>
117
+ );
118
+ };
@@ -1,6 +1,7 @@
1
1
  .container {
2
2
  margin-top: 0.5em;
3
3
  padding-left: 0;
4
+ padding-right: 0;
4
5
  }
5
6
 
6
7
  .label {
@@ -12,7 +13,7 @@
12
13
  }
13
14
 
14
15
  .wordBreak {
15
- word-break: break-all;
16
+ word-break: break-word;
16
17
  }
17
18
 
18
19
  .scoreTable {
@@ -51,3 +52,11 @@
51
52
  .noTop {
52
53
  margin-top: 0 !important;
53
54
  }
55
+
56
+ .scoreCard {
57
+ padding-top: 0.5em;
58
+ }
59
+
60
+ .scores {
61
+ padding-top: 1em;
62
+ }
@@ -0,0 +1,78 @@
1
+ import clsx from "clsx";
2
+ import { Card, CardBody } from "../../components/Card";
3
+ import { MarkdownDiv } from "../../components/MarkdownDiv";
4
+ import { EvalSample } from "../../types/log";
5
+ import { inputString } from "../../utils/format";
6
+
7
+ import { FC } from "react";
8
+ import ExpandablePanel from "../../components/ExpandablePanel";
9
+ import { useEvalDescriptor } from "../../state/hooks";
10
+ import { SampleScoresGrid } from "./SampleScoresGrid";
11
+ import styles from "./SampleScoresView.module.css";
12
+
13
+ interface SampleScoresViewProps {
14
+ sample?: EvalSample;
15
+ className?: string | string[];
16
+ }
17
+
18
+ export const SampleScoresView: FC<SampleScoresViewProps> = ({
19
+ sample,
20
+ className,
21
+ }) => {
22
+ const evalDescriptor = useEvalDescriptor();
23
+ if (!evalDescriptor) {
24
+ return undefined;
25
+ }
26
+ if (!sample) {
27
+ return undefined;
28
+ }
29
+
30
+ const scoreInput = inputString(sample.input);
31
+ if (sample.choices && sample.choices.length > 0) {
32
+ scoreInput.push("");
33
+ scoreInput.push(
34
+ ...sample.choices.map((choice, index) => {
35
+ return `${String.fromCharCode(65 + index)}) ${choice}`;
36
+ }),
37
+ );
38
+ }
39
+
40
+ return (
41
+ <div
42
+ className={clsx(
43
+ "container-fluid",
44
+ className,
45
+ "font-size-base",
46
+ styles.container,
47
+ )}
48
+ >
49
+ <Card className={clsx(styles.scoreCard)}>
50
+ <CardBody>
51
+ <div
52
+ className={clsx(
53
+ "text-size-small",
54
+ "text-style-label",
55
+ "text-style-secondary",
56
+ )}
57
+ >
58
+ Input
59
+ </div>
60
+ <ExpandablePanel
61
+ lines={10}
62
+ id={`sample-score-${sample.id}-${sample.epoch}`}
63
+ collapse={true}
64
+ >
65
+ <MarkdownDiv
66
+ markdown={scoreInput.join("\n")}
67
+ className={clsx(styles.wordBreak, "text-size-base")}
68
+ />
69
+ </ExpandablePanel>
70
+ <SampleScoresGrid
71
+ evalSample={sample}
72
+ className={clsx(styles.scores)}
73
+ />
74
+ </CardBody>
75
+ </Card>
76
+ </div>
77
+ );
78
+ };
@@ -4,13 +4,10 @@ import { ANSIDisplay } from "../../components/AnsiDisplay";
4
4
  import { ErrorEvent } from "../../types/log";
5
5
  import { formatDateTime } from "../../utils/format";
6
6
  import { EventPanel } from "./event/EventPanel";
7
- import { TranscriptEventState } from "./types";
8
7
 
9
8
  interface ErrorEventViewProps {
10
9
  id: string;
11
10
  event: ErrorEvent;
12
- eventState: TranscriptEventState;
13
- setEventState: (state: TranscriptEventState) => void;
14
11
  className?: string | string[];
15
12
  }
16
13
 
@@ -20,8 +17,6 @@ interface ErrorEventViewProps {
20
17
  export const ErrorEventView: FC<ErrorEventViewProps> = ({
21
18
  id,
22
19
  event,
23
- eventState,
24
- setEventState,
25
20
  className,
26
21
  }) => {
27
22
  return (
@@ -31,14 +26,6 @@ export const ErrorEventView: FC<ErrorEventViewProps> = ({
31
26
  className={className}
32
27
  subTitle={formatDateTime(new Date(event.timestamp))}
33
28
  icon={ApplicationIcons.error}
34
- selectedNav={eventState.selectedNav || ""}
35
- setSelectedNav={(selectedNav: string) => {
36
- setEventState({ ...eventState, selectedNav });
37
- }}
38
- collapsed={eventState.collapsed}
39
- setCollapsed={(collapsed: boolean) => {
40
- setEventState({ ...eventState, collapsed });
41
- }}
42
29
  >
43
30
  <ANSIDisplay
44
31
  output={event.error.traceback_ansi}
@@ -6,13 +6,10 @@ import { InfoEvent } from "../../types/log";
6
6
  import { formatDateTime } from "../../utils/format";
7
7
  import { EventPanel } from "./event/EventPanel";
8
8
  import styles from "./InfoEventView.module.css";
9
- import { TranscriptEventState } from "./types";
10
9
 
11
10
  interface InfoEventViewProps {
12
11
  id: string;
13
12
  event: InfoEvent;
14
- eventState: TranscriptEventState;
15
- setEventState: (state: TranscriptEventState) => void;
16
13
  className?: string | string[];
17
14
  }
18
15
 
@@ -22,8 +19,6 @@ interface InfoEventViewProps {
22
19
  export const InfoEventView: FC<InfoEventViewProps> = ({
23
20
  id,
24
21
  event,
25
- eventState,
26
- setEventState,
27
22
  className,
28
23
  }) => {
29
24
  const panels = [];
@@ -40,14 +35,6 @@ export const InfoEventView: FC<InfoEventViewProps> = ({
40
35
  className={className}
41
36
  subTitle={formatDateTime(new Date(event.timestamp))}
42
37
  icon={ApplicationIcons.info}
43
- selectedNav={eventState.selectedNav || ""}
44
- setSelectedNav={(selectedNav) => {
45
- setEventState({ ...eventState, selectedNav });
46
- }}
47
- collapsed={eventState.collapsed}
48
- setCollapsed={(collapsed) => {
49
- setEventState({ ...eventState, collapsed });
50
- }}
51
38
  >
52
39
  {panels}
53
40
  </EventPanel>
@@ -4,13 +4,10 @@ import { ANSIDisplay } from "../../components/AnsiDisplay";
4
4
  import { InputEvent } from "../../types/log";
5
5
  import { formatDateTime } from "../../utils/format";
6
6
  import { EventPanel } from "./event/EventPanel";
7
- import { TranscriptEventState } from "./types";
8
7
 
9
8
  interface InputEventViewProps {
10
9
  id: string;
11
10
  event: InputEvent;
12
- eventState: TranscriptEventState;
13
- setEventState: (state: TranscriptEventState) => void;
14
11
  className?: string | string[];
15
12
  }
16
13
 
@@ -20,8 +17,6 @@ interface InputEventViewProps {
20
17
  export const InputEventView: FC<InputEventViewProps> = ({
21
18
  id,
22
19
  event,
23
- eventState,
24
- setEventState,
25
20
  className,
26
21
  }) => {
27
22
  return (
@@ -31,14 +26,6 @@ export const InputEventView: FC<InputEventViewProps> = ({
31
26
  className={className}
32
27
  subTitle={formatDateTime(new Date(event.timestamp))}
33
28
  icon={ApplicationIcons.input}
34
- selectedNav={eventState.selectedNav || ""}
35
- setSelectedNav={(selectedNav) => {
36
- setEventState({ ...eventState, selectedNav });
37
- }}
38
- collapsed={eventState.collapsed}
39
- setCollapsed={(collapsed) => {
40
- setEventState({ ...eventState, collapsed });
41
- }}
42
29
  >
43
30
  <ANSIDisplay
44
31
  output={event.input_ansi}
@@ -41,3 +41,7 @@
41
41
  column-gap: 1em;
42
42
  row-gap: 0.5em;
43
43
  }
44
+
45
+ .progress {
46
+ margin-left: 2em;
47
+ }
@@ -3,7 +3,7 @@ import "prismjs/components/prism-json";
3
3
  import "prismjs/components/prism-python";
4
4
 
5
5
  import clsx from "clsx";
6
- import { FC, Fragment, useEffect, useMemo, useRef } from "react";
6
+ import { FC, Fragment, useMemo } from "react";
7
7
  import { ApplicationIcons } from "../../appearance/icons";
8
8
  import { MetaDataGrid } from "../../metadata/MetaDataGrid";
9
9
  import {
@@ -17,9 +17,9 @@ import { ModelUsagePanel } from "../../usage/ModelUsagePanel";
17
17
  import { ChatView } from "../chat/ChatView";
18
18
  import { EventPanel } from "./event/EventPanel";
19
19
  import { EventSection } from "./event/EventSection";
20
- import { TranscriptEventState } from "./types";
21
20
 
22
- import { highlightElement } from "prismjs";
21
+ import { PulsingDots } from "../../components/PulsingDots";
22
+ import { usePrismHighlight } from "../../state/hooks";
23
23
  import styles from "./ModelEventView.module.css";
24
24
  import { EventTimingPanel } from "./event/EventTimingPanel";
25
25
  import { formatTiming, formatTitle } from "./event/utils";
@@ -27,8 +27,6 @@ import { formatTiming, formatTitle } from "./event/utils";
27
27
  interface ModelEventViewProps {
28
28
  id: string;
29
29
  event: ModelEvent;
30
- eventState: TranscriptEventState;
31
- setEventState: (state: TranscriptEventState) => void;
32
30
  className?: string | string[];
33
31
  }
34
32
 
@@ -38,8 +36,6 @@ interface ModelEventViewProps {
38
36
  export const ModelEventView: FC<ModelEventViewProps> = ({
39
37
  id,
40
38
  event,
41
- eventState,
42
- setEventState,
43
39
  className,
44
40
  }) => {
45
41
  const totalUsage = event.output.usage?.total_tokens;
@@ -74,14 +70,6 @@ export const ModelEventView: FC<ModelEventViewProps> = ({
74
70
  title={formatTitle(`Model Call: ${event.model}`, totalUsage, callTime)}
75
71
  subTitle={formatTiming(event.timestamp, event.working_start)}
76
72
  icon={ApplicationIcons.model}
77
- selectedNav={eventState.selectedNav || ""}
78
- setSelectedNav={(selectedNav) => {
79
- setEventState({ ...eventState, selectedNav });
80
- }}
81
- collapsed={eventState.collapsed}
82
- setCollapsed={(collapsed) => {
83
- setEventState({ ...eventState, collapsed });
84
- }}
85
73
  >
86
74
  <div data-name="Summary" className={styles.container}>
87
75
  <ChatView
@@ -91,6 +79,11 @@ export const ModelEventView: FC<ModelEventViewProps> = ({
91
79
  numbered={false}
92
80
  toolCallStyle="compact"
93
81
  />
82
+ {event.pending ? (
83
+ <div className={clsx(styles.progress)}>
84
+ <PulsingDots subtle={false} size="medium" />
85
+ </div>
86
+ ) : undefined}
94
87
  </div>
95
88
  <div data-name="All" className={styles.container}>
96
89
  <div className={styles.all}>
@@ -170,27 +163,20 @@ interface APICodeCellProps {
170
163
  }
171
164
 
172
165
  export const APICodeCell: FC<APICodeCellProps> = ({ id, contents }) => {
173
- const codeRef = useRef<HTMLElement>(null);
174
166
  const sourceCode = useMemo(() => {
175
167
  return JSON.stringify(contents, undefined, 2);
176
168
  }, [contents]);
177
-
178
- useEffect(() => {
179
- if (codeRef.current) {
180
- highlightElement(codeRef.current);
181
- }
182
- }, [contents]);
169
+ const prismParentRef = usePrismHighlight(sourceCode);
183
170
 
184
171
  if (!contents) {
185
172
  return null;
186
173
  }
187
174
 
188
175
  return (
189
- <div className={clsx("model-call")}>
176
+ <div ref={prismParentRef} className={clsx("model-call")}>
190
177
  <pre className={clsx(styles.codePre)}>
191
178
  <code
192
179
  id={id}
193
- ref={codeRef}
194
180
  className={clsx("language-json", styles.code, "text-size-small")}
195
181
  >
196
182
  {sourceCode}