inspect-ai 0.3.81__py3-none-any.whl → 0.3.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. inspect_ai/_cli/eval.py +35 -2
  2. inspect_ai/_cli/util.py +44 -1
  3. inspect_ai/_display/core/config.py +1 -1
  4. inspect_ai/_display/core/display.py +13 -4
  5. inspect_ai/_display/core/results.py +1 -1
  6. inspect_ai/_display/textual/widgets/task_detail.py +5 -4
  7. inspect_ai/_eval/eval.py +38 -1
  8. inspect_ai/_eval/evalset.py +5 -0
  9. inspect_ai/_eval/run.py +5 -2
  10. inspect_ai/_eval/task/log.py +53 -6
  11. inspect_ai/_eval/task/run.py +51 -10
  12. inspect_ai/_util/constants.py +2 -0
  13. inspect_ai/_util/file.py +17 -1
  14. inspect_ai/_util/json.py +36 -1
  15. inspect_ai/_view/server.py +113 -1
  16. inspect_ai/_view/www/App.css +1 -1
  17. inspect_ai/_view/www/dist/assets/index.css +518 -296
  18. inspect_ai/_view/www/dist/assets/index.js +38803 -36307
  19. inspect_ai/_view/www/eslint.config.mjs +1 -1
  20. inspect_ai/_view/www/log-schema.json +13 -0
  21. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  22. inspect_ai/_view/www/package.json +8 -2
  23. inspect_ai/_view/www/src/App.tsx +151 -855
  24. inspect_ai/_view/www/src/api/api-browser.ts +176 -5
  25. inspect_ai/_view/www/src/api/api-vscode.ts +75 -1
  26. inspect_ai/_view/www/src/api/client-api.ts +66 -10
  27. inspect_ai/_view/www/src/api/jsonrpc.ts +2 -0
  28. inspect_ai/_view/www/src/api/types.ts +107 -2
  29. inspect_ai/_view/www/src/appearance/icons.ts +1 -0
  30. inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +3 -3
  31. inspect_ai/_view/www/src/components/DownloadPanel.tsx +2 -2
  32. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +56 -61
  33. inspect_ai/_view/www/src/components/FindBand.tsx +17 -9
  34. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +1 -1
  35. inspect_ai/_view/www/src/components/JsonPanel.tsx +14 -24
  36. inspect_ai/_view/www/src/components/LargeModal.tsx +2 -35
  37. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +27 -11
  38. inspect_ai/_view/www/src/components/LiveVirtualList.module.css +11 -0
  39. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +177 -0
  40. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -3
  41. inspect_ai/_view/www/src/components/MessageBand.tsx +14 -9
  42. inspect_ai/_view/www/src/components/MorePopOver.tsx +3 -3
  43. inspect_ai/_view/www/src/components/NavPills.tsx +20 -8
  44. inspect_ai/_view/www/src/components/NoContentsPanel.module.css +12 -0
  45. inspect_ai/_view/www/src/components/NoContentsPanel.tsx +20 -0
  46. inspect_ai/_view/www/src/components/ProgressBar.module.css +5 -4
  47. inspect_ai/_view/www/src/components/ProgressBar.tsx +3 -2
  48. inspect_ai/_view/www/src/components/PulsingDots.module.css +81 -0
  49. inspect_ai/_view/www/src/components/PulsingDots.tsx +45 -0
  50. inspect_ai/_view/www/src/components/TabSet.tsx +4 -37
  51. inspect_ai/_view/www/src/components/ToolButton.tsx +3 -4
  52. inspect_ai/_view/www/src/index.tsx +26 -94
  53. inspect_ai/_view/www/src/logfile/remoteLogFile.ts +9 -1
  54. inspect_ai/_view/www/src/logfile/remoteZipFile.ts +30 -4
  55. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +4 -6
  56. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +1 -1
  57. inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +9 -1
  58. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +67 -28
  59. inspect_ai/_view/www/src/samples/SampleDialog.tsx +51 -22
  60. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +4 -0
  61. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +144 -90
  62. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  63. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +82 -35
  64. inspect_ai/_view/www/src/samples/SamplesTools.tsx +23 -30
  65. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +2 -1
  66. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +1 -1
  67. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +45 -53
  68. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +4 -1
  69. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +3 -0
  70. inspect_ai/_view/www/src/samples/chat/messages.ts +34 -0
  71. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +3 -0
  72. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +10 -1
  73. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +22 -46
  74. inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +25 -17
  75. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +2 -1
  76. inspect_ai/_view/www/src/samples/descriptor/types.ts +6 -5
  77. inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +21 -3
  78. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +20 -1
  79. inspect_ai/_view/www/src/samples/list/SampleList.tsx +105 -85
  80. inspect_ai/_view/www/src/samples/list/SampleRow.module.css +6 -0
  81. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +27 -14
  82. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +29 -18
  83. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +28 -28
  84. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +19 -9
  85. inspect_ai/_view/www/src/samples/sampleDataAdapter.ts +33 -0
  86. inspect_ai/_view/www/src/samples/sampleLimit.ts +2 -2
  87. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +7 -9
  88. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +7 -11
  89. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +0 -13
  90. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +0 -13
  91. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +0 -13
  92. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +4 -0
  93. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +10 -24
  94. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +0 -13
  95. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -22
  96. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +15 -24
  97. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +0 -13
  98. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +6 -28
  99. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +24 -34
  100. inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +4 -0
  101. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +8 -13
  102. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +197 -338
  103. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.module.css +16 -0
  104. inspect_ai/_view/www/src/samples/transcript/TranscriptVirtualListComponent.tsx +44 -0
  105. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +7 -4
  106. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +52 -58
  107. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.module.css +23 -0
  108. inspect_ai/_view/www/src/samples/transcript/event/EventProgressPanel.tsx +27 -0
  109. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +30 -1
  110. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +102 -72
  111. inspect_ai/_view/www/src/scoring/utils.ts +87 -0
  112. inspect_ai/_view/www/src/state/appSlice.ts +244 -0
  113. inspect_ai/_view/www/src/state/hooks.ts +397 -0
  114. inspect_ai/_view/www/src/state/logPolling.ts +196 -0
  115. inspect_ai/_view/www/src/state/logSlice.ts +214 -0
  116. inspect_ai/_view/www/src/state/logsPolling.ts +118 -0
  117. inspect_ai/_view/www/src/state/logsSlice.ts +181 -0
  118. inspect_ai/_view/www/src/state/samplePolling.ts +311 -0
  119. inspect_ai/_view/www/src/state/sampleSlice.ts +127 -0
  120. inspect_ai/_view/www/src/state/sampleUtils.ts +21 -0
  121. inspect_ai/_view/www/src/state/scrolling.ts +206 -0
  122. inspect_ai/_view/www/src/state/store.ts +168 -0
  123. inspect_ai/_view/www/src/state/store_filter.ts +84 -0
  124. inspect_ai/_view/www/src/state/utils.ts +23 -0
  125. inspect_ai/_view/www/src/storage/index.ts +26 -0
  126. inspect_ai/_view/www/src/types/log.d.ts +2 -0
  127. inspect_ai/_view/www/src/types.ts +94 -32
  128. inspect_ai/_view/www/src/utils/attachments.ts +58 -23
  129. inspect_ai/_view/www/src/utils/logger.ts +52 -0
  130. inspect_ai/_view/www/src/utils/polling.ts +100 -0
  131. inspect_ai/_view/www/src/utils/react.ts +30 -0
  132. inspect_ai/_view/www/src/utils/vscode.ts +1 -1
  133. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +181 -216
  134. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +11 -53
  135. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +8 -18
  136. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +1 -0
  137. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +40 -22
  138. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +0 -1
  139. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +98 -39
  140. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.module.css +32 -0
  141. inspect_ai/_view/www/src/workspace/navbar/RunningStatusPanel.tsx +32 -0
  142. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +11 -13
  143. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +6 -2
  144. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +4 -4
  145. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +28 -13
  146. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +5 -10
  147. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +4 -4
  148. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.module.css +22 -0
  149. inspect_ai/_view/www/src/workspace/tabs/RunningNoSamples.tsx +19 -0
  150. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +110 -115
  151. inspect_ai/_view/www/src/workspace/tabs/grouping.ts +37 -5
  152. inspect_ai/_view/www/src/workspace/tabs/types.ts +4 -0
  153. inspect_ai/_view/www/src/workspace/types.ts +4 -3
  154. inspect_ai/_view/www/src/workspace/utils.ts +4 -4
  155. inspect_ai/_view/www/vite.config.js +6 -0
  156. inspect_ai/_view/www/yarn.lock +370 -354
  157. inspect_ai/log/_condense.py +26 -0
  158. inspect_ai/log/_log.py +6 -3
  159. inspect_ai/log/_recorders/buffer/__init__.py +14 -0
  160. inspect_ai/log/_recorders/buffer/buffer.py +30 -0
  161. inspect_ai/log/_recorders/buffer/database.py +685 -0
  162. inspect_ai/log/_recorders/buffer/filestore.py +259 -0
  163. inspect_ai/log/_recorders/buffer/types.py +84 -0
  164. inspect_ai/log/_recorders/eval.py +2 -11
  165. inspect_ai/log/_recorders/types.py +30 -0
  166. inspect_ai/log/_transcript.py +27 -1
  167. inspect_ai/model/_call_tools.py +1 -0
  168. inspect_ai/model/_generate_config.py +2 -2
  169. inspect_ai/model/_model.py +1 -0
  170. inspect_ai/tool/_tool_support_helpers.py +4 -4
  171. inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -1
  172. inspect_ai/util/_subtask.py +1 -0
  173. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/METADATA +1 -1
  174. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/RECORD +178 -138
  175. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +0 -22
  176. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/WHEEL +0 -0
  177. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/entry_points.txt +0 -0
  178. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/licenses/LICENSE +0 -0
  179. {inspect_ai-0.3.81.dist-info → inspect_ai-0.3.82.dist-info}/top_level.txt +0 -0
@@ -26,3 +26,7 @@
26
26
  .titled:hover {
27
27
  cursor: pointer;
28
28
  }
29
+
30
+ .value {
31
+ flex-direction: column;
32
+ }
@@ -1,17 +1,18 @@
1
1
  import clsx from "clsx";
2
2
  import { MarkdownDiv } from "../components/MarkdownDiv";
3
- import { EvalSample, WorkingTime } from "../types/log";
3
+ import { EvalSample, Target, TotalTime, WorkingTime } from "../types/log";
4
4
  import { arrayToString, formatTime, inputString } from "../utils/format";
5
- import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
6
5
  import { FlatSampleError } from "./error/FlatSampleErrorView";
7
6
 
8
7
  import { FC, ReactNode } from "react";
8
+ import { SampleSummary } from "../api/types";
9
+ import { useSampleDescriptor, useScore } from "../state/hooks";
9
10
  import styles from "./SampleSummaryView.module.css";
11
+ import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
10
12
 
11
13
  interface SampleSummaryViewProps {
12
14
  parent_id: string;
13
- sample: EvalSample;
14
- sampleDescriptor: SamplesDescriptor;
15
+ sample: SampleSummary | EvalSample;
15
16
  }
16
17
 
17
18
  interface SummaryColumn {
@@ -23,14 +24,73 @@ interface SummaryColumn {
23
24
  title?: string;
24
25
  }
25
26
 
27
+ interface SampleFields {
28
+ id: string | number;
29
+ input: string[];
30
+ target: Target;
31
+ answer?: string;
32
+ limit?: string;
33
+ working_time?: WorkingTime;
34
+ total_time?: TotalTime;
35
+ error?: string;
36
+ }
37
+
38
+ function isEvalSample(
39
+ sample: SampleSummary | EvalSample,
40
+ ): sample is EvalSample {
41
+ return "choices" in sample && Array.isArray((sample as EvalSample).choices);
42
+ }
43
+
44
+ const resolveSample = (
45
+ sample: SampleSummary | EvalSample,
46
+ sampleDescriptor: SamplesDescriptor,
47
+ ): SampleFields => {
48
+ const input = inputString(sample.input);
49
+ if (isEvalSample(sample) && sample.choices && sample.choices.length > 0) {
50
+ input.push("");
51
+ input.push(
52
+ ...sample.choices.map((choice, index) => {
53
+ return `${String.fromCharCode(65 + index)}) ${choice}`;
54
+ }),
55
+ );
56
+ }
57
+
58
+ const target = sample.target;
59
+ const answer =
60
+ sample && sampleDescriptor
61
+ ? sampleDescriptor.selectedScorerDescriptor(sample)?.answer()
62
+ : undefined;
63
+ const limit = isEvalSample(sample) ? sample.limit?.type : undefined;
64
+ const working_time = isEvalSample(sample) ? sample.working_time : undefined;
65
+ const total_time = isEvalSample(sample) ? sample.total_time : undefined;
66
+ const error = isEvalSample(sample) ? sample.error?.message : undefined;
67
+
68
+ return {
69
+ id: sample.id,
70
+ input,
71
+ target,
72
+ answer,
73
+ limit,
74
+ working_time,
75
+ total_time,
76
+ error,
77
+ };
78
+ };
79
+
26
80
  /**
27
81
  * Component to display a sample with relevant context and visibility control.
28
82
  */
29
83
  export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
30
84
  parent_id,
31
85
  sample,
32
- sampleDescriptor,
33
86
  }) => {
87
+ const sampleDescriptor = useSampleDescriptor();
88
+ const currentScore = useScore();
89
+ if (!sampleDescriptor) {
90
+ return undefined;
91
+ }
92
+ const fields = resolveSample(sample, sampleDescriptor);
93
+
34
94
  const input =
35
95
  sampleDescriptor?.messageShape.normalized.input > 0
36
96
  ? Math.max(0.15, sampleDescriptor.messageShape.normalized.input)
@@ -47,43 +107,33 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
47
107
  sampleDescriptor?.messageShape.normalized.limit > 0
48
108
  ? Math.max(0.15, sampleDescriptor.messageShape.normalized.limit)
49
109
  : 0;
50
- const timeSize = sample.working_time || sample.total_time ? 0.15 : 0;
110
+ const timeSize = fields.working_time || fields.total_time ? 0.15 : 0;
51
111
  const idSize = Math.max(
52
112
  2,
53
113
  Math.min(10, sampleDescriptor?.messageShape.raw.id),
54
114
  );
55
115
 
56
- const scoreInput = inputString(sample.input);
57
- if (sample.choices && sample.choices.length > 0) {
58
- scoreInput.push("");
59
- scoreInput.push(
60
- ...sample.choices.map((choice, index) => {
61
- return `${String.fromCharCode(65 + index)}) ${choice}`;
62
- }),
63
- );
64
- }
65
-
66
116
  // The columns for the sample
67
117
  const columns: SummaryColumn[] = [];
68
118
  columns.push({
69
119
  label: "Id",
70
- value: sample.id,
120
+ value: fields.id,
71
121
  size: `${idSize}em`,
72
122
  });
73
123
 
74
124
  columns.push({
75
125
  label: "Input",
76
- value: scoreInput,
126
+ value: fields.input,
77
127
  size: `${input}fr`,
78
128
  clamp: true,
79
129
  });
80
130
 
81
- if (sample.target) {
131
+ if (fields.target) {
82
132
  columns.push({
83
133
  label: "Target",
84
134
  value: (
85
135
  <MarkdownDiv
86
- markdown={arrayToString(arrayToString(sample?.target || "none"))}
136
+ markdown={arrayToString(fields?.target || "none")}
87
137
  className={clsx("no-last-para-padding", styles.target)}
88
138
  />
89
139
  ),
@@ -92,16 +142,12 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
92
142
  });
93
143
  }
94
144
 
95
- const fullAnswer =
96
- sample && sampleDescriptor
97
- ? sampleDescriptor.selectedScorerDescriptor(sample).answer()
98
- : undefined;
99
- if (fullAnswer) {
145
+ if (fields.answer) {
100
146
  columns.push({
101
147
  label: "Answer",
102
148
  value: sample ? (
103
149
  <MarkdownDiv
104
- markdown={fullAnswer}
150
+ markdown={fields.answer}
105
151
  className={clsx("no-last-para-padding", styles.answer)}
106
152
  />
107
153
  ) : (
@@ -119,20 +165,20 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
119
165
  return `Working time: ${formatTime(working_time)}`;
120
166
  };
121
167
 
122
- if (sample.total_time) {
168
+ if (fields.total_time) {
123
169
  columns.push({
124
170
  label: "Time",
125
- value: formatTime(sample.total_time),
171
+ value: formatTime(fields.total_time),
126
172
  size: `${timeSize}fr`,
127
173
  center: true,
128
- title: toolTip(sample.working_time),
174
+ title: toolTip(fields.working_time),
129
175
  });
130
176
  }
131
177
 
132
- if (sample?.limit && limitSize > 0) {
178
+ if (fields?.limit && limitSize > 0) {
133
179
  columns.push({
134
180
  label: "Limit",
135
- value: sample.limit.type,
181
+ value: fields.limit,
136
182
  size: `${limitSize}fr`,
137
183
  center: true,
138
184
  });
@@ -140,11 +186,11 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
140
186
 
141
187
  columns.push({
142
188
  label: "Score",
143
- value: sample.error ? (
144
- <FlatSampleError message={sample.error.message} />
189
+ value: fields.error ? (
190
+ <FlatSampleError message={fields.error} />
145
191
  ) : (
146
- // TODO: Cleanup once the PR lands which makes sample / sample summary share common interface
147
- sampleDescriptor?.selectedScore(sample)?.render() || ""
192
+ sampleDescriptor?.evalDescriptor.score(sample, currentScore)?.render() ||
193
+ ""
148
194
  ),
149
195
  size: "minmax(2em, 30em)",
150
196
  center: true,
@@ -184,6 +230,7 @@ export const SampleSummaryView: FC<SampleSummaryViewProps> = ({
184
230
  <div
185
231
  key={`sample-summ-val-${idx}`}
186
232
  className={clsx(
233
+ styles.value,
187
234
  styles.wrap,
188
235
  col.clamp ? "three-line-clamp" : undefined,
189
236
  col.center ? styles.centerLabel : undefined,
@@ -1,47 +1,40 @@
1
1
  import { FC } from "react";
2
2
  import { Fragment } from "react/jsx-runtime";
3
- import { ScoreFilter, ScoreLabel } from "../types";
4
- import { SamplesDescriptor } from "./descriptor/samplesDescriptor";
3
+ import { SampleSummary } from "../api/types";
4
+ import { useScore, useScores } from "../state/hooks";
5
+ import { useStore } from "../state/store";
5
6
  import { EpochFilter } from "./sample-tools/EpochFilter";
6
7
  import { SampleFilter } from "./sample-tools/sample-filter/SampleFilter";
7
8
  import { SelectScorer } from "./sample-tools/SelectScorer";
8
9
  import { SortFilter } from "./sample-tools/SortFilter";
9
10
 
10
11
  interface SampleToolsProps {
11
- epoch: string;
12
- setEpoch: (epoch: string) => void;
13
- epochs: number;
14
- scoreFilter: ScoreFilter;
15
- setScoreFilter: (filter: ScoreFilter) => void;
16
- sort: string;
17
- setSort: (sort: string) => void;
18
- score?: ScoreLabel;
19
- setScore: (score: ScoreLabel) => void;
20
- scores: ScoreLabel[];
21
- sampleDescriptor: SamplesDescriptor;
12
+ samples: SampleSummary[];
22
13
  }
23
14
 
24
- export const SampleTools: FC<SampleToolsProps> = ({
25
- epoch,
26
- setEpoch,
27
- epochs,
28
- scoreFilter,
29
- setScoreFilter,
30
- sort,
31
- setSort,
32
- score,
33
- setScore,
34
- scores,
35
- sampleDescriptor,
36
- }) => {
15
+ export const SampleTools: FC<SampleToolsProps> = ({ samples }) => {
16
+ const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
17
+
18
+ const filter = useStore((state) => state.log.filter);
19
+ const setFilter = useStore((state) => state.logActions.setFilter);
20
+
21
+ const scores = useScores();
22
+ const score = useScore();
23
+ const setScore = useStore((state) => state.logActions.setScore);
24
+ const epoch = useStore((state) => state.log.epoch);
25
+ const setEpoch = useStore((state) => state.logActions.setEpoch);
26
+ const sort = useStore((state) => state.log.sort);
27
+ const setSort = useStore((state) => state.logActions.setSort);
28
+
29
+ const epochs = selectedLogSummary?.eval.config.epochs || 1;
37
30
  return (
38
31
  <Fragment>
39
32
  <SampleFilter
40
- evalDescriptor={sampleDescriptor.evalDescriptor}
41
- scoreFilter={scoreFilter}
42
- setScoreFilter={setScoreFilter}
33
+ samples={samples}
34
+ scoreFilter={filter}
35
+ setScoreFilter={setFilter}
43
36
  />
44
- {scores.length > 1 ? (
37
+ {scores?.length > 1 ? (
45
38
  <SelectScorer scores={scores} score={score} setScore={setScore} />
46
39
  ) : undefined}
47
40
  {epochs > 1 ? (
@@ -46,8 +46,9 @@ export const ChatMessage: FC<ChatMessageProps> = ({
46
46
  indented ? styles.indented : undefined,
47
47
  )}
48
48
  >
49
- <ExpandablePanel collapse={collapse} lines={30}>
49
+ <ExpandablePanel id={`${id}-message`} collapse={collapse} lines={30}>
50
50
  <MessageContents
51
+ id={`${id}-contents`}
51
52
  key={`${id}-contents`}
52
53
  message={message}
53
54
  toolMessages={toolMessages}
@@ -26,7 +26,7 @@ export const ChatMessageRenderer: ContentRenderer = {
26
26
  render: (id, entry) => {
27
27
  return {
28
28
  rendered: (
29
- <NavPills>
29
+ <NavPills id={`${id}-navpills`}>
30
30
  <ChatSummary title="Last Turn" id={id} messages={entry.value} />
31
31
  <ChatView title="All" id={id} messages={entry.value} />
32
32
  </NavPills>
@@ -1,73 +1,65 @@
1
- import { FC, RefObject, useState } from "react";
1
+ import { FC, memo, ReactNode, RefObject, useMemo } from "react";
2
2
  import { Messages } from "../../types/log";
3
3
 
4
- import clsx from "clsx";
5
- import { Virtuoso } from "react-virtuoso";
6
4
  import { ChatMessageRow } from "./ChatMessageRow";
7
5
  import { ResolvedMessage, resolveMessages } from "./messages";
8
6
 
9
- import styles from "./ChatViewVirtualList.module.css";
7
+ import { LiveVirtualList } from "../../components/LiveVirtualList";
10
8
 
11
9
  interface ChatViewVirtualListProps {
12
- id?: string;
10
+ id: string;
11
+ className?: string | string[];
13
12
  messages: Messages;
14
13
  toolCallStyle: "compact" | "complete";
15
- className?: string | string[];
16
14
  indented: boolean;
17
15
  numbered?: boolean;
18
- scrollRef?: RefObject<HTMLElement | null>;
16
+ scrollRef?: RefObject<HTMLDivElement | null>;
17
+ running?: boolean;
19
18
  }
20
19
 
21
20
  /**
22
21
  * Renders the ChatViewVirtualList component.
23
22
  */
24
- export const ChatViewVirtualList: FC<ChatViewVirtualListProps> = ({
25
- id,
26
- messages,
27
- toolCallStyle,
28
- className,
29
- indented,
30
- numbered = true,
31
- scrollRef,
32
- }) => {
33
- const collapsedMessages = resolveMessages(messages);
34
- const [followOutput, setFollowOutput] = useState(false);
23
+ export const ChatViewVirtualList: FC<ChatViewVirtualListProps> = memo(
24
+ ({
25
+ id,
26
+ messages,
27
+ className,
28
+ toolCallStyle,
29
+ indented,
30
+ numbered = true,
31
+ scrollRef,
32
+ running,
33
+ }) => {
34
+ const collapsedMessages = useMemo(() => {
35
+ return resolveMessages(messages);
36
+ }, [messages]);
37
+
38
+ const renderRow = (index: number, item: ResolvedMessage): ReactNode => {
39
+ const number =
40
+ collapsedMessages.length > 1 && numbered ? index + 1 : undefined;
41
+
42
+ return (
43
+ <ChatMessageRow
44
+ parentName={id || "chat-virtual-list"}
45
+ number={number}
46
+ resolvedMessage={item}
47
+ indented={indented}
48
+ toolCallStyle={toolCallStyle}
49
+ />
50
+ );
51
+ };
35
52
 
36
- const renderRow = (item: ResolvedMessage, index: number) => {
37
- const number =
38
- collapsedMessages.length > 1 && numbered ? index + 1 : undefined;
39
53
  return (
40
- <ChatMessageRow
41
- parentName={id || "chat-virtual-list"}
42
- number={number}
43
- resolvedMessage={item}
44
- indented={indented}
45
- toolCallStyle={toolCallStyle}
54
+ <LiveVirtualList<ResolvedMessage>
55
+ id="chat-virtual-list"
56
+ className={className}
57
+ scrollRef={scrollRef}
58
+ data={collapsedMessages}
59
+ renderRow={renderRow}
60
+ live={running}
61
+ showProgress={running}
46
62
  />
47
63
  );
48
- };
49
-
50
- const result = (
51
- <Virtuoso
52
- customScrollParent={scrollRef?.current ? scrollRef.current : undefined}
53
- style={{ height: "100%", width: "100%" }}
54
- data={collapsedMessages}
55
- itemContent={(index: number, data: ResolvedMessage) => {
56
- return renderRow(data, index);
57
- }}
58
- increaseViewportBy={{ top: 1000, bottom: 1000 }}
59
- overscan={{
60
- main: 10,
61
- reverse: 10,
62
- }}
63
- followOutput={followOutput}
64
- atBottomStateChange={(atBottom: boolean) => {
65
- setFollowOutput(atBottom);
66
- }}
67
- skipAnimationFrameInResizeObserver={true}
68
- className={clsx(styles.list, className)}
69
- />
70
- );
71
-
72
- return result;
73
- };
64
+ },
65
+ );
@@ -104,6 +104,9 @@ const messageRenderers: Record<string, MessageRenderer> = {
104
104
  reasoning: {
105
105
  render: (key, content, isLast) => {
106
106
  const r = content as ContentReasoning;
107
+ if (!r.reasoning && !r.redacted) {
108
+ return undefined;
109
+ }
107
110
  return (
108
111
  <Fragment key={key}>
109
112
  <div
@@ -115,7 +118,7 @@ const messageRenderers: Record<string, MessageRenderer> = {
115
118
  >
116
119
  Reasoning
117
120
  </div>
118
- <ExpandablePanel collapse={true}>
121
+ <ExpandablePanel id={`${key}-reasoning`} collapse={true}>
119
122
  <MarkdownDiv
120
123
  markdown={
121
124
  r.redacted
@@ -13,12 +13,14 @@ import { ContentTool } from "../../types";
13
13
  import styles from "./MessageContents.module.css";
14
14
 
15
15
  interface MessageContentsProps {
16
+ id: string;
16
17
  message: ChatMessageAssistant | ChatMessageSystem | ChatMessageUser;
17
18
  toolMessages: ChatMessageTool[];
18
19
  toolCallStyle: "compact" | "complete";
19
20
  }
20
21
 
21
22
  export const MessageContents: FC<MessageContentsProps> = ({
23
+ id,
22
24
  message,
23
25
  toolMessages,
24
26
  toolCallStyle,
@@ -56,6 +58,7 @@ export const MessageContents: FC<MessageContentsProps> = ({
56
58
  } else {
57
59
  return (
58
60
  <ToolCallView
61
+ id={`${id}-tool-call`}
59
62
  key={`tool-call-${idx}`}
60
63
  functionCall={functionCall}
61
64
  input={input}
@@ -9,6 +9,7 @@ import {
9
9
  ContentReasoning,
10
10
  ContentText,
11
11
  ContentVideo,
12
+ Events,
12
13
  Messages,
13
14
  } from "../../types/log";
14
15
 
@@ -65,6 +66,7 @@ export const resolveMessages = (messages: Messages) => {
65
66
  }
66
67
 
67
68
  const systemMessage: ChatMessageSystem = {
69
+ id: "sys-message-6815A84B062A",
68
70
  role: "system",
69
71
  content: systemContent,
70
72
  source: "input",
@@ -123,3 +125,35 @@ const normalizeContent = (
123
125
  return content;
124
126
  }
125
127
  };
128
+
129
+ export const messagesFromEvents = (runningEvents: Events): Messages => {
130
+ const messages: Map<
131
+ string,
132
+ ChatMessageSystem | ChatMessageUser | ChatMessageAssistant | ChatMessageTool
133
+ > = new Map();
134
+
135
+ runningEvents
136
+ .filter((e) => e.event === "model")
137
+ .forEach((e) => {
138
+ for (const m of e.input) {
139
+ const inputMessage = m as
140
+ | ChatMessageSystem
141
+ | ChatMessageUser
142
+ | ChatMessageAssistant
143
+ | ChatMessageTool;
144
+ if (inputMessage.id && !messages.has(inputMessage.id)) {
145
+ messages.set(inputMessage.id, inputMessage);
146
+ }
147
+ }
148
+ const outputMessage = e.output.choices[0].message;
149
+ if (outputMessage.id) {
150
+ messages.set(outputMessage.id, outputMessage);
151
+ }
152
+ });
153
+
154
+ if (messages.entries.length > 0) {
155
+ return messages.values().toArray();
156
+ } else {
157
+ return [];
158
+ }
159
+ };
@@ -0,0 +1,3 @@
1
+ .output {
2
+ padding-top: 1em;
3
+ }
@@ -10,10 +10,12 @@ import {
10
10
  ToolCallContent,
11
11
  } from "../../../types/log";
12
12
  import { MessageContent } from "../MessageContent";
13
+ import styles from "./ToolCallView.module.css";
13
14
  import { ToolInput } from "./ToolInput";
14
15
  import { ToolTitle } from "./ToolTitle";
15
16
 
16
17
  interface ToolCallViewProps {
18
+ id: string;
17
19
  functionCall: string;
18
20
  input?: string;
19
21
  highlightLanguage?: string;
@@ -43,6 +45,7 @@ interface ToolCallViewProps {
43
45
  * Renders the ToolCallView component.
44
46
  */
45
47
  export const ToolCallView: FC<ToolCallViewProps> = ({
48
+ id,
46
49
  functionCall,
47
50
  input,
48
51
  highlightLanguage,
@@ -116,7 +119,13 @@ export const ToolCallView: FC<ToolCallViewProps> = ({
116
119
  toolCallView={view}
117
120
  />
118
121
  {hasContent ? (
119
- <ExpandablePanel collapse={collapse} border={true} lines={15}>
122
+ <ExpandablePanel
123
+ id={`${id}-tool-input`}
124
+ collapse={collapse}
125
+ border={true}
126
+ lines={15}
127
+ className={styles.output}
128
+ >
120
129
  <MessageContent contents={normalizedContent} />
121
130
  </ExpandablePanel>
122
131
  ) : undefined}
@@ -1,46 +1,19 @@
1
1
  import clsx from "clsx";
2
- import { highlightElement } from "prismjs";
3
- import { FC, memo, useEffect, useRef } from "react";
2
+ import { FC } from "react";
4
3
  import { MarkdownDiv } from "../../../components/MarkdownDiv";
5
4
 
5
+ import { usePrismHighlight } from "../../../state/hooks";
6
6
  import styles from "./ToolInput.module.css";
7
7
 
8
- export const useCodeHighlight = (language?: string) => {
9
- const codeRef = useRef<HTMLElement>(null);
10
-
11
- useEffect(() => {
12
- if (codeRef.current && language) {
13
- highlightElement(codeRef.current);
14
- }
15
- }, [language]);
16
-
17
- return codeRef;
18
- };
19
-
20
8
  interface ToolInputProps {
21
9
  highlightLanguage?: string;
22
10
  contents?: string | object;
23
11
  toolCallView?: { content: string };
24
12
  }
25
- export const ToolInput: FC<ToolInputProps> = memo((props) => {
13
+ export const ToolInput: FC<ToolInputProps> = (props) => {
26
14
  const { highlightLanguage, contents, toolCallView } = props;
27
15
 
28
- const codeRef = useCodeHighlight(highlightLanguage);
29
- const toolViewRef = useRef<HTMLDivElement>(null);
30
-
31
- useEffect(() => {
32
- if (toolCallView?.content && toolViewRef.current) {
33
- requestAnimationFrame(() => {
34
- const codeBlocks = toolViewRef.current!.querySelectorAll("pre code");
35
- codeBlocks.forEach((block) => {
36
- if (block.className.includes("language-")) {
37
- block.classList.add("sourceCode");
38
- highlightElement(block as HTMLElement);
39
- }
40
- });
41
- });
42
- }
43
- }, [toolCallView?.content]);
16
+ const prismParentRef = usePrismHighlight(toolCallView?.content);
44
17
 
45
18
  if (!contents && !toolCallView?.content) return null;
46
19
 
@@ -48,8 +21,8 @@ export const ToolInput: FC<ToolInputProps> = memo((props) => {
48
21
  return (
49
22
  <MarkdownDiv
50
23
  markdown={toolCallView.content}
51
- ref={toolViewRef}
52
- className={clsx("text-size-small", "tool-output")}
24
+ ref={prismParentRef}
25
+ className={clsx(styles.bottomPadding, "text-size-small", "tool-output")}
53
26
  />
54
27
  );
55
28
  }
@@ -58,18 +31,21 @@ export const ToolInput: FC<ToolInputProps> = memo((props) => {
58
31
  typeof contents === "object" ? JSON.stringify(contents) : contents;
59
32
 
60
33
  return (
61
- <pre className={clsx("tool-output", styles.outputPre, styles.bottomMargin)}>
62
- <code
63
- ref={codeRef}
64
- className={clsx(
65
- "source-code",
66
- "sourceCode",
67
- highlightLanguage ? `language-${highlightLanguage}` : undefined,
68
- styles.outputCode,
69
- )}
34
+ <div ref={prismParentRef}>
35
+ <pre
36
+ className={clsx("tool-output", styles.outputPre, styles.bottomMargin)}
70
37
  >
71
- {formattedContent}
72
- </code>
73
- </pre>
38
+ <code
39
+ className={clsx(
40
+ "source-code",
41
+ "sourceCode",
42
+ highlightLanguage ? `language-${highlightLanguage}` : undefined,
43
+ styles.outputCode,
44
+ )}
45
+ >
46
+ {formattedContent}
47
+ </code>
48
+ </pre>
49
+ </div>
74
50
  );
75
- });
51
+ };