inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/common.py +7 -3
  3. inspect_ai/_cli/eval.py +17 -2
  4. inspect_ai/_cli/trace.py +21 -2
  5. inspect_ai/_display/core/active.py +4 -3
  6. inspect_ai/_display/core/config.py +3 -3
  7. inspect_ai/_display/core/panel.py +7 -3
  8. inspect_ai/_display/plain/__init__.py +0 -0
  9. inspect_ai/_display/plain/display.py +203 -0
  10. inspect_ai/_display/rich/display.py +4 -9
  11. inspect_ai/_display/textual/app.py +4 -1
  12. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  13. inspect_ai/_display/textual/widgets/samples.py +119 -16
  14. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  15. inspect_ai/_eval/eval.py +32 -20
  16. inspect_ai/_eval/evalset.py +7 -5
  17. inspect_ai/_eval/score.py +1 -0
  18. inspect_ai/_eval/task/__init__.py +2 -2
  19. inspect_ai/_eval/task/images.py +40 -25
  20. inspect_ai/_eval/task/results.py +50 -22
  21. inspect_ai/_eval/task/run.py +180 -124
  22. inspect_ai/_eval/task/sandbox.py +10 -5
  23. inspect_ai/_eval/task/task.py +140 -25
  24. inspect_ai/_util/constants.py +2 -0
  25. inspect_ai/_util/content.py +23 -1
  26. inspect_ai/_util/images.py +20 -17
  27. inspect_ai/_util/kvstore.py +73 -0
  28. inspect_ai/_util/notgiven.py +18 -0
  29. inspect_ai/_util/port_names.py +61 -0
  30. inspect_ai/_util/text.py +23 -0
  31. inspect_ai/_util/thread.py +5 -0
  32. inspect_ai/_view/www/App.css +31 -1
  33. inspect_ai/_view/www/dist/assets/index.css +31 -1
  34. inspect_ai/_view/www/dist/assets/index.js +25375 -1846
  35. inspect_ai/_view/www/log-schema.json +129 -15
  36. inspect_ai/_view/www/package.json +2 -0
  37. inspect_ai/_view/www/src/App.mjs +8 -10
  38. inspect_ai/_view/www/src/Types.mjs +0 -1
  39. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  40. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  41. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  42. inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
  43. inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
  44. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  45. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  46. inspect_ai/_view/www/src/index.js +75 -2
  47. inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
  48. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
  49. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  50. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  51. inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
  52. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  53. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
  54. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
  55. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  56. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +62 -27
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/Json.mjs +12 -6
  76. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
  77. inspect_ai/_view/www/vite.config.js +7 -0
  78. inspect_ai/_view/www/yarn.lock +116 -0
  79. inspect_ai/approval/_human/__init__.py +0 -0
  80. inspect_ai/approval/_human/util.py +2 -2
  81. inspect_ai/approval/_policy.py +12 -6
  82. inspect_ai/dataset/_sources/csv.py +2 -1
  83. inspect_ai/dataset/_sources/json.py +2 -1
  84. inspect_ai/dataset/_sources/util.py +15 -7
  85. inspect_ai/log/_condense.py +11 -1
  86. inspect_ai/log/_log.py +3 -6
  87. inspect_ai/log/_recorders/eval.py +19 -8
  88. inspect_ai/log/_samples.py +26 -5
  89. inspect_ai/log/_transcript.py +32 -2
  90. inspect_ai/model/__init__.py +10 -2
  91. inspect_ai/model/_call_tools.py +59 -12
  92. inspect_ai/model/_chat_message.py +2 -4
  93. inspect_ai/model/_conversation.py +61 -0
  94. inspect_ai/model/_generate_config.py +10 -4
  95. inspect_ai/model/_model.py +117 -18
  96. inspect_ai/model/_model_output.py +7 -2
  97. inspect_ai/model/_providers/anthropic.py +109 -51
  98. inspect_ai/model/_providers/azureai.py +26 -24
  99. inspect_ai/model/_providers/bedrock.py +43 -44
  100. inspect_ai/model/_providers/google.py +121 -58
  101. inspect_ai/model/_providers/groq.py +7 -5
  102. inspect_ai/model/_providers/hf.py +11 -6
  103. inspect_ai/model/_providers/mistral.py +17 -20
  104. inspect_ai/model/_providers/openai.py +32 -21
  105. inspect_ai/model/_providers/openai_o1.py +9 -8
  106. inspect_ai/model/_providers/providers.py +1 -1
  107. inspect_ai/model/_providers/together.py +8 -8
  108. inspect_ai/model/_providers/vertex.py +18 -8
  109. inspect_ai/scorer/__init__.py +13 -2
  110. inspect_ai/scorer/_metrics/__init__.py +2 -2
  111. inspect_ai/scorer/_metrics/std.py +3 -3
  112. inspect_ai/scorer/_reducer/reducer.py +1 -1
  113. inspect_ai/scorer/_scorer.py +2 -2
  114. inspect_ai/solver/__init__.py +2 -5
  115. inspect_ai/solver/_prompt.py +35 -5
  116. inspect_ai/solver/_task_state.py +80 -38
  117. inspect_ai/tool/__init__.py +11 -1
  118. inspect_ai/tool/_tool.py +21 -3
  119. inspect_ai/tool/_tool_call.py +10 -0
  120. inspect_ai/tool/_tool_def.py +16 -5
  121. inspect_ai/tool/_tool_with.py +21 -4
  122. inspect_ai/tool/beta/__init__.py +5 -0
  123. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  124. inspect_ai/tool/beta/_computer/_common.py +133 -0
  125. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  126. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  127. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  128. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  129. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  130. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  131. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  134. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  135. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  136. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  137. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  138. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  139. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  144. inspect_ai/util/__init__.py +2 -3
  145. inspect_ai/util/{_trace.py → _conversation.py} +3 -17
  146. inspect_ai/util/_display.py +14 -4
  147. inspect_ai/util/_limit.py +26 -0
  148. inspect_ai/util/_sandbox/context.py +12 -13
  149. inspect_ai/util/_sandbox/docker/compose.py +24 -11
  150. inspect_ai/util/_sandbox/docker/docker.py +84 -14
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/environment.py +27 -1
  153. inspect_ai/util/_sandbox/local.py +1 -0
  154. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
  155. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
  156. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  157. inspect_ai/model/_trace.py +0 -48
  158. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
  159. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
  160. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
  161. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  // @ts-check
2
2
  import { html } from "htm/preact";
3
+ import { useCallback, useState } from "preact/hooks";
3
4
  import { SampleInitEventView } from "./SampleInitEventView.mjs";
4
5
  import { StateEventView } from "./state/StateEventView.mjs";
5
6
  import { StepEventView } from "./StepEventView.mjs";
@@ -15,6 +16,8 @@ import { ApprovalEventView } from "./ApprovalEventView.mjs";
15
16
  import { SampleLimitEventView } from "./SampleLimitEventView.mjs";
16
17
  import { FontSize } from "../../appearance/Fonts.mjs";
17
18
  import { EventNode } from "./Types.mjs";
19
+ // @ts-ignore
20
+ import { VirtualList } from "../../components/VirtualList.mjs";
18
21
 
19
22
  /**
20
23
  * Renders the TranscriptView component.
@@ -26,10 +29,61 @@ import { EventNode } from "./Types.mjs";
26
29
  * @returns {import("preact").JSX.Element} The TranscriptView component.
27
30
  */
28
31
  export const TranscriptView = ({ id, events, depth = 0 }) => {
32
+ const [transcriptState, setTranscriptState] = useState({});
33
+ const onTranscriptState = useCallback(
34
+ (state) => {
35
+ setTranscriptState(state);
36
+ },
37
+ [transcriptState, setTranscriptState],
38
+ );
39
+
29
40
  // Normalize Events themselves
30
41
  const resolvedEvents = fixupEventStream(events);
31
42
  const eventNodes = treeifyEvents(resolvedEvents, depth);
32
- return html` <${TranscriptComponent} id=${id} eventNodes=${eventNodes} /> `;
43
+ return html`
44
+ <${TranscriptComponent}
45
+ id=${id}
46
+ eventNodes=${eventNodes}
47
+ transcriptState=${transcriptState}
48
+ setTranscriptState=${onTranscriptState}
49
+ />
50
+ `;
51
+ };
52
+
53
+ /**
54
+ * Renders the Transcript component.
55
+ *
56
+ * @param {Object} props - The parameters for the component.
57
+ * @param {string} props.id - The identifier for this view
58
+ * @param {import("../../types/log").Events} props.events - The transcript events to display.
59
+ * @param {Object} props.style - The transcript style to display.
60
+ * @param {number} props.depth - The base depth for this transcript view
61
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
62
+ * @returns {import("preact").JSX.Element} The TranscriptView component.
63
+ */
64
+ export const TranscriptVirtualList = (props) => {
65
+ let { id, scrollRef, events, depth, style } = props;
66
+
67
+ // Normalize Events themselves
68
+ const resolvedEvents = fixupEventStream(events);
69
+ const eventNodes = treeifyEvents(resolvedEvents, depth);
70
+
71
+ const [transcriptState, setTranscriptState] = useState({});
72
+ const onTranscriptState = useCallback(
73
+ (state) => {
74
+ setTranscriptState(state);
75
+ },
76
+ [transcriptState, setTranscriptState],
77
+ );
78
+
79
+ return html`<${TranscriptVirtualListComponent}
80
+ id=${id}
81
+ eventNodes=${eventNodes}
82
+ style=${style}
83
+ scrollRef=${scrollRef}
84
+ transcriptState=${transcriptState}
85
+ setTranscriptState=${onTranscriptState}
86
+ />`;
33
87
  };
34
88
 
35
89
  /**
@@ -39,9 +93,81 @@ export const TranscriptView = ({ id, events, depth = 0 }) => {
39
93
  * @param {string} props.id - The identifier for this view
40
94
  * @param {EventNode[]} props.eventNodes - The transcript events nodes to display.
41
95
  * @param {Object} props.style - The transcript style to display.
96
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
97
+ * @param {import("./Types.mjs").TranscriptState} props.transcriptState - The state for this transcript
98
+ * @param {(state: import("./Types.mjs").TranscriptState) => void} props.setTranscriptState - Set the transcript state for this transcript
42
99
  * @returns {import("preact").JSX.Element} The TranscriptView component.
43
100
  */
44
- export const TranscriptComponent = ({ id, eventNodes, style }) => {
101
+ export const TranscriptVirtualListComponent = ({
102
+ id,
103
+ eventNodes,
104
+ style,
105
+ scrollRef,
106
+ transcriptState,
107
+ setTranscriptState,
108
+ }) => {
109
+ const renderRow = (item, index) => {
110
+ const toggleStyle = {};
111
+ if (item.depth % 2 == 0) {
112
+ toggleStyle.backgroundColor = "var(--bs-light-bg-subtle)";
113
+ } else {
114
+ toggleStyle.backgroundColor = "var(--bs-body-bg)";
115
+ }
116
+
117
+ let paddingTop = "0";
118
+ if (index === 0) {
119
+ paddingTop = ".5em";
120
+ }
121
+ const eventId = `${id}-event${index}`;
122
+ const setEventState = useCallback(
123
+ (state) => {
124
+ setTranscriptState({ ...transcriptState, [eventId]: state });
125
+ },
126
+ [setTranscriptState, transcriptState],
127
+ );
128
+
129
+ return html`<div style=${{ paddingTop, paddingBottom: ".5em" }}>
130
+ <${RenderedEventNode}
131
+ id=${eventId}
132
+ node=${item}
133
+ style=${{
134
+ ...toggleStyle,
135
+ ...style,
136
+ }}
137
+ scrollRef=${scrollRef}
138
+ eventState=${transcriptState[eventId] || {}}
139
+ setEventState=${setEventState}
140
+ />
141
+ </div>`;
142
+ };
143
+
144
+ return html`<${VirtualList}
145
+ data=${eventNodes}
146
+ tabIndex="0"
147
+ renderRow=${renderRow}
148
+ scrollRef=${scrollRef}
149
+ style=${{ width: "100%", marginTop: "1em" }}
150
+ />`;
151
+ };
152
+
153
+ /**
154
+ * Renders the Transcript component.
155
+ *
156
+ * @param {Object} props - The parameters for the component.
157
+ * @param {string} props.id - The identifier for this view
158
+ * @param {EventNode[]} props.eventNodes - The transcript events nodes to display.
159
+ * @param {Object} props.style - The transcript style to display.
160
+ * @param {import("./Types.mjs").TranscriptState} props.transcriptState - The state for this transcript
161
+ * @param {(state: import("./Types.mjs").TranscriptState) => void} props.setTranscriptState - Set the transcript state for this transcript
162
+ * @returns {import("preact").JSX.Element} The TranscriptView component.
163
+ */
164
+ export const TranscriptComponent = ({
165
+ id,
166
+ transcriptState,
167
+ setTranscriptState,
168
+ eventNodes,
169
+ style,
170
+ }) => {
45
171
  const rows = eventNodes.map((eventNode, index) => {
46
172
  const toggleStyle = {};
47
173
  if (eventNode.depth % 2 == 0) {
@@ -55,15 +181,32 @@ export const TranscriptComponent = ({ id, eventNodes, style }) => {
55
181
  toggleStyle.marginBottom = "1.5em";
56
182
  }
57
183
 
184
+ let paddingBottom = ".5em";
185
+ if (index === eventNodes.length - 1) {
186
+ paddingBottom = "0";
187
+ }
188
+
189
+ const eventId = `${id}-event${index}`;
190
+ const setEventState = useCallback(
191
+ (state) => {
192
+ setTranscriptState({ ...transcriptState, [eventId]: state });
193
+ },
194
+ [setTranscriptState, transcriptState],
195
+ );
196
+
58
197
  const row = html`
59
- <${RenderedEventNode}
60
- id=${`${id}-event${index}`}
61
- node=${eventNode}
62
- style=${{
63
- ...toggleStyle,
64
- ...style,
65
- }}
66
- />
198
+ <div style=${{ paddingBottom }}>
199
+ <${RenderedEventNode}
200
+ id=${eventId}
201
+ node=${eventNode}
202
+ style=${{
203
+ ...toggleStyle,
204
+ ...style,
205
+ }}
206
+ eventState=${transcriptState[eventId] || {}}
207
+ setEventState=${setEventState}
208
+ />
209
+ </div>
67
210
  `;
68
211
  return row;
69
212
  });
@@ -89,14 +232,26 @@ export const TranscriptComponent = ({ id, eventNodes, style }) => {
89
232
  * @param {string} props.id - The id for this event.
90
233
  * @param { EventNode } props.node - This event.
91
234
  * @param { Object } props.style - The style for this node.
235
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
236
+ * @param {import("./Types.mjs").TranscriptEventState} props.eventState - The state for this event
237
+ * @param {(state: import("./Types.mjs").TranscriptEventState) => void} props.setEventState - Update the state for this event
92
238
  * @returns {import("preact").JSX.Element} The rendered event.
93
239
  */
94
- export const RenderedEventNode = ({ id, node, style }) => {
240
+ export const RenderedEventNode = ({
241
+ id,
242
+ node,
243
+ style,
244
+ scrollRef,
245
+ eventState,
246
+ setEventState,
247
+ }) => {
95
248
  switch (node.event.event) {
96
249
  case "sample_init":
97
250
  return html`<${SampleInitEventView}
98
251
  id=${id}
99
252
  event=${node.event}
253
+ eventState=${eventState}
254
+ setEventState=${setEventState}
100
255
  style=${style}
101
256
  />`;
102
257
 
@@ -104,6 +259,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
104
259
  return html`<${SampleLimitEventView}
105
260
  id=${id}
106
261
  event=${node.event}
262
+ eventState=${eventState}
263
+ setEventState=${setEventState}
107
264
  style=${style}
108
265
  />`;
109
266
 
@@ -111,6 +268,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
111
268
  return html`<${InfoEventView}
112
269
  id=${id}
113
270
  event=${node.event}
271
+ eventState=${eventState}
272
+ setEventState=${setEventState}
114
273
  style=${style}
115
274
  />`;
116
275
 
@@ -118,6 +277,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
118
277
  return html`<${LoggerEventView}
119
278
  id=${id}
120
279
  event=${node.event}
280
+ eventState=${eventState}
281
+ setEventState=${setEventState}
121
282
  style=${style}
122
283
  />`;
123
284
 
@@ -125,6 +286,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
125
286
  return html`<${ModelEventView}
126
287
  id=${id}
127
288
  event=${node.event}
289
+ eventState=${eventState}
290
+ setEventState=${setEventState}
128
291
  style=${style}
129
292
  />`;
130
293
 
@@ -132,6 +295,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
132
295
  return html`<${ScoreEventView}
133
296
  id=${id}
134
297
  event=${node.event}
298
+ eventState=${eventState}
299
+ setEventState=${setEventState}
135
300
  style=${style}
136
301
  />`;
137
302
 
@@ -139,6 +304,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
139
304
  return html`<${StateEventView}
140
305
  id=${id}
141
306
  event=${node.event}
307
+ eventState=${eventState}
308
+ setEventState=${setEventState}
142
309
  style=${style}
143
310
  />`;
144
311
 
@@ -146,14 +313,19 @@ export const RenderedEventNode = ({ id, node, style }) => {
146
313
  return html`<${StepEventView}
147
314
  id=${id}
148
315
  event=${node.event}
316
+ eventState=${eventState}
317
+ setEventState=${setEventState}
149
318
  children=${node.children}
150
319
  style=${style}
320
+ scrollRef=${scrollRef}
151
321
  />`;
152
322
 
153
323
  case "store":
154
324
  return html`<${StateEventView}
155
325
  id=${id}
156
326
  event=${node.event}
327
+ eventState=${eventState}
328
+ setEventState=${setEventState}
157
329
  style=${style}
158
330
  isStore=${true}
159
331
  />`;
@@ -162,6 +334,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
162
334
  return html`<${SubtaskEventView}
163
335
  id=${id}
164
336
  event=${node.event}
337
+ eventState=${eventState}
338
+ setEventState=${setEventState}
165
339
  style=${style}
166
340
  depth=${node.depth}
167
341
  />`;
@@ -170,6 +344,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
170
344
  return html`<${ToolEventView}
171
345
  id=${id}
172
346
  event=${node.event}
347
+ eventState=${eventState}
348
+ setEventState=${setEventState}
173
349
  style=${style}
174
350
  depth=${node.depth}
175
351
  />`;
@@ -178,6 +354,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
178
354
  return html`<${InputEventView}
179
355
  id=${id}
180
356
  event=${node.event}
357
+ eventState=${eventState}
358
+ setEventState=${setEventState}
181
359
  style=${style}
182
360
  />`;
183
361
 
@@ -185,6 +363,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
185
363
  return html`<${ErrorEventView}
186
364
  id=${id}
187
365
  event=${node.event}
366
+ eventState=${eventState}
367
+ setEventState=${setEventState}
188
368
  style=${style}
189
369
  />`;
190
370
 
@@ -192,6 +372,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
192
372
  return html`<${ApprovalEventView}
193
373
  id=${id}
194
374
  event=${node.event}
375
+ eventState=${eventState}
376
+ setEventState=${setEventState}
195
377
  style=${style}
196
378
  />`;
197
379
 
@@ -32,3 +32,13 @@ export class EventNode {
32
32
  this.depth = depth;
33
33
  }
34
34
  }
35
+
36
+ /**
37
+ * @typedef {Record<string, TranscriptEventState>} TranscriptState
38
+ */
39
+
40
+ /**
41
+ * @typedef {Object} TranscriptEventState
42
+ * @property {string} [selectedNav] - The selected nav for this event
43
+ * @property {boolean} [collapsed] - The collapse state for this event
44
+ */
@@ -15,11 +15,20 @@ import { formatDateTime } from "../../../utils/Format.mjs";
15
15
  * @param {Object} props - The properties passed to the component.
16
16
  * @param { string } props.id - The id of this event.
17
17
  * @param {import("../../../types/log").StateEvent } props.event - The event object to display.
18
+ * @param {import("./../Types.mjs").TranscriptEventState} props.eventState - The state for this event
19
+ * @param {(state: import("./../Types.mjs").TranscriptEventState) => void} props.setEventState - Update the state for this event
18
20
  * @param { boolean } props.isStore - Whether this event view is rendering a storage (rather than a state)
19
21
  * @param { Object } props.style - The style of this event.
20
22
  * @returns {import("preact").JSX.Element} The component.
21
23
  */
22
- export const StateEventView = ({ id, event, isStore, style }) => {
24
+ export const StateEventView = ({
25
+ id,
26
+ event,
27
+ eventState,
28
+ setEventState,
29
+ isStore,
30
+ style,
31
+ }) => {
23
32
  const summary = summarizeChanges(event.changes);
24
33
 
25
34
  // Synthesize objects for comparison
@@ -53,7 +62,22 @@ export const StateEventView = ({ id, event, isStore, style }) => {
53
62
  const title = event.event === "state" ? "State Updated" : "Store Updated";
54
63
 
55
64
  return html`
56
- <${EventPanel} id=${id} title="${title}" subTitle=${formatDateTime(new Date(event.timestamp))} text=${tabs.length === 1 ? summary : undefined} collapse=${changePreview === undefined ? true : undefined} style=${style}>
65
+ <${EventPanel}
66
+ id=${id}
67
+ title="${title}"
68
+ subTitle=${formatDateTime(new Date(event.timestamp))}
69
+ text=${tabs.length === 1 ? summary : undefined}
70
+ collapse=${changePreview === undefined ? true : undefined}
71
+ style=${style}
72
+ selectedNav=${eventState.selectedNav || ""}
73
+ onSelectedNav=${(selectedNav) => {
74
+ setEventState({ ...eventState, selectedNav });
75
+ }}
76
+ collapsed=${eventState.collapsed}
77
+ onCollapsed=${(collapsed) => {
78
+ setEventState({ ...eventState, collapsed });
79
+ }}
80
+ >
57
81
  ${tabs}
58
82
  </${EventPanel}>`;
59
83
  };
@@ -32,7 +32,6 @@ export type Limit = number | [unknown, unknown] | null;
32
32
  export type SampleId = string | number | (string | number)[] | null;
33
33
  export type Epochs = number | null;
34
34
  export type EpochsReducer = string[] | null;
35
- export type Trace = boolean | null;
36
35
  export type Name1 = string;
37
36
  export type Tools = string | string[];
38
37
  export type Approvers = ApproverPolicyConfig[];
@@ -77,6 +76,7 @@ export type NumChoices = number | null;
77
76
  export type Logprobs = boolean | null;
78
77
  export type TopLogprobs = number | null;
79
78
  export type ParallelToolCalls = boolean | null;
79
+ export type InternalTools = boolean | null;
80
80
  export type MaxToolOutput = number | null;
81
81
  export type CachePrompt = "auto" | boolean | null;
82
82
  export type ReasoningEffort = ("low" | "medium" | "high") | null;
@@ -112,35 +112,49 @@ export type Input =
112
112
  | ChatMessageAssistant
113
113
  | ChatMessageTool
114
114
  )[];
115
- export type Content = string | (ContentText | ContentImage)[];
115
+ export type Content =
116
+ | string
117
+ | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
116
118
  export type Type1 = "text";
117
119
  export type Text = string;
118
120
  export type Type2 = "image";
119
121
  export type Image = string;
120
122
  export type Detail = "auto" | "low" | "high";
123
+ export type Type3 = "audio";
124
+ export type Audio = string;
125
+ export type Format = "wav" | "mp3";
126
+ export type Type4 = "video";
127
+ export type Video = string;
128
+ export type Format1 = "mp4" | "mpeg" | "mov";
121
129
  export type Source = ("input" | "generate") | null;
122
130
  export type Role = "system";
123
- export type Content1 = string | (ContentText | ContentImage)[];
131
+ export type Content1 =
132
+ | string
133
+ | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
124
134
  export type Source1 = ("input" | "generate") | null;
125
135
  export type Role1 = "user";
126
136
  export type ToolCallId = string | null;
127
- export type Content2 = string | (ContentText | ContentImage)[];
137
+ export type Content2 =
138
+ | string
139
+ | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
128
140
  export type Source2 = ("input" | "generate") | null;
129
141
  export type Role2 = "assistant";
130
142
  export type ToolCalls = ToolCall[] | null;
131
143
  export type Id1 = string;
132
144
  export type Function = string;
133
- export type Type3 = "function";
145
+ export type Type5 = "function";
134
146
  export type ParseError = string | null;
135
147
  export type Title = string | null;
136
- export type Format = "text" | "markdown";
148
+ export type Format2 = "text" | "markdown";
137
149
  export type Content3 = string;
138
- export type Content4 = string | (ContentText | ContentImage)[];
150
+ export type Content4 =
151
+ | string
152
+ | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
139
153
  export type Source3 = ("input" | "generate") | null;
140
154
  export type Role3 = "tool";
141
155
  export type ToolCallId1 = string | null;
142
156
  export type Function1 = string | null;
143
- export type Type4 =
157
+ export type Type6 =
144
158
  | "parsing"
145
159
  | "timeout"
146
160
  | "unicode_decode"
@@ -218,7 +232,7 @@ export type JsonValue = unknown;
218
232
  export type Timestamp1 = string;
219
233
  export type Pending1 = boolean | null;
220
234
  export type Event1 = "sample_limit";
221
- export type Type5 = "message" | "time" | "token" | "operator";
235
+ export type Type7 = "message" | "time" | "token" | "operator" | "custom";
222
236
  export type Message2 = string;
223
237
  export type Limit1 = number | null;
224
238
  export type Timestamp2 = string;
@@ -244,8 +258,8 @@ export type Input2 = (
244
258
  )[];
245
259
  export type Name5 = string;
246
260
  export type Description = string;
247
- export type Type6 = "object";
248
- export type Type7 =
261
+ export type Type8 = "object";
262
+ export type Type9 =
249
263
  | ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
250
264
  | null;
251
265
  export type Description1 = string | null;
@@ -261,11 +275,12 @@ export type Additionalproperties1 = boolean;
261
275
  export type Tools1 = ToolInfo[];
262
276
  export type ToolChoice = ("auto" | "any" | "none") | ToolFunction;
263
277
  export type Name6 = string;
278
+ export type Error1 = string | null;
264
279
  export type Cache = ("read" | "write") | null;
265
280
  export type Timestamp5 = string;
266
281
  export type Pending5 = boolean | null;
267
282
  export type Event5 = "tool";
268
- export type Type8 = "function";
283
+ export type Type10 = "function";
269
284
  export type Id3 = string;
270
285
  export type Function2 = string;
271
286
  export type Result =
@@ -274,7 +289,9 @@ export type Result =
274
289
  | boolean
275
290
  | ContentText
276
291
  | ContentImage
277
- | (ContentText | ContentImage)[];
292
+ | ContentAudio
293
+ | ContentVideo
294
+ | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
278
295
  export type Truncated = [unknown, unknown] | null;
279
296
  export type Timestamp6 = string;
280
297
  export type Pending6 = boolean | null;
@@ -324,13 +341,13 @@ export type Timestamp12 = string;
324
341
  export type Pending12 = boolean | null;
325
342
  export type Event12 = "step";
326
343
  export type Action = "begin" | "end";
327
- export type Type9 = string | null;
344
+ export type Type11 = string | null;
328
345
  export type Name8 = string;
329
346
  export type Timestamp13 = string;
330
347
  export type Pending13 = boolean | null;
331
348
  export type Event13 = "subtask";
332
349
  export type Name9 = string;
333
- export type Type10 = string | null;
350
+ export type Type12 = string | null;
334
351
  export type Events2 = (
335
352
  | SampleInitEvent
336
353
  | SampleLimitEvent
@@ -379,7 +396,13 @@ export type Events = (
379
396
  | StepEvent
380
397
  | SubtaskEvent
381
398
  )[];
382
- export type Type11 = "context" | "time" | "message" | "token" | "operator";
399
+ export type Type13 =
400
+ | "context"
401
+ | "time"
402
+ | "message"
403
+ | "token"
404
+ | "operator"
405
+ | "custom";
383
406
  export type Limit2 = number;
384
407
  export type Reductions = EvalSampleReductions[] | null;
385
408
  export type Scorer1 = string;
@@ -448,7 +471,6 @@ export interface EvalConfig {
448
471
  sample_id: SampleId;
449
472
  epochs: Epochs;
450
473
  epochs_reducer: EpochsReducer;
451
- trace: Trace;
452
474
  approval: ApprovalPolicyConfig | null;
453
475
  fail_on_error: FailOnError;
454
476
  message_limit: MessageLimit;
@@ -531,6 +553,7 @@ export interface GenerateConfig {
531
553
  logprobs: Logprobs;
532
554
  top_logprobs: TopLogprobs;
533
555
  parallel_tool_calls: ParallelToolCalls;
556
+ internal_tools: InternalTools;
534
557
  max_tool_output: MaxToolOutput;
535
558
  cache_prompt: CachePrompt;
536
559
  reasoning_effort: ReasoningEffort;
@@ -614,6 +637,16 @@ export interface ContentImage {
614
637
  image: Image;
615
638
  detail: Detail;
616
639
  }
640
+ export interface ContentAudio {
641
+ type: Type3;
642
+ audio: Audio;
643
+ format: Format;
644
+ }
645
+ export interface ContentVideo {
646
+ type: Type4;
647
+ video: Video;
648
+ format: Format1;
649
+ }
617
650
  export interface ChatMessageUser {
618
651
  content: Content1;
619
652
  source: Source1;
@@ -630,7 +663,7 @@ export interface ToolCall {
630
663
  id: Id1;
631
664
  function: Function;
632
665
  arguments: Arguments;
633
- type: Type3;
666
+ type: Type5;
634
667
  parse_error: ParseError;
635
668
  view: ToolCallContent | null;
636
669
  }
@@ -640,7 +673,7 @@ export interface Arguments {}
640
673
  */
641
674
  export interface ToolCallContent {
642
675
  title: Title;
643
- format: Format;
676
+ format: Format2;
644
677
  content: Content3;
645
678
  }
646
679
  export interface ChatMessageTool {
@@ -652,7 +685,7 @@ export interface ChatMessageTool {
652
685
  error: ToolCallError | null;
653
686
  }
654
687
  export interface ToolCallError {
655
- type: Type4;
688
+ type: Type6;
656
689
  message: Message1;
657
690
  }
658
691
  export interface ModelOutput {
@@ -735,7 +768,7 @@ export interface SampleLimitEvent {
735
768
  timestamp: Timestamp1;
736
769
  pending: Pending1;
737
770
  event: Event1;
738
- type: Type5;
771
+ type: Type7;
739
772
  message: Message2;
740
773
  limit: Limit1;
741
774
  }
@@ -784,6 +817,7 @@ export interface ModelEvent {
784
817
  tool_choice: ToolChoice;
785
818
  config: GenerateConfig1;
786
819
  output: ModelOutput;
820
+ error: Error1;
787
821
  cache: Cache;
788
822
  call: ModelCall | null;
789
823
  }
@@ -822,7 +856,7 @@ export interface ToolInfo {
822
856
  * Description of tool parameters object in JSON Schema format.
823
857
  */
824
858
  export interface ToolParams {
825
- type: Type6;
859
+ type: Type8;
826
860
  properties: Properties;
827
861
  required: Required1;
828
862
  additionalProperties: Additionalproperties1;
@@ -834,7 +868,7 @@ export interface Properties {
834
868
  * Description of tool parameter in JSON Schema format.
835
869
  */
836
870
  export interface ToolParam {
837
- type: Type7;
871
+ type: Type9;
838
872
  description: Description1;
839
873
  default: Default;
840
874
  enum: Enum;
@@ -873,6 +907,7 @@ export interface GenerateConfig1 {
873
907
  logprobs: Logprobs;
874
908
  top_logprobs: TopLogprobs;
875
909
  parallel_tool_calls: ParallelToolCalls;
910
+ internal_tools: InternalTools;
876
911
  max_tool_output: MaxToolOutput;
877
912
  cache_prompt: CachePrompt;
878
913
  reasoning_effort: ReasoningEffort;
@@ -897,7 +932,7 @@ export interface ToolEvent {
897
932
  timestamp: Timestamp5;
898
933
  pending: Pending5;
899
934
  event: Event5;
900
- type: Type8;
935
+ type: Type10;
901
936
  id: Id3;
902
937
  function: Function2;
903
938
  arguments: Arguments1;
@@ -999,7 +1034,7 @@ export interface StepEvent {
999
1034
  pending: Pending12;
1000
1035
  event: Event12;
1001
1036
  action: Action;
1002
- type: Type9;
1037
+ type: Type11;
1003
1038
  name: Name8;
1004
1039
  }
1005
1040
  /**
@@ -1010,7 +1045,7 @@ export interface SubtaskEvent {
1010
1045
  pending: Pending13;
1011
1046
  event: Event13;
1012
1047
  name: Name9;
1013
- type: Type10;
1048
+ type: Type12;
1014
1049
  input: Input4;
1015
1050
  result: Result1;
1016
1051
  events: Events2;
@@ -1026,7 +1061,7 @@ export interface Attachments {
1026
1061
  [k: string]: string;
1027
1062
  }
1028
1063
  export interface EvalSampleLimit {
1029
- type: Type11;
1064
+ type: Type13;
1030
1065
  limit: Limit2;
1031
1066
  }
1032
1067
  export interface EvalSampleReductions {