inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. inspect_ai/_cli/common.py +3 -1
  2. inspect_ai/_cli/eval.py +15 -9
  3. inspect_ai/_display/core/active.py +4 -1
  4. inspect_ai/_display/core/config.py +3 -3
  5. inspect_ai/_display/core/panel.py +7 -3
  6. inspect_ai/_display/plain/__init__.py +0 -0
  7. inspect_ai/_display/plain/display.py +203 -0
  8. inspect_ai/_display/rich/display.py +0 -5
  9. inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
  10. inspect_ai/_display/textual/widgets/samples.py +79 -12
  11. inspect_ai/_display/textual/widgets/sandbox.py +37 -0
  12. inspect_ai/_eval/eval.py +10 -1
  13. inspect_ai/_eval/loader.py +79 -19
  14. inspect_ai/_eval/registry.py +6 -0
  15. inspect_ai/_eval/score.py +3 -1
  16. inspect_ai/_eval/task/results.py +51 -22
  17. inspect_ai/_eval/task/run.py +47 -13
  18. inspect_ai/_eval/task/sandbox.py +10 -5
  19. inspect_ai/_util/constants.py +1 -0
  20. inspect_ai/_util/port_names.py +61 -0
  21. inspect_ai/_util/text.py +23 -0
  22. inspect_ai/_view/www/App.css +31 -1
  23. inspect_ai/_view/www/dist/assets/index.css +31 -1
  24. inspect_ai/_view/www/dist/assets/index.js +25498 -2044
  25. inspect_ai/_view/www/log-schema.json +32 -2
  26. inspect_ai/_view/www/package.json +2 -0
  27. inspect_ai/_view/www/src/App.mjs +14 -16
  28. inspect_ai/_view/www/src/Types.mjs +1 -2
  29. inspect_ai/_view/www/src/api/Types.ts +133 -0
  30. inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
  31. inspect_ai/_view/www/src/api/api-http.ts +219 -0
  32. inspect_ai/_view/www/src/api/api-shared.ts +47 -0
  33. inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
  34. inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
  35. inspect_ai/_view/www/src/api/index.ts +51 -0
  36. inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
  37. inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
  38. inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
  39. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
  40. inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
  41. inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
  42. inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
  43. inspect_ai/_view/www/src/index.js +77 -4
  44. inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
  45. inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
  46. inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
  47. inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
  48. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
  49. inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
  50. inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
  51. inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
  52. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
  53. inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
  54. inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
  55. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
  56. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
  57. inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
  58. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
  59. inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
  60. inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
  61. inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
  62. inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
  63. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
  64. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
  65. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
  66. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
  67. inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
  68. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
  69. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
  70. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
  71. inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
  72. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
  73. inspect_ai/_view/www/src/types/log.d.ts +13 -2
  74. inspect_ai/_view/www/src/utils/Format.mjs +10 -3
  75. inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
  76. inspect_ai/_view/www/src/utils/vscode.ts +36 -0
  77. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
  78. inspect_ai/_view/www/vite.config.js +7 -0
  79. inspect_ai/_view/www/yarn.lock +116 -0
  80. inspect_ai/approval/_human/__init__.py +0 -0
  81. inspect_ai/approval/_human/manager.py +1 -1
  82. inspect_ai/approval/_policy.py +12 -6
  83. inspect_ai/log/_log.py +1 -1
  84. inspect_ai/log/_samples.py +16 -0
  85. inspect_ai/log/_transcript.py +4 -1
  86. inspect_ai/model/_call_tools.py +59 -0
  87. inspect_ai/model/_conversation.py +16 -7
  88. inspect_ai/model/_generate_config.py +12 -12
  89. inspect_ai/model/_model.py +117 -18
  90. inspect_ai/model/_model_output.py +22 -2
  91. inspect_ai/model/_openai.py +383 -0
  92. inspect_ai/model/_providers/anthropic.py +152 -55
  93. inspect_ai/model/_providers/azureai.py +21 -21
  94. inspect_ai/model/_providers/bedrock.py +37 -40
  95. inspect_ai/model/_providers/goodfire.py +248 -0
  96. inspect_ai/model/_providers/google.py +46 -54
  97. inspect_ai/model/_providers/groq.py +7 -3
  98. inspect_ai/model/_providers/hf.py +6 -0
  99. inspect_ai/model/_providers/mistral.py +13 -12
  100. inspect_ai/model/_providers/openai.py +51 -218
  101. inspect_ai/model/_providers/openai_o1.py +11 -12
  102. inspect_ai/model/_providers/providers.py +23 -1
  103. inspect_ai/model/_providers/together.py +12 -12
  104. inspect_ai/model/_providers/util/__init__.py +2 -3
  105. inspect_ai/model/_providers/util/hf_handler.py +1 -1
  106. inspect_ai/model/_providers/util/llama31.py +1 -1
  107. inspect_ai/model/_providers/util/util.py +0 -76
  108. inspect_ai/model/_providers/vertex.py +1 -4
  109. inspect_ai/scorer/_metric.py +3 -0
  110. inspect_ai/scorer/_reducer/reducer.py +1 -1
  111. inspect_ai/scorer/_scorer.py +4 -3
  112. inspect_ai/solver/__init__.py +4 -5
  113. inspect_ai/solver/_basic_agent.py +1 -1
  114. inspect_ai/solver/_bridge/__init__.py +3 -0
  115. inspect_ai/solver/_bridge/bridge.py +100 -0
  116. inspect_ai/solver/_bridge/patch.py +170 -0
  117. inspect_ai/solver/_prompt.py +35 -5
  118. inspect_ai/solver/_solver.py +6 -0
  119. inspect_ai/solver/_task_state.py +80 -38
  120. inspect_ai/tool/__init__.py +2 -0
  121. inspect_ai/tool/_tool.py +12 -1
  122. inspect_ai/tool/_tool_call.py +10 -0
  123. inspect_ai/tool/_tool_def.py +16 -5
  124. inspect_ai/tool/_tool_with.py +21 -4
  125. inspect_ai/tool/beta/__init__.py +5 -0
  126. inspect_ai/tool/beta/_computer/__init__.py +3 -0
  127. inspect_ai/tool/beta/_computer/_common.py +133 -0
  128. inspect_ai/tool/beta/_computer/_computer.py +155 -0
  129. inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
  130. inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
  131. inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
  132. inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
  133. inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
  134. inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
  135. inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
  136. inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
  137. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
  138. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
  139. inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
  140. inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
  141. inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
  142. inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
  143. inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
  144. inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
  145. inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
  146. inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
  147. inspect_ai/util/__init__.py +2 -0
  148. inspect_ai/util/_display.py +5 -0
  149. inspect_ai/util/_limit.py +26 -0
  150. inspect_ai/util/_sandbox/docker/docker.py +64 -1
  151. inspect_ai/util/_sandbox/docker/internal.py +3 -1
  152. inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
  153. inspect_ai/util/_sandbox/environment.py +14 -0
  154. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
  155. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
  156. inspect_ai/_view/www/src/api/Types.mjs +0 -117
  157. inspect_ai/_view/www/src/api/api-http.mjs +0 -300
  158. inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
  159. inspect_ai/_view/www/src/api/index.mjs +0 -49
  160. inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
  161. inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
  162. inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
  163. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
  164. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
  165. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
  166. {inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  // @ts-check
2
2
  import { html } from "htm/preact";
3
+ import { useCallback, useState } from "preact/hooks";
3
4
  import { SampleInitEventView } from "./SampleInitEventView.mjs";
4
5
  import { StateEventView } from "./state/StateEventView.mjs";
5
6
  import { StepEventView } from "./StepEventView.mjs";
@@ -15,6 +16,8 @@ import { ApprovalEventView } from "./ApprovalEventView.mjs";
15
16
  import { SampleLimitEventView } from "./SampleLimitEventView.mjs";
16
17
  import { FontSize } from "../../appearance/Fonts.mjs";
17
18
  import { EventNode } from "./Types.mjs";
19
+ // @ts-ignore
20
+ import { VirtualList } from "../../components/VirtualList.mjs";
18
21
 
19
22
  /**
20
23
  * Renders the TranscriptView component.
@@ -26,10 +29,61 @@ import { EventNode } from "./Types.mjs";
26
29
  * @returns {import("preact").JSX.Element} The TranscriptView component.
27
30
  */
28
31
  export const TranscriptView = ({ id, events, depth = 0 }) => {
32
+ const [transcriptState, setTranscriptState] = useState({});
33
+ const onTranscriptState = useCallback(
34
+ (state) => {
35
+ setTranscriptState(state);
36
+ },
37
+ [transcriptState, setTranscriptState],
38
+ );
39
+
29
40
  // Normalize Events themselves
30
41
  const resolvedEvents = fixupEventStream(events);
31
42
  const eventNodes = treeifyEvents(resolvedEvents, depth);
32
- return html` <${TranscriptComponent} id=${id} eventNodes=${eventNodes} /> `;
43
+ return html`
44
+ <${TranscriptComponent}
45
+ id=${id}
46
+ eventNodes=${eventNodes}
47
+ transcriptState=${transcriptState}
48
+ setTranscriptState=${onTranscriptState}
49
+ />
50
+ `;
51
+ };
52
+
53
+ /**
54
+ * Renders the Transcript component.
55
+ *
56
+ * @param {Object} props - The parameters for the component.
57
+ * @param {string} props.id - The identifier for this view
58
+ * @param {import("../../types/log").Events} props.events - The transcript events to display.
59
+ * @param {Object} props.style - The transcript style to display.
60
+ * @param {number} props.depth - The base depth for this transcript view
61
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
62
+ * @returns {import("preact").JSX.Element} The TranscriptView component.
63
+ */
64
+ export const TranscriptVirtualList = (props) => {
65
+ let { id, scrollRef, events, depth, style } = props;
66
+
67
+ // Normalize Events themselves
68
+ const resolvedEvents = fixupEventStream(events);
69
+ const eventNodes = treeifyEvents(resolvedEvents, depth);
70
+
71
+ const [transcriptState, setTranscriptState] = useState({});
72
+ const onTranscriptState = useCallback(
73
+ (state) => {
74
+ setTranscriptState(state);
75
+ },
76
+ [transcriptState, setTranscriptState],
77
+ );
78
+
79
+ return html`<${TranscriptVirtualListComponent}
80
+ id=${id}
81
+ eventNodes=${eventNodes}
82
+ style=${style}
83
+ scrollRef=${scrollRef}
84
+ transcriptState=${transcriptState}
85
+ setTranscriptState=${onTranscriptState}
86
+ />`;
33
87
  };
34
88
 
35
89
  /**
@@ -39,9 +93,81 @@ export const TranscriptView = ({ id, events, depth = 0 }) => {
39
93
  * @param {string} props.id - The identifier for this view
40
94
  * @param {EventNode[]} props.eventNodes - The transcript events nodes to display.
41
95
  * @param {Object} props.style - The transcript style to display.
96
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
97
+ * @param {import("./Types.mjs").TranscriptState} props.transcriptState - The state for this transcript
98
+ * @param {(state: import("./Types.mjs").TranscriptState) => void} props.setTranscriptState - Set the transcript state for this transcript
42
99
  * @returns {import("preact").JSX.Element} The TranscriptView component.
43
100
  */
44
- export const TranscriptComponent = ({ id, eventNodes, style }) => {
101
+ export const TranscriptVirtualListComponent = ({
102
+ id,
103
+ eventNodes,
104
+ style,
105
+ scrollRef,
106
+ transcriptState,
107
+ setTranscriptState,
108
+ }) => {
109
+ const renderRow = (item, index) => {
110
+ const toggleStyle = {};
111
+ if (item.depth % 2 == 0) {
112
+ toggleStyle.backgroundColor = "var(--bs-light-bg-subtle)";
113
+ } else {
114
+ toggleStyle.backgroundColor = "var(--bs-body-bg)";
115
+ }
116
+
117
+ let paddingTop = "0";
118
+ if (index === 0) {
119
+ paddingTop = ".5em";
120
+ }
121
+ const eventId = `${id}-event${index}`;
122
+ const setEventState = useCallback(
123
+ (state) => {
124
+ setTranscriptState({ ...transcriptState, [eventId]: state });
125
+ },
126
+ [setTranscriptState, transcriptState],
127
+ );
128
+
129
+ return html`<div style=${{ paddingTop, paddingBottom: ".5em" }}>
130
+ <${RenderedEventNode}
131
+ id=${eventId}
132
+ node=${item}
133
+ style=${{
134
+ ...toggleStyle,
135
+ ...style,
136
+ }}
137
+ scrollRef=${scrollRef}
138
+ eventState=${transcriptState[eventId] || {}}
139
+ setEventState=${setEventState}
140
+ />
141
+ </div>`;
142
+ };
143
+
144
+ return html`<${VirtualList}
145
+ data=${eventNodes}
146
+ tabIndex="0"
147
+ renderRow=${renderRow}
148
+ scrollRef=${scrollRef}
149
+ style=${{ width: "100%", marginTop: "1em" }}
150
+ />`;
151
+ };
152
+
153
+ /**
154
+ * Renders the Transcript component.
155
+ *
156
+ * @param {Object} props - The parameters for the component.
157
+ * @param {string} props.id - The identifier for this view
158
+ * @param {EventNode[]} props.eventNodes - The transcript events nodes to display.
159
+ * @param {Object} props.style - The transcript style to display.
160
+ * @param {import("./Types.mjs").TranscriptState} props.transcriptState - The state for this transcript
161
+ * @param {(state: import("./Types.mjs").TranscriptState) => void} props.setTranscriptState - Set the transcript state for this transcript
162
+ * @returns {import("preact").JSX.Element} The TranscriptView component.
163
+ */
164
+ export const TranscriptComponent = ({
165
+ id,
166
+ transcriptState,
167
+ setTranscriptState,
168
+ eventNodes,
169
+ style,
170
+ }) => {
45
171
  const rows = eventNodes.map((eventNode, index) => {
46
172
  const toggleStyle = {};
47
173
  if (eventNode.depth % 2 == 0) {
@@ -55,15 +181,32 @@ export const TranscriptComponent = ({ id, eventNodes, style }) => {
55
181
  toggleStyle.marginBottom = "1.5em";
56
182
  }
57
183
 
184
+ let paddingBottom = ".5em";
185
+ if (index === eventNodes.length - 1) {
186
+ paddingBottom = "0";
187
+ }
188
+
189
+ const eventId = `${id}-event${index}`;
190
+ const setEventState = useCallback(
191
+ (state) => {
192
+ setTranscriptState({ ...transcriptState, [eventId]: state });
193
+ },
194
+ [setTranscriptState, transcriptState],
195
+ );
196
+
58
197
  const row = html`
59
- <${RenderedEventNode}
60
- id=${`${id}-event${index}`}
61
- node=${eventNode}
62
- style=${{
63
- ...toggleStyle,
64
- ...style,
65
- }}
66
- />
198
+ <div style=${{ paddingBottom }}>
199
+ <${RenderedEventNode}
200
+ id=${eventId}
201
+ node=${eventNode}
202
+ style=${{
203
+ ...toggleStyle,
204
+ ...style,
205
+ }}
206
+ eventState=${transcriptState[eventId] || {}}
207
+ setEventState=${setEventState}
208
+ />
209
+ </div>
67
210
  `;
68
211
  return row;
69
212
  });
@@ -89,14 +232,26 @@ export const TranscriptComponent = ({ id, eventNodes, style }) => {
89
232
  * @param {string} props.id - The id for this event.
90
233
  * @param { EventNode } props.node - This event.
91
234
  * @param { Object } props.style - The style for this node.
235
+ * @param {import("htm/preact").MutableRef<HTMLElement>} props.scrollRef - The scrollable parent element
236
+ * @param {import("./Types.mjs").TranscriptEventState} props.eventState - The state for this event
237
+ * @param {(state: import("./Types.mjs").TranscriptEventState) => void} props.setEventState - Update the state for this event
92
238
  * @returns {import("preact").JSX.Element} The rendered event.
93
239
  */
94
- export const RenderedEventNode = ({ id, node, style }) => {
240
+ export const RenderedEventNode = ({
241
+ id,
242
+ node,
243
+ style,
244
+ scrollRef,
245
+ eventState,
246
+ setEventState,
247
+ }) => {
95
248
  switch (node.event.event) {
96
249
  case "sample_init":
97
250
  return html`<${SampleInitEventView}
98
251
  id=${id}
99
252
  event=${node.event}
253
+ eventState=${eventState}
254
+ setEventState=${setEventState}
100
255
  style=${style}
101
256
  />`;
102
257
 
@@ -104,6 +259,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
104
259
  return html`<${SampleLimitEventView}
105
260
  id=${id}
106
261
  event=${node.event}
262
+ eventState=${eventState}
263
+ setEventState=${setEventState}
107
264
  style=${style}
108
265
  />`;
109
266
 
@@ -111,6 +268,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
111
268
  return html`<${InfoEventView}
112
269
  id=${id}
113
270
  event=${node.event}
271
+ eventState=${eventState}
272
+ setEventState=${setEventState}
114
273
  style=${style}
115
274
  />`;
116
275
 
@@ -118,6 +277,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
118
277
  return html`<${LoggerEventView}
119
278
  id=${id}
120
279
  event=${node.event}
280
+ eventState=${eventState}
281
+ setEventState=${setEventState}
121
282
  style=${style}
122
283
  />`;
123
284
 
@@ -125,6 +286,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
125
286
  return html`<${ModelEventView}
126
287
  id=${id}
127
288
  event=${node.event}
289
+ eventState=${eventState}
290
+ setEventState=${setEventState}
128
291
  style=${style}
129
292
  />`;
130
293
 
@@ -132,6 +295,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
132
295
  return html`<${ScoreEventView}
133
296
  id=${id}
134
297
  event=${node.event}
298
+ eventState=${eventState}
299
+ setEventState=${setEventState}
135
300
  style=${style}
136
301
  />`;
137
302
 
@@ -139,6 +304,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
139
304
  return html`<${StateEventView}
140
305
  id=${id}
141
306
  event=${node.event}
307
+ eventState=${eventState}
308
+ setEventState=${setEventState}
142
309
  style=${style}
143
310
  />`;
144
311
 
@@ -146,14 +313,19 @@ export const RenderedEventNode = ({ id, node, style }) => {
146
313
  return html`<${StepEventView}
147
314
  id=${id}
148
315
  event=${node.event}
316
+ eventState=${eventState}
317
+ setEventState=${setEventState}
149
318
  children=${node.children}
150
319
  style=${style}
320
+ scrollRef=${scrollRef}
151
321
  />`;
152
322
 
153
323
  case "store":
154
324
  return html`<${StateEventView}
155
325
  id=${id}
156
326
  event=${node.event}
327
+ eventState=${eventState}
328
+ setEventState=${setEventState}
157
329
  style=${style}
158
330
  isStore=${true}
159
331
  />`;
@@ -162,6 +334,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
162
334
  return html`<${SubtaskEventView}
163
335
  id=${id}
164
336
  event=${node.event}
337
+ eventState=${eventState}
338
+ setEventState=${setEventState}
165
339
  style=${style}
166
340
  depth=${node.depth}
167
341
  />`;
@@ -170,6 +344,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
170
344
  return html`<${ToolEventView}
171
345
  id=${id}
172
346
  event=${node.event}
347
+ eventState=${eventState}
348
+ setEventState=${setEventState}
173
349
  style=${style}
174
350
  depth=${node.depth}
175
351
  />`;
@@ -178,6 +354,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
178
354
  return html`<${InputEventView}
179
355
  id=${id}
180
356
  event=${node.event}
357
+ eventState=${eventState}
358
+ setEventState=${setEventState}
181
359
  style=${style}
182
360
  />`;
183
361
 
@@ -185,6 +363,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
185
363
  return html`<${ErrorEventView}
186
364
  id=${id}
187
365
  event=${node.event}
366
+ eventState=${eventState}
367
+ setEventState=${setEventState}
188
368
  style=${style}
189
369
  />`;
190
370
 
@@ -192,6 +372,8 @@ export const RenderedEventNode = ({ id, node, style }) => {
192
372
  return html`<${ApprovalEventView}
193
373
  id=${id}
194
374
  event=${node.event}
375
+ eventState=${eventState}
376
+ setEventState=${setEventState}
195
377
  style=${style}
196
378
  />`;
197
379
 
@@ -32,3 +32,13 @@ export class EventNode {
32
32
  this.depth = depth;
33
33
  }
34
34
  }
35
+
36
+ /**
37
+ * @typedef {Record<string, TranscriptEventState>} TranscriptState
38
+ */
39
+
40
+ /**
41
+ * @typedef {Object} TranscriptEventState
42
+ * @property {string} [selectedNav] - The selected nav for this event
43
+ * @property {boolean} [collapsed] - The collapse state for this event
44
+ */
@@ -15,11 +15,20 @@ import { formatDateTime } from "../../../utils/Format.mjs";
15
15
  * @param {Object} props - The properties passed to the component.
16
16
  * @param { string } props.id - The id of this event.
17
17
  * @param {import("../../../types/log").StateEvent } props.event - The event object to display.
18
+ * @param {import("./../Types.mjs").TranscriptEventState} props.eventState - The state for this event
19
+ * @param {(state: import("./../Types.mjs").TranscriptEventState) => void} props.setEventState - Update the state for this event
18
20
  * @param { boolean } props.isStore - Whether this event view is rendering a storage (rather than a state)
19
21
  * @param { Object } props.style - The style of this event.
20
22
  * @returns {import("preact").JSX.Element} The component.
21
23
  */
22
- export const StateEventView = ({ id, event, isStore, style }) => {
24
+ export const StateEventView = ({
25
+ id,
26
+ event,
27
+ eventState,
28
+ setEventState,
29
+ isStore,
30
+ style,
31
+ }) => {
23
32
  const summary = summarizeChanges(event.changes);
24
33
 
25
34
  // Synthesize objects for comparison
@@ -53,7 +62,22 @@ export const StateEventView = ({ id, event, isStore, style }) => {
53
62
  const title = event.event === "state" ? "State Updated" : "Store Updated";
54
63
 
55
64
  return html`
56
- <${EventPanel} id=${id} title="${title}" subTitle=${formatDateTime(new Date(event.timestamp))} text=${tabs.length === 1 ? summary : undefined} collapse=${changePreview === undefined ? true : undefined} style=${style}>
65
+ <${EventPanel}
66
+ id=${id}
67
+ title="${title}"
68
+ subTitle=${formatDateTime(new Date(event.timestamp))}
69
+ text=${tabs.length === 1 ? summary : undefined}
70
+ collapse=${changePreview === undefined ? true : undefined}
71
+ style=${style}
72
+ selectedNav=${eventState.selectedNav || ""}
73
+ onSelectedNav=${(selectedNav) => {
74
+ setEventState({ ...eventState, selectedNav });
75
+ }}
76
+ collapsed=${eventState.collapsed}
77
+ onCollapsed=${(collapsed) => {
78
+ setEventState({ ...eventState, collapsed });
79
+ }}
80
+ >
57
81
  ${tabs}
58
82
  </${EventPanel}>`;
59
83
  };
@@ -76,6 +76,7 @@ export type NumChoices = number | null;
76
76
  export type Logprobs = boolean | null;
77
77
  export type TopLogprobs = number | null;
78
78
  export type ParallelToolCalls = boolean | null;
79
+ export type InternalTools = boolean | null;
79
80
  export type MaxToolOutput = number | null;
80
81
  export type CachePrompt = "auto" | boolean | null;
81
82
  export type ReasoningEffort = ("low" | "medium" | "high") | null;
@@ -231,7 +232,7 @@ export type JsonValue = unknown;
231
232
  export type Timestamp1 = string;
232
233
  export type Pending1 = boolean | null;
233
234
  export type Event1 = "sample_limit";
234
- export type Type7 = "message" | "time" | "token" | "operator";
235
+ export type Type7 = "message" | "time" | "token" | "operator" | "custom";
235
236
  export type Message2 = string;
236
237
  export type Limit1 = number | null;
237
238
  export type Timestamp2 = string;
@@ -274,6 +275,7 @@ export type Additionalproperties1 = boolean;
274
275
  export type Tools1 = ToolInfo[];
275
276
  export type ToolChoice = ("auto" | "any" | "none") | ToolFunction;
276
277
  export type Name6 = string;
278
+ export type Error1 = string | null;
277
279
  export type Cache = ("read" | "write") | null;
278
280
  export type Timestamp5 = string;
279
281
  export type Pending5 = boolean | null;
@@ -394,7 +396,13 @@ export type Events = (
394
396
  | StepEvent
395
397
  | SubtaskEvent
396
398
  )[];
397
- export type Type13 = "context" | "time" | "message" | "token" | "operator";
399
+ export type Type13 =
400
+ | "context"
401
+ | "time"
402
+ | "message"
403
+ | "token"
404
+ | "operator"
405
+ | "custom";
398
406
  export type Limit2 = number;
399
407
  export type Reductions = EvalSampleReductions[] | null;
400
408
  export type Scorer1 = string;
@@ -545,6 +553,7 @@ export interface GenerateConfig {
545
553
  logprobs: Logprobs;
546
554
  top_logprobs: TopLogprobs;
547
555
  parallel_tool_calls: ParallelToolCalls;
556
+ internal_tools: InternalTools;
548
557
  max_tool_output: MaxToolOutput;
549
558
  cache_prompt: CachePrompt;
550
559
  reasoning_effort: ReasoningEffort;
@@ -808,6 +817,7 @@ export interface ModelEvent {
808
817
  tool_choice: ToolChoice;
809
818
  config: GenerateConfig1;
810
819
  output: ModelOutput;
820
+ error: Error1;
811
821
  cache: Cache;
812
822
  call: ModelCall | null;
813
823
  }
@@ -897,6 +907,7 @@ export interface GenerateConfig1 {
897
907
  logprobs: Logprobs;
898
908
  top_logprobs: TopLogprobs;
899
909
  parallel_tool_calls: ParallelToolCalls;
910
+ internal_tools: InternalTools;
900
911
  max_tool_output: MaxToolOutput;
901
912
  cache_prompt: CachePrompt;
902
913
  reasoning_effort: ReasoningEffort;
@@ -126,10 +126,17 @@ export const formatTime = (seconds) => {
126
126
  return `${seconds} sec`;
127
127
  } else if (seconds < 60 * 60) {
128
128
  return `${Math.floor(seconds / 60)} min ${seconds % 60} sec`;
129
+ } else if (seconds < 60 * 60 * 24) {
130
+ const hours = Math.floor(seconds / (60 * 60));
131
+ const minutes = Math.floor((seconds % (60 * 60)) / 60);
132
+ const remainingSeconds = seconds % 60;
133
+ return `${hours} hr ${minutes} min ${remainingSeconds} sec`;
129
134
  } else {
130
- return `${Math.floor(seconds / (60 * 60 * 24))} days ${Math.floor(
131
- seconds / 60,
132
- )} min ${seconds % 60} sec`;
135
+ const days = Math.floor(seconds / (60 * 60 * 24));
136
+ const hours = Math.floor((seconds % (60 * 60 * 24)) / (60 * 60));
137
+ const minutes = Math.floor((seconds % (60 * 60)) / 60);
138
+ const remainingSeconds = seconds % 60;
139
+ return `${days} days ${hours} hr ${minutes} min ${remainingSeconds} sec`;
133
140
  }
134
141
  };
135
142
 
@@ -1,6 +1,6 @@
1
- // @ts-check
2
-
3
- export const asyncJsonParse = async (text) => {
1
+ export const asyncJsonParse = async (text: string): Promise<any> => {
2
+ const encoder = new TextEncoder();
3
+ const encodedText = encoder.encode(text);
4
4
  const blob = new Blob([kWorkerCode], { type: "application/javascript" });
5
5
  const blobURL = URL.createObjectURL(blob);
6
6
  const worker = new Worker(blobURL);
@@ -17,7 +17,9 @@ export const asyncJsonParse = async (text) => {
17
17
  reject(new Error(error.message));
18
18
  };
19
19
  });
20
- worker.postMessage({ scriptContent: kJson5ScriptBase64, text });
20
+ worker.postMessage({ scriptContent: kJson5ScriptBase64, encodedText }, [
21
+ encodedText.buffer,
22
+ ]);
21
23
  return await result;
22
24
  } finally {
23
25
  worker.terminate();
@@ -28,12 +30,14 @@ export const asyncJsonParse = async (text) => {
28
30
  const kWorkerCode = `
29
31
  self.onmessage = function (e) {
30
32
  eval(atob(e.data.scriptContent));
31
- const text = e.data.text;
33
+ const { encodedText } = e.data;
34
+ const decoder = new TextDecoder();
35
+ const text = decoder.decode(encodedText);
32
36
  try {
33
- const result = JSON5.parse(text);
34
- self.postMessage({ success: true, result });
35
- } catch (error) {
36
- self.postMessage({ success: false, error: error.message });
37
+ const result = JSON.parse(text);
38
+ postMessage({ success: true, result });
39
+ } catch (err) {
40
+ postMessage({ success: false, error: err.message });
37
41
  }
38
42
  };`;
39
43
 
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Type definition for the VS Code API object
3
+ * Note: This is a minimal definition - expand based on your needs
4
+ */
5
+ interface VSCodeApi {
6
+ postMessage(message: unknown): void;
7
+ getState(): unknown;
8
+ setState(state: unknown): void;
9
+ }
10
+
11
+ /**
12
+ * The cached instance of the VS Code API
13
+ */
14
+ let vscodeApi: VSCodeApi | undefined;
15
+
16
+ // Declare the acquireVsCodeApi function on the window object
17
+ declare global {
18
+ interface Window {
19
+ acquireVsCodeApi?: () => VSCodeApi;
20
+ }
21
+ }
22
+
23
+ /**
24
+ * Gets or initializes the VS Code API instance
25
+ * @returns {VSCodeApi | undefined} The VS Code API instance if in VS Code environment, undefined otherwise
26
+ */
27
+ export const getVscodeApi = (): VSCodeApi | undefined => {
28
+ if (window.acquireVsCodeApi) {
29
+ if (vscodeApi === undefined) {
30
+ vscodeApi = window.acquireVsCodeApi();
31
+ }
32
+ return vscodeApi;
33
+ } else {
34
+ return undefined;
35
+ }
36
+ };
@@ -43,7 +43,7 @@ import { debounce } from "../utils/sync.mjs";
43
43
  * @param {import("../types/log").EvalStats} [props.evalStats] - The EvalStats for this eval
44
44
  * @param {import("../types/log").EvalResults} [props.evalResults] - The EvalResults for this eval
45
45
  * @param {import("../Types.mjs").CurrentLog} [props.log] - the current log
46
- * @param {import("../api/Types.mjs").SampleSummary[]} [props.samples] - the samples
46
+ * @param {import("../api/Types.ts").SampleSummary[]} [props.samples] - the samples
47
47
  * @param {import("../Types.mjs").SampleMode} props.sampleMode - the mode for displaying samples
48
48
  * @param {string} props.groupBy - what to group by
49
49
  * @param {string} props.groupByOrder - the grouping order
@@ -142,6 +142,8 @@ export const WorkSpace = ({
142
142
  }
143
143
  }, [divRef, task_id]);
144
144
 
145
+ const sampleTabScrollRef = useRef(/** @type {HTMLElement|null} */ (null));
146
+
145
147
  const resolvedTabs = useMemo(() => {
146
148
  // Tabs that are available within the app
147
149
  // Include the tab contents as well as any tools that the tab provides
@@ -154,6 +156,7 @@ export const WorkSpace = ({
154
156
  resolvedTabs.samples = {
155
157
  id: kEvalWorkspaceTabId,
156
158
  scrollable: samples.length === 1,
159
+ scrollRef: sampleTabScrollRef,
157
160
  label: samples?.length > 1 ? "Samples" : "Sample",
158
161
  content: () => {
159
162
  return html` <${SamplesTab}
@@ -178,6 +181,7 @@ export const WorkSpace = ({
178
181
  epoch=${epoch}
179
182
  sampleScrollPositionRef=${sampleScrollPositionRef}
180
183
  setSampleScrollPosition=${setSampleScrollPosition}
184
+ sampleTabScrollRef=${sampleTabScrollRef}
181
185
  />`;
182
186
  },
183
187
  tools: () => {
@@ -368,6 +372,7 @@ export const WorkSpace = ({
368
372
  evalResults=${evalResults}
369
373
  evalStats=${evalStats}
370
374
  samples=${samples}
375
+ evalDescriptor=${samplesDescriptor.evalDescriptor}
371
376
  status=${evalStatus}
372
377
  tabs=${resolvedTabs}
373
378
  selectedTab=${selectedTab}
@@ -386,6 +391,7 @@ const WorkspaceDisplay = ({
386
391
  evalResults,
387
392
  evalStats,
388
393
  samples,
394
+ evalDescriptor,
389
395
  status,
390
396
  showToggle,
391
397
  selectedTab,
@@ -442,6 +448,7 @@ const WorkspaceDisplay = ({
442
448
  onSelected=${onSelected}
443
449
  selected=${selectedTab === tab.id}
444
450
  scrollable=${!!tab.scrollable}
451
+ scrollRef=${tab.scrollRef}
445
452
  scrollPosition=${workspaceTabScrollPositionRef.current[tab.id]}
446
453
  setScrollPosition=${useCallback(
447
454
  (position) => {
@@ -456,20 +463,19 @@ const WorkspaceDisplay = ({
456
463
  }, [tabs]);
457
464
 
458
465
  return html`
459
-
460
-
461
466
  <${Navbar}
462
467
  evalSpec=${evalSpec}
463
468
  evalPlan=${evalPlan}
464
469
  evalResults=${evalResults}
465
470
  evalStats=${evalStats}
466
471
  samples=${samples}
472
+ evalDescriptor=${evalDescriptor}
467
473
  status=${status}
468
474
  file=${logFileName}
469
475
  showToggle=${showToggle}
470
-
476
+
471
477
  offcanvas=${offcanvas}
472
- />
478
+ />
473
479
  <div ref=${divRef} class="workspace" style=${{
474
480
  paddingTop: "0rem",
475
481
  overflowY: "hidden",
@@ -26,4 +26,11 @@ export default defineConfig({
26
26
  css: true,
27
27
  }),
28
28
  ],
29
+ resolve: {
30
+ alias: {
31
+ 'react': 'preact/compat',
32
+ 'react-dom': 'preact/compat',
33
+ 'react-dom/test-utils': 'preact/test-utils'
34
+ }
35
+ }
29
36
  });