inspect-ai 0.3.92__py3-none-any.whl → 0.3.94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. inspect_ai/_cli/eval.py +27 -0
  2. inspect_ai/_display/textual/widgets/samples.py +3 -3
  3. inspect_ai/_display/textual/widgets/transcript.py +3 -29
  4. inspect_ai/_eval/eval.py +19 -2
  5. inspect_ai/_eval/evalset.py +4 -1
  6. inspect_ai/_eval/run.py +41 -0
  7. inspect_ai/_eval/task/generate.py +38 -44
  8. inspect_ai/_eval/task/log.py +26 -28
  9. inspect_ai/_eval/task/run.py +23 -27
  10. inspect_ai/_util/answer.py +26 -0
  11. inspect_ai/_util/constants.py +0 -1
  12. inspect_ai/_util/local_server.py +398 -0
  13. inspect_ai/_util/working.py +10 -4
  14. inspect_ai/_view/www/dist/assets/index.css +173 -159
  15. inspect_ai/_view/www/dist/assets/index.js +1417 -1142
  16. inspect_ai/_view/www/log-schema.json +379 -3
  17. inspect_ai/_view/www/package.json +1 -1
  18. inspect_ai/_view/www/src/@types/log.d.ts +93 -14
  19. inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
  20. inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
  21. inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
  22. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
  23. inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
  24. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
  25. inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
  26. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
  27. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
  28. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
  29. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
  30. inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
  31. inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
  32. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
  33. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
  34. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
  35. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
  36. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
  37. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
  38. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
  39. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
  40. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
  41. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
  42. inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
  43. inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
  44. inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
  45. inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
  46. inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
  47. inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
  48. inspect_ai/_view/www/src/components/Card.css +0 -1
  49. inspect_ai/_view/www/src/constants.ts +2 -0
  50. inspect_ai/_view/www/src/utils/numeric.ts +17 -0
  51. inspect_ai/agent/_agent.py +3 -3
  52. inspect_ai/agent/_as_solver.py +22 -12
  53. inspect_ai/agent/_as_tool.py +20 -6
  54. inspect_ai/agent/_handoff.py +12 -1
  55. inspect_ai/agent/_react.py +4 -3
  56. inspect_ai/agent/_run.py +16 -3
  57. inspect_ai/agent/_types.py +9 -0
  58. inspect_ai/dataset/_dataset.py +6 -3
  59. inspect_ai/log/__init__.py +14 -0
  60. inspect_ai/log/_convert.py +4 -9
  61. inspect_ai/log/_file.py +56 -0
  62. inspect_ai/log/_log.py +99 -0
  63. inspect_ai/log/_recorders/__init__.py +2 -0
  64. inspect_ai/log/_recorders/buffer/database.py +12 -11
  65. inspect_ai/log/_recorders/buffer/filestore.py +2 -2
  66. inspect_ai/log/_recorders/buffer/types.py +2 -2
  67. inspect_ai/log/_recorders/eval.py +20 -65
  68. inspect_ai/log/_recorders/file.py +28 -6
  69. inspect_ai/log/_recorders/recorder.py +7 -0
  70. inspect_ai/log/_recorders/types.py +1 -23
  71. inspect_ai/log/_samples.py +14 -25
  72. inspect_ai/log/_transcript.py +84 -36
  73. inspect_ai/log/_tree.py +118 -0
  74. inspect_ai/log/_util.py +52 -0
  75. inspect_ai/model/__init__.py +5 -1
  76. inspect_ai/model/_call_tools.py +72 -44
  77. inspect_ai/model/_generate_config.py +14 -8
  78. inspect_ai/model/_model.py +66 -88
  79. inspect_ai/model/_model_output.py +25 -0
  80. inspect_ai/model/_openai.py +2 -0
  81. inspect_ai/model/_providers/anthropic.py +13 -23
  82. inspect_ai/model/_providers/hf.py +27 -1
  83. inspect_ai/model/_providers/openai_o1.py +8 -2
  84. inspect_ai/model/_providers/providers.py +18 -4
  85. inspect_ai/model/_providers/sglang.py +247 -0
  86. inspect_ai/model/_providers/vllm.py +211 -400
  87. inspect_ai/scorer/_choice.py +1 -2
  88. inspect_ai/solver/__init__.py +7 -2
  89. inspect_ai/solver/_basic_agent.py +3 -10
  90. inspect_ai/solver/_chain.py +1 -1
  91. inspect_ai/solver/_fork.py +1 -1
  92. inspect_ai/solver/_multiple_choice.py +5 -22
  93. inspect_ai/solver/_plan.py +2 -2
  94. inspect_ai/solver/_task_state.py +26 -88
  95. inspect_ai/solver/_transcript.py +6 -7
  96. inspect_ai/tool/_json_rpc_helpers.py +45 -17
  97. inspect_ai/tool/_mcp/_mcp.py +8 -5
  98. inspect_ai/tool/_mcp/_sandbox.py +8 -2
  99. inspect_ai/tool/_mcp/server.py +3 -1
  100. inspect_ai/tool/_tool_call.py +4 -1
  101. inspect_ai/tool/_tool_support_helpers.py +51 -12
  102. inspect_ai/tool/_tools/_bash_session.py +190 -68
  103. inspect_ai/tool/_tools/_computer/_computer.py +25 -1
  104. inspect_ai/tool/_tools/_execute.py +4 -1
  105. inspect_ai/tool/_tools/_text_editor.py +4 -3
  106. inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
  107. inspect_ai/util/__init__.py +16 -0
  108. inspect_ai/util/_anyio.py +11 -0
  109. inspect_ai/util/_collect.py +50 -0
  110. inspect_ai/util/_limit.py +393 -0
  111. inspect_ai/util/_limited_conversation.py +57 -0
  112. inspect_ai/util/_span.py +58 -0
  113. inspect_ai/util/_subtask.py +27 -42
  114. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/METADATA +1 -1
  115. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/RECORD +120 -134
  116. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/WHEEL +1 -1
  117. inspect_ai/_display/core/group.py +0 -79
  118. inspect_ai/solver/_limit.py +0 -39
  119. inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
  120. inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
  121. inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
  122. inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
  123. inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
  124. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
  125. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
  126. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  127. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
  128. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
  129. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
  130. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
  131. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
  132. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
  133. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
  134. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
  135. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
  136. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
  137. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
  138. inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
  139. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
  140. inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
  141. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
  142. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
  143. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
  144. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  145. inspect_ai/tool/_tools/_computer/test_args.py +0 -151
  146. /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
  147. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/entry_points.txt +0 -0
  148. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/licenses/LICENSE +0 -0
  149. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,12 @@
1
1
  // This is a special name that signals a group of sandbox events.
2
2
 
3
- import { Events, StepEvent } from "../../../../@types/log";
3
+ import {
4
+ Events,
5
+ SpanBeginEvent,
6
+ SpanEndEvent,
7
+ StepEvent,
8
+ } from "../../../../@types/log";
9
+ import { hasSpans } from "./utils";
4
10
 
5
11
  // It will be caught elsewhere and rendered with a pretty name
6
12
  export const kSandboxSignalName = "53787D8A-D3FC-426D-B383-9F880B70E4AA";
@@ -54,39 +60,55 @@ const processPendingEvents = (events: Events, filter: boolean): Events => {
54
60
  };
55
61
 
56
62
  const collapseSampleInit = (events: Events): Events => {
57
- // See if the events have an init step
63
+ // Don't perform sample init logic if spans are present
64
+ const hasSpans = events.some((e) => {
65
+ return e.event === "span_begin" || e.event === "span_end";
66
+ });
67
+ if (hasSpans) {
68
+ return events;
69
+ }
70
+
71
+ // Don't synthesize a sample init step if one already exists
58
72
  const hasInitStep =
59
73
  events.findIndex((e) => {
60
74
  return e.event === "step" && e.name === "init";
61
75
  }) !== -1;
76
+ if (hasInitStep) {
77
+ return events;
78
+ }
62
79
 
80
+ // Find a sample init event
63
81
  const initEventIndex = events.findIndex((e) => {
64
82
  return e.event === "sample_init";
65
83
  });
66
84
  const initEvent = events[initEventIndex];
85
+ if (!initEvent) {
86
+ return events;
87
+ }
67
88
 
89
+ // Splice in sample init step if needed
68
90
  const fixedUp = [...events];
69
- if (!hasInitStep && initEvent) {
70
- fixedUp.splice(initEventIndex, 0, {
71
- timestamp: initEvent.timestamp,
72
- event: "step",
73
- action: "begin",
74
- type: null,
75
- name: "sample_init",
76
- pending: false,
77
- working_start: 0,
78
- });
79
-
80
- fixedUp.splice(initEventIndex + 2, 0, {
81
- timestamp: initEvent.timestamp,
82
- event: "step",
83
- action: "end",
84
- type: null,
85
- name: "sample_init",
86
- pending: false,
87
- working_start: 0,
88
- });
89
- }
91
+ fixedUp.splice(initEventIndex, 0, {
92
+ timestamp: initEvent.timestamp,
93
+ event: "step",
94
+ action: "begin",
95
+ type: null,
96
+ name: "sample_init",
97
+ pending: false,
98
+ working_start: 0,
99
+ span_id: initEvent.span_id,
100
+ });
101
+
102
+ fixedUp.splice(initEventIndex + 2, 0, {
103
+ timestamp: initEvent.timestamp,
104
+ event: "step",
105
+ action: "end",
106
+ type: null,
107
+ name: "sample_init",
108
+ pending: false,
109
+ working_start: 0,
110
+ span_id: initEvent.span_id,
111
+ });
90
112
  return fixedUp;
91
113
  };
92
114
 
@@ -94,12 +116,22 @@ const groupSandboxEvents = (events: Events): Events => {
94
116
  const result: Events = [];
95
117
  const pendingSandboxEvents: Events = [];
96
118
 
119
+ const useSpans = hasSpans(events);
120
+
97
121
  const pushPendingSandboxEvents = () => {
98
122
  const timestamp =
99
123
  pendingSandboxEvents[pendingSandboxEvents.length - 1].timestamp;
100
- result.push(createStepEvent(kSandboxSignalName, timestamp, "begin"));
124
+ if (useSpans) {
125
+ result.push(createSpanBegin(kSandboxSignalName, timestamp, null));
126
+ } else {
127
+ result.push(createStepEvent(kSandboxSignalName, timestamp, "begin"));
128
+ }
101
129
  result.push(...pendingSandboxEvents);
102
- result.push(createStepEvent(kSandboxSignalName, timestamp, "end"));
130
+ if (useSpans) {
131
+ result.push(createSpanEnd(kSandboxSignalName, timestamp));
132
+ } else {
133
+ result.push(createStepEvent(kSandboxSignalName, timestamp, "end"));
134
+ }
103
135
  pendingSandboxEvents.length = 0;
104
136
  };
105
137
 
@@ -139,4 +171,34 @@ const createStepEvent = (
139
171
  name,
140
172
  pending: false,
141
173
  working_start: 0,
174
+ span_id: null,
142
175
  });
176
+
177
+ const createSpanBegin = (
178
+ name: string,
179
+ timestamp: string,
180
+ parent_id: string | null,
181
+ ): SpanBeginEvent => {
182
+ return {
183
+ name,
184
+ id: `${name}-begin`,
185
+ span_id: name,
186
+ parent_id,
187
+ timestamp,
188
+ event: "span_begin",
189
+ type: null,
190
+ pending: false,
191
+ working_start: 0,
192
+ };
193
+ };
194
+
195
+ const createSpanEnd = (name: string, timestamp: string): SpanEndEvent => {
196
+ return {
197
+ id: `${name}-end`,
198
+ timestamp,
199
+ event: "span_end",
200
+ pending: false,
201
+ working_start: 0,
202
+ span_id: name,
203
+ };
204
+ };
@@ -1,14 +1,28 @@
1
1
  import { Events } from "../../../../@types/log";
2
2
  import { EventNode, EventType } from "../types";
3
+ import {
4
+ ACTION_BEGIN,
5
+ ET_SPAN_BEGIN,
6
+ ET_SPAN_END,
7
+ ET_STEP,
8
+ hasSpans,
9
+ } from "./utils";
10
+
11
+ type TreeifyFunction = (
12
+ event: EventType,
13
+ addNode: (event: EventType) => EventNode,
14
+ pushStack: (node: EventNode) => void,
15
+ popStack: () => void,
16
+ ) => void;
3
17
 
4
- /**
5
- * Gathers events into a hierarchy of EventNodes.
6
- */
7
18
  export function treeifyEvents(events: Events, depth: number): EventNode[] {
19
+ const useSpans = hasSpans(events);
20
+ const treeFn = useSpans ? treeifyFnSpan : treeifyFnStep;
21
+
8
22
  const rootNodes: EventNode[] = [];
9
23
  const stack: EventNode[] = [];
10
24
 
11
- const pushNode = (event: EventType): EventNode => {
25
+ const addNode = (event: EventType): EventNode => {
12
26
  const node = new EventNode(event, stack.length + depth);
13
27
  if (stack.length > 0) {
14
28
  const parentNode = stack[stack.length - 1];
@@ -19,21 +33,219 @@ export function treeifyEvents(events: Events, depth: number): EventNode[] {
19
33
  return node;
20
34
  };
21
35
 
36
+ const pushStack = (node: EventNode): void => {
37
+ stack.push(node);
38
+ };
39
+
40
+ const popStack = (): void => {
41
+ if (stack.length > 0) {
42
+ stack.pop();
43
+ }
44
+ };
45
+
22
46
  events.forEach((event) => {
23
- if (event.event === "step" && event.action === "begin") {
24
- // Starting a new step
25
- const node = pushNode(event);
26
- stack.push(node);
27
- } else if (event.event === "step" && event.action === "end") {
28
- // An ending step
29
- if (stack.length > 0) {
30
- stack.pop();
47
+ treeFn(event, addNode, pushStack, popStack);
48
+ });
49
+
50
+ if (useSpans) {
51
+ return transformTree(rootNodes);
52
+ } else {
53
+ return rootNodes;
54
+ }
55
+ }
56
+
57
+ const treeifyFnStep: TreeifyFunction = (
58
+ event: EventType,
59
+ addNode: (event: EventType) => EventNode,
60
+ pushStack: (node: EventNode) => void,
61
+ popStack: () => void,
62
+ ): void => {
63
+ switch (event.event) {
64
+ case ET_STEP:
65
+ if (event.action === ACTION_BEGIN) {
66
+ // Starting a new step
67
+ const node = addNode(event);
68
+ pushStack(node);
69
+ } else {
70
+ // An ending step
71
+ popStack();
31
72
  }
32
- } else {
73
+ break;
74
+ case ET_SPAN_BEGIN: {
75
+ // These shouldn't be here, but throw away
76
+ break;
77
+ }
78
+ case ET_SPAN_END: {
79
+ // These shouldn't be here, but throw away
80
+ break;
81
+ }
82
+ default:
33
83
  // An event
34
- pushNode(event);
84
+ addNode(event);
85
+ break;
86
+ }
87
+ };
88
+
89
+ const treeifyFnSpan: TreeifyFunction = (
90
+ event: EventType,
91
+ addNode: (event: EventType) => EventNode,
92
+ pushStack: (node: EventNode) => void,
93
+ popStack: () => void,
94
+ ): void => {
95
+ switch (event.event) {
96
+ case ET_STEP:
97
+ // strip steps
98
+ break;
99
+ case ET_SPAN_BEGIN: {
100
+ const node = addNode(event);
101
+ pushStack(node);
102
+ break;
35
103
  }
36
- });
104
+ case ET_SPAN_END: {
105
+ popStack();
106
+ break;
107
+ }
108
+ default:
109
+ // An event
110
+ addNode(event);
111
+ break;
112
+ }
113
+ };
37
114
 
38
- return rootNodes;
39
- }
115
+ type TreeNodeTransformer = {
116
+ name: string;
117
+ matches: (node: EventNode) => boolean;
118
+ process: (node: EventNode) => EventNode;
119
+ };
120
+
121
+ const treeNodeTransformers: TreeNodeTransformer[] = [
122
+ {
123
+ name: "unwrap_tools",
124
+ matches: (node) =>
125
+ node.event.event === "span_begin" && node.event.type === "tool",
126
+ process: (node) => elevateChildNode(node, "tool") || node,
127
+ },
128
+ {
129
+ name: "unwrap_subtasks",
130
+ matches: (node) =>
131
+ node.event.event === "span_begin" && node.event.type === "subtask",
132
+ process: (node) => elevateChildNode(node, "subtask") || node,
133
+ },
134
+ {
135
+ name: "unwrap_agent_solver",
136
+ matches: (node) =>
137
+ node.event.event === "span_begin" &&
138
+ node.event["type"] === "solver" &&
139
+ node.children.length === 2 &&
140
+ node.children[0].event.event === "span_begin" &&
141
+ node.children[0].event.type === "agent" &&
142
+ node.children[1].event.event === "state",
143
+
144
+ process: (node) => skipFirstChildNode(node),
145
+ },
146
+ {
147
+ name: "unwrap_agent_solver w/store",
148
+ matches: (node) =>
149
+ node.event.event === "span_begin" &&
150
+ node.event["type"] === "solver" &&
151
+ node.children.length === 3 &&
152
+ node.children[0].event.event === "span_begin" &&
153
+ node.children[0].event.type === "agent" &&
154
+ node.children[1].event.event === "state" &&
155
+ node.children[2].event.event === "store",
156
+ process: (node) => skipFirstChildNode(node),
157
+ },
158
+ {
159
+ name: "unwrap_handoff",
160
+ matches: (node) =>
161
+ node.event.event === "span_begin" &&
162
+ node.event["type"] === "handoff" &&
163
+ node.children.length === 2 &&
164
+ node.children[0].event.event === "tool" &&
165
+ node.children[1].event.event === "store" &&
166
+ node.children[0].children.length === 2 &&
167
+ node.children[0].children[0].event.event === "span_begin" &&
168
+ node.children[0].children[0].event.type === "agent",
169
+ process: (node) => skipThisNode(node),
170
+ },
171
+ ];
172
+
173
+ const transformTree = (roots: EventNode[]): EventNode[] => {
174
+ const visitNode = (node: EventNode): EventNode => {
175
+ let processedNode = node;
176
+
177
+ // Visit children (depth first)
178
+ processedNode.children = processedNode.children.map(visitNode);
179
+
180
+ // Apply any visitors to this node
181
+ for (const transformer of treeNodeTransformers) {
182
+ if (transformer.matches(processedNode)) {
183
+ processedNode = transformer.process(processedNode);
184
+ // Only apply the first matching transformer
185
+ break;
186
+ }
187
+ }
188
+ return processedNode;
189
+ };
190
+
191
+ return roots.map(visitNode);
192
+ };
193
+
194
+ /**
195
+ * Process a span node by elevating a specific child node type and moving its siblings as children
196
+ * @template T - Type of the event (either ToolEvent or SubtaskEvent)
197
+ */
198
+ const elevateChildNode = (
199
+ node: EventNode,
200
+ childEventType: "tool" | "subtask",
201
+ ): EventNode | null => {
202
+ // Find the specific event child
203
+ const targetIndex = node.children.findIndex(
204
+ (child) => child.event.event === childEventType,
205
+ );
206
+
207
+ if (targetIndex === -1) {
208
+ console.log(
209
+ `No ${childEventType} event found in a span, this is very unexpected.`,
210
+ );
211
+ return null;
212
+ }
213
+
214
+ // Get the target node and set its depth
215
+ const targetNode = { ...node.children[targetIndex] };
216
+ const remainingChildren = node.children.filter((_, i) => i !== targetIndex);
217
+
218
+ // Process the remaining children
219
+ targetNode.depth = node.depth;
220
+ targetNode.children = reduceDepth(remainingChildren);
221
+
222
+ // No need to update the event itself (events have been deprecated
223
+ // and more importantly we drive children / transcripts using the tree structure itself
224
+ // and nodes rather than the event.events itself)
225
+ return targetNode;
226
+ };
227
+
228
+ const skipFirstChildNode = (node: EventNode): EventNode => {
229
+ const agentSpan = node.children.splice(0, 1)[0];
230
+ node.children.unshift(...reduceDepth(agentSpan.children));
231
+ return node;
232
+ };
233
+
234
+ const skipThisNode = (node: EventNode): EventNode => {
235
+ const newNode = { ...node.children[0] };
236
+ newNode.depth = node.depth;
237
+ newNode.children = reduceDepth(newNode.children[0].children, 2);
238
+ return newNode;
239
+ };
240
+
241
+ // Reduce the depth of the children by 1
242
+ // This is used when we hoist a child node to the parent
243
+ const reduceDepth = (nodes: EventNode[], depth: number = 1): EventNode[] => {
244
+ return nodes.map((node) => {
245
+ if (node.children.length > 0) {
246
+ node.children = reduceDepth(node.children, 1);
247
+ }
248
+ node.depth = node.depth - depth;
249
+ return node;
250
+ });
251
+ };
@@ -0,0 +1,11 @@
1
+ import { Events } from "../../../../@types/log";
2
+
3
+ export const ET_STEP = "step";
4
+ export const ACTION_BEGIN = "begin";
5
+
6
+ export const ET_SPAN_BEGIN = "span_begin";
7
+ export const ET_SPAN_END = "span_end";
8
+
9
+ export const hasSpans = (events: Events): boolean => {
10
+ return events.some((event) => event.event === ET_SPAN_BEGIN);
11
+ };
@@ -10,6 +10,8 @@ import {
10
10
  SampleLimitEvent,
11
11
  SandboxEvent,
12
12
  ScoreEvent,
13
+ SpanBeginEvent,
14
+ SpanEndEvent,
13
15
  StateEvent,
14
16
  StepEvent,
15
17
  StoreEvent,
@@ -39,7 +41,9 @@ export type EventType =
39
41
  | InputEvent
40
42
  | ErrorEvent
41
43
  | ApprovalEvent
42
- | SandboxEvent;
44
+ | SandboxEvent
45
+ | SpanBeginEvent
46
+ | SpanEndEvent;
43
47
 
44
48
  export class EventNode {
45
49
  event: EventType;
@@ -6,6 +6,7 @@ import styles from "./ModelUsagePanel.module.css";
6
6
 
7
7
  interface ModelUsageProps {
8
8
  usage: ModelUsage1;
9
+ className?: string | string[];
9
10
  }
10
11
 
11
12
  interface ModelUsageRow {
@@ -19,7 +20,7 @@ interface ModelUsageRow {
19
20
  /**
20
21
  * Renders the ModelUsagePanel component.
21
22
  */
22
- export const ModelUsagePanel: FC<ModelUsageProps> = ({ usage }) => {
23
+ export const ModelUsagePanel: FC<ModelUsageProps> = ({ usage, className }) => {
23
24
  if (!usage) {
24
25
  return null;
25
26
  }
@@ -84,7 +85,7 @@ export const ModelUsagePanel: FC<ModelUsageProps> = ({ usage }) => {
84
85
  });
85
86
 
86
87
  return (
87
- <div className={clsx("text-size-small", styles.wrapper)}>
88
+ <div className={clsx("text-size-small", styles.wrapper, className)}>
88
89
  {rows.map((row, idx) => {
89
90
  if (row.label === "---") {
90
91
  return (
@@ -1,6 +1,5 @@
1
1
  .table {
2
2
  width: 100%;
3
- margin-top: 0.7rem;
4
3
  }
5
4
 
6
5
  .tableTokens {
@@ -15,3 +14,7 @@
15
14
  .model {
16
15
  padding-right: 1em;
17
16
  }
17
+
18
+ .cellContents {
19
+ padding-bottom: 1em;
20
+ }
@@ -79,10 +79,10 @@ export const TokenRow: FC<TokenRowProps> = ({ model, usage }) => {
79
79
  return (
80
80
  <tr>
81
81
  <td>
82
- <div className={styles.model}>{model}</div>
82
+ <div className={clsx(styles.model, styles.cellContents)}>{model}</div>
83
83
  </td>
84
84
  <td>
85
- <ModelUsagePanel usage={usage} />
85
+ <ModelUsagePanel usage={usage} className={clsx(styles.cellContents)} />
86
86
  </td>
87
87
  </tr>
88
88
  );
@@ -3,13 +3,18 @@
3
3
  padding-bottom: 1em;
4
4
  margin-left: 0.5em;
5
5
  display: flex;
6
+ flex-wrap: wrap;
7
+ gap: 1em;
6
8
  }
7
9
 
8
10
  .col1 {
9
- flex: 1 1 40%;
10
- margin-right: 1em;
11
+ flex: 0 1 auto;
12
+ min-width: 200px;
13
+ width: fit-content;
11
14
  }
12
15
 
13
16
  .col2 {
14
- flex: 1 1 60%;
17
+ flex: 1 1 auto;
18
+ min-width: 300px;
19
+ width: fit-content;
15
20
  }
@@ -1,10 +1,5 @@
1
- import clsx from "clsx";
2
1
  import { EvalStats } from "../../@types/log";
3
- import { FontSize } from "../../app/appearance/fonts";
4
- import { ApplicationIcons } from "../../app/appearance/icons";
5
- import { MetaDataView } from "../../app/content/MetaDataView";
6
2
  import { Card, CardBody, CardHeader } from "../../components/Card";
7
- import { formatDuration } from "../../utils/format";
8
3
  import { ModelTokenTable } from "./ModelTokenTable";
9
4
 
10
5
  import { FC } from "react";
@@ -24,40 +19,11 @@ export const UsageCard: FC<UsageCardProps> = ({ stats }) => {
24
19
  return null;
25
20
  }
26
21
 
27
- const totalDuration = formatDuration(
28
- new Date(stats.started_at),
29
- new Date(stats.completed_at),
30
- );
31
- const usageMetadataStyle = {
32
- fontSize: FontSize.smaller,
33
- };
34
-
35
22
  return (
36
23
  <Card>
37
- <CardHeader icon={ApplicationIcons.usage} label="Usage" />
24
+ <CardHeader label="Usage" />
38
25
  <CardBody id={kUsageCardBodyId}>
39
26
  <div className={styles.wrapper}>
40
- <div className={styles.col1}>
41
- <div
42
- className={clsx(
43
- "text-size-smaller",
44
- "text-style-label",
45
- "text-style-secondary",
46
- )}
47
- >
48
- Duration
49
- </div>
50
- <MetaDataView
51
- entries={{
52
- ["Start"]: new Date(stats.started_at).toLocaleString(),
53
- ["End"]: new Date(stats.completed_at).toLocaleString(),
54
- ["Duration"]: totalDuration,
55
- }}
56
- tableOptions="borderless,sm"
57
- style={usageMetadataStyle}
58
- />
59
- </div>
60
-
61
27
  <div className={styles.col2}>
62
28
  <ModelTokenTable model_usage={stats.model_usage} />
63
29
  </div>
@@ -23,7 +23,6 @@
23
23
  background-color: var(--bs-light-bg-subtle);
24
24
  border: solid 1px var(--bs-light-border-subtle);
25
25
  border-radius: var(--bs-border-radius);
26
- margin-bottom: 1.5em;
27
26
  }
28
27
 
29
28
  .card-collaping-header {
@@ -5,6 +5,8 @@ export const kModelNone = "none/none";
5
5
  export const kLogViewSamplesTabId = "samples";
6
6
  export const kLogViewJsonTabId = "json";
7
7
  export const kLogViewInfoTabId = "info";
8
+ export const kLogViewModelsTabId = "models";
9
+ export const kLogViewTaskTabId = "task";
8
10
 
9
11
  // Sample tab constants
10
12
  export const kSampleMessagesTabId = `messages`;
@@ -0,0 +1,17 @@
1
+ export function compareWithNan(a: number, b: number): number {
2
+ const aIsNaN = Number.isNaN(a);
3
+ const bIsNaN = Number.isNaN(b);
4
+
5
+ if (aIsNaN && bIsNaN) {
6
+ return 0;
7
+ }
8
+
9
+ if (aIsNaN) {
10
+ return 1;
11
+ }
12
+ if (bIsNaN) {
13
+ return -1;
14
+ }
15
+
16
+ return a - b;
17
+ }
@@ -27,13 +27,14 @@ from inspect_ai.model._chat_message import (
27
27
  ChatMessageAssistant,
28
28
  )
29
29
  from inspect_ai.model._model_output import ChatCompletionChoice, ModelOutput
30
+ from inspect_ai.util._limited_conversation import ChatMessageList
30
31
 
31
32
 
32
33
  class AgentState:
33
34
  """Agent state."""
34
35
 
35
36
  def __init__(self, *, messages: list[ChatMessage]) -> None:
36
- self._messages = messages
37
+ self._messages: list[ChatMessage] = ChatMessageList(messages)
37
38
  self._output: ModelOutput | None = None
38
39
 
39
40
  @property
@@ -43,8 +44,7 @@ class AgentState:
43
44
 
44
45
  @messages.setter
45
46
  def messages(self, messages: list[ChatMessage]) -> None:
46
- """Set the conversation history."""
47
- self._messages = messages
47
+ self._messages = ChatMessageList(messages)
48
48
 
49
49
  @property
50
50
  def output(self) -> ModelOutput: