inspect-ai 0.3.92__py3-none-any.whl → 0.3.94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. inspect_ai/_cli/eval.py +27 -0
  2. inspect_ai/_display/textual/widgets/samples.py +3 -3
  3. inspect_ai/_display/textual/widgets/transcript.py +3 -29
  4. inspect_ai/_eval/eval.py +19 -2
  5. inspect_ai/_eval/evalset.py +4 -1
  6. inspect_ai/_eval/run.py +41 -0
  7. inspect_ai/_eval/task/generate.py +38 -44
  8. inspect_ai/_eval/task/log.py +26 -28
  9. inspect_ai/_eval/task/run.py +23 -27
  10. inspect_ai/_util/answer.py +26 -0
  11. inspect_ai/_util/constants.py +0 -1
  12. inspect_ai/_util/local_server.py +398 -0
  13. inspect_ai/_util/working.py +10 -4
  14. inspect_ai/_view/www/dist/assets/index.css +173 -159
  15. inspect_ai/_view/www/dist/assets/index.js +1417 -1142
  16. inspect_ai/_view/www/log-schema.json +379 -3
  17. inspect_ai/_view/www/package.json +1 -1
  18. inspect_ai/_view/www/src/@types/log.d.ts +93 -14
  19. inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
  20. inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
  21. inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
  22. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
  23. inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
  24. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
  25. inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
  26. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
  27. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
  28. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
  29. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
  30. inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
  31. inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
  32. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
  33. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
  34. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
  35. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
  36. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
  37. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
  38. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
  39. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
  40. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
  41. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
  42. inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
  43. inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
  44. inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
  45. inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
  46. inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
  47. inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
  48. inspect_ai/_view/www/src/components/Card.css +0 -1
  49. inspect_ai/_view/www/src/constants.ts +2 -0
  50. inspect_ai/_view/www/src/utils/numeric.ts +17 -0
  51. inspect_ai/agent/_agent.py +3 -3
  52. inspect_ai/agent/_as_solver.py +22 -12
  53. inspect_ai/agent/_as_tool.py +20 -6
  54. inspect_ai/agent/_handoff.py +12 -1
  55. inspect_ai/agent/_react.py +4 -3
  56. inspect_ai/agent/_run.py +16 -3
  57. inspect_ai/agent/_types.py +9 -0
  58. inspect_ai/dataset/_dataset.py +6 -3
  59. inspect_ai/log/__init__.py +14 -0
  60. inspect_ai/log/_convert.py +4 -9
  61. inspect_ai/log/_file.py +56 -0
  62. inspect_ai/log/_log.py +99 -0
  63. inspect_ai/log/_recorders/__init__.py +2 -0
  64. inspect_ai/log/_recorders/buffer/database.py +12 -11
  65. inspect_ai/log/_recorders/buffer/filestore.py +2 -2
  66. inspect_ai/log/_recorders/buffer/types.py +2 -2
  67. inspect_ai/log/_recorders/eval.py +20 -65
  68. inspect_ai/log/_recorders/file.py +28 -6
  69. inspect_ai/log/_recorders/recorder.py +7 -0
  70. inspect_ai/log/_recorders/types.py +1 -23
  71. inspect_ai/log/_samples.py +14 -25
  72. inspect_ai/log/_transcript.py +84 -36
  73. inspect_ai/log/_tree.py +118 -0
  74. inspect_ai/log/_util.py +52 -0
  75. inspect_ai/model/__init__.py +5 -1
  76. inspect_ai/model/_call_tools.py +72 -44
  77. inspect_ai/model/_generate_config.py +14 -8
  78. inspect_ai/model/_model.py +66 -88
  79. inspect_ai/model/_model_output.py +25 -0
  80. inspect_ai/model/_openai.py +2 -0
  81. inspect_ai/model/_providers/anthropic.py +13 -23
  82. inspect_ai/model/_providers/hf.py +27 -1
  83. inspect_ai/model/_providers/openai_o1.py +8 -2
  84. inspect_ai/model/_providers/providers.py +18 -4
  85. inspect_ai/model/_providers/sglang.py +247 -0
  86. inspect_ai/model/_providers/vllm.py +211 -400
  87. inspect_ai/scorer/_choice.py +1 -2
  88. inspect_ai/solver/__init__.py +7 -2
  89. inspect_ai/solver/_basic_agent.py +3 -10
  90. inspect_ai/solver/_chain.py +1 -1
  91. inspect_ai/solver/_fork.py +1 -1
  92. inspect_ai/solver/_multiple_choice.py +5 -22
  93. inspect_ai/solver/_plan.py +2 -2
  94. inspect_ai/solver/_task_state.py +26 -88
  95. inspect_ai/solver/_transcript.py +6 -7
  96. inspect_ai/tool/_json_rpc_helpers.py +45 -17
  97. inspect_ai/tool/_mcp/_mcp.py +8 -5
  98. inspect_ai/tool/_mcp/_sandbox.py +8 -2
  99. inspect_ai/tool/_mcp/server.py +3 -1
  100. inspect_ai/tool/_tool_call.py +4 -1
  101. inspect_ai/tool/_tool_support_helpers.py +51 -12
  102. inspect_ai/tool/_tools/_bash_session.py +190 -68
  103. inspect_ai/tool/_tools/_computer/_computer.py +25 -1
  104. inspect_ai/tool/_tools/_execute.py +4 -1
  105. inspect_ai/tool/_tools/_text_editor.py +4 -3
  106. inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
  107. inspect_ai/util/__init__.py +16 -0
  108. inspect_ai/util/_anyio.py +11 -0
  109. inspect_ai/util/_collect.py +50 -0
  110. inspect_ai/util/_limit.py +393 -0
  111. inspect_ai/util/_limited_conversation.py +57 -0
  112. inspect_ai/util/_span.py +58 -0
  113. inspect_ai/util/_subtask.py +27 -42
  114. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/METADATA +1 -1
  115. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/RECORD +120 -134
  116. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/WHEEL +1 -1
  117. inspect_ai/_display/core/group.py +0 -79
  118. inspect_ai/solver/_limit.py +0 -39
  119. inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
  120. inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
  121. inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
  122. inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
  123. inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
  124. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
  125. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
  126. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  127. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
  128. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
  129. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
  130. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
  131. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
  132. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
  133. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
  134. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
  135. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
  136. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
  137. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
  138. inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
  139. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
  140. inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
  141. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
  142. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
  143. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
  144. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  145. inspect_ai/tool/_tools/_computer/test_args.py +0 -151
  146. /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
  147. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/entry_points.txt +0 -0
  148. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/licenses/LICENSE +0 -0
  149. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,6 @@ import { FC } from "react";
3
3
  import clsx from "clsx";
4
4
  import { EvalModelConfig, EvalSpec } from "../../@types/log";
5
5
  import { Card, CardBody, CardHeader } from "../../components/Card";
6
- import { ApplicationIcons } from "../appearance/icons";
7
6
  import { MetaDataGrid } from "../content/MetaDataGrid";
8
7
  import styles from "./ModelCard.module.css";
9
8
 
@@ -33,7 +32,7 @@ export const ModelCard: FC<ModelCardProps> = ({ evalSpec }) => {
33
32
 
34
33
  return (
35
34
  <Card>
36
- <CardHeader icon={ApplicationIcons.model} label="Models" />
35
+ <CardHeader label="Models" />
37
36
  <CardBody id={"task-model-card-body"}>
38
37
  <div className={styles.container}>
39
38
  {Object.keys(modelsInfo || {}).map((modelKey) => {
@@ -1,7 +1,7 @@
1
1
  import { FC } from "react";
2
2
  import { EvalPlan, EvalScore, EvalSpec } from "../../@types/log";
3
3
  import { Card, CardBody, CardHeader } from "../../components/Card";
4
- import { ApplicationIcons } from "../appearance/icons";
4
+ import { MetaDataView } from "../content/MetaDataView";
5
5
  import { PlanDetailView } from "./PlanDetailView";
6
6
 
7
7
  interface PlanCardProps {
@@ -14,12 +14,34 @@ interface PlanCardProps {
14
14
  * Renders the plan card
15
15
  */
16
16
  export const PlanCard: FC<PlanCardProps> = ({ evalSpec, evalPlan, scores }) => {
17
+ const metadata = evalSpec?.metadata || {};
18
+
17
19
  return (
18
- <Card>
19
- <CardHeader icon={ApplicationIcons.config} label="Config" />
20
- <CardBody id={"task-plan-card-body"}>
21
- <PlanDetailView evaluation={evalSpec} plan={evalPlan} scores={scores} />
22
- </CardBody>
23
- </Card>
20
+ <>
21
+ <Card>
22
+ <CardHeader label="Summary" />
23
+ <CardBody id={"task-plan-card-body"}>
24
+ <PlanDetailView
25
+ evaluation={evalSpec}
26
+ plan={evalPlan}
27
+ scores={scores}
28
+ />
29
+ </CardBody>
30
+ </Card>
31
+
32
+ {Object.keys(metadata).length > 0 && (
33
+ <Card>
34
+ <CardHeader label="Metadata" />
35
+ <CardBody id={"task-metadata`"}>
36
+ <MetaDataView
37
+ key={`plan-md-metadata`}
38
+ className={"text-size-small"}
39
+ entries={metadata}
40
+ tableOptions="sm"
41
+ />
42
+ </CardBody>
43
+ </Card>
44
+ )}
45
+ </>
24
46
  );
25
47
  };
@@ -22,7 +22,7 @@
22
22
  }
23
23
 
24
24
  .planCol {
25
- margin-top: em;
25
+ margin-top: 0;
26
26
  }
27
27
 
28
28
  .container {
@@ -1,14 +1,10 @@
1
1
  import { EvalPlan, EvalScore, EvalSpec, Params2 } from "../../@types/log";
2
- import { toTitleCase } from "../../utils/format";
3
- import { ghCommitUrl } from "../../utils/git";
4
- import { MetaDataView } from "../content/MetaDataView";
5
2
  import { DatasetDetailView } from "./DatasetDetailView";
6
3
  import { ScorerDetailView } from "./ScorerDetailView";
7
4
  import { SolversDetailView } from "./SolverDetailView";
8
5
 
9
6
  import clsx from "clsx";
10
7
  import { FC, ReactNode } from "react";
11
- import { kModelNone } from "../../constants";
12
8
  import styles from "./PlanDetailView.module.css";
13
9
 
14
10
  interface PlanDetailViewProps {
@@ -26,71 +22,7 @@ export const PlanDetailView: FC<PlanDetailViewProps> = ({
26
22
  return null;
27
23
  }
28
24
 
29
- // Add configuration
30
- const config: Record<string, unknown> = {};
31
- Object.entries(evaluation?.config || {}).forEach((entry) => {
32
- const key = entry[0];
33
- const value = entry[1];
34
- config[key] = value;
35
- });
36
-
37
25
  const steps = plan?.steps;
38
- const metadata = evaluation?.metadata;
39
- const revision = evaluation?.revision;
40
- const packages = evaluation?.packages;
41
- const model_args = evaluation?.model_args;
42
- const task_args = evaluation?.task_args;
43
- const generate_config = plan?.config;
44
-
45
- const taskInformation: Record<string, unknown> = {
46
- ["Task ID"]: evaluation?.task_id,
47
- ["Run ID"]: evaluation?.run_id,
48
- };
49
- if (revision) {
50
- taskInformation[
51
- `${revision.type ? `${toTitleCase(revision.type)} ` : ""}Revision`
52
- ] = {
53
- _html: (
54
- <a href={ghCommitUrl(revision.origin, revision.commit)}>
55
- {revision.commit}
56
- </a>
57
- ),
58
- };
59
- }
60
- if (packages) {
61
- const names = Object.keys(packages).map((key) => {
62
- return `${key} ${packages[key]}`;
63
- });
64
-
65
- if (names.length === 1) {
66
- taskInformation["Inspect"] = names[0];
67
- } else {
68
- taskInformation["Inspect"] = names;
69
- }
70
- }
71
- if (evaluation.tags) {
72
- taskInformation["Tags"] = evaluation.tags.join(", ");
73
- }
74
-
75
- if (evaluation?.model && evaluation.model !== kModelNone) {
76
- config["model"] = evaluation.model;
77
- }
78
-
79
- if (evaluation?.model_base_url) {
80
- config["model_base_url"] = evaluation.model_base_url;
81
- }
82
-
83
- if (evaluation?.sandbox) {
84
- if (Array.isArray(evaluation?.sandbox)) {
85
- config["sandbox"] = evaluation.sandbox[0];
86
- if (evaluation.sandbox[1]) {
87
- config["sandbox_config"] = evaluation.sandbox[1];
88
- }
89
- } else {
90
- config["sandbox"] = evaluation?.sandbox.type;
91
- config["sandbox_config"] = evaluation?.sandbox.config;
92
- }
93
- }
94
26
 
95
27
  const taskColumns: {
96
28
  title: string;
@@ -148,117 +80,12 @@ export const PlanDetailView: FC<PlanDetailViewProps> = ({
148
80
  }
149
81
  }
150
82
 
151
- // Compute the column style for the remaining (either 1 or 2 columns wide)
152
- const metadataColumns: {
153
- title: string;
154
- className: string;
155
- contents: ReactNode;
156
- }[] = [];
157
- const cols = colCount(
158
- metadataColumns,
159
- task_args,
160
- model_args,
161
- config,
162
- metadata,
163
- );
164
-
165
- metadataColumns.push({
166
- title: "Task Information",
167
- className: cols === 1 ? styles.oneCol : styles.twoCol,
168
- contents: (
169
- <MetaDataView
170
- key={`plan-md-task`}
171
- className={"text-size-small"}
172
- entries={taskInformation}
173
- tableOptions="sm"
174
- />
175
- ),
176
- });
177
-
178
- if (task_args && Object.keys(task_args).length > 0) {
179
- metadataColumns.push({
180
- title: "Task Args",
181
- className: cols === 1 ? styles.oneCol : styles.twoCol,
182
- contents: (
183
- <MetaDataView
184
- key={`plan-md-task-args`}
185
- className={"text-size-small"}
186
- entries={task_args as Record<string, unknown>}
187
- tableOptions="sm"
188
- />
189
- ),
190
- });
191
- }
192
- if (model_args && Object.keys(model_args).length > 0) {
193
- metadataColumns.push({
194
- title: "Model Args",
195
- className: cols === 1 ? styles.oneCol : styles.twoCol,
196
- contents: (
197
- <MetaDataView
198
- key={`plan-md-model-args`}
199
- className={"text-size-small"}
200
- entries={model_args as Record<string, unknown>}
201
- tableOptions="sm"
202
- />
203
- ),
204
- });
205
- }
206
-
207
- if (config && Object.keys(config).length > 0) {
208
- metadataColumns.push({
209
- title: "Configuration",
210
- className: cols === 1 ? styles.oneCol : styles.twoCol,
211
- contents: (
212
- <MetaDataView
213
- key={`plan-md-config`}
214
- className={"text-size-small"}
215
- entries={config}
216
- tableOptions="sm"
217
- />
218
- ),
219
- });
220
- }
221
-
222
- if (generate_config && Object.keys(generate_config).length > 0) {
223
- const generate_record: Record<string, unknown> = Object.fromEntries(
224
- Object.entries(generate_config),
225
- );
226
-
227
- metadataColumns.push({
228
- title: "Generate Config",
229
- className: cols === 1 ? styles.oneCol : styles.twoCol,
230
- contents: (
231
- <MetaDataView
232
- key={`plan-md-generate-config`}
233
- className={"text-size-small"}
234
- entries={generate_record}
235
- tableOptions="sm"
236
- />
237
- ),
238
- });
239
- }
240
-
241
- if (metadata && Object.keys(metadata).length > 0) {
242
- metadataColumns.push({
243
- title: "Metadata",
244
- className: cols === 1 ? styles.oneCol : styles.twoCol,
245
- contents: (
246
- <MetaDataView
247
- key={`plan-md-metadata`}
248
- className={"text-size-small"}
249
- entries={metadata}
250
- tableOptions="sm"
251
- />
252
- ),
253
- });
254
- }
255
-
256
83
  return (
257
84
  <div className={styles.container}>
258
85
  <div
259
86
  className={styles.grid}
260
87
  style={{
261
- gridTemplateColumns: `repeat(${taskColumns.length}, auto)`,
88
+ gridTemplateColumns: `repeat(${taskColumns.length}, fit-content(50%))`,
262
89
  }}
263
90
  >
264
91
  {taskColumns.map((col) => {
@@ -273,34 +100,10 @@ export const PlanDetailView: FC<PlanDetailViewProps> = ({
273
100
  );
274
101
  })}
275
102
  </div>
276
-
277
- <div className={clsx(styles.row)}>
278
- {metadataColumns.map((col) => {
279
- return (
280
- <PlanColumn
281
- title={col.title}
282
- className={col.className}
283
- key={`plan-col-${col.title}`}
284
- >
285
- {col.contents}
286
- </PlanColumn>
287
- );
288
- })}
289
- </div>
290
103
  </div>
291
104
  );
292
105
  };
293
106
 
294
- const colCount = (...other: unknown[]) => {
295
- let count = 0;
296
- for (const o in other) {
297
- if (o && Object.keys(o).length > 0) {
298
- count++;
299
- }
300
- }
301
- return count;
302
- };
303
-
304
107
  interface PlanColumnProps {
305
108
  title: string;
306
109
  className: string | string[];
@@ -1,6 +1,7 @@
1
1
  import { Value2 } from "../../../../@types/log";
2
2
  import { kScoreTypeNumeric } from "../../../../constants";
3
3
  import { formatDecimalNoTrailingZeroes } from "../../../../utils/format";
4
+ import { compareWithNan } from "../../../../utils/numeric";
4
5
  import { ScoreDescriptor } from "../types";
5
6
 
6
7
  export const numericScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
@@ -14,7 +15,7 @@ export const numericScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
14
15
  max: Math.max(...onlyNumeric),
15
16
  compare: (a, b) => {
16
17
  if (typeof a.value === "number" && typeof b.value === "number") {
17
- return a.value - b.value;
18
+ return compareWithNan(a.value, b.value);
18
19
  } else {
19
20
  console.warn("Comparing non-numerics using a numeric score descriptor");
20
21
  return 0;
@@ -13,7 +13,7 @@
13
13
  }
14
14
 
15
15
  .exec {
16
- margin-top: 0.5em;
16
+ margin-top: 0;
17
17
  }
18
18
 
19
19
  .result {
@@ -29,4 +29,5 @@
29
29
  white-space: pre-wrap;
30
30
  word-wrap: break-word;
31
31
  overflow-wrap: break-word;
32
+ margin-bottom: 0;
32
33
  }
@@ -0,0 +1,174 @@
1
+ import clsx from "clsx";
2
+ import { FC } from "react";
3
+ import { SpanBeginEvent } from "../../../@types/log";
4
+ import { formatDateTime } from "../../../utils/format";
5
+ import { EventPanel } from "./event/EventPanel";
6
+ import { TranscriptComponent } from "./TranscriptView";
7
+ import { kSandboxSignalName } from "./transform/fixups";
8
+ import { EventNode } from "./types";
9
+
10
+ interface SpanEventViewProps {
11
+ id: string;
12
+ event: SpanBeginEvent;
13
+ children: EventNode[];
14
+ className?: string | string[];
15
+ }
16
+
17
+ /**
18
+ * Renders the SpanEventView component.
19
+ */
20
+ export const SpanEventView: FC<SpanEventViewProps> = ({
21
+ id,
22
+ event,
23
+ children,
24
+ className,
25
+ }) => {
26
+ const descriptor = spanDescriptor(event);
27
+ const title =
28
+ descriptor.name ||
29
+ `${event.type ? event.type + ": " : "Step: "}${event.name}`;
30
+ const text = summarize(children);
31
+
32
+ return (
33
+ <EventPanel
34
+ id={`span-${event.name}-${id}`}
35
+ className={clsx("transcript-span", className)}
36
+ title={title}
37
+ subTitle={formatDateTime(new Date(event.timestamp))}
38
+ text={text}
39
+ collapse={descriptor.collapse}
40
+ icon={descriptor.icon}
41
+ >
42
+ <TranscriptComponent
43
+ id={`span|${event.name}|${id}`}
44
+ eventNodes={children}
45
+ />
46
+ </EventPanel>
47
+ );
48
+ };
49
+
50
+ const summarize = (children: EventNode[]) => {
51
+ if (children.length === 0) {
52
+ return "(no events)";
53
+ }
54
+
55
+ const formatEvent = (event: string, count: number) => {
56
+ if (count === 1) {
57
+ return `${count} ${event} event`;
58
+ } else {
59
+ return `${count} ${event} events`;
60
+ }
61
+ };
62
+
63
+ // Count the types
64
+ const typeCount: Record<string, number> = {};
65
+ children.forEach((child) => {
66
+ const currentCount = typeCount[child.event.event] || 0;
67
+ typeCount[child.event.event] = currentCount + 1;
68
+ });
69
+
70
+ // Try to summarize event types
71
+ const numberOfTypes = Object.keys(typeCount).length;
72
+ if (numberOfTypes < 3) {
73
+ return Object.keys(typeCount)
74
+ .map((key) => {
75
+ return formatEvent(key, typeCount[key]);
76
+ })
77
+ .join(", ");
78
+ }
79
+
80
+ // Too many types, just return the number of events
81
+ if (children.length === 1) {
82
+ return "1 event";
83
+ } else {
84
+ return `${children.length} events`;
85
+ }
86
+ };
87
+
88
+ /**
89
+ * Returns a descriptor object containing icon and style based on the event type and name.
90
+ */
91
+ const spanDescriptor = (
92
+ event: SpanBeginEvent,
93
+ ): { icon?: string; name?: string; endSpace?: boolean; collapse?: boolean } => {
94
+ const rootStepDescriptor = {
95
+ endSpace: true,
96
+ };
97
+
98
+ if (event.type === "solver") {
99
+ switch (event.name) {
100
+ case "chain_of_thought":
101
+ return {
102
+ ...rootStepDescriptor,
103
+ collapse: false,
104
+ };
105
+ case "generate":
106
+ return {
107
+ ...rootStepDescriptor,
108
+ collapse: false,
109
+ };
110
+ case "self_critique":
111
+ return {
112
+ ...rootStepDescriptor,
113
+ collapse: false,
114
+ };
115
+ case "system_message":
116
+ return {
117
+ ...rootStepDescriptor,
118
+ collapse: true,
119
+ };
120
+ case "use_tools":
121
+ return {
122
+ ...rootStepDescriptor,
123
+ collapse: false,
124
+ };
125
+ case "multiple_choice":
126
+ return {
127
+ ...rootStepDescriptor,
128
+ collapse: false,
129
+ };
130
+ default:
131
+ return {
132
+ ...rootStepDescriptor,
133
+ collapse: false,
134
+ };
135
+ }
136
+ } else if (event.type === "scorer") {
137
+ return {
138
+ ...rootStepDescriptor,
139
+ collapse: false,
140
+ };
141
+ } else if (event.event === "span_begin") {
142
+ if (event.span_id === kSandboxSignalName) {
143
+ return {
144
+ ...rootStepDescriptor,
145
+ name: "Sandbox Events",
146
+ collapse: true,
147
+ };
148
+ } else if (event.name === "init") {
149
+ return {
150
+ ...rootStepDescriptor,
151
+ name: "Init",
152
+ collapse: true,
153
+ };
154
+ } else {
155
+ return {
156
+ ...rootStepDescriptor,
157
+ collapse: false,
158
+ };
159
+ }
160
+ } else {
161
+ switch (event.name) {
162
+ case "sample_init":
163
+ return {
164
+ ...rootStepDescriptor,
165
+ name: "Sample Init",
166
+ collapse: true,
167
+ };
168
+ default:
169
+ return {
170
+ endSpace: false,
171
+ };
172
+ }
173
+ }
174
+ };
@@ -4,7 +4,7 @@ import { resolveToolInput } from "../chat/tools/tool";
4
4
  import { ToolCallView } from "../chat/tools/ToolCallView";
5
5
  import { ApprovalEventView } from "./ApprovalEventView";
6
6
  import { EventPanel } from "./event/EventPanel";
7
- import { TranscriptView } from "./TranscriptView";
7
+ import { TranscriptComponent } from "./TranscriptView";
8
8
 
9
9
  import clsx from "clsx";
10
10
  import { FC, useMemo } from "react";
@@ -12,11 +12,12 @@ import { PulsingDots } from "../../../components/PulsingDots";
12
12
  import { ChatView } from "../chat/ChatView";
13
13
  import { formatTiming, formatTitle } from "./event/utils";
14
14
  import styles from "./ToolEventView.module.css";
15
+ import { EventNode } from "./types";
15
16
 
16
17
  interface ToolEventViewProps {
17
18
  id: string;
18
19
  event: ToolEvent;
19
- depth: number;
20
+ children: EventNode[];
20
21
  className?: string | string[];
21
22
  }
22
23
 
@@ -26,7 +27,7 @@ interface ToolEventViewProps {
26
27
  export const ToolEventView: FC<ToolEventViewProps> = ({
27
28
  id,
28
29
  event,
29
- depth,
30
+ children,
30
31
  className,
31
32
  }) => {
32
33
  // Extract tool input
@@ -92,13 +93,12 @@ export const ToolEventView: FC<ToolEventViewProps> = ({
92
93
  </div>
93
94
  ) : undefined}
94
95
  </div>
95
- {event.events.length > 0 ? (
96
- <TranscriptView
97
- id={`${id}-subtask`}
96
+ {children.length > 0 ? (
97
+ <TranscriptComponent
98
98
  data-name="Transcript"
99
+ id={`${id}-subtask`}
100
+ eventNodes={children}
99
101
  data-default={event.failed || event.agent ? true : null}
100
- events={event.events}
101
- depth={depth + 1}
102
102
  />
103
103
  ) : (
104
104
  ""
@@ -17,6 +17,7 @@ import { ToolEventView } from "./ToolEventView";
17
17
  import { EventNode } from "./types";
18
18
 
19
19
  import clsx from "clsx";
20
+ import { SpanEventView } from "./SpanEventView";
20
21
  import styles from "./TranscriptView.module.css";
21
22
  import { TranscriptVirtualListComponent } from "./TranscriptVirtualListComponent";
22
23
  import { fixupEventStream } from "./transform/fixups";
@@ -64,7 +65,6 @@ export const TranscriptVirtualList: FC<TranscriptVirtualListProps> = memo(
64
65
  const eventNodes = useMemo(() => {
65
66
  const resolvedEvents = fixupEventStream(events, !running);
66
67
  const eventNodes = treeifyEvents(resolvedEvents, depth || 0);
67
-
68
68
  return eventNodes;
69
69
  }, [events, depth]);
70
70
 
@@ -201,6 +201,16 @@ export const RenderedEventNode: FC<RenderedEventNodeProps> = memo(
201
201
  <StateEventView id={id} event={node.event} className={className} />
202
202
  );
203
203
 
204
+ case "span_begin":
205
+ return (
206
+ <SpanEventView
207
+ id={id}
208
+ event={node.event}
209
+ children={node.children}
210
+ className={className}
211
+ />
212
+ );
213
+
204
214
  case "step":
205
215
  return (
206
216
  <StepEventView
@@ -237,7 +247,7 @@ export const RenderedEventNode: FC<RenderedEventNodeProps> = memo(
237
247
  id={id}
238
248
  event={node.event}
239
249
  className={className}
240
- depth={node.depth}
250
+ children={node.children}
241
251
  />
242
252
  );
243
253
 
@@ -8,7 +8,7 @@
8
8
 
9
9
  .node {
10
10
  padding-top: 0.7rem;
11
- padding-bottom: 0em;
11
+ padding-bottom: 1px;
12
12
  }
13
13
 
14
14
  .attached {
@@ -9,7 +9,6 @@ import {
9
9
  import { ApplicationIcons } from "../../../appearance/icons";
10
10
  import { EventNavs } from "./EventNavs";
11
11
 
12
- import { ProgressBar } from "../../../../components/ProgressBar";
13
12
  import { useProperty } from "../../../../state/hooks";
14
13
  import styles from "./EventPanel.module.css";
15
14
 
@@ -41,7 +40,6 @@ export const EventPanel: FC<EventPanelProps> = ({
41
40
  icon,
42
41
  collapse,
43
42
  children,
44
- running,
45
43
  }) => {
46
44
  const [isCollapsed, setCollapsed] = useProperty(id, "collapsed", {
47
45
  defaultValue: !!collapse,
@@ -191,7 +189,6 @@ export const EventPanel: FC<EventPanelProps> = ({
191
189
  })}
192
190
  </div>
193
191
  </div>
194
- <ProgressBar animating={!!running} />
195
192
  </>
196
193
  );
197
194
  return card;