inspect-ai 0.3.93__py3-none-any.whl → 0.3.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_display/textual/widgets/samples.py +3 -3
- inspect_ai/_display/textual/widgets/transcript.py +3 -29
- inspect_ai/_eval/task/run.py +10 -7
- inspect_ai/_util/answer.py +26 -0
- inspect_ai/_util/constants.py +0 -1
- inspect_ai/_util/local_server.py +51 -21
- inspect_ai/_view/www/dist/assets/index.css +14 -13
- inspect_ai/_view/www/dist/assets/index.js +400 -84
- inspect_ai/_view/www/log-schema.json +375 -0
- inspect_ai/_view/www/src/@types/log.d.ts +90 -12
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
- inspect_ai/agent/_as_solver.py +3 -1
- inspect_ai/agent/_as_tool.py +6 -4
- inspect_ai/agent/_handoff.py +5 -1
- inspect_ai/agent/_react.py +4 -3
- inspect_ai/agent/_run.py +6 -1
- inspect_ai/agent/_types.py +9 -0
- inspect_ai/dataset/_dataset.py +6 -3
- inspect_ai/log/__init__.py +10 -0
- inspect_ai/log/_convert.py +4 -9
- inspect_ai/log/_samples.py +14 -17
- inspect_ai/log/_transcript.py +77 -35
- inspect_ai/log/_tree.py +118 -0
- inspect_ai/model/_call_tools.py +42 -34
- inspect_ai/model/_model.py +45 -40
- inspect_ai/model/_providers/hf.py +27 -1
- inspect_ai/model/_providers/sglang.py +8 -2
- inspect_ai/model/_providers/vllm.py +6 -2
- inspect_ai/scorer/_choice.py +1 -2
- inspect_ai/solver/_chain.py +1 -1
- inspect_ai/solver/_fork.py +1 -1
- inspect_ai/solver/_multiple_choice.py +5 -22
- inspect_ai/solver/_plan.py +2 -2
- inspect_ai/solver/_transcript.py +6 -7
- inspect_ai/tool/_mcp/_mcp.py +6 -5
- inspect_ai/tool/_tools/_execute.py +4 -1
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_anyio.py +11 -0
- inspect_ai/util/_collect.py +50 -0
- inspect_ai/util/_span.py +58 -0
- inspect_ai/util/_subtask.py +27 -42
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.94.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.94.dist-info}/RECORD +56 -51
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.94.dist-info}/WHEEL +1 -1
- inspect_ai/_display/core/group.py +0 -79
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.94.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.94.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.94.dist-info}/top_level.txt +0 -0
@@ -316,6 +316,7 @@ export type Explanation = string | null;
|
|
316
316
|
export type Metadata6 = {
|
317
317
|
[k: string]: unknown;
|
318
318
|
} | null;
|
319
|
+
export type SpanId = string | null;
|
319
320
|
export type Timestamp = string;
|
320
321
|
export type WorkingStart = number;
|
321
322
|
export type Pending = boolean | null;
|
@@ -339,6 +340,7 @@ export type Files1 = {
|
|
339
340
|
} | null;
|
340
341
|
export type Setup1 = string | null;
|
341
342
|
export type JsonValue = unknown;
|
343
|
+
export type SpanId1 = string | null;
|
342
344
|
export type Timestamp1 = string;
|
343
345
|
export type WorkingStart1 = number;
|
344
346
|
export type Pending1 = boolean | null;
|
@@ -352,6 +354,7 @@ export type Type10 =
|
|
352
354
|
| "custom";
|
353
355
|
export type Message2 = string;
|
354
356
|
export type Limit1 = number | null;
|
357
|
+
export type SpanId2 = string | null;
|
355
358
|
export type Timestamp2 = string;
|
356
359
|
export type WorkingStart2 = number;
|
357
360
|
export type Pending2 = boolean | null;
|
@@ -366,6 +369,7 @@ export type Input2 = string | null;
|
|
366
369
|
export type Result = number | null;
|
367
370
|
export type Output = string | null;
|
368
371
|
export type Completed = string | null;
|
372
|
+
export type SpanId3 = string | null;
|
369
373
|
export type Timestamp3 = string;
|
370
374
|
export type WorkingStart3 = number;
|
371
375
|
export type Pending3 = boolean | null;
|
@@ -374,11 +378,13 @@ export type Op = "remove" | "add" | "replace" | "move" | "test" | "copy";
|
|
374
378
|
export type Path = string;
|
375
379
|
export type From = string | null;
|
376
380
|
export type Changes = JsonChange[];
|
381
|
+
export type SpanId4 = string | null;
|
377
382
|
export type Timestamp4 = string;
|
378
383
|
export type WorkingStart4 = number;
|
379
384
|
export type Pending4 = boolean | null;
|
380
385
|
export type Event4 = "store";
|
381
386
|
export type Changes1 = JsonChange[];
|
387
|
+
export type SpanId5 = string | null;
|
382
388
|
export type Timestamp5 = string;
|
383
389
|
export type WorkingStart5 = number;
|
384
390
|
export type Pending5 = boolean | null;
|
@@ -399,11 +405,13 @@ export type Additionalproperties1 = boolean;
|
|
399
405
|
export type Tools1 = ToolInfo[];
|
400
406
|
export type ToolChoice = ("auto" | "any" | "none") | ToolFunction;
|
401
407
|
export type Name9 = string;
|
408
|
+
export type Retries = number | null;
|
402
409
|
export type Error1 = string | null;
|
403
410
|
export type Cache = ("read" | "write") | null;
|
404
411
|
export type Time1 = number | null;
|
405
412
|
export type Completed1 = string | null;
|
406
413
|
export type WorkingTime = number | null;
|
414
|
+
export type SpanId6 = string | null;
|
407
415
|
export type Timestamp6 = string;
|
408
416
|
export type WorkingStart6 = number;
|
409
417
|
export type Pending6 = boolean | null;
|
@@ -428,6 +436,7 @@ export type Result1 =
|
|
428
436
|
| ContentVideo
|
429
437
|
)[];
|
430
438
|
export type Truncated = [unknown, unknown] | null;
|
439
|
+
export type SpanId7 = string | null;
|
431
440
|
export type Timestamp7 = string;
|
432
441
|
export type WorkingStart7 = number;
|
433
442
|
export type Pending7 = boolean | null;
|
@@ -441,22 +450,26 @@ export type Decision =
|
|
441
450
|
| "escalate"
|
442
451
|
| "terminate";
|
443
452
|
export type Explanation1 = string | null;
|
453
|
+
export type SpanId8 = string | null;
|
444
454
|
export type Timestamp8 = string;
|
445
455
|
export type WorkingStart8 = number;
|
446
456
|
export type Pending8 = boolean | null;
|
447
457
|
export type Event8 = "input";
|
448
458
|
export type Input4 = string;
|
449
459
|
export type InputAnsi = string;
|
460
|
+
export type SpanId9 = string | null;
|
450
461
|
export type Timestamp9 = string;
|
451
462
|
export type WorkingStart9 = number;
|
452
463
|
export type Pending9 = boolean | null;
|
453
464
|
export type Event9 = "score";
|
454
465
|
export type Target2 = string | string[] | null;
|
455
466
|
export type Intermediate = boolean;
|
467
|
+
export type SpanId10 = string | null;
|
456
468
|
export type Timestamp10 = string;
|
457
469
|
export type WorkingStart10 = number;
|
458
470
|
export type Pending10 = boolean | null;
|
459
471
|
export type Event10 = "error";
|
472
|
+
export type SpanId11 = string | null;
|
460
473
|
export type Timestamp11 = string;
|
461
474
|
export type WorkingStart11 = number;
|
462
475
|
export type Pending11 = boolean | null;
|
@@ -476,24 +489,42 @@ export type Created1 = number;
|
|
476
489
|
export type Filename = string;
|
477
490
|
export type Module = string;
|
478
491
|
export type Lineno = number;
|
492
|
+
export type SpanId12 = string | null;
|
479
493
|
export type Timestamp12 = string;
|
480
494
|
export type WorkingStart12 = number;
|
481
495
|
export type Pending12 = boolean | null;
|
482
496
|
export type Event12 = "info";
|
483
497
|
export type Source4 = string | null;
|
498
|
+
export type SpanId13 = string | null;
|
484
499
|
export type Timestamp13 = string;
|
485
500
|
export type WorkingStart13 = number;
|
486
501
|
export type Pending13 = boolean | null;
|
487
|
-
export type Event13 = "
|
488
|
-
export type
|
502
|
+
export type Event13 = "span_begin";
|
503
|
+
export type Id8 = string;
|
504
|
+
export type ParentId = string | null;
|
489
505
|
export type Type13 = string | null;
|
490
506
|
export type Name11 = string;
|
507
|
+
export type SpanId14 = string | null;
|
491
508
|
export type Timestamp14 = string;
|
492
509
|
export type WorkingStart14 = number;
|
493
510
|
export type Pending14 = boolean | null;
|
494
|
-
export type Event14 = "
|
495
|
-
export type
|
511
|
+
export type Event14 = "span_end";
|
512
|
+
export type Id9 = string;
|
513
|
+
export type SpanId15 = string | null;
|
514
|
+
export type Timestamp15 = string;
|
515
|
+
export type WorkingStart15 = number;
|
516
|
+
export type Pending15 = boolean | null;
|
517
|
+
export type Event15 = "step";
|
518
|
+
export type Action1 = "begin" | "end";
|
496
519
|
export type Type14 = string | null;
|
520
|
+
export type Name12 = string;
|
521
|
+
export type SpanId16 = string | null;
|
522
|
+
export type Timestamp16 = string;
|
523
|
+
export type WorkingStart16 = number;
|
524
|
+
export type Pending16 = boolean | null;
|
525
|
+
export type Event16 = "subtask";
|
526
|
+
export type Name13 = string;
|
527
|
+
export type Type15 = string | null;
|
497
528
|
export type Events2 = (
|
498
529
|
| SampleInitEvent
|
499
530
|
| SampleLimitEvent
|
@@ -508,6 +539,8 @@ export type Events2 = (
|
|
508
539
|
| ErrorEvent
|
509
540
|
| LoggerEvent
|
510
541
|
| InfoEvent
|
542
|
+
| SpanBeginEvent
|
543
|
+
| SpanEndEvent
|
511
544
|
| StepEvent
|
512
545
|
| SubtaskEvent
|
513
546
|
)[];
|
@@ -527,6 +560,8 @@ export type Events1 = (
|
|
527
560
|
| ErrorEvent
|
528
561
|
| LoggerEvent
|
529
562
|
| InfoEvent
|
563
|
+
| SpanBeginEvent
|
564
|
+
| SpanEndEvent
|
530
565
|
| StepEvent
|
531
566
|
| SubtaskEvent
|
532
567
|
)[];
|
@@ -548,6 +583,8 @@ export type Events = (
|
|
548
583
|
| ErrorEvent
|
549
584
|
| LoggerEvent
|
550
585
|
| InfoEvent
|
586
|
+
| SpanBeginEvent
|
587
|
+
| SpanEndEvent
|
551
588
|
| StepEvent
|
552
589
|
| SubtaskEvent
|
553
590
|
)[];
|
@@ -555,7 +592,7 @@ export type TotalTime = number | null;
|
|
555
592
|
export type WorkingTime3 = number | null;
|
556
593
|
export type Uuid = string | null;
|
557
594
|
export type ErrorRetries = EvalError[] | null;
|
558
|
-
export type
|
595
|
+
export type Type16 =
|
559
596
|
| "context"
|
560
597
|
| "time"
|
561
598
|
| "working"
|
@@ -1121,6 +1158,7 @@ export interface Store {
|
|
1121
1158
|
* Beginning of processing a Sample.
|
1122
1159
|
*/
|
1123
1160
|
export interface SampleInitEvent {
|
1161
|
+
span_id: SpanId;
|
1124
1162
|
timestamp: Timestamp;
|
1125
1163
|
working_start: WorkingStart;
|
1126
1164
|
pending: Pending;
|
@@ -1145,6 +1183,7 @@ export interface Sample {
|
|
1145
1183
|
* The sample was unable to finish processing due to a limit
|
1146
1184
|
*/
|
1147
1185
|
export interface SampleLimitEvent {
|
1186
|
+
span_id: SpanId1;
|
1148
1187
|
timestamp: Timestamp1;
|
1149
1188
|
working_start: WorkingStart1;
|
1150
1189
|
pending: Pending1;
|
@@ -1157,6 +1196,7 @@ export interface SampleLimitEvent {
|
|
1157
1196
|
* Sandbox execution or I/O
|
1158
1197
|
*/
|
1159
1198
|
export interface SandboxEvent {
|
1199
|
+
span_id: SpanId2;
|
1160
1200
|
timestamp: Timestamp2;
|
1161
1201
|
working_start: WorkingStart2;
|
1162
1202
|
pending: Pending2;
|
@@ -1174,6 +1214,7 @@ export interface SandboxEvent {
|
|
1174
1214
|
* Change to the current `TaskState`
|
1175
1215
|
*/
|
1176
1216
|
export interface StateEvent {
|
1217
|
+
span_id: SpanId3;
|
1177
1218
|
timestamp: Timestamp3;
|
1178
1219
|
working_start: WorkingStart3;
|
1179
1220
|
pending: Pending3;
|
@@ -1198,6 +1239,7 @@ export interface JsonChange {
|
|
1198
1239
|
* Change to data within the current `Store`.
|
1199
1240
|
*/
|
1200
1241
|
export interface StoreEvent {
|
1242
|
+
span_id: SpanId4;
|
1201
1243
|
timestamp: Timestamp4;
|
1202
1244
|
working_start: WorkingStart4;
|
1203
1245
|
pending: Pending4;
|
@@ -1208,6 +1250,7 @@ export interface StoreEvent {
|
|
1208
1250
|
* Call to a language model.
|
1209
1251
|
*/
|
1210
1252
|
export interface ModelEvent {
|
1253
|
+
span_id: SpanId5;
|
1211
1254
|
timestamp: Timestamp5;
|
1212
1255
|
working_start: WorkingStart5;
|
1213
1256
|
pending: Pending5;
|
@@ -1219,6 +1262,7 @@ export interface ModelEvent {
|
|
1219
1262
|
tool_choice: ToolChoice;
|
1220
1263
|
config: GenerateConfig;
|
1221
1264
|
output: ModelOutput;
|
1265
|
+
retries: Retries;
|
1222
1266
|
error: Error1;
|
1223
1267
|
cache: Cache;
|
1224
1268
|
call: ModelCall | null;
|
@@ -1289,6 +1333,7 @@ export interface Response {
|
|
1289
1333
|
* Call to a tool.
|
1290
1334
|
*/
|
1291
1335
|
export interface ToolEvent {
|
1336
|
+
span_id: SpanId6;
|
1292
1337
|
timestamp: Timestamp6;
|
1293
1338
|
working_start: WorkingStart6;
|
1294
1339
|
pending: Pending6;
|
@@ -1315,6 +1360,7 @@ export interface Arguments1 {
|
|
1315
1360
|
* Tool approval.
|
1316
1361
|
*/
|
1317
1362
|
export interface ApprovalEvent {
|
1363
|
+
span_id: SpanId7;
|
1318
1364
|
timestamp: Timestamp7;
|
1319
1365
|
working_start: WorkingStart7;
|
1320
1366
|
pending: Pending7;
|
@@ -1341,6 +1387,7 @@ export interface ToolCallView {
|
|
1341
1387
|
* Input screen interaction.
|
1342
1388
|
*/
|
1343
1389
|
export interface InputEvent {
|
1390
|
+
span_id: SpanId8;
|
1344
1391
|
timestamp: Timestamp8;
|
1345
1392
|
working_start: WorkingStart8;
|
1346
1393
|
pending: Pending8;
|
@@ -1355,6 +1402,7 @@ export interface InputEvent {
|
|
1355
1402
|
* resulting from a call to `score`.
|
1356
1403
|
*/
|
1357
1404
|
export interface ScoreEvent {
|
1405
|
+
span_id: SpanId9;
|
1358
1406
|
timestamp: Timestamp9;
|
1359
1407
|
working_start: WorkingStart9;
|
1360
1408
|
pending: Pending9;
|
@@ -1367,6 +1415,7 @@ export interface ScoreEvent {
|
|
1367
1415
|
* Event with sample error.
|
1368
1416
|
*/
|
1369
1417
|
export interface ErrorEvent {
|
1418
|
+
span_id: SpanId10;
|
1370
1419
|
timestamp: Timestamp10;
|
1371
1420
|
working_start: WorkingStart10;
|
1372
1421
|
pending: Pending10;
|
@@ -1377,6 +1426,7 @@ export interface ErrorEvent {
|
|
1377
1426
|
* Log message recorded with Python logger.
|
1378
1427
|
*/
|
1379
1428
|
export interface LoggerEvent {
|
1429
|
+
span_id: SpanId11;
|
1380
1430
|
timestamp: Timestamp11;
|
1381
1431
|
working_start: WorkingStart11;
|
1382
1432
|
pending: Pending11;
|
@@ -1399,6 +1449,7 @@ export interface LoggingMessage {
|
|
1399
1449
|
* Event with custom info/data.
|
1400
1450
|
*/
|
1401
1451
|
export interface InfoEvent {
|
1452
|
+
span_id: SpanId12;
|
1402
1453
|
timestamp: Timestamp12;
|
1403
1454
|
working_start: WorkingStart12;
|
1404
1455
|
pending: Pending12;
|
@@ -1407,27 +1458,54 @@ export interface InfoEvent {
|
|
1407
1458
|
data: JsonValue;
|
1408
1459
|
}
|
1409
1460
|
/**
|
1410
|
-
*
|
1461
|
+
* Mark the beginning of a transcript span.
|
1411
1462
|
*/
|
1412
|
-
export interface
|
1463
|
+
export interface SpanBeginEvent {
|
1464
|
+
span_id: SpanId13;
|
1413
1465
|
timestamp: Timestamp13;
|
1414
1466
|
working_start: WorkingStart13;
|
1415
1467
|
pending: Pending13;
|
1416
1468
|
event: Event13;
|
1417
|
-
|
1469
|
+
id: Id8;
|
1470
|
+
parent_id: ParentId;
|
1418
1471
|
type: Type13;
|
1419
1472
|
name: Name11;
|
1420
1473
|
}
|
1421
1474
|
/**
|
1422
|
-
*
|
1475
|
+
* Mark the end of a transcript span.
|
1423
1476
|
*/
|
1424
|
-
export interface
|
1477
|
+
export interface SpanEndEvent {
|
1478
|
+
span_id: SpanId14;
|
1425
1479
|
timestamp: Timestamp14;
|
1426
1480
|
working_start: WorkingStart14;
|
1427
1481
|
pending: Pending14;
|
1428
1482
|
event: Event14;
|
1429
|
-
|
1483
|
+
id: Id9;
|
1484
|
+
}
|
1485
|
+
/**
|
1486
|
+
* Step within current sample or subtask.
|
1487
|
+
*/
|
1488
|
+
export interface StepEvent {
|
1489
|
+
span_id: SpanId15;
|
1490
|
+
timestamp: Timestamp15;
|
1491
|
+
working_start: WorkingStart15;
|
1492
|
+
pending: Pending15;
|
1493
|
+
event: Event15;
|
1494
|
+
action: Action1;
|
1430
1495
|
type: Type14;
|
1496
|
+
name: Name12;
|
1497
|
+
}
|
1498
|
+
/**
|
1499
|
+
* Subtask spawned.
|
1500
|
+
*/
|
1501
|
+
export interface SubtaskEvent {
|
1502
|
+
span_id: SpanId16;
|
1503
|
+
timestamp: Timestamp16;
|
1504
|
+
working_start: WorkingStart16;
|
1505
|
+
pending: Pending16;
|
1506
|
+
event: Event16;
|
1507
|
+
name: Name13;
|
1508
|
+
type: Type15;
|
1431
1509
|
input: Input5;
|
1432
1510
|
result: Result2;
|
1433
1511
|
events: Events2;
|
@@ -1450,7 +1528,7 @@ export interface Attachments {
|
|
1450
1528
|
* Limit encountered by sample.
|
1451
1529
|
*/
|
1452
1530
|
export interface EvalSampleLimit {
|
1453
|
-
type:
|
1531
|
+
type: Type16;
|
1454
1532
|
limit: Limit2;
|
1455
1533
|
}
|
1456
1534
|
/**
|
@@ -0,0 +1,174 @@
|
|
1
|
+
import clsx from "clsx";
|
2
|
+
import { FC } from "react";
|
3
|
+
import { SpanBeginEvent } from "../../../@types/log";
|
4
|
+
import { formatDateTime } from "../../../utils/format";
|
5
|
+
import { EventPanel } from "./event/EventPanel";
|
6
|
+
import { TranscriptComponent } from "./TranscriptView";
|
7
|
+
import { kSandboxSignalName } from "./transform/fixups";
|
8
|
+
import { EventNode } from "./types";
|
9
|
+
|
10
|
+
interface SpanEventViewProps {
|
11
|
+
id: string;
|
12
|
+
event: SpanBeginEvent;
|
13
|
+
children: EventNode[];
|
14
|
+
className?: string | string[];
|
15
|
+
}
|
16
|
+
|
17
|
+
/**
|
18
|
+
* Renders the SpanEventView component.
|
19
|
+
*/
|
20
|
+
export const SpanEventView: FC<SpanEventViewProps> = ({
|
21
|
+
id,
|
22
|
+
event,
|
23
|
+
children,
|
24
|
+
className,
|
25
|
+
}) => {
|
26
|
+
const descriptor = spanDescriptor(event);
|
27
|
+
const title =
|
28
|
+
descriptor.name ||
|
29
|
+
`${event.type ? event.type + ": " : "Step: "}${event.name}`;
|
30
|
+
const text = summarize(children);
|
31
|
+
|
32
|
+
return (
|
33
|
+
<EventPanel
|
34
|
+
id={`span-${event.name}-${id}`}
|
35
|
+
className={clsx("transcript-span", className)}
|
36
|
+
title={title}
|
37
|
+
subTitle={formatDateTime(new Date(event.timestamp))}
|
38
|
+
text={text}
|
39
|
+
collapse={descriptor.collapse}
|
40
|
+
icon={descriptor.icon}
|
41
|
+
>
|
42
|
+
<TranscriptComponent
|
43
|
+
id={`span|${event.name}|${id}`}
|
44
|
+
eventNodes={children}
|
45
|
+
/>
|
46
|
+
</EventPanel>
|
47
|
+
);
|
48
|
+
};
|
49
|
+
|
50
|
+
const summarize = (children: EventNode[]) => {
|
51
|
+
if (children.length === 0) {
|
52
|
+
return "(no events)";
|
53
|
+
}
|
54
|
+
|
55
|
+
const formatEvent = (event: string, count: number) => {
|
56
|
+
if (count === 1) {
|
57
|
+
return `${count} ${event} event`;
|
58
|
+
} else {
|
59
|
+
return `${count} ${event} events`;
|
60
|
+
}
|
61
|
+
};
|
62
|
+
|
63
|
+
// Count the types
|
64
|
+
const typeCount: Record<string, number> = {};
|
65
|
+
children.forEach((child) => {
|
66
|
+
const currentCount = typeCount[child.event.event] || 0;
|
67
|
+
typeCount[child.event.event] = currentCount + 1;
|
68
|
+
});
|
69
|
+
|
70
|
+
// Try to summarize event types
|
71
|
+
const numberOfTypes = Object.keys(typeCount).length;
|
72
|
+
if (numberOfTypes < 3) {
|
73
|
+
return Object.keys(typeCount)
|
74
|
+
.map((key) => {
|
75
|
+
return formatEvent(key, typeCount[key]);
|
76
|
+
})
|
77
|
+
.join(", ");
|
78
|
+
}
|
79
|
+
|
80
|
+
// Too many types, just return the number of events
|
81
|
+
if (children.length === 1) {
|
82
|
+
return "1 event";
|
83
|
+
} else {
|
84
|
+
return `${children.length} events`;
|
85
|
+
}
|
86
|
+
};
|
87
|
+
|
88
|
+
/**
|
89
|
+
* Returns a descriptor object containing icon and style based on the event type and name.
|
90
|
+
*/
|
91
|
+
const spanDescriptor = (
|
92
|
+
event: SpanBeginEvent,
|
93
|
+
): { icon?: string; name?: string; endSpace?: boolean; collapse?: boolean } => {
|
94
|
+
const rootStepDescriptor = {
|
95
|
+
endSpace: true,
|
96
|
+
};
|
97
|
+
|
98
|
+
if (event.type === "solver") {
|
99
|
+
switch (event.name) {
|
100
|
+
case "chain_of_thought":
|
101
|
+
return {
|
102
|
+
...rootStepDescriptor,
|
103
|
+
collapse: false,
|
104
|
+
};
|
105
|
+
case "generate":
|
106
|
+
return {
|
107
|
+
...rootStepDescriptor,
|
108
|
+
collapse: false,
|
109
|
+
};
|
110
|
+
case "self_critique":
|
111
|
+
return {
|
112
|
+
...rootStepDescriptor,
|
113
|
+
collapse: false,
|
114
|
+
};
|
115
|
+
case "system_message":
|
116
|
+
return {
|
117
|
+
...rootStepDescriptor,
|
118
|
+
collapse: true,
|
119
|
+
};
|
120
|
+
case "use_tools":
|
121
|
+
return {
|
122
|
+
...rootStepDescriptor,
|
123
|
+
collapse: false,
|
124
|
+
};
|
125
|
+
case "multiple_choice":
|
126
|
+
return {
|
127
|
+
...rootStepDescriptor,
|
128
|
+
collapse: false,
|
129
|
+
};
|
130
|
+
default:
|
131
|
+
return {
|
132
|
+
...rootStepDescriptor,
|
133
|
+
collapse: false,
|
134
|
+
};
|
135
|
+
}
|
136
|
+
} else if (event.type === "scorer") {
|
137
|
+
return {
|
138
|
+
...rootStepDescriptor,
|
139
|
+
collapse: false,
|
140
|
+
};
|
141
|
+
} else if (event.event === "span_begin") {
|
142
|
+
if (event.span_id === kSandboxSignalName) {
|
143
|
+
return {
|
144
|
+
...rootStepDescriptor,
|
145
|
+
name: "Sandbox Events",
|
146
|
+
collapse: true,
|
147
|
+
};
|
148
|
+
} else if (event.name === "init") {
|
149
|
+
return {
|
150
|
+
...rootStepDescriptor,
|
151
|
+
name: "Init",
|
152
|
+
collapse: true,
|
153
|
+
};
|
154
|
+
} else {
|
155
|
+
return {
|
156
|
+
...rootStepDescriptor,
|
157
|
+
collapse: false,
|
158
|
+
};
|
159
|
+
}
|
160
|
+
} else {
|
161
|
+
switch (event.name) {
|
162
|
+
case "sample_init":
|
163
|
+
return {
|
164
|
+
...rootStepDescriptor,
|
165
|
+
name: "Sample Init",
|
166
|
+
collapse: true,
|
167
|
+
};
|
168
|
+
default:
|
169
|
+
return {
|
170
|
+
endSpace: false,
|
171
|
+
};
|
172
|
+
}
|
173
|
+
}
|
174
|
+
};
|
@@ -4,7 +4,7 @@ import { resolveToolInput } from "../chat/tools/tool";
|
|
4
4
|
import { ToolCallView } from "../chat/tools/ToolCallView";
|
5
5
|
import { ApprovalEventView } from "./ApprovalEventView";
|
6
6
|
import { EventPanel } from "./event/EventPanel";
|
7
|
-
import {
|
7
|
+
import { TranscriptComponent } from "./TranscriptView";
|
8
8
|
|
9
9
|
import clsx from "clsx";
|
10
10
|
import { FC, useMemo } from "react";
|
@@ -12,11 +12,12 @@ import { PulsingDots } from "../../../components/PulsingDots";
|
|
12
12
|
import { ChatView } from "../chat/ChatView";
|
13
13
|
import { formatTiming, formatTitle } from "./event/utils";
|
14
14
|
import styles from "./ToolEventView.module.css";
|
15
|
+
import { EventNode } from "./types";
|
15
16
|
|
16
17
|
interface ToolEventViewProps {
|
17
18
|
id: string;
|
18
19
|
event: ToolEvent;
|
19
|
-
|
20
|
+
children: EventNode[];
|
20
21
|
className?: string | string[];
|
21
22
|
}
|
22
23
|
|
@@ -26,7 +27,7 @@ interface ToolEventViewProps {
|
|
26
27
|
export const ToolEventView: FC<ToolEventViewProps> = ({
|
27
28
|
id,
|
28
29
|
event,
|
29
|
-
|
30
|
+
children,
|
30
31
|
className,
|
31
32
|
}) => {
|
32
33
|
// Extract tool input
|
@@ -92,13 +93,12 @@ export const ToolEventView: FC<ToolEventViewProps> = ({
|
|
92
93
|
</div>
|
93
94
|
) : undefined}
|
94
95
|
</div>
|
95
|
-
{
|
96
|
-
<
|
97
|
-
id={`${id}-subtask`}
|
96
|
+
{children.length > 0 ? (
|
97
|
+
<TranscriptComponent
|
98
98
|
data-name="Transcript"
|
99
|
+
id={`${id}-subtask`}
|
100
|
+
eventNodes={children}
|
99
101
|
data-default={event.failed || event.agent ? true : null}
|
100
|
-
events={event.events}
|
101
|
-
depth={depth + 1}
|
102
102
|
/>
|
103
103
|
) : (
|
104
104
|
""
|
@@ -17,6 +17,7 @@ import { ToolEventView } from "./ToolEventView";
|
|
17
17
|
import { EventNode } from "./types";
|
18
18
|
|
19
19
|
import clsx from "clsx";
|
20
|
+
import { SpanEventView } from "./SpanEventView";
|
20
21
|
import styles from "./TranscriptView.module.css";
|
21
22
|
import { TranscriptVirtualListComponent } from "./TranscriptVirtualListComponent";
|
22
23
|
import { fixupEventStream } from "./transform/fixups";
|
@@ -64,7 +65,6 @@ export const TranscriptVirtualList: FC<TranscriptVirtualListProps> = memo(
|
|
64
65
|
const eventNodes = useMemo(() => {
|
65
66
|
const resolvedEvents = fixupEventStream(events, !running);
|
66
67
|
const eventNodes = treeifyEvents(resolvedEvents, depth || 0);
|
67
|
-
|
68
68
|
return eventNodes;
|
69
69
|
}, [events, depth]);
|
70
70
|
|
@@ -201,6 +201,16 @@ export const RenderedEventNode: FC<RenderedEventNodeProps> = memo(
|
|
201
201
|
<StateEventView id={id} event={node.event} className={className} />
|
202
202
|
);
|
203
203
|
|
204
|
+
case "span_begin":
|
205
|
+
return (
|
206
|
+
<SpanEventView
|
207
|
+
id={id}
|
208
|
+
event={node.event}
|
209
|
+
children={node.children}
|
210
|
+
className={className}
|
211
|
+
/>
|
212
|
+
);
|
213
|
+
|
204
214
|
case "step":
|
205
215
|
return (
|
206
216
|
<StepEventView
|
@@ -237,7 +247,7 @@ export const RenderedEventNode: FC<RenderedEventNodeProps> = memo(
|
|
237
247
|
id={id}
|
238
248
|
event={node.event}
|
239
249
|
className={className}
|
240
|
-
|
250
|
+
children={node.children}
|
241
251
|
/>
|
242
252
|
);
|
243
253
|
|
@@ -9,7 +9,6 @@ import {
|
|
9
9
|
import { ApplicationIcons } from "../../../appearance/icons";
|
10
10
|
import { EventNavs } from "./EventNavs";
|
11
11
|
|
12
|
-
import { ProgressBar } from "../../../../components/ProgressBar";
|
13
12
|
import { useProperty } from "../../../../state/hooks";
|
14
13
|
import styles from "./EventPanel.module.css";
|
15
14
|
|
@@ -41,7 +40,6 @@ export const EventPanel: FC<EventPanelProps> = ({
|
|
41
40
|
icon,
|
42
41
|
collapse,
|
43
42
|
children,
|
44
|
-
running,
|
45
43
|
}) => {
|
46
44
|
const [isCollapsed, setCollapsed] = useProperty(id, "collapsed", {
|
47
45
|
defaultValue: !!collapse,
|
@@ -191,7 +189,6 @@ export const EventPanel: FC<EventPanelProps> = ({
|
|
191
189
|
})}
|
192
190
|
</div>
|
193
191
|
</div>
|
194
|
-
<ProgressBar animating={!!running} />
|
195
192
|
</>
|
196
193
|
);
|
197
194
|
return card;
|