inspect-ai 0.3.93__py3-none-any.whl → 0.3.95__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_display/textual/widgets/samples.py +3 -3
- inspect_ai/_display/textual/widgets/transcript.py +3 -29
- inspect_ai/_eval/loader.py +1 -1
- inspect_ai/_eval/task/run.py +21 -12
- inspect_ai/_util/answer.py +26 -0
- inspect_ai/_util/constants.py +0 -1
- inspect_ai/_util/exception.py +4 -0
- inspect_ai/_util/hash.py +39 -0
- inspect_ai/_util/local_server.py +51 -21
- inspect_ai/_util/path.py +22 -0
- inspect_ai/_util/trace.py +1 -1
- inspect_ai/_util/working.py +4 -0
- inspect_ai/_view/www/dist/assets/index.css +23 -22
- inspect_ai/_view/www/dist/assets/index.js +517 -204
- inspect_ai/_view/www/log-schema.json +375 -0
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/@types/log.d.ts +90 -12
- inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
- inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
- inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
- inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
- inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
- inspect_ai/_view/www/src/app/types.ts +12 -2
- inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
- inspect_ai/_view/www/src/state/hooks.ts +19 -3
- inspect_ai/_view/www/src/state/logSlice.ts +23 -5
- inspect_ai/_view/www/yarn.lock +9 -9
- inspect_ai/agent/_as_solver.py +3 -1
- inspect_ai/agent/_as_tool.py +6 -4
- inspect_ai/agent/_bridge/patch.py +1 -3
- inspect_ai/agent/_handoff.py +5 -1
- inspect_ai/agent/_react.py +4 -3
- inspect_ai/agent/_run.py +6 -1
- inspect_ai/agent/_types.py +9 -0
- inspect_ai/analysis/__init__.py +0 -0
- inspect_ai/analysis/beta/__init__.py +57 -0
- inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
- inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
- inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
- inspect_ai/analysis/beta/_dataframe/evals/table.py +140 -0
- inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/events/columns.py +37 -0
- inspect_ai/analysis/beta/_dataframe/events/table.py +14 -0
- inspect_ai/analysis/beta/_dataframe/extract.py +54 -0
- inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
- inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
- inspect_ai/analysis/beta/_dataframe/messages/table.py +87 -0
- inspect_ai/analysis/beta/_dataframe/record.py +377 -0
- inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
- inspect_ai/analysis/beta/_dataframe/samples/columns.py +73 -0
- inspect_ai/analysis/beta/_dataframe/samples/extract.py +82 -0
- inspect_ai/analysis/beta/_dataframe/samples/table.py +329 -0
- inspect_ai/analysis/beta/_dataframe/util.py +157 -0
- inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
- inspect_ai/dataset/_dataset.py +6 -3
- inspect_ai/log/__init__.py +10 -0
- inspect_ai/log/_convert.py +4 -9
- inspect_ai/log/_file.py +1 -1
- inspect_ai/log/_log.py +21 -1
- inspect_ai/log/_samples.py +14 -17
- inspect_ai/log/_transcript.py +77 -35
- inspect_ai/log/_tree.py +118 -0
- inspect_ai/model/_call_tools.py +44 -35
- inspect_ai/model/_model.py +51 -44
- inspect_ai/model/_openai_responses.py +17 -18
- inspect_ai/model/_providers/anthropic.py +30 -5
- inspect_ai/model/_providers/hf.py +27 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/sglang.py +8 -2
- inspect_ai/model/_providers/vllm.py +6 -2
- inspect_ai/scorer/_choice.py +1 -2
- inspect_ai/solver/_chain.py +1 -1
- inspect_ai/solver/_fork.py +1 -1
- inspect_ai/solver/_multiple_choice.py +9 -23
- inspect_ai/solver/_plan.py +2 -2
- inspect_ai/solver/_task_state.py +7 -3
- inspect_ai/solver/_transcript.py +6 -7
- inspect_ai/tool/_mcp/_context.py +3 -5
- inspect_ai/tool/_mcp/_mcp.py +6 -5
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tools/_execute.py +4 -1
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
- inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
- inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
- inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
- inspect_ai/util/__init__.py +4 -0
- inspect_ai/util/_anyio.py +11 -0
- inspect_ai/util/_collect.py +50 -0
- inspect_ai/util/_sandbox/events.py +3 -2
- inspect_ai/util/_span.py +58 -0
- inspect_ai/util/_subtask.py +27 -42
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/METADATA +8 -1
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/RECORD +114 -82
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/WHEEL +1 -1
- inspect_ai/_display/core/group.py +0 -79
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/top_level.txt +0 -0
@@ -316,6 +316,7 @@ export type Explanation = string | null;
|
|
316
316
|
export type Metadata6 = {
|
317
317
|
[k: string]: unknown;
|
318
318
|
} | null;
|
319
|
+
export type SpanId = string | null;
|
319
320
|
export type Timestamp = string;
|
320
321
|
export type WorkingStart = number;
|
321
322
|
export type Pending = boolean | null;
|
@@ -339,6 +340,7 @@ export type Files1 = {
|
|
339
340
|
} | null;
|
340
341
|
export type Setup1 = string | null;
|
341
342
|
export type JsonValue = unknown;
|
343
|
+
export type SpanId1 = string | null;
|
342
344
|
export type Timestamp1 = string;
|
343
345
|
export type WorkingStart1 = number;
|
344
346
|
export type Pending1 = boolean | null;
|
@@ -352,6 +354,7 @@ export type Type10 =
|
|
352
354
|
| "custom";
|
353
355
|
export type Message2 = string;
|
354
356
|
export type Limit1 = number | null;
|
357
|
+
export type SpanId2 = string | null;
|
355
358
|
export type Timestamp2 = string;
|
356
359
|
export type WorkingStart2 = number;
|
357
360
|
export type Pending2 = boolean | null;
|
@@ -366,6 +369,7 @@ export type Input2 = string | null;
|
|
366
369
|
export type Result = number | null;
|
367
370
|
export type Output = string | null;
|
368
371
|
export type Completed = string | null;
|
372
|
+
export type SpanId3 = string | null;
|
369
373
|
export type Timestamp3 = string;
|
370
374
|
export type WorkingStart3 = number;
|
371
375
|
export type Pending3 = boolean | null;
|
@@ -374,11 +378,13 @@ export type Op = "remove" | "add" | "replace" | "move" | "test" | "copy";
|
|
374
378
|
export type Path = string;
|
375
379
|
export type From = string | null;
|
376
380
|
export type Changes = JsonChange[];
|
381
|
+
export type SpanId4 = string | null;
|
377
382
|
export type Timestamp4 = string;
|
378
383
|
export type WorkingStart4 = number;
|
379
384
|
export type Pending4 = boolean | null;
|
380
385
|
export type Event4 = "store";
|
381
386
|
export type Changes1 = JsonChange[];
|
387
|
+
export type SpanId5 = string | null;
|
382
388
|
export type Timestamp5 = string;
|
383
389
|
export type WorkingStart5 = number;
|
384
390
|
export type Pending5 = boolean | null;
|
@@ -399,11 +405,13 @@ export type Additionalproperties1 = boolean;
|
|
399
405
|
export type Tools1 = ToolInfo[];
|
400
406
|
export type ToolChoice = ("auto" | "any" | "none") | ToolFunction;
|
401
407
|
export type Name9 = string;
|
408
|
+
export type Retries = number | null;
|
402
409
|
export type Error1 = string | null;
|
403
410
|
export type Cache = ("read" | "write") | null;
|
404
411
|
export type Time1 = number | null;
|
405
412
|
export type Completed1 = string | null;
|
406
413
|
export type WorkingTime = number | null;
|
414
|
+
export type SpanId6 = string | null;
|
407
415
|
export type Timestamp6 = string;
|
408
416
|
export type WorkingStart6 = number;
|
409
417
|
export type Pending6 = boolean | null;
|
@@ -428,6 +436,7 @@ export type Result1 =
|
|
428
436
|
| ContentVideo
|
429
437
|
)[];
|
430
438
|
export type Truncated = [unknown, unknown] | null;
|
439
|
+
export type SpanId7 = string | null;
|
431
440
|
export type Timestamp7 = string;
|
432
441
|
export type WorkingStart7 = number;
|
433
442
|
export type Pending7 = boolean | null;
|
@@ -441,22 +450,26 @@ export type Decision =
|
|
441
450
|
| "escalate"
|
442
451
|
| "terminate";
|
443
452
|
export type Explanation1 = string | null;
|
453
|
+
export type SpanId8 = string | null;
|
444
454
|
export type Timestamp8 = string;
|
445
455
|
export type WorkingStart8 = number;
|
446
456
|
export type Pending8 = boolean | null;
|
447
457
|
export type Event8 = "input";
|
448
458
|
export type Input4 = string;
|
449
459
|
export type InputAnsi = string;
|
460
|
+
export type SpanId9 = string | null;
|
450
461
|
export type Timestamp9 = string;
|
451
462
|
export type WorkingStart9 = number;
|
452
463
|
export type Pending9 = boolean | null;
|
453
464
|
export type Event9 = "score";
|
454
465
|
export type Target2 = string | string[] | null;
|
455
466
|
export type Intermediate = boolean;
|
467
|
+
export type SpanId10 = string | null;
|
456
468
|
export type Timestamp10 = string;
|
457
469
|
export type WorkingStart10 = number;
|
458
470
|
export type Pending10 = boolean | null;
|
459
471
|
export type Event10 = "error";
|
472
|
+
export type SpanId11 = string | null;
|
460
473
|
export type Timestamp11 = string;
|
461
474
|
export type WorkingStart11 = number;
|
462
475
|
export type Pending11 = boolean | null;
|
@@ -476,24 +489,42 @@ export type Created1 = number;
|
|
476
489
|
export type Filename = string;
|
477
490
|
export type Module = string;
|
478
491
|
export type Lineno = number;
|
492
|
+
export type SpanId12 = string | null;
|
479
493
|
export type Timestamp12 = string;
|
480
494
|
export type WorkingStart12 = number;
|
481
495
|
export type Pending12 = boolean | null;
|
482
496
|
export type Event12 = "info";
|
483
497
|
export type Source4 = string | null;
|
498
|
+
export type SpanId13 = string | null;
|
484
499
|
export type Timestamp13 = string;
|
485
500
|
export type WorkingStart13 = number;
|
486
501
|
export type Pending13 = boolean | null;
|
487
|
-
export type Event13 = "step";
|
488
|
-
export type Action1 = "begin" | "end";
|
502
|
+
export type Event13 = "span_begin";
|
503
|
+
export type Id8 = string;
|
504
|
+
export type ParentId = string | null;
|
489
505
|
export type Type13 = string | null;
|
490
506
|
export type Name11 = string;
|
507
|
+
export type SpanId14 = string | null;
|
491
508
|
export type Timestamp14 = string;
|
492
509
|
export type WorkingStart14 = number;
|
493
510
|
export type Pending14 = boolean | null;
|
494
|
-
export type Event14 = "subtask";
|
495
|
-
export type Name12 = string;
|
511
|
+
export type Event14 = "span_end";
|
512
|
+
export type Id9 = string;
|
513
|
+
export type SpanId15 = string | null;
|
514
|
+
export type Timestamp15 = string;
|
515
|
+
export type WorkingStart15 = number;
|
516
|
+
export type Pending15 = boolean | null;
|
517
|
+
export type Event15 = "step";
|
518
|
+
export type Action1 = "begin" | "end";
|
496
519
|
export type Type14 = string | null;
|
520
|
+
export type Name12 = string;
|
521
|
+
export type SpanId16 = string | null;
|
522
|
+
export type Timestamp16 = string;
|
523
|
+
export type WorkingStart16 = number;
|
524
|
+
export type Pending16 = boolean | null;
|
525
|
+
export type Event16 = "subtask";
|
526
|
+
export type Name13 = string;
|
527
|
+
export type Type15 = string | null;
|
497
528
|
export type Events2 = (
|
498
529
|
| SampleInitEvent
|
499
530
|
| SampleLimitEvent
|
@@ -508,6 +539,8 @@ export type Events2 = (
|
|
508
539
|
| ErrorEvent
|
509
540
|
| LoggerEvent
|
510
541
|
| InfoEvent
|
542
|
+
| SpanBeginEvent
|
543
|
+
| SpanEndEvent
|
511
544
|
| StepEvent
|
512
545
|
| SubtaskEvent
|
513
546
|
)[];
|
@@ -527,6 +560,8 @@ export type Events1 = (
|
|
527
560
|
| ErrorEvent
|
528
561
|
| LoggerEvent
|
529
562
|
| InfoEvent
|
563
|
+
| SpanBeginEvent
|
564
|
+
| SpanEndEvent
|
530
565
|
| StepEvent
|
531
566
|
| SubtaskEvent
|
532
567
|
)[];
|
@@ -548,6 +583,8 @@ export type Events = (
|
|
548
583
|
| ErrorEvent
|
549
584
|
| LoggerEvent
|
550
585
|
| InfoEvent
|
586
|
+
| SpanBeginEvent
|
587
|
+
| SpanEndEvent
|
551
588
|
| StepEvent
|
552
589
|
| SubtaskEvent
|
553
590
|
)[];
|
@@ -555,7 +592,7 @@ export type TotalTime = number | null;
|
|
555
592
|
export type WorkingTime3 = number | null;
|
556
593
|
export type Uuid = string | null;
|
557
594
|
export type ErrorRetries = EvalError[] | null;
|
558
|
-
export type Type15 =
|
595
|
+
export type Type16 =
|
559
596
|
| "context"
|
560
597
|
| "time"
|
561
598
|
| "working"
|
@@ -1121,6 +1158,7 @@ export interface Store {
|
|
1121
1158
|
* Beginning of processing a Sample.
|
1122
1159
|
*/
|
1123
1160
|
export interface SampleInitEvent {
|
1161
|
+
span_id: SpanId;
|
1124
1162
|
timestamp: Timestamp;
|
1125
1163
|
working_start: WorkingStart;
|
1126
1164
|
pending: Pending;
|
@@ -1145,6 +1183,7 @@ export interface Sample {
|
|
1145
1183
|
* The sample was unable to finish processing due to a limit
|
1146
1184
|
*/
|
1147
1185
|
export interface SampleLimitEvent {
|
1186
|
+
span_id: SpanId1;
|
1148
1187
|
timestamp: Timestamp1;
|
1149
1188
|
working_start: WorkingStart1;
|
1150
1189
|
pending: Pending1;
|
@@ -1157,6 +1196,7 @@ export interface SampleLimitEvent {
|
|
1157
1196
|
* Sandbox execution or I/O
|
1158
1197
|
*/
|
1159
1198
|
export interface SandboxEvent {
|
1199
|
+
span_id: SpanId2;
|
1160
1200
|
timestamp: Timestamp2;
|
1161
1201
|
working_start: WorkingStart2;
|
1162
1202
|
pending: Pending2;
|
@@ -1174,6 +1214,7 @@ export interface SandboxEvent {
|
|
1174
1214
|
* Change to the current `TaskState`
|
1175
1215
|
*/
|
1176
1216
|
export interface StateEvent {
|
1217
|
+
span_id: SpanId3;
|
1177
1218
|
timestamp: Timestamp3;
|
1178
1219
|
working_start: WorkingStart3;
|
1179
1220
|
pending: Pending3;
|
@@ -1198,6 +1239,7 @@ export interface JsonChange {
|
|
1198
1239
|
* Change to data within the current `Store`.
|
1199
1240
|
*/
|
1200
1241
|
export interface StoreEvent {
|
1242
|
+
span_id: SpanId4;
|
1201
1243
|
timestamp: Timestamp4;
|
1202
1244
|
working_start: WorkingStart4;
|
1203
1245
|
pending: Pending4;
|
@@ -1208,6 +1250,7 @@ export interface StoreEvent {
|
|
1208
1250
|
* Call to a language model.
|
1209
1251
|
*/
|
1210
1252
|
export interface ModelEvent {
|
1253
|
+
span_id: SpanId5;
|
1211
1254
|
timestamp: Timestamp5;
|
1212
1255
|
working_start: WorkingStart5;
|
1213
1256
|
pending: Pending5;
|
@@ -1219,6 +1262,7 @@ export interface ModelEvent {
|
|
1219
1262
|
tool_choice: ToolChoice;
|
1220
1263
|
config: GenerateConfig;
|
1221
1264
|
output: ModelOutput;
|
1265
|
+
retries: Retries;
|
1222
1266
|
error: Error1;
|
1223
1267
|
cache: Cache;
|
1224
1268
|
call: ModelCall | null;
|
@@ -1289,6 +1333,7 @@ export interface Response {
|
|
1289
1333
|
* Call to a tool.
|
1290
1334
|
*/
|
1291
1335
|
export interface ToolEvent {
|
1336
|
+
span_id: SpanId6;
|
1292
1337
|
timestamp: Timestamp6;
|
1293
1338
|
working_start: WorkingStart6;
|
1294
1339
|
pending: Pending6;
|
@@ -1315,6 +1360,7 @@ export interface Arguments1 {
|
|
1315
1360
|
* Tool approval.
|
1316
1361
|
*/
|
1317
1362
|
export interface ApprovalEvent {
|
1363
|
+
span_id: SpanId7;
|
1318
1364
|
timestamp: Timestamp7;
|
1319
1365
|
working_start: WorkingStart7;
|
1320
1366
|
pending: Pending7;
|
@@ -1341,6 +1387,7 @@ export interface ToolCallView {
|
|
1341
1387
|
* Input screen interaction.
|
1342
1388
|
*/
|
1343
1389
|
export interface InputEvent {
|
1390
|
+
span_id: SpanId8;
|
1344
1391
|
timestamp: Timestamp8;
|
1345
1392
|
working_start: WorkingStart8;
|
1346
1393
|
pending: Pending8;
|
@@ -1355,6 +1402,7 @@ export interface InputEvent {
|
|
1355
1402
|
* resulting from a call to `score`.
|
1356
1403
|
*/
|
1357
1404
|
export interface ScoreEvent {
|
1405
|
+
span_id: SpanId9;
|
1358
1406
|
timestamp: Timestamp9;
|
1359
1407
|
working_start: WorkingStart9;
|
1360
1408
|
pending: Pending9;
|
@@ -1367,6 +1415,7 @@ export interface ScoreEvent {
|
|
1367
1415
|
* Event with sample error.
|
1368
1416
|
*/
|
1369
1417
|
export interface ErrorEvent {
|
1418
|
+
span_id: SpanId10;
|
1370
1419
|
timestamp: Timestamp10;
|
1371
1420
|
working_start: WorkingStart10;
|
1372
1421
|
pending: Pending10;
|
@@ -1377,6 +1426,7 @@ export interface ErrorEvent {
|
|
1377
1426
|
* Log message recorded with Python logger.
|
1378
1427
|
*/
|
1379
1428
|
export interface LoggerEvent {
|
1429
|
+
span_id: SpanId11;
|
1380
1430
|
timestamp: Timestamp11;
|
1381
1431
|
working_start: WorkingStart11;
|
1382
1432
|
pending: Pending11;
|
@@ -1399,6 +1449,7 @@ export interface LoggingMessage {
|
|
1399
1449
|
* Event with custom info/data.
|
1400
1450
|
*/
|
1401
1451
|
export interface InfoEvent {
|
1452
|
+
span_id: SpanId12;
|
1402
1453
|
timestamp: Timestamp12;
|
1403
1454
|
working_start: WorkingStart12;
|
1404
1455
|
pending: Pending12;
|
@@ -1407,27 +1458,54 @@ export interface InfoEvent {
|
|
1407
1458
|
data: JsonValue;
|
1408
1459
|
}
|
1409
1460
|
/**
|
1410
|
-
* Step within current sample or subtask.
|
1461
|
+
* Mark the beginning of a transcript span.
|
1411
1462
|
*/
|
1412
|
-
export interface StepEvent {
|
1463
|
+
export interface SpanBeginEvent {
|
1464
|
+
span_id: SpanId13;
|
1413
1465
|
timestamp: Timestamp13;
|
1414
1466
|
working_start: WorkingStart13;
|
1415
1467
|
pending: Pending13;
|
1416
1468
|
event: Event13;
|
1417
|
-
|
1469
|
+
id: Id8;
|
1470
|
+
parent_id: ParentId;
|
1418
1471
|
type: Type13;
|
1419
1472
|
name: Name11;
|
1420
1473
|
}
|
1421
1474
|
/**
|
1422
|
-
* Subtask spawned.
|
1475
|
+
* Mark the end of a transcript span.
|
1423
1476
|
*/
|
1424
|
-
export interface SubtaskEvent {
|
1477
|
+
export interface SpanEndEvent {
|
1478
|
+
span_id: SpanId14;
|
1425
1479
|
timestamp: Timestamp14;
|
1426
1480
|
working_start: WorkingStart14;
|
1427
1481
|
pending: Pending14;
|
1428
1482
|
event: Event14;
|
1429
|
-
|
1483
|
+
id: Id9;
|
1484
|
+
}
|
1485
|
+
/**
|
1486
|
+
* Step within current sample or subtask.
|
1487
|
+
*/
|
1488
|
+
export interface StepEvent {
|
1489
|
+
span_id: SpanId15;
|
1490
|
+
timestamp: Timestamp15;
|
1491
|
+
working_start: WorkingStart15;
|
1492
|
+
pending: Pending15;
|
1493
|
+
event: Event15;
|
1494
|
+
action: Action1;
|
1430
1495
|
type: Type14;
|
1496
|
+
name: Name12;
|
1497
|
+
}
|
1498
|
+
/**
|
1499
|
+
* Subtask spawned.
|
1500
|
+
*/
|
1501
|
+
export interface SubtaskEvent {
|
1502
|
+
span_id: SpanId16;
|
1503
|
+
timestamp: Timestamp16;
|
1504
|
+
working_start: WorkingStart16;
|
1505
|
+
pending: Pending16;
|
1506
|
+
event: Event16;
|
1507
|
+
name: Name13;
|
1508
|
+
type: Type15;
|
1431
1509
|
input: Input5;
|
1432
1510
|
result: Result2;
|
1433
1511
|
events: Events2;
|
@@ -1450,7 +1528,7 @@ export interface Attachments {
|
|
1450
1528
|
* Limit encountered by sample.
|
1451
1529
|
*/
|
1452
1530
|
export interface EvalSampleLimit {
|
1453
|
-
type: Type15;
|
1531
|
+
type: Type16;
|
1454
1532
|
limit: Limit2;
|
1455
1533
|
}
|
1456
1534
|
/**
|
@@ -8,7 +8,7 @@ import {
|
|
8
8
|
EvalStats,
|
9
9
|
} from "../../../@types/log";
|
10
10
|
import { EvalDescriptor } from "../../../app/samples/descriptor/types";
|
11
|
-
import {
|
11
|
+
import { sampleFilterItems } from "../../../app/samples/sample-tools/filters";
|
12
12
|
import { ExpandablePanel } from "../../../components/ExpandablePanel";
|
13
13
|
import { LabeledValue } from "../../../components/LabeledValue";
|
14
14
|
import { useEvalDescriptor } from "../../../state/hooks";
|
@@ -181,7 +181,7 @@ const ScorerSummary: FC<ScoreSummaryProps> = ({ evalDescriptor }) => {
|
|
181
181
|
return null;
|
182
182
|
}
|
183
183
|
|
184
|
-
const items =
|
184
|
+
const items = sampleFilterItems(evalDescriptor);
|
185
185
|
return (
|
186
186
|
<span style={{ position: "relative" }}>
|
187
187
|
{Array.from(items).map((item, index, array) => (
|
@@ -50,10 +50,7 @@ export const useSamplesTabConfig = (
|
|
50
50
|
: totalSampleCount === 1
|
51
51
|
? [<ScoreFilterTools />]
|
52
52
|
: [
|
53
|
-
<SampleTools
|
54
|
-
samples={sampleSummaries || []}
|
55
|
-
key="sample-tools"
|
56
|
-
/>,
|
53
|
+
<SampleTools key="sample-tools" />,
|
57
54
|
evalStatus === "started" && !streamSamples && (
|
58
55
|
<ToolButton
|
59
56
|
key="refresh"
|
@@ -1,6 +1,5 @@
|
|
1
1
|
import { FC } from "react";
|
2
2
|
import { Fragment } from "react/jsx-runtime";
|
3
|
-
import { SampleSummary } from "../../client/api/types";
|
4
3
|
import { useScore, useScores } from "../../state/hooks";
|
5
4
|
import { useStore } from "../../state/store";
|
6
5
|
import { EpochFilter } from "./sample-tools/EpochFilter";
|
@@ -8,16 +7,11 @@ import { SampleFilter } from "./sample-tools/sample-filter/SampleFilter";
|
|
8
7
|
import { SelectScorer } from "./sample-tools/SelectScorer";
|
9
8
|
import { SortFilter } from "./sample-tools/SortFilter";
|
10
9
|
|
11
|
-
interface SampleToolsProps {
|
12
|
-
samples: SampleSummary[];
|
13
|
-
}
|
10
|
+
interface SampleToolsProps {}
|
14
11
|
|
15
|
-
export const SampleTools: FC<SampleToolsProps> = (
|
12
|
+
export const SampleTools: FC<SampleToolsProps> = () => {
|
16
13
|
const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
|
17
14
|
|
18
|
-
const filter = useStore((state) => state.log.filter);
|
19
|
-
const setFilter = useStore((state) => state.logActions.setFilter);
|
20
|
-
|
21
15
|
const scores = useScores();
|
22
16
|
const score = useScore();
|
23
17
|
const setScore = useStore((state) => state.logActions.setScore);
|
@@ -29,11 +23,7 @@ export const SampleTools: FC<SampleToolsProps> = ({ samples }) => {
|
|
29
23
|
const epochs = selectedLogSummary?.eval.config.epochs || 1;
|
30
24
|
return (
|
31
25
|
<Fragment>
|
32
|
-
<SampleFilter
|
33
|
-
samples={samples}
|
34
|
-
scoreFilter={filter}
|
35
|
-
setScoreFilter={setFilter}
|
36
|
-
/>
|
26
|
+
<SampleFilter />
|
37
27
|
{scores?.length > 1 ? (
|
38
28
|
<SelectScorer scores={scores} score={score} setScore={setScore} />
|
39
29
|
) : undefined}
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import clsx from "clsx";
|
2
2
|
import { ScoreLabel } from "../../../app/types";
|
3
3
|
|
4
|
-
import { ChangeEvent, FC, useCallback } from "react";
|
4
|
+
import { ChangeEvent, FC, useCallback, useMemo } from "react";
|
5
5
|
import styles from "./SelectScorer.module.css";
|
6
6
|
|
7
7
|
interface SelectScorerProps {
|
@@ -15,23 +15,18 @@ export const SelectScorer: FC<SelectScorerProps> = ({
|
|
15
15
|
score,
|
16
16
|
setScore,
|
17
17
|
}) => {
|
18
|
-
const scorers =
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
(index: number) => {
|
31
|
-
setScore(scores[index]);
|
32
|
-
},
|
33
|
-
[setScore, scores],
|
34
|
-
);
|
18
|
+
const scorers = useMemo(() => {
|
19
|
+
return scores.reduce((accum, scorer) => {
|
20
|
+
if (
|
21
|
+
!accum.find((sc) => {
|
22
|
+
return scorer.scorer === sc.scorer;
|
23
|
+
})
|
24
|
+
) {
|
25
|
+
accum.push(scorer);
|
26
|
+
}
|
27
|
+
return accum;
|
28
|
+
}, [] as ScoreLabel[]);
|
29
|
+
}, [scores]);
|
35
30
|
|
36
31
|
if (scorers.length === 1) {
|
37
32
|
// There is only a single scorer in play, just show the list of available scores
|
@@ -50,8 +45,8 @@ export const SelectScorer: FC<SelectScorerProps> = ({
|
|
50
45
|
</span>
|
51
46
|
<ScoreSelector
|
52
47
|
scores={scores}
|
53
|
-
|
54
|
-
|
48
|
+
selectedScore={score}
|
49
|
+
setSelectedScore={setScore}
|
55
50
|
/>
|
56
51
|
</div>
|
57
52
|
);
|
@@ -79,15 +74,15 @@ export const SelectScorer: FC<SelectScorerProps> = ({
|
|
79
74
|
</span>
|
80
75
|
<ScorerSelector
|
81
76
|
scorers={scorers}
|
82
|
-
|
83
|
-
|
77
|
+
selectedScore={score}
|
78
|
+
setSelectedScore={setScore}
|
84
79
|
/>
|
85
80
|
{scorerScores.length > 1 ? (
|
86
81
|
<ScoreSelector
|
87
82
|
className={clsx(styles.secondSel)}
|
88
83
|
scores={scorerScores}
|
89
|
-
|
90
|
-
|
84
|
+
selectedScore={score}
|
85
|
+
setSelectedScore={setScore}
|
91
86
|
/>
|
92
87
|
) : undefined}
|
93
88
|
</div>
|
@@ -97,25 +92,33 @@ export const SelectScorer: FC<SelectScorerProps> = ({
|
|
97
92
|
|
98
93
|
interface ScoreSelectorProps {
|
99
94
|
scores: ScoreLabel[];
|
100
|
-
|
101
|
-
|
95
|
+
selectedScore?: ScoreLabel;
|
96
|
+
setSelectedScore: (score: ScoreLabel) => void;
|
102
97
|
className?: string | string[];
|
103
98
|
}
|
104
99
|
|
105
100
|
const ScoreSelector: FC<ScoreSelectorProps> = ({
|
106
101
|
scores,
|
107
|
-
|
108
|
-
|
102
|
+
selectedScore,
|
103
|
+
setSelectedScore,
|
109
104
|
className,
|
110
105
|
}) => {
|
111
106
|
const handleChange = useCallback(
|
112
107
|
(e: ChangeEvent<HTMLSelectElement>) => {
|
113
108
|
const sel = e.target as HTMLSelectElement;
|
114
|
-
|
109
|
+
setSelectedScore(scores[sel.selectedIndex]);
|
115
110
|
},
|
116
|
-
[
|
111
|
+
[setSelectedScore, scores],
|
117
112
|
);
|
118
113
|
|
114
|
+
const index = scores.findIndex((sc) => {
|
115
|
+
return (
|
116
|
+
selectedScore &&
|
117
|
+
sc.name === selectedScore.name &&
|
118
|
+
sc.scorer === selectedScore.scorer
|
119
|
+
);
|
120
|
+
});
|
121
|
+
|
119
122
|
return (
|
120
123
|
<select
|
121
124
|
className={clsx(
|
@@ -125,7 +128,7 @@ const ScoreSelector: FC<ScoreSelectorProps> = ({
|
|
125
128
|
className,
|
126
129
|
)}
|
127
130
|
aria-label=".select-scorer-label"
|
128
|
-
value={scores[
|
131
|
+
value={scores[index].name}
|
129
132
|
onChange={handleChange}
|
130
133
|
>
|
131
134
|
{scores.map((score) => {
|
@@ -141,28 +144,32 @@ const ScoreSelector: FC<ScoreSelectorProps> = ({
|
|
141
144
|
|
142
145
|
interface ScorerSelectorProps {
|
143
146
|
scorers: ScoreLabel[];
|
144
|
-
|
145
|
-
|
147
|
+
selectedScore?: ScoreLabel;
|
148
|
+
setSelectedScore: (score: ScoreLabel) => void;
|
146
149
|
}
|
147
150
|
|
148
151
|
const ScorerSelector: FC<ScorerSelectorProps> = ({
|
149
152
|
scorers,
|
150
|
-
|
151
|
-
|
153
|
+
selectedScore,
|
154
|
+
setSelectedScore,
|
152
155
|
}) => {
|
153
156
|
const handleChange = useCallback(
|
154
157
|
(e: ChangeEvent<HTMLSelectElement>) => {
|
155
158
|
const sel = e.target as HTMLSelectElement;
|
156
|
-
|
159
|
+
setSelectedScore(scorers[sel.selectedIndex]);
|
157
160
|
},
|
158
|
-
[
|
161
|
+
[setSelectedScore, scorers],
|
159
162
|
);
|
160
163
|
|
164
|
+
const index = scorers.findIndex((sc) => {
|
165
|
+
return selectedScore && sc.scorer === selectedScore.scorer;
|
166
|
+
});
|
167
|
+
|
161
168
|
return (
|
162
169
|
<select
|
163
170
|
className={clsx("form-select", "form-select-sm", "text-size-smaller")}
|
164
171
|
aria-label=".epoch-filter-label"
|
165
|
-
value={scorers[
|
172
|
+
value={scorers[index].scorer}
|
166
173
|
onChange={handleChange}
|
167
174
|
>
|
168
175
|
{scorers.map((scorer) => {
|
@@ -175,13 +182,3 @@ const ScorerSelector: FC<ScorerSelectorProps> = ({
|
|
175
182
|
</select>
|
176
183
|
);
|
177
184
|
};
|
178
|
-
|
179
|
-
const scoreIndex = (scores: ScoreLabel[], score?: ScoreLabel) =>
|
180
|
-
scores.findIndex((sc) => {
|
181
|
-
return score && sc.name === score.name && sc.scorer === score.scorer;
|
182
|
-
});
|
183
|
-
|
184
|
-
const scorerIndex = (scores: ScoreLabel[], score?: ScoreLabel) =>
|
185
|
-
scores.findIndex((sc) => {
|
186
|
-
return score && sc.scorer === score.scorer;
|
187
|
-
});
|
@@ -1,19 +1,12 @@
|
|
1
1
|
import { compileExpression } from "filtrex";
|
2
2
|
import { Scores1 } from "../../../@types/log";
|
3
|
-
import { ScoreLabel } from "../../../app/types";
|
3
|
+
import { FilterError, ScoreLabel } from "../../../app/types";
|
4
4
|
import { SampleSummary } from "../../../client/api/types";
|
5
5
|
import { kScoreTypeBoolean } from "../../../constants";
|
6
6
|
import { inputString } from "../../../utils/format";
|
7
7
|
import { EvalDescriptor, ScoreDescriptor } from "../descriptor/types";
|
8
8
|
|
9
|
-
export interface
|
10
|
-
from: number;
|
11
|
-
to: number;
|
12
|
-
message: string;
|
13
|
-
severity: "warning" | "error";
|
14
|
-
}
|
15
|
-
|
16
|
-
export interface ScoreFilterItem {
|
9
|
+
export interface SampleFilterItem {
|
17
10
|
shortName?: string;
|
18
11
|
qualifiedName?: string;
|
19
12
|
canonicalName: string;
|
@@ -120,10 +113,10 @@ const sampleVariables = (sample: SampleSummary): Record<string, unknown> => {
|
|
120
113
|
* Child metrics are accessed using dot notation (e.g. `scorer_name.score_name`) or
|
121
114
|
* directly by name when it is unique.
|
122
115
|
*/
|
123
|
-
export const
|
116
|
+
export const sampleFilterItems = (
|
124
117
|
evalDescriptor: EvalDescriptor,
|
125
|
-
):
|
126
|
-
const items:
|
118
|
+
): SampleFilterItem[] => {
|
119
|
+
const items: SampleFilterItem[] = [];
|
127
120
|
const bannedShortNames = bannedShortScoreNames(evalDescriptor.scores);
|
128
121
|
const valueToString = (value: unknown) =>
|
129
122
|
typeof value === "string" ? `"${value}"` : String(value);
|
@@ -296,8 +289,13 @@ export const filterSamples = (
|
|
296
289
|
evalDescriptor: EvalDescriptor,
|
297
290
|
samples: SampleSummary[],
|
298
291
|
filterValue: string,
|
299
|
-
): {
|
300
|
-
|
292
|
+
): {
|
293
|
+
result: SampleSummary[];
|
294
|
+
error: FilterError | undefined;
|
295
|
+
allErrors: boolean;
|
296
|
+
} => {
|
297
|
+
let error = undefined;
|
298
|
+
let errorCount = 0;
|
301
299
|
const result = samples.filter((sample) => {
|
302
300
|
if (filterValue) {
|
303
301
|
const { matches, error: sampleError } = filterExpression(
|
@@ -306,10 +304,13 @@ export const filterSamples = (
|
|
306
304
|
filterValue,
|
307
305
|
);
|
308
306
|
error ||= sampleError;
|
307
|
+
if (sampleError) {
|
308
|
+
errorCount++;
|
309
|
+
}
|
309
310
|
return matches;
|
310
311
|
} else {
|
311
312
|
return true;
|
312
313
|
}
|
313
314
|
});
|
314
|
-
return { result, error };
|
315
|
+
return { result, error, allErrors: errorCount === samples.length };
|
315
316
|
};
|