inspect-ai 0.3.93__py3-none-any.whl → 0.3.95__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. inspect_ai/_display/textual/widgets/samples.py +3 -3
  2. inspect_ai/_display/textual/widgets/transcript.py +3 -29
  3. inspect_ai/_eval/loader.py +1 -1
  4. inspect_ai/_eval/task/run.py +21 -12
  5. inspect_ai/_util/answer.py +26 -0
  6. inspect_ai/_util/constants.py +0 -1
  7. inspect_ai/_util/exception.py +4 -0
  8. inspect_ai/_util/hash.py +39 -0
  9. inspect_ai/_util/local_server.py +51 -21
  10. inspect_ai/_util/path.py +22 -0
  11. inspect_ai/_util/trace.py +1 -1
  12. inspect_ai/_util/working.py +4 -0
  13. inspect_ai/_view/www/dist/assets/index.css +23 -22
  14. inspect_ai/_view/www/dist/assets/index.js +517 -204
  15. inspect_ai/_view/www/log-schema.json +375 -0
  16. inspect_ai/_view/www/package.json +1 -1
  17. inspect_ai/_view/www/src/@types/log.d.ts +90 -12
  18. inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
  19. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
  20. inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
  21. inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
  22. inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
  23. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
  24. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
  25. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
  26. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
  27. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
  28. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
  29. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
  30. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
  31. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
  32. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
  33. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
  34. inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
  35. inspect_ai/_view/www/src/app/types.ts +12 -2
  36. inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
  37. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
  38. inspect_ai/_view/www/src/state/hooks.ts +19 -3
  39. inspect_ai/_view/www/src/state/logSlice.ts +23 -5
  40. inspect_ai/_view/www/yarn.lock +9 -9
  41. inspect_ai/agent/_as_solver.py +3 -1
  42. inspect_ai/agent/_as_tool.py +6 -4
  43. inspect_ai/agent/_bridge/patch.py +1 -3
  44. inspect_ai/agent/_handoff.py +5 -1
  45. inspect_ai/agent/_react.py +4 -3
  46. inspect_ai/agent/_run.py +6 -1
  47. inspect_ai/agent/_types.py +9 -0
  48. inspect_ai/analysis/__init__.py +0 -0
  49. inspect_ai/analysis/beta/__init__.py +57 -0
  50. inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
  51. inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
  52. inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
  53. inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
  54. inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
  55. inspect_ai/analysis/beta/_dataframe/evals/table.py +140 -0
  56. inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
  57. inspect_ai/analysis/beta/_dataframe/events/columns.py +37 -0
  58. inspect_ai/analysis/beta/_dataframe/events/table.py +14 -0
  59. inspect_ai/analysis/beta/_dataframe/extract.py +54 -0
  60. inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
  61. inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
  62. inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
  63. inspect_ai/analysis/beta/_dataframe/messages/table.py +87 -0
  64. inspect_ai/analysis/beta/_dataframe/record.py +377 -0
  65. inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
  66. inspect_ai/analysis/beta/_dataframe/samples/columns.py +73 -0
  67. inspect_ai/analysis/beta/_dataframe/samples/extract.py +82 -0
  68. inspect_ai/analysis/beta/_dataframe/samples/table.py +329 -0
  69. inspect_ai/analysis/beta/_dataframe/util.py +157 -0
  70. inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
  71. inspect_ai/dataset/_dataset.py +6 -3
  72. inspect_ai/log/__init__.py +10 -0
  73. inspect_ai/log/_convert.py +4 -9
  74. inspect_ai/log/_file.py +1 -1
  75. inspect_ai/log/_log.py +21 -1
  76. inspect_ai/log/_samples.py +14 -17
  77. inspect_ai/log/_transcript.py +77 -35
  78. inspect_ai/log/_tree.py +118 -0
  79. inspect_ai/model/_call_tools.py +44 -35
  80. inspect_ai/model/_model.py +51 -44
  81. inspect_ai/model/_openai_responses.py +17 -18
  82. inspect_ai/model/_providers/anthropic.py +30 -5
  83. inspect_ai/model/_providers/hf.py +27 -1
  84. inspect_ai/model/_providers/providers.py +1 -1
  85. inspect_ai/model/_providers/sglang.py +8 -2
  86. inspect_ai/model/_providers/vllm.py +6 -2
  87. inspect_ai/scorer/_choice.py +1 -2
  88. inspect_ai/solver/_chain.py +1 -1
  89. inspect_ai/solver/_fork.py +1 -1
  90. inspect_ai/solver/_multiple_choice.py +9 -23
  91. inspect_ai/solver/_plan.py +2 -2
  92. inspect_ai/solver/_task_state.py +7 -3
  93. inspect_ai/solver/_transcript.py +6 -7
  94. inspect_ai/tool/_mcp/_context.py +3 -5
  95. inspect_ai/tool/_mcp/_mcp.py +6 -5
  96. inspect_ai/tool/_mcp/server.py +1 -1
  97. inspect_ai/tool/_tools/_execute.py +4 -1
  98. inspect_ai/tool/_tools/_think.py +1 -1
  99. inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
  100. inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
  101. inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
  102. inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
  103. inspect_ai/util/__init__.py +4 -0
  104. inspect_ai/util/_anyio.py +11 -0
  105. inspect_ai/util/_collect.py +50 -0
  106. inspect_ai/util/_sandbox/events.py +3 -2
  107. inspect_ai/util/_span.py +58 -0
  108. inspect_ai/util/_subtask.py +27 -42
  109. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/METADATA +8 -1
  110. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/RECORD +114 -82
  111. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/WHEEL +1 -1
  112. inspect_ai/_display/core/group.py +0 -79
  113. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/entry_points.txt +0 -0
  114. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/licenses/LICENSE +0 -0
  115. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/top_level.txt +0 -0
@@ -316,6 +316,7 @@ export type Explanation = string | null;
316
316
  export type Metadata6 = {
317
317
  [k: string]: unknown;
318
318
  } | null;
319
+ export type SpanId = string | null;
319
320
  export type Timestamp = string;
320
321
  export type WorkingStart = number;
321
322
  export type Pending = boolean | null;
@@ -339,6 +340,7 @@ export type Files1 = {
339
340
  } | null;
340
341
  export type Setup1 = string | null;
341
342
  export type JsonValue = unknown;
343
+ export type SpanId1 = string | null;
342
344
  export type Timestamp1 = string;
343
345
  export type WorkingStart1 = number;
344
346
  export type Pending1 = boolean | null;
@@ -352,6 +354,7 @@ export type Type10 =
352
354
  | "custom";
353
355
  export type Message2 = string;
354
356
  export type Limit1 = number | null;
357
+ export type SpanId2 = string | null;
355
358
  export type Timestamp2 = string;
356
359
  export type WorkingStart2 = number;
357
360
  export type Pending2 = boolean | null;
@@ -366,6 +369,7 @@ export type Input2 = string | null;
366
369
  export type Result = number | null;
367
370
  export type Output = string | null;
368
371
  export type Completed = string | null;
372
+ export type SpanId3 = string | null;
369
373
  export type Timestamp3 = string;
370
374
  export type WorkingStart3 = number;
371
375
  export type Pending3 = boolean | null;
@@ -374,11 +378,13 @@ export type Op = "remove" | "add" | "replace" | "move" | "test" | "copy";
374
378
  export type Path = string;
375
379
  export type From = string | null;
376
380
  export type Changes = JsonChange[];
381
+ export type SpanId4 = string | null;
377
382
  export type Timestamp4 = string;
378
383
  export type WorkingStart4 = number;
379
384
  export type Pending4 = boolean | null;
380
385
  export type Event4 = "store";
381
386
  export type Changes1 = JsonChange[];
387
+ export type SpanId5 = string | null;
382
388
  export type Timestamp5 = string;
383
389
  export type WorkingStart5 = number;
384
390
  export type Pending5 = boolean | null;
@@ -399,11 +405,13 @@ export type Additionalproperties1 = boolean;
399
405
  export type Tools1 = ToolInfo[];
400
406
  export type ToolChoice = ("auto" | "any" | "none") | ToolFunction;
401
407
  export type Name9 = string;
408
+ export type Retries = number | null;
402
409
  export type Error1 = string | null;
403
410
  export type Cache = ("read" | "write") | null;
404
411
  export type Time1 = number | null;
405
412
  export type Completed1 = string | null;
406
413
  export type WorkingTime = number | null;
414
+ export type SpanId6 = string | null;
407
415
  export type Timestamp6 = string;
408
416
  export type WorkingStart6 = number;
409
417
  export type Pending6 = boolean | null;
@@ -428,6 +436,7 @@ export type Result1 =
428
436
  | ContentVideo
429
437
  )[];
430
438
  export type Truncated = [unknown, unknown] | null;
439
+ export type SpanId7 = string | null;
431
440
  export type Timestamp7 = string;
432
441
  export type WorkingStart7 = number;
433
442
  export type Pending7 = boolean | null;
@@ -441,22 +450,26 @@ export type Decision =
441
450
  | "escalate"
442
451
  | "terminate";
443
452
  export type Explanation1 = string | null;
453
+ export type SpanId8 = string | null;
444
454
  export type Timestamp8 = string;
445
455
  export type WorkingStart8 = number;
446
456
  export type Pending8 = boolean | null;
447
457
  export type Event8 = "input";
448
458
  export type Input4 = string;
449
459
  export type InputAnsi = string;
460
+ export type SpanId9 = string | null;
450
461
  export type Timestamp9 = string;
451
462
  export type WorkingStart9 = number;
452
463
  export type Pending9 = boolean | null;
453
464
  export type Event9 = "score";
454
465
  export type Target2 = string | string[] | null;
455
466
  export type Intermediate = boolean;
467
+ export type SpanId10 = string | null;
456
468
  export type Timestamp10 = string;
457
469
  export type WorkingStart10 = number;
458
470
  export type Pending10 = boolean | null;
459
471
  export type Event10 = "error";
472
+ export type SpanId11 = string | null;
460
473
  export type Timestamp11 = string;
461
474
  export type WorkingStart11 = number;
462
475
  export type Pending11 = boolean | null;
@@ -476,24 +489,42 @@ export type Created1 = number;
476
489
  export type Filename = string;
477
490
  export type Module = string;
478
491
  export type Lineno = number;
492
+ export type SpanId12 = string | null;
479
493
  export type Timestamp12 = string;
480
494
  export type WorkingStart12 = number;
481
495
  export type Pending12 = boolean | null;
482
496
  export type Event12 = "info";
483
497
  export type Source4 = string | null;
498
+ export type SpanId13 = string | null;
484
499
  export type Timestamp13 = string;
485
500
  export type WorkingStart13 = number;
486
501
  export type Pending13 = boolean | null;
487
- export type Event13 = "step";
488
- export type Action1 = "begin" | "end";
502
+ export type Event13 = "span_begin";
503
+ export type Id8 = string;
504
+ export type ParentId = string | null;
489
505
  export type Type13 = string | null;
490
506
  export type Name11 = string;
507
+ export type SpanId14 = string | null;
491
508
  export type Timestamp14 = string;
492
509
  export type WorkingStart14 = number;
493
510
  export type Pending14 = boolean | null;
494
- export type Event14 = "subtask";
495
- export type Name12 = string;
511
+ export type Event14 = "span_end";
512
+ export type Id9 = string;
513
+ export type SpanId15 = string | null;
514
+ export type Timestamp15 = string;
515
+ export type WorkingStart15 = number;
516
+ export type Pending15 = boolean | null;
517
+ export type Event15 = "step";
518
+ export type Action1 = "begin" | "end";
496
519
  export type Type14 = string | null;
520
+ export type Name12 = string;
521
+ export type SpanId16 = string | null;
522
+ export type Timestamp16 = string;
523
+ export type WorkingStart16 = number;
524
+ export type Pending16 = boolean | null;
525
+ export type Event16 = "subtask";
526
+ export type Name13 = string;
527
+ export type Type15 = string | null;
497
528
  export type Events2 = (
498
529
  | SampleInitEvent
499
530
  | SampleLimitEvent
@@ -508,6 +539,8 @@ export type Events2 = (
508
539
  | ErrorEvent
509
540
  | LoggerEvent
510
541
  | InfoEvent
542
+ | SpanBeginEvent
543
+ | SpanEndEvent
511
544
  | StepEvent
512
545
  | SubtaskEvent
513
546
  )[];
@@ -527,6 +560,8 @@ export type Events1 = (
527
560
  | ErrorEvent
528
561
  | LoggerEvent
529
562
  | InfoEvent
563
+ | SpanBeginEvent
564
+ | SpanEndEvent
530
565
  | StepEvent
531
566
  | SubtaskEvent
532
567
  )[];
@@ -548,6 +583,8 @@ export type Events = (
548
583
  | ErrorEvent
549
584
  | LoggerEvent
550
585
  | InfoEvent
586
+ | SpanBeginEvent
587
+ | SpanEndEvent
551
588
  | StepEvent
552
589
  | SubtaskEvent
553
590
  )[];
@@ -555,7 +592,7 @@ export type TotalTime = number | null;
555
592
  export type WorkingTime3 = number | null;
556
593
  export type Uuid = string | null;
557
594
  export type ErrorRetries = EvalError[] | null;
558
- export type Type15 =
595
+ export type Type16 =
559
596
  | "context"
560
597
  | "time"
561
598
  | "working"
@@ -1121,6 +1158,7 @@ export interface Store {
1121
1158
  * Beginning of processing a Sample.
1122
1159
  */
1123
1160
  export interface SampleInitEvent {
1161
+ span_id: SpanId;
1124
1162
  timestamp: Timestamp;
1125
1163
  working_start: WorkingStart;
1126
1164
  pending: Pending;
@@ -1145,6 +1183,7 @@ export interface Sample {
1145
1183
  * The sample was unable to finish processing due to a limit
1146
1184
  */
1147
1185
  export interface SampleLimitEvent {
1186
+ span_id: SpanId1;
1148
1187
  timestamp: Timestamp1;
1149
1188
  working_start: WorkingStart1;
1150
1189
  pending: Pending1;
@@ -1157,6 +1196,7 @@ export interface SampleLimitEvent {
1157
1196
  * Sandbox execution or I/O
1158
1197
  */
1159
1198
  export interface SandboxEvent {
1199
+ span_id: SpanId2;
1160
1200
  timestamp: Timestamp2;
1161
1201
  working_start: WorkingStart2;
1162
1202
  pending: Pending2;
@@ -1174,6 +1214,7 @@ export interface SandboxEvent {
1174
1214
  * Change to the current `TaskState`
1175
1215
  */
1176
1216
  export interface StateEvent {
1217
+ span_id: SpanId3;
1177
1218
  timestamp: Timestamp3;
1178
1219
  working_start: WorkingStart3;
1179
1220
  pending: Pending3;
@@ -1198,6 +1239,7 @@ export interface JsonChange {
1198
1239
  * Change to data within the current `Store`.
1199
1240
  */
1200
1241
  export interface StoreEvent {
1242
+ span_id: SpanId4;
1201
1243
  timestamp: Timestamp4;
1202
1244
  working_start: WorkingStart4;
1203
1245
  pending: Pending4;
@@ -1208,6 +1250,7 @@ export interface StoreEvent {
1208
1250
  * Call to a language model.
1209
1251
  */
1210
1252
  export interface ModelEvent {
1253
+ span_id: SpanId5;
1211
1254
  timestamp: Timestamp5;
1212
1255
  working_start: WorkingStart5;
1213
1256
  pending: Pending5;
@@ -1219,6 +1262,7 @@ export interface ModelEvent {
1219
1262
  tool_choice: ToolChoice;
1220
1263
  config: GenerateConfig;
1221
1264
  output: ModelOutput;
1265
+ retries: Retries;
1222
1266
  error: Error1;
1223
1267
  cache: Cache;
1224
1268
  call: ModelCall | null;
@@ -1289,6 +1333,7 @@ export interface Response {
1289
1333
  * Call to a tool.
1290
1334
  */
1291
1335
  export interface ToolEvent {
1336
+ span_id: SpanId6;
1292
1337
  timestamp: Timestamp6;
1293
1338
  working_start: WorkingStart6;
1294
1339
  pending: Pending6;
@@ -1315,6 +1360,7 @@ export interface Arguments1 {
1315
1360
  * Tool approval.
1316
1361
  */
1317
1362
  export interface ApprovalEvent {
1363
+ span_id: SpanId7;
1318
1364
  timestamp: Timestamp7;
1319
1365
  working_start: WorkingStart7;
1320
1366
  pending: Pending7;
@@ -1341,6 +1387,7 @@ export interface ToolCallView {
1341
1387
  * Input screen interaction.
1342
1388
  */
1343
1389
  export interface InputEvent {
1390
+ span_id: SpanId8;
1344
1391
  timestamp: Timestamp8;
1345
1392
  working_start: WorkingStart8;
1346
1393
  pending: Pending8;
@@ -1355,6 +1402,7 @@ export interface InputEvent {
1355
1402
  * resulting from a call to `score`.
1356
1403
  */
1357
1404
  export interface ScoreEvent {
1405
+ span_id: SpanId9;
1358
1406
  timestamp: Timestamp9;
1359
1407
  working_start: WorkingStart9;
1360
1408
  pending: Pending9;
@@ -1367,6 +1415,7 @@ export interface ScoreEvent {
1367
1415
  * Event with sample error.
1368
1416
  */
1369
1417
  export interface ErrorEvent {
1418
+ span_id: SpanId10;
1370
1419
  timestamp: Timestamp10;
1371
1420
  working_start: WorkingStart10;
1372
1421
  pending: Pending10;
@@ -1377,6 +1426,7 @@ export interface ErrorEvent {
1377
1426
  * Log message recorded with Python logger.
1378
1427
  */
1379
1428
  export interface LoggerEvent {
1429
+ span_id: SpanId11;
1380
1430
  timestamp: Timestamp11;
1381
1431
  working_start: WorkingStart11;
1382
1432
  pending: Pending11;
@@ -1399,6 +1449,7 @@ export interface LoggingMessage {
1399
1449
  * Event with custom info/data.
1400
1450
  */
1401
1451
  export interface InfoEvent {
1452
+ span_id: SpanId12;
1402
1453
  timestamp: Timestamp12;
1403
1454
  working_start: WorkingStart12;
1404
1455
  pending: Pending12;
@@ -1407,27 +1458,54 @@ export interface InfoEvent {
1407
1458
  data: JsonValue;
1408
1459
  }
1409
1460
  /**
1410
- * Step within current sample or subtask.
1461
+ * Mark the beginning of a transcript span.
1411
1462
  */
1412
- export interface StepEvent {
1463
+ export interface SpanBeginEvent {
1464
+ span_id: SpanId13;
1413
1465
  timestamp: Timestamp13;
1414
1466
  working_start: WorkingStart13;
1415
1467
  pending: Pending13;
1416
1468
  event: Event13;
1417
- action: Action1;
1469
+ id: Id8;
1470
+ parent_id: ParentId;
1418
1471
  type: Type13;
1419
1472
  name: Name11;
1420
1473
  }
1421
1474
  /**
1422
- * Subtask spawned.
1475
+ * Mark the end of a transcript span.
1423
1476
  */
1424
- export interface SubtaskEvent {
1477
+ export interface SpanEndEvent {
1478
+ span_id: SpanId14;
1425
1479
  timestamp: Timestamp14;
1426
1480
  working_start: WorkingStart14;
1427
1481
  pending: Pending14;
1428
1482
  event: Event14;
1429
- name: Name12;
1483
+ id: Id9;
1484
+ }
1485
+ /**
1486
+ * Step within current sample or subtask.
1487
+ */
1488
+ export interface StepEvent {
1489
+ span_id: SpanId15;
1490
+ timestamp: Timestamp15;
1491
+ working_start: WorkingStart15;
1492
+ pending: Pending15;
1493
+ event: Event15;
1494
+ action: Action1;
1430
1495
  type: Type14;
1496
+ name: Name12;
1497
+ }
1498
+ /**
1499
+ * Subtask spawned.
1500
+ */
1501
+ export interface SubtaskEvent {
1502
+ span_id: SpanId16;
1503
+ timestamp: Timestamp16;
1504
+ working_start: WorkingStart16;
1505
+ pending: Pending16;
1506
+ event: Event16;
1507
+ name: Name13;
1508
+ type: Type15;
1431
1509
  input: Input5;
1432
1510
  result: Result2;
1433
1511
  events: Events2;
@@ -1450,7 +1528,7 @@ export interface Attachments {
1450
1528
  * Limit encontered by sample.
1451
1529
  */
1452
1530
  export interface EvalSampleLimit {
1453
- type: Type15;
1531
+ type: Type16;
1454
1532
  limit: Limit2;
1455
1533
  }
1456
1534
  /**
@@ -8,7 +8,7 @@ import {
8
8
  EvalStats,
9
9
  } from "../../../@types/log";
10
10
  import { EvalDescriptor } from "../../../app/samples/descriptor/types";
11
- import { scoreFilterItems } from "../../../app/samples/sample-tools/filters";
11
+ import { sampleFilterItems } from "../../../app/samples/sample-tools/filters";
12
12
  import { ExpandablePanel } from "../../../components/ExpandablePanel";
13
13
  import { LabeledValue } from "../../../components/LabeledValue";
14
14
  import { useEvalDescriptor } from "../../../state/hooks";
@@ -181,7 +181,7 @@ const ScorerSummary: FC<ScoreSummaryProps> = ({ evalDescriptor }) => {
181
181
  return null;
182
182
  }
183
183
 
184
- const items = scoreFilterItems(evalDescriptor);
184
+ const items = sampleFilterItems(evalDescriptor);
185
185
  return (
186
186
  <span style={{ position: "relative" }}>
187
187
  {Array.from(items).map((item, index, array) => (
@@ -50,10 +50,7 @@ export const useSamplesTabConfig = (
50
50
  : totalSampleCount === 1
51
51
  ? [<ScoreFilterTools />]
52
52
  : [
53
- <SampleTools
54
- samples={sampleSummaries || []}
55
- key="sample-tools"
56
- />,
53
+ <SampleTools key="sample-tools" />,
57
54
  evalStatus === "started" && !streamSamples && (
58
55
  <ToolButton
59
56
  key="refresh"
@@ -1,6 +1,5 @@
1
1
  import { FC } from "react";
2
2
  import { Fragment } from "react/jsx-runtime";
3
- import { SampleSummary } from "../../client/api/types";
4
3
  import { useScore, useScores } from "../../state/hooks";
5
4
  import { useStore } from "../../state/store";
6
5
  import { EpochFilter } from "./sample-tools/EpochFilter";
@@ -8,16 +7,11 @@ import { SampleFilter } from "./sample-tools/sample-filter/SampleFilter";
8
7
  import { SelectScorer } from "./sample-tools/SelectScorer";
9
8
  import { SortFilter } from "./sample-tools/SortFilter";
10
9
 
11
- interface SampleToolsProps {
12
- samples: SampleSummary[];
13
- }
10
+ interface SampleToolsProps {}
14
11
 
15
- export const SampleTools: FC<SampleToolsProps> = ({ samples }) => {
12
+ export const SampleTools: FC<SampleToolsProps> = () => {
16
13
  const selectedLogSummary = useStore((state) => state.log.selectedLogSummary);
17
14
 
18
- const filter = useStore((state) => state.log.filter);
19
- const setFilter = useStore((state) => state.logActions.setFilter);
20
-
21
15
  const scores = useScores();
22
16
  const score = useScore();
23
17
  const setScore = useStore((state) => state.logActions.setScore);
@@ -29,11 +23,7 @@ export const SampleTools: FC<SampleToolsProps> = ({ samples }) => {
29
23
  const epochs = selectedLogSummary?.eval.config.epochs || 1;
30
24
  return (
31
25
  <Fragment>
32
- <SampleFilter
33
- samples={samples}
34
- scoreFilter={filter}
35
- setScoreFilter={setFilter}
36
- />
26
+ <SampleFilter />
37
27
  {scores?.length > 1 ? (
38
28
  <SelectScorer scores={scores} score={score} setScore={setScore} />
39
29
  ) : undefined}
@@ -1,7 +1,7 @@
1
1
  import clsx from "clsx";
2
2
  import { ScoreLabel } from "../../../app/types";
3
3
 
4
- import { ChangeEvent, FC, useCallback } from "react";
4
+ import { ChangeEvent, FC, useCallback, useMemo } from "react";
5
5
  import styles from "./SelectScorer.module.css";
6
6
 
7
7
  interface SelectScorerProps {
@@ -15,23 +15,18 @@ export const SelectScorer: FC<SelectScorerProps> = ({
15
15
  score,
16
16
  setScore,
17
17
  }) => {
18
- const scorers = scores.reduce((accum, scorer) => {
19
- if (
20
- !accum.find((sc) => {
21
- return scorer.scorer === sc.scorer;
22
- })
23
- ) {
24
- accum.push(scorer);
25
- }
26
- return accum;
27
- }, [] as ScoreLabel[]);
28
-
29
- const handleSelectScore = useCallback(
30
- (index: number) => {
31
- setScore(scores[index]);
32
- },
33
- [setScore, scores],
34
- );
18
+ const scorers = useMemo(() => {
19
+ return scores.reduce((accum, scorer) => {
20
+ if (
21
+ !accum.find((sc) => {
22
+ return scorer.scorer === sc.scorer;
23
+ })
24
+ ) {
25
+ accum.push(scorer);
26
+ }
27
+ return accum;
28
+ }, [] as ScoreLabel[]);
29
+ }, [scores]);
35
30
 
36
31
  if (scorers.length === 1) {
37
32
  // There is only a single scorer in play, just show the list of available scores
@@ -50,8 +45,8 @@ export const SelectScorer: FC<SelectScorerProps> = ({
50
45
  </span>
51
46
  <ScoreSelector
52
47
  scores={scores}
53
- selectedIndex={scoreIndex(scores, score)}
54
- setSelectedIndex={handleSelectScore}
48
+ selectedScore={score}
49
+ setSelectedScore={setScore}
55
50
  />
56
51
  </div>
57
52
  );
@@ -79,15 +74,15 @@ export const SelectScorer: FC<SelectScorerProps> = ({
79
74
  </span>
80
75
  <ScorerSelector
81
76
  scorers={scorers}
82
- selectedIndex={scorerIndex(scorers, score)}
83
- setSelectedIndex={handleSelectScore}
77
+ selectedScore={score}
78
+ setSelectedScore={setScore}
84
79
  />
85
80
  {scorerScores.length > 1 ? (
86
81
  <ScoreSelector
87
82
  className={clsx(styles.secondSel)}
88
83
  scores={scorerScores}
89
- selectedIndex={scoreIndex(scorerScores, score)}
90
- setSelectedIndex={handleSelectScore}
84
+ selectedScore={score}
85
+ setSelectedScore={setScore}
91
86
  />
92
87
  ) : undefined}
93
88
  </div>
@@ -97,25 +92,33 @@ export const SelectScorer: FC<SelectScorerProps> = ({
97
92
 
98
93
  interface ScoreSelectorProps {
99
94
  scores: ScoreLabel[];
100
- selectedIndex: number;
101
- setSelectedIndex: (index: number) => void;
95
+ selectedScore?: ScoreLabel;
96
+ setSelectedScore: (score: ScoreLabel) => void;
102
97
  className?: string | string[];
103
98
  }
104
99
 
105
100
  const ScoreSelector: FC<ScoreSelectorProps> = ({
106
101
  scores,
107
- selectedIndex,
108
- setSelectedIndex,
102
+ selectedScore,
103
+ setSelectedScore,
109
104
  className,
110
105
  }) => {
111
106
  const handleChange = useCallback(
112
107
  (e: ChangeEvent<HTMLSelectElement>) => {
113
108
  const sel = e.target as HTMLSelectElement;
114
- setSelectedIndex(sel.selectedIndex);
109
+ setSelectedScore(scores[sel.selectedIndex]);
115
110
  },
116
- [setSelectedIndex],
111
+ [setSelectedScore, scores],
117
112
  );
118
113
 
114
+ const index = scores.findIndex((sc) => {
115
+ return (
116
+ selectedScore &&
117
+ sc.name === selectedScore.name &&
118
+ sc.scorer === selectedScore.scorer
119
+ );
120
+ });
121
+
119
122
  return (
120
123
  <select
121
124
  className={clsx(
@@ -125,7 +128,7 @@ const ScoreSelector: FC<ScoreSelectorProps> = ({
125
128
  className,
126
129
  )}
127
130
  aria-label=".select-scorer-label"
128
- value={scores[selectedIndex].name}
131
+ value={scores[index].name}
129
132
  onChange={handleChange}
130
133
  >
131
134
  {scores.map((score) => {
@@ -141,28 +144,32 @@ const ScoreSelector: FC<ScoreSelectorProps> = ({
141
144
 
142
145
  interface ScorerSelectorProps {
143
146
  scorers: ScoreLabel[];
144
- selectedIndex: number;
145
- setSelectedIndex: (index: number) => void;
147
+ selectedScore?: ScoreLabel;
148
+ setSelectedScore: (score: ScoreLabel) => void;
146
149
  }
147
150
 
148
151
  const ScorerSelector: FC<ScorerSelectorProps> = ({
149
152
  scorers,
150
- selectedIndex,
151
- setSelectedIndex,
153
+ selectedScore,
154
+ setSelectedScore,
152
155
  }) => {
153
156
  const handleChange = useCallback(
154
157
  (e: ChangeEvent<HTMLSelectElement>) => {
155
158
  const sel = e.target as HTMLSelectElement;
156
- setSelectedIndex(sel.selectedIndex);
159
+ setSelectedScore(scorers[sel.selectedIndex]);
157
160
  },
158
- [setSelectedIndex],
161
+ [setSelectedScore, scorers],
159
162
  );
160
163
 
164
+ const index = scorers.findIndex((sc) => {
165
+ return selectedScore && sc.scorer === selectedScore.scorer;
166
+ });
167
+
161
168
  return (
162
169
  <select
163
170
  className={clsx("form-select", "form-select-sm", "text-size-smaller")}
164
171
  aria-label=".epoch-filter-label"
165
- value={scorers[selectedIndex].scorer}
172
+ value={scorers[index].scorer}
166
173
  onChange={handleChange}
167
174
  >
168
175
  {scorers.map((scorer) => {
@@ -175,13 +182,3 @@ const ScorerSelector: FC<ScorerSelectorProps> = ({
175
182
  </select>
176
183
  );
177
184
  };
178
-
179
- const scoreIndex = (scores: ScoreLabel[], score?: ScoreLabel) =>
180
- scores.findIndex((sc) => {
181
- return score && sc.name === score.name && sc.scorer === score.scorer;
182
- });
183
-
184
- const scorerIndex = (scores: ScoreLabel[], score?: ScoreLabel) =>
185
- scores.findIndex((sc) => {
186
- return score && sc.scorer === score.scorer;
187
- });
@@ -1,19 +1,12 @@
1
1
  import { compileExpression } from "filtrex";
2
2
  import { Scores1 } from "../../../@types/log";
3
- import { ScoreLabel } from "../../../app/types";
3
+ import { FilterError, ScoreLabel } from "../../../app/types";
4
4
  import { SampleSummary } from "../../../client/api/types";
5
5
  import { kScoreTypeBoolean } from "../../../constants";
6
6
  import { inputString } from "../../../utils/format";
7
7
  import { EvalDescriptor, ScoreDescriptor } from "../descriptor/types";
8
8
 
9
- export interface FilterError {
10
- from: number;
11
- to: number;
12
- message: string;
13
- severity: "warning" | "error";
14
- }
15
-
16
- export interface ScoreFilterItem {
9
+ export interface SampleFilterItem {
17
10
  shortName?: string;
18
11
  qualifiedName?: string;
19
12
  canonicalName: string;
@@ -120,10 +113,10 @@ const sampleVariables = (sample: SampleSummary): Record<string, unknown> => {
120
113
  * Child metrics are accessed using dot notation (e.g. `scorer_name.score_name`) or
121
114
  * directly by name when it is unique.
122
115
  */
123
- export const scoreFilterItems = (
116
+ export const sampleFilterItems = (
124
117
  evalDescriptor: EvalDescriptor,
125
- ): ScoreFilterItem[] => {
126
- const items: ScoreFilterItem[] = [];
118
+ ): SampleFilterItem[] => {
119
+ const items: SampleFilterItem[] = [];
127
120
  const bannedShortNames = bannedShortScoreNames(evalDescriptor.scores);
128
121
  const valueToString = (value: unknown) =>
129
122
  typeof value === "string" ? `"${value}"` : String(value);
@@ -296,8 +289,13 @@ export const filterSamples = (
296
289
  evalDescriptor: EvalDescriptor,
297
290
  samples: SampleSummary[],
298
291
  filterValue: string,
299
- ): { result: SampleSummary[]; error: FilterError | undefined } => {
300
- var error = undefined;
292
+ ): {
293
+ result: SampleSummary[];
294
+ error: FilterError | undefined;
295
+ allErrors: boolean;
296
+ } => {
297
+ let error = undefined;
298
+ let errorCount = 0;
301
299
  const result = samples.filter((sample) => {
302
300
  if (filterValue) {
303
301
  const { matches, error: sampleError } = filterExpression(
@@ -306,10 +304,13 @@ export const filterSamples = (
306
304
  filterValue,
307
305
  );
308
306
  error ||= sampleError;
307
+ if (sampleError) {
308
+ errorCount++;
309
+ }
309
310
  return matches;
310
311
  } else {
311
312
  return true;
312
313
  }
313
314
  });
314
- return { result, error };
315
+ return { result, error, allErrors: errorCount === samples.length };
315
316
  };