inspect-ai 0.3.56__py3-none-any.whl → 0.3.58__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/common.py +4 -2
- inspect_ai/_cli/eval.py +2 -0
- inspect_ai/_cli/trace.py +21 -2
- inspect_ai/_display/core/active.py +0 -2
- inspect_ai/_display/core/panel.py +1 -1
- inspect_ai/_display/rich/display.py +4 -4
- inspect_ai/_display/textual/app.py +4 -1
- inspect_ai/_display/textual/widgets/samples.py +41 -5
- inspect_ai/_eval/eval.py +32 -20
- inspect_ai/_eval/evalset.py +7 -5
- inspect_ai/_eval/run.py +16 -11
- inspect_ai/_eval/task/__init__.py +2 -2
- inspect_ai/_eval/task/images.py +40 -25
- inspect_ai/_eval/task/run.py +141 -119
- inspect_ai/_eval/task/task.py +140 -25
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/content.py +23 -1
- inspect_ai/_util/datetime.py +1 -1
- inspect_ai/_util/deprecation.py +1 -1
- inspect_ai/_util/images.py +20 -17
- inspect_ai/_util/json.py +11 -1
- inspect_ai/_util/kvstore.py +73 -0
- inspect_ai/_util/logger.py +2 -1
- inspect_ai/_util/notgiven.py +18 -0
- inspect_ai/_util/thread.py +5 -0
- inspect_ai/_util/trace.py +39 -3
- inspect_ai/_util/transcript.py +36 -7
- inspect_ai/_view/www/.prettierrc.js +12 -0
- inspect_ai/_view/www/dist/assets/index.js +322 -226
- inspect_ai/_view/www/log-schema.json +221 -138
- inspect_ai/_view/www/src/App.mjs +18 -9
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/api/Types.mjs +15 -4
- inspect_ai/_view/www/src/api/api-http.mjs +2 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
- inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
- inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
- inspect_ai/_view/www/src/components/MessageContent.mjs +44 -2
- inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
- inspect_ai/_view/www/src/components/Tools.mjs +18 -3
- inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
- inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
- inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
- inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
- inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +242 -178
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
- inspect_ai/_view/www/src/types/log.d.ts +53 -35
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
- inspect_ai/approval/_human/util.py +2 -2
- inspect_ai/dataset/_sources/csv.py +2 -1
- inspect_ai/dataset/_sources/json.py +2 -1
- inspect_ai/dataset/_sources/util.py +15 -7
- inspect_ai/log/_condense.py +11 -1
- inspect_ai/log/_log.py +27 -5
- inspect_ai/log/_recorders/eval.py +21 -8
- inspect_ai/log/_samples.py +10 -5
- inspect_ai/log/_transcript.py +28 -1
- inspect_ai/model/__init__.py +10 -2
- inspect_ai/model/_call_tools.py +82 -17
- inspect_ai/model/_chat_message.py +2 -4
- inspect_ai/model/{_trace.py → _conversation.py} +9 -8
- inspect_ai/model/_model.py +2 -2
- inspect_ai/model/_providers/anthropic.py +9 -7
- inspect_ai/model/_providers/azureai.py +6 -4
- inspect_ai/model/_providers/bedrock.py +6 -4
- inspect_ai/model/_providers/google.py +103 -14
- inspect_ai/model/_providers/groq.py +7 -5
- inspect_ai/model/_providers/hf.py +11 -6
- inspect_ai/model/_providers/mistral.py +6 -9
- inspect_ai/model/_providers/openai.py +34 -8
- inspect_ai/model/_providers/openai_o1.py +10 -12
- inspect_ai/model/_providers/vertex.py +17 -4
- inspect_ai/scorer/__init__.py +13 -2
- inspect_ai/scorer/_metrics/__init__.py +2 -2
- inspect_ai/scorer/_metrics/std.py +3 -3
- inspect_ai/tool/__init__.py +9 -1
- inspect_ai/tool/_tool.py +9 -2
- inspect_ai/tool/_tool_info.py +2 -1
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -3
- inspect_ai/util/__init__.py +4 -3
- inspect_ai/util/{_trace.py → _conversation.py} +3 -17
- inspect_ai/util/_display.py +14 -4
- inspect_ai/util/_sandbox/context.py +12 -13
- inspect_ai/util/_sandbox/docker/compose.py +24 -13
- inspect_ai/util/_sandbox/docker/docker.py +20 -13
- inspect_ai/util/_sandbox/docker/util.py +2 -1
- inspect_ai/util/_sandbox/environment.py +13 -1
- inspect_ai/util/_sandbox/local.py +1 -0
- inspect_ai/util/_sandbox/self_check.py +18 -18
- inspect_ai/util/_store.py +2 -2
- inspect_ai/util/_subprocess.py +3 -3
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/RECORD +107 -103
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/top_level.txt +0 -0
@@ -210,6 +210,12 @@
|
|
210
210
|
},
|
211
211
|
{
|
212
212
|
"$ref": "#/$defs/ContentImage"
|
213
|
+
},
|
214
|
+
{
|
215
|
+
"$ref": "#/$defs/ContentAudio"
|
216
|
+
},
|
217
|
+
{
|
218
|
+
"$ref": "#/$defs/ContentVideo"
|
213
219
|
}
|
214
220
|
]
|
215
221
|
},
|
@@ -281,6 +287,12 @@
|
|
281
287
|
},
|
282
288
|
{
|
283
289
|
"$ref": "#/$defs/ContentImage"
|
290
|
+
},
|
291
|
+
{
|
292
|
+
"$ref": "#/$defs/ContentAudio"
|
293
|
+
},
|
294
|
+
{
|
295
|
+
"$ref": "#/$defs/ContentVideo"
|
284
296
|
}
|
285
297
|
]
|
286
298
|
},
|
@@ -336,6 +348,12 @@
|
|
336
348
|
},
|
337
349
|
{
|
338
350
|
"$ref": "#/$defs/ContentImage"
|
351
|
+
},
|
352
|
+
{
|
353
|
+
"$ref": "#/$defs/ContentAudio"
|
354
|
+
},
|
355
|
+
{
|
356
|
+
"$ref": "#/$defs/ContentVideo"
|
339
357
|
}
|
340
358
|
]
|
341
359
|
},
|
@@ -429,6 +447,12 @@
|
|
429
447
|
},
|
430
448
|
{
|
431
449
|
"$ref": "#/$defs/ContentImage"
|
450
|
+
},
|
451
|
+
{
|
452
|
+
"$ref": "#/$defs/ContentAudio"
|
453
|
+
},
|
454
|
+
{
|
455
|
+
"$ref": "#/$defs/ContentVideo"
|
432
456
|
}
|
433
457
|
]
|
434
458
|
},
|
@@ -482,6 +506,36 @@
|
|
482
506
|
"type": "object",
|
483
507
|
"additionalProperties": false
|
484
508
|
},
|
509
|
+
"ContentAudio": {
|
510
|
+
"properties": {
|
511
|
+
"type": {
|
512
|
+
"const": "audio",
|
513
|
+
"default": "audio",
|
514
|
+
"title": "Type",
|
515
|
+
"type": "string"
|
516
|
+
},
|
517
|
+
"audio": {
|
518
|
+
"title": "Audio",
|
519
|
+
"type": "string"
|
520
|
+
},
|
521
|
+
"format": {
|
522
|
+
"enum": [
|
523
|
+
"wav",
|
524
|
+
"mp3"
|
525
|
+
],
|
526
|
+
"title": "Format",
|
527
|
+
"type": "string"
|
528
|
+
}
|
529
|
+
},
|
530
|
+
"required": [
|
531
|
+
"type",
|
532
|
+
"audio",
|
533
|
+
"format"
|
534
|
+
],
|
535
|
+
"title": "ContentAudio",
|
536
|
+
"type": "object",
|
537
|
+
"additionalProperties": false
|
538
|
+
},
|
485
539
|
"ContentImage": {
|
486
540
|
"properties": {
|
487
541
|
"type": {
|
@@ -535,6 +589,37 @@
|
|
535
589
|
"type": "object",
|
536
590
|
"additionalProperties": false
|
537
591
|
},
|
592
|
+
"ContentVideo": {
|
593
|
+
"properties": {
|
594
|
+
"type": {
|
595
|
+
"const": "video",
|
596
|
+
"default": "video",
|
597
|
+
"title": "Type",
|
598
|
+
"type": "string"
|
599
|
+
},
|
600
|
+
"video": {
|
601
|
+
"title": "Video",
|
602
|
+
"type": "string"
|
603
|
+
},
|
604
|
+
"format": {
|
605
|
+
"enum": [
|
606
|
+
"mp4",
|
607
|
+
"mpeg",
|
608
|
+
"mov"
|
609
|
+
],
|
610
|
+
"title": "Format",
|
611
|
+
"type": "string"
|
612
|
+
}
|
613
|
+
},
|
614
|
+
"required": [
|
615
|
+
"type",
|
616
|
+
"video",
|
617
|
+
"format"
|
618
|
+
],
|
619
|
+
"title": "ContentVideo",
|
620
|
+
"type": "object",
|
621
|
+
"additionalProperties": false
|
622
|
+
},
|
538
623
|
"ErrorEvent": {
|
539
624
|
"description": "Event with sample error.",
|
540
625
|
"properties": {
|
@@ -657,18 +742,6 @@
|
|
657
742
|
"default": null,
|
658
743
|
"title": "Epochs Reducer"
|
659
744
|
},
|
660
|
-
"trace": {
|
661
|
-
"anyOf": [
|
662
|
-
{
|
663
|
-
"type": "boolean"
|
664
|
-
},
|
665
|
-
{
|
666
|
-
"type": "null"
|
667
|
-
}
|
668
|
-
],
|
669
|
-
"default": null,
|
670
|
-
"title": "Trace"
|
671
|
-
},
|
672
745
|
"approval": {
|
673
746
|
"anyOf": [
|
674
747
|
{
|
@@ -847,7 +920,6 @@
|
|
847
920
|
"sample_id",
|
848
921
|
"epochs",
|
849
922
|
"epochs_reducer",
|
850
|
-
"trace",
|
851
923
|
"approval",
|
852
924
|
"fail_on_error",
|
853
925
|
"message_limit",
|
@@ -1482,7 +1554,7 @@
|
|
1482
1554
|
},
|
1483
1555
|
"samples": {
|
1484
1556
|
"items": {
|
1485
|
-
"$ref": "#/$defs/SampleScore"
|
1557
|
+
"$ref": "#/$defs/EvalSampleScore"
|
1486
1558
|
},
|
1487
1559
|
"title": "Samples",
|
1488
1560
|
"type": "array"
|
@@ -1497,6 +1569,129 @@
|
|
1497
1569
|
"type": "object",
|
1498
1570
|
"additionalProperties": false
|
1499
1571
|
},
|
1572
|
+
"EvalSampleScore": {
|
1573
|
+
"properties": {
|
1574
|
+
"value": {
|
1575
|
+
"anyOf": [
|
1576
|
+
{
|
1577
|
+
"type": "string"
|
1578
|
+
},
|
1579
|
+
{
|
1580
|
+
"type": "integer"
|
1581
|
+
},
|
1582
|
+
{
|
1583
|
+
"type": "number"
|
1584
|
+
},
|
1585
|
+
{
|
1586
|
+
"type": "boolean"
|
1587
|
+
},
|
1588
|
+
{
|
1589
|
+
"items": {
|
1590
|
+
"anyOf": [
|
1591
|
+
{
|
1592
|
+
"type": "string"
|
1593
|
+
},
|
1594
|
+
{
|
1595
|
+
"type": "integer"
|
1596
|
+
},
|
1597
|
+
{
|
1598
|
+
"type": "number"
|
1599
|
+
},
|
1600
|
+
{
|
1601
|
+
"type": "boolean"
|
1602
|
+
}
|
1603
|
+
]
|
1604
|
+
},
|
1605
|
+
"type": "array"
|
1606
|
+
},
|
1607
|
+
{
|
1608
|
+
"additionalProperties": {
|
1609
|
+
"anyOf": [
|
1610
|
+
{
|
1611
|
+
"type": "string"
|
1612
|
+
},
|
1613
|
+
{
|
1614
|
+
"type": "integer"
|
1615
|
+
},
|
1616
|
+
{
|
1617
|
+
"type": "number"
|
1618
|
+
},
|
1619
|
+
{
|
1620
|
+
"type": "boolean"
|
1621
|
+
},
|
1622
|
+
{
|
1623
|
+
"type": "null"
|
1624
|
+
}
|
1625
|
+
]
|
1626
|
+
},
|
1627
|
+
"type": "object"
|
1628
|
+
}
|
1629
|
+
],
|
1630
|
+
"title": "Value"
|
1631
|
+
},
|
1632
|
+
"answer": {
|
1633
|
+
"anyOf": [
|
1634
|
+
{
|
1635
|
+
"type": "string"
|
1636
|
+
},
|
1637
|
+
{
|
1638
|
+
"type": "null"
|
1639
|
+
}
|
1640
|
+
],
|
1641
|
+
"default": null,
|
1642
|
+
"title": "Answer"
|
1643
|
+
},
|
1644
|
+
"explanation": {
|
1645
|
+
"anyOf": [
|
1646
|
+
{
|
1647
|
+
"type": "string"
|
1648
|
+
},
|
1649
|
+
{
|
1650
|
+
"type": "null"
|
1651
|
+
}
|
1652
|
+
],
|
1653
|
+
"default": null,
|
1654
|
+
"title": "Explanation"
|
1655
|
+
},
|
1656
|
+
"metadata": {
|
1657
|
+
"anyOf": [
|
1658
|
+
{
|
1659
|
+
"type": "object"
|
1660
|
+
},
|
1661
|
+
{
|
1662
|
+
"type": "null"
|
1663
|
+
}
|
1664
|
+
],
|
1665
|
+
"default": null,
|
1666
|
+
"title": "Metadata"
|
1667
|
+
},
|
1668
|
+
"sample_id": {
|
1669
|
+
"anyOf": [
|
1670
|
+
{
|
1671
|
+
"type": "string"
|
1672
|
+
},
|
1673
|
+
{
|
1674
|
+
"type": "integer"
|
1675
|
+
},
|
1676
|
+
{
|
1677
|
+
"type": "null"
|
1678
|
+
}
|
1679
|
+
],
|
1680
|
+
"default": null,
|
1681
|
+
"title": "Sample Id"
|
1682
|
+
}
|
1683
|
+
},
|
1684
|
+
"required": [
|
1685
|
+
"value",
|
1686
|
+
"answer",
|
1687
|
+
"explanation",
|
1688
|
+
"metadata",
|
1689
|
+
"sample_id"
|
1690
|
+
],
|
1691
|
+
"title": "EvalSampleScore",
|
1692
|
+
"type": "object",
|
1693
|
+
"additionalProperties": false
|
1694
|
+
},
|
1500
1695
|
"EvalScore": {
|
1501
1696
|
"properties": {
|
1502
1697
|
"name": {
|
@@ -2905,130 +3100,6 @@
|
|
2905
3100
|
"type": "object",
|
2906
3101
|
"additionalProperties": false
|
2907
3102
|
},
|
2908
|
-
"SampleScore": {
|
2909
|
-
"description": "Score for a Sample\n\nArgs:\n sample_id: (str | int | None) Unique id of a sample",
|
2910
|
-
"properties": {
|
2911
|
-
"value": {
|
2912
|
-
"anyOf": [
|
2913
|
-
{
|
2914
|
-
"type": "string"
|
2915
|
-
},
|
2916
|
-
{
|
2917
|
-
"type": "integer"
|
2918
|
-
},
|
2919
|
-
{
|
2920
|
-
"type": "number"
|
2921
|
-
},
|
2922
|
-
{
|
2923
|
-
"type": "boolean"
|
2924
|
-
},
|
2925
|
-
{
|
2926
|
-
"items": {
|
2927
|
-
"anyOf": [
|
2928
|
-
{
|
2929
|
-
"type": "string"
|
2930
|
-
},
|
2931
|
-
{
|
2932
|
-
"type": "integer"
|
2933
|
-
},
|
2934
|
-
{
|
2935
|
-
"type": "number"
|
2936
|
-
},
|
2937
|
-
{
|
2938
|
-
"type": "boolean"
|
2939
|
-
}
|
2940
|
-
]
|
2941
|
-
},
|
2942
|
-
"type": "array"
|
2943
|
-
},
|
2944
|
-
{
|
2945
|
-
"additionalProperties": {
|
2946
|
-
"anyOf": [
|
2947
|
-
{
|
2948
|
-
"type": "string"
|
2949
|
-
},
|
2950
|
-
{
|
2951
|
-
"type": "integer"
|
2952
|
-
},
|
2953
|
-
{
|
2954
|
-
"type": "number"
|
2955
|
-
},
|
2956
|
-
{
|
2957
|
-
"type": "boolean"
|
2958
|
-
},
|
2959
|
-
{
|
2960
|
-
"type": "null"
|
2961
|
-
}
|
2962
|
-
]
|
2963
|
-
},
|
2964
|
-
"type": "object"
|
2965
|
-
}
|
2966
|
-
],
|
2967
|
-
"title": "Value"
|
2968
|
-
},
|
2969
|
-
"answer": {
|
2970
|
-
"anyOf": [
|
2971
|
-
{
|
2972
|
-
"type": "string"
|
2973
|
-
},
|
2974
|
-
{
|
2975
|
-
"type": "null"
|
2976
|
-
}
|
2977
|
-
],
|
2978
|
-
"default": null,
|
2979
|
-
"title": "Answer"
|
2980
|
-
},
|
2981
|
-
"explanation": {
|
2982
|
-
"anyOf": [
|
2983
|
-
{
|
2984
|
-
"type": "string"
|
2985
|
-
},
|
2986
|
-
{
|
2987
|
-
"type": "null"
|
2988
|
-
}
|
2989
|
-
],
|
2990
|
-
"default": null,
|
2991
|
-
"title": "Explanation"
|
2992
|
-
},
|
2993
|
-
"metadata": {
|
2994
|
-
"anyOf": [
|
2995
|
-
{
|
2996
|
-
"type": "object"
|
2997
|
-
},
|
2998
|
-
{
|
2999
|
-
"type": "null"
|
3000
|
-
}
|
3001
|
-
],
|
3002
|
-
"default": null,
|
3003
|
-
"title": "Metadata"
|
3004
|
-
},
|
3005
|
-
"sample_id": {
|
3006
|
-
"anyOf": [
|
3007
|
-
{
|
3008
|
-
"type": "string"
|
3009
|
-
},
|
3010
|
-
{
|
3011
|
-
"type": "integer"
|
3012
|
-
},
|
3013
|
-
{
|
3014
|
-
"type": "null"
|
3015
|
-
}
|
3016
|
-
],
|
3017
|
-
"default": null,
|
3018
|
-
"title": "Sample Id"
|
3019
|
-
}
|
3020
|
-
},
|
3021
|
-
"required": [
|
3022
|
-
"value",
|
3023
|
-
"answer",
|
3024
|
-
"explanation",
|
3025
|
-
"metadata",
|
3026
|
-
"sample_id"
|
3027
|
-
],
|
3028
|
-
"title": "SampleScore",
|
3029
|
-
"type": "object",
|
3030
|
-
"additionalProperties": false
|
3031
|
-
},
|
3032
3103
|
"SandboxEnvironmentSpec": {
|
3033
3104
|
"maxItems": 2,
|
3034
3105
|
"minItems": 1,
|
@@ -3722,6 +3793,12 @@
|
|
3722
3793
|
{
|
3723
3794
|
"$ref": "#/$defs/ContentImage"
|
3724
3795
|
},
|
3796
|
+
{
|
3797
|
+
"$ref": "#/$defs/ContentAudio"
|
3798
|
+
},
|
3799
|
+
{
|
3800
|
+
"$ref": "#/$defs/ContentVideo"
|
3801
|
+
},
|
3725
3802
|
{
|
3726
3803
|
"items": {
|
3727
3804
|
"anyOf": [
|
@@ -3730,6 +3807,12 @@
|
|
3730
3807
|
},
|
3731
3808
|
{
|
3732
3809
|
"$ref": "#/$defs/ContentImage"
|
3810
|
+
},
|
3811
|
+
{
|
3812
|
+
"$ref": "#/$defs/ContentAudio"
|
3813
|
+
},
|
3814
|
+
{
|
3815
|
+
"$ref": "#/$defs/ContentVideo"
|
3733
3816
|
}
|
3734
3817
|
]
|
3735
3818
|
},
|
inspect_ai/_view/www/src/App.mjs
CHANGED
@@ -32,7 +32,10 @@ import { FindBand } from "./components/FindBand.mjs";
|
|
32
32
|
import { isVscode } from "./utils/Html.mjs";
|
33
33
|
import { getVscodeApi } from "./utils/vscode.mjs";
|
34
34
|
import { kDefaultSort } from "./constants.mjs";
|
35
|
-
import {
|
35
|
+
import {
|
36
|
+
createEvalDescriptor,
|
37
|
+
createSamplesDescriptor,
|
38
|
+
} from "./samples/SamplesDescriptor.mjs";
|
36
39
|
import { byEpoch, bySample, sortSamples } from "./samples/tools/SortFilter.mjs";
|
37
40
|
import { resolveAttachments } from "./utils/attachments.mjs";
|
38
41
|
import { filterFnForType } from "./samples/tools/filters.mjs";
|
@@ -76,7 +79,7 @@ export function App({
|
|
76
79
|
initialState?.headersLoading || false,
|
77
80
|
);
|
78
81
|
|
79
|
-
|
82
|
+
/** @type {[import("./Types.mjs").CurrentLog, function(import("./Types.mjs").CurrentLog): void]} */
|
80
83
|
const [selectedLog, setSelectedLog] = useState(
|
81
84
|
initialState?.selectedLog || {
|
82
85
|
contents: undefined,
|
@@ -95,6 +98,7 @@ export function App({
|
|
95
98
|
? initialState.selectedSampleIndex
|
96
99
|
: -1,
|
97
100
|
);
|
101
|
+
/** @type {[import("./types/log").EvalSample, function(import("./types/log").EvalSample): void]} */
|
98
102
|
const [selectedSample, setSelectedSample] = useState(
|
99
103
|
initialState?.selectedSample,
|
100
104
|
);
|
@@ -326,7 +330,7 @@ export function App({
|
|
326
330
|
|
327
331
|
// Set the grouping
|
328
332
|
let grouping = "none";
|
329
|
-
if (samplesDescriptor?.epochs > 1) {
|
333
|
+
if (samplesDescriptor?.evalDescriptor?.epochs > 1) {
|
330
334
|
if (byEpoch(sort) || epoch !== "all") {
|
331
335
|
grouping = "epoch";
|
332
336
|
} else if (bySample(sort)) {
|
@@ -339,14 +343,17 @@ export function App({
|
|
339
343
|
setGroupByOrder(order);
|
340
344
|
}, [selectedLog, filter, sort, epoch]);
|
341
345
|
|
342
|
-
const samplesDescriptor = useMemo(() => {
|
343
|
-
return
|
346
|
+
const evalDescriptor = useMemo(() => {
|
347
|
+
return createEvalDescriptor(
|
344
348
|
scores,
|
345
349
|
selectedLog.contents?.sampleSummaries,
|
346
350
|
selectedLog.contents?.eval?.config?.epochs || 1,
|
347
|
-
score,
|
348
351
|
);
|
349
|
-
}, [selectedLog, scores, score]);
|
352
|
+
}, [selectedLog, scores]);
|
353
|
+
|
354
|
+
const samplesDescriptor = useMemo(() => {
|
355
|
+
return createSamplesDescriptor(evalDescriptor, score);
|
356
|
+
}, [evalDescriptor, score]);
|
350
357
|
|
351
358
|
const refreshSampleTab = useCallback(
|
352
359
|
(sample) => {
|
@@ -513,9 +520,11 @@ export function App({
|
|
513
520
|
// Reset the workspace tab
|
514
521
|
const hasSamples =
|
515
522
|
!!log.sampleSummaries && log.sampleSummaries.length > 0;
|
516
|
-
const showSamples =
|
523
|
+
const showSamples = hasSamples;
|
517
524
|
setSelectedWorkspaceTab(
|
518
|
-
|
525
|
+
log.status !== "error" && hasSamples
|
526
|
+
? kEvalWorkspaceTabId
|
527
|
+
: kInfoWorkspaceTabId,
|
519
528
|
);
|
520
529
|
|
521
530
|
// Select the default scorer to use
|
@@ -30,15 +30,26 @@
|
|
30
30
|
* @property { import("../types/log").Input } input
|
31
31
|
* @property { import("../types/log").Target } target
|
32
32
|
* @property { import("../types/log").Scores1 } scores
|
33
|
+
* @property { string } [error]
|
33
34
|
* @property { import("../types/log").Type11 } [limit]
|
34
35
|
*/
|
35
36
|
|
36
37
|
/**
|
37
|
-
*
|
38
|
-
*
|
39
|
-
*
|
40
|
-
*
|
38
|
+
* Fields shared by EvalSample and SampleSummary.
|
39
|
+
* Contains only fields that are copied verbatim in src/inspect_ai/log/_recorders/eval.py.
|
40
|
+
*
|
41
|
+
* @typedef {Object} BasicSampleData
|
42
|
+
* @property { number | string } id
|
43
|
+
* @property { number } epoch
|
44
|
+
* @property { import("../types/log").Target } target
|
45
|
+
* @property { import("../types/log").Scores1 } scores
|
46
|
+
*/
|
41
47
|
|
48
|
+
/**
|
49
|
+
* @typedef {Object} Capabilities
|
50
|
+
* @property {boolean} downloadFiles - Indicates if file downloads are supported.
|
51
|
+
* @property {boolean} webWorkers - Indicates if web workers are supported.
|
52
|
+
*/
|
42
53
|
|
43
54
|
/**
|
44
55
|
* @typedef {Object} LogViewAPI
|
@@ -56,6 +56,7 @@ function simpleHttpAPI(logInfo) {
|
|
56
56
|
});
|
57
57
|
return Promise.resolve({
|
58
58
|
files: logs,
|
59
|
+
log_dir,
|
59
60
|
});
|
60
61
|
} else if (log_file) {
|
61
62
|
// Check the cache
|
@@ -76,6 +77,7 @@ function simpleHttpAPI(logInfo) {
|
|
76
77
|
|
77
78
|
return {
|
78
79
|
files: [result],
|
80
|
+
log_dir,
|
79
81
|
};
|
80
82
|
} else {
|
81
83
|
// No log.json could be found, and there isn't a log file,
|
@@ -14,8 +14,8 @@ export const ExpandablePanel = ({
|
|
14
14
|
const [collapsed, setCollapsed] = useState(collapse);
|
15
15
|
const [showToggle, setShowToggle] = useState(false);
|
16
16
|
|
17
|
-
const contentsRef = useRef();
|
18
|
-
const observerRef = useRef();
|
17
|
+
const contentsRef = useRef(/** @type {HTMLElement|null} */ (null));
|
18
|
+
const observerRef = useRef(/** @type {IntersectionObserver|null} */ (null));
|
19
19
|
|
20
20
|
// Ensure that when content changes, we reset the collapse state.
|
21
21
|
useEffect(() => {
|
@@ -4,7 +4,7 @@ import { ApplicationIcons } from "../appearance/Icons.mjs";
|
|
4
4
|
import { FontSize } from "../appearance/Fonts.mjs";
|
5
5
|
|
6
6
|
export const FindBand = ({ hideBand }) => {
|
7
|
-
const searchBoxRef = useRef();
|
7
|
+
const searchBoxRef = useRef(/** @type {HTMLInputElement|null} */ (null));
|
8
8
|
useEffect(() => {
|
9
9
|
searchBoxRef.current.focus();
|
10
10
|
}, []);
|
@@ -31,13 +31,14 @@ export const FindBand = ({ hideBand }) => {
|
|
31
31
|
};
|
32
32
|
|
33
33
|
// capture what is focused
|
34
|
-
const focusedElement = document.activeElement;
|
34
|
+
const focusedElement = /** @type {HTMLElement} */ (document.activeElement);
|
35
|
+
// @ts-expect-error: `Window.find` is non-standard
|
35
36
|
const result = window.find(term, false, !!back, false, false, true, false);
|
36
37
|
const noResultEl = window.document.getElementById(
|
37
38
|
"inspect-find-no-results",
|
38
39
|
);
|
39
40
|
if (result) {
|
40
|
-
noResultEl.style.opacity = 0;
|
41
|
+
noResultEl.style.opacity = "0";
|
41
42
|
const selection = window.getSelection();
|
42
43
|
if (selection.rangeCount > 0) {
|
43
44
|
// See if the parent is an expandable panel and expand it
|
@@ -58,7 +59,7 @@ export const FindBand = ({ hideBand }) => {
|
|
58
59
|
}, 100);
|
59
60
|
}
|
60
61
|
} else {
|
61
|
-
noResultEl.style.opacity = 1;
|
62
|
+
noResultEl.style.opacity = "1";
|
62
63
|
}
|
63
64
|
|
64
65
|
// Return focus to the previously focused element
|
@@ -31,7 +31,7 @@ export const LargeModal = (props) => {
|
|
31
31
|
|
32
32
|
// Support restoring the scroll position
|
33
33
|
// but only do this for the first time that the children are set
|
34
|
-
const scrollRef = useRef();
|
34
|
+
const scrollRef = useRef(/** @type {HTMLElement|null} */ (null));
|
35
35
|
useEffect(() => {
|
36
36
|
if (scrollRef.current) {
|
37
37
|
setTimeout(() => {
|
@@ -8,7 +8,7 @@ export const MessageBand = ({ message, hidden, setHidden, type }) => {
|
|
8
8
|
const bgColor =
|
9
9
|
type === "info" ? "var(--bs-light)" : "var(--bs-" + type + "-bg-subtle)";
|
10
10
|
const color =
|
11
|
-
|
11
|
+
type === "info" ? undefined : "var(--bs-" + type + "-text-emphasis)";
|
12
12
|
|
13
13
|
return html`
|
14
14
|
<div
|
@@ -32,7 +32,7 @@ export const MessageBand = ({ message, hidden, setHidden, type }) => {
|
|
32
32
|
fontSize: FontSize["title-secondary"],
|
33
33
|
margin: "0",
|
34
34
|
padding: "0",
|
35
|
-
color:
|
35
|
+
color: color,
|
36
36
|
height: FontSize["title-secondary"],
|
37
37
|
lineHeight: FontSize["title-secondary"],
|
38
38
|
}}
|