inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. inspect_ai/__init__.py +1 -0
  2. inspect_ai/_cli/common.py +1 -1
  3. inspect_ai/_cli/trace.py +33 -20
  4. inspect_ai/_display/core/active.py +1 -1
  5. inspect_ai/_display/core/display.py +1 -1
  6. inspect_ai/_display/core/footer.py +1 -1
  7. inspect_ai/_display/core/panel.py +1 -1
  8. inspect_ai/_display/core/progress.py +0 -6
  9. inspect_ai/_display/core/rich.py +1 -1
  10. inspect_ai/_display/rich/display.py +2 -2
  11. inspect_ai/_display/textual/app.py +15 -17
  12. inspect_ai/_display/textual/widgets/clock.py +3 -3
  13. inspect_ai/_display/textual/widgets/samples.py +6 -13
  14. inspect_ai/_eval/context.py +9 -1
  15. inspect_ai/_eval/run.py +16 -11
  16. inspect_ai/_eval/score.py +4 -10
  17. inspect_ai/_eval/task/results.py +5 -4
  18. inspect_ai/_eval/task/run.py +6 -12
  19. inspect_ai/_eval/task/task.py +10 -0
  20. inspect_ai/_util/ansi.py +31 -0
  21. inspect_ai/_util/datetime.py +1 -1
  22. inspect_ai/_util/deprecation.py +1 -1
  23. inspect_ai/_util/format.py +7 -0
  24. inspect_ai/_util/json.py +11 -1
  25. inspect_ai/_util/logger.py +14 -13
  26. inspect_ai/_util/throttle.py +10 -1
  27. inspect_ai/_util/trace.py +79 -47
  28. inspect_ai/_util/transcript.py +37 -4
  29. inspect_ai/_util/vscode.py +51 -0
  30. inspect_ai/_view/notify.py +2 -1
  31. inspect_ai/_view/www/.prettierrc.js +12 -0
  32. inspect_ai/_view/www/App.css +22 -1
  33. inspect_ai/_view/www/dist/assets/index.css +2374 -2
  34. inspect_ai/_view/www/dist/assets/index.js +29752 -24492
  35. inspect_ai/_view/www/log-schema.json +262 -215
  36. inspect_ai/_view/www/package.json +1 -0
  37. inspect_ai/_view/www/src/App.mjs +19 -9
  38. inspect_ai/_view/www/src/Types.mjs +0 -1
  39. inspect_ai/_view/www/src/api/Types.mjs +15 -4
  40. inspect_ai/_view/www/src/api/api-http.mjs +2 -0
  41. inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
  42. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
  43. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
  44. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
  45. inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
  46. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
  47. inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
  48. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
  49. inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
  50. inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
  51. inspect_ai/_view/www/src/components/Tools.mjs +28 -5
  52. inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
  53. inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
  54. inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
  55. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
  56. inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
  57. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
  58. inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
  59. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
  60. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
  61. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
  62. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
  63. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
  64. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
  65. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
  66. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
  67. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
  68. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
  69. inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +28 -20
  71. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
  72. inspect_ai/_view/www/yarn.lock +44 -0
  73. inspect_ai/approval/_apply.py +4 -0
  74. inspect_ai/approval/_human/panel.py +5 -8
  75. inspect_ai/dataset/_dataset.py +51 -10
  76. inspect_ai/dataset/_util.py +31 -3
  77. inspect_ai/log/__init__.py +2 -0
  78. inspect_ai/log/_log.py +30 -2
  79. inspect_ai/log/_recorders/eval.py +2 -0
  80. inspect_ai/model/_call_tools.py +31 -7
  81. inspect_ai/model/_chat_message.py +3 -0
  82. inspect_ai/model/_model.py +42 -1
  83. inspect_ai/model/_providers/anthropic.py +4 -0
  84. inspect_ai/model/_providers/google.py +24 -6
  85. inspect_ai/model/_providers/openai.py +17 -3
  86. inspect_ai/model/_providers/openai_o1.py +10 -12
  87. inspect_ai/model/_render.py +9 -2
  88. inspect_ai/scorer/_metric.py +12 -1
  89. inspect_ai/solver/__init__.py +2 -0
  90. inspect_ai/solver/_human_agent/agent.py +83 -0
  91. inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
  92. inspect_ai/solver/_human_agent/commands/clock.py +70 -0
  93. inspect_ai/solver/_human_agent/commands/command.py +59 -0
  94. inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
  95. inspect_ai/solver/_human_agent/commands/note.py +42 -0
  96. inspect_ai/solver/_human_agent/commands/score.py +80 -0
  97. inspect_ai/solver/_human_agent/commands/status.py +62 -0
  98. inspect_ai/solver/_human_agent/commands/submit.py +151 -0
  99. inspect_ai/solver/_human_agent/install.py +222 -0
  100. inspect_ai/solver/_human_agent/panel.py +252 -0
  101. inspect_ai/solver/_human_agent/service.py +45 -0
  102. inspect_ai/solver/_human_agent/state.py +55 -0
  103. inspect_ai/solver/_human_agent/view.py +24 -0
  104. inspect_ai/solver/_task_state.py +28 -2
  105. inspect_ai/tool/_tool.py +10 -2
  106. inspect_ai/tool/_tool_info.py +2 -1
  107. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
  108. inspect_ai/tool/_tools/_web_browser/_web_browser.py +16 -13
  109. inspect_ai/util/__init__.py +12 -4
  110. inspect_ai/{_util/display.py → util/_display.py} +6 -0
  111. inspect_ai/util/_panel.py +31 -9
  112. inspect_ai/util/_sandbox/__init__.py +0 -3
  113. inspect_ai/util/_sandbox/context.py +5 -1
  114. inspect_ai/util/_sandbox/docker/compose.py +17 -13
  115. inspect_ai/util/_sandbox/docker/docker.py +9 -6
  116. inspect_ai/util/_sandbox/docker/internal.py +1 -1
  117. inspect_ai/util/_sandbox/docker/util.py +3 -2
  118. inspect_ai/util/_sandbox/environment.py +6 -5
  119. inspect_ai/util/_sandbox/local.py +1 -1
  120. inspect_ai/util/_sandbox/self_check.py +18 -18
  121. inspect_ai/util/_sandbox/service.py +22 -7
  122. inspect_ai/util/_store.py +7 -8
  123. inspect_ai/util/_store_model.py +110 -0
  124. inspect_ai/util/_subprocess.py +3 -3
  125. inspect_ai/util/_throttle.py +32 -0
  126. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
  127. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +131 -108
  128. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
  129. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
  130. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
  131. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0
@@ -23,9 +23,6 @@
23
23
  "event": {
24
24
  "const": "approval",
25
25
  "default": "approval",
26
- "enum": [
27
- "approval"
28
- ],
29
26
  "title": "Event",
30
27
  "type": "string"
31
28
  },
@@ -154,6 +151,11 @@
154
151
  "title": "ApproverPolicyConfig",
155
152
  "type": "object"
156
153
  },
154
+ "BaseModel": {
155
+ "properties": {},
156
+ "title": "BaseModel",
157
+ "type": "object"
158
+ },
157
159
  "ChatCompletionChoice": {
158
160
  "properties": {
159
161
  "message": {
@@ -235,9 +237,6 @@
235
237
  "role": {
236
238
  "const": "assistant",
237
239
  "default": "assistant",
238
- "enum": [
239
- "assistant"
240
- ],
241
240
  "title": "Role",
242
241
  "type": "string"
243
242
  },
@@ -309,9 +308,6 @@
309
308
  "role": {
310
309
  "const": "system",
311
310
  "default": "system",
312
- "enum": [
313
- "system"
314
- ],
315
311
  "title": "Role",
316
312
  "type": "string"
317
313
  }
@@ -367,9 +363,6 @@
367
363
  "role": {
368
364
  "const": "tool",
369
365
  "default": "tool",
370
- "enum": [
371
- "tool"
372
- ],
373
366
  "title": "Role",
374
367
  "type": "string"
375
368
  },
@@ -463,17 +456,27 @@
463
456
  "role": {
464
457
  "const": "user",
465
458
  "default": "user",
466
- "enum": [
467
- "user"
468
- ],
469
459
  "title": "Role",
470
460
  "type": "string"
461
+ },
462
+ "tool_call_id": {
463
+ "anyOf": [
464
+ {
465
+ "type": "string"
466
+ },
467
+ {
468
+ "type": "null"
469
+ }
470
+ ],
471
+ "default": null,
472
+ "title": "Tool Call Id"
471
473
  }
472
474
  },
473
475
  "required": [
474
476
  "content",
475
477
  "source",
476
- "role"
478
+ "role",
479
+ "tool_call_id"
477
480
  ],
478
481
  "title": "ChatMessageUser",
479
482
  "type": "object",
@@ -484,9 +487,6 @@
484
487
  "type": {
485
488
  "const": "image",
486
489
  "default": "image",
487
- "enum": [
488
- "image"
489
- ],
490
490
  "title": "Type",
491
491
  "type": "string"
492
492
  },
@@ -519,9 +519,6 @@
519
519
  "type": {
520
520
  "const": "text",
521
521
  "default": "text",
522
- "enum": [
523
- "text"
524
- ],
525
522
  "title": "Type",
526
523
  "type": "string"
527
524
  },
@@ -561,9 +558,6 @@
561
558
  "event": {
562
559
  "const": "error",
563
560
  "default": "error",
564
- "enum": [
565
- "error"
566
- ],
567
561
  "title": "Event",
568
562
  "type": "string"
569
563
  },
@@ -608,6 +602,34 @@
608
602
  "default": null,
609
603
  "title": "Limit"
610
604
  },
605
+ "sample_id": {
606
+ "anyOf": [
607
+ {
608
+ "type": "string"
609
+ },
610
+ {
611
+ "type": "integer"
612
+ },
613
+ {
614
+ "items": {
615
+ "anyOf": [
616
+ {
617
+ "type": "string"
618
+ },
619
+ {
620
+ "type": "integer"
621
+ }
622
+ ]
623
+ },
624
+ "type": "array"
625
+ },
626
+ {
627
+ "type": "null"
628
+ }
629
+ ],
630
+ "default": null,
631
+ "title": "Sample Id"
632
+ },
611
633
  "epochs": {
612
634
  "anyOf": [
613
635
  {
@@ -745,6 +767,18 @@
745
767
  "default": null,
746
768
  "title": "Max Subprocesses"
747
769
  },
770
+ "max_sandboxes": {
771
+ "anyOf": [
772
+ {
773
+ "type": "integer"
774
+ },
775
+ {
776
+ "type": "null"
777
+ }
778
+ ],
779
+ "default": null,
780
+ "title": "Max Sandboxes"
781
+ },
748
782
  "sandbox_cleanup": {
749
783
  "anyOf": [
750
784
  {
@@ -792,12 +826,25 @@
792
826
  ],
793
827
  "default": null,
794
828
  "title": "Log Buffer"
829
+ },
830
+ "score_display": {
831
+ "anyOf": [
832
+ {
833
+ "type": "boolean"
834
+ },
835
+ {
836
+ "type": "null"
837
+ }
838
+ ],
839
+ "default": null,
840
+ "title": "Score Display"
795
841
  }
796
842
  },
797
843
  "title": "EvalConfig",
798
844
  "type": "object",
799
845
  "required": [
800
846
  "limit",
847
+ "sample_id",
801
848
  "epochs",
802
849
  "epochs_reducer",
803
850
  "trace",
@@ -809,10 +856,12 @@
809
856
  "max_samples",
810
857
  "max_tasks",
811
858
  "max_subprocesses",
859
+ "max_sandboxes",
812
860
  "sandbox_cleanup",
813
861
  "log_samples",
814
862
  "log_images",
815
- "log_buffer"
863
+ "log_buffer",
864
+ "score_display"
816
865
  ],
817
866
  "additionalProperties": false
818
867
  },
@@ -1017,7 +1066,8 @@
1017
1066
  "top_logprobs": null,
1018
1067
  "parallel_tool_calls": null,
1019
1068
  "max_tool_output": null,
1020
- "cache_prompt": null
1069
+ "cache_prompt": null,
1070
+ "reasoning_effort": null
1021
1071
  }
1022
1072
  }
1023
1073
  },
@@ -1097,9 +1147,6 @@
1097
1147
  "properties": {
1098
1148
  "type": {
1099
1149
  "const": "git",
1100
- "enum": [
1101
- "git"
1102
- ],
1103
1150
  "title": "Type",
1104
1151
  "type": "string"
1105
1152
  },
@@ -1435,7 +1482,7 @@
1435
1482
  },
1436
1483
  "samples": {
1437
1484
  "items": {
1438
- "$ref": "#/$defs/SampleScore"
1485
+ "$ref": "#/$defs/EvalSampleScore"
1439
1486
  },
1440
1487
  "title": "Samples",
1441
1488
  "type": "array"
@@ -1450,6 +1497,129 @@
1450
1497
  "type": "object",
1451
1498
  "additionalProperties": false
1452
1499
  },
1500
+ "EvalSampleScore": {
1501
+ "properties": {
1502
+ "value": {
1503
+ "anyOf": [
1504
+ {
1505
+ "type": "string"
1506
+ },
1507
+ {
1508
+ "type": "integer"
1509
+ },
1510
+ {
1511
+ "type": "number"
1512
+ },
1513
+ {
1514
+ "type": "boolean"
1515
+ },
1516
+ {
1517
+ "items": {
1518
+ "anyOf": [
1519
+ {
1520
+ "type": "string"
1521
+ },
1522
+ {
1523
+ "type": "integer"
1524
+ },
1525
+ {
1526
+ "type": "number"
1527
+ },
1528
+ {
1529
+ "type": "boolean"
1530
+ }
1531
+ ]
1532
+ },
1533
+ "type": "array"
1534
+ },
1535
+ {
1536
+ "additionalProperties": {
1537
+ "anyOf": [
1538
+ {
1539
+ "type": "string"
1540
+ },
1541
+ {
1542
+ "type": "integer"
1543
+ },
1544
+ {
1545
+ "type": "number"
1546
+ },
1547
+ {
1548
+ "type": "boolean"
1549
+ },
1550
+ {
1551
+ "type": "null"
1552
+ }
1553
+ ]
1554
+ },
1555
+ "type": "object"
1556
+ }
1557
+ ],
1558
+ "title": "Value"
1559
+ },
1560
+ "answer": {
1561
+ "anyOf": [
1562
+ {
1563
+ "type": "string"
1564
+ },
1565
+ {
1566
+ "type": "null"
1567
+ }
1568
+ ],
1569
+ "default": null,
1570
+ "title": "Answer"
1571
+ },
1572
+ "explanation": {
1573
+ "anyOf": [
1574
+ {
1575
+ "type": "string"
1576
+ },
1577
+ {
1578
+ "type": "null"
1579
+ }
1580
+ ],
1581
+ "default": null,
1582
+ "title": "Explanation"
1583
+ },
1584
+ "metadata": {
1585
+ "anyOf": [
1586
+ {
1587
+ "type": "object"
1588
+ },
1589
+ {
1590
+ "type": "null"
1591
+ }
1592
+ ],
1593
+ "default": null,
1594
+ "title": "Metadata"
1595
+ },
1596
+ "sample_id": {
1597
+ "anyOf": [
1598
+ {
1599
+ "type": "string"
1600
+ },
1601
+ {
1602
+ "type": "integer"
1603
+ },
1604
+ {
1605
+ "type": "null"
1606
+ }
1607
+ ],
1608
+ "default": null,
1609
+ "title": "Sample Id"
1610
+ }
1611
+ },
1612
+ "required": [
1613
+ "value",
1614
+ "answer",
1615
+ "explanation",
1616
+ "metadata",
1617
+ "sample_id"
1618
+ ],
1619
+ "title": "EvalSampleScore",
1620
+ "type": "object",
1621
+ "additionalProperties": false
1622
+ },
1453
1623
  "EvalScore": {
1454
1624
  "properties": {
1455
1625
  "name": {
@@ -1964,9 +2134,6 @@
1964
2134
  "anyOf": [
1965
2135
  {
1966
2136
  "const": "auto",
1967
- "enum": [
1968
- "auto"
1969
- ],
1970
2137
  "type": "string"
1971
2138
  },
1972
2139
  {
@@ -1978,6 +2145,23 @@
1978
2145
  ],
1979
2146
  "default": null,
1980
2147
  "title": "Cache Prompt"
2148
+ },
2149
+ "reasoning_effort": {
2150
+ "anyOf": [
2151
+ {
2152
+ "enum": [
2153
+ "low",
2154
+ "medium",
2155
+ "high"
2156
+ ],
2157
+ "type": "string"
2158
+ },
2159
+ {
2160
+ "type": "null"
2161
+ }
2162
+ ],
2163
+ "default": null,
2164
+ "title": "Reasoning Effort"
1981
2165
  }
1982
2166
  },
1983
2167
  "title": "GenerateConfig",
@@ -2003,7 +2187,8 @@
2003
2187
  "top_logprobs",
2004
2188
  "parallel_tool_calls",
2005
2189
  "max_tool_output",
2006
- "cache_prompt"
2190
+ "cache_prompt",
2191
+ "reasoning_effort"
2007
2192
  ],
2008
2193
  "additionalProperties": false
2009
2194
  },
@@ -2030,9 +2215,6 @@
2030
2215
  "event": {
2031
2216
  "const": "info",
2032
2217
  "default": "info",
2033
- "enum": [
2034
- "info"
2035
- ],
2036
2218
  "title": "Event",
2037
2219
  "type": "string"
2038
2220
  },
@@ -2073,9 +2255,6 @@
2073
2255
  "event": {
2074
2256
  "const": "input",
2075
2257
  "default": "input",
2076
- "enum": [
2077
- "input"
2078
- ],
2079
2258
  "title": "Event",
2080
2259
  "type": "string"
2081
2260
  },
@@ -2174,9 +2353,6 @@
2174
2353
  "event": {
2175
2354
  "const": "logger",
2176
2355
  "default": "logger",
2177
- "enum": [
2178
- "logger"
2179
- ],
2180
2356
  "title": "Event",
2181
2357
  "type": "string"
2182
2358
  },
@@ -2377,9 +2553,6 @@
2377
2553
  "event": {
2378
2554
  "const": "model",
2379
2555
  "default": "model",
2380
- "enum": [
2381
- "model"
2382
- ],
2383
2556
  "title": "Event",
2384
2557
  "type": "string"
2385
2558
  },
@@ -2769,9 +2942,6 @@
2769
2942
  "event": {
2770
2943
  "const": "sample_init",
2771
2944
  "default": "sample_init",
2772
- "enum": [
2773
- "sample_init"
2774
- ],
2775
2945
  "title": "Event",
2776
2946
  "type": "string"
2777
2947
  },
@@ -2816,9 +2986,6 @@
2816
2986
  "event": {
2817
2987
  "const": "sample_limit",
2818
2988
  "default": "sample_limit",
2819
- "enum": [
2820
- "sample_limit"
2821
- ],
2822
2989
  "title": "Event",
2823
2990
  "type": "string"
2824
2991
  },
@@ -2861,130 +3028,6 @@
2861
3028
  "type": "object",
2862
3029
  "additionalProperties": false
2863
3030
  },
2864
- "SampleScore": {
2865
- "description": "Score for a Sample\n\nArgs:\n sample_id: (str | int | None) Unique id of a sample",
2866
- "properties": {
2867
- "value": {
2868
- "anyOf": [
2869
- {
2870
- "type": "string"
2871
- },
2872
- {
2873
- "type": "integer"
2874
- },
2875
- {
2876
- "type": "number"
2877
- },
2878
- {
2879
- "type": "boolean"
2880
- },
2881
- {
2882
- "items": {
2883
- "anyOf": [
2884
- {
2885
- "type": "string"
2886
- },
2887
- {
2888
- "type": "integer"
2889
- },
2890
- {
2891
- "type": "number"
2892
- },
2893
- {
2894
- "type": "boolean"
2895
- }
2896
- ]
2897
- },
2898
- "type": "array"
2899
- },
2900
- {
2901
- "additionalProperties": {
2902
- "anyOf": [
2903
- {
2904
- "type": "string"
2905
- },
2906
- {
2907
- "type": "integer"
2908
- },
2909
- {
2910
- "type": "number"
2911
- },
2912
- {
2913
- "type": "boolean"
2914
- },
2915
- {
2916
- "type": "null"
2917
- }
2918
- ]
2919
- },
2920
- "type": "object"
2921
- }
2922
- ],
2923
- "title": "Value"
2924
- },
2925
- "answer": {
2926
- "anyOf": [
2927
- {
2928
- "type": "string"
2929
- },
2930
- {
2931
- "type": "null"
2932
- }
2933
- ],
2934
- "default": null,
2935
- "title": "Answer"
2936
- },
2937
- "explanation": {
2938
- "anyOf": [
2939
- {
2940
- "type": "string"
2941
- },
2942
- {
2943
- "type": "null"
2944
- }
2945
- ],
2946
- "default": null,
2947
- "title": "Explanation"
2948
- },
2949
- "metadata": {
2950
- "anyOf": [
2951
- {
2952
- "type": "object"
2953
- },
2954
- {
2955
- "type": "null"
2956
- }
2957
- ],
2958
- "default": null,
2959
- "title": "Metadata"
2960
- },
2961
- "sample_id": {
2962
- "anyOf": [
2963
- {
2964
- "type": "string"
2965
- },
2966
- {
2967
- "type": "integer"
2968
- },
2969
- {
2970
- "type": "null"
2971
- }
2972
- ],
2973
- "default": null,
2974
- "title": "Sample Id"
2975
- }
2976
- },
2977
- "required": [
2978
- "value",
2979
- "answer",
2980
- "explanation",
2981
- "metadata",
2982
- "sample_id"
2983
- ],
2984
- "title": "SampleScore",
2985
- "type": "object",
2986
- "additionalProperties": false
2987
- },
2988
3031
  "SandboxEnvironmentSpec": {
2989
3032
  "maxItems": 2,
2990
3033
  "minItems": 1,
@@ -2995,6 +3038,9 @@
2995
3038
  },
2996
3039
  {
2997
3040
  "anyOf": [
3041
+ {
3042
+ "$ref": "#/$defs/BaseModel"
3043
+ },
2998
3044
  {
2999
3045
  "type": "string"
3000
3046
  },
@@ -3139,9 +3185,6 @@
3139
3185
  "event": {
3140
3186
  "const": "score",
3141
3187
  "default": "score",
3142
- "enum": [
3143
- "score"
3144
- ],
3145
3188
  "title": "Event",
3146
3189
  "type": "string"
3147
3190
  },
@@ -3201,9 +3244,6 @@
3201
3244
  "event": {
3202
3245
  "const": "state",
3203
3246
  "default": "state",
3204
- "enum": [
3205
- "state"
3206
- ],
3207
3247
  "title": "Event",
3208
3248
  "type": "string"
3209
3249
  },
@@ -3248,9 +3288,6 @@
3248
3288
  "event": {
3249
3289
  "const": "step",
3250
3290
  "default": "step",
3251
- "enum": [
3252
- "step"
3253
- ],
3254
3291
  "title": "Event",
3255
3292
  "type": "string"
3256
3293
  },
@@ -3314,9 +3351,6 @@
3314
3351
  "event": {
3315
3352
  "const": "store",
3316
3353
  "default": "store",
3317
- "enum": [
3318
- "store"
3319
- ],
3320
3354
  "title": "Event",
3321
3355
  "type": "string"
3322
3356
  },
@@ -3361,9 +3395,6 @@
3361
3395
  "event": {
3362
3396
  "const": "subtask",
3363
3397
  "default": "subtask",
3364
- "enum": [
3365
- "subtask"
3366
- ],
3367
3398
  "title": "Event",
3368
3399
  "type": "string"
3369
3400
  },
@@ -3472,9 +3503,6 @@
3472
3503
  },
3473
3504
  "type": {
3474
3505
  "const": "function",
3475
- "enum": [
3476
- "function"
3477
- ],
3478
3506
  "title": "Type",
3479
3507
  "type": "string"
3480
3508
  },
@@ -3638,18 +3666,12 @@
3638
3666
  "event": {
3639
3667
  "const": "tool",
3640
3668
  "default": "tool",
3641
- "enum": [
3642
- "tool"
3643
- ],
3644
3669
  "title": "Event",
3645
3670
  "type": "string"
3646
3671
  },
3647
3672
  "type": {
3648
3673
  "const": "function",
3649
3674
  "default": "function",
3650
- "enum": [
3651
- "function"
3652
- ],
3653
3675
  "title": "Type",
3654
3676
  "type": "string"
3655
3677
  },
@@ -3693,6 +3715,12 @@
3693
3715
  {
3694
3716
  "type": "boolean"
3695
3717
  },
3718
+ {
3719
+ "$ref": "#/$defs/ContentText"
3720
+ },
3721
+ {
3722
+ "$ref": "#/$defs/ContentImage"
3723
+ },
3696
3724
  {
3697
3725
  "items": {
3698
3726
  "anyOf": [
@@ -3853,18 +3881,25 @@
3853
3881
  "description": "Description of tool parameter in JSON Schema format.",
3854
3882
  "properties": {
3855
3883
  "type": {
3856
- "default": "null",
3857
- "enum": [
3858
- "string",
3859
- "integer",
3860
- "number",
3861
- "boolean",
3862
- "array",
3863
- "object",
3864
- "null"
3884
+ "anyOf": [
3885
+ {
3886
+ "enum": [
3887
+ "string",
3888
+ "integer",
3889
+ "number",
3890
+ "boolean",
3891
+ "array",
3892
+ "object",
3893
+ "null"
3894
+ ],
3895
+ "type": "string"
3896
+ },
3897
+ {
3898
+ "type": "null"
3899
+ }
3865
3900
  ],
3866
- "title": "Type",
3867
- "type": "string"
3901
+ "default": null,
3902
+ "title": "Type"
3868
3903
  },
3869
3904
  "description": {
3870
3905
  "anyOf": [
@@ -3882,6 +3917,19 @@
3882
3917
  "default": null,
3883
3918
  "title": "Default"
3884
3919
  },
3920
+ "enum": {
3921
+ "anyOf": [
3922
+ {
3923
+ "items": {},
3924
+ "type": "array"
3925
+ },
3926
+ {
3927
+ "type": "null"
3928
+ }
3929
+ ],
3930
+ "default": null,
3931
+ "title": "Enum"
3932
+ },
3885
3933
  "items": {
3886
3934
  "anyOf": [
3887
3935
  {
@@ -3960,6 +4008,7 @@
3960
4008
  "type",
3961
4009
  "description",
3962
4010
  "default",
4011
+ "enum",
3963
4012
  "items",
3964
4013
  "properties",
3965
4014
  "additionalProperties",
@@ -3974,9 +4023,6 @@
3974
4023
  "type": {
3975
4024
  "const": "object",
3976
4025
  "default": "object",
3977
- "enum": [
3978
- "object"
3979
- ],
3980
4026
  "title": "Type",
3981
4027
  "type": "string"
3982
4028
  },
@@ -4086,6 +4132,7 @@
4086
4132
  "num_choices": null,
4087
4133
  "parallel_tool_calls": null,
4088
4134
  "presence_penalty": null,
4135
+ "reasoning_effort": null,
4089
4136
  "seed": null,
4090
4137
  "stop_seqs": null,
4091
4138
  "suffix": null,