inspect-ai 0.3.57__py3-none-any.whl → 0.3.58__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/common.py +4 -2
- inspect_ai/_cli/eval.py +2 -0
- inspect_ai/_cli/trace.py +21 -2
- inspect_ai/_display/core/active.py +0 -2
- inspect_ai/_display/rich/display.py +4 -4
- inspect_ai/_display/textual/app.py +4 -1
- inspect_ai/_display/textual/widgets/samples.py +41 -5
- inspect_ai/_eval/eval.py +32 -20
- inspect_ai/_eval/evalset.py +7 -5
- inspect_ai/_eval/task/__init__.py +2 -2
- inspect_ai/_eval/task/images.py +40 -25
- inspect_ai/_eval/task/run.py +141 -119
- inspect_ai/_eval/task/task.py +140 -25
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/content.py +23 -1
- inspect_ai/_util/images.py +20 -17
- inspect_ai/_util/kvstore.py +73 -0
- inspect_ai/_util/notgiven.py +18 -0
- inspect_ai/_util/thread.py +5 -0
- inspect_ai/_view/www/dist/assets/index.js +37 -3
- inspect_ai/_view/www/log-schema.json +97 -13
- inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
- inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +5 -1
- inspect_ai/_view/www/src/types/log.d.ts +51 -27
- inspect_ai/approval/_human/util.py +2 -2
- inspect_ai/dataset/_sources/csv.py +2 -1
- inspect_ai/dataset/_sources/json.py +2 -1
- inspect_ai/dataset/_sources/util.py +15 -7
- inspect_ai/log/_condense.py +11 -1
- inspect_ai/log/_log.py +2 -5
- inspect_ai/log/_recorders/eval.py +19 -8
- inspect_ai/log/_samples.py +10 -5
- inspect_ai/log/_transcript.py +28 -1
- inspect_ai/model/__init__.py +10 -2
- inspect_ai/model/_call_tools.py +55 -12
- inspect_ai/model/_chat_message.py +2 -4
- inspect_ai/model/{_trace.py → _conversation.py} +9 -8
- inspect_ai/model/_model.py +2 -2
- inspect_ai/model/_providers/anthropic.py +9 -7
- inspect_ai/model/_providers/azureai.py +6 -4
- inspect_ai/model/_providers/bedrock.py +6 -4
- inspect_ai/model/_providers/google.py +79 -8
- inspect_ai/model/_providers/groq.py +7 -5
- inspect_ai/model/_providers/hf.py +11 -6
- inspect_ai/model/_providers/mistral.py +6 -9
- inspect_ai/model/_providers/openai.py +17 -5
- inspect_ai/model/_providers/vertex.py +17 -4
- inspect_ai/scorer/__init__.py +13 -2
- inspect_ai/scorer/_metrics/__init__.py +2 -2
- inspect_ai/scorer/_metrics/std.py +3 -3
- inspect_ai/tool/__init__.py +9 -1
- inspect_ai/tool/_tool.py +9 -2
- inspect_ai/util/__init__.py +0 -3
- inspect_ai/util/{_trace.py → _conversation.py} +3 -17
- inspect_ai/util/_display.py +14 -4
- inspect_ai/util/_sandbox/context.py +12 -13
- inspect_ai/util/_sandbox/docker/compose.py +24 -11
- inspect_ai/util/_sandbox/docker/docker.py +20 -13
- inspect_ai/util/_sandbox/environment.py +13 -1
- inspect_ai/util/_sandbox/local.py +1 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/RECORD +68 -65
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,73 @@
|
|
1
|
+
import sqlite3
|
2
|
+
from contextlib import AbstractContextManager
|
3
|
+
from typing import Any, Optional, cast
|
4
|
+
|
5
|
+
from .appdirs import inspect_data_dir
|
6
|
+
|
7
|
+
|
8
|
+
class KVStore(AbstractContextManager["KVStore"]):
|
9
|
+
def __init__(self, filename: str, max_entries: int | None = None):
|
10
|
+
self.filename = filename
|
11
|
+
self.max_entries = max_entries
|
12
|
+
|
13
|
+
def __enter__(self) -> "KVStore":
|
14
|
+
self.conn = sqlite3.connect(self.filename)
|
15
|
+
self.conn.execute("""
|
16
|
+
CREATE TABLE IF NOT EXISTS kv_store (
|
17
|
+
key TEXT PRIMARY KEY,
|
18
|
+
value TEXT,
|
19
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
20
|
+
)
|
21
|
+
""")
|
22
|
+
self.conn.commit()
|
23
|
+
return self
|
24
|
+
|
25
|
+
def __exit__(self, *excinfo: Any) -> None:
|
26
|
+
self.conn.close()
|
27
|
+
|
28
|
+
def put(self, key: str, value: str) -> None:
|
29
|
+
# Insert or update the value
|
30
|
+
self.conn.execute(
|
31
|
+
"""
|
32
|
+
INSERT OR REPLACE INTO kv_store (key, value, created_at)
|
33
|
+
VALUES (?, ?, CURRENT_TIMESTAMP)
|
34
|
+
""",
|
35
|
+
(key, value),
|
36
|
+
)
|
37
|
+
|
38
|
+
# If we have a max_entries limit, remove oldest entries
|
39
|
+
if self.max_entries:
|
40
|
+
count = self.count()
|
41
|
+
if count > self.max_entries:
|
42
|
+
self.conn.execute(
|
43
|
+
"""
|
44
|
+
DELETE FROM kv_store
|
45
|
+
WHERE key IN (
|
46
|
+
SELECT key FROM kv_store
|
47
|
+
ORDER BY created_at ASC
|
48
|
+
LIMIT ?
|
49
|
+
)
|
50
|
+
""",
|
51
|
+
(max(0, count - self.max_entries),),
|
52
|
+
)
|
53
|
+
|
54
|
+
self.conn.commit()
|
55
|
+
|
56
|
+
def get(self, key: str) -> Optional[str]:
|
57
|
+
cursor = self.conn.execute("SELECT value FROM kv_store WHERE key = ?", (key,))
|
58
|
+
result = cursor.fetchone()
|
59
|
+
return result[0] if result else None
|
60
|
+
|
61
|
+
def delete(self, key: str) -> bool:
|
62
|
+
cursor = self.conn.execute("DELETE FROM kv_store WHERE key = ?", (key,))
|
63
|
+
self.conn.commit()
|
64
|
+
return cursor.rowcount > 0
|
65
|
+
|
66
|
+
def count(self) -> int:
|
67
|
+
cursor = self.conn.execute("SELECT COUNT(*) FROM kv_store")
|
68
|
+
return cast(int, cursor.fetchone()[0])
|
69
|
+
|
70
|
+
|
71
|
+
def inspect_kvstore(name: str, max_entries: int | None = None) -> KVStore:
    """Build a ``KVStore`` for the database named ``name``.

    The database file is ``<name>.db`` inside the directory returned by
    ``inspect_data_dir("kvstore")``. The store is returned unopened; use it
    as a context manager to connect.

    Args:
        name: Base name (without extension) of the database file.
        max_entries: Forwarded unchanged to ``KVStore``.

    Returns:
        A ``KVStore`` pointing at the resolved database path.
    """
    store_dir = inspect_data_dir("kvstore")
    db_file = store_dir / f"{name}.db"
    return KVStore(db_file.as_posix(), max_entries=max_entries)
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Sentinel class used until PEP 661 (sentinel values) is accepted
|
2
|
+
from typing import Literal
|
3
|
+
|
4
|
+
from typing_extensions import override
|
5
|
+
|
6
|
+
|
7
|
+
class NotGiven:
    """Sentinel type marking a keyword argument that the caller omitted.

    It lets an API distinguish "argument not passed" from an explicit
    ``None``, which may have different behavior. Instances are always
    falsy and print as ``NOT_GIVEN``.
    """

    @override
    def __repr__(self) -> str:
        return "NOT_GIVEN"

    def __bool__(self) -> Literal[False]:
        # Always falsy, so `if not value:` treats the sentinel as "unset".
        return False


# Module-level sentinel instance.
NOT_GIVEN = NotGiven()
|
@@ -15547,12 +15547,46 @@ var require_assets = __commonJS({
|
|
15547
15547
|
}
|
15548
15548
|
}
|
15549
15549
|
},
|
15550
|
+
audio: {
|
15551
|
+
render: (content) => {
|
15552
|
+
return m$1` <audio controls>
|
15553
|
+
<source
|
15554
|
+
src=${content.audio}
|
15555
|
+
type=${mimeTypeForFormat(content.format)}
|
15556
|
+
/>
|
15557
|
+
</audio>`;
|
15558
|
+
}
|
15559
|
+
},
|
15560
|
+
video: {
|
15561
|
+
render: (content) => {
|
15562
|
+
return m$1` <video width="500" height="375" controls>
|
15563
|
+
<source
|
15564
|
+
src=${content.video}
|
15565
|
+
type=${mimeTypeForFormat(content.format)}
|
15566
|
+
/>
|
15567
|
+
</video>`;
|
15568
|
+
}
|
15569
|
+
},
|
15550
15570
|
tool: {
|
15551
15571
|
render: (content) => {
|
15552
15572
|
return m$1`<${ToolOutput} output=${content.content} />`;
|
15553
15573
|
}
|
15554
15574
|
}
|
15555
15575
|
};
|
15576
|
+
// Maps a media format identifier to the MIME type used for the `type`
// attribute of the audio/video <source> element. Unrecognized formats
// yield undefined.
const mimeTypeForFormat = (format2) => {
  if (format2 === "mov") {
    return "video/quicktime";
  }
  if (format2 === "wav") {
    return "audio/wav";
  }
  if (format2 === "mp3") {
    return "audio/mpeg";
  }
  if (format2 === "mp4") {
    return "video/mp4";
  }
  if (format2 === "mpeg") {
    return "video/mpeg";
  }
  return void 0;
};
|
15556
15590
|
const ChatView = ({
|
15557
15591
|
id,
|
15558
15592
|
messages,
|
@@ -16493,7 +16527,7 @@ ${entry.value}</pre
|
|
16493
16527
|
};
|
16494
16528
|
const MessageBand = ({ message, hidden, setHidden, type }) => {
|
16495
16529
|
const bgColor = type === "info" ? "var(--bs-light)" : "var(--bs-" + type + "-bg-subtle)";
|
16496
|
-
const color =
|
16530
|
+
const color = type === "info" ? void 0 : "var(--bs-" + type + "-text-emphasis)";
|
16497
16531
|
return m$1`
|
16498
16532
|
<div
|
16499
16533
|
style=${{
|
@@ -16516,7 +16550,7 @@ ${entry.value}</pre
|
|
16516
16550
|
fontSize: FontSize["title-secondary"],
|
16517
16551
|
margin: "0",
|
16518
16552
|
padding: "0",
|
16519
|
-
color
|
16553
|
+
color,
|
16520
16554
|
height: FontSize["title-secondary"],
|
16521
16555
|
lineHeight: FontSize["title-secondary"]
|
16522
16556
|
}}
|
@@ -30728,7 +30762,7 @@ self.onmessage = function (e) {
|
|
30728
30762
|
var _a2;
|
30729
30763
|
const text2 = inputString(current.input).join(" ");
|
30730
30764
|
const scoreValue = evalDescriptor.score(current, selectedScore).value;
|
30731
|
-
const scoreText = scoreValue ? String(scoreValue) : "";
|
30765
|
+
const scoreText = scoreValue ? String(scoreValue) : current.error ? String(current.error) : "";
|
30732
30766
|
previous[0] = Math.min(Math.max(previous[0], text2.length), 300);
|
30733
30767
|
previous[1] = Math.min(
|
30734
30768
|
Math.max(previous[1], arrayToString(current.target).length),
|
@@ -210,6 +210,12 @@
|
|
210
210
|
},
|
211
211
|
{
|
212
212
|
"$ref": "#/$defs/ContentImage"
|
213
|
+
},
|
214
|
+
{
|
215
|
+
"$ref": "#/$defs/ContentAudio"
|
216
|
+
},
|
217
|
+
{
|
218
|
+
"$ref": "#/$defs/ContentVideo"
|
213
219
|
}
|
214
220
|
]
|
215
221
|
},
|
@@ -281,6 +287,12 @@
|
|
281
287
|
},
|
282
288
|
{
|
283
289
|
"$ref": "#/$defs/ContentImage"
|
290
|
+
},
|
291
|
+
{
|
292
|
+
"$ref": "#/$defs/ContentAudio"
|
293
|
+
},
|
294
|
+
{
|
295
|
+
"$ref": "#/$defs/ContentVideo"
|
284
296
|
}
|
285
297
|
]
|
286
298
|
},
|
@@ -336,6 +348,12 @@
|
|
336
348
|
},
|
337
349
|
{
|
338
350
|
"$ref": "#/$defs/ContentImage"
|
351
|
+
},
|
352
|
+
{
|
353
|
+
"$ref": "#/$defs/ContentAudio"
|
354
|
+
},
|
355
|
+
{
|
356
|
+
"$ref": "#/$defs/ContentVideo"
|
339
357
|
}
|
340
358
|
]
|
341
359
|
},
|
@@ -429,6 +447,12 @@
|
|
429
447
|
},
|
430
448
|
{
|
431
449
|
"$ref": "#/$defs/ContentImage"
|
450
|
+
},
|
451
|
+
{
|
452
|
+
"$ref": "#/$defs/ContentAudio"
|
453
|
+
},
|
454
|
+
{
|
455
|
+
"$ref": "#/$defs/ContentVideo"
|
432
456
|
}
|
433
457
|
]
|
434
458
|
},
|
@@ -482,6 +506,36 @@
|
|
482
506
|
"type": "object",
|
483
507
|
"additionalProperties": false
|
484
508
|
},
|
509
|
+
"ContentAudio": {
|
510
|
+
"properties": {
|
511
|
+
"type": {
|
512
|
+
"const": "audio",
|
513
|
+
"default": "audio",
|
514
|
+
"title": "Type",
|
515
|
+
"type": "string"
|
516
|
+
},
|
517
|
+
"audio": {
|
518
|
+
"title": "Audio",
|
519
|
+
"type": "string"
|
520
|
+
},
|
521
|
+
"format": {
|
522
|
+
"enum": [
|
523
|
+
"wav",
|
524
|
+
"mp3"
|
525
|
+
],
|
526
|
+
"title": "Format",
|
527
|
+
"type": "string"
|
528
|
+
}
|
529
|
+
},
|
530
|
+
"required": [
|
531
|
+
"type",
|
532
|
+
"audio",
|
533
|
+
"format"
|
534
|
+
],
|
535
|
+
"title": "ContentAudio",
|
536
|
+
"type": "object",
|
537
|
+
"additionalProperties": false
|
538
|
+
},
|
485
539
|
"ContentImage": {
|
486
540
|
"properties": {
|
487
541
|
"type": {
|
@@ -535,6 +589,37 @@
|
|
535
589
|
"type": "object",
|
536
590
|
"additionalProperties": false
|
537
591
|
},
|
592
|
+
"ContentVideo": {
|
593
|
+
"properties": {
|
594
|
+
"type": {
|
595
|
+
"const": "video",
|
596
|
+
"default": "video",
|
597
|
+
"title": "Type",
|
598
|
+
"type": "string"
|
599
|
+
},
|
600
|
+
"video": {
|
601
|
+
"title": "Video",
|
602
|
+
"type": "string"
|
603
|
+
},
|
604
|
+
"format": {
|
605
|
+
"enum": [
|
606
|
+
"mp4",
|
607
|
+
"mpeg",
|
608
|
+
"mov"
|
609
|
+
],
|
610
|
+
"title": "Format",
|
611
|
+
"type": "string"
|
612
|
+
}
|
613
|
+
},
|
614
|
+
"required": [
|
615
|
+
"type",
|
616
|
+
"video",
|
617
|
+
"format"
|
618
|
+
],
|
619
|
+
"title": "ContentVideo",
|
620
|
+
"type": "object",
|
621
|
+
"additionalProperties": false
|
622
|
+
},
|
538
623
|
"ErrorEvent": {
|
539
624
|
"description": "Event with sample error.",
|
540
625
|
"properties": {
|
@@ -657,18 +742,6 @@
|
|
657
742
|
"default": null,
|
658
743
|
"title": "Epochs Reducer"
|
659
744
|
},
|
660
|
-
"trace": {
|
661
|
-
"anyOf": [
|
662
|
-
{
|
663
|
-
"type": "boolean"
|
664
|
-
},
|
665
|
-
{
|
666
|
-
"type": "null"
|
667
|
-
}
|
668
|
-
],
|
669
|
-
"default": null,
|
670
|
-
"title": "Trace"
|
671
|
-
},
|
672
745
|
"approval": {
|
673
746
|
"anyOf": [
|
674
747
|
{
|
@@ -847,7 +920,6 @@
|
|
847
920
|
"sample_id",
|
848
921
|
"epochs",
|
849
922
|
"epochs_reducer",
|
850
|
-
"trace",
|
851
923
|
"approval",
|
852
924
|
"fail_on_error",
|
853
925
|
"message_limit",
|
@@ -3721,6 +3793,12 @@
|
|
3721
3793
|
{
|
3722
3794
|
"$ref": "#/$defs/ContentImage"
|
3723
3795
|
},
|
3796
|
+
{
|
3797
|
+
"$ref": "#/$defs/ContentAudio"
|
3798
|
+
},
|
3799
|
+
{
|
3800
|
+
"$ref": "#/$defs/ContentVideo"
|
3801
|
+
},
|
3724
3802
|
{
|
3725
3803
|
"items": {
|
3726
3804
|
"anyOf": [
|
@@ -3729,6 +3807,12 @@
|
|
3729
3807
|
},
|
3730
3808
|
{
|
3731
3809
|
"$ref": "#/$defs/ContentImage"
|
3810
|
+
},
|
3811
|
+
{
|
3812
|
+
"$ref": "#/$defs/ContentAudio"
|
3813
|
+
},
|
3814
|
+
{
|
3815
|
+
"$ref": "#/$defs/ContentVideo"
|
3732
3816
|
}
|
3733
3817
|
]
|
3734
3818
|
},
|
@@ -8,7 +8,7 @@ export const MessageBand = ({ message, hidden, setHidden, type }) => {
|
|
8
8
|
const bgColor =
|
9
9
|
type === "info" ? "var(--bs-light)" : "var(--bs-" + type + "-bg-subtle)";
|
10
10
|
const color =
|
11
|
-
|
11
|
+
type === "info" ? undefined : "var(--bs-" + type + "-text-emphasis)";
|
12
12
|
|
13
13
|
return html`
|
14
14
|
<div
|
@@ -32,7 +32,7 @@ export const MessageBand = ({ message, hidden, setHidden, type }) => {
|
|
32
32
|
fontSize: FontSize["title-secondary"],
|
33
33
|
margin: "0",
|
34
34
|
padding: "0",
|
35
|
-
color:
|
35
|
+
color: color,
|
36
36
|
height: FontSize["title-secondary"],
|
37
37
|
lineHeight: FontSize["title-secondary"],
|
38
38
|
}}
|
@@ -7,7 +7,7 @@ import { ToolOutput } from "./Tools.mjs";
|
|
7
7
|
* Supports rendering strings, images, audio, video, and tools using specific renderers.
|
8
8
|
*
|
9
9
|
* @param {Object} props - The props object.
|
10
|
-
* @param {string|string[]| (import("../types/log").ContentText | import("../types/log").ContentImage | import("../Types.mjs").ContentTool)[]} props.contents - The content or array of contents to render.
|
10
|
+
* @param {string|string[]| (import("../types/log").ContentText | import("../types/log").ContentImage | import("../types/log").ContentAudio | import("../types/log").ContentVideo | import("../Types.mjs").ContentTool)[]} props.contents - The content or array of contents to render.
|
11
11
|
* @returns {import("preact").JSX.Element | import("preact").JSX.Element[]} The component.
|
12
12
|
*/
|
13
13
|
export const MessageContent = ({ contents }) => {
|
@@ -61,9 +61,51 @@ const messageRenderers = {
|
|
61
61
|
}
|
62
62
|
},
|
63
63
|
},
|
64
|
+
audio: {
|
65
|
+
render: (content) => {
|
66
|
+
return html` <audio controls>
|
67
|
+
<source
|
68
|
+
src=${content.audio}
|
69
|
+
type=${mimeTypeForFormat(content.format)}
|
70
|
+
/>
|
71
|
+
</audio>`;
|
72
|
+
},
|
73
|
+
},
|
74
|
+
video: {
|
75
|
+
render: (content) => {
|
76
|
+
return html` <video width="500" height="375" controls>
|
77
|
+
<source
|
78
|
+
src=${content.video}
|
79
|
+
type=${mimeTypeForFormat(content.format)}
|
80
|
+
/>
|
81
|
+
</video>`;
|
82
|
+
},
|
83
|
+
},
|
64
84
|
tool: {
|
65
85
|
render: (content) => {
|
66
86
|
return html`<${ToolOutput} output=${content.content} />`;
|
67
87
|
},
|
68
88
|
},
|
69
89
|
};
|
90
|
+
|
91
|
+
/**
 * Maps a media format identifier to the MIME type used for the `type`
 * attribute of an audio or video `<source>` element.
 *
 * @param {import("../types/log").Format | import("../types/log").Format1 } format - The media format ("wav", "mp3", "mp4", "mpeg" or "mov").
 * @returns {string | undefined} - The MIME type, or undefined when the format is not recognized.
 */
const mimeTypeForFormat = (format) => {
  const mimeTypes = new Map([
    ["mov", "video/quicktime"],
    ["wav", "audio/wav"],
    ["mp3", "audio/mpeg"],
    ["mp4", "video/mp4"],
    ["mpeg", "video/mpeg"],
  ]);
  // Map#get returns undefined for unknown keys, like the missing default case.
  return mimeTypes.get(format);
};
|
@@ -377,7 +377,11 @@ export const createSamplesDescriptor = (evalDescriptor, selectedScore) => {
|
|
377
377
|
(previous, current) => {
|
378
378
|
const text = inputString(current.input).join(" ");
|
379
379
|
const scoreValue = evalDescriptor.score(current, selectedScore).value;
|
380
|
-
const scoreText = scoreValue
|
380
|
+
const scoreText = scoreValue
|
381
|
+
? String(scoreValue)
|
382
|
+
: current.error
|
383
|
+
? String(current.error)
|
384
|
+
: "";
|
381
385
|
previous[0] = Math.min(Math.max(previous[0], text.length), 300);
|
382
386
|
previous[1] = Math.min(
|
383
387
|
Math.max(previous[1], arrayToString(current.target).length),
|
@@ -32,7 +32,6 @@ export type Limit = number | [unknown, unknown] | null;
|
|
32
32
|
export type SampleId = string | number | (string | number)[] | null;
|
33
33
|
export type Epochs = number | null;
|
34
34
|
export type EpochsReducer = string[] | null;
|
35
|
-
export type Trace = boolean | null;
|
36
35
|
export type Name1 = string;
|
37
36
|
export type Tools = string | string[];
|
38
37
|
export type Approvers = ApproverPolicyConfig[];
|
@@ -112,35 +111,49 @@ export type Input =
|
|
112
111
|
| ChatMessageAssistant
|
113
112
|
| ChatMessageTool
|
114
113
|
)[];
|
115
|
-
export type Content =
|
114
|
+
export type Content =
|
115
|
+
| string
|
116
|
+
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
116
117
|
export type Type1 = "text";
|
117
118
|
export type Text = string;
|
118
119
|
export type Type2 = "image";
|
119
120
|
export type Image = string;
|
120
121
|
export type Detail = "auto" | "low" | "high";
|
122
|
+
export type Type3 = "audio";
|
123
|
+
export type Audio = string;
|
124
|
+
export type Format = "wav" | "mp3";
|
125
|
+
export type Type4 = "video";
|
126
|
+
export type Video = string;
|
127
|
+
export type Format1 = "mp4" | "mpeg" | "mov";
|
121
128
|
export type Source = ("input" | "generate") | null;
|
122
129
|
export type Role = "system";
|
123
|
-
export type Content1 =
|
130
|
+
export type Content1 =
|
131
|
+
| string
|
132
|
+
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
124
133
|
export type Source1 = ("input" | "generate") | null;
|
125
134
|
export type Role1 = "user";
|
126
135
|
export type ToolCallId = string | null;
|
127
|
-
export type Content2 =
|
136
|
+
export type Content2 =
|
137
|
+
| string
|
138
|
+
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
128
139
|
export type Source2 = ("input" | "generate") | null;
|
129
140
|
export type Role2 = "assistant";
|
130
141
|
export type ToolCalls = ToolCall[] | null;
|
131
142
|
export type Id1 = string;
|
132
143
|
export type Function = string;
|
133
|
-
export type
|
144
|
+
export type Type5 = "function";
|
134
145
|
export type ParseError = string | null;
|
135
146
|
export type Title = string | null;
|
136
|
-
export type
|
147
|
+
export type Format2 = "text" | "markdown";
|
137
148
|
export type Content3 = string;
|
138
|
-
export type Content4 =
|
149
|
+
export type Content4 =
|
150
|
+
| string
|
151
|
+
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
139
152
|
export type Source3 = ("input" | "generate") | null;
|
140
153
|
export type Role3 = "tool";
|
141
154
|
export type ToolCallId1 = string | null;
|
142
155
|
export type Function1 = string | null;
|
143
|
-
export type
|
156
|
+
export type Type6 =
|
144
157
|
| "parsing"
|
145
158
|
| "timeout"
|
146
159
|
| "unicode_decode"
|
@@ -218,7 +231,7 @@ export type JsonValue = unknown;
|
|
218
231
|
export type Timestamp1 = string;
|
219
232
|
export type Pending1 = boolean | null;
|
220
233
|
export type Event1 = "sample_limit";
|
221
|
-
export type
|
234
|
+
export type Type7 = "message" | "time" | "token" | "operator";
|
222
235
|
export type Message2 = string;
|
223
236
|
export type Limit1 = number | null;
|
224
237
|
export type Timestamp2 = string;
|
@@ -244,8 +257,8 @@ export type Input2 = (
|
|
244
257
|
)[];
|
245
258
|
export type Name5 = string;
|
246
259
|
export type Description = string;
|
247
|
-
export type
|
248
|
-
export type
|
260
|
+
export type Type8 = "object";
|
261
|
+
export type Type9 =
|
249
262
|
| ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
|
250
263
|
| null;
|
251
264
|
export type Description1 = string | null;
|
@@ -265,7 +278,7 @@ export type Cache = ("read" | "write") | null;
|
|
265
278
|
export type Timestamp5 = string;
|
266
279
|
export type Pending5 = boolean | null;
|
267
280
|
export type Event5 = "tool";
|
268
|
-
export type
|
281
|
+
export type Type10 = "function";
|
269
282
|
export type Id3 = string;
|
270
283
|
export type Function2 = string;
|
271
284
|
export type Result =
|
@@ -274,7 +287,9 @@ export type Result =
|
|
274
287
|
| boolean
|
275
288
|
| ContentText
|
276
289
|
| ContentImage
|
277
|
-
|
|
290
|
+
| ContentAudio
|
291
|
+
| ContentVideo
|
292
|
+
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
278
293
|
export type Truncated = [unknown, unknown] | null;
|
279
294
|
export type Timestamp6 = string;
|
280
295
|
export type Pending6 = boolean | null;
|
@@ -324,13 +339,13 @@ export type Timestamp12 = string;
|
|
324
339
|
export type Pending12 = boolean | null;
|
325
340
|
export type Event12 = "step";
|
326
341
|
export type Action = "begin" | "end";
|
327
|
-
export type
|
342
|
+
export type Type11 = string | null;
|
328
343
|
export type Name8 = string;
|
329
344
|
export type Timestamp13 = string;
|
330
345
|
export type Pending13 = boolean | null;
|
331
346
|
export type Event13 = "subtask";
|
332
347
|
export type Name9 = string;
|
333
|
-
export type
|
348
|
+
export type Type12 = string | null;
|
334
349
|
export type Events2 = (
|
335
350
|
| SampleInitEvent
|
336
351
|
| SampleLimitEvent
|
@@ -379,7 +394,7 @@ export type Events = (
|
|
379
394
|
| StepEvent
|
380
395
|
| SubtaskEvent
|
381
396
|
)[];
|
382
|
-
export type
|
397
|
+
export type Type13 = "context" | "time" | "message" | "token" | "operator";
|
383
398
|
export type Limit2 = number;
|
384
399
|
export type Reductions = EvalSampleReductions[] | null;
|
385
400
|
export type Scorer1 = string;
|
@@ -448,7 +463,6 @@ export interface EvalConfig {
|
|
448
463
|
sample_id: SampleId;
|
449
464
|
epochs: Epochs;
|
450
465
|
epochs_reducer: EpochsReducer;
|
451
|
-
trace: Trace;
|
452
466
|
approval: ApprovalPolicyConfig | null;
|
453
467
|
fail_on_error: FailOnError;
|
454
468
|
message_limit: MessageLimit;
|
@@ -614,6 +628,16 @@ export interface ContentImage {
|
|
614
628
|
image: Image;
|
615
629
|
detail: Detail;
|
616
630
|
}
|
631
|
+
export interface ContentAudio {
|
632
|
+
type: Type3;
|
633
|
+
audio: Audio;
|
634
|
+
format: Format;
|
635
|
+
}
|
636
|
+
export interface ContentVideo {
|
637
|
+
type: Type4;
|
638
|
+
video: Video;
|
639
|
+
format: Format1;
|
640
|
+
}
|
617
641
|
export interface ChatMessageUser {
|
618
642
|
content: Content1;
|
619
643
|
source: Source1;
|
@@ -630,7 +654,7 @@ export interface ToolCall {
|
|
630
654
|
id: Id1;
|
631
655
|
function: Function;
|
632
656
|
arguments: Arguments;
|
633
|
-
type:
|
657
|
+
type: Type5;
|
634
658
|
parse_error: ParseError;
|
635
659
|
view: ToolCallContent | null;
|
636
660
|
}
|
@@ -640,7 +664,7 @@ export interface Arguments {}
|
|
640
664
|
*/
|
641
665
|
export interface ToolCallContent {
|
642
666
|
title: Title;
|
643
|
-
format:
|
667
|
+
format: Format2;
|
644
668
|
content: Content3;
|
645
669
|
}
|
646
670
|
export interface ChatMessageTool {
|
@@ -652,7 +676,7 @@ export interface ChatMessageTool {
|
|
652
676
|
error: ToolCallError | null;
|
653
677
|
}
|
654
678
|
export interface ToolCallError {
|
655
|
-
type:
|
679
|
+
type: Type6;
|
656
680
|
message: Message1;
|
657
681
|
}
|
658
682
|
export interface ModelOutput {
|
@@ -735,7 +759,7 @@ export interface SampleLimitEvent {
|
|
735
759
|
timestamp: Timestamp1;
|
736
760
|
pending: Pending1;
|
737
761
|
event: Event1;
|
738
|
-
type:
|
762
|
+
type: Type7;
|
739
763
|
message: Message2;
|
740
764
|
limit: Limit1;
|
741
765
|
}
|
@@ -822,7 +846,7 @@ export interface ToolInfo {
|
|
822
846
|
* Description of tool parameters object in JSON Schema format.
|
823
847
|
*/
|
824
848
|
export interface ToolParams {
|
825
|
-
type:
|
849
|
+
type: Type8;
|
826
850
|
properties: Properties;
|
827
851
|
required: Required1;
|
828
852
|
additionalProperties: Additionalproperties1;
|
@@ -834,7 +858,7 @@ export interface Properties {
|
|
834
858
|
* Description of tool parameter in JSON Schema format.
|
835
859
|
*/
|
836
860
|
export interface ToolParam {
|
837
|
-
type:
|
861
|
+
type: Type9;
|
838
862
|
description: Description1;
|
839
863
|
default: Default;
|
840
864
|
enum: Enum;
|
@@ -897,7 +921,7 @@ export interface ToolEvent {
|
|
897
921
|
timestamp: Timestamp5;
|
898
922
|
pending: Pending5;
|
899
923
|
event: Event5;
|
900
|
-
type:
|
924
|
+
type: Type10;
|
901
925
|
id: Id3;
|
902
926
|
function: Function2;
|
903
927
|
arguments: Arguments1;
|
@@ -999,7 +1023,7 @@ export interface StepEvent {
|
|
999
1023
|
pending: Pending12;
|
1000
1024
|
event: Event12;
|
1001
1025
|
action: Action;
|
1002
|
-
type:
|
1026
|
+
type: Type11;
|
1003
1027
|
name: Name8;
|
1004
1028
|
}
|
1005
1029
|
/**
|
@@ -1010,7 +1034,7 @@ export interface SubtaskEvent {
|
|
1010
1034
|
pending: Pending13;
|
1011
1035
|
event: Event13;
|
1012
1036
|
name: Name9;
|
1013
|
-
type:
|
1037
|
+
type: Type12;
|
1014
1038
|
input: Input4;
|
1015
1039
|
result: Result1;
|
1016
1040
|
events: Events2;
|
@@ -1026,7 +1050,7 @@ export interface Attachments {
|
|
1026
1050
|
[k: string]: string;
|
1027
1051
|
}
|
1028
1052
|
export interface EvalSampleLimit {
|
1029
|
-
type:
|
1053
|
+
type: Type13;
|
1030
1054
|
limit: Limit2;
|
1031
1055
|
}
|
1032
1056
|
export interface EvalSampleReductions {
|