inspect-ai 0.3.57__py3-none-any.whl → 0.3.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. inspect_ai/__init__.py +2 -1
  2. inspect_ai/_cli/common.py +4 -2
  3. inspect_ai/_cli/eval.py +2 -0
  4. inspect_ai/_cli/trace.py +21 -2
  5. inspect_ai/_display/core/active.py +0 -2
  6. inspect_ai/_display/rich/display.py +4 -4
  7. inspect_ai/_display/textual/app.py +4 -1
  8. inspect_ai/_display/textual/widgets/samples.py +41 -5
  9. inspect_ai/_eval/eval.py +32 -20
  10. inspect_ai/_eval/evalset.py +7 -5
  11. inspect_ai/_eval/task/__init__.py +2 -2
  12. inspect_ai/_eval/task/images.py +40 -25
  13. inspect_ai/_eval/task/run.py +141 -119
  14. inspect_ai/_eval/task/task.py +140 -25
  15. inspect_ai/_util/constants.py +1 -0
  16. inspect_ai/_util/content.py +23 -1
  17. inspect_ai/_util/images.py +20 -17
  18. inspect_ai/_util/kvstore.py +73 -0
  19. inspect_ai/_util/notgiven.py +18 -0
  20. inspect_ai/_util/thread.py +5 -0
  21. inspect_ai/_view/www/dist/assets/index.js +37 -3
  22. inspect_ai/_view/www/log-schema.json +97 -13
  23. inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
  24. inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
  25. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +5 -1
  26. inspect_ai/_view/www/src/types/log.d.ts +51 -27
  27. inspect_ai/approval/_human/util.py +2 -2
  28. inspect_ai/dataset/_sources/csv.py +2 -1
  29. inspect_ai/dataset/_sources/json.py +2 -1
  30. inspect_ai/dataset/_sources/util.py +15 -7
  31. inspect_ai/log/_condense.py +11 -1
  32. inspect_ai/log/_log.py +2 -5
  33. inspect_ai/log/_recorders/eval.py +19 -8
  34. inspect_ai/log/_samples.py +10 -5
  35. inspect_ai/log/_transcript.py +28 -1
  36. inspect_ai/model/__init__.py +10 -2
  37. inspect_ai/model/_call_tools.py +55 -12
  38. inspect_ai/model/_chat_message.py +2 -4
  39. inspect_ai/model/{_trace.py → _conversation.py} +9 -8
  40. inspect_ai/model/_model.py +2 -2
  41. inspect_ai/model/_providers/anthropic.py +9 -7
  42. inspect_ai/model/_providers/azureai.py +6 -4
  43. inspect_ai/model/_providers/bedrock.py +6 -4
  44. inspect_ai/model/_providers/google.py +79 -8
  45. inspect_ai/model/_providers/groq.py +7 -5
  46. inspect_ai/model/_providers/hf.py +11 -6
  47. inspect_ai/model/_providers/mistral.py +6 -9
  48. inspect_ai/model/_providers/openai.py +17 -5
  49. inspect_ai/model/_providers/vertex.py +17 -4
  50. inspect_ai/scorer/__init__.py +13 -2
  51. inspect_ai/scorer/_metrics/__init__.py +2 -2
  52. inspect_ai/scorer/_metrics/std.py +3 -3
  53. inspect_ai/tool/__init__.py +9 -1
  54. inspect_ai/tool/_tool.py +9 -2
  55. inspect_ai/util/__init__.py +0 -3
  56. inspect_ai/util/{_trace.py → _conversation.py} +3 -17
  57. inspect_ai/util/_display.py +14 -4
  58. inspect_ai/util/_sandbox/context.py +12 -13
  59. inspect_ai/util/_sandbox/docker/compose.py +24 -11
  60. inspect_ai/util/_sandbox/docker/docker.py +20 -13
  61. inspect_ai/util/_sandbox/environment.py +13 -1
  62. inspect_ai/util/_sandbox/local.py +1 -0
  63. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/METADATA +2 -2
  64. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/RECORD +68 -65
  65. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/LICENSE +0 -0
  66. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/WHEEL +0 -0
  67. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/entry_points.txt +0 -0
  68. {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.58.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,73 @@
1
+ import sqlite3
2
+ from contextlib import AbstractContextManager
3
+ from typing import Any, Optional, cast
4
+
5
+ from .appdirs import inspect_data_dir
6
+
7
+
8
+ class KVStore(AbstractContextManager["KVStore"]):
9
+ def __init__(self, filename: str, max_entries: int | None = None):
10
+ self.filename = filename
11
+ self.max_entries = max_entries
12
+
13
+ def __enter__(self) -> "KVStore":
14
+ self.conn = sqlite3.connect(self.filename)
15
+ self.conn.execute("""
16
+ CREATE TABLE IF NOT EXISTS kv_store (
17
+ key TEXT PRIMARY KEY,
18
+ value TEXT,
19
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
20
+ )
21
+ """)
22
+ self.conn.commit()
23
+ return self
24
+
25
+ def __exit__(self, *excinfo: Any) -> None:
26
+ self.conn.close()
27
+
28
+ def put(self, key: str, value: str) -> None:
29
+ # Insert or update the value
30
+ self.conn.execute(
31
+ """
32
+ INSERT OR REPLACE INTO kv_store (key, value, created_at)
33
+ VALUES (?, ?, CURRENT_TIMESTAMP)
34
+ """,
35
+ (key, value),
36
+ )
37
+
38
+ # If we have a max_entries limit, remove oldest entries
39
+ if self.max_entries:
40
+ count = self.count()
41
+ if count > self.max_entries:
42
+ self.conn.execute(
43
+ """
44
+ DELETE FROM kv_store
45
+ WHERE key IN (
46
+ SELECT key FROM kv_store
47
+ ORDER BY created_at ASC
48
+ LIMIT ?
49
+ )
50
+ """,
51
+ (max(0, count - self.max_entries),),
52
+ )
53
+
54
+ self.conn.commit()
55
+
56
+ def get(self, key: str) -> Optional[str]:
57
+ cursor = self.conn.execute("SELECT value FROM kv_store WHERE key = ?", (key,))
58
+ result = cursor.fetchone()
59
+ return result[0] if result else None
60
+
61
+ def delete(self, key: str) -> bool:
62
+ cursor = self.conn.execute("DELETE FROM kv_store WHERE key = ?", (key,))
63
+ self.conn.commit()
64
+ return cursor.rowcount > 0
65
+
66
+ def count(self) -> int:
67
+ cursor = self.conn.execute("SELECT COUNT(*) FROM kv_store")
68
+ return cast(int, cursor.fetchone()[0])
69
+
70
+
71
def inspect_kvstore(name: str, max_entries: int | None = None) -> KVStore:
    """Build a ``KVStore`` for the database named ``name``.

    The database file is ``<name>.db`` inside the location returned by
    ``inspect_data_dir("kvstore")``. The store is returned unopened; enter it
    as a context manager to connect.

    Args:
        name: Base name of the database file (without the ``.db`` suffix).
        max_entries: Optional row cap forwarded to ``KVStore``.
    """
    # presumably inspect_data_dir returns a pathlib.Path -- it must support
    # "/" joining and .as_posix(); confirm against .appdirs
    db_path = inspect_data_dir("kvstore") / f"{name}.db"
    return KVStore(db_path.as_posix(), max_entries=max_entries)
@@ -0,0 +1,18 @@
1
+ # Sentinel class used until PEP 0661 is accepted
2
+ from typing import Literal
3
+
4
+ from typing_extensions import override
5
+
6
+
7
class NotGiven:
    """A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior).

    Only one instance ever exists: constructing the class again (directly or
    through ``copy``) returns that same object, so ``value is NOT_GIVEN``
    identity checks stay reliable.
    """

    # The single shared instance, created on first construction.
    _instance: "NotGiven | None" = None

    def __new__(cls) -> "NotGiven":
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __bool__(self) -> Literal[False]:
        """Always falsy, so ``if not value`` treats the sentinel like an unset value."""
        return False

    @override
    def __repr__(self) -> str:
        """Render as the name of the module-level constant."""
        return "NOT_GIVEN"


NOT_GIVEN = NotGiven()
@@ -0,0 +1,5 @@
1
+ import threading
2
+
3
+
4
def is_main_thread() -> bool:
    """Report whether the calling code is running on the main thread."""
    current = threading.current_thread()
    return current is threading.main_thread()
@@ -15547,12 +15547,46 @@ var require_assets = __commonJS({
15547
15547
  }
15548
15548
  }
15549
15549
  },
15550
+ audio: {
15551
+ render: (content) => {
15552
+ return m$1` <audio controls>
15553
+ <source
15554
+ src=${content.audio}
15555
+ type=${mimeTypeForFormat(content.format)}
15556
+ />
15557
+ </audio>`;
15558
+ }
15559
+ },
15560
+ video: {
15561
+ render: (content) => {
15562
+ return m$1` <video width="500" height="375" controls>
15563
+ <source
15564
+ src=${content.video}
15565
+ type=${mimeTypeForFormat(content.format)}
15566
+ />
15567
+ </video>`;
15568
+ }
15569
+ },
15550
15570
  tool: {
15551
15571
  render: (content) => {
15552
15572
  return m$1`<${ToolOutput} output=${content.content} />`;
15553
15573
  }
15554
15574
  }
15555
15575
  };
15576
+ const mimeTypeForFormat = (format2) => {
15577
+ switch (format2) {
15578
+ case "mov":
15579
+ return "video/quicktime";
15580
+ case "wav":
15581
+ return "audio/wav";
15582
+ case "mp3":
15583
+ return "audio/mpeg";
15584
+ case "mp4":
15585
+ return "video/mp4";
15586
+ case "mpeg":
15587
+ return "video/mpeg";
15588
+ }
15589
+ };
15556
15590
  const ChatView = ({
15557
15591
  id,
15558
15592
  messages,
@@ -16493,7 +16527,7 @@ ${entry.value}</pre
16493
16527
  };
16494
16528
  const MessageBand = ({ message, hidden, setHidden, type }) => {
16495
16529
  const bgColor = type === "info" ? "var(--bs-light)" : "var(--bs-" + type + "-bg-subtle)";
16496
- const color = "var(--bs-" + (type === "info" ? "secondary" : type) + "-text-emphasis)";
16530
+ const color = type === "info" ? void 0 : "var(--bs-" + type + "-text-emphasis)";
16497
16531
  return m$1`
16498
16532
  <div
16499
16533
  style=${{
@@ -16516,7 +16550,7 @@ ${entry.value}</pre
16516
16550
  fontSize: FontSize["title-secondary"],
16517
16551
  margin: "0",
16518
16552
  padding: "0",
16519
- color: "var(--bs-" + type + "-text-emphasis)",
16553
+ color,
16520
16554
  height: FontSize["title-secondary"],
16521
16555
  lineHeight: FontSize["title-secondary"]
16522
16556
  }}
@@ -30728,7 +30762,7 @@ self.onmessage = function (e) {
30728
30762
  var _a2;
30729
30763
  const text2 = inputString(current.input).join(" ");
30730
30764
  const scoreValue = evalDescriptor.score(current, selectedScore).value;
30731
- const scoreText = scoreValue ? String(scoreValue) : "";
30765
+ const scoreText = scoreValue ? String(scoreValue) : current.error ? String(current.error) : "";
30732
30766
  previous[0] = Math.min(Math.max(previous[0], text2.length), 300);
30733
30767
  previous[1] = Math.min(
30734
30768
  Math.max(previous[1], arrayToString(current.target).length),
@@ -210,6 +210,12 @@
210
210
  },
211
211
  {
212
212
  "$ref": "#/$defs/ContentImage"
213
+ },
214
+ {
215
+ "$ref": "#/$defs/ContentAudio"
216
+ },
217
+ {
218
+ "$ref": "#/$defs/ContentVideo"
213
219
  }
214
220
  ]
215
221
  },
@@ -281,6 +287,12 @@
281
287
  },
282
288
  {
283
289
  "$ref": "#/$defs/ContentImage"
290
+ },
291
+ {
292
+ "$ref": "#/$defs/ContentAudio"
293
+ },
294
+ {
295
+ "$ref": "#/$defs/ContentVideo"
284
296
  }
285
297
  ]
286
298
  },
@@ -336,6 +348,12 @@
336
348
  },
337
349
  {
338
350
  "$ref": "#/$defs/ContentImage"
351
+ },
352
+ {
353
+ "$ref": "#/$defs/ContentAudio"
354
+ },
355
+ {
356
+ "$ref": "#/$defs/ContentVideo"
339
357
  }
340
358
  ]
341
359
  },
@@ -429,6 +447,12 @@
429
447
  },
430
448
  {
431
449
  "$ref": "#/$defs/ContentImage"
450
+ },
451
+ {
452
+ "$ref": "#/$defs/ContentAudio"
453
+ },
454
+ {
455
+ "$ref": "#/$defs/ContentVideo"
432
456
  }
433
457
  ]
434
458
  },
@@ -482,6 +506,36 @@
482
506
  "type": "object",
483
507
  "additionalProperties": false
484
508
  },
509
+ "ContentAudio": {
510
+ "properties": {
511
+ "type": {
512
+ "const": "audio",
513
+ "default": "audio",
514
+ "title": "Type",
515
+ "type": "string"
516
+ },
517
+ "audio": {
518
+ "title": "Audio",
519
+ "type": "string"
520
+ },
521
+ "format": {
522
+ "enum": [
523
+ "wav",
524
+ "mp3"
525
+ ],
526
+ "title": "Format",
527
+ "type": "string"
528
+ }
529
+ },
530
+ "required": [
531
+ "type",
532
+ "audio",
533
+ "format"
534
+ ],
535
+ "title": "ContentAudio",
536
+ "type": "object",
537
+ "additionalProperties": false
538
+ },
485
539
  "ContentImage": {
486
540
  "properties": {
487
541
  "type": {
@@ -535,6 +589,37 @@
535
589
  "type": "object",
536
590
  "additionalProperties": false
537
591
  },
592
+ "ContentVideo": {
593
+ "properties": {
594
+ "type": {
595
+ "const": "video",
596
+ "default": "video",
597
+ "title": "Type",
598
+ "type": "string"
599
+ },
600
+ "video": {
601
+ "title": "Video",
602
+ "type": "string"
603
+ },
604
+ "format": {
605
+ "enum": [
606
+ "mp4",
607
+ "mpeg",
608
+ "mov"
609
+ ],
610
+ "title": "Format",
611
+ "type": "string"
612
+ }
613
+ },
614
+ "required": [
615
+ "type",
616
+ "video",
617
+ "format"
618
+ ],
619
+ "title": "ContentVideo",
620
+ "type": "object",
621
+ "additionalProperties": false
622
+ },
538
623
  "ErrorEvent": {
539
624
  "description": "Event with sample error.",
540
625
  "properties": {
@@ -657,18 +742,6 @@
657
742
  "default": null,
658
743
  "title": "Epochs Reducer"
659
744
  },
660
- "trace": {
661
- "anyOf": [
662
- {
663
- "type": "boolean"
664
- },
665
- {
666
- "type": "null"
667
- }
668
- ],
669
- "default": null,
670
- "title": "Trace"
671
- },
672
745
  "approval": {
673
746
  "anyOf": [
674
747
  {
@@ -847,7 +920,6 @@
847
920
  "sample_id",
848
921
  "epochs",
849
922
  "epochs_reducer",
850
- "trace",
851
923
  "approval",
852
924
  "fail_on_error",
853
925
  "message_limit",
@@ -3721,6 +3793,12 @@
3721
3793
  {
3722
3794
  "$ref": "#/$defs/ContentImage"
3723
3795
  },
3796
+ {
3797
+ "$ref": "#/$defs/ContentAudio"
3798
+ },
3799
+ {
3800
+ "$ref": "#/$defs/ContentVideo"
3801
+ },
3724
3802
  {
3725
3803
  "items": {
3726
3804
  "anyOf": [
@@ -3729,6 +3807,12 @@
3729
3807
  },
3730
3808
  {
3731
3809
  "$ref": "#/$defs/ContentImage"
3810
+ },
3811
+ {
3812
+ "$ref": "#/$defs/ContentAudio"
3813
+ },
3814
+ {
3815
+ "$ref": "#/$defs/ContentVideo"
3732
3816
  }
3733
3817
  ]
3734
3818
  },
@@ -8,7 +8,7 @@ export const MessageBand = ({ message, hidden, setHidden, type }) => {
8
8
  const bgColor =
9
9
  type === "info" ? "var(--bs-light)" : "var(--bs-" + type + "-bg-subtle)";
10
10
  const color =
11
- "var(--bs-" + (type === "info" ? "secondary" : type) + "-text-emphasis)";
11
+ type === "info" ? undefined : "var(--bs-" + type + "-text-emphasis)";
12
12
 
13
13
  return html`
14
14
  <div
@@ -32,7 +32,7 @@ export const MessageBand = ({ message, hidden, setHidden, type }) => {
32
32
  fontSize: FontSize["title-secondary"],
33
33
  margin: "0",
34
34
  padding: "0",
35
- color: "var(--bs-" + type + "-text-emphasis)",
35
+ color: color,
36
36
  height: FontSize["title-secondary"],
37
37
  lineHeight: FontSize["title-secondary"],
38
38
  }}
@@ -7,7 +7,7 @@ import { ToolOutput } from "./Tools.mjs";
7
7
  * Supports rendering strings, images, and tools using specific renderers.
8
8
  *
9
9
  * @param {Object} props - The props object.
10
- * @param {string|string[]| (import("../types/log").ContentText | import("../types/log").ContentImage | import("../Types.mjs").ContentTool)[]} props.contents - The content or array of contents to render.
10
+ * @param {string|string[]| (import("../types/log").ContentText | import("../types/log").ContentImage | import("../types/log").ContentAudio | import("../types/log").ContentVideo | import("../Types.mjs").ContentTool)[]} props.contents - The content or array of contents to render.
11
11
  * @returns {import("preact").JSX.Element | import("preact").JSX.Element[]} The component.
12
12
  */
13
13
  export const MessageContent = ({ contents }) => {
@@ -61,9 +61,51 @@ const messageRenderers = {
61
61
  }
62
62
  },
63
63
  },
64
+ audio: {
65
+ render: (content) => {
66
+ return html` <audio controls>
67
+ <source
68
+ src=${content.audio}
69
+ type=${mimeTypeForFormat(content.format)}
70
+ />
71
+ </audio>`;
72
+ },
73
+ },
74
+ video: {
75
+ render: (content) => {
76
+ return html` <video width="500" height="375" controls>
77
+ <source
78
+ src=${content.video}
79
+ type=${mimeTypeForFormat(content.format)}
80
+ />
81
+ </video>`;
82
+ },
83
+ },
64
84
  tool: {
65
85
  render: (content) => {
66
86
  return html`<${ToolOutput} output=${content.content} />`;
67
87
  },
68
88
  },
69
89
  };
90
+
91
/**
 * Maps an audio or video format name to the MIME type used for the `type`
 * attribute of the media `<source>` element.
 *
 * @param {import("../types/log").Format | import("../types/log").Format1 } format - The audio ("wav", "mp3") or video ("mp4", "mpeg", "mov") format.
 * @returns {string | undefined} - The mime type, or undefined when the format is not recognized.
 */
const mimeTypeForFormat = (format) => {
  switch (format) {
    case "mov":
      return "video/quicktime";
    case "wav":
      return "audio/wav";
    case "mp3":
      return "audio/mpeg";
    case "mp4":
      return "video/mp4";
    case "mpeg":
      return "video/mpeg";
    default:
      // Unrecognized format: report no MIME type rather than guessing one.
      return undefined;
  }
};
@@ -377,7 +377,11 @@ export const createSamplesDescriptor = (evalDescriptor, selectedScore) => {
377
377
  (previous, current) => {
378
378
  const text = inputString(current.input).join(" ");
379
379
  const scoreValue = evalDescriptor.score(current, selectedScore).value;
380
- const scoreText = scoreValue ? String(scoreValue) : "";
380
+ const scoreText = scoreValue
381
+ ? String(scoreValue)
382
+ : current.error
383
+ ? String(current.error)
384
+ : "";
381
385
  previous[0] = Math.min(Math.max(previous[0], text.length), 300);
382
386
  previous[1] = Math.min(
383
387
  Math.max(previous[1], arrayToString(current.target).length),
@@ -32,7 +32,6 @@ export type Limit = number | [unknown, unknown] | null;
32
32
  export type SampleId = string | number | (string | number)[] | null;
33
33
  export type Epochs = number | null;
34
34
  export type EpochsReducer = string[] | null;
35
- export type Trace = boolean | null;
36
35
  export type Name1 = string;
37
36
  export type Tools = string | string[];
38
37
  export type Approvers = ApproverPolicyConfig[];
@@ -112,35 +111,49 @@ export type Input =
112
111
  | ChatMessageAssistant
113
112
  | ChatMessageTool
114
113
  )[];
115
- export type Content = string | (ContentText | ContentImage)[];
114
+ export type Content =
115
+ | string
116
+ | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
116
117
  export type Type1 = "text";
117
118
  export type Text = string;
118
119
  export type Type2 = "image";
119
120
  export type Image = string;
120
121
  export type Detail = "auto" | "low" | "high";
122
+ export type Type3 = "audio";
123
+ export type Audio = string;
124
+ export type Format = "wav" | "mp3";
125
+ export type Type4 = "video";
126
+ export type Video = string;
127
+ export type Format1 = "mp4" | "mpeg" | "mov";
121
128
  export type Source = ("input" | "generate") | null;
122
129
  export type Role = "system";
123
- export type Content1 = string | (ContentText | ContentImage)[];
130
+ export type Content1 =
131
+ | string
132
+ | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
124
133
  export type Source1 = ("input" | "generate") | null;
125
134
  export type Role1 = "user";
126
135
  export type ToolCallId = string | null;
127
- export type Content2 = string | (ContentText | ContentImage)[];
136
+ export type Content2 =
137
+ | string
138
+ | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
128
139
  export type Source2 = ("input" | "generate") | null;
129
140
  export type Role2 = "assistant";
130
141
  export type ToolCalls = ToolCall[] | null;
131
142
  export type Id1 = string;
132
143
  export type Function = string;
133
- export type Type3 = "function";
144
+ export type Type5 = "function";
134
145
  export type ParseError = string | null;
135
146
  export type Title = string | null;
136
- export type Format = "text" | "markdown";
147
+ export type Format2 = "text" | "markdown";
137
148
  export type Content3 = string;
138
- export type Content4 = string | (ContentText | ContentImage)[];
149
+ export type Content4 =
150
+ | string
151
+ | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
139
152
  export type Source3 = ("input" | "generate") | null;
140
153
  export type Role3 = "tool";
141
154
  export type ToolCallId1 = string | null;
142
155
  export type Function1 = string | null;
143
- export type Type4 =
156
+ export type Type6 =
144
157
  | "parsing"
145
158
  | "timeout"
146
159
  | "unicode_decode"
@@ -218,7 +231,7 @@ export type JsonValue = unknown;
218
231
  export type Timestamp1 = string;
219
232
  export type Pending1 = boolean | null;
220
233
  export type Event1 = "sample_limit";
221
- export type Type5 = "message" | "time" | "token" | "operator";
234
+ export type Type7 = "message" | "time" | "token" | "operator";
222
235
  export type Message2 = string;
223
236
  export type Limit1 = number | null;
224
237
  export type Timestamp2 = string;
@@ -244,8 +257,8 @@ export type Input2 = (
244
257
  )[];
245
258
  export type Name5 = string;
246
259
  export type Description = string;
247
- export type Type6 = "object";
248
- export type Type7 =
260
+ export type Type8 = "object";
261
+ export type Type9 =
249
262
  | ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
250
263
  | null;
251
264
  export type Description1 = string | null;
@@ -265,7 +278,7 @@ export type Cache = ("read" | "write") | null;
265
278
  export type Timestamp5 = string;
266
279
  export type Pending5 = boolean | null;
267
280
  export type Event5 = "tool";
268
- export type Type8 = "function";
281
+ export type Type10 = "function";
269
282
  export type Id3 = string;
270
283
  export type Function2 = string;
271
284
  export type Result =
@@ -274,7 +287,9 @@ export type Result =
274
287
  | boolean
275
288
  | ContentText
276
289
  | ContentImage
277
- | (ContentText | ContentImage)[];
290
+ | ContentAudio
291
+ | ContentVideo
292
+ | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
278
293
  export type Truncated = [unknown, unknown] | null;
279
294
  export type Timestamp6 = string;
280
295
  export type Pending6 = boolean | null;
@@ -324,13 +339,13 @@ export type Timestamp12 = string;
324
339
  export type Pending12 = boolean | null;
325
340
  export type Event12 = "step";
326
341
  export type Action = "begin" | "end";
327
- export type Type9 = string | null;
342
+ export type Type11 = string | null;
328
343
  export type Name8 = string;
329
344
  export type Timestamp13 = string;
330
345
  export type Pending13 = boolean | null;
331
346
  export type Event13 = "subtask";
332
347
  export type Name9 = string;
333
- export type Type10 = string | null;
348
+ export type Type12 = string | null;
334
349
  export type Events2 = (
335
350
  | SampleInitEvent
336
351
  | SampleLimitEvent
@@ -379,7 +394,7 @@ export type Events = (
379
394
  | StepEvent
380
395
  | SubtaskEvent
381
396
  )[];
382
- export type Type11 = "context" | "time" | "message" | "token" | "operator";
397
+ export type Type13 = "context" | "time" | "message" | "token" | "operator";
383
398
  export type Limit2 = number;
384
399
  export type Reductions = EvalSampleReductions[] | null;
385
400
  export type Scorer1 = string;
@@ -448,7 +463,6 @@ export interface EvalConfig {
448
463
  sample_id: SampleId;
449
464
  epochs: Epochs;
450
465
  epochs_reducer: EpochsReducer;
451
- trace: Trace;
452
466
  approval: ApprovalPolicyConfig | null;
453
467
  fail_on_error: FailOnError;
454
468
  message_limit: MessageLimit;
@@ -614,6 +628,16 @@ export interface ContentImage {
614
628
  image: Image;
615
629
  detail: Detail;
616
630
  }
631
+ export interface ContentAudio {
632
+ type: Type3;
633
+ audio: Audio;
634
+ format: Format;
635
+ }
636
+ export interface ContentVideo {
637
+ type: Type4;
638
+ video: Video;
639
+ format: Format1;
640
+ }
617
641
  export interface ChatMessageUser {
618
642
  content: Content1;
619
643
  source: Source1;
@@ -630,7 +654,7 @@ export interface ToolCall {
630
654
  id: Id1;
631
655
  function: Function;
632
656
  arguments: Arguments;
633
- type: Type3;
657
+ type: Type5;
634
658
  parse_error: ParseError;
635
659
  view: ToolCallContent | null;
636
660
  }
@@ -640,7 +664,7 @@ export interface Arguments {}
640
664
  */
641
665
  export interface ToolCallContent {
642
666
  title: Title;
643
- format: Format;
667
+ format: Format2;
644
668
  content: Content3;
645
669
  }
646
670
  export interface ChatMessageTool {
@@ -652,7 +676,7 @@ export interface ChatMessageTool {
652
676
  error: ToolCallError | null;
653
677
  }
654
678
  export interface ToolCallError {
655
- type: Type4;
679
+ type: Type6;
656
680
  message: Message1;
657
681
  }
658
682
  export interface ModelOutput {
@@ -735,7 +759,7 @@ export interface SampleLimitEvent {
735
759
  timestamp: Timestamp1;
736
760
  pending: Pending1;
737
761
  event: Event1;
738
- type: Type5;
762
+ type: Type7;
739
763
  message: Message2;
740
764
  limit: Limit1;
741
765
  }
@@ -822,7 +846,7 @@ export interface ToolInfo {
822
846
  * Description of tool parameters object in JSON Schema format.
823
847
  */
824
848
  export interface ToolParams {
825
- type: Type6;
849
+ type: Type8;
826
850
  properties: Properties;
827
851
  required: Required1;
828
852
  additionalProperties: Additionalproperties1;
@@ -834,7 +858,7 @@ export interface Properties {
834
858
  * Description of tool parameter in JSON Schema format.
835
859
  */
836
860
  export interface ToolParam {
837
- type: Type7;
861
+ type: Type9;
838
862
  description: Description1;
839
863
  default: Default;
840
864
  enum: Enum;
@@ -897,7 +921,7 @@ export interface ToolEvent {
897
921
  timestamp: Timestamp5;
898
922
  pending: Pending5;
899
923
  event: Event5;
900
- type: Type8;
924
+ type: Type10;
901
925
  id: Id3;
902
926
  function: Function2;
903
927
  arguments: Arguments1;
@@ -999,7 +1023,7 @@ export interface StepEvent {
999
1023
  pending: Pending12;
1000
1024
  event: Event12;
1001
1025
  action: Action;
1002
- type: Type9;
1026
+ type: Type11;
1003
1027
  name: Name8;
1004
1028
  }
1005
1029
  /**
@@ -1010,7 +1034,7 @@ export interface SubtaskEvent {
1010
1034
  pending: Pending13;
1011
1035
  event: Event13;
1012
1036
  name: Name9;
1013
- type: Type10;
1037
+ type: Type12;
1014
1038
  input: Input4;
1015
1039
  result: Result1;
1016
1040
  events: Events2;
@@ -1026,7 +1050,7 @@ export interface Attachments {
1026
1050
  [k: string]: string;
1027
1051
  }
1028
1052
  export interface EvalSampleLimit {
1029
- type: Type11;
1053
+ type: Type13;
1030
1054
  limit: Limit2;
1031
1055
  }
1032
1056
  export interface EvalSampleReductions {