zidane 5.11.2 → 5.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/dist/{agent-D0W9yClt.d.ts → agent-Dt3mALPV.d.ts} +209 -30
  2. package/dist/agent-Dt3mALPV.d.ts.map +1 -0
  3. package/dist/chat/pure.d.ts +3 -3
  4. package/dist/chat.d.ts +6 -6
  5. package/dist/chat.js +2 -2
  6. package/dist/contexts/e2b.d.ts +1 -1
  7. package/dist/{errors-DkR6GPJw.js → errors-BpPfMo_4.js} +17 -2
  8. package/dist/{errors-DkR6GPJw.js.map → errors-BpPfMo_4.js.map} +1 -1
  9. package/dist/eval.d.ts +1 -1
  10. package/dist/eval.js +2 -2
  11. package/dist/{fetch-url-DPP6-Ruo.js → fetch-url-Cgbq-HYx.js} +2 -2
  12. package/dist/{fetch-url-DPP6-Ruo.js.map → fetch-url-Cgbq-HYx.js.map} +1 -1
  13. package/dist/{headless-Bb5gU8AR.js → headless-DSOPnfFZ.js} +39 -16
  14. package/dist/headless-DSOPnfFZ.js.map +1 -0
  15. package/dist/headless.d.ts +1 -1
  16. package/dist/headless.js +1 -1
  17. package/dist/{index-DZR99FD4.d.ts → index-BDRh3kup.d.ts} +13 -2
  18. package/dist/index-BDRh3kup.d.ts.map +1 -0
  19. package/dist/{index-D60tX5XC.d.ts → index-Do7IZGW5.d.ts} +2 -2
  20. package/dist/{index-D60tX5XC.d.ts.map → index-Do7IZGW5.d.ts.map} +1 -1
  21. package/dist/index.d.ts +5 -5
  22. package/dist/index.js +17 -14
  23. package/dist/index.js.map +1 -1
  24. package/dist/{interpolate-CTfr0GdR.js → interpolate-Da6kPCXn.js} +2 -2
  25. package/dist/{interpolate-CTfr0GdR.js.map → interpolate-Da6kPCXn.js.map} +1 -1
  26. package/dist/{logger-n4LsLISE.d.ts → logger-C2E41UWq.d.ts} +2 -2
  27. package/dist/{logger-n4LsLISE.d.ts.map → logger-C2E41UWq.d.ts.map} +1 -1
  28. package/dist/{login-BHhOdTp9.js → login-LF-inV4T.js} +33 -5
  29. package/dist/login-LF-inV4T.js.map +1 -0
  30. package/dist/{mcp-Cy9mgCcr.js → mcp-DeJ9280K.js} +141 -14
  31. package/dist/mcp-DeJ9280K.js.map +1 -0
  32. package/dist/mcp.d.ts +1 -1
  33. package/dist/mcp.js +1 -1
  34. package/dist/media-sniff-Bn76JxAu.js +216 -0
  35. package/dist/media-sniff-Bn76JxAu.js.map +1 -0
  36. package/dist/{messages-RPKrEPvH.js → messages-9wyCuvLF.js} +539 -54
  37. package/dist/messages-9wyCuvLF.js.map +1 -0
  38. package/dist/output/stream-json.d.ts +2 -2
  39. package/dist/output/stream-json.js +1 -1
  40. package/dist/output/terminal.d.ts +2 -2
  41. package/dist/{presets-D5ibZTml.js → presets-By-idp1u.js} +2 -2
  42. package/dist/{presets-D5ibZTml.js.map → presets-By-idp1u.js.map} +1 -1
  43. package/dist/presets.d.ts +2 -2
  44. package/dist/presets.js +1 -1
  45. package/dist/{providers-C2cxujp_.js → providers-psx2_0LB.js} +55 -22
  46. package/dist/providers-psx2_0LB.js.map +1 -0
  47. package/dist/providers.d.ts +1 -1
  48. package/dist/providers.js +2 -2
  49. package/dist/{read-state-BFqpQRc5.js → read-state-DH2IuQHX.js} +2 -2
  50. package/dist/{read-state-BFqpQRc5.js.map → read-state-DH2IuQHX.js.map} +1 -1
  51. package/dist/restate.d.ts +1 -1
  52. package/dist/restate.js +1 -1
  53. package/dist/session/sqlite.d.ts +1 -1
  54. package/dist/session/sqlite.js +1 -1
  55. package/dist/{session-Do_TQV7c.js → session-CB23Ne0_.js} +2 -2
  56. package/dist/{session-Do_TQV7c.js.map → session-CB23Ne0_.js.map} +1 -1
  57. package/dist/session.d.ts +1 -1
  58. package/dist/session.js +2 -2
  59. package/dist/skills.d.ts +2 -2
  60. package/dist/skills.js +1 -1
  61. package/dist/{tool-formatters-RT5-gyE2.d.ts → tool-formatters-COmtAwgF.d.ts} +2 -2
  62. package/dist/{tool-formatters-RT5-gyE2.d.ts.map → tool-formatters-COmtAwgF.d.ts.map} +1 -1
  63. package/dist/tools/fetch-url.d.ts +1 -1
  64. package/dist/tools/fetch-url.js +1 -1
  65. package/dist/tools/web-search.d.ts +1 -1
  66. package/dist/tools/web-search.js +2 -2
  67. package/dist/{tools-ZHKOh44k.js → tools-CZYTs_lT.js} +98 -127
  68. package/dist/tools-CZYTs_lT.js.map +1 -0
  69. package/dist/tools.d.ts +2 -2
  70. package/dist/tools.js +2 -2
  71. package/dist/{transcript-anchors-0zzqcSm5.js → transcript-anchors-A7__EUVN.js} +25 -18
  72. package/dist/transcript-anchors-A7__EUVN.js.map +1 -0
  73. package/dist/{transcript-anchors-B4FxkG-8.d.ts → transcript-anchors-DLa8m9_E.d.ts} +4 -4
  74. package/dist/{transcript-anchors-B4FxkG-8.d.ts.map → transcript-anchors-DLa8m9_E.d.ts.map} +1 -1
  75. package/dist/tui.d.ts +3 -3
  76. package/dist/tui.js +9 -9
  77. package/dist/tui.js.map +1 -1
  78. package/dist/{turn-operations-CoRj3mYZ.d.ts → turn-operations-ifKg5muR.d.ts} +3 -3
  79. package/dist/{turn-operations-CoRj3mYZ.d.ts.map → turn-operations-ifKg5muR.d.ts.map} +1 -1
  80. package/dist/{types-BiobHM1D.js → types-DxHDaqN7.js} +23 -6
  81. package/dist/{types-BiobHM1D.js.map → types-DxHDaqN7.js.map} +1 -1
  82. package/dist/types.d.ts +3 -3
  83. package/dist/types.js +2 -2
  84. package/docs/RESTATE.md +25 -0
  85. package/package.json +2 -1
  86. package/dist/agent-D0W9yClt.d.ts.map +0 -1
  87. package/dist/headless-Bb5gU8AR.js.map +0 -1
  88. package/dist/image-sniff-B7uFSNO1.js +0 -90
  89. package/dist/image-sniff-B7uFSNO1.js.map +0 -1
  90. package/dist/index-DZR99FD4.d.ts.map +0 -1
  91. package/dist/login-BHhOdTp9.js.map +0 -1
  92. package/dist/mcp-Cy9mgCcr.js.map +0 -1
  93. package/dist/messages-RPKrEPvH.js.map +0 -1
  94. package/dist/providers-C2cxujp_.js.map +0 -1
  95. package/dist/tools-ZHKOh44k.js.map +0 -1
  96. package/dist/transcript-anchors-0zzqcSm5.js.map +0 -1
@@ -1,8 +1,127 @@
1
1
  import { n as estimateTokens } from "./utils-ngQzYzZD.js";
2
- import { c as classifyErrorPrelude, d as matchesContextExceeded, f as matchesToolPairingError, l as errorMessage, u as isRetryableHttpStatus } from "./errors-DkR6GPJw.js";
3
- import { n as documentBlockMarker } from "./types-BiobHM1D.js";
4
- import { t as reconcileImageMediaType } from "./image-sniff-B7uFSNO1.js";
2
+ import { c as classifyErrorPrelude, d as isRetryableHttpStatus, f as matchesContextExceeded, l as errorMessage, p as matchesToolPairingError } from "./errors-BpPfMo_4.js";
3
+ import { n as reconcileImageMediaType, t as audioFormatFromMediaType } from "./media-sniff-Bn76JxAu.js";
4
+ import { n as assertResolvedMediaBlock, r as documentBlockMarker } from "./types-DxHDaqN7.js";
5
5
  import { getModel } from "@earendil-works/pi-ai";
6
+ //#region src/prompt.ts
7
/**
 * Coerce the run-level prompt into a `PromptPart[]`.
 *
 * - `undefined` → `undefined` (promptless resume path).
 * - `string` → a single `text` part; the empty string returns `undefined` so
 *   callers skip pushing an empty user turn.
 * - `PromptPart[]` → validated and returned as-is (same array reference). An
 *   empty array, or one holding nothing but empty `text` parts, returns
 *   `undefined`. Any `image` / `audio` / `video` / `document` part counts as
 *   content regardless of its payload.
 *
 * @param {string | object[] | undefined} prompt Run-level prompt.
 * @returns {object[] | undefined} The prompt parts, or `undefined` when there
 *   is nothing to send.
 * @throws {Error} When a part is not an object with a string `type`, when the
 *   `type` is not one of the five known kinds, or when a `text` part does not
 *   carry a string `text` field.
 */
function canonicalizePrompt(prompt) {
	if (prompt === void 0) return void 0;
	if (typeof prompt === "string") {
		if (prompt.length === 0) return void 0;
		return [{
			type: "text",
			text: prompt
		}];
	}
	if (prompt.length === 0) return void 0;
	let hasContent = false;
	for (const part of prompt) {
		if (!part || typeof part !== "object" || typeof part.type !== "string") throw new Error("Invalid PromptPart: each part must be an object with a `type` field.");
		const type = part.type;
		if (type !== "text" && type !== "image" && type !== "audio" && type !== "video" && type !== "document") throw new Error(`Invalid PromptPart type "${type}". Expected "text" | "image" | "audio" | "video" | "document".`);
		if (type === "text") {
			// Reject malformed text parts here: the message builders read
			// `part.text.length` and would otherwise die with a bare TypeError.
			if (typeof part.text !== "string") throw new Error("Invalid PromptPart: text parts must carry a string `text` field.");
			if (part.text.length > 0) hasContent = true;
		} else {
			// Media and document parts always count as content.
			hasContent = true;
		}
	}
	return hasContent ? prompt : void 0;
}
35
/**
 * Build the standard error for a media kind the active provider/model cannot
 * accept.
 *
 * Unsupported media is reported as a hard error rather than a silent marker,
 * so the caller can switch to a capable model instead of sending a request
 * that would be rejected — or one that succeeds with the bytes stripped.
 *
 * @param {string} kind Media kind, interpolated into the message (e.g. "audio").
 * @param {string} provider Provider name, interpolated into the message.
 * @returns {Error} The error object; the caller is responsible for throwing it.
 */
function unsupportedMediaError(kind, provider) {
	const refusal = `Provider "${provider}" does not support ${kind} input.`;
	const remedy = `Route to a ${kind}-capable model (e.g. an OpenAI-compat / OpenRouter model with capabilities.${kind} = true) to send ${kind} attachments.`;
	return /* @__PURE__ */ new Error(`${refusal} ${remedy}`);
}
46
/**
 * Provider-agnostic builder that turns prompt parts into a user
 * `SessionMessage`.
 *
 * - `text` parts become `{ type: 'text', text }`; empty text is dropped.
 * - `image` / `audio` / `video` parts are copied through as
 *   `{ type, mediaType, data }`, plus `name` when it is truthy.
 * - Any other part is handled as a document: `encoding: 'text'` is inlined as
 *   an `<attachment …>`-wrapped text block; anything else throws.
 *
 * @param {object[]} parts Prompt parts, in order.
 * @returns {{ role: "user", content: object[] }} The user message.
 * @throws {Error} For a document part whose `encoding` is not `"text"`.
 */
function defaultPromptMessage(parts) {
	const blocks = [];
	for (const part of parts) {
		switch (part.type) {
			case "text":
				if (part.text.length > 0) blocks.push({
					type: "text",
					text: part.text
				});
				break;
			case "image":
			case "audio":
			case "video": {
				const block = {
					type: part.type,
					mediaType: part.mediaType,
					data: part.data
				};
				if (part.name) block.name = part.name;
				blocks.push(block);
				break;
			}
			default: {
				if (part.encoding !== "text") throw new Error(`Provider does not support base64 document parts (mediaType: ${part.mediaType}). Use a text-encoded document or a provider that implements promptMessage (e.g. Anthropic).`);
				const openTag = part.name ? `<attachment name="${part.name}" media_type="${part.mediaType}">` : `<attachment media_type="${part.mediaType}">`;
				blocks.push({
					type: "text",
					text: `${openTag}\n${part.data}\n</attachment>`
				});
			}
		}
	}
	return {
		role: "user",
		content: blocks
	};
}
111
/**
 * Build the prompt `SessionMessage` for a given provider.
 *
 * Audio and video parts are gated first: the first such part whose matching
 * `provider.meta.capabilities` flag is not strictly `true` aborts with the
 * standard unsupported-media error. After that, `provider.promptMessage` is
 * used when defined, otherwise `defaultPromptMessage`.
 *
 * @param {object} provider Provider exposing `name`, `meta.capabilities` and an optional `promptMessage`.
 * @param {object[]} parts Prompt parts, in order.
 * @returns {object} The message produced by the chosen builder.
 * @throws {Error} When an audio/video part is present without the capability.
 */
function buildPromptMessage(provider, parts) {
	for (const part of parts) {
		const kind = part.type;
		if (kind !== "audio" && kind !== "video") continue;
		// `meta` is only touched when a gated part is actually present.
		const supported = provider.meta.capabilities?.[kind] === true;
		if (!supported) throw unsupportedMediaError(kind, provider.name);
	}
	return provider.promptMessage ? provider.promptMessage(parts) : defaultPromptMessage(parts);
}
124
+ //#endregion
6
125
  //#region src/system-prompt.ts
7
126
  /**
8
127
  * System-prompt boundary marker — splits a system prompt into a stable static
@@ -814,10 +933,63 @@ function toImageUrlPart(img) {
814
933
  image_url: { url: `data:${img.mediaType};base64,${img.data}` }
815
934
  };
816
935
  }
936
/**
 * Encode an audio block as an OpenAI-compat `input_audio` content part.
 *
 * The payload is passed through untouched: `data` is raw base64 with no
 * data-URI prefix (unlike images). The `format` comes from
 * `audioFormatFromMediaType`.
 *
 * @param {{ mediaType: string, data: string }} audio Audio block.
 * @returns {{ type: "input_audio", input_audio: { data: string, format: * } }}
 */
function toInputAudioPart(audio) {
	const { data, mediaType } = audio;
	const payload = {
		data,
		format: audioFormatFromMediaType(mediaType)
	};
	return {
		type: "input_audio",
		input_audio: payload
	};
}
949
/**
 * Encode a video block as an OpenAI-compat `video_url` content part.
 *
 * The bytes travel as a base64 data URL, the same shape used for images.
 *
 * @param {{ mediaType: string, data: string }} video Video block.
 * @returns {{ type: "video_url", video_url: { url: string } }}
 */
function toVideoUrlPart(video) {
	const { mediaType, data } = video;
	const url = `data:${mediaType};base64,${data}`;
	return {
		type: "video_url",
		video_url: { url }
	};
}
817
960
  function documentMarker$1(doc) {
818
961
  return documentBlockMarker(doc, "document omitted");
819
962
  }
820
963
  /**
964
+ * Whether a document block can be forwarded natively to OpenAI's `file` content
965
+ * part. Only base64-encoded files qualify (the `file_data` data-URL shape);
966
+ * text-encoded documents are already inlined as attachment text upstream and
967
+ * have no portable binary representation.
968
+ */
969
function isForwardableDocument(doc) {
	// Only base64 payloads fit the `file_data` data-URL shape.
	const { encoding } = doc;
	return encoding === "base64";
}
972
/**
 * Encode a base64 document as an OpenAI-compat `file` content part:
 * `{ type: 'file', file: { filename, file_data: 'data:<mime>;base64,<data>' } }`.
 *
 * Supported by OpenAI (Chat Completions + Responses) and routes that proxy it
 * (e.g. OpenRouter → OpenAI/Gemini/Anthropic). Gated behind `capabilities.documents`.
 *
 * When the document has no usable `name`, a filename is synthesized from the
 * media type's subtype (`application/pdf` → `file.pdf`), falling back to
 * `file.bin` when the subtype is missing or empty.
 *
 * @param {{ mediaType: string, data: string, name?: string }} doc Base64 document block.
 * @returns {{ type: "file", file: { filename: string, file_data: string } }}
 */
function toFilePart(doc) {
	// Strip MIME parameters ("; charset=…") so they don't leak into the
	// extension, and treat an empty subtype like a missing one — `??` alone
	// would turn "application/" into the filename "file.".
	const subtype = doc.mediaType.split(";")[0].split("/")[1]?.trim();
	// An empty name is treated as absent, matching how the other builders
	// only forward `name` when it is truthy.
	const filename = doc.name || `file.${subtype || "bin"}`;
	return {
		type: "file",
		file: {
			filename,
			file_data: `data:${doc.mediaType};base64,${doc.data}`
		}
	};
}
988
/**
 * Report whether any built OAI message carries a `{ type: 'file' }` content
 * part. Messages whose `content` is not an array are ignored.
 *
 * @param {object[]} messages Built wire messages.
 * @returns {boolean} `true` as soon as one file part is found.
 */
function messagesContainFilePart(messages) {
	const isFilePart = (part) => typeof part === "object" && part !== null && part.type === "file";
	for (const message of messages) {
		if (!Array.isArray(message.content)) continue;
		if (message.content.some(isFilePart)) return true;
	}
	return false;
}
992
+ /**
821
993
  * Summarize a `tool_result` output for the companion-user-message path — text blocks
822
994
  * are joined (separated by `\n`) so the tool message carries all textual context; image
823
995
  * blocks are collected in a flat list for the companion user message.
@@ -825,22 +997,51 @@ function documentMarker$1(doc) {
825
997
  * Used only on the fallback path; the native path walks `output` in-order to preserve
826
998
  * text↔image interleaving.
827
999
  */
828
function summarizeToolResultOutput(output, documentsEnabled = false) {
	const images = [];
	const audios = [];
	const videos = [];
	const documents = [];
	// A plain string output carries no media at all.
	if (typeof output === "string") return {
		text: output,
		images,
		audios,
		videos,
		documents
	};
	const lines = [];
	const wire = "OpenAI-compatible wire messages";
	for (const block of output) {
		switch (block.type) {
			case "text":
				lines.push(block.text);
				break;
			case "image":
				assertResolvedMediaBlock(block, wire);
				images.push({
					mediaType: block.mediaType,
					data: block.data
				});
				break;
			case "audio":
				audios.push({
					mediaType: block.mediaType,
					data: block.data
				});
				break;
			case "video":
				videos.push({
					mediaType: block.mediaType,
					data: block.data
				});
				break;
			case "document":
				if (documentsEnabled && isForwardableDocument(block)) {
					// Forwardable documents are collected for native `file` parts.
					assertResolvedMediaBlock(block, wire);
					documents.push({
						mediaType: block.mediaType,
						data: block.data,
						...block.name ? { name: block.name } : {}
					});
				} else {
					// Everything else degrades to a textual marker.
					assertResolvedMediaBlock(block, wire);
					lines.push(documentMarker$1(block));
				}
				break;
		}
	}
	return {
		text: lines.join("\n"),
		images,
		audios,
		videos,
		documents
	};
}
846
1047
  function toOAIMessages(system, messages, options = {}) {
@@ -849,6 +1050,9 @@ function toOAIMessages(system, messages, options = {}) {
849
1050
  content: renderSystemForWire(system)
850
1051
  }];
851
1052
  const nativeImageInTool = options.imageInToolResult === true;
1053
+ const audioEnabled = options.audio === true;
1054
+ const videoEnabled = options.video === true;
1055
+ const documentsEnabled = options.documents === true;
852
1056
  const reasoningEnabled = options.supportsReasoning === true;
853
1057
  const reasoningMode = options.reasoningMode ?? "openrouter";
854
1058
  const reasoningContentField = options.reasoningContentField ?? "reasoning";
@@ -860,6 +1064,8 @@ function toOAIMessages(system, messages, options = {}) {
860
1064
  const toolCalls = [];
861
1065
  const textBlocks = [];
862
1066
  const imageBlocks = [];
1067
+ const audioBlocks = [];
1068
+ const videoBlocks = [];
863
1069
  const documentBlocks = [];
864
1070
  const reasoningBlocks = [];
865
1071
  for (const b of msg.content) switch (b.type) {
@@ -875,6 +1081,12 @@ function toOAIMessages(system, messages, options = {}) {
875
1081
  case "image":
876
1082
  imageBlocks.push(b);
877
1083
  break;
1084
+ case "audio":
1085
+ audioBlocks.push(b);
1086
+ break;
1087
+ case "video":
1088
+ videoBlocks.push(b);
1089
+ break;
878
1090
  case "document":
879
1091
  documentBlocks.push(b);
880
1092
  break;
@@ -884,6 +1096,8 @@ function toOAIMessages(system, messages, options = {}) {
884
1096
  }
885
1097
  const reasoningDetails = reasoningBlocks.flatMap((b) => b.producer === "openrouter" ? b.details : []);
886
1098
  const reasoningText = reasoningMode === "reasoning_content" ? reasoningBlocks.map((b) => b.producer === "vllm" ? b.reasoningText : "").filter(Boolean).join("") : "";
1099
+ if (audioBlocks.length > 0 && !audioEnabled) throw unsupportedMediaError("audio", "openai-compat");
1100
+ if (videoBlocks.length > 0 && !videoEnabled) throw unsupportedMediaError("video", "openai-compat");
887
1101
  if (toolResults.length > 0) {
888
1102
  for (const tr of toolResults) {
889
1103
  if (typeof tr.output === "string") {
@@ -894,13 +1108,21 @@ function toOAIMessages(system, messages, options = {}) {
894
1108
  });
895
1109
  continue;
896
1110
  }
897
- if (nativeImageInTool) {
898
- const parts = tr.output.map((block) => block.type === "image" ? toImageUrlPart({
899
- mediaType: block.mediaType,
900
- data: block.data
901
- }) : {
902
- type: "text",
903
- text: block.type === "document" ? documentMarker$1(block) : block.text
1111
+ const imageToolParts = tr.output;
1112
+ const hasNonImageMedia = tr.output.some((block) => block.type === "audio" || block.type === "video" || block.type === "document");
1113
+ if (nativeImageInTool && !hasNonImageMedia) {
1114
+ const parts = imageToolParts.map((block) => {
1115
+ if (block.type === "image") {
1116
+ assertResolvedMediaBlock(block, "OpenAI-compatible wire messages");
1117
+ return toImageUrlPart({
1118
+ mediaType: block.mediaType,
1119
+ data: block.data
1120
+ });
1121
+ }
1122
+ return {
1123
+ type: "text",
1124
+ text: block.text
1125
+ };
904
1126
  });
905
1127
  out.push({
906
1128
  role: "tool",
@@ -909,8 +1131,11 @@ function toOAIMessages(system, messages, options = {}) {
909
1131
  });
910
1132
  continue;
911
1133
  }
912
- const { text, images } = summarizeToolResultOutput(tr.output);
913
- if (images.length === 0) {
1134
+ const { text, images, audios, videos, documents } = summarizeToolResultOutput(tr.output, documentsEnabled);
1135
+ if (audios.length > 0 && !audioEnabled) throw unsupportedMediaError("audio", "openai-compat");
1136
+ if (videos.length > 0 && !videoEnabled) throw unsupportedMediaError("video", "openai-compat");
1137
+ const mediaCount = images.length + audios.length + videos.length + documents.length;
1138
+ if (mediaCount === 0) {
914
1139
  out.push({
915
1140
  role: "tool",
916
1141
  tool_call_id: tr.callId,
@@ -918,8 +1143,8 @@ function toOAIMessages(system, messages, options = {}) {
918
1143
  });
919
1144
  continue;
920
1145
  }
921
- const noun = images.length === 1 ? "image" : "images";
922
- const attachedMarker = `[${images.length} ${noun} attached — see next user message]`;
1146
+ const noun = audios.length === 0 && videos.length === 0 && documents.length === 0 ? mediaCount === 1 ? "image" : "images" : mediaCount === 1 ? "attachment" : "attachments";
1147
+ const attachedMarker = `[${mediaCount} ${noun} attached — see next user message]`;
923
1148
  const toolMarker = text.length > 0 ? `${text}\n\n${attachedMarker}` : attachedMarker;
924
1149
  out.push({
925
1150
  role: "tool",
@@ -928,13 +1153,19 @@ function toOAIMessages(system, messages, options = {}) {
928
1153
  });
929
1154
  out.push({
930
1155
  role: "user",
931
- content: [...images.map(toImageUrlPart), {
932
- type: "text",
933
- text: `(${noun} returned by tool call ${tr.callId})`
934
- }]
1156
+ content: [
1157
+ ...images.map(toImageUrlPart),
1158
+ ...audios.map(toInputAudioPart),
1159
+ ...videos.map(toVideoUrlPart),
1160
+ ...documents.map(toFilePart),
1161
+ {
1162
+ type: "text",
1163
+ text: `(${noun} returned by tool call ${tr.callId})`
1164
+ }
1165
+ ]
935
1166
  });
936
1167
  }
937
- if (textBlocks.length === 0 && imageBlocks.length === 0 && documentBlocks.length === 0) continue;
1168
+ if (textBlocks.length === 0 && imageBlocks.length === 0 && audioBlocks.length === 0 && videoBlocks.length === 0 && documentBlocks.length === 0) continue;
938
1169
  }
939
1170
  if (toolCalls.length > 0) {
940
1171
  const m = {
@@ -954,15 +1185,31 @@ function toOAIMessages(system, messages, options = {}) {
954
1185
  out.push(m);
955
1186
  continue;
956
1187
  }
957
- if (imageBlocks.length > 0 || documentBlocks.length > 0) {
958
- const parts = imageBlocks.map((img) => ({
959
- type: "image_url",
960
- image_url: { url: `data:${img.mediaType};base64,${img.data}` }
1188
+ if (imageBlocks.length > 0 || audioBlocks.length > 0 || videoBlocks.length > 0 || documentBlocks.length > 0) {
1189
+ const parts = [];
1190
+ for (const img of imageBlocks) {
1191
+ assertResolvedMediaBlock(img, "OpenAI-compatible wire messages");
1192
+ parts.push({
1193
+ type: "image_url",
1194
+ image_url: { url: `data:${img.mediaType};base64,${img.data}` }
1195
+ });
1196
+ }
1197
+ for (const b of audioBlocks) parts.push(toInputAudioPart({
1198
+ mediaType: b.mediaType,
1199
+ data: b.data
961
1200
  }));
962
- for (const b of documentBlocks) parts.push({
963
- type: "text",
964
- text: documentMarker$1(b)
965
- });
1201
+ for (const b of videoBlocks) parts.push(toVideoUrlPart({
1202
+ mediaType: b.mediaType,
1203
+ data: b.data
1204
+ }));
1205
+ for (const b of documentBlocks) {
1206
+ assertResolvedMediaBlock(b, "OpenAI-compatible wire messages");
1207
+ if (documentsEnabled && isForwardableDocument(b)) parts.push(toFilePart(b));
1208
+ else parts.push({
1209
+ type: "text",
1210
+ text: documentMarker$1(b)
1211
+ });
1212
+ }
966
1213
  for (const b of textBlocks) parts.push({
967
1214
  type: "text",
968
1215
  text: b.text
@@ -1128,6 +1375,48 @@ function userMessage(content) {
1128
1375
  }]
1129
1376
  };
1130
1377
  }
1378
/**
 * Build a user `SessionMessage` from multimodal prompt parts for
 * openai-compat routes.
 *
 * Every kind is passed through as a canonical block with no gating here:
 * - `text` → `{ type: 'text', text }`, empty text dropped;
 * - `image` / `audio` / `video` → `{ type, mediaType, data }` plus a truthy `name`;
 * - anything else → a `document` block that also keeps `encoding`.
 *
 * Unlike `defaultPromptMessage`, base64 documents do not throw here.
 *
 * @param {object[]} parts Prompt parts, in order.
 * @returns {{ role: "user", content: object[] }} The user message.
 */
function openAICompatPromptMessage(parts) {
	const blocks = [];
	for (const part of parts) {
		// `name` is optional on every media kind and only forwarded when truthy.
		const named = part.name ? { name: part.name } : {};
		switch (part.type) {
			case "text":
				if (part.text.length > 0) blocks.push({
					type: "text",
					text: part.text
				});
				break;
			case "image":
			case "audio":
			case "video":
				blocks.push({
					type: part.type,
					mediaType: part.mediaType,
					data: part.data,
					...named
				});
				break;
			default:
				blocks.push({
					type: "document",
					mediaType: part.mediaType,
					data: part.data,
					encoding: part.encoding,
					...named
				});
		}
	}
	return {
		role: "user",
		content: blocks
	};
}
1131
1420
  function assistantMessage(content) {
1132
1421
  return {
1133
1422
  role: "assistant",
@@ -1332,6 +1621,23 @@ function estimateMissingUsage(body, result) {
1332
1621
  * authHeader: { name: 'Authorization', scheme: 'Api-Key' },
1333
1622
  * })
1334
1623
  * ```
1624
+ *
1625
+ * @example OpenAI directly — including native PDF input
1626
+ * The bundled `openai` provider routes through pi-ai (Codex Responses), whose
1627
+ * content union is text+image only, so it degrades documents to a marker. To
1628
+ * send PDFs to OpenAI, point this factory at OpenAI's Chat Completions endpoint
1629
+ * and enable `documents` — base64 PDFs are forwarded as the `{type:'file'}`
1630
+ * content part. NOTE: OpenAI's Chat Completions `file` part accepts
1631
+ * `application/pdf` ONLY; spreadsheets/`.docx` are Responses-API-only and will
1632
+ * 400 here.
1633
+ * ```ts
1634
+ * openaiCompat({
1635
+ * name: 'openai',
1636
+ * apiKey: process.env.OPENAI_API_KEY!,
1637
+ * baseURL: 'https://api.openai.com/v1',
1638
+ * capabilities: { vision: true, documents: true },
1639
+ * })
1640
+ * ```
1335
1641
  */
1336
1642
  function openaiCompat(params) {
1337
1643
  const name = params.name ?? "openai-compat";
@@ -1342,7 +1648,10 @@ function openaiCompat(params) {
1342
1648
  const endpoint = `${params.baseURL.replace(TRAILING_SLASH_RE, "")}/chat/completions`;
1343
1649
  const capabilities = {
1344
1650
  vision: params.capabilities?.vision ?? false,
1345
- imageInToolResult: params.capabilities?.imageInToolResult ?? false
1651
+ imageInToolResult: params.capabilities?.imageInToolResult ?? false,
1652
+ audio: params.capabilities?.audio ?? false,
1653
+ video: params.capabilities?.video ?? false,
1654
+ documents: params.capabilities?.documents ?? false
1346
1655
  };
1347
1656
  const cacheBreakpointsEnabled = params.cacheBreakpoints === true;
1348
1657
  const reasoningEnabled = params.supportsReasoning === true;
@@ -1359,11 +1668,15 @@ function openaiCompat(params) {
1359
1668
  userMessage,
1360
1669
  assistantMessage,
1361
1670
  toolResultsMessage,
1671
+ promptMessage: openAICompatPromptMessage,
1362
1672
  classifyError: classifyOpenAICompatError,
1363
1673
  async stream(options, callbacks) {
1364
1674
  const modelId = options.model || defaultModel;
1365
1675
  const messages = toOAIMessages(options.system, options.messages, {
1366
1676
  imageInToolResult: capabilities.imageInToolResult === true,
1677
+ audio: capabilities.audio === true,
1678
+ video: capabilities.video === true,
1679
+ documents: capabilities.documents === true,
1367
1680
  supportsReasoning: reasoningEnabled,
1368
1681
  reasoningMode,
1369
1682
  reasoningContentField,
@@ -1386,6 +1699,13 @@ function openaiCompat(params) {
1386
1699
  max_tokens: maxTokens,
1387
1700
  stream: true
1388
1701
  };
1702
+ if (params.pdfEngine && messagesContainFilePart(messages)) {
1703
+ const existing = Array.isArray(body.plugins) ? body.plugins : [];
1704
+ if (!existing.some((p) => typeof p === "object" && p !== null && p.id === "file-parser")) body.plugins = [...existing, {
1705
+ id: "file-parser",
1706
+ pdf: { engine: params.pdfEngine }
1707
+ }];
1708
+ }
1389
1709
  if (reasoningEnabled && reasoningMode === "openrouter") {
1390
1710
  const reasoning = planOpenRouterReasoning(options.thinking, options.thinkingBudget);
1391
1711
  if (reasoning) body.reasoning = reasoning;
@@ -1536,6 +1856,12 @@ function anthropicDocumentBlocks(doc) {
1536
1856
  function documentMarker(doc) {
1537
1857
  return documentBlockMarker(doc, "document attached");
1538
1858
  }
1859
/**
 * Report whether a content block is an image or document that carries a
 * string `ref` field.
 *
 * @param {object} block Content block.
 * @returns {boolean}
 */
function isContentRefBlock(block) {
	const isRefCapableKind = block.type === "image" || block.type === "document";
	return isRefCapableKind && typeof block.ref === "string";
}
1862
/**
 * Build the error reported when a ref-carrying block has to be projected to
 * wire messages but no `resolveContentRef` is available.
 *
 * @param {{ ref: string }} block Block whose `ref` is named in the message.
 * @returns {Error} The error object; the caller is responsible for throwing it.
 */
function unresolvedContentRefError(block) {
	const { ref } = block;
	const message = `Cannot project content ref "${ref}" to provider wire messages without resolveContentRef.`;
	return /* @__PURE__ */ new Error(message);
}
1539
1865
  /**
1540
1866
  * Coerce a tool-call id to Anthropic's wire constraint `^[a-zA-Z0-9_-]+$`.
1541
1867
  *
@@ -1584,10 +1910,22 @@ function encodeAnthropicToolResultContent(output) {
1584
1910
  type: "text",
1585
1911
  text: b.text
1586
1912
  };
1587
- if (b.type === "document") return {
1913
+ if (b.type === "document") {
1914
+ assertResolvedMediaBlock(b, "Anthropic wire messages");
1915
+ return {
1916
+ type: "text",
1917
+ text: documentMarker(b)
1918
+ };
1919
+ }
1920
+ if (b.type === "audio") return {
1921
+ type: "text",
1922
+ text: `[audio omitted: ${b.mediaType}]`
1923
+ };
1924
+ if (b.type === "video") return {
1588
1925
  type: "text",
1589
- text: documentMarker(b)
1926
+ text: `[video omitted: ${b.mediaType}]`
1590
1927
  };
1928
+ assertResolvedMediaBlock(b, "Anthropic wire messages");
1591
1929
  return {
1592
1930
  type: "image",
1593
1931
  source: anthropicImageSource(b.mediaType, b.data)
@@ -1797,11 +2135,23 @@ function toAnthropic(msg) {
1797
2135
  type: "text",
1798
2136
  text: block.text
1799
2137
  }];
1800
- case "image": return [{
1801
- type: "image",
1802
- source: anthropicImageSource(block.mediaType, block.data)
2138
+ case "image":
2139
+ assertResolvedMediaBlock(block, "Anthropic wire messages");
2140
+ return [{
2141
+ type: "image",
2142
+ source: anthropicImageSource(block.mediaType, block.data)
2143
+ }];
2144
+ case "audio": return [{
2145
+ type: "text",
2146
+ text: `[audio omitted: ${block.mediaType}]`
2147
+ }];
2148
+ case "video": return [{
2149
+ type: "text",
2150
+ text: `[video omitted: ${block.mediaType}]`
1803
2151
  }];
1804
- case "document": return anthropicDocumentBlocks(block);
2152
+ case "document":
2153
+ assertResolvedMediaBlock(block, "Anthropic wire messages");
2154
+ return anthropicDocumentBlocks(block);
1805
2155
  case "tool_call": return [{
1806
2156
  type: "tool_use",
1807
2157
  id: sanitizeAnthropicToolId(block.id),
@@ -1815,7 +2165,7 @@ function toAnthropic(msg) {
1815
2165
  content: encodeAnthropicToolResultContent(block.output)
1816
2166
  };
1817
2167
  if (block.isError) out.is_error = true;
1818
- return [out, ...(typeof block.output === "string" ? [] : block.output.filter((part) => part.type === "document")).flatMap(anthropicDocumentBlocks)];
2168
+ return [out, ...(typeof block.output === "string" ? [] : block.output.filter((part) => part.type === "document" && typeof part.data === "string")).flatMap(anthropicDocumentBlocks)];
1819
2169
  }
1820
2170
  case "thinking": return [{
1821
2171
  type: "thinking",
@@ -1858,6 +2208,8 @@ function toOpenAI(msg) {
1858
2208
  const toolResults = msg.content.filter((b) => b.type === "tool_result");
1859
2209
  const textBlocks = msg.content.filter((b) => b.type === "text");
1860
2210
  const imageBlocks = msg.content.filter((b) => b.type === "image");
2211
+ const audioBlocks = msg.content.filter((b) => b.type === "audio");
2212
+ const videoBlocks = msg.content.filter((b) => b.type === "video");
1861
2213
  const documentBlocks = msg.content.filter((b) => b.type === "document");
1862
2214
  if (toolResults.length > 0) {
1863
2215
  const siblingText = textBlocks.map((b) => b.text).join("\n");
@@ -1891,15 +2243,33 @@ function toOpenAI(msg) {
1891
2243
  }
1892
2244
  };
1893
2245
  }
1894
- if (imageBlocks.length > 0 || documentBlocks.length > 0) {
1895
- const parts = imageBlocks.map((img) => ({
1896
- type: "image_url",
1897
- image_url: { url: `data:${img.mediaType};base64,${img.data}` }
1898
- }));
1899
- for (const b of documentBlocks) parts.push({
1900
- type: "text",
1901
- text: documentMarker(b)
2246
+ if (imageBlocks.length > 0 || audioBlocks.length > 0 || videoBlocks.length > 0 || documentBlocks.length > 0) {
2247
+ const parts = [];
2248
+ for (const img of imageBlocks) {
2249
+ assertResolvedMediaBlock(img, "OpenAI-compatible wire messages");
2250
+ parts.push({
2251
+ type: "image_url",
2252
+ image_url: { url: `data:${img.mediaType};base64,${img.data}` }
2253
+ });
2254
+ }
2255
+ for (const b of audioBlocks) parts.push({
2256
+ type: "input_audio",
2257
+ input_audio: {
2258
+ data: b.data,
2259
+ format: audioFormatFromMediaType(b.mediaType)
2260
+ }
1902
2261
  });
2262
+ for (const b of videoBlocks) parts.push({
2263
+ type: "video_url",
2264
+ video_url: { url: `data:${b.mediaType};base64,${b.data}` }
2265
+ });
2266
+ for (const b of documentBlocks) {
2267
+ assertResolvedMediaBlock(b, "OpenAI-compatible wire messages");
2268
+ parts.push({
2269
+ type: "text",
2270
+ text: documentMarker(b)
2271
+ });
2272
+ }
1903
2273
  for (const b of textBlocks) parts.push({
1904
2274
  type: "text",
1905
2275
  text: b.text
@@ -2390,6 +2760,123 @@ function ensureEndsWithUserMessage(messages, provider, directive = DEFAULT_USER_
2390
2760
  if (messages[messages.length - 1].role === "user") return messages;
2391
2761
  return [...messages, provider.userMessage(directive)];
2392
2762
  }
2763
/**
 * Resolve a single content block for wire use.
 *
 * - A block recognised by `isContentRefBlock` is swapped for an inline data
 *   block carrying the payload returned by `resolveContentRef`.
 * - A `tool_result` block whose `output` is not a string has its parts
 *   resolved via `resolveToolResultContent`.
 * - Every other block is handed back as the same object.
 *
 * @param {object} block - Content block to inspect.
 * @param {Function} [resolveContentRef] - Async callback that receives
 *   `{ type, ref, mediaType, encoding?, name? }` and resolves to an object
 *   with a `data` property.
 * @returns {Promise<{ block: object, changed: boolean }>} The (possibly new)
 *   block plus a flag telling whether anything was replaced.
 * @throws The value produced by `unresolvedContentRefError(block)` when a ref
 *   block is encountered and no resolver was supplied.
 */
async function resolveRefContentBlock(block, resolveContentRef) {
  const untouched = { block, changed: false };

  if (!isContentRefBlock(block)) {
    // Only tool results with structured (non-string) output can nest refs.
    const hasStructuredOutput = block.type === "tool_result" && typeof block.output !== "string";
    if (!hasStructuredOutput) return untouched;

    const nested = await resolveToolResultContent(block.output, resolveContentRef);
    if (!nested.changed) return untouched;
    return {
      block: { ...block, output: nested.output },
      changed: true
    };
  }

  if (!resolveContentRef) throw unresolvedContentRefError(block);

  // `name` is optional and only forwarded when truthy.
  const optionalName = () => (block.name ? { name: block.name } : {});

  // Key order (type, ref, mediaType, encoding?, name?) is kept stable.
  const request = {
    type: block.type,
    ref: block.ref,
    mediaType: block.mediaType
  };
  if (block.type === "document") request.encoding = block.encoding;
  Object.assign(request, optionalName());

  const { data } = await resolveContentRef(request);

  // Anything that is not an image ref is emitted as a document block.
  const inlined = block.type === "image"
    ? {
        type: "image",
        mediaType: block.mediaType,
        data,
        ...optionalName()
      }
    : {
        type: "document",
        mediaType: block.mediaType,
        encoding: block.encoding,
        data,
        ...optionalName()
      };

  return { block: inlined, changed: true };
}
2810
/**
 * Resolve every ref part inside a tool result's structured output.
 *
 * Parts are processed one at a time, in order. Parts that are not recognised
 * by `isContentRefBlock` are carried over as-is; ref parts are replaced by an
 * inline `image` or `document` part holding the resolved `data`.
 *
 * @param {Iterable<object>} output - Structured tool-result parts.
 * @param {Function} [resolveContentRef] - Async callback that receives
 *   `{ type, ref, mediaType, encoding?, name? }` and resolves to an object
 *   with a `data` property.
 * @returns {Promise<{ output: Iterable<object>, changed: boolean }>} The
 *   original `output` reference when nothing was resolved, otherwise a new
 *   array, together with the change flag.
 * @throws The value produced by `unresolvedContentRefError(part)` when a ref
 *   part is reached and no resolver was supplied.
 */
async function resolveToolResultContent(output, resolveContentRef) {
  const rebuilt = [];
  let sawRef = false;

  for (const part of output) {
    if (isContentRefBlock(part)) {
      // The missing-resolver check is deliberately lazy: it only fires once
      // an actual ref part is reached.
      if (!resolveContentRef) throw unresolvedContentRefError(part);

      const request = {
        type: part.type,
        ref: part.ref,
        mediaType: part.mediaType
      };
      if (part.type === "document") request.encoding = part.encoding;
      if (part.name) request.name = part.name;

      const { data } = await resolveContentRef(request);
      sawRef = true;

      const optionalName = part.name ? { name: part.name } : {};
      // Anything that is not an image ref is emitted as a document part.
      rebuilt.push(part.type === "image"
        ? {
            type: "image",
            mediaType: part.mediaType,
            data,
            ...optionalName
          }
        : {
            type: "document",
            mediaType: part.mediaType,
            encoding: part.encoding,
            data,
            ...optionalName
          });
    } else {
      rebuilt.push(part);
    }
  }

  // Hand back the caller's own reference when no part needed resolving.
  if (!sawRef) return { output, changed: false };
  return { output: rebuilt, changed: true };
}
2846
/**
 * Turn image/document ref blocks into inline data blocks so the messages can
 * be sent to a provider.
 *
 * The result is meant for the wire only; callers must never write the
 * returned messages back into `session.turns`.
 *
 * When no block in any message carried a ref, the very same `messages` array
 * is returned. That is the common case, and running this again on output that
 * was already resolved is a no-op, so the multi-pass wire pipeline avoids
 * needless per-message copies. Messages without refs are likewise reused by
 * reference inside a changed result.
 *
 * @param {Array<object>} messages - Messages whose `content` is iterable.
 * @param {object} [options]
 * @param {Function} [options.resolveContentRef] - Resolver forwarded to
 *   `resolveRefContentBlock` for every block.
 * @returns {Promise<Array<object>>} `messages` itself, or a new array in which
 *   the affected messages have fresh `content` arrays.
 */
async function resolveContentRefsInMessages(messages, options = {}) {
  const resolvedMessages = [];
  let touched = false;

  for (const message of messages) {
    const nextContent = [];
    let messageTouched = false;

    // Blocks are resolved strictly one after another, in order.
    for (const block of message.content) {
      const outcome = await resolveRefContentBlock(block, options.resolveContentRef);
      messageTouched = messageTouched || outcome.changed;
      nextContent.push(outcome.block);
    }

    if (messageTouched) {
      touched = true;
      resolvedMessages.push({ ...message, content: nextContent });
    } else {
      resolvedMessages.push(message);
    }
  }

  return touched ? resolvedMessages : messages;
}
2875
/**
 * Apply the final wire-level repairs to an already assembled message list.
 *
 * First runs `ensureToolResultPairing` (forwarding `onRepair` only when one
 * was given), then, if a provider is configured, passes the result through
 * `ensureEndsWithUserMessage`.
 *
 * @param {Array<object>} messages - Messages to finalize.
 * @param {object} options
 * @param {Function} [options.onRepair] - Forwarded to `ensureToolResultPairing`.
 * @param {object} [options.provider] - Forwarded to `ensureEndsWithUserMessage`;
 *   when absent that step is skipped.
 * @param {*} [options.userTailDirective] - Forwarded as the directive argument.
 * @returns {Array<object>} The finalized message list.
 */
function finalizeWireMessages(messages, options) {
  const pairingOptions = {};
  if (options.onRepair) pairingOptions.onRepair = options.onRepair;

  const repaired = ensureToolResultPairing(messages, pairingOptions);

  return options.provider
    ? ensureEndsWithUserMessage(repaired, options.provider, options.userTailDirective)
    : repaired;
}
2393
2880
  /**
2394
2881
  * Build a wire-ready `SessionMessage[]` from raw persisted `SessionTurn[]`.
2395
2882
  *
@@ -2418,7 +2905,7 @@ function ensureEndsWithUserMessage(messages, provider, directive = DEFAULT_USER_
2418
2905
  * "don't rewrite the past" invariant the wire-only repair was designed
2419
2906
  * around.
2420
2907
  */
2421
- function toWireMessages(turns, options = {}) {
2908
+ async function toWireMessages(turns, options = {}) {
2422
2909
  const messages = [];
2423
2910
  for (const t of turns) {
2424
2911
  if (t.role === "system") continue;
@@ -2427,9 +2914,7 @@ function toWireMessages(turns, options = {}) {
2427
2914
  content: t.content
2428
2915
  });
2429
2916
  }
2430
- const paired = ensureToolResultPairing(messages, options.onRepair ? { onRepair: options.onRepair } : {});
2431
- if (!options.provider) return paired;
2432
- return ensureEndsWithUserMessage(paired, options.provider, options.userTailDirective);
2917
+ return finalizeWireMessages(await resolveContentRefsInMessages(messages, { resolveContentRef: options.resolveContentRef }), options);
2433
2918
  }
2434
2919
  function autoDetectAndConvert(msg) {
2435
2920
  const c = msg.content;
@@ -2450,6 +2935,6 @@ function autoDetectAndConvert(msg) {
2450
2935
  return fromAnthropic(msg);
2451
2936
  }
2452
2937
  //#endregion
2453
- export { renderSystemForWire as A, sanitizeToolSpecs as C, appendStaticSection as D, appendDynamicSection as E, splitSystemPrompt as M, hasSystemPromptBoundary as O, sanitizeToolSchema as S, SYSTEM_PROMPT_BOUNDARY as T, classifyOpenAICompatError as _, detectTurnInterruption as a, toolResultsMessage as b, filterUnresolvedToolUses as c, remintDuplicateToolCallIds as d, toAnthropic as f, assistantMessage as g, OpenAICompatHttpError as h, autoDetectAndConvert as i, replaceDynamicSection as j, joinSystemPrompt as k, fromAnthropic as l, toWireMessages as m, SYNTHETIC_TOOL_RESULT_PLACEHOLDER as n, ensureEndsWithUserMessage as o, toOpenAI as p, TOOL_USE_INTERRUPTED_MARKER as r, ensureToolResultPairing as s, ORPHANED_TOOL_RESULT_MARKER as t, fromOpenAI as u, mapOAIFinishReason as v, fillEstimatedCost as w, userMessage as x, openaiCompat as y };
2938
+ export { joinSystemPrompt as A, sanitizeToolSchema as C, appendDynamicSection as D, SYSTEM_PROMPT_BOUNDARY as E, canonicalizePrompt as F, unsupportedMediaError as I, replaceDynamicSection as M, splitSystemPrompt as N, appendStaticSection as O, buildPromptMessage as P, userMessage as S, fillEstimatedCost as T, assistantMessage as _, detectTurnInterruption as a, openaiCompat as b, filterUnresolvedToolUses as c, remintDuplicateToolCallIds as d, resolveContentRefsInMessages as f, OpenAICompatHttpError as g, toWireMessages as h, autoDetectAndConvert as i, renderSystemForWire as j, hasSystemPromptBoundary as k, fromAnthropic as l, toOpenAI as m, SYNTHETIC_TOOL_RESULT_PLACEHOLDER as n, ensureEndsWithUserMessage as o, toAnthropic as p, TOOL_USE_INTERRUPTED_MARKER as r, ensureToolResultPairing as s, ORPHANED_TOOL_RESULT_MARKER as t, fromOpenAI as u, classifyOpenAICompatError as v, sanitizeToolSpecs as w, toolResultsMessage as x, mapOAIFinishReason as y };
2454
2939
 
2455
- //# sourceMappingURL=messages-RPKrEPvH.js.map
2940
+ //# sourceMappingURL=messages-9wyCuvLF.js.map