@draht/ai 2026.5.12 → 2026.6.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/dist/api-registry.d.ts +1 -1
  2. package/dist/api-registry.d.ts.map +1 -1
  3. package/dist/api-registry.js.map +1 -1
  4. package/dist/bedrock-provider.d.ts +2 -2
  5. package/dist/bedrock-provider.d.ts.map +1 -1
  6. package/dist/bedrock-provider.js.map +1 -1
  7. package/dist/cli.d.ts.map +1 -1
  8. package/dist/cli.js +14 -0
  9. package/dist/cli.js.map +1 -1
  10. package/dist/env-api-keys.d.ts +10 -1
  11. package/dist/env-api-keys.d.ts.map +1 -1
  12. package/dist/env-api-keys.js +110 -36
  13. package/dist/env-api-keys.js.map +1 -1
  14. package/dist/image-models.d.ts +10 -0
  15. package/dist/image-models.d.ts.map +1 -0
  16. package/dist/image-models.generated.d.ts +485 -0
  17. package/dist/image-models.generated.d.ts.map +1 -0
  18. package/dist/image-models.generated.js +487 -0
  19. package/dist/image-models.generated.js.map +1 -0
  20. package/dist/image-models.js +23 -0
  21. package/dist/image-models.js.map +1 -0
  22. package/dist/images-api-registry.d.ts +14 -0
  23. package/dist/images-api-registry.d.ts.map +1 -0
  24. package/dist/images-api-registry.js +22 -0
  25. package/dist/images-api-registry.js.map +1 -0
  26. package/dist/images.d.ts +4 -0
  27. package/dist/images.d.ts.map +1 -0
  28. package/dist/images.js +14 -0
  29. package/dist/images.js.map +1 -0
  30. package/dist/index.d.ts +31 -25
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.js +7 -1
  33. package/dist/index.js.map +1 -1
  34. package/dist/models.d.ts +5 -8
  35. package/dist/models.d.ts.map +1 -1
  36. package/dist/models.generated.d.ts +4665 -1252
  37. package/dist/models.generated.d.ts.map +1 -1
  38. package/dist/models.generated.js +4877 -2833
  39. package/dist/models.generated.js.map +1 -1
  40. package/dist/models.js +33 -6
  41. package/dist/models.js.map +1 -1
  42. package/dist/oauth.d.ts +1 -1
  43. package/dist/oauth.d.ts.map +1 -1
  44. package/dist/oauth.js.map +1 -1
  45. package/dist/providers/amazon-bedrock.d.ts +19 -1
  46. package/dist/providers/amazon-bedrock.d.ts.map +1 -1
  47. package/dist/providers/amazon-bedrock.js +278 -89
  48. package/dist/providers/amazon-bedrock.js.map +1 -1
  49. package/dist/providers/anthropic.d.ts +37 -6
  50. package/dist/providers/anthropic.d.ts.map +1 -1
  51. package/dist/providers/anthropic.js +300 -114
  52. package/dist/providers/anthropic.js.map +1 -1
  53. package/dist/providers/azure-openai-responses.d.ts +1 -1
  54. package/dist/providers/azure-openai-responses.d.ts.map +1 -1
  55. package/dist/providers/azure-openai-responses.js +68 -21
  56. package/dist/providers/azure-openai-responses.js.map +1 -1
  57. package/dist/providers/cloudflare.d.ts +13 -0
  58. package/dist/providers/cloudflare.d.ts.map +1 -0
  59. package/dist/providers/cloudflare.js +26 -0
  60. package/dist/providers/cloudflare.js.map +1 -0
  61. package/dist/providers/faux.d.ts +1 -1
  62. package/dist/providers/faux.d.ts.map +1 -1
  63. package/dist/providers/faux.js +1 -0
  64. package/dist/providers/faux.js.map +1 -1
  65. package/dist/providers/github-copilot-headers.d.ts +1 -1
  66. package/dist/providers/github-copilot-headers.d.ts.map +1 -1
  67. package/dist/providers/github-copilot-headers.js.map +1 -1
  68. package/dist/providers/google-shared.d.ts +8 -3
  69. package/dist/providers/google-shared.d.ts.map +1 -1
  70. package/dist/providers/google-shared.js +34 -17
  71. package/dist/providers/google-shared.js.map +1 -1
  72. package/dist/providers/google-vertex.d.ts +2 -2
  73. package/dist/providers/google-vertex.d.ts.map +1 -1
  74. package/dist/providers/google-vertex.js +45 -18
  75. package/dist/providers/google-vertex.js.map +1 -1
  76. package/dist/providers/google.d.ts +2 -2
  77. package/dist/providers/google.d.ts.map +1 -1
  78. package/dist/providers/google.js +9 -6
  79. package/dist/providers/google.js.map +1 -1
  80. package/dist/providers/images/openrouter.d.ts +3 -0
  81. package/dist/providers/images/openrouter.d.ts.map +1 -0
  82. package/dist/providers/images/openrouter.js +128 -0
  83. package/dist/providers/images/openrouter.js.map +1 -0
  84. package/dist/providers/images/register-builtins.d.ts +4 -0
  85. package/dist/providers/images/register-builtins.d.ts.map +1 -0
  86. package/dist/providers/images/register-builtins.js +34 -0
  87. package/dist/providers/images/register-builtins.js.map +1 -0
  88. package/dist/providers/mistral.d.ts +4 -1
  89. package/dist/providers/mistral.d.ts.map +1 -1
  90. package/dist/providers/mistral.js +43 -10
  91. package/dist/providers/mistral.js.map +1 -1
  92. package/dist/providers/openai-codex-responses.d.ts +22 -1
  93. package/dist/providers/openai-codex-responses.d.ts.map +1 -1
  94. package/dist/providers/openai-codex-responses.js +542 -111
  95. package/dist/providers/openai-codex-responses.js.map +1 -1
  96. package/dist/providers/openai-completions.d.ts +6 -2
  97. package/dist/providers/openai-completions.d.ts.map +1 -1
  98. package/dist/providers/openai-completions.js +447 -229
  99. package/dist/providers/openai-completions.js.map +1 -1
  100. package/dist/providers/openai-prompt-cache.d.ts +3 -0
  101. package/dist/providers/openai-prompt-cache.d.ts.map +1 -0
  102. package/dist/providers/openai-prompt-cache.js +10 -0
  103. package/dist/providers/openai-prompt-cache.js.map +1 -0
  104. package/dist/providers/openai-responses-shared.d.ts +3 -2
  105. package/dist/providers/openai-responses-shared.d.ts.map +1 -1
  106. package/dist/providers/openai-responses-shared.js +41 -15
  107. package/dist/providers/openai-responses-shared.js.map +1 -1
  108. package/dist/providers/openai-responses.d.ts +1 -1
  109. package/dist/providers/openai-responses.d.ts.map +1 -1
  110. package/dist/providers/openai-responses.js +85 -40
  111. package/dist/providers/openai-responses.js.map +1 -1
  112. package/dist/providers/register-builtins.d.ts +10 -13
  113. package/dist/providers/register-builtins.d.ts.map +1 -1
  114. package/dist/providers/register-builtins.js +13 -20
  115. package/dist/providers/register-builtins.js.map +1 -1
  116. package/dist/providers/simple-options.d.ts +2 -2
  117. package/dist/providers/simple-options.d.ts.map +1 -1
  118. package/dist/providers/simple-options.js +8 -2
  119. package/dist/providers/simple-options.js.map +1 -1
  120. package/dist/providers/transform-messages.d.ts +1 -1
  121. package/dist/providers/transform-messages.d.ts.map +1 -1
  122. package/dist/providers/transform-messages.js +63 -34
  123. package/dist/providers/transform-messages.js.map +1 -1
  124. package/dist/session-resources.d.ts +4 -0
  125. package/dist/session-resources.d.ts.map +1 -0
  126. package/dist/session-resources.js +22 -0
  127. package/dist/session-resources.js.map +1 -0
  128. package/dist/stream.d.ts +3 -3
  129. package/dist/stream.d.ts.map +1 -1
  130. package/dist/stream.js +14 -2
  131. package/dist/stream.js.map +1 -1
  132. package/dist/types.d.ts +177 -14
  133. package/dist/types.d.ts.map +1 -1
  134. package/dist/types.js.map +1 -1
  135. package/dist/utils/abort-signals.d.ts +6 -0
  136. package/dist/utils/abort-signals.d.ts.map +1 -0
  137. package/dist/utils/abort-signals.js +34 -0
  138. package/dist/utils/abort-signals.js.map +1 -0
  139. package/dist/utils/diagnostics.d.ts +19 -0
  140. package/dist/utils/diagnostics.d.ts.map +1 -0
  141. package/dist/utils/diagnostics.js +25 -0
  142. package/dist/utils/diagnostics.js.map +1 -0
  143. package/dist/utils/event-stream.d.ts +3 -3
  144. package/dist/utils/event-stream.d.ts.map +1 -1
  145. package/dist/utils/event-stream.js +2 -2
  146. package/dist/utils/event-stream.js.map +1 -1
  147. package/dist/utils/headers.d.ts +2 -0
  148. package/dist/utils/headers.d.ts.map +1 -0
  149. package/dist/utils/headers.js +8 -0
  150. package/dist/utils/headers.js.map +1 -0
  151. package/dist/utils/json-parse.d.ts +8 -1
  152. package/dist/utils/json-parse.d.ts.map +1 -1
  153. package/dist/utils/json-parse.js +89 -5
  154. package/dist/utils/json-parse.js.map +1 -1
  155. package/dist/utils/node-http-proxy.d.ts +10 -0
  156. package/dist/utils/node-http-proxy.d.ts.map +1 -0
  157. package/dist/utils/node-http-proxy.js +97 -0
  158. package/dist/utils/node-http-proxy.js.map +1 -0
  159. package/dist/utils/oauth/anthropic.d.ts +1 -1
  160. package/dist/utils/oauth/anthropic.d.ts.map +1 -1
  161. package/dist/utils/oauth/anthropic.js +1 -1
  162. package/dist/utils/oauth/anthropic.js.map +1 -1
  163. package/dist/utils/oauth/device-code.d.ts +21 -0
  164. package/dist/utils/oauth/device-code.d.ts.map +1 -0
  165. package/dist/utils/oauth/device-code.js +56 -0
  166. package/dist/utils/oauth/device-code.js.map +1 -0
  167. package/dist/utils/oauth/github-copilot.d.ts +3 -3
  168. package/dist/utils/oauth/github-copilot.d.ts.map +1 -1
  169. package/dist/utils/oauth/github-copilot.js +58 -70
  170. package/dist/utils/oauth/github-copilot.js.map +1 -1
  171. package/dist/utils/oauth/index.d.ts +8 -11
  172. package/dist/utils/oauth/index.d.ts.map +1 -1
  173. package/dist/utils/oauth/index.js +2 -11
  174. package/dist/utils/oauth/index.js.map +1 -1
  175. package/dist/utils/oauth/openai-codex.d.ts +11 -2
  176. package/dist/utils/oauth/openai-codex.d.ts.map +1 -1
  177. package/dist/utils/oauth/openai-codex.js +187 -73
  178. package/dist/utils/oauth/openai-codex.js.map +1 -1
  179. package/dist/utils/oauth/types.d.ts +18 -1
  180. package/dist/utils/oauth/types.d.ts.map +1 -1
  181. package/dist/utils/oauth/types.js.map +1 -1
  182. package/dist/utils/overflow.d.ts +7 -3
  183. package/dist/utils/overflow.d.ts.map +1 -1
  184. package/dist/utils/overflow.js +25 -3
  185. package/dist/utils/overflow.js.map +1 -1
  186. package/dist/utils/typebox-helpers.d.ts +1 -1
  187. package/dist/utils/typebox-helpers.d.ts.map +1 -1
  188. package/dist/utils/typebox-helpers.js +1 -1
  189. package/dist/utils/typebox-helpers.js.map +1 -1
  190. package/dist/utils/validation.d.ts +1 -1
  191. package/dist/utils/validation.d.ts.map +1 -1
  192. package/dist/utils/validation.js +242 -41
  193. package/dist/utils/validation.js.map +1 -1
  194. package/package.json +14 -15
  195. package/dist/providers/google-gemini-cli.d.ts +0 -74
  196. package/dist/providers/google-gemini-cli.d.ts.map +0 -1
  197. package/dist/providers/google-gemini-cli.js +0 -776
  198. package/dist/providers/google-gemini-cli.js.map +0 -1
@@ -1,11 +1,13 @@
1
1
  import OpenAI from "openai";
2
- import { getEnvApiKey } from "../env-api-keys.js";
3
- import { calculateCost, supportsXhigh } from "../models.js";
2
+ import { calculateCost, clampThinkingLevel } from "../models.js";
4
3
  import { AssistantMessageEventStream } from "../utils/event-stream.js";
4
+ import { headersToRecord } from "../utils/headers.js";
5
5
  import { parseStreamingJson } from "../utils/json-parse.js";
6
6
  import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
7
+ import { isCloudflareProvider, resolveCloudflareBaseUrl } from "./cloudflare.js";
7
8
  import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
8
- import { buildBaseOptions, clampReasoning, clampToXhigh } from "./simple-options.js";
9
+ import { clampOpenAIPromptCacheKey } from "./openai-prompt-cache.js";
10
+ import { buildBaseOptions, clampToXhigh } from "./simple-options.js";
9
11
  import { transformMessages } from "./transform-messages.js";
10
12
  /**
11
13
  * Check if conversation messages contain tool calls or tool results.
@@ -25,6 +27,27 @@ function hasToolHistory(messages) {
25
27
  }
26
28
  return false;
27
29
  }
30
+ function isTextContentBlock(block) {
31
+ return block.type === "text";
32
+ }
33
+ function isThinkingContentBlock(block) {
34
+ return block.type === "thinking";
35
+ }
36
+ function isToolCallBlock(block) {
37
+ return block.type === "toolCall";
38
+ }
39
+ function isImageContentBlock(block) {
40
+ return block.type === "image";
41
+ }
42
+ function resolveCacheRetention(cacheRetention) {
43
+ if (cacheRetention) {
44
+ return cacheRetention;
45
+ }
46
+ if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") {
47
+ return "long";
48
+ }
49
+ return "short";
50
+ }
28
51
  export const streamOpenAICompletions = (model, context, options) => {
29
52
  const stream = new AssistantMessageEventStream();
30
53
  (async () => {
@@ -46,47 +69,127 @@ export const streamOpenAICompletions = (model, context, options) => {
46
69
  timestamp: Date.now(),
47
70
  };
48
71
  try {
49
- const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
50
- const client = createClient(model, context, apiKey, options?.headers);
51
- let params = buildParams(model, context, options);
72
+ const apiKey = options?.apiKey;
73
+ if (!apiKey) {
74
+ throw new Error(`No API key for provider: ${model.provider}`);
75
+ }
76
+ const compat = getCompat(model);
77
+ const cacheRetention = resolveCacheRetention(options?.cacheRetention);
78
+ const cacheSessionId = cacheRetention === "none" ? undefined : options?.sessionId;
79
+ const client = createClient(model, context, apiKey, options?.headers, cacheSessionId, compat);
80
+ let params = buildParams(model, context, options, compat, cacheRetention);
52
81
  const nextParams = await options?.onPayload?.(params, model);
53
82
  if (nextParams !== undefined) {
54
83
  params = nextParams;
55
84
  }
56
- const openaiStream = await client.chat.completions.create(params, { signal: options?.signal });
85
+ const requestOptions = {
86
+ ...(options?.signal ? { signal: options.signal } : {}),
87
+ ...(options?.timeoutMs !== undefined ? { timeout: options.timeoutMs } : {}),
88
+ maxRetries: options?.maxRetries ?? 0,
89
+ };
90
+ const { data: openaiStream, response } = await client.chat.completions
91
+ .create(params, requestOptions)
92
+ .withResponse();
93
+ await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
57
94
  stream.push({ type: "start", partial: output });
58
- let currentBlock = null;
95
+ let textBlock = null;
96
+ let thinkingBlock = null;
97
+ let hasFinishReason = false;
98
+ const toolCallBlocksByIndex = new Map();
99
+ const toolCallBlocksById = new Map();
59
100
  const blocks = output.content;
60
- const blockIndex = () => blocks.length - 1;
61
- const finishCurrentBlock = (block) => {
62
- if (block) {
63
- if (block.type === "text") {
64
- stream.push({
65
- type: "text_end",
66
- contentIndex: blockIndex(),
67
- content: block.text,
68
- partial: output,
69
- });
101
+ const getContentIndex = (block) => blocks.indexOf(block);
102
+ const finishBlock = (block) => {
103
+ const contentIndex = getContentIndex(block);
104
+ if (contentIndex === -1) {
105
+ return;
106
+ }
107
+ if (block.type === "text") {
108
+ stream.push({
109
+ type: "text_end",
110
+ contentIndex,
111
+ content: block.text,
112
+ partial: output,
113
+ });
114
+ }
115
+ else if (block.type === "thinking") {
116
+ stream.push({
117
+ type: "thinking_end",
118
+ contentIndex,
119
+ content: block.thinking,
120
+ partial: output,
121
+ });
122
+ }
123
+ else if (block.type === "toolCall") {
124
+ block.arguments = parseStreamingJson(block.partialArgs);
125
+ // Finalize in-place and strip the scratch buffers so replay only
126
+ // carries parsed arguments.
127
+ delete block.partialArgs;
128
+ delete block.streamIndex;
129
+ stream.push({
130
+ type: "toolcall_end",
131
+ contentIndex,
132
+ toolCall: block,
133
+ partial: output,
134
+ });
135
+ }
136
+ };
137
+ const ensureTextBlock = () => {
138
+ if (!textBlock) {
139
+ textBlock = { type: "text", text: "" };
140
+ blocks.push(textBlock);
141
+ stream.push({ type: "text_start", contentIndex: getContentIndex(textBlock), partial: output });
142
+ }
143
+ return textBlock;
144
+ };
145
+ const ensureThinkingBlock = (thinkingSignature) => {
146
+ if (!thinkingBlock) {
147
+ thinkingBlock = {
148
+ type: "thinking",
149
+ thinking: "",
150
+ thinkingSignature,
151
+ };
152
+ blocks.push(thinkingBlock);
153
+ stream.push({ type: "thinking_start", contentIndex: getContentIndex(thinkingBlock), partial: output });
154
+ }
155
+ return thinkingBlock;
156
+ };
157
+ const ensureToolCallBlock = (toolCall) => {
158
+ const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
159
+ let block = streamIndex !== undefined ? toolCallBlocksByIndex.get(streamIndex) : undefined;
160
+ if (!block && toolCall.id) {
161
+ block = toolCallBlocksById.get(toolCall.id);
162
+ }
163
+ if (!block) {
164
+ block = {
165
+ type: "toolCall",
166
+ id: toolCall.id || "",
167
+ name: toolCall.function?.name || "",
168
+ arguments: {},
169
+ partialArgs: "",
170
+ streamIndex,
171
+ };
172
+ if (streamIndex !== undefined) {
173
+ toolCallBlocksByIndex.set(streamIndex, block);
70
174
  }
71
- else if (block.type === "thinking") {
72
- stream.push({
73
- type: "thinking_end",
74
- contentIndex: blockIndex(),
75
- content: block.thinking,
76
- partial: output,
77
- });
78
- }
79
- else if (block.type === "toolCall") {
80
- block.arguments = parseStreamingJson(block.partialArgs);
81
- delete block.partialArgs;
82
- stream.push({
83
- type: "toolcall_end",
84
- contentIndex: blockIndex(),
85
- toolCall: block,
86
- partial: output,
87
- });
175
+ if (toolCall.id) {
176
+ toolCallBlocksById.set(toolCall.id, block);
88
177
  }
178
+ blocks.push(block);
179
+ stream.push({
180
+ type: "toolcall_start",
181
+ contentIndex: getContentIndex(block),
182
+ partial: output,
183
+ });
184
+ }
185
+ if (streamIndex !== undefined && block.streamIndex === undefined) {
186
+ block.streamIndex = streamIndex;
187
+ toolCallBlocksByIndex.set(streamIndex, block);
188
+ }
189
+ if (toolCall.id) {
190
+ toolCallBlocksById.set(toolCall.id, block);
89
191
  }
192
+ return block;
90
193
  };
91
194
  for await (const chunk of openaiStream) {
92
195
  if (!chunk || typeof chunk !== "object")
@@ -94,6 +197,9 @@ export const streamOpenAICompletions = (model, context, options) => {
94
197
  // OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
95
198
  // and each chunk in a streamed completion carries the same id.
96
199
  output.responseId ||= chunk.id;
200
+ if (typeof chunk.model === "string" && chunk.model.length > 0 && chunk.model !== model.id) {
201
+ output.responseModel ||= chunk.model;
202
+ }
97
203
  if (chunk.usage) {
98
204
  output.usage = parseChunkUsage(chunk.usage, model);
99
205
  }
@@ -111,60 +217,46 @@ export const streamOpenAICompletions = (model, context, options) => {
111
217
  if (finishReasonResult.errorMessage) {
112
218
  output.errorMessage = finishReasonResult.errorMessage;
113
219
  }
220
+ hasFinishReason = true;
114
221
  }
115
222
  if (choice.delta) {
116
223
  if (choice.delta.content !== null &&
117
224
  choice.delta.content !== undefined &&
118
225
  choice.delta.content.length > 0) {
119
- if (!currentBlock || currentBlock.type !== "text") {
120
- finishCurrentBlock(currentBlock);
121
- currentBlock = { type: "text", text: "" };
122
- output.content.push(currentBlock);
123
- stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
124
- }
125
- if (currentBlock.type === "text") {
126
- currentBlock.text += choice.delta.content;
127
- stream.push({
128
- type: "text_delta",
129
- contentIndex: blockIndex(),
130
- delta: choice.delta.content,
131
- partial: output,
132
- });
133
- }
226
+ const block = ensureTextBlock();
227
+ block.text += choice.delta.content;
228
+ stream.push({
229
+ type: "text_delta",
230
+ contentIndex: getContentIndex(block),
231
+ delta: choice.delta.content,
232
+ partial: output,
233
+ });
134
234
  }
135
235
  // Some endpoints return reasoning in reasoning_content (llama.cpp),
136
236
  // or reasoning (other openai compatible endpoints)
137
237
  // Use the first non-empty reasoning field to avoid duplication
138
238
  // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
139
239
  const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
240
+ const deltaFields = choice.delta;
140
241
  let foundReasoningField = null;
141
242
  for (const field of reasoningFields) {
142
- if (choice.delta[field] !== null &&
143
- choice.delta[field] !== undefined &&
144
- choice.delta[field].length > 0) {
145
- if (!foundReasoningField) {
146
- foundReasoningField = field;
147
- break;
148
- }
243
+ const value = deltaFields[field];
244
+ if (typeof value === "string" && value.length > 0) {
245
+ foundReasoningField = field;
246
+ break;
149
247
  }
150
248
  }
151
249
  if (foundReasoningField) {
152
- if (!currentBlock || currentBlock.type !== "thinking") {
153
- finishCurrentBlock(currentBlock);
154
- currentBlock = {
155
- type: "thinking",
156
- thinking: "",
157
- thinkingSignature: foundReasoningField,
158
- };
159
- output.content.push(currentBlock);
160
- stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
161
- }
162
- if (currentBlock.type === "thinking") {
163
- const delta = choice.delta[foundReasoningField];
164
- currentBlock.thinking += delta;
250
+ const delta = deltaFields[foundReasoningField];
251
+ if (typeof delta === "string" && delta.length > 0) {
252
+ const thinkingSignature = model.provider === "opencode-go" && foundReasoningField === "reasoning"
253
+ ? "reasoning_content"
254
+ : foundReasoningField;
255
+ const block = ensureThinkingBlock(thinkingSignature);
256
+ block.thinking += delta;
165
257
  stream.push({
166
258
  type: "thinking_delta",
167
- contentIndex: blockIndex(),
259
+ contentIndex: getContentIndex(block),
168
260
  delta,
169
261
  partial: output,
170
262
  });
@@ -172,38 +264,26 @@ export const streamOpenAICompletions = (model, context, options) => {
172
264
  }
173
265
  if (choice?.delta?.tool_calls) {
174
266
  for (const toolCall of choice.delta.tool_calls) {
175
- if (!currentBlock ||
176
- currentBlock.type !== "toolCall" ||
177
- (toolCall.id && currentBlock.id !== toolCall.id)) {
178
- finishCurrentBlock(currentBlock);
179
- currentBlock = {
180
- type: "toolCall",
181
- id: toolCall.id || "",
182
- name: toolCall.function?.name || "",
183
- arguments: {},
184
- partialArgs: "",
185
- };
186
- output.content.push(currentBlock);
187
- stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
267
+ const block = ensureToolCallBlock(toolCall);
268
+ if (!block.id && toolCall.id) {
269
+ block.id = toolCall.id;
270
+ toolCallBlocksById.set(toolCall.id, block);
188
271
  }
189
- if (currentBlock.type === "toolCall") {
190
- if (toolCall.id)
191
- currentBlock.id = toolCall.id;
192
- if (toolCall.function?.name)
193
- currentBlock.name = toolCall.function.name;
194
- let delta = "";
195
- if (toolCall.function?.arguments) {
196
- delta = toolCall.function.arguments;
197
- currentBlock.partialArgs += toolCall.function.arguments;
198
- currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
199
- }
200
- stream.push({
201
- type: "toolcall_delta",
202
- contentIndex: blockIndex(),
203
- delta,
204
- partial: output,
205
- });
272
+ if (!block.name && toolCall.function?.name) {
273
+ block.name = toolCall.function.name;
274
+ }
275
+ let delta = "";
276
+ if (toolCall.function?.arguments) {
277
+ delta = toolCall.function.arguments;
278
+ block.partialArgs = (block.partialArgs ?? "") + toolCall.function.arguments;
279
+ block.arguments = parseStreamingJson(block.partialArgs);
206
280
  }
281
+ stream.push({
282
+ type: "toolcall_delta",
283
+ contentIndex: getContentIndex(block),
284
+ delta,
285
+ partial: output,
286
+ });
207
287
  }
208
288
  }
209
289
  const reasoningDetails = choice.delta.reasoning_details;
@@ -219,7 +299,9 @@ export const streamOpenAICompletions = (model, context, options) => {
219
299
  }
220
300
  }
221
301
  }
222
- finishCurrentBlock(currentBlock);
302
+ for (const block of blocks) {
303
+ finishBlock(block);
304
+ }
223
305
  if (options?.signal?.aborted) {
224
306
  throw new Error("Request was aborted");
225
307
  }
@@ -229,12 +311,19 @@ export const streamOpenAICompletions = (model, context, options) => {
229
311
  if (output.stopReason === "error") {
230
312
  throw new Error(output.errorMessage || "Provider returned an error stop reason");
231
313
  }
314
+ if (!hasFinishReason) {
315
+ throw new Error("Stream ended without finish_reason");
316
+ }
232
317
  stream.push({ type: "done", reason: output.stopReason, message: output });
233
318
  stream.end();
234
319
  }
235
320
  catch (error) {
236
- for (const block of output.content)
321
+ for (const block of output.content) {
237
322
  delete block.index;
323
+ // Streaming scratch buffers are only used during parsing; never persist them.
324
+ delete block.partialArgs;
325
+ delete block.streamIndex;
326
+ }
238
327
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
239
328
  output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
240
329
  // Some providers via OpenRouter give additional information in this field.
@@ -248,12 +337,14 @@ export const streamOpenAICompletions = (model, context, options) => {
248
337
  return stream;
249
338
  };
250
339
  export const streamSimpleOpenAICompletions = (model, context, options) => {
251
- const apiKey = options?.apiKey || getEnvApiKey(model.provider);
340
+ const apiKey = options?.apiKey;
252
341
  if (!apiKey) {
253
342
  throw new Error(`No API key for provider: ${model.provider}`);
254
343
  }
255
344
  const base = buildBaseOptions(model, options, apiKey);
256
- const reasoningEffort = supportsXhigh(model) ? clampToXhigh(options?.reasoning) : clampReasoning(options?.reasoning);
345
+ const rawLevel = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined;
346
+ const clampedReasoning = rawLevel === "off" ? rawLevel : clampToXhigh(rawLevel);
347
+ const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
257
348
  const toolChoice = options?.toolChoice;
258
349
  return streamOpenAICompletions(model, context, {
259
350
  ...base,
@@ -261,13 +352,7 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
261
352
  toolChoice,
262
353
  });
263
354
  };
264
- function createClient(model, context, apiKey, optionsHeaders) {
265
- if (!apiKey) {
266
- if (!process.env.OPENAI_API_KEY) {
267
- throw new Error("OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.");
268
- }
269
- apiKey = process.env.OPENAI_API_KEY;
270
- }
355
+ function createClient(model, context, apiKey, optionsHeaders, sessionId, compat = getCompat(model)) {
271
356
  const headers = { ...model.headers };
272
357
  if (model.provider === "github-copilot") {
273
358
  const hasImages = hasCopilotVisionInput(context.messages);
@@ -277,25 +362,41 @@ function createClient(model, context, apiKey, optionsHeaders) {
277
362
  });
278
363
  Object.assign(headers, copilotHeaders);
279
364
  }
365
+ if (sessionId && compat.sendSessionAffinityHeaders) {
366
+ headers.session_id = sessionId;
367
+ headers["x-client-request-id"] = sessionId;
368
+ headers["x-session-affinity"] = sessionId;
369
+ }
280
370
  // Merge options headers last so they can override defaults
281
371
  if (optionsHeaders) {
282
372
  Object.assign(headers, optionsHeaders);
283
373
  }
374
+ const defaultHeaders = model.provider === "cloudflare-ai-gateway"
375
+ ? {
376
+ ...headers,
377
+ Authorization: headers.Authorization ?? null,
378
+ "cf-aig-authorization": `Bearer ${apiKey}`,
379
+ }
380
+ : headers;
284
381
  return new OpenAI({
285
382
  apiKey,
286
- baseURL: model.baseUrl,
383
+ baseURL: isCloudflareProvider(model.provider) ? resolveCloudflareBaseUrl(model) : model.baseUrl,
287
384
  dangerouslyAllowBrowser: true,
288
- defaultHeaders: headers,
385
+ defaultHeaders,
289
386
  });
290
387
  }
291
- function buildParams(model, context, options) {
292
- const compat = getCompat(model);
388
+ function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
293
389
  const messages = convertMessages(model, context, compat);
294
- maybeAddOpenRouterAnthropicCacheControl(model, messages);
390
+ const cacheControl = getCompatCacheControl(compat, cacheRetention);
295
391
  const params = {
296
392
  model: model.id,
297
393
  messages,
298
394
  stream: true,
395
+ prompt_cache_key: (model.baseUrl.includes("api.openai.com") && cacheRetention !== "none") ||
396
+ (cacheRetention === "long" && compat.supportsLongCacheRetention)
397
+ ? clampOpenAIPromptCacheKey(options?.sessionId)
398
+ : undefined,
399
+ prompt_cache_retention: cacheRetention === "long" && compat.supportsLongCacheRetention ? "24h" : undefined,
299
400
  };
300
401
  if (compat.supportsUsageInStreaming !== false) {
301
402
  params.stream_options = { include_usage: true };
@@ -314,7 +415,7 @@ function buildParams(model, context, options) {
314
415
  if (options?.temperature !== undefined) {
315
416
  params.temperature = options.temperature;
316
417
  }
317
- if (context.tools) {
418
+ if (context.tools && context.tools.length > 0) {
318
419
  params.tools = convertTools(context.tools, compat);
319
420
  if (compat.zaiToolStream) {
320
421
  params.tool_stream = true;
@@ -324,36 +425,78 @@ function buildParams(model, context, options) {
324
425
  // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
325
426
  params.tools = [];
326
427
  }
428
+ if (cacheControl) {
429
+ applyAnthropicCacheControl(messages, params.tools, cacheControl);
430
+ }
327
431
  if (options?.toolChoice) {
328
432
  params.tool_choice = options.toolChoice;
329
433
  }
330
434
  if (compat.thinkingFormat === "zai" && model.reasoning) {
331
- params.enable_thinking = !!options?.reasoningEffort;
435
+ const zaiParams = params;
436
+ zaiParams.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
332
437
  }
333
438
  else if (compat.thinkingFormat === "qwen" && model.reasoning) {
334
439
  params.enable_thinking = !!options?.reasoningEffort;
335
440
  }
336
441
  else if (compat.thinkingFormat === "qwen-chat-template" && model.reasoning) {
337
- params.chat_template_kwargs = { enable_thinking: !!options?.reasoningEffort };
442
+ params.chat_template_kwargs = {
443
+ enable_thinking: !!options?.reasoningEffort,
444
+ preserve_thinking: true,
445
+ };
446
+ }
447
+ else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
448
+ params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
449
+ if (options?.reasoningEffort && compat.supportsReasoningEffort) {
450
+ params.reasoning_effort =
451
+ model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
452
+ }
338
453
  }
339
454
  else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
340
455
  // OpenRouter normalizes reasoning across providers via a nested reasoning object.
341
456
  const openRouterParams = params;
342
457
  if (options?.reasoningEffort) {
343
458
  openRouterParams.reasoning = {
344
- effort: mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap),
459
+ effort: model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort,
345
460
  };
346
461
  }
347
- else {
348
- openRouterParams.reasoning = { effort: "none" };
462
+ else if (model.thinkingLevelMap?.off !== null) {
463
+ openRouterParams.reasoning = { effort: model.thinkingLevelMap?.off ?? "none" };
464
+ }
465
+ }
466
+ else if (compat.thinkingFormat === "ant-ling" && model.reasoning && options?.reasoningEffort) {
467
+ const effort = model.thinkingLevelMap?.[options.reasoningEffort];
468
+ if (typeof effort === "string") {
469
+ params.reasoning = { effort };
470
+ }
471
+ }
472
+ else if (compat.thinkingFormat === "together" && model.reasoning) {
473
+ const togetherParams = params;
474
+ togetherParams.reasoning = { enabled: !!options?.reasoningEffort };
475
+ if (options?.reasoningEffort && compat.supportsReasoningEffort) {
476
+ togetherParams.reasoning_effort = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
477
+ }
478
+ }
479
+ else if (compat.thinkingFormat === "string-thinking" && model.reasoning) {
480
+ const stringThinkingParams = params;
481
+ if (options?.reasoningEffort) {
482
+ stringThinkingParams.thinking = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
483
+ }
484
+ else if (model.thinkingLevelMap?.off !== null) {
485
+ stringThinkingParams.thinking = model.thinkingLevelMap?.off ?? "none";
349
486
  }
350
487
  }
351
488
  else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
352
489
  // OpenAI-style reasoning_effort
353
- params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
490
+ params.reasoning_effort = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
491
+ }
492
+ else if (!options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
493
+ const offValue = model.thinkingLevelMap?.off;
494
+ if (typeof offValue === "string") {
495
+ params.reasoning_effort = offValue;
496
+ }
354
497
  }
355
498
  // OpenRouter provider routing preferences
356
- if (model.baseUrl.includes("openrouter.ai") && model.compat?.openRouterRouting) {
499
+ if (model.compat?.openRouterRouting) {
357
500
  params.provider = model.compat.openRouterRouting;
358
501
  }
359
502
  // Vercel AI Gateway provider routing preferences
@@ -370,37 +513,80 @@ function buildParams(model, context, options) {
370
513
  }
371
514
  return params;
372
515
  }
373
- function mapReasoningEffort(effort, reasoningEffortMap) {
374
- return reasoningEffortMap[effort] ?? effort;
516
/**
 * Builds the `cache_control` marker used for Anthropic-style prompt caching.
 *
 * @param {object} compat - Resolved compat settings. Reads `cacheControlFormat`
 *   and `supportsLongCacheRetention`.
 * @param {string} cacheRetention - Requested retention. `"none"` disables the
 *   marker entirely; `"long"` requests the extended TTL.
 * @returns {{type: string, ttl?: string} | undefined} `undefined` when the
 *   compat format is not `"anthropic"` or retention is `"none"`; otherwise an
 *   ephemeral marker, with `ttl: "1h"` only when long retention is both
 *   requested and supported.
 */
function getCompatCacheControl(compat, cacheRetention) {
    const cachingEnabled = compat.cacheControlFormat === "anthropic" && cacheRetention !== "none";
    if (!cachingEnabled) {
        return undefined;
    }
    const marker = { type: "ephemeral" };
    // The extended TTL is opt-in and only sent to providers that accept it.
    if (cacheRetention === "long" && compat.supportsLongCacheRetention) {
        marker.ttl = "1h";
    }
    return marker;
}
376
- function maybeAddOpenRouterAnthropicCacheControl(model, messages) {
377
- if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/"))
378
- return;
379
- // Anthropic-style caching requires cache_control on a text part. Add a breakpoint
380
- // on the last user/assistant message (walking backwards until we find text content).
381
- for (let i = messages.length - 1; i >= 0; i--) {
382
- const msg = messages[i];
383
- if (msg.role !== "user" && msg.role !== "assistant")
384
- continue;
385
- const content = msg.content;
386
- if (typeof content === "string") {
387
- msg.content = [
388
- Object.assign({ type: "text", text: content }, { cache_control: { type: "ephemeral" } }),
389
- ];
523
/**
 * Places Anthropic-style cache breakpoints on an outgoing request, mutating
 * the given messages and tools in place.
 *
 * Three locations are tagged, in this order: the system/developer prompt, the
 * last tool definition, and the last user/assistant conversation message.
 *
 * @param {Array<object>} messages - Converted chat messages.
 * @param {Array<object>|undefined} tools - Converted tool definitions, if any.
 * @param {object} cacheControl - Marker object to attach at each breakpoint.
 * @returns {void}
 */
function applyAnthropicCacheControl(messages, tools, cacheControl) {
    // Instructions first ...
    addCacheControlToSystemPrompt(messages, cacheControl);
    // ... then the tool list ...
    addCacheControlToLastTool(tools, cacheControl);
    // ... and finally the tail of the conversation.
    addCacheControlToLastConversationMessage(messages, cacheControl);
}
528
// Tags the first message whose role is "system" or "developer" with
// `cacheControl` (delegating to addCacheControlToInstructionMessage) and then
// returns; any later instruction messages are left untouched. When no such
// message exists the loop simply falls through and nothing is modified.
// NOTE(review): the "-" lines inside this span are the removed side of the
// diff hunk (tail of the old OpenRouter-only cache-control helper); they are
// not part of this function's body.
+ function addCacheControlToSystemPrompt(messages, cacheControl) {
529
+ for (const message of messages) {
530
+ if (message.role === "system" || message.role === "developer") {
531
+ addCacheControlToInstructionMessage(message, cacheControl);
390
532
  return;
391
533
  }
392
- if (!Array.isArray(content))
393
- continue;
394
- // Find last text part and add cache_control
395
- for (let j = content.length - 1; j >= 0; j--) {
396
- const part = content[j];
397
- if (part?.type === "text") {
398
- Object.assign(part, { cache_control: { type: "ephemeral" } });
534
+ }
535
+ }
536
/**
 * Walks the conversation from newest to oldest and tags the first
 * user/assistant message that accepts a cache marker.
 *
 * Messages with other roles are skipped. A user/assistant message for which
 * `addCacheControlToMessage` reports `false` (nothing taggable) is skipped as
 * well, and the search continues with the previous message.
 *
 * @param {Array<object>} messages - Converted chat messages; may be mutated.
 * @param {object} cacheControl - Marker object to attach.
 * @returns {void}
 */
function addCacheControlToLastConversationMessage(messages, cacheControl) {
    for (let index = messages.length - 1; index >= 0; index -= 1) {
        const candidate = messages[index];
        const isConversationTurn = candidate.role === "user" || candidate.role === "assistant";
        // Short-circuit keeps the helper from being called for other roles.
        if (isConversationTurn && addCacheControlToMessage(candidate, cacheControl)) {
            return;
        }
    }
}
546
/**
 * Attaches `cacheControl` to the final entry of the tool list, in place.
 *
 * @param {Array<object>|undefined|null} tools - Converted tool definitions.
 *   A missing or empty list is a no-op.
 * @param {object} cacheControl - Marker stored as `cache_control` on the tool.
 * @returns {void}
 */
function addCacheControlToLastTool(tools, cacheControl) {
    const toolCount = tools ? tools.length : 0;
    if (toolCount === 0) {
        return;
    }
    tools[toolCount - 1].cache_control = cacheControl;
}
553
/**
 * Tags an instruction (system/developer) message with a cache marker.
 *
 * Thin alias over `addCacheControlToTextContent`; it performs no role check
 * of its own.
 *
 * @param {object} message - Message whose `content` should be tagged.
 * @param {object} cacheControl - Marker object to attach.
 * @returns {boolean} Whatever `addCacheControlToTextContent` reports.
 */
function addCacheControlToInstructionMessage(message, cacheControl) {
    const wasTagged = addCacheControlToTextContent(message, cacheControl);
    return wasTagged;
}
556
/**
 * Tags a conversation message with a cache marker when its role allows it.
 *
 * @param {object} message - Message to tag; only `"user"` and `"assistant"`
 *   roles are eligible.
 * @param {object} cacheControl - Marker object to attach.
 * @returns {boolean} `false` for any other role; otherwise the result of
 *   `addCacheControlToTextContent`.
 */
function addCacheControlToMessage(message, cacheControl) {
    const isConversationRole = message.role === "user" || message.role === "assistant";
    return isConversationRole ? addCacheControlToTextContent(message, cacheControl) : false;
}
562
/**
 * Attaches `cacheControl` to the text of a chat message, mutating the message
 * in place.
 *
 * - String content is promoted to a one-element array holding a single text
 *   part that carries `cache_control`. An empty string is left untouched.
 * - Array content is scanned from the end; the last text part with non-empty
 *   string text receives `cache_control`.
 * - Any other content (null, undefined, ...) is left untouched.
 *
 * Empty text parts are skipped in the array branch for the same reason the
 * string branch refuses an empty string: Anthropic-style prompt caching does
 * not accept a cache marker on an empty text block, so tagging one would
 * waste the breakpoint (or be rejected) while an earlier, cacheable part goes
 * unmarked.
 *
 * @param {object} message - Message whose `content` is inspected and possibly
 *   replaced or mutated.
 * @param {object} cacheControl - Marker stored as `cache_control`.
 * @returns {boolean} `true` when a marker was attached, `false` otherwise.
 */
function addCacheControlToTextContent(message, cacheControl) {
    const content = message.content;
    if (typeof content === "string") {
        if (content.length === 0) {
            return false;
        }
        message.content = [
            {
                type: "text",
                text: content,
                cache_control: cacheControl,
            },
        ];
        return true;
    }
    if (!Array.isArray(content)) {
        return false;
    }
    for (let i = content.length - 1; i >= 0; i--) {
        const part = content[i];
        // Mirror the empty-string guard above: only non-empty text is taggable.
        if (part?.type === "text" && typeof part.text === "string" && part.text.length > 0) {
            part.cache_control = cacheControl;
            return true;
        }
    }
    return false;
}
404
590
  export function convertMessages(model, context, compat) {
405
591
  const params = [];
406
592
  const normalizeToolCallId = (id) => {
@@ -458,14 +644,11 @@ export function convertMessages(model, context, compat) {
458
644
  };
459
645
  }
460
646
  });
461
- const filteredContent = !model.input.includes("image")
462
- ? content.filter((c) => c.type !== "image_url")
463
- : content;
464
- if (filteredContent.length === 0)
647
+ if (content.length === 0)
465
648
  continue;
466
649
  params.push({
467
650
  role: "user",
468
- content: filteredContent,
651
+ content,
469
652
  });
470
653
  }
471
654
  }
@@ -475,48 +658,53 @@ export function convertMessages(model, context, compat) {
475
658
  role: "assistant",
476
659
  content: compat.requiresAssistantAfterToolResult ? "" : null,
477
660
  };
478
- const textBlocks = msg.content.filter((b) => b.type === "text");
479
- // Filter out empty text blocks to avoid API validation errors
480
- const nonEmptyTextBlocks = textBlocks.filter((b) => b.text && b.text.trim().length > 0);
481
- if (nonEmptyTextBlocks.length > 0) {
482
- // Always send assistant content as a plain string (OpenAI Chat Completions
483
- // API standard format). Sending as an array of {type:"text", text:"..."}
484
- // objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
485
- // NVIDIA NIM) to mirror the content-block structure literally in their
486
- // output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
487
- assistantMsg.content = nonEmptyTextBlocks.map((b) => sanitizeSurrogates(b.text)).join("");
488
- }
489
- // Handle thinking blocks
490
- const thinkingBlocks = msg.content.filter((b) => b.type === "thinking");
491
- // Filter out empty thinking blocks to avoid API validation errors
492
- const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
493
- if (compat.requiresThinkingAsText) {
494
- if (nonEmptyThinkingBlocks.length > 0) {
661
+ const assistantTextParts = msg.content
662
+ .filter(isTextContentBlock)
663
+ .filter((block) => block.text.trim().length > 0)
664
+ .map((block) => ({
665
+ type: "text",
666
+ text: sanitizeSurrogates(block.text),
667
+ }));
668
+ const assistantText = assistantTextParts.map((part) => part.text).join("");
669
+ const nonEmptyThinkingBlocks = msg.content
670
+ .filter(isThinkingContentBlock)
671
+ .filter((block) => block.thinking.trim().length > 0);
672
+ if (nonEmptyThinkingBlocks.length > 0) {
673
+ if (compat.requiresThinkingAsText) {
495
674
  // Convert thinking blocks to plain text (no tags to avoid model mimicking them)
496
- const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
497
- const textContent = assistantMsg.content;
498
- if (textContent) {
499
- textContent.unshift({ type: "text", text: thinkingText });
675
+ const thinkingText = nonEmptyThinkingBlocks
676
+ .map((block) => sanitizeSurrogates(block.thinking))
677
+ .join("\n\n");
678
+ assistantMsg.content = [{ type: "text", text: thinkingText }, ...assistantTextParts];
679
+ }
680
+ else {
681
+ // Always send assistant content as a plain string (OpenAI Chat Completions
682
+ // API standard format). Sending as an array of {type:"text", text:"..."}
683
+ // objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
684
+ // NVIDIA NIM) to mirror the content-block structure literally in their
685
+ // output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
686
+ if (assistantText.length > 0) {
687
+ assistantMsg.content = assistantText;
500
688
  }
501
- else {
502
- assistantMsg.content = [{ type: "text", text: thinkingText }];
689
+ // Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
690
+ let signature = nonEmptyThinkingBlocks[0].thinkingSignature;
691
+ if (model.provider === "opencode-go" && signature === "reasoning") {
692
+ signature = "reasoning_content";
693
+ }
694
+ if (signature && signature.length > 0) {
695
+ assistantMsg[signature] = nonEmptyThinkingBlocks.map((block) => block.thinking).join("\n");
503
696
  }
504
697
  }
505
698
  }
506
- else {
507
- // Replay the reasoning field (e.g. "reasoning_content" for llama.cpp,
508
- // gpt-oss, and DeepSeek reasoner) under its captured signature.
509
- // DeepSeek requires this field on every assistant turn produced under
510
- // thinking mode even if the captured reasoning text was empty —
511
- // otherwise the next request fails with:
512
- // 400: The `reasoning_content` in the thinking mode must be passed back to the API.
513
- const signature = thinkingBlocks.find((b) => b.thinkingSignature && b.thinkingSignature.length > 0)?.thinkingSignature;
514
- if (signature) {
515
- const matchingBlocks = thinkingBlocks.filter((b) => b.thinkingSignature === signature);
516
- assistantMsg[signature] = matchingBlocks.map((b) => b.thinking || "").join("\n");
517
- }
699
+ else if (assistantText.length > 0) {
700
+ // Always send assistant content as a plain string (OpenAI Chat Completions
701
+ // API standard format). Sending as an array of {type:"text", text:"..."}
702
+ // objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
703
+ // NVIDIA NIM) to mirror the content-block structure literally in their
704
+ // output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
705
+ assistantMsg.content = assistantText;
518
706
  }
519
- const toolCalls = msg.content.filter((b) => b.type === "toolCall");
707
+ const toolCalls = msg.content.filter(isToolCallBlock);
520
708
  if (toolCalls.length > 0) {
521
709
  assistantMsg.tool_calls = toolCalls.map((tc) => ({
522
710
  id: tc.id,
@@ -541,6 +729,11 @@ export function convertMessages(model, context, compat) {
541
729
  assistantMsg.reasoning_details = reasoningDetails;
542
730
  }
543
731
  }
732
+ if (compat.requiresReasoningContentOnAssistantMessages &&
733
+ model.reasoning &&
734
+ assistantMsg.reasoning_content === undefined) {
735
+ assistantMsg.reasoning_content = "";
736
+ }
544
737
  // Skip assistant messages that have no content and no tool calls.
545
738
  // Some providers require "either content or tool_calls, but not none".
546
739
  // Other providers also don't accept empty assistant messages.
@@ -549,8 +742,7 @@ export function convertMessages(model, context, compat) {
549
742
  const hasContent = content !== null &&
550
743
  content !== undefined &&
551
744
  (typeof content === "string" ? content.length > 0 : content.length > 0);
552
- const hasReasoning = thinkingBlocks.some((b) => b.thinkingSignature && b.thinkingSignature.length > 0);
553
- if (!hasContent && !hasReasoning && !assistantMsg.tool_calls) {
745
+ if (!hasContent && !assistantMsg.tool_calls) {
554
746
  continue;
555
747
  }
556
748
  params.push(assistantMsg);
@@ -562,8 +754,8 @@ export function convertMessages(model, context, compat) {
562
754
  const toolMsg = transformedMessages[j];
563
755
  // Extract text and image content
564
756
  const textResult = toolMsg.content
565
- .filter((c) => c.type === "text")
566
- .map((c) => c.text)
757
+ .filter(isTextContentBlock)
758
+ .map((block) => block.text)
567
759
  .join("\n");
568
760
  const hasImages = toolMsg.content.some((c) => c.type === "image");
569
761
  // Always send tool result with text (or placeholder if only images)
@@ -580,7 +772,7 @@ export function convertMessages(model, context, compat) {
580
772
  params.push(toolResultMsg);
581
773
  if (hasImages && model.input.includes("image")) {
582
774
  for (const block of toolMsg.content) {
583
- if (block.type === "image") {
775
+ if (isImageContentBlock(block)) {
584
776
  imageBlocks.push({
585
777
  type: "image_url",
586
778
  image_url: {
@@ -634,19 +826,19 @@ function convertTools(tools, compat) {
634
826
  }
635
827
  function parseChunkUsage(rawUsage, model) {
636
828
  const promptTokens = rawUsage.prompt_tokens || 0;
637
- const reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens || 0;
829
+ const cacheReadTokens = rawUsage.prompt_tokens_details?.cached_tokens ?? rawUsage.prompt_cache_hit_tokens ?? 0;
638
830
  const cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;
639
- const reasoningTokens = rawUsage.completion_tokens_details?.reasoning_tokens || 0;
640
- // Normalize to @draht/ai semantics:
641
- // - cacheRead: hits from cache created by previous requests only
642
- // - cacheWrite: tokens written to cache in this request
643
- // Some OpenAI-compatible providers (observed on OpenRouter) report cached_tokens
644
- // as (previous hits + current writes). In that case, remove cacheWrite from cacheRead.
645
- const cacheReadTokens = cacheWriteTokens > 0 ? Math.max(0, reportedCachedTokens - cacheWriteTokens) : reportedCachedTokens;
831
+ // Follow documented OpenAI/OpenRouter semantics: cached_tokens is cache-read
832
+ // tokens (hits). OpenAI does not document or emit cache_write_tokens, but
833
+ // OpenRouter-compatible providers can include it as a separate write count.
834
+ // OpenRouter's own provider/tests affirm the separate mapping:
835
+ // https://github.com/OpenRouterTeam/ai-sdk-provider/pull/409
836
+ // Do not subtract writes from cached_tokens, otherwise spec-compliant
837
+ // providers are under-reported. DS4 mirrors this contract too:
838
+ // https://github.com/antirez/ds4/pull/29
646
839
  const input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens);
647
- // Compute totalTokens ourselves since we add reasoning_tokens to output
648
- // and some providers (e.g., Groq) don't include them in total_tokens
649
- const outputTokens = (rawUsage.completion_tokens || 0) + reasoningTokens;
840
+ // OpenAI completion_tokens already includes reasoning_tokens.
841
+ const outputTokens = rawUsage.completion_tokens || 0;
650
842
  const usage = {
651
843
  input,
652
844
  output: outputTokens,
@@ -689,47 +881,69 @@ function mapStopReason(reason) {
689
881
  /**
   * Derives default compatibility flags for an OpenAI-compatible endpoint from
   * `model.provider`, `model.baseUrl` and (for the OpenRouter checks) `model.id`.
   *
   * Each `is*` local is true when either the provider id or a base-URL
   * substring matches. The returned object holds only detected defaults.
   *
   * This span is a rendered diff: lines starting with "-" are the removed
   * (old) side, lines starting with "+" and unmarked lines form the current
   * function, and bare numbers are line-number residue from the diff viewer.
   *
   * @param {object} model - Model descriptor; reads `provider`, `baseUrl`, `id`.
   * @returns {object} Detected compat settings (supportsStore,
   *   supportsDeveloperRole, supportsReasoningEffort, maxTokensField,
   *   thinkingFormat, cacheControlFormat, ...).
   */
  function detectCompat(model) {
690
882
  const provider = model.provider;
691
883
  const baseUrl = model.baseUrl;
692
- const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
693
- const isNonStandard = provider === "cerebras" ||
884
+ const isZai = provider === "zai" ||
885
+ provider === "zai-coding-cn" ||
886
+ baseUrl.includes("api.z.ai") ||
887
+ baseUrl.includes("open.bigmodel.cn");
888
+ const isTogether = provider === "together" || baseUrl.includes("api.together.ai") || baseUrl.includes("api.together.xyz");
889
+ const isMoonshot = provider === "moonshotai" || provider === "moonshotai-cn" || baseUrl.includes("api.moonshot.");
890
+ const isOpenRouter = provider === "openrouter" || baseUrl.includes("openrouter.ai");
891
+ const isCloudflareWorkersAI = provider === "cloudflare-workers-ai" || baseUrl.includes("api.cloudflare.com");
892
+ const isCloudflareAiGateway = provider === "cloudflare-ai-gateway" || baseUrl.includes("gateway.ai.cloudflare.com");
893
+ const isNvidia = provider === "nvidia" || baseUrl.includes("integrate.api.nvidia.com");
894
+ const isAntLing = provider === "ant-ling" || baseUrl.includes("api.ant-ling.com");
// isNonStandard drives supportsStore and (outside OpenRouter) supportsDeveloperRole below.
// NOTE(review): DeepSeek is matched here by base URL only, while isDeepSeek further
// down also accepts provider === "deepseek" - confirm the difference is intentional.
+ const isNonStandard = isNvidia ||
896
+ provider === "cerebras" ||
694
897
  baseUrl.includes("cerebras.ai") ||
695
898
  provider === "xai" ||
696
899
  baseUrl.includes("api.x.ai") ||
900
+ isTogether ||
697
901
  baseUrl.includes("chutes.ai") ||
698
902
  baseUrl.includes("deepseek.com") ||
699
903
  isZai ||
904
+ isMoonshot ||
700
905
  provider === "opencode" ||
701
- baseUrl.includes("opencode.ai");
702
- const useMaxTokens = baseUrl.includes("chutes.ai");
906
+ baseUrl.includes("opencode.ai") ||
907
+ isCloudflareWorkersAI ||
908
+ isCloudflareAiGateway ||
909
+ isAntLing;
910
+ const useMaxTokens = baseUrl.includes("chutes.ai") || isMoonshot || isCloudflareAiGateway || isTogether || isNvidia || isAntLing;
703
911
  const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
704
- const isGroq = provider === "groq" || baseUrl.includes("groq.com");
705
- const reasoningEffortMap = isGroq && model.id === "qwen/qwen3-32b"
706
- ? {
707
- minimal: "default",
708
- low: "default",
709
- medium: "default",
710
- high: "default",
711
- xhigh: "default",
712
- }
713
- : {};
912
+ const isDeepSeek = provider === "deepseek" || baseUrl.includes("deepseek.com");
913
+ const isOpenRouterDeveloperRoleModel = isOpenRouter && (model.id.startsWith("anthropic/") || model.id.startsWith("openai/"));
// NOTE(review): this checks the provider id only, whereas isOpenRouter also
// matches on base URL - confirm that URL-only OpenRouter models should not get
// the "anthropic" cache-control format.
+ const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
714
915
  return {
715
916
  supportsStore: !isNonStandard,
716
- supportsDeveloperRole: !isNonStandard,
717
- supportsReasoningEffort: !isGrok && !isZai,
718
- reasoningEffortMap,
917
+ supportsDeveloperRole: isOpenRouterDeveloperRoleModel || (!isNonStandard && !isOpenRouter),
918
+ supportsReasoningEffort: !isGrok && !isZai && !isMoonshot && !isTogether && !isCloudflareAiGateway && !isNvidia && !isAntLing,
719
919
  supportsUsageInStreaming: true,
720
920
  maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
721
921
  requiresToolResultName: false,
722
922
  requiresAssistantAfterToolResult: false,
723
923
  requiresThinkingAsText: false,
724
- thinkingFormat: isZai
725
- ? "zai"
726
- : provider === "openrouter" || baseUrl.includes("openrouter.ai")
727
- ? "openrouter"
728
- : "openai",
924
+ requiresReasoningContentOnAssistantMessages: isDeepSeek,
// First match wins: deepseek, then zai, together, ant-ling, openrouter; "openai" is the fallback.
+ thinkingFormat: isDeepSeek
926
+ ? "deepseek"
927
+ : isZai
928
+ ? "zai"
929
+ : isTogether
930
+ ? "together"
931
+ : isAntLing
932
+ ? "ant-ling"
933
+ : isOpenRouter
934
+ ? "openrouter"
935
+ : "openai",
729
936
  openRouterRouting: {},
730
937
  vercelGatewayRouting: {},
731
938
  zaiToolStream: false,
732
- supportsStrictMode: true,
939
+ supportsStrictMode: !isMoonshot && !isTogether && !isCloudflareAiGateway && !isNvidia,
940
+ cacheControlFormat,
941
+ sendSessionAffinityHeaders: false,
942
+ supportsLongCacheRetention: !(isTogether ||
943
+ isCloudflareWorkersAI ||
944
+ isCloudflareAiGateway ||
945
+ isNvidia ||
946
+ isAntLing),
733
947
  };
734
948
  }
735
949
  /**
@@ -744,17 +958,21 @@ function getCompat(model) {
744
958
  supportsStore: model.compat.supportsStore ?? detected.supportsStore,
745
959
  supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
746
960
  supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
747
- reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
748
961
  supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
749
962
  maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
750
963
  requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
751
964
  requiresAssistantAfterToolResult: model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
752
965
  requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
966
+ requiresReasoningContentOnAssistantMessages: model.compat.requiresReasoningContentOnAssistantMessages ??
967
+ detected.requiresReasoningContentOnAssistantMessages,
753
968
  thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
754
969
  openRouterRouting: model.compat.openRouterRouting ?? {},
755
970
  vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
756
971
  zaiToolStream: model.compat.zaiToolStream ?? detected.zaiToolStream,
757
972
  supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
973
+ cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
974
+ sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
975
+ supportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,
758
976
  };
759
977
  }
760
978
  //# sourceMappingURL=openai-completions.js.map