phi-code-ai 0.56.3 → 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. package/README.md +258 -73
  2. package/dist/api-registry.d.ts.map +1 -1
  3. package/dist/api-registry.js.map +1 -1
  4. package/dist/bedrock-provider.d.ts.map +1 -1
  5. package/dist/cli.d.ts.map +1 -1
  6. package/dist/cli.js +1 -1
  7. package/dist/cli.js.map +1 -1
  8. package/dist/env-api-keys.d.ts +9 -0
  9. package/dist/env-api-keys.d.ts.map +1 -1
  10. package/dist/env-api-keys.js +96 -30
  11. package/dist/env-api-keys.js.map +1 -1
  12. package/dist/image-models.d.ts +10 -0
  13. package/dist/image-models.d.ts.map +1 -0
  14. package/dist/image-models.generated.d.ts +305 -0
  15. package/dist/image-models.generated.d.ts.map +1 -0
  16. package/dist/image-models.generated.js +307 -0
  17. package/dist/image-models.generated.js.map +1 -0
  18. package/dist/image-models.js +23 -0
  19. package/dist/image-models.js.map +1 -0
  20. package/dist/images-api-registry.d.ts +14 -0
  21. package/dist/images-api-registry.d.ts.map +1 -0
  22. package/dist/images-api-registry.js +22 -0
  23. package/dist/images-api-registry.js.map +1 -0
  24. package/dist/images.d.ts +4 -0
  25. package/dist/images.d.ts.map +1 -0
  26. package/dist/images.js +14 -0
  27. package/dist/images.js.map +1 -0
  28. package/dist/index.d.ts +20 -11
  29. package/dist/index.d.ts.map +1 -1
  30. package/dist/index.js +8 -9
  31. package/dist/index.js.map +1 -1
  32. package/dist/models.d.ts +3 -9
  33. package/dist/models.d.ts.map +1 -1
  34. package/dist/models.generated.d.ts +6525 -2231
  35. package/dist/models.generated.d.ts.map +1 -1
  36. package/dist/models.generated.js +8992 -5524
  37. package/dist/models.generated.js.map +1 -1
  38. package/dist/models.js +28 -12
  39. package/dist/models.js.map +1 -1
  40. package/dist/oauth.d.ts.map +1 -1
  41. package/dist/providers/amazon-bedrock.d.ts +23 -0
  42. package/dist/providers/amazon-bedrock.d.ts.map +1 -1
  43. package/dist/providers/amazon-bedrock.js +206 -44
  44. package/dist/providers/amazon-bedrock.js.map +1 -1
  45. package/dist/providers/anthropic.d.ts +23 -2
  46. package/dist/providers/anthropic.d.ts.map +1 -1
  47. package/dist/providers/anthropic.js +294 -63
  48. package/dist/providers/anthropic.js.map +1 -1
  49. package/dist/providers/azure-openai-responses.d.ts.map +1 -1
  50. package/dist/providers/azure-openai-responses.js +47 -23
  51. package/dist/providers/azure-openai-responses.js.map +1 -1
  52. package/dist/providers/cloudflare.d.ts +13 -0
  53. package/dist/providers/cloudflare.d.ts.map +1 -0
  54. package/dist/providers/cloudflare.js +26 -0
  55. package/dist/providers/cloudflare.js.map +1 -0
  56. package/dist/providers/faux.d.ts +56 -0
  57. package/dist/providers/faux.d.ts.map +1 -0
  58. package/dist/providers/faux.js +368 -0
  59. package/dist/providers/faux.js.map +1 -0
  60. package/dist/providers/github-copilot-headers.d.ts.map +1 -1
  61. package/dist/providers/github-copilot-headers.js.map +1 -1
  62. package/dist/providers/google-shared.d.ts +7 -2
  63. package/dist/providers/google-shared.d.ts.map +1 -1
  64. package/dist/providers/google-shared.js +53 -24
  65. package/dist/providers/google-shared.js.map +1 -1
  66. package/dist/providers/google-vertex.d.ts +1 -1
  67. package/dist/providers/google-vertex.d.ts.map +1 -1
  68. package/dist/providers/google-vertex.js +87 -16
  69. package/dist/providers/google-vertex.js.map +1 -1
  70. package/dist/providers/google.d.ts +1 -1
  71. package/dist/providers/google.d.ts.map +1 -1
  72. package/dist/providers/google.js +57 -9
  73. package/dist/providers/google.js.map +1 -1
  74. package/dist/providers/images/openrouter.d.ts +3 -0
  75. package/dist/providers/images/openrouter.d.ts.map +1 -0
  76. package/dist/providers/images/openrouter.js +129 -0
  77. package/dist/providers/images/openrouter.js.map +1 -0
  78. package/dist/providers/images/register-builtins.d.ts +4 -0
  79. package/dist/providers/images/register-builtins.d.ts.map +1 -0
  80. package/dist/providers/images/register-builtins.js +34 -0
  81. package/dist/providers/images/register-builtins.js.map +1 -0
  82. package/dist/providers/mistral.d.ts +3 -0
  83. package/dist/providers/mistral.d.ts.map +1 -1
  84. package/dist/providers/mistral.js +49 -9
  85. package/dist/providers/mistral.js.map +1 -1
  86. package/dist/providers/openai-codex-responses.d.ts +21 -0
  87. package/dist/providers/openai-codex-responses.d.ts.map +1 -1
  88. package/dist/providers/openai-codex-responses.js +443 -86
  89. package/dist/providers/openai-codex-responses.js.map +1 -1
  90. package/dist/providers/openai-completions.d.ts +5 -1
  91. package/dist/providers/openai-completions.d.ts.map +1 -1
  92. package/dist/providers/openai-completions.js +460 -225
  93. package/dist/providers/openai-completions.js.map +1 -1
  94. package/dist/providers/openai-responses-shared.d.ts +1 -0
  95. package/dist/providers/openai-responses-shared.d.ts.map +1 -1
  96. package/dist/providers/openai-responses-shared.js +95 -45
  97. package/dist/providers/openai-responses-shared.js.map +1 -1
  98. package/dist/providers/openai-responses.d.ts.map +1 -1
  99. package/dist/providers/openai-responses.js +66 -44
  100. package/dist/providers/openai-responses.js.map +1 -1
  101. package/dist/providers/register-builtins.d.ts +27 -2
  102. package/dist/providers/register-builtins.d.ts.map +1 -1
  103. package/dist/providers/register-builtins.js +157 -52
  104. package/dist/providers/register-builtins.js.map +1 -1
  105. package/dist/providers/simple-options.d.ts.map +1 -1
  106. package/dist/providers/simple-options.js +5 -1
  107. package/dist/providers/simple-options.js.map +1 -1
  108. package/dist/providers/transform-messages.d.ts.map +1 -1
  109. package/dist/providers/transform-messages.js +63 -34
  110. package/dist/providers/transform-messages.js.map +1 -1
  111. package/dist/session-resources.d.ts +4 -0
  112. package/dist/session-resources.d.ts.map +1 -0
  113. package/dist/session-resources.js +22 -0
  114. package/dist/session-resources.js.map +1 -0
  115. package/dist/stream.d.ts.map +1 -1
  116. package/dist/stream.js.map +1 -1
  117. package/dist/types.d.ts +219 -15
  118. package/dist/types.d.ts.map +1 -1
  119. package/dist/types.js.map +1 -1
  120. package/dist/utils/diagnostics.d.ts +19 -0
  121. package/dist/utils/diagnostics.d.ts.map +1 -0
  122. package/dist/utils/diagnostics.js +25 -0
  123. package/dist/utils/diagnostics.js.map +1 -0
  124. package/dist/utils/event-stream.d.ts.map +1 -1
  125. package/dist/utils/event-stream.js +7 -3
  126. package/dist/utils/event-stream.js.map +1 -1
  127. package/dist/utils/hash.d.ts.map +1 -1
  128. package/dist/utils/hash.js.map +1 -1
  129. package/dist/utils/headers.d.ts +2 -0
  130. package/dist/utils/headers.d.ts.map +1 -0
  131. package/dist/utils/headers.js +8 -0
  132. package/dist/utils/headers.js.map +1 -0
  133. package/dist/utils/json-parse.d.ts +8 -1
  134. package/dist/utils/json-parse.d.ts.map +1 -1
  135. package/dist/utils/json-parse.js +89 -5
  136. package/dist/utils/json-parse.js.map +1 -1
  137. package/dist/utils/oauth/anthropic.d.ts +14 -6
  138. package/dist/utils/oauth/anthropic.d.ts.map +1 -1
  139. package/dist/utils/oauth/anthropic.js +288 -57
  140. package/dist/utils/oauth/anthropic.js.map +1 -1
  141. package/dist/utils/oauth/github-copilot.d.ts.map +1 -1
  142. package/dist/utils/oauth/github-copilot.js +23 -12
  143. package/dist/utils/oauth/github-copilot.js.map +1 -1
  144. package/dist/utils/oauth/index.d.ts +0 -4
  145. package/dist/utils/oauth/index.d.ts.map +1 -1
  146. package/dist/utils/oauth/index.js +0 -10
  147. package/dist/utils/oauth/index.js.map +1 -1
  148. package/dist/utils/oauth/oauth-page.d.ts +3 -0
  149. package/dist/utils/oauth/oauth-page.d.ts.map +1 -0
  150. package/dist/utils/oauth/oauth-page.js +105 -0
  151. package/dist/utils/oauth/oauth-page.js.map +1 -0
  152. package/dist/utils/oauth/openai-codex.d.ts.map +1 -1
  153. package/dist/utils/oauth/openai-codex.js +51 -46
  154. package/dist/utils/oauth/openai-codex.js.map +1 -1
  155. package/dist/utils/oauth/pkce.d.ts.map +1 -1
  156. package/dist/utils/oauth/pkce.js.map +1 -1
  157. package/dist/utils/oauth/types.d.ts +10 -0
  158. package/dist/utils/oauth/types.d.ts.map +1 -1
  159. package/dist/utils/oauth/types.js.map +1 -1
  160. package/dist/utils/overflow.d.ts +7 -3
  161. package/dist/utils/overflow.d.ts.map +1 -1
  162. package/dist/utils/overflow.js +46 -13
  163. package/dist/utils/overflow.js.map +1 -1
  164. package/dist/utils/sanitize-unicode.d.ts.map +1 -1
  165. package/dist/utils/sanitize-unicode.js.map +1 -1
  166. package/dist/utils/typebox-helpers.d.ts +1 -1
  167. package/dist/utils/typebox-helpers.d.ts.map +1 -1
  168. package/dist/utils/typebox-helpers.js +1 -1
  169. package/dist/utils/typebox-helpers.js.map +1 -1
  170. package/dist/utils/validation.d.ts.map +1 -1
  171. package/dist/utils/validation.js +247 -38
  172. package/dist/utils/validation.js.map +1 -1
  173. package/package.json +44 -14
  174. package/bedrock-provider.d.ts +0 -1
  175. package/bedrock-provider.js +0 -1
  176. package/dist/providers/google-gemini-cli.d.ts +0 -74
  177. package/dist/providers/google-gemini-cli.d.ts.map +0 -1
  178. package/dist/providers/google-gemini-cli.js +0 -754
  179. package/dist/providers/google-gemini-cli.js.map +0 -1
  180. package/dist/utils/oauth/google-antigravity.d.ts +0 -26
  181. package/dist/utils/oauth/google-antigravity.d.ts.map +0 -1
  182. package/dist/utils/oauth/google-antigravity.js +0 -373
  183. package/dist/utils/oauth/google-antigravity.js.map +0 -1
  184. package/dist/utils/oauth/google-gemini-cli.d.ts +0 -26
  185. package/dist/utils/oauth/google-gemini-cli.d.ts.map +0 -1
  186. package/dist/utils/oauth/google-gemini-cli.js +0 -478
  187. package/dist/utils/oauth/google-gemini-cli.js.map +0 -1
@@ -1,11 +1,13 @@
1
1
  import OpenAI from "openai";
2
2
  import { getEnvApiKey } from "../env-api-keys.js";
3
- import { calculateCost, supportsXhigh } from "../models.js";
3
+ import { calculateCost, clampThinkingLevel } from "../models.js";
4
4
  import { AssistantMessageEventStream } from "../utils/event-stream.js";
5
+ import { headersToRecord } from "../utils/headers.js";
5
6
  import { parseStreamingJson } from "../utils/json-parse.js";
6
7
  import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
8
+ import { isCloudflareProvider, resolveCloudflareBaseUrl } from "./cloudflare.js";
7
9
  import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
8
- import { buildBaseOptions, clampReasoning } from "./simple-options.js";
10
+ import { buildBaseOptions } from "./simple-options.js";
9
11
  import { transformMessages } from "./transform-messages.js";
10
12
  /**
11
13
  * Check if conversation messages contain tool calls or tool results.
@@ -25,6 +27,27 @@ function hasToolHistory(messages) {
25
27
  }
26
28
  return false;
27
29
  }
30
+ function isTextContentBlock(block) {
31
+ return block.type === "text";
32
+ }
33
+ function isThinkingContentBlock(block) {
34
+ return block.type === "thinking";
35
+ }
36
+ function isToolCallBlock(block) {
37
+ return block.type === "toolCall";
38
+ }
39
+ function isImageContentBlock(block) {
40
+ return block.type === "image";
41
+ }
42
+ function resolveCacheRetention(cacheRetention) {
43
+ if (cacheRetention) {
44
+ return cacheRetention;
45
+ }
46
+ if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") {
47
+ return "long";
48
+ }
49
+ return "short";
50
+ }
28
51
  export const streamOpenAICompletions = (model, context, options) => {
29
52
  const stream = new AssistantMessageEventStream();
30
53
  (async () => {
@@ -47,128 +70,185 @@ export const streamOpenAICompletions = (model, context, options) => {
47
70
  };
48
71
  try {
49
72
  const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
50
- const client = createClient(model, context, apiKey, options?.headers);
51
- const params = buildParams(model, context, options);
52
- options?.onPayload?.(params);
53
- const openaiStream = await client.chat.completions.create(params, { signal: options?.signal });
73
+ const compat = getCompat(model);
74
+ const cacheRetention = resolveCacheRetention(options?.cacheRetention);
75
+ const cacheSessionId = cacheRetention === "none" ? undefined : options?.sessionId;
76
+ const client = createClient(model, context, apiKey, options?.headers, cacheSessionId, compat);
77
+ let params = buildParams(model, context, options, compat, cacheRetention);
78
+ const nextParams = await options?.onPayload?.(params, model);
79
+ if (nextParams !== undefined) {
80
+ params = nextParams;
81
+ }
82
+ const requestOptions = {
83
+ ...(options?.signal ? { signal: options.signal } : {}),
84
+ ...(options?.timeoutMs !== undefined ? { timeout: options.timeoutMs } : {}),
85
+ ...(options?.maxRetries !== undefined ? { maxRetries: options.maxRetries } : {}),
86
+ };
87
+ const { data: openaiStream, response } = await client.chat.completions
88
+ .create(params, requestOptions)
89
+ .withResponse();
90
+ await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
54
91
  stream.push({ type: "start", partial: output });
55
- let currentBlock = null;
92
+ let textBlock = null;
93
+ let thinkingBlock = null;
94
+ const toolCallBlocksByIndex = new Map();
95
+ const toolCallBlocksById = new Map();
56
96
  const blocks = output.content;
57
- const blockIndex = () => blocks.length - 1;
58
- const finishCurrentBlock = (block) => {
59
- if (block) {
60
- if (block.type === "text") {
61
- stream.push({
62
- type: "text_end",
63
- contentIndex: blockIndex(),
64
- content: block.text,
65
- partial: output,
66
- });
67
- }
68
- else if (block.type === "thinking") {
69
- stream.push({
70
- type: "thinking_end",
71
- contentIndex: blockIndex(),
72
- content: block.thinking,
73
- partial: output,
74
- });
97
+ const getContentIndex = (block) => blocks.indexOf(block);
98
+ const finishBlock = (block) => {
99
+ const contentIndex = getContentIndex(block);
100
+ if (contentIndex === -1) {
101
+ return;
102
+ }
103
+ if (block.type === "text") {
104
+ stream.push({
105
+ type: "text_end",
106
+ contentIndex,
107
+ content: block.text,
108
+ partial: output,
109
+ });
110
+ }
111
+ else if (block.type === "thinking") {
112
+ stream.push({
113
+ type: "thinking_end",
114
+ contentIndex,
115
+ content: block.thinking,
116
+ partial: output,
117
+ });
118
+ }
119
+ else if (block.type === "toolCall") {
120
+ block.arguments = parseStreamingJson(block.partialArgs);
121
+ // Finalize in-place and strip the scratch buffers so replay only
122
+ // carries parsed arguments.
123
+ delete block.partialArgs;
124
+ delete block.streamIndex;
125
+ stream.push({
126
+ type: "toolcall_end",
127
+ contentIndex,
128
+ toolCall: block,
129
+ partial: output,
130
+ });
131
+ }
132
+ };
133
+ const ensureTextBlock = () => {
134
+ if (!textBlock) {
135
+ textBlock = { type: "text", text: "" };
136
+ blocks.push(textBlock);
137
+ stream.push({ type: "text_start", contentIndex: getContentIndex(textBlock), partial: output });
138
+ }
139
+ return textBlock;
140
+ };
141
+ const ensureThinkingBlock = (thinkingSignature) => {
142
+ if (!thinkingBlock) {
143
+ thinkingBlock = {
144
+ type: "thinking",
145
+ thinking: "",
146
+ thinkingSignature,
147
+ };
148
+ blocks.push(thinkingBlock);
149
+ stream.push({ type: "thinking_start", contentIndex: getContentIndex(thinkingBlock), partial: output });
150
+ }
151
+ return thinkingBlock;
152
+ };
153
+ const ensureToolCallBlock = (toolCall) => {
154
+ const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
155
+ let block = streamIndex !== undefined ? toolCallBlocksByIndex.get(streamIndex) : undefined;
156
+ if (!block && toolCall.id) {
157
+ block = toolCallBlocksById.get(toolCall.id);
158
+ }
159
+ if (!block) {
160
+ block = {
161
+ type: "toolCall",
162
+ id: toolCall.id || "",
163
+ name: toolCall.function?.name || "",
164
+ arguments: {},
165
+ partialArgs: "",
166
+ streamIndex,
167
+ };
168
+ if (streamIndex !== undefined) {
169
+ toolCallBlocksByIndex.set(streamIndex, block);
75
170
  }
76
- else if (block.type === "toolCall") {
77
- block.arguments = parseStreamingJson(block.partialArgs);
78
- delete block.partialArgs;
79
- stream.push({
80
- type: "toolcall_end",
81
- contentIndex: blockIndex(),
82
- toolCall: block,
83
- partial: output,
84
- });
171
+ if (toolCall.id) {
172
+ toolCallBlocksById.set(toolCall.id, block);
85
173
  }
174
+ blocks.push(block);
175
+ stream.push({
176
+ type: "toolcall_start",
177
+ contentIndex: getContentIndex(block),
178
+ partial: output,
179
+ });
86
180
  }
181
+ if (streamIndex !== undefined && block.streamIndex === undefined) {
182
+ block.streamIndex = streamIndex;
183
+ toolCallBlocksByIndex.set(streamIndex, block);
184
+ }
185
+ if (toolCall.id) {
186
+ toolCallBlocksById.set(toolCall.id, block);
187
+ }
188
+ return block;
87
189
  };
88
190
  for await (const chunk of openaiStream) {
191
+ if (!chunk || typeof chunk !== "object")
192
+ continue;
193
+ // OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
194
+ // and each chunk in a streamed completion carries the same id.
195
+ output.responseId ||= chunk.id;
196
+ if (typeof chunk.model === "string" && chunk.model.length > 0 && chunk.model !== model.id) {
197
+ output.responseModel ||= chunk.model;
198
+ }
89
199
  if (chunk.usage) {
90
- const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
91
- const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
92
- const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
93
- const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
94
- output.usage = {
95
- // OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
96
- input,
97
- output: outputTokens,
98
- cacheRead: cachedTokens,
99
- cacheWrite: 0,
100
- // Compute totalTokens ourselves since we add reasoning_tokens to output
101
- // and some providers (e.g., Groq) don't include them in total_tokens
102
- totalTokens: input + outputTokens + cachedTokens,
103
- cost: {
104
- input: 0,
105
- output: 0,
106
- cacheRead: 0,
107
- cacheWrite: 0,
108
- total: 0,
109
- },
110
- };
111
- calculateCost(model, output.usage);
200
+ output.usage = parseChunkUsage(chunk.usage, model);
112
201
  }
113
- const choice = chunk.choices?.[0];
202
+ const choice = Array.isArray(chunk.choices) ? chunk.choices[0] : undefined;
114
203
  if (!choice)
115
204
  continue;
205
+ // Fallback: some providers (e.g., Moonshot) return usage
206
+ // in choice.usage instead of the standard chunk.usage
207
+ if (!chunk.usage && choice.usage) {
208
+ output.usage = parseChunkUsage(choice.usage, model);
209
+ }
116
210
  if (choice.finish_reason) {
117
- output.stopReason = mapStopReason(choice.finish_reason);
211
+ const finishReasonResult = mapStopReason(choice.finish_reason);
212
+ output.stopReason = finishReasonResult.stopReason;
213
+ if (finishReasonResult.errorMessage) {
214
+ output.errorMessage = finishReasonResult.errorMessage;
215
+ }
118
216
  }
119
217
  if (choice.delta) {
120
218
  if (choice.delta.content !== null &&
121
219
  choice.delta.content !== undefined &&
122
220
  choice.delta.content.length > 0) {
123
- if (!currentBlock || currentBlock.type !== "text") {
124
- finishCurrentBlock(currentBlock);
125
- currentBlock = { type: "text", text: "" };
126
- output.content.push(currentBlock);
127
- stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
128
- }
129
- if (currentBlock.type === "text") {
130
- currentBlock.text += choice.delta.content;
131
- stream.push({
132
- type: "text_delta",
133
- contentIndex: blockIndex(),
134
- delta: choice.delta.content,
135
- partial: output,
136
- });
137
- }
221
+ const block = ensureTextBlock();
222
+ block.text += choice.delta.content;
223
+ stream.push({
224
+ type: "text_delta",
225
+ contentIndex: getContentIndex(block),
226
+ delta: choice.delta.content,
227
+ partial: output,
228
+ });
138
229
  }
139
230
  // Some endpoints return reasoning in reasoning_content (llama.cpp),
140
231
  // or reasoning (other openai compatible endpoints)
141
232
  // Use the first non-empty reasoning field to avoid duplication
142
233
  // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
143
234
  const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
235
+ const deltaFields = choice.delta;
144
236
  let foundReasoningField = null;
145
237
  for (const field of reasoningFields) {
146
- if (choice.delta[field] !== null &&
147
- choice.delta[field] !== undefined &&
148
- choice.delta[field].length > 0) {
149
- if (!foundReasoningField) {
150
- foundReasoningField = field;
151
- break;
152
- }
238
+ const value = deltaFields[field];
239
+ if (typeof value === "string" && value.length > 0) {
240
+ foundReasoningField = field;
241
+ break;
153
242
  }
154
243
  }
155
244
  if (foundReasoningField) {
156
- if (!currentBlock || currentBlock.type !== "thinking") {
157
- finishCurrentBlock(currentBlock);
158
- currentBlock = {
159
- type: "thinking",
160
- thinking: "",
161
- thinkingSignature: foundReasoningField,
162
- };
163
- output.content.push(currentBlock);
164
- stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
165
- }
166
- if (currentBlock.type === "thinking") {
167
- const delta = choice.delta[foundReasoningField];
168
- currentBlock.thinking += delta;
245
+ const delta = deltaFields[foundReasoningField];
246
+ if (typeof delta === "string" && delta.length > 0) {
247
+ const block = ensureThinkingBlock(foundReasoningField);
248
+ block.thinking += delta;
169
249
  stream.push({
170
250
  type: "thinking_delta",
171
- contentIndex: blockIndex(),
251
+ contentIndex: getContentIndex(block),
172
252
  delta,
173
253
  partial: output,
174
254
  });
@@ -176,38 +256,26 @@ export const streamOpenAICompletions = (model, context, options) => {
176
256
  }
177
257
  if (choice?.delta?.tool_calls) {
178
258
  for (const toolCall of choice.delta.tool_calls) {
179
- if (!currentBlock ||
180
- currentBlock.type !== "toolCall" ||
181
- (toolCall.id && currentBlock.id !== toolCall.id)) {
182
- finishCurrentBlock(currentBlock);
183
- currentBlock = {
184
- type: "toolCall",
185
- id: toolCall.id || "",
186
- name: toolCall.function?.name || "",
187
- arguments: {},
188
- partialArgs: "",
189
- };
190
- output.content.push(currentBlock);
191
- stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
259
+ const block = ensureToolCallBlock(toolCall);
260
+ if (!block.id && toolCall.id) {
261
+ block.id = toolCall.id;
262
+ toolCallBlocksById.set(toolCall.id, block);
192
263
  }
193
- if (currentBlock.type === "toolCall") {
194
- if (toolCall.id)
195
- currentBlock.id = toolCall.id;
196
- if (toolCall.function?.name)
197
- currentBlock.name = toolCall.function.name;
198
- let delta = "";
199
- if (toolCall.function?.arguments) {
200
- delta = toolCall.function.arguments;
201
- currentBlock.partialArgs += toolCall.function.arguments;
202
- currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
203
- }
204
- stream.push({
205
- type: "toolcall_delta",
206
- contentIndex: blockIndex(),
207
- delta,
208
- partial: output,
209
- });
264
+ if (!block.name && toolCall.function?.name) {
265
+ block.name = toolCall.function.name;
266
+ }
267
+ let delta = "";
268
+ if (toolCall.function?.arguments) {
269
+ delta = toolCall.function.arguments;
270
+ block.partialArgs = (block.partialArgs ?? "") + toolCall.function.arguments;
271
+ block.arguments = parseStreamingJson(block.partialArgs);
210
272
  }
273
+ stream.push({
274
+ type: "toolcall_delta",
275
+ contentIndex: getContentIndex(block),
276
+ delta,
277
+ partial: output,
278
+ });
211
279
  }
212
280
  }
213
281
  const reasoningDetails = choice.delta.reasoning_details;
@@ -223,19 +291,28 @@ export const streamOpenAICompletions = (model, context, options) => {
223
291
  }
224
292
  }
225
293
  }
226
- finishCurrentBlock(currentBlock);
294
+ for (const block of blocks) {
295
+ finishBlock(block);
296
+ }
227
297
  if (options?.signal?.aborted) {
228
298
  throw new Error("Request was aborted");
229
299
  }
230
- if (output.stopReason === "aborted" || output.stopReason === "error") {
231
- throw new Error("An unknown error occurred");
300
+ if (output.stopReason === "aborted") {
301
+ throw new Error("Request was aborted");
302
+ }
303
+ if (output.stopReason === "error") {
304
+ throw new Error(output.errorMessage || "Provider returned an error stop reason");
232
305
  }
233
306
  stream.push({ type: "done", reason: output.stopReason, message: output });
234
307
  stream.end();
235
308
  }
236
309
  catch (error) {
237
- for (const block of output.content)
310
+ for (const block of output.content) {
238
311
  delete block.index;
312
+ // Streaming scratch buffers are only used during parsing; never persist them.
313
+ delete block.partialArgs;
314
+ delete block.streamIndex;
315
+ }
239
316
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
240
317
  output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
241
318
  // Some providers via OpenRouter give additional information in this field.
@@ -254,7 +331,8 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
254
331
  throw new Error(`No API key for provider: ${model.provider}`);
255
332
  }
256
333
  const base = buildBaseOptions(model, options, apiKey);
257
- const reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);
334
+ const clampedReasoning = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined;
335
+ const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
258
336
  const toolChoice = options?.toolChoice;
259
337
  return streamOpenAICompletions(model, context, {
260
338
  ...base,
@@ -262,7 +340,7 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
262
340
  toolChoice,
263
341
  });
264
342
  };
265
- function createClient(model, context, apiKey, optionsHeaders) {
343
+ function createClient(model, context, apiKey, optionsHeaders, sessionId, compat = getCompat(model)) {
266
344
  if (!apiKey) {
267
345
  if (!process.env.OPENAI_API_KEY) {
268
346
  throw new Error("OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.");
@@ -278,25 +356,41 @@ function createClient(model, context, apiKey, optionsHeaders) {
278
356
  });
279
357
  Object.assign(headers, copilotHeaders);
280
358
  }
359
+ if (sessionId && compat.sendSessionAffinityHeaders) {
360
+ headers.session_id = sessionId;
361
+ headers["x-client-request-id"] = sessionId;
362
+ headers["x-session-affinity"] = sessionId;
363
+ }
281
364
  // Merge options headers last so they can override defaults
282
365
  if (optionsHeaders) {
283
366
  Object.assign(headers, optionsHeaders);
284
367
  }
368
+ const defaultHeaders = model.provider === "cloudflare-ai-gateway"
369
+ ? {
370
+ ...headers,
371
+ Authorization: headers.Authorization ?? null,
372
+ "cf-aig-authorization": `Bearer ${apiKey}`,
373
+ }
374
+ : headers;
285
375
  return new OpenAI({
286
376
  apiKey,
287
- baseURL: model.baseUrl,
377
+ baseURL: isCloudflareProvider(model.provider) ? resolveCloudflareBaseUrl(model) : model.baseUrl,
288
378
  dangerouslyAllowBrowser: true,
289
- defaultHeaders: headers,
379
+ defaultHeaders,
290
380
  });
291
381
  }
292
- function buildParams(model, context, options) {
293
- const compat = getCompat(model);
382
+ function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
294
383
  const messages = convertMessages(model, context, compat);
295
- maybeAddOpenRouterAnthropicCacheControl(model, messages);
384
+ const cacheControl = getCompatCacheControl(compat, cacheRetention);
296
385
  const params = {
297
386
  model: model.id,
298
387
  messages,
299
388
  stream: true,
389
+ prompt_cache_key: (model.baseUrl.includes("api.openai.com") && cacheRetention !== "none") ||
390
+ (cacheRetention === "long" && compat.supportsLongCacheRetention)
391
+ ? options?.sessionId
392
+ : undefined,
393
+ prompt_cache_retention: cacheRetention === "long" && compat.supportsLongCacheRetention ? "24h" : undefined,
300
394
  };
301
395
  if (compat.supportsUsageInStreaming !== false) {
302
396
  params.stream_options = { include_usage: true };
@@ -315,23 +409,69 @@ function buildParams(model, context, options) {
315
409
  if (options?.temperature !== undefined) {
316
410
  params.temperature = options.temperature;
317
411
  }
318
- if (context.tools) {
412
+ if (context.tools && context.tools.length > 0) {
319
413
  params.tools = convertTools(context.tools, compat);
414
+ if (compat.zaiToolStream) {
415
+ params.tool_stream = true;
416
+ }
320
417
  }
321
418
  else if (hasToolHistory(context.messages)) {
322
419
  // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
323
420
  params.tools = [];
324
421
  }
422
+ if (cacheControl) {
423
+ applyAnthropicCacheControl(messages, params.tools, cacheControl);
424
+ }
325
425
  if (options?.toolChoice) {
326
426
  params.tool_choice = options.toolChoice;
327
427
  }
328
- if ((compat.thinkingFormat === "zai" || compat.thinkingFormat === "qwen") && model.reasoning) {
329
- // Both Z.ai and Qwen use enable_thinking: boolean
428
+ if (compat.thinkingFormat === "zai" && model.reasoning) {
429
+ params.enable_thinking = !!options?.reasoningEffort;
430
+ }
431
+ else if (compat.thinkingFormat === "qwen" && model.reasoning) {
330
432
  params.enable_thinking = !!options?.reasoningEffort;
331
433
  }
434
+ else if (compat.thinkingFormat === "qwen-chat-template" && model.reasoning) {
435
+ params.chat_template_kwargs = {
436
+ enable_thinking: !!options?.reasoningEffort,
437
+ preserve_thinking: true,
438
+ };
439
+ }
440
+ else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
441
+ params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
442
+ if (options?.reasoningEffort) {
443
+ params.reasoning_effort =
444
+ model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
445
+ }
446
+ }
447
+ else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
448
+ // OpenRouter normalizes reasoning across providers via a nested reasoning object.
449
+ const openRouterParams = params;
450
+ if (options?.reasoningEffort) {
451
+ openRouterParams.reasoning = {
452
+ effort: model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort,
453
+ };
454
+ }
455
+ else if (model.thinkingLevelMap?.off !== null) {
456
+ openRouterParams.reasoning = { effort: model.thinkingLevelMap?.off ?? "none" };
457
+ }
458
+ }
459
+ else if (compat.thinkingFormat === "together" && model.reasoning) {
460
+ const togetherParams = params;
461
+ togetherParams.reasoning = { enabled: !!options?.reasoningEffort };
462
+ if (options?.reasoningEffort && compat.supportsReasoningEffort) {
463
+ togetherParams.reasoning_effort = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
464
+ }
465
+ }
332
466
  else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
333
467
  // OpenAI-style reasoning_effort
334
- params.reasoning_effort = mapReasoningEffort(options.reasoningEffort, compat.reasoningEffortMap);
468
+ params.reasoning_effort = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
469
+ }
470
+ else if (!options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
471
+ const offValue = model.thinkingLevelMap?.off;
472
+ if (typeof offValue === "string") {
473
+ params.reasoning_effort = offValue;
474
+ }
335
475
  }
336
476
  // OpenRouter provider routing preferences
337
477
  if (model.baseUrl.includes("openrouter.ai") && model.compat?.openRouterRouting) {
@@ -351,37 +491,80 @@ function buildParams(model, context, options) {
351
491
  }
352
492
  return params;
353
493
  }
354
- function mapReasoningEffort(effort, reasoningEffortMap) {
355
- return reasoningEffortMap[effort] ?? effort;
494
/**
 * Build the cache-control marker to attach to request parts, if any.
 *
 * @param {object} compat - Resolved compat settings; reads `cacheControlFormat`
 *   and `supportsLongCacheRetention`.
 * @param {string} cacheRetention - Requested retention; `"none"` disables caching,
 *   `"long"` asks for the extended TTL.
 * @returns {{type: string, ttl?: string} | undefined} An `ephemeral` marker
 *   (with `ttl: "1h"` only when long retention is both requested and supported),
 *   or `undefined` when no marker should be sent.
 */
function getCompatCacheControl(compat, cacheRetention) {
    const usesAnthropicFormat = compat.cacheControlFormat === "anthropic";
    if (!usesAnthropicFormat || cacheRetention === "none") {
        return undefined;
    }
    const marker = { type: "ephemeral" };
    // The extended TTL is only emitted when the endpoint is flagged as supporting it.
    if (cacheRetention === "long" && compat.supportsLongCacheRetention) {
        marker.ttl = "1h";
    }
    return marker;
}
357
- function maybeAddOpenRouterAnthropicCacheControl(model, messages) {
358
- if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/"))
359
- return;
360
- // Anthropic-style caching requires cache_control on a text part. Add a breakpoint
361
- // on the last user/assistant message (walking backwards until we find text content).
362
- for (let i = messages.length - 1; i >= 0; i--) {
363
- const msg = messages[i];
364
- if (msg.role !== "user" && msg.role !== "assistant")
365
- continue;
366
- const content = msg.content;
367
- if (typeof content === "string") {
368
- msg.content = [
369
- Object.assign({ type: "text", text: content }, { cache_control: { type: "ephemeral" } }),
370
- ];
501
/**
 * Attach the given cache-control marker to the three places this module marks:
 * the first system/developer message, the last tool definition, and the most
 * recent user/assistant message. The arguments are mutated in place by the
 * helpers this function delegates to.
 *
 * @param {Array<object>} messages - Converted chat messages.
 * @param {Array<object> | undefined} tools - Converted tool definitions, if any.
 * @param {object} cacheControl - Marker to attach (e.g. `{ type: "ephemeral" }`).
 * @returns {void}
 */
function applyAnthropicCacheControl(messages, tools, cacheControl) {
    // 1. Instructions (system / developer prompt).
    addCacheControlToSystemPrompt(messages, cacheControl);

    // 2. Tool definitions (marker goes on the last one).
    addCacheControlToLastTool(tools, cacheControl);

    // 3. Conversation tail (latest user/assistant message with text).
    addCacheControlToLastConversationMessage(messages, cacheControl);
}
506
+ function addCacheControlToSystemPrompt(messages, cacheControl) {
507
+ for (const message of messages) {
508
+ if (message.role === "system" || message.role === "developer") {
509
+ addCacheControlToInstructionMessage(message, cacheControl);
371
510
  return;
372
511
  }
373
- if (!Array.isArray(content))
374
- continue;
375
- // Find last text part and add cache_control
376
- for (let j = content.length - 1; j >= 0; j--) {
377
- const part = content[j];
378
- if (part?.type === "text") {
379
- Object.assign(part, { cache_control: { type: "ephemeral" } });
512
+ }
513
+ }
514
+ function addCacheControlToLastConversationMessage(messages, cacheControl) {
515
+ for (let i = messages.length - 1; i >= 0; i--) {
516
+ const message = messages[i];
517
+ if (message.role === "user" || message.role === "assistant") {
518
+ if (addCacheControlToMessage(message, cacheControl)) {
380
519
  return;
381
520
  }
382
521
  }
383
522
  }
384
523
  }
524
/**
 * Set `cache_control` on the final entry of `tools`, mutating that entry.
 * Does nothing when `tools` is missing or empty.
 *
 * @param {Array<object> | null | undefined} tools - Tool definitions.
 * @param {object} cacheControl - Marker to assign to the last tool.
 * @returns {void}
 */
function addCacheControlToLastTool(tools, cacheControl) {
    const hasTools = Boolean(tools) && tools.length !== 0;
    if (hasTools) {
        tools[tools.length - 1].cache_control = cacheControl;
    }
}
531
/**
 * Mark an instruction (system/developer) message with a cache-control marker
 * by delegating to `addCacheControlToTextContent`.
 *
 * @param {object} message - Message whose `content` should be marked.
 * @param {object} cacheControl - Marker to attach.
 * @returns {boolean} Whatever `addCacheControlToTextContent` reports.
 */
function addCacheControlToInstructionMessage(message, cacheControl) {
    const wasMarked = addCacheControlToTextContent(message, cacheControl);
    return wasMarked;
}
534
/**
 * Mark a conversation message with a cache-control marker. Only `user` and
 * `assistant` roles are eligible; any other role is left untouched.
 *
 * @param {object} message - Message to mark; `role` decides eligibility.
 * @param {object} cacheControl - Marker to attach.
 * @returns {boolean} `false` for ineligible roles, otherwise the result of
 *   `addCacheControlToTextContent`.
 */
function addCacheControlToMessage(message, cacheControl) {
    const { role } = message;
    if (role !== "user" && role !== "assistant") {
        return false;
    }
    return addCacheControlToTextContent(message, cacheControl);
}
540
/**
 * Attach a cache-control marker to the text content of a message, mutating it.
 *
 * - String content is rewritten into a single-element array holding one text
 *   part that carries the marker.
 * - Array content is scanned from the end; the marker is set on the last text
 *   part that has non-empty text.
 *
 * Empty text is never marked in either shape: cache breakpoints on empty text
 * blocks are not cacheable, so the string branch and the array branch apply the
 * same rule and the caller can fall back to an earlier message.
 *
 * @param {object} message - Message whose `content` is a string or an array of parts.
 * @param {object} cacheControl - Marker to attach (e.g. `{ type: "ephemeral" }`).
 * @returns {boolean} `true` if a marker was attached, `false` otherwise.
 */
function addCacheControlToTextContent(message, cacheControl) {
    const content = message.content;
    if (typeof content === "string") {
        if (content.length === 0) {
            return false;
        }
        message.content = [
            {
                type: "text",
                text: content,
                cache_control: cacheControl,
            },
        ];
        return true;
    }
    if (!Array.isArray(content)) {
        return false;
    }
    for (let i = content.length - 1; i >= 0; i--) {
        const part = content[i];
        if (part?.type !== "text") {
            continue;
        }
        // Mirror the string branch: skip text parts with no usable text and
        // keep looking for an earlier part that can carry the breakpoint.
        if (typeof part.text !== "string" || part.text.length === 0) {
            continue;
        }
        part.cache_control = cacheControl;
        return true;
    }
    return false;
}
385
568
  export function convertMessages(model, context, compat) {
386
569
  const params = [];
387
570
  const normalizeToolCallId = (id) => {
@@ -439,14 +622,11 @@ export function convertMessages(model, context, compat) {
439
622
  };
440
623
  }
441
624
  });
442
- const filteredContent = !model.input.includes("image")
443
- ? content.filter((c) => c.type !== "image_url")
444
- : content;
445
- if (filteredContent.length === 0)
625
+ if (content.length === 0)
446
626
  continue;
447
627
  params.push({
448
628
  role: "user",
449
- content: filteredContent,
629
+ content,
450
630
  });
451
631
  }
452
632
  }
@@ -456,46 +636,50 @@ export function convertMessages(model, context, compat) {
456
636
  role: "assistant",
457
637
  content: compat.requiresAssistantAfterToolResult ? "" : null,
458
638
  };
459
- const textBlocks = msg.content.filter((b) => b.type === "text");
460
- // Filter out empty text blocks to avoid API validation errors
461
- const nonEmptyTextBlocks = textBlocks.filter((b) => b.text && b.text.trim().length > 0);
462
- if (nonEmptyTextBlocks.length > 0) {
463
- // GitHub Copilot requires assistant content as a string, not an array.
464
- // Sending as array causes Claude models to re-answer all previous prompts.
465
- if (model.provider === "github-copilot") {
466
- assistantMsg.content = nonEmptyTextBlocks.map((b) => sanitizeSurrogates(b.text)).join("");
467
- }
468
- else {
469
- assistantMsg.content = nonEmptyTextBlocks.map((b) => {
470
- return { type: "text", text: sanitizeSurrogates(b.text) };
471
- });
472
- }
473
- }
474
- // Handle thinking blocks
475
- const thinkingBlocks = msg.content.filter((b) => b.type === "thinking");
476
- // Filter out empty thinking blocks to avoid API validation errors
477
- const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
639
+ const assistantTextParts = msg.content
640
+ .filter(isTextContentBlock)
641
+ .filter((block) => block.text.trim().length > 0)
642
+ .map((block) => ({
643
+ type: "text",
644
+ text: sanitizeSurrogates(block.text),
645
+ }));
646
+ const assistantText = assistantTextParts.map((part) => part.text).join("");
647
+ const nonEmptyThinkingBlocks = msg.content
648
+ .filter(isThinkingContentBlock)
649
+ .filter((block) => block.thinking.trim().length > 0);
478
650
  if (nonEmptyThinkingBlocks.length > 0) {
479
651
  if (compat.requiresThinkingAsText) {
480
652
  // Convert thinking blocks to plain text (no tags to avoid model mimicking them)
481
- const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
482
- const textContent = assistantMsg.content;
483
- if (textContent) {
484
- textContent.unshift({ type: "text", text: thinkingText });
485
- }
486
- else {
487
- assistantMsg.content = [{ type: "text", text: thinkingText }];
488
- }
653
+ const thinkingText = nonEmptyThinkingBlocks
654
+ .map((block) => sanitizeSurrogates(block.thinking))
655
+ .join("\n\n");
656
+ assistantMsg.content = [{ type: "text", text: thinkingText }, ...assistantTextParts];
489
657
  }
490
658
  else {
659
+ // Always send assistant content as a plain string (OpenAI Chat Completions
660
+ // API standard format). Sending as an array of {type:"text", text:"..."}
661
+ // objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
662
+ // NVIDIA NIM) to mirror the content-block structure literally in their
663
+ // output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
664
+ if (assistantText.length > 0) {
665
+ assistantMsg.content = assistantText;
666
+ }
491
667
  // Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
492
668
  const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
493
669
  if (signature && signature.length > 0) {
494
- assistantMsg[signature] = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n");
670
+ assistantMsg[signature] = nonEmptyThinkingBlocks.map((block) => block.thinking).join("\n");
495
671
  }
496
672
  }
497
673
  }
498
- const toolCalls = msg.content.filter((b) => b.type === "toolCall");
674
+ else if (assistantText.length > 0) {
675
+ // Always send assistant content as a plain string (OpenAI Chat Completions
676
+ // API standard format). Sending as an array of {type:"text", text:"..."}
677
+ // objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
678
+ // NVIDIA NIM) to mirror the content-block structure literally in their
679
+ // output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
680
+ assistantMsg.content = assistantText;
681
+ }
682
+ const toolCalls = msg.content.filter(isToolCallBlock);
499
683
  if (toolCalls.length > 0) {
500
684
  assistantMsg.tool_calls = toolCalls.map((tc) => ({
501
685
  id: tc.id,
@@ -520,6 +704,11 @@ export function convertMessages(model, context, compat) {
520
704
  assistantMsg.reasoning_details = reasoningDetails;
521
705
  }
522
706
  }
707
+ if (compat.requiresReasoningContentOnAssistantMessages &&
708
+ model.reasoning &&
709
+ assistantMsg.reasoning_content === undefined) {
710
+ assistantMsg.reasoning_content = "";
711
+ }
523
712
  // Skip assistant messages that have no content and no tool calls.
524
713
  // Some providers require "either content or tool_calls, but not none".
525
714
  // Other providers also don't accept empty assistant messages.
@@ -540,8 +729,8 @@ export function convertMessages(model, context, compat) {
540
729
  const toolMsg = transformedMessages[j];
541
730
  // Extract text and image content
542
731
  const textResult = toolMsg.content
543
- .filter((c) => c.type === "text")
544
- .map((c) => c.text)
732
+ .filter(isTextContentBlock)
733
+ .map((block) => block.text)
545
734
  .join("\n");
546
735
  const hasImages = toolMsg.content.some((c) => c.type === "image");
547
736
  // Always send tool result with text (or placeholder if only images)
@@ -558,7 +747,7 @@ export function convertMessages(model, context, compat) {
558
747
  params.push(toolResultMsg);
559
748
  if (hasImages && model.input.includes("image")) {
560
749
  for (const block of toolMsg.content) {
561
- if (block.type === "image") {
750
+ if (isImageContentBlock(block)) {
562
751
  imageBlocks.push({
563
752
  type: "image_url",
564
753
  image_url: {
@@ -610,23 +799,51 @@ function convertTools(tools, compat) {
610
799
  },
611
800
  }));
612
801
  }
802
/**
 * Convert a raw OpenAI-compatible `usage` payload into this module's usage
 * record and price it via `calculateCost`.
 *
 * Normalization rules:
 * - `cacheRead` counts only hits on cache created by earlier requests.
 * - `cacheWrite` counts tokens written to cache by this request.
 * - When a provider reports cache writes, the reported cached-token figure is
 *   treated as (hits + writes), so writes are subtracted to get `cacheRead`
 *   (observed on OpenRouter, per the upstream note).
 * - `input` is the prompt total minus both cache figures, clamped at zero.
 * - `output` is `completion_tokens` as reported (it already includes reasoning tokens).
 *
 * @param {object} rawUsage - Provider usage object; reads `prompt_tokens`,
 *   `completion_tokens`, `prompt_cache_hit_tokens` and `prompt_tokens_details`.
 * @param {object} model - Model descriptor forwarded to `calculateCost`.
 * @returns {object} Usage record with `input`, `output`, `cacheRead`,
 *   `cacheWrite`, `totalTokens` and a `cost` breakdown.
 */
function parseChunkUsage(rawUsage, model) {
    const details = rawUsage.prompt_tokens_details;
    const promptTotal = rawUsage.prompt_tokens || 0;
    const completionTotal = rawUsage.completion_tokens || 0;
    const cacheWrite = details?.cache_write_tokens || 0;
    const cachedReported = details?.cached_tokens ?? rawUsage.prompt_cache_hit_tokens ?? 0;

    let cacheRead = cachedReported;
    if (cacheWrite > 0) {
        // Reported figure includes this request's writes; strip them out.
        cacheRead = Math.max(0, cachedReported - cacheWrite);
    }
    const freshInput = Math.max(0, promptTotal - cacheRead - cacheWrite);

    const usage = {
        input: freshInput,
        output: completionTotal,
        cacheRead,
        cacheWrite,
        totalTokens: freshInput + completionTotal + cacheRead + cacheWrite,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
    };
    calculateCost(model, usage);
    return usage;
}
613
826
  function mapStopReason(reason) {
614
827
  if (reason === null)
615
- return "stop";
828
+ return { stopReason: "stop" };
616
829
  switch (reason) {
617
830
  case "stop":
618
- return "stop";
831
+ case "end":
832
+ return { stopReason: "stop" };
619
833
  case "length":
620
- return "length";
834
+ return { stopReason: "length" };
621
835
  case "function_call":
622
836
  case "tool_calls":
623
- return "toolUse";
837
+ return { stopReason: "toolUse" };
624
838
  case "content_filter":
625
- return "error";
626
- default: {
627
- const _exhaustive = reason;
628
- throw new Error(`Unhandled stop reason: ${_exhaustive}`);
629
- }
839
+ return { stopReason: "error", errorMessage: "Provider finish_reason: content_filter" };
840
+ case "network_error":
841
+ return { stopReason: "error", errorMessage: "Provider finish_reason: network_error" };
842
+ default:
843
+ return {
844
+ stopReason: "error",
845
+ errorMessage: `Provider finish_reason: ${reason}`,
846
+ };
630
847
  }
631
848
  }
632
849
  /**
@@ -638,41 +855,54 @@ function detectCompat(model) {
638
855
  const provider = model.provider;
639
856
  const baseUrl = model.baseUrl;
640
857
  const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
858
+ const isTogether = provider === "together" || baseUrl.includes("api.together.ai") || baseUrl.includes("api.together.xyz");
859
+ const isMoonshot = provider === "moonshotai" || provider === "moonshotai-cn" || baseUrl.includes("api.moonshot.");
860
+ const isCloudflareWorkersAI = provider === "cloudflare-workers-ai" || baseUrl.includes("api.cloudflare.com");
861
+ const isCloudflareAiGateway = provider === "cloudflare-ai-gateway" || baseUrl.includes("gateway.ai.cloudflare.com");
641
862
  const isNonStandard = provider === "cerebras" ||
642
863
  baseUrl.includes("cerebras.ai") ||
643
864
  provider === "xai" ||
644
865
  baseUrl.includes("api.x.ai") ||
866
+ isTogether ||
645
867
  baseUrl.includes("chutes.ai") ||
646
868
  baseUrl.includes("deepseek.com") ||
869
+ baseUrl.includes("dashscope.aliyuncs.com") ||
647
870
  isZai ||
871
+ isMoonshot ||
648
872
  provider === "opencode" ||
649
- baseUrl.includes("opencode.ai");
650
- const useMaxTokens = baseUrl.includes("chutes.ai");
873
+ baseUrl.includes("opencode.ai") ||
874
+ isCloudflareWorkersAI ||
875
+ isCloudflareAiGateway;
876
+ const useMaxTokens = baseUrl.includes("chutes.ai") || isMoonshot || isCloudflareAiGateway || isTogether;
651
877
  const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
652
- const isGroq = provider === "groq" || baseUrl.includes("groq.com");
653
- const reasoningEffortMap = isGroq && model.id === "qwen/qwen3-32b"
654
- ? {
655
- minimal: "default",
656
- low: "default",
657
- medium: "default",
658
- high: "default",
659
- xhigh: "default",
660
- }
661
- : {};
878
+ const isDeepSeek = provider === "deepseek" || baseUrl.includes("deepseek.com");
879
+ const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
662
880
  return {
663
881
  supportsStore: !isNonStandard,
664
882
  supportsDeveloperRole: !isNonStandard,
665
- supportsReasoningEffort: !isGrok && !isZai,
666
- reasoningEffortMap,
883
+ supportsReasoningEffort: !isGrok && !isZai && !isMoonshot && !isTogether && !isCloudflareAiGateway,
667
884
  supportsUsageInStreaming: true,
668
885
  maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
669
886
  requiresToolResultName: false,
670
887
  requiresAssistantAfterToolResult: false,
671
888
  requiresThinkingAsText: false,
672
- thinkingFormat: isZai ? "zai" : "openai",
889
+ requiresReasoningContentOnAssistantMessages: isDeepSeek,
890
+ thinkingFormat: isDeepSeek
891
+ ? "deepseek"
892
+ : isZai
893
+ ? "zai"
894
+ : isTogether
895
+ ? "together"
896
+ : provider === "openrouter" || baseUrl.includes("openrouter.ai")
897
+ ? "openrouter"
898
+ : "openai",
673
899
  openRouterRouting: {},
674
900
  vercelGatewayRouting: {},
675
- supportsStrictMode: true,
901
+ zaiToolStream: false,
902
+ supportsStrictMode: !isMoonshot && !isTogether && !isCloudflareAiGateway,
903
+ cacheControlFormat,
904
+ sendSessionAffinityHeaders: false,
905
+ supportsLongCacheRetention: !(isTogether || isCloudflareWorkersAI || isCloudflareAiGateway),
676
906
  };
677
907
  }
678
908
  /**
@@ -687,16 +917,21 @@ function getCompat(model) {
687
917
  supportsStore: model.compat.supportsStore ?? detected.supportsStore,
688
918
  supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
689
919
  supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
690
- reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
691
920
  supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
692
921
  maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
693
922
  requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
694
923
  requiresAssistantAfterToolResult: model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
695
924
  requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
925
+ requiresReasoningContentOnAssistantMessages: model.compat.requiresReasoningContentOnAssistantMessages ??
926
+ detected.requiresReasoningContentOnAssistantMessages,
696
927
  thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
697
928
  openRouterRouting: model.compat.openRouterRouting ?? {},
698
929
  vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
930
+ zaiToolStream: model.compat.zaiToolStream ?? detected.zaiToolStream,
699
931
  supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
932
+ cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
933
+ sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
934
+ supportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,
700
935
  };
701
936
  }
702
937
  //# sourceMappingURL=openai-completions.js.map