@draht/ai 2026.5.12 → 2026.6.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api-registry.d.ts +1 -1
- package/dist/api-registry.d.ts.map +1 -1
- package/dist/api-registry.js.map +1 -1
- package/dist/bedrock-provider.d.ts +2 -2
- package/dist/bedrock-provider.d.ts.map +1 -1
- package/dist/bedrock-provider.js.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +14 -0
- package/dist/cli.js.map +1 -1
- package/dist/env-api-keys.d.ts +10 -1
- package/dist/env-api-keys.d.ts.map +1 -1
- package/dist/env-api-keys.js +110 -36
- package/dist/env-api-keys.js.map +1 -1
- package/dist/image-models.d.ts +10 -0
- package/dist/image-models.d.ts.map +1 -0
- package/dist/image-models.generated.d.ts +485 -0
- package/dist/image-models.generated.d.ts.map +1 -0
- package/dist/image-models.generated.js +487 -0
- package/dist/image-models.generated.js.map +1 -0
- package/dist/image-models.js +23 -0
- package/dist/image-models.js.map +1 -0
- package/dist/images-api-registry.d.ts +14 -0
- package/dist/images-api-registry.d.ts.map +1 -0
- package/dist/images-api-registry.js +22 -0
- package/dist/images-api-registry.js.map +1 -0
- package/dist/images.d.ts +4 -0
- package/dist/images.d.ts.map +1 -0
- package/dist/images.js +14 -0
- package/dist/images.js.map +1 -0
- package/dist/index.d.ts +31 -25
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -1
- package/dist/index.js.map +1 -1
- package/dist/models.d.ts +5 -8
- package/dist/models.d.ts.map +1 -1
- package/dist/models.generated.d.ts +4665 -1252
- package/dist/models.generated.d.ts.map +1 -1
- package/dist/models.generated.js +4877 -2833
- package/dist/models.generated.js.map +1 -1
- package/dist/models.js +33 -6
- package/dist/models.js.map +1 -1
- package/dist/oauth.d.ts +1 -1
- package/dist/oauth.d.ts.map +1 -1
- package/dist/oauth.js.map +1 -1
- package/dist/providers/amazon-bedrock.d.ts +19 -1
- package/dist/providers/amazon-bedrock.d.ts.map +1 -1
- package/dist/providers/amazon-bedrock.js +278 -89
- package/dist/providers/amazon-bedrock.js.map +1 -1
- package/dist/providers/anthropic.d.ts +37 -6
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +300 -114
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/azure-openai-responses.d.ts +1 -1
- package/dist/providers/azure-openai-responses.d.ts.map +1 -1
- package/dist/providers/azure-openai-responses.js +68 -21
- package/dist/providers/azure-openai-responses.js.map +1 -1
- package/dist/providers/cloudflare.d.ts +13 -0
- package/dist/providers/cloudflare.d.ts.map +1 -0
- package/dist/providers/cloudflare.js +26 -0
- package/dist/providers/cloudflare.js.map +1 -0
- package/dist/providers/faux.d.ts +1 -1
- package/dist/providers/faux.d.ts.map +1 -1
- package/dist/providers/faux.js +1 -0
- package/dist/providers/faux.js.map +1 -1
- package/dist/providers/github-copilot-headers.d.ts +1 -1
- package/dist/providers/github-copilot-headers.d.ts.map +1 -1
- package/dist/providers/github-copilot-headers.js.map +1 -1
- package/dist/providers/google-shared.d.ts +8 -3
- package/dist/providers/google-shared.d.ts.map +1 -1
- package/dist/providers/google-shared.js +34 -17
- package/dist/providers/google-shared.js.map +1 -1
- package/dist/providers/google-vertex.d.ts +2 -2
- package/dist/providers/google-vertex.d.ts.map +1 -1
- package/dist/providers/google-vertex.js +45 -18
- package/dist/providers/google-vertex.js.map +1 -1
- package/dist/providers/google.d.ts +2 -2
- package/dist/providers/google.d.ts.map +1 -1
- package/dist/providers/google.js +9 -6
- package/dist/providers/google.js.map +1 -1
- package/dist/providers/images/openrouter.d.ts +3 -0
- package/dist/providers/images/openrouter.d.ts.map +1 -0
- package/dist/providers/images/openrouter.js +128 -0
- package/dist/providers/images/openrouter.js.map +1 -0
- package/dist/providers/images/register-builtins.d.ts +4 -0
- package/dist/providers/images/register-builtins.d.ts.map +1 -0
- package/dist/providers/images/register-builtins.js +34 -0
- package/dist/providers/images/register-builtins.js.map +1 -0
- package/dist/providers/mistral.d.ts +4 -1
- package/dist/providers/mistral.d.ts.map +1 -1
- package/dist/providers/mistral.js +43 -10
- package/dist/providers/mistral.js.map +1 -1
- package/dist/providers/openai-codex-responses.d.ts +22 -1
- package/dist/providers/openai-codex-responses.d.ts.map +1 -1
- package/dist/providers/openai-codex-responses.js +542 -111
- package/dist/providers/openai-codex-responses.js.map +1 -1
- package/dist/providers/openai-completions.d.ts +6 -2
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +447 -229
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai-prompt-cache.d.ts +3 -0
- package/dist/providers/openai-prompt-cache.d.ts.map +1 -0
- package/dist/providers/openai-prompt-cache.js +10 -0
- package/dist/providers/openai-prompt-cache.js.map +1 -0
- package/dist/providers/openai-responses-shared.d.ts +3 -2
- package/dist/providers/openai-responses-shared.d.ts.map +1 -1
- package/dist/providers/openai-responses-shared.js +41 -15
- package/dist/providers/openai-responses-shared.js.map +1 -1
- package/dist/providers/openai-responses.d.ts +1 -1
- package/dist/providers/openai-responses.d.ts.map +1 -1
- package/dist/providers/openai-responses.js +85 -40
- package/dist/providers/openai-responses.js.map +1 -1
- package/dist/providers/register-builtins.d.ts +10 -13
- package/dist/providers/register-builtins.d.ts.map +1 -1
- package/dist/providers/register-builtins.js +13 -20
- package/dist/providers/register-builtins.js.map +1 -1
- package/dist/providers/simple-options.d.ts +2 -2
- package/dist/providers/simple-options.d.ts.map +1 -1
- package/dist/providers/simple-options.js +8 -2
- package/dist/providers/simple-options.js.map +1 -1
- package/dist/providers/transform-messages.d.ts +1 -1
- package/dist/providers/transform-messages.d.ts.map +1 -1
- package/dist/providers/transform-messages.js +63 -34
- package/dist/providers/transform-messages.js.map +1 -1
- package/dist/session-resources.d.ts +4 -0
- package/dist/session-resources.d.ts.map +1 -0
- package/dist/session-resources.js +22 -0
- package/dist/session-resources.js.map +1 -0
- package/dist/stream.d.ts +3 -3
- package/dist/stream.d.ts.map +1 -1
- package/dist/stream.js +14 -2
- package/dist/stream.js.map +1 -1
- package/dist/types.d.ts +177 -14
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/abort-signals.d.ts +6 -0
- package/dist/utils/abort-signals.d.ts.map +1 -0
- package/dist/utils/abort-signals.js +34 -0
- package/dist/utils/abort-signals.js.map +1 -0
- package/dist/utils/diagnostics.d.ts +19 -0
- package/dist/utils/diagnostics.d.ts.map +1 -0
- package/dist/utils/diagnostics.js +25 -0
- package/dist/utils/diagnostics.js.map +1 -0
- package/dist/utils/event-stream.d.ts +3 -3
- package/dist/utils/event-stream.d.ts.map +1 -1
- package/dist/utils/event-stream.js +2 -2
- package/dist/utils/event-stream.js.map +1 -1
- package/dist/utils/headers.d.ts +2 -0
- package/dist/utils/headers.d.ts.map +1 -0
- package/dist/utils/headers.js +8 -0
- package/dist/utils/headers.js.map +1 -0
- package/dist/utils/json-parse.d.ts +8 -1
- package/dist/utils/json-parse.d.ts.map +1 -1
- package/dist/utils/json-parse.js +89 -5
- package/dist/utils/json-parse.js.map +1 -1
- package/dist/utils/node-http-proxy.d.ts +10 -0
- package/dist/utils/node-http-proxy.d.ts.map +1 -0
- package/dist/utils/node-http-proxy.js +97 -0
- package/dist/utils/node-http-proxy.js.map +1 -0
- package/dist/utils/oauth/anthropic.d.ts +1 -1
- package/dist/utils/oauth/anthropic.d.ts.map +1 -1
- package/dist/utils/oauth/anthropic.js +1 -1
- package/dist/utils/oauth/anthropic.js.map +1 -1
- package/dist/utils/oauth/device-code.d.ts +21 -0
- package/dist/utils/oauth/device-code.d.ts.map +1 -0
- package/dist/utils/oauth/device-code.js +56 -0
- package/dist/utils/oauth/device-code.js.map +1 -0
- package/dist/utils/oauth/github-copilot.d.ts +3 -3
- package/dist/utils/oauth/github-copilot.d.ts.map +1 -1
- package/dist/utils/oauth/github-copilot.js +58 -70
- package/dist/utils/oauth/github-copilot.js.map +1 -1
- package/dist/utils/oauth/index.d.ts +8 -11
- package/dist/utils/oauth/index.d.ts.map +1 -1
- package/dist/utils/oauth/index.js +2 -11
- package/dist/utils/oauth/index.js.map +1 -1
- package/dist/utils/oauth/openai-codex.d.ts +11 -2
- package/dist/utils/oauth/openai-codex.d.ts.map +1 -1
- package/dist/utils/oauth/openai-codex.js +187 -73
- package/dist/utils/oauth/openai-codex.js.map +1 -1
- package/dist/utils/oauth/types.d.ts +18 -1
- package/dist/utils/oauth/types.d.ts.map +1 -1
- package/dist/utils/oauth/types.js.map +1 -1
- package/dist/utils/overflow.d.ts +7 -3
- package/dist/utils/overflow.d.ts.map +1 -1
- package/dist/utils/overflow.js +25 -3
- package/dist/utils/overflow.js.map +1 -1
- package/dist/utils/typebox-helpers.d.ts +1 -1
- package/dist/utils/typebox-helpers.d.ts.map +1 -1
- package/dist/utils/typebox-helpers.js +1 -1
- package/dist/utils/typebox-helpers.js.map +1 -1
- package/dist/utils/validation.d.ts +1 -1
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +242 -41
- package/dist/utils/validation.js.map +1 -1
- package/package.json +14 -15
- package/dist/providers/google-gemini-cli.d.ts +0 -74
- package/dist/providers/google-gemini-cli.d.ts.map +0 -1
- package/dist/providers/google-gemini-cli.js +0 -776
- package/dist/providers/google-gemini-cli.js.map +0 -1
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
|
-
import {
|
|
3
|
-
import { calculateCost, supportsXhigh } from "../models.js";
|
|
2
|
+
import { calculateCost, clampThinkingLevel } from "../models.js";
|
|
4
3
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
|
4
|
+
import { headersToRecord } from "../utils/headers.js";
|
|
5
5
|
import { parseStreamingJson } from "../utils/json-parse.js";
|
|
6
6
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
|
7
|
+
import { isCloudflareProvider, resolveCloudflareBaseUrl } from "./cloudflare.js";
|
|
7
8
|
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
|
|
8
|
-
import {
|
|
9
|
+
import { clampOpenAIPromptCacheKey } from "./openai-prompt-cache.js";
|
|
10
|
+
import { buildBaseOptions, clampToXhigh } from "./simple-options.js";
|
|
9
11
|
import { transformMessages } from "./transform-messages.js";
|
|
10
12
|
/**
|
|
11
13
|
* Check if conversation messages contain tool calls or tool results.
|
|
@@ -25,6 +27,27 @@ function hasToolHistory(messages) {
|
|
|
25
27
|
}
|
|
26
28
|
return false;
|
|
27
29
|
}
|
|
30
|
+
function isTextContentBlock(block) {
|
|
31
|
+
return block.type === "text";
|
|
32
|
+
}
|
|
33
|
+
function isThinkingContentBlock(block) {
|
|
34
|
+
return block.type === "thinking";
|
|
35
|
+
}
|
|
36
|
+
function isToolCallBlock(block) {
|
|
37
|
+
return block.type === "toolCall";
|
|
38
|
+
}
|
|
39
|
+
function isImageContentBlock(block) {
|
|
40
|
+
return block.type === "image";
|
|
41
|
+
}
|
|
42
|
+
function resolveCacheRetention(cacheRetention) {
|
|
43
|
+
if (cacheRetention) {
|
|
44
|
+
return cacheRetention;
|
|
45
|
+
}
|
|
46
|
+
if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") {
|
|
47
|
+
return "long";
|
|
48
|
+
}
|
|
49
|
+
return "short";
|
|
50
|
+
}
|
|
28
51
|
export const streamOpenAICompletions = (model, context, options) => {
|
|
29
52
|
const stream = new AssistantMessageEventStream();
|
|
30
53
|
(async () => {
|
|
@@ -46,47 +69,127 @@ export const streamOpenAICompletions = (model, context, options) => {
|
|
|
46
69
|
timestamp: Date.now(),
|
|
47
70
|
};
|
|
48
71
|
try {
|
|
49
|
-
const apiKey = options?.apiKey
|
|
50
|
-
|
|
51
|
-
|
|
72
|
+
const apiKey = options?.apiKey;
|
|
73
|
+
if (!apiKey) {
|
|
74
|
+
throw new Error(`No API key for provider: ${model.provider}`);
|
|
75
|
+
}
|
|
76
|
+
const compat = getCompat(model);
|
|
77
|
+
const cacheRetention = resolveCacheRetention(options?.cacheRetention);
|
|
78
|
+
const cacheSessionId = cacheRetention === "none" ? undefined : options?.sessionId;
|
|
79
|
+
const client = createClient(model, context, apiKey, options?.headers, cacheSessionId, compat);
|
|
80
|
+
let params = buildParams(model, context, options, compat, cacheRetention);
|
|
52
81
|
const nextParams = await options?.onPayload?.(params, model);
|
|
53
82
|
if (nextParams !== undefined) {
|
|
54
83
|
params = nextParams;
|
|
55
84
|
}
|
|
56
|
-
const
|
|
85
|
+
const requestOptions = {
|
|
86
|
+
...(options?.signal ? { signal: options.signal } : {}),
|
|
87
|
+
...(options?.timeoutMs !== undefined ? { timeout: options.timeoutMs } : {}),
|
|
88
|
+
maxRetries: options?.maxRetries ?? 0,
|
|
89
|
+
};
|
|
90
|
+
const { data: openaiStream, response } = await client.chat.completions
|
|
91
|
+
.create(params, requestOptions)
|
|
92
|
+
.withResponse();
|
|
93
|
+
await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
|
|
57
94
|
stream.push({ type: "start", partial: output });
|
|
58
|
-
let
|
|
95
|
+
let textBlock = null;
|
|
96
|
+
let thinkingBlock = null;
|
|
97
|
+
let hasFinishReason = false;
|
|
98
|
+
const toolCallBlocksByIndex = new Map();
|
|
99
|
+
const toolCallBlocksById = new Map();
|
|
59
100
|
const blocks = output.content;
|
|
60
|
-
const
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
101
|
+
const getContentIndex = (block) => blocks.indexOf(block);
|
|
102
|
+
const finishBlock = (block) => {
|
|
103
|
+
const contentIndex = getContentIndex(block);
|
|
104
|
+
if (contentIndex === -1) {
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
if (block.type === "text") {
|
|
108
|
+
stream.push({
|
|
109
|
+
type: "text_end",
|
|
110
|
+
contentIndex,
|
|
111
|
+
content: block.text,
|
|
112
|
+
partial: output,
|
|
113
|
+
});
|
|
114
|
+
}
|
|
115
|
+
else if (block.type === "thinking") {
|
|
116
|
+
stream.push({
|
|
117
|
+
type: "thinking_end",
|
|
118
|
+
contentIndex,
|
|
119
|
+
content: block.thinking,
|
|
120
|
+
partial: output,
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
else if (block.type === "toolCall") {
|
|
124
|
+
block.arguments = parseStreamingJson(block.partialArgs);
|
|
125
|
+
// Finalize in-place and strip the scratch buffers so replay only
|
|
126
|
+
// carries parsed arguments.
|
|
127
|
+
delete block.partialArgs;
|
|
128
|
+
delete block.streamIndex;
|
|
129
|
+
stream.push({
|
|
130
|
+
type: "toolcall_end",
|
|
131
|
+
contentIndex,
|
|
132
|
+
toolCall: block,
|
|
133
|
+
partial: output,
|
|
134
|
+
});
|
|
135
|
+
}
|
|
136
|
+
};
|
|
137
|
+
const ensureTextBlock = () => {
|
|
138
|
+
if (!textBlock) {
|
|
139
|
+
textBlock = { type: "text", text: "" };
|
|
140
|
+
blocks.push(textBlock);
|
|
141
|
+
stream.push({ type: "text_start", contentIndex: getContentIndex(textBlock), partial: output });
|
|
142
|
+
}
|
|
143
|
+
return textBlock;
|
|
144
|
+
};
|
|
145
|
+
const ensureThinkingBlock = (thinkingSignature) => {
|
|
146
|
+
if (!thinkingBlock) {
|
|
147
|
+
thinkingBlock = {
|
|
148
|
+
type: "thinking",
|
|
149
|
+
thinking: "",
|
|
150
|
+
thinkingSignature,
|
|
151
|
+
};
|
|
152
|
+
blocks.push(thinkingBlock);
|
|
153
|
+
stream.push({ type: "thinking_start", contentIndex: getContentIndex(thinkingBlock), partial: output });
|
|
154
|
+
}
|
|
155
|
+
return thinkingBlock;
|
|
156
|
+
};
|
|
157
|
+
const ensureToolCallBlock = (toolCall) => {
|
|
158
|
+
const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
|
|
159
|
+
let block = streamIndex !== undefined ? toolCallBlocksByIndex.get(streamIndex) : undefined;
|
|
160
|
+
if (!block && toolCall.id) {
|
|
161
|
+
block = toolCallBlocksById.get(toolCall.id);
|
|
162
|
+
}
|
|
163
|
+
if (!block) {
|
|
164
|
+
block = {
|
|
165
|
+
type: "toolCall",
|
|
166
|
+
id: toolCall.id || "",
|
|
167
|
+
name: toolCall.function?.name || "",
|
|
168
|
+
arguments: {},
|
|
169
|
+
partialArgs: "",
|
|
170
|
+
streamIndex,
|
|
171
|
+
};
|
|
172
|
+
if (streamIndex !== undefined) {
|
|
173
|
+
toolCallBlocksByIndex.set(streamIndex, block);
|
|
70
174
|
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
type: "thinking_end",
|
|
74
|
-
contentIndex: blockIndex(),
|
|
75
|
-
content: block.thinking,
|
|
76
|
-
partial: output,
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
else if (block.type === "toolCall") {
|
|
80
|
-
block.arguments = parseStreamingJson(block.partialArgs);
|
|
81
|
-
delete block.partialArgs;
|
|
82
|
-
stream.push({
|
|
83
|
-
type: "toolcall_end",
|
|
84
|
-
contentIndex: blockIndex(),
|
|
85
|
-
toolCall: block,
|
|
86
|
-
partial: output,
|
|
87
|
-
});
|
|
175
|
+
if (toolCall.id) {
|
|
176
|
+
toolCallBlocksById.set(toolCall.id, block);
|
|
88
177
|
}
|
|
178
|
+
blocks.push(block);
|
|
179
|
+
stream.push({
|
|
180
|
+
type: "toolcall_start",
|
|
181
|
+
contentIndex: getContentIndex(block),
|
|
182
|
+
partial: output,
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
if (streamIndex !== undefined && block.streamIndex === undefined) {
|
|
186
|
+
block.streamIndex = streamIndex;
|
|
187
|
+
toolCallBlocksByIndex.set(streamIndex, block);
|
|
188
|
+
}
|
|
189
|
+
if (toolCall.id) {
|
|
190
|
+
toolCallBlocksById.set(toolCall.id, block);
|
|
89
191
|
}
|
|
192
|
+
return block;
|
|
90
193
|
};
|
|
91
194
|
for await (const chunk of openaiStream) {
|
|
92
195
|
if (!chunk || typeof chunk !== "object")
|
|
@@ -94,6 +197,9 @@ export const streamOpenAICompletions = (model, context, options) => {
|
|
|
94
197
|
// OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
|
|
95
198
|
// and each chunk in a streamed completion carries the same id.
|
|
96
199
|
output.responseId ||= chunk.id;
|
|
200
|
+
if (typeof chunk.model === "string" && chunk.model.length > 0 && chunk.model !== model.id) {
|
|
201
|
+
output.responseModel ||= chunk.model;
|
|
202
|
+
}
|
|
97
203
|
if (chunk.usage) {
|
|
98
204
|
output.usage = parseChunkUsage(chunk.usage, model);
|
|
99
205
|
}
|
|
@@ -111,60 +217,46 @@ export const streamOpenAICompletions = (model, context, options) => {
|
|
|
111
217
|
if (finishReasonResult.errorMessage) {
|
|
112
218
|
output.errorMessage = finishReasonResult.errorMessage;
|
|
113
219
|
}
|
|
220
|
+
hasFinishReason = true;
|
|
114
221
|
}
|
|
115
222
|
if (choice.delta) {
|
|
116
223
|
if (choice.delta.content !== null &&
|
|
117
224
|
choice.delta.content !== undefined &&
|
|
118
225
|
choice.delta.content.length > 0) {
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
stream.push({
|
|
128
|
-
type: "text_delta",
|
|
129
|
-
contentIndex: blockIndex(),
|
|
130
|
-
delta: choice.delta.content,
|
|
131
|
-
partial: output,
|
|
132
|
-
});
|
|
133
|
-
}
|
|
226
|
+
const block = ensureTextBlock();
|
|
227
|
+
block.text += choice.delta.content;
|
|
228
|
+
stream.push({
|
|
229
|
+
type: "text_delta",
|
|
230
|
+
contentIndex: getContentIndex(block),
|
|
231
|
+
delta: choice.delta.content,
|
|
232
|
+
partial: output,
|
|
233
|
+
});
|
|
134
234
|
}
|
|
135
235
|
// Some endpoints return reasoning in reasoning_content (llama.cpp),
|
|
136
236
|
// or reasoning (other openai compatible endpoints)
|
|
137
237
|
// Use the first non-empty reasoning field to avoid duplication
|
|
138
238
|
// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
|
|
139
239
|
const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
|
|
240
|
+
const deltaFields = choice.delta;
|
|
140
241
|
let foundReasoningField = null;
|
|
141
242
|
for (const field of reasoningFields) {
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
foundReasoningField = field;
|
|
147
|
-
break;
|
|
148
|
-
}
|
|
243
|
+
const value = deltaFields[field];
|
|
244
|
+
if (typeof value === "string" && value.length > 0) {
|
|
245
|
+
foundReasoningField = field;
|
|
246
|
+
break;
|
|
149
247
|
}
|
|
150
248
|
}
|
|
151
249
|
if (foundReasoningField) {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
output.content.push(currentBlock);
|
|
160
|
-
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
|
161
|
-
}
|
|
162
|
-
if (currentBlock.type === "thinking") {
|
|
163
|
-
const delta = choice.delta[foundReasoningField];
|
|
164
|
-
currentBlock.thinking += delta;
|
|
250
|
+
const delta = deltaFields[foundReasoningField];
|
|
251
|
+
if (typeof delta === "string" && delta.length > 0) {
|
|
252
|
+
const thinkingSignature = model.provider === "opencode-go" && foundReasoningField === "reasoning"
|
|
253
|
+
? "reasoning_content"
|
|
254
|
+
: foundReasoningField;
|
|
255
|
+
const block = ensureThinkingBlock(thinkingSignature);
|
|
256
|
+
block.thinking += delta;
|
|
165
257
|
stream.push({
|
|
166
258
|
type: "thinking_delta",
|
|
167
|
-
contentIndex:
|
|
259
|
+
contentIndex: getContentIndex(block),
|
|
168
260
|
delta,
|
|
169
261
|
partial: output,
|
|
170
262
|
});
|
|
@@ -172,38 +264,26 @@ export const streamOpenAICompletions = (model, context, options) => {
|
|
|
172
264
|
}
|
|
173
265
|
if (choice?.delta?.tool_calls) {
|
|
174
266
|
for (const toolCall of choice.delta.tool_calls) {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
currentBlock = {
|
|
180
|
-
type: "toolCall",
|
|
181
|
-
id: toolCall.id || "",
|
|
182
|
-
name: toolCall.function?.name || "",
|
|
183
|
-
arguments: {},
|
|
184
|
-
partialArgs: "",
|
|
185
|
-
};
|
|
186
|
-
output.content.push(currentBlock);
|
|
187
|
-
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
|
|
267
|
+
const block = ensureToolCallBlock(toolCall);
|
|
268
|
+
if (!block.id && toolCall.id) {
|
|
269
|
+
block.id = toolCall.id;
|
|
270
|
+
toolCallBlocksById.set(toolCall.id, block);
|
|
188
271
|
}
|
|
189
|
-
if (
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
currentBlock.partialArgs += toolCall.function.arguments;
|
|
198
|
-
currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
|
|
199
|
-
}
|
|
200
|
-
stream.push({
|
|
201
|
-
type: "toolcall_delta",
|
|
202
|
-
contentIndex: blockIndex(),
|
|
203
|
-
delta,
|
|
204
|
-
partial: output,
|
|
205
|
-
});
|
|
272
|
+
if (!block.name && toolCall.function?.name) {
|
|
273
|
+
block.name = toolCall.function.name;
|
|
274
|
+
}
|
|
275
|
+
let delta = "";
|
|
276
|
+
if (toolCall.function?.arguments) {
|
|
277
|
+
delta = toolCall.function.arguments;
|
|
278
|
+
block.partialArgs = (block.partialArgs ?? "") + toolCall.function.arguments;
|
|
279
|
+
block.arguments = parseStreamingJson(block.partialArgs);
|
|
206
280
|
}
|
|
281
|
+
stream.push({
|
|
282
|
+
type: "toolcall_delta",
|
|
283
|
+
contentIndex: getContentIndex(block),
|
|
284
|
+
delta,
|
|
285
|
+
partial: output,
|
|
286
|
+
});
|
|
207
287
|
}
|
|
208
288
|
}
|
|
209
289
|
const reasoningDetails = choice.delta.reasoning_details;
|
|
@@ -219,7 +299,9 @@ export const streamOpenAICompletions = (model, context, options) => {
|
|
|
219
299
|
}
|
|
220
300
|
}
|
|
221
301
|
}
|
|
222
|
-
|
|
302
|
+
for (const block of blocks) {
|
|
303
|
+
finishBlock(block);
|
|
304
|
+
}
|
|
223
305
|
if (options?.signal?.aborted) {
|
|
224
306
|
throw new Error("Request was aborted");
|
|
225
307
|
}
|
|
@@ -229,12 +311,19 @@ export const streamOpenAICompletions = (model, context, options) => {
|
|
|
229
311
|
if (output.stopReason === "error") {
|
|
230
312
|
throw new Error(output.errorMessage || "Provider returned an error stop reason");
|
|
231
313
|
}
|
|
314
|
+
if (!hasFinishReason) {
|
|
315
|
+
throw new Error("Stream ended without finish_reason");
|
|
316
|
+
}
|
|
232
317
|
stream.push({ type: "done", reason: output.stopReason, message: output });
|
|
233
318
|
stream.end();
|
|
234
319
|
}
|
|
235
320
|
catch (error) {
|
|
236
|
-
for (const block of output.content)
|
|
321
|
+
for (const block of output.content) {
|
|
237
322
|
delete block.index;
|
|
323
|
+
// Streaming scratch buffers are only used during parsing; never persist them.
|
|
324
|
+
delete block.partialArgs;
|
|
325
|
+
delete block.streamIndex;
|
|
326
|
+
}
|
|
238
327
|
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
|
|
239
328
|
output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
|
|
240
329
|
// Some providers via OpenRouter give additional information in this field.
|
|
@@ -248,12 +337,14 @@ export const streamOpenAICompletions = (model, context, options) => {
|
|
|
248
337
|
return stream;
|
|
249
338
|
};
|
|
250
339
|
export const streamSimpleOpenAICompletions = (model, context, options) => {
|
|
251
|
-
const apiKey = options?.apiKey
|
|
340
|
+
const apiKey = options?.apiKey;
|
|
252
341
|
if (!apiKey) {
|
|
253
342
|
throw new Error(`No API key for provider: ${model.provider}`);
|
|
254
343
|
}
|
|
255
344
|
const base = buildBaseOptions(model, options, apiKey);
|
|
256
|
-
const
|
|
345
|
+
const rawLevel = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined;
|
|
346
|
+
const clampedReasoning = rawLevel === "off" ? rawLevel : clampToXhigh(rawLevel);
|
|
347
|
+
const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
|
|
257
348
|
const toolChoice = options?.toolChoice;
|
|
258
349
|
return streamOpenAICompletions(model, context, {
|
|
259
350
|
...base,
|
|
@@ -261,13 +352,7 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
|
|
|
261
352
|
toolChoice,
|
|
262
353
|
});
|
|
263
354
|
};
|
|
264
|
-
function createClient(model, context, apiKey, optionsHeaders) {
|
|
265
|
-
if (!apiKey) {
|
|
266
|
-
if (!process.env.OPENAI_API_KEY) {
|
|
267
|
-
throw new Error("OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.");
|
|
268
|
-
}
|
|
269
|
-
apiKey = process.env.OPENAI_API_KEY;
|
|
270
|
-
}
|
|
355
|
+
function createClient(model, context, apiKey, optionsHeaders, sessionId, compat = getCompat(model)) {
|
|
271
356
|
const headers = { ...model.headers };
|
|
272
357
|
if (model.provider === "github-copilot") {
|
|
273
358
|
const hasImages = hasCopilotVisionInput(context.messages);
|
|
@@ -277,25 +362,41 @@ function createClient(model, context, apiKey, optionsHeaders) {
|
|
|
277
362
|
});
|
|
278
363
|
Object.assign(headers, copilotHeaders);
|
|
279
364
|
}
|
|
365
|
+
if (sessionId && compat.sendSessionAffinityHeaders) {
|
|
366
|
+
headers.session_id = sessionId;
|
|
367
|
+
headers["x-client-request-id"] = sessionId;
|
|
368
|
+
headers["x-session-affinity"] = sessionId;
|
|
369
|
+
}
|
|
280
370
|
// Merge options headers last so they can override defaults
|
|
281
371
|
if (optionsHeaders) {
|
|
282
372
|
Object.assign(headers, optionsHeaders);
|
|
283
373
|
}
|
|
374
|
+
const defaultHeaders = model.provider === "cloudflare-ai-gateway"
|
|
375
|
+
? {
|
|
376
|
+
...headers,
|
|
377
|
+
Authorization: headers.Authorization ?? null,
|
|
378
|
+
"cf-aig-authorization": `Bearer ${apiKey}`,
|
|
379
|
+
}
|
|
380
|
+
: headers;
|
|
284
381
|
return new OpenAI({
|
|
285
382
|
apiKey,
|
|
286
|
-
baseURL: model.baseUrl,
|
|
383
|
+
baseURL: isCloudflareProvider(model.provider) ? resolveCloudflareBaseUrl(model) : model.baseUrl,
|
|
287
384
|
dangerouslyAllowBrowser: true,
|
|
288
|
-
defaultHeaders
|
|
385
|
+
defaultHeaders,
|
|
289
386
|
});
|
|
290
387
|
}
|
|
291
|
-
function buildParams(model, context, options) {
|
|
292
|
-
const compat = getCompat(model);
|
|
388
|
+
function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
|
|
293
389
|
const messages = convertMessages(model, context, compat);
|
|
294
|
-
|
|
390
|
+
const cacheControl = getCompatCacheControl(compat, cacheRetention);
|
|
295
391
|
const params = {
|
|
296
392
|
model: model.id,
|
|
297
393
|
messages,
|
|
298
394
|
stream: true,
|
|
395
|
+
prompt_cache_key: (model.baseUrl.includes("api.openai.com") && cacheRetention !== "none") ||
|
|
396
|
+
(cacheRetention === "long" && compat.supportsLongCacheRetention)
|
|
397
|
+
? clampOpenAIPromptCacheKey(options?.sessionId)
|
|
398
|
+
: undefined,
|
|
399
|
+
prompt_cache_retention: cacheRetention === "long" && compat.supportsLongCacheRetention ? "24h" : undefined,
|
|
299
400
|
};
|
|
300
401
|
if (compat.supportsUsageInStreaming !== false) {
|
|
301
402
|
params.stream_options = { include_usage: true };
|
|
@@ -314,7 +415,7 @@ function buildParams(model, context, options) {
|
|
|
314
415
|
if (options?.temperature !== undefined) {
|
|
315
416
|
params.temperature = options.temperature;
|
|
316
417
|
}
|
|
317
|
-
if (context.tools) {
|
|
418
|
+
if (context.tools && context.tools.length > 0) {
|
|
318
419
|
params.tools = convertTools(context.tools, compat);
|
|
319
420
|
if (compat.zaiToolStream) {
|
|
320
421
|
params.tool_stream = true;
|
|
@@ -324,36 +425,78 @@ function buildParams(model, context, options) {
|
|
|
324
425
|
// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
|
|
325
426
|
params.tools = [];
|
|
326
427
|
}
|
|
428
|
+
if (cacheControl) {
|
|
429
|
+
applyAnthropicCacheControl(messages, params.tools, cacheControl);
|
|
430
|
+
}
|
|
327
431
|
if (options?.toolChoice) {
|
|
328
432
|
params.tool_choice = options.toolChoice;
|
|
329
433
|
}
|
|
330
434
|
if (compat.thinkingFormat === "zai" && model.reasoning) {
|
|
331
|
-
|
|
435
|
+
const zaiParams = params;
|
|
436
|
+
zaiParams.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
|
|
332
437
|
}
|
|
333
438
|
else if (compat.thinkingFormat === "qwen" && model.reasoning) {
|
|
334
439
|
params.enable_thinking = !!options?.reasoningEffort;
|
|
335
440
|
}
|
|
336
441
|
else if (compat.thinkingFormat === "qwen-chat-template" && model.reasoning) {
|
|
337
|
-
params.chat_template_kwargs = {
|
|
442
|
+
params.chat_template_kwargs = {
|
|
443
|
+
enable_thinking: !!options?.reasoningEffort,
|
|
444
|
+
preserve_thinking: true,
|
|
445
|
+
};
|
|
446
|
+
}
|
|
447
|
+
else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
|
|
448
|
+
params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
|
|
449
|
+
if (options?.reasoningEffort && compat.supportsReasoningEffort) {
|
|
450
|
+
params.reasoning_effort =
|
|
451
|
+
model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
|
|
452
|
+
}
|
|
338
453
|
}
|
|
339
454
|
else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
|
|
340
455
|
// OpenRouter normalizes reasoning across providers via a nested reasoning object.
|
|
341
456
|
const openRouterParams = params;
|
|
342
457
|
if (options?.reasoningEffort) {
|
|
343
458
|
openRouterParams.reasoning = {
|
|
344
|
-
effort:
|
|
459
|
+
effort: model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort,
|
|
345
460
|
};
|
|
346
461
|
}
|
|
347
|
-
else {
|
|
348
|
-
openRouterParams.reasoning = { effort: "none" };
|
|
462
|
+
else if (model.thinkingLevelMap?.off !== null) {
|
|
463
|
+
openRouterParams.reasoning = { effort: model.thinkingLevelMap?.off ?? "none" };
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
else if (compat.thinkingFormat === "ant-ling" && model.reasoning && options?.reasoningEffort) {
|
|
467
|
+
const effort = model.thinkingLevelMap?.[options.reasoningEffort];
|
|
468
|
+
if (typeof effort === "string") {
|
|
469
|
+
params.reasoning = { effort };
|
|
470
|
+
}
|
|
471
|
+
}
|
|
472
|
+
else if (compat.thinkingFormat === "together" && model.reasoning) {
|
|
473
|
+
const togetherParams = params;
|
|
474
|
+
togetherParams.reasoning = { enabled: !!options?.reasoningEffort };
|
|
475
|
+
if (options?.reasoningEffort && compat.supportsReasoningEffort) {
|
|
476
|
+
togetherParams.reasoning_effort = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
else if (compat.thinkingFormat === "string-thinking" && model.reasoning) {
|
|
480
|
+
const stringThinkingParams = params;
|
|
481
|
+
if (options?.reasoningEffort) {
|
|
482
|
+
stringThinkingParams.thinking = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
|
|
483
|
+
}
|
|
484
|
+
else if (model.thinkingLevelMap?.off !== null) {
|
|
485
|
+
stringThinkingParams.thinking = model.thinkingLevelMap?.off ?? "none";
|
|
349
486
|
}
|
|
350
487
|
}
|
|
351
488
|
else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
|
|
352
489
|
// OpenAI-style reasoning_effort
|
|
353
|
-
params.reasoning_effort =
|
|
490
|
+
params.reasoning_effort = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
|
|
491
|
+
}
|
|
492
|
+
else if (!options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
|
|
493
|
+
const offValue = model.thinkingLevelMap?.off;
|
|
494
|
+
if (typeof offValue === "string") {
|
|
495
|
+
params.reasoning_effort = offValue;
|
|
496
|
+
}
|
|
354
497
|
}
|
|
355
498
|
// OpenRouter provider routing preferences
|
|
356
|
-
if (model.
|
|
499
|
+
if (model.compat?.openRouterRouting) {
|
|
357
500
|
params.provider = model.compat.openRouterRouting;
|
|
358
501
|
}
|
|
359
502
|
// Vercel AI Gateway provider routing preferences
|
|
@@ -370,37 +513,80 @@ function buildParams(model, context, options) {
|
|
|
370
513
|
}
|
|
371
514
|
return params;
|
|
372
515
|
}
|
|
373
|
-
function
|
|
374
|
-
|
|
516
|
+
/**
 * Builds the `cache_control` marker to attach to request parts, if any.
 *
 * @param {object} compat - Resolved compat settings; reads `cacheControlFormat`
 *   and `supportsLongCacheRetention`.
 * @param {string} cacheRetention - Requested retention; "none" disables the
 *   marker and "long" requests the extended TTL.
 * @returns {{type: "ephemeral", ttl?: "1h"} | undefined} The marker, or
 *   `undefined` when the format is not "anthropic" or retention is "none".
 */
function getCompatCacheControl(compat, cacheRetention) {
    const usesAnthropicFormat = compat.cacheControlFormat === "anthropic";
    if (!usesAnthropicFormat || cacheRetention === "none") {
        return undefined;
    }
    const marker = { type: "ephemeral" };
    // The one-hour TTL is only added when it was requested AND the compat
    // settings say long retention is supported; otherwise no ttl key is set.
    if (cacheRetention === "long" && compat.supportsLongCacheRetention) {
        marker.ttl = "1h";
    }
    return marker;
}
|
|
376
|
-
function
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
if (
|
|
384
|
-
|
|
385
|
-
const content = msg.content;
|
|
386
|
-
if (typeof content === "string") {
|
|
387
|
-
msg.content = [
|
|
388
|
-
Object.assign({ type: "text", text: content }, { cache_control: { type: "ephemeral" } }),
|
|
389
|
-
];
|
|
523
|
+
/**
 * Attaches the given cache-control marker to a request by delegating to three
 * helpers, in this order: the system/developer prompt, the last tool
 * definition, and the last user/assistant conversation message.
 *
 * Both `messages` and `tools` are mutated in place; nothing is returned.
 *
 * @param {Array<object>} messages - Converted chat messages.
 * @param {Array<object> | undefined} tools - Converted tool definitions, if any.
 * @param {object} cacheControl - Marker object to attach.
 */
function applyAnthropicCacheControl(messages, tools, cacheControl) {
    // Instruction prompt first.
    addCacheControlToSystemPrompt(messages, cacheControl);
    // Then the tool list.
    addCacheControlToLastTool(tools, cacheControl);
    // Finally the tail of the conversation.
    addCacheControlToLastConversationMessage(messages, cacheControl);
}
|
|
528
|
+
/**
 * Marks the first instruction message (role "system" or "developer") with the
 * cache-control marker. Later instruction messages are left untouched, and
 * nothing happens when no such message exists.
 *
 * @param {Array<object>} messages - Converted chat messages; may be mutated.
 * @param {object} cacheControl - Marker object to attach.
 */
function addCacheControlToSystemPrompt(messages, cacheControl) {
    const firstInstruction = messages.find((candidate) => candidate.role === "system" || candidate.role === "developer");
    if (firstInstruction === undefined) {
        return;
    }
    addCacheControlToInstructionMessage(firstInstruction, cacheControl);
}
|
|
536
|
+
/**
 * Walks the messages from last to first and marks the first user/assistant
 * message that accepts the cache-control marker. Messages with other roles are
 * skipped, as are user/assistant messages for which the helper reports that no
 * marker could be placed.
 *
 * @param {Array<object>} messages - Converted chat messages; may be mutated.
 * @param {object} cacheControl - Marker object to attach.
 */
function addCacheControlToLastConversationMessage(messages, cacheControl) {
    let index = messages.length;
    while (index-- > 0) {
        const candidate = messages[index];
        const isConversational = candidate.role === "user" || candidate.role === "assistant";
        // Stop as soon as one message has actually been marked.
        if (isConversational && addCacheControlToMessage(candidate, cacheControl)) {
            return;
        }
    }
}
|
|
546
|
+
/**
 * Sets `cache_control` on the final entry of the tool list. Does nothing when
 * the list is missing or empty.
 *
 * @param {Array<object> | undefined | null} tools - Tool definitions; the last
 *   element is mutated in place.
 * @param {object} cacheControl - Marker object to attach.
 */
function addCacheControlToLastTool(tools, cacheControl) {
    if (tools && tools.length !== 0) {
        tools[tools.length - 1].cache_control = cacheControl;
    }
}
|
|
553
|
+
/**
 * Marks an instruction (system/developer) message with the cache-control
 * marker by delegating to the shared text-content helper.
 *
 * @param {object} message - Message to mark; may be mutated.
 * @param {object} cacheControl - Marker object to attach.
 * @returns {boolean} Whatever the text-content helper reports: `true` when a
 *   marker was placed, `false` otherwise.
 */
function addCacheControlToInstructionMessage(message, cacheControl) {
    const wasMarked = addCacheControlToTextContent(message, cacheControl);
    return wasMarked;
}
|
|
556
|
+
/**
 * Marks a user or assistant message with the cache-control marker.
 *
 * @param {object} message - Message to mark; may be mutated.
 * @param {object} cacheControl - Marker object to attach.
 * @returns {boolean} `false` for any other role; otherwise the result of the
 *   text-content helper.
 */
function addCacheControlToMessage(message, cacheControl) {
    const isConversational = message.role === "user" || message.role === "assistant";
    return isConversational ? addCacheControlToTextContent(message, cacheControl) : false;
}
|
|
562
|
+
/**
 * Attaches the cache-control marker to the text of a message.
 *
 * - Non-empty string content is replaced by a one-element array holding a
 *   text part that carries the marker.
 * - Array content gets the marker on its LAST part whose `type` is "text".
 * - Empty strings, non-array content, and arrays without a text part are left
 *   unchanged.
 *
 * @param {object} message - Message whose `content` is inspected and possibly
 *   mutated.
 * @param {object} cacheControl - Marker object to attach.
 * @returns {boolean} `true` when a marker was placed, `false` otherwise.
 */
function addCacheControlToTextContent(message, cacheControl) {
    const { content } = message;
    if (typeof content === "string") {
        if (content === "") {
            return false;
        }
        const wrapped = { type: "text", text: content, cache_control: cacheControl };
        message.content = [wrapped];
        return true;
    }
    if (!Array.isArray(content)) {
        return false;
    }
    // Scan backwards so the marker lands on the final text part.
    let index = content.length;
    while (index-- > 0) {
        const candidate = content[index];
        if (candidate?.type === "text") {
            candidate.cache_control = cacheControl;
            return true;
        }
    }
    return false;
}
|
|
404
590
|
export function convertMessages(model, context, compat) {
|
|
405
591
|
const params = [];
|
|
406
592
|
const normalizeToolCallId = (id) => {
|
|
@@ -458,14 +644,11 @@ export function convertMessages(model, context, compat) {
|
|
|
458
644
|
};
|
|
459
645
|
}
|
|
460
646
|
});
|
|
461
|
-
|
|
462
|
-
? content.filter((c) => c.type !== "image_url")
|
|
463
|
-
: content;
|
|
464
|
-
if (filteredContent.length === 0)
|
|
647
|
+
if (content.length === 0)
|
|
465
648
|
continue;
|
|
466
649
|
params.push({
|
|
467
650
|
role: "user",
|
|
468
|
-
content
|
|
651
|
+
content,
|
|
469
652
|
});
|
|
470
653
|
}
|
|
471
654
|
}
|
|
@@ -475,48 +658,53 @@ export function convertMessages(model, context, compat) {
|
|
|
475
658
|
role: "assistant",
|
|
476
659
|
content: compat.requiresAssistantAfterToolResult ? "" : null,
|
|
477
660
|
};
|
|
478
|
-
const
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
// Filter out empty thinking blocks to avoid API validation errors
|
|
492
|
-
const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
|
|
493
|
-
if (compat.requiresThinkingAsText) {
|
|
494
|
-
if (nonEmptyThinkingBlocks.length > 0) {
|
|
661
|
+
const assistantTextParts = msg.content
|
|
662
|
+
.filter(isTextContentBlock)
|
|
663
|
+
.filter((block) => block.text.trim().length > 0)
|
|
664
|
+
.map((block) => ({
|
|
665
|
+
type: "text",
|
|
666
|
+
text: sanitizeSurrogates(block.text),
|
|
667
|
+
}));
|
|
668
|
+
const assistantText = assistantTextParts.map((part) => part.text).join("");
|
|
669
|
+
const nonEmptyThinkingBlocks = msg.content
|
|
670
|
+
.filter(isThinkingContentBlock)
|
|
671
|
+
.filter((block) => block.thinking.trim().length > 0);
|
|
672
|
+
if (nonEmptyThinkingBlocks.length > 0) {
|
|
673
|
+
if (compat.requiresThinkingAsText) {
|
|
495
674
|
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
|
|
496
|
-
const thinkingText = nonEmptyThinkingBlocks
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
675
|
+
const thinkingText = nonEmptyThinkingBlocks
|
|
676
|
+
.map((block) => sanitizeSurrogates(block.thinking))
|
|
677
|
+
.join("\n\n");
|
|
678
|
+
assistantMsg.content = [{ type: "text", text: thinkingText }, ...assistantTextParts];
|
|
679
|
+
}
|
|
680
|
+
else {
|
|
681
|
+
// Always send assistant content as a plain string (OpenAI Chat Completions
|
|
682
|
+
// API standard format). Sending as an array of {type:"text", text:"..."}
|
|
683
|
+
// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
|
|
684
|
+
// NVIDIA NIM) to mirror the content-block structure literally in their
|
|
685
|
+
// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
|
|
686
|
+
if (assistantText.length > 0) {
|
|
687
|
+
assistantMsg.content = assistantText;
|
|
500
688
|
}
|
|
501
|
-
|
|
502
|
-
|
|
689
|
+
// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
|
|
690
|
+
let signature = nonEmptyThinkingBlocks[0].thinkingSignature;
|
|
691
|
+
if (model.provider === "opencode-go" && signature === "reasoning") {
|
|
692
|
+
signature = "reasoning_content";
|
|
693
|
+
}
|
|
694
|
+
if (signature && signature.length > 0) {
|
|
695
|
+
assistantMsg[signature] = nonEmptyThinkingBlocks.map((block) => block.thinking).join("\n");
|
|
503
696
|
}
|
|
504
697
|
}
|
|
505
698
|
}
|
|
506
|
-
else {
|
|
507
|
-
//
|
|
508
|
-
//
|
|
509
|
-
//
|
|
510
|
-
//
|
|
511
|
-
//
|
|
512
|
-
|
|
513
|
-
const signature = thinkingBlocks.find((b) => b.thinkingSignature && b.thinkingSignature.length > 0)?.thinkingSignature;
|
|
514
|
-
if (signature) {
|
|
515
|
-
const matchingBlocks = thinkingBlocks.filter((b) => b.thinkingSignature === signature);
|
|
516
|
-
assistantMsg[signature] = matchingBlocks.map((b) => b.thinking || "").join("\n");
|
|
517
|
-
}
|
|
699
|
+
else if (assistantText.length > 0) {
|
|
700
|
+
// Always send assistant content as a plain string (OpenAI Chat Completions
|
|
701
|
+
// API standard format). Sending as an array of {type:"text", text:"..."}
|
|
702
|
+
// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
|
|
703
|
+
// NVIDIA NIM) to mirror the content-block structure literally in their
|
|
704
|
+
// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
|
|
705
|
+
assistantMsg.content = assistantText;
|
|
518
706
|
}
|
|
519
|
-
const toolCalls = msg.content.filter(
|
|
707
|
+
const toolCalls = msg.content.filter(isToolCallBlock);
|
|
520
708
|
if (toolCalls.length > 0) {
|
|
521
709
|
assistantMsg.tool_calls = toolCalls.map((tc) => ({
|
|
522
710
|
id: tc.id,
|
|
@@ -541,6 +729,11 @@ export function convertMessages(model, context, compat) {
|
|
|
541
729
|
assistantMsg.reasoning_details = reasoningDetails;
|
|
542
730
|
}
|
|
543
731
|
}
|
|
732
|
+
if (compat.requiresReasoningContentOnAssistantMessages &&
|
|
733
|
+
model.reasoning &&
|
|
734
|
+
assistantMsg.reasoning_content === undefined) {
|
|
735
|
+
assistantMsg.reasoning_content = "";
|
|
736
|
+
}
|
|
544
737
|
// Skip assistant messages that have no content and no tool calls.
|
|
545
738
|
// Some providers require "either content or tool_calls, but not none".
|
|
546
739
|
// Other providers also don't accept empty assistant messages.
|
|
@@ -549,8 +742,7 @@ export function convertMessages(model, context, compat) {
|
|
|
549
742
|
const hasContent = content !== null &&
|
|
550
743
|
content !== undefined &&
|
|
551
744
|
(typeof content === "string" ? content.length > 0 : content.length > 0);
|
|
552
|
-
|
|
553
|
-
if (!hasContent && !hasReasoning && !assistantMsg.tool_calls) {
|
|
745
|
+
if (!hasContent && !assistantMsg.tool_calls) {
|
|
554
746
|
continue;
|
|
555
747
|
}
|
|
556
748
|
params.push(assistantMsg);
|
|
@@ -562,8 +754,8 @@ export function convertMessages(model, context, compat) {
|
|
|
562
754
|
const toolMsg = transformedMessages[j];
|
|
563
755
|
// Extract text and image content
|
|
564
756
|
const textResult = toolMsg.content
|
|
565
|
-
.filter(
|
|
566
|
-
.map((
|
|
757
|
+
.filter(isTextContentBlock)
|
|
758
|
+
.map((block) => block.text)
|
|
567
759
|
.join("\n");
|
|
568
760
|
const hasImages = toolMsg.content.some((c) => c.type === "image");
|
|
569
761
|
// Always send tool result with text (or placeholder if only images)
|
|
@@ -580,7 +772,7 @@ export function convertMessages(model, context, compat) {
|
|
|
580
772
|
params.push(toolResultMsg);
|
|
581
773
|
if (hasImages && model.input.includes("image")) {
|
|
582
774
|
for (const block of toolMsg.content) {
|
|
583
|
-
if (block
|
|
775
|
+
if (isImageContentBlock(block)) {
|
|
584
776
|
imageBlocks.push({
|
|
585
777
|
type: "image_url",
|
|
586
778
|
image_url: {
|
|
@@ -634,19 +826,19 @@ function convertTools(tools, compat) {
|
|
|
634
826
|
}
|
|
635
827
|
function parseChunkUsage(rawUsage, model) {
|
|
636
828
|
const promptTokens = rawUsage.prompt_tokens || 0;
|
|
637
|
-
const
|
|
829
|
+
const cacheReadTokens = rawUsage.prompt_tokens_details?.cached_tokens ?? rawUsage.prompt_cache_hit_tokens ?? 0;
|
|
638
830
|
const cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;
|
|
639
|
-
|
|
640
|
-
//
|
|
641
|
-
// -
|
|
642
|
-
//
|
|
643
|
-
//
|
|
644
|
-
//
|
|
645
|
-
|
|
831
|
+
// Follow documented OpenAI/OpenRouter semantics: cached_tokens is cache-read
|
|
832
|
+
// tokens (hits). OpenAI does not document or emit cache_write_tokens, but
|
|
833
|
+
// OpenRouter-compatible providers can include it as a separate write count.
|
|
834
|
+
// OpenRouter's own provider/tests affirm the separate mapping:
|
|
835
|
+
// https://github.com/OpenRouterTeam/ai-sdk-provider/pull/409
|
|
836
|
+
// Do not subtract writes from cached_tokens, otherwise spec-compliant
|
|
837
|
+
// providers are under-reported. DS4 mirrors this contract too:
|
|
838
|
+
// https://github.com/antirez/ds4/pull/29
|
|
646
839
|
const input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens);
|
|
647
|
-
//
|
|
648
|
-
|
|
649
|
-
const outputTokens = (rawUsage.completion_tokens || 0) + reasoningTokens;
|
|
840
|
+
// OpenAI completion_tokens already includes reasoning_tokens.
|
|
841
|
+
const outputTokens = rawUsage.completion_tokens || 0;
|
|
650
842
|
const usage = {
|
|
651
843
|
input,
|
|
652
844
|
output: outputTokens,
|
|
@@ -689,47 +881,69 @@ function mapStopReason(reason) {
|
|
|
689
881
|
/**
 * Derives default compatibility settings for a model from its provider name
 * and base URL (and, for OpenRouter, the model id prefix).
 *
 * @param {object} model - Model descriptor; reads `provider`, `baseUrl`, and
 *   `id` (the latter only when the model is recognised as OpenRouter).
 * @returns {object} A fresh compat object with every detected flag filled in.
 */
function detectCompat(model) {
    const { provider, baseUrl } = model;
    const providerIs = (...names) => names.includes(provider);
    const urlHas = (...fragments) => fragments.some((fragment) => baseUrl.includes(fragment));
    // Model id is only consulted lazily, behind an OpenRouter check.
    const idStartsWith = (prefix) => model.id.startsWith(prefix);

    // Provider families, matched by provider name first and base URL second.
    const isZai = providerIs("zai", "zai-coding-cn") || urlHas("api.z.ai", "open.bigmodel.cn");
    const isTogether = providerIs("together") || urlHas("api.together.ai", "api.together.xyz");
    const isMoonshot = providerIs("moonshotai", "moonshotai-cn") || urlHas("api.moonshot.");
    const isOpenRouter = providerIs("openrouter") || urlHas("openrouter.ai");
    const isCloudflareWorkersAI = providerIs("cloudflare-workers-ai") || urlHas("api.cloudflare.com");
    const isCloudflareAiGateway = providerIs("cloudflare-ai-gateway") || urlHas("gateway.ai.cloudflare.com");
    const isNvidia = providerIs("nvidia") || urlHas("integrate.api.nvidia.com");
    const isAntLing = providerIs("ant-ling") || urlHas("api.ant-ling.com");
    const isGrok = providerIs("xai") || urlHas("api.x.ai");
    const isDeepSeek = providerIs("deepseek") || urlHas("deepseek.com");
    const isCerebras = providerIs("cerebras") || urlHas("cerebras.ai");
    const isChutes = urlHas("chutes.ai");
    const isOpenCode = providerIs("opencode") || urlHas("opencode.ai");

    // Note: DeepSeek counts as non-standard only via its URL, not its provider name.
    const isNonStandard = isNvidia ||
        isCerebras ||
        isGrok ||
        isTogether ||
        isChutes ||
        urlHas("deepseek.com") ||
        isZai ||
        isMoonshot ||
        isOpenCode ||
        isCloudflareWorkersAI ||
        isCloudflareAiGateway ||
        isAntLing;
    const useMaxTokens = isChutes || isMoonshot || isCloudflareAiGateway || isTogether || isNvidia || isAntLing;
    const isOpenRouterDeveloperRoleModel = isOpenRouter && (idStartsWith("anthropic/") || idStartsWith("openai/"));
    // Cache control is keyed on the provider name alone, not on the base URL.
    const cacheControlFormat = provider === "openrouter" && idStartsWith("anthropic/") ? "anthropic" : undefined;

    // First matching family wins; everything else uses the OpenAI format.
    let thinkingFormat = "openai";
    if (isDeepSeek) {
        thinkingFormat = "deepseek";
    }
    else if (isZai) {
        thinkingFormat = "zai";
    }
    else if (isTogether) {
        thinkingFormat = "together";
    }
    else if (isAntLing) {
        thinkingFormat = "ant-ling";
    }
    else if (isOpenRouter) {
        thinkingFormat = "openrouter";
    }

    return {
        supportsStore: !isNonStandard,
        supportsDeveloperRole: isOpenRouterDeveloperRoleModel || !(isNonStandard || isOpenRouter),
        supportsReasoningEffort: !(isGrok || isZai || isMoonshot || isTogether || isCloudflareAiGateway || isNvidia || isAntLing),
        supportsUsageInStreaming: true,
        maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
        requiresToolResultName: false,
        requiresAssistantAfterToolResult: false,
        requiresThinkingAsText: false,
        requiresReasoningContentOnAssistantMessages: isDeepSeek,
        thinkingFormat,
        openRouterRouting: {},
        vercelGatewayRouting: {},
        zaiToolStream: false,
        supportsStrictMode: !(isMoonshot || isTogether || isCloudflareAiGateway || isNvidia),
        cacheControlFormat,
        sendSessionAffinityHeaders: false,
        supportsLongCacheRetention: !(isTogether ||
            isCloudflareWorkersAI ||
            isCloudflareAiGateway ||
            isNvidia ||
            isAntLing),
    };
}
|
|
735
949
|
/**
|
|
@@ -744,17 +958,21 @@ function getCompat(model) {
|
|
|
744
958
|
supportsStore: model.compat.supportsStore ?? detected.supportsStore,
|
|
745
959
|
supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
|
|
746
960
|
supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
|
|
747
|
-
reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
|
|
748
961
|
supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
|
|
749
962
|
maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
|
|
750
963
|
requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
|
|
751
964
|
requiresAssistantAfterToolResult: model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
|
|
752
965
|
requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
|
|
966
|
+
requiresReasoningContentOnAssistantMessages: model.compat.requiresReasoningContentOnAssistantMessages ??
|
|
967
|
+
detected.requiresReasoningContentOnAssistantMessages,
|
|
753
968
|
thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
|
|
754
969
|
openRouterRouting: model.compat.openRouterRouting ?? {},
|
|
755
970
|
vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
|
|
756
971
|
zaiToolStream: model.compat.zaiToolStream ?? detected.zaiToolStream,
|
|
757
972
|
supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
|
|
973
|
+
cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
|
|
974
|
+
sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
|
|
975
|
+
supportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,
|
|
758
976
|
};
|
|
759
977
|
}
|
|
760
978
|
//# sourceMappingURL=openai-completions.js.map
|