phi-code-ai 0.56.3 → 0.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +258 -73
- package/dist/api-registry.d.ts.map +1 -1
- package/dist/api-registry.js.map +1 -1
- package/dist/bedrock-provider.d.ts.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +1 -1
- package/dist/cli.js.map +1 -1
- package/dist/env-api-keys.d.ts +9 -0
- package/dist/env-api-keys.d.ts.map +1 -1
- package/dist/env-api-keys.js +96 -30
- package/dist/env-api-keys.js.map +1 -1
- package/dist/image-models.d.ts +10 -0
- package/dist/image-models.d.ts.map +1 -0
- package/dist/image-models.generated.d.ts +305 -0
- package/dist/image-models.generated.d.ts.map +1 -0
- package/dist/image-models.generated.js +307 -0
- package/dist/image-models.generated.js.map +1 -0
- package/dist/image-models.js +23 -0
- package/dist/image-models.js.map +1 -0
- package/dist/images-api-registry.d.ts +14 -0
- package/dist/images-api-registry.d.ts.map +1 -0
- package/dist/images-api-registry.js +22 -0
- package/dist/images-api-registry.js.map +1 -0
- package/dist/images.d.ts +4 -0
- package/dist/images.d.ts.map +1 -0
- package/dist/images.js +14 -0
- package/dist/images.js.map +1 -0
- package/dist/index.d.ts +20 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -9
- package/dist/index.js.map +1 -1
- package/dist/models.d.ts +3 -9
- package/dist/models.d.ts.map +1 -1
- package/dist/models.generated.d.ts +6525 -2231
- package/dist/models.generated.d.ts.map +1 -1
- package/dist/models.generated.js +8992 -5524
- package/dist/models.generated.js.map +1 -1
- package/dist/models.js +28 -12
- package/dist/models.js.map +1 -1
- package/dist/oauth.d.ts.map +1 -1
- package/dist/providers/amazon-bedrock.d.ts +23 -0
- package/dist/providers/amazon-bedrock.d.ts.map +1 -1
- package/dist/providers/amazon-bedrock.js +206 -44
- package/dist/providers/amazon-bedrock.js.map +1 -1
- package/dist/providers/anthropic.d.ts +23 -2
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +294 -63
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/providers/azure-openai-responses.d.ts.map +1 -1
- package/dist/providers/azure-openai-responses.js +47 -23
- package/dist/providers/azure-openai-responses.js.map +1 -1
- package/dist/providers/cloudflare.d.ts +13 -0
- package/dist/providers/cloudflare.d.ts.map +1 -0
- package/dist/providers/cloudflare.js +26 -0
- package/dist/providers/cloudflare.js.map +1 -0
- package/dist/providers/faux.d.ts +56 -0
- package/dist/providers/faux.d.ts.map +1 -0
- package/dist/providers/faux.js +368 -0
- package/dist/providers/faux.js.map +1 -0
- package/dist/providers/github-copilot-headers.d.ts.map +1 -1
- package/dist/providers/github-copilot-headers.js.map +1 -1
- package/dist/providers/google-shared.d.ts +7 -2
- package/dist/providers/google-shared.d.ts.map +1 -1
- package/dist/providers/google-shared.js +53 -24
- package/dist/providers/google-shared.js.map +1 -1
- package/dist/providers/google-vertex.d.ts +1 -1
- package/dist/providers/google-vertex.d.ts.map +1 -1
- package/dist/providers/google-vertex.js +87 -16
- package/dist/providers/google-vertex.js.map +1 -1
- package/dist/providers/google.d.ts +1 -1
- package/dist/providers/google.d.ts.map +1 -1
- package/dist/providers/google.js +57 -9
- package/dist/providers/google.js.map +1 -1
- package/dist/providers/images/openrouter.d.ts +3 -0
- package/dist/providers/images/openrouter.d.ts.map +1 -0
- package/dist/providers/images/openrouter.js +129 -0
- package/dist/providers/images/openrouter.js.map +1 -0
- package/dist/providers/images/register-builtins.d.ts +4 -0
- package/dist/providers/images/register-builtins.d.ts.map +1 -0
- package/dist/providers/images/register-builtins.js +34 -0
- package/dist/providers/images/register-builtins.js.map +1 -0
- package/dist/providers/mistral.d.ts +3 -0
- package/dist/providers/mistral.d.ts.map +1 -1
- package/dist/providers/mistral.js +49 -9
- package/dist/providers/mistral.js.map +1 -1
- package/dist/providers/openai-codex-responses.d.ts +21 -0
- package/dist/providers/openai-codex-responses.d.ts.map +1 -1
- package/dist/providers/openai-codex-responses.js +443 -86
- package/dist/providers/openai-codex-responses.js.map +1 -1
- package/dist/providers/openai-completions.d.ts +5 -1
- package/dist/providers/openai-completions.d.ts.map +1 -1
- package/dist/providers/openai-completions.js +460 -225
- package/dist/providers/openai-completions.js.map +1 -1
- package/dist/providers/openai-responses-shared.d.ts +1 -0
- package/dist/providers/openai-responses-shared.d.ts.map +1 -1
- package/dist/providers/openai-responses-shared.js +95 -45
- package/dist/providers/openai-responses-shared.js.map +1 -1
- package/dist/providers/openai-responses.d.ts.map +1 -1
- package/dist/providers/openai-responses.js +66 -44
- package/dist/providers/openai-responses.js.map +1 -1
- package/dist/providers/register-builtins.d.ts +27 -2
- package/dist/providers/register-builtins.d.ts.map +1 -1
- package/dist/providers/register-builtins.js +157 -52
- package/dist/providers/register-builtins.js.map +1 -1
- package/dist/providers/simple-options.d.ts.map +1 -1
- package/dist/providers/simple-options.js +5 -1
- package/dist/providers/simple-options.js.map +1 -1
- package/dist/providers/transform-messages.d.ts.map +1 -1
- package/dist/providers/transform-messages.js +63 -34
- package/dist/providers/transform-messages.js.map +1 -1
- package/dist/session-resources.d.ts +4 -0
- package/dist/session-resources.d.ts.map +1 -0
- package/dist/session-resources.js +22 -0
- package/dist/session-resources.js.map +1 -0
- package/dist/stream.d.ts.map +1 -1
- package/dist/stream.js.map +1 -1
- package/dist/types.d.ts +219 -15
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/diagnostics.d.ts +19 -0
- package/dist/utils/diagnostics.d.ts.map +1 -0
- package/dist/utils/diagnostics.js +25 -0
- package/dist/utils/diagnostics.js.map +1 -0
- package/dist/utils/event-stream.d.ts.map +1 -1
- package/dist/utils/event-stream.js +7 -3
- package/dist/utils/event-stream.js.map +1 -1
- package/dist/utils/hash.d.ts.map +1 -1
- package/dist/utils/hash.js.map +1 -1
- package/dist/utils/headers.d.ts +2 -0
- package/dist/utils/headers.d.ts.map +1 -0
- package/dist/utils/headers.js +8 -0
- package/dist/utils/headers.js.map +1 -0
- package/dist/utils/json-parse.d.ts +8 -1
- package/dist/utils/json-parse.d.ts.map +1 -1
- package/dist/utils/json-parse.js +89 -5
- package/dist/utils/json-parse.js.map +1 -1
- package/dist/utils/oauth/anthropic.d.ts +14 -6
- package/dist/utils/oauth/anthropic.d.ts.map +1 -1
- package/dist/utils/oauth/anthropic.js +288 -57
- package/dist/utils/oauth/anthropic.js.map +1 -1
- package/dist/utils/oauth/github-copilot.d.ts.map +1 -1
- package/dist/utils/oauth/github-copilot.js +23 -12
- package/dist/utils/oauth/github-copilot.js.map +1 -1
- package/dist/utils/oauth/index.d.ts +0 -4
- package/dist/utils/oauth/index.d.ts.map +1 -1
- package/dist/utils/oauth/index.js +0 -10
- package/dist/utils/oauth/index.js.map +1 -1
- package/dist/utils/oauth/oauth-page.d.ts +3 -0
- package/dist/utils/oauth/oauth-page.d.ts.map +1 -0
- package/dist/utils/oauth/oauth-page.js +105 -0
- package/dist/utils/oauth/oauth-page.js.map +1 -0
- package/dist/utils/oauth/openai-codex.d.ts.map +1 -1
- package/dist/utils/oauth/openai-codex.js +51 -46
- package/dist/utils/oauth/openai-codex.js.map +1 -1
- package/dist/utils/oauth/pkce.d.ts.map +1 -1
- package/dist/utils/oauth/pkce.js.map +1 -1
- package/dist/utils/oauth/types.d.ts +10 -0
- package/dist/utils/oauth/types.d.ts.map +1 -1
- package/dist/utils/oauth/types.js.map +1 -1
- package/dist/utils/overflow.d.ts +7 -3
- package/dist/utils/overflow.d.ts.map +1 -1
- package/dist/utils/overflow.js +46 -13
- package/dist/utils/overflow.js.map +1 -1
- package/dist/utils/sanitize-unicode.d.ts.map +1 -1
- package/dist/utils/sanitize-unicode.js.map +1 -1
- package/dist/utils/typebox-helpers.d.ts +1 -1
- package/dist/utils/typebox-helpers.d.ts.map +1 -1
- package/dist/utils/typebox-helpers.js +1 -1
- package/dist/utils/typebox-helpers.js.map +1 -1
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +247 -38
- package/dist/utils/validation.js.map +1 -1
- package/package.json +44 -14
- package/bedrock-provider.d.ts +0 -1
- package/bedrock-provider.js +0 -1
- package/dist/providers/google-gemini-cli.d.ts +0 -74
- package/dist/providers/google-gemini-cli.d.ts.map +0 -1
- package/dist/providers/google-gemini-cli.js +0 -754
- package/dist/providers/google-gemini-cli.js.map +0 -1
- package/dist/utils/oauth/google-antigravity.d.ts +0 -26
- package/dist/utils/oauth/google-antigravity.d.ts.map +0 -1
- package/dist/utils/oauth/google-antigravity.js +0 -373
- package/dist/utils/oauth/google-antigravity.js.map +0 -1
- package/dist/utils/oauth/google-gemini-cli.d.ts +0 -26
- package/dist/utils/oauth/google-gemini-cli.d.ts.map +0 -1
- package/dist/utils/oauth/google-gemini-cli.js +0 -478
- package/dist/utils/oauth/google-gemini-cli.js.map +0 -1
package/dist/providers/openai-completions.js
@@ -1,11 +1,13 @@
 import OpenAI from "openai";
 import { getEnvApiKey } from "../env-api-keys.js";
-import { calculateCost,
+import { calculateCost, clampThinkingLevel } from "../models.js";
 import { AssistantMessageEventStream } from "../utils/event-stream.js";
+import { headersToRecord } from "../utils/headers.js";
 import { parseStreamingJson } from "../utils/json-parse.js";
 import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
+import { isCloudflareProvider, resolveCloudflareBaseUrl } from "./cloudflare.js";
 import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
-import { buildBaseOptions
+import { buildBaseOptions } from "./simple-options.js";
 import { transformMessages } from "./transform-messages.js";
 /**
  * Check if conversation messages contain tool calls or tool results.
@@ -25,6 +27,27 @@ function hasToolHistory(messages) {
     }
     return false;
 }
+function isTextContentBlock(block) {
+    return block.type === "text";
+}
+function isThinkingContentBlock(block) {
+    return block.type === "thinking";
+}
+function isToolCallBlock(block) {
+    return block.type === "toolCall";
+}
+function isImageContentBlock(block) {
+    return block.type === "image";
+}
+function resolveCacheRetention(cacheRetention) {
+    if (cacheRetention) {
+        return cacheRetention;
+    }
+    if (typeof process !== "undefined" && process.env.PI_CACHE_RETENTION === "long") {
+        return "long";
+    }
+    return "short";
+}
 export const streamOpenAICompletions = (model, context, options) => {
     const stream = new AssistantMessageEventStream();
     (async () => {
@@ -47,128 +70,185 @@ export const streamOpenAICompletions = (model, context, options) => {
         };
         try {
             const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
+            const compat = getCompat(model);
+            const cacheRetention = resolveCacheRetention(options?.cacheRetention);
+            const cacheSessionId = cacheRetention === "none" ? undefined : options?.sessionId;
+            const client = createClient(model, context, apiKey, options?.headers, cacheSessionId, compat);
+            let params = buildParams(model, context, options, compat, cacheRetention);
+            const nextParams = await options?.onPayload?.(params, model);
+            if (nextParams !== undefined) {
+                params = nextParams;
+            }
+            const requestOptions = {
+                ...(options?.signal ? { signal: options.signal } : {}),
+                ...(options?.timeoutMs !== undefined ? { timeout: options.timeoutMs } : {}),
+                ...(options?.maxRetries !== undefined ? { maxRetries: options.maxRetries } : {}),
+            };
+            const { data: openaiStream, response } = await client.chat.completions
+                .create(params, requestOptions)
+                .withResponse();
+            await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
             stream.push({ type: "start", partial: output });
+            let textBlock = null;
+            let thinkingBlock = null;
+            const toolCallBlocksByIndex = new Map();
+            const toolCallBlocksById = new Map();
             const blocks = output.content;
+            const getContentIndex = (block) => blocks.indexOf(block);
+            const finishBlock = (block) => {
+                const contentIndex = getContentIndex(block);
+                if (contentIndex === -1) {
+                    return;
+                }
+                if (block.type === "text") {
+                    stream.push({
+                        type: "text_end",
+                        contentIndex,
+                        content: block.text,
+                        partial: output,
+                    });
+                }
+                else if (block.type === "thinking") {
+                    stream.push({
+                        type: "thinking_end",
+                        contentIndex,
+                        content: block.thinking,
+                        partial: output,
+                    });
+                }
+                else if (block.type === "toolCall") {
+                    block.arguments = parseStreamingJson(block.partialArgs);
+                    // Finalize in-place and strip the scratch buffers so replay only
+                    // carries parsed arguments.
+                    delete block.partialArgs;
+                    delete block.streamIndex;
+                    stream.push({
+                        type: "toolcall_end",
+                        contentIndex,
+                        toolCall: block,
+                        partial: output,
+                    });
+                }
+            };
+            const ensureTextBlock = () => {
+                if (!textBlock) {
+                    textBlock = { type: "text", text: "" };
+                    blocks.push(textBlock);
+                    stream.push({ type: "text_start", contentIndex: getContentIndex(textBlock), partial: output });
+                }
+                return textBlock;
+            };
+            const ensureThinkingBlock = (thinkingSignature) => {
+                if (!thinkingBlock) {
+                    thinkingBlock = {
+                        type: "thinking",
+                        thinking: "",
+                        thinkingSignature,
+                    };
+                    blocks.push(thinkingBlock);
+                    stream.push({ type: "thinking_start", contentIndex: getContentIndex(thinkingBlock), partial: output });
+                }
+                return thinkingBlock;
+            };
+            const ensureToolCallBlock = (toolCall) => {
+                const streamIndex = typeof toolCall.index === "number" ? toolCall.index : undefined;
+                let block = streamIndex !== undefined ? toolCallBlocksByIndex.get(streamIndex) : undefined;
+                if (!block && toolCall.id) {
+                    block = toolCallBlocksById.get(toolCall.id);
+                }
+                if (!block) {
+                    block = {
+                        type: "toolCall",
+                        id: toolCall.id || "",
+                        name: toolCall.function?.name || "",
+                        arguments: {},
+                        partialArgs: "",
+                        streamIndex,
+                    };
+                    if (streamIndex !== undefined) {
+                        toolCallBlocksByIndex.set(streamIndex, block);
                     }
-                delete block.partialArgs;
-                stream.push({
-                    type: "toolcall_end",
-                    contentIndex: blockIndex(),
-                    toolCall: block,
-                    partial: output,
-                });
+                    if (toolCall.id) {
+                        toolCallBlocksById.set(toolCall.id, block);
                     }
+                    blocks.push(block);
+                    stream.push({
+                        type: "toolcall_start",
+                        contentIndex: getContentIndex(block),
+                        partial: output,
+                    });
                 }
+                if (streamIndex !== undefined && block.streamIndex === undefined) {
+                    block.streamIndex = streamIndex;
+                    toolCallBlocksByIndex.set(streamIndex, block);
+                }
+                if (toolCall.id) {
+                    toolCallBlocksById.set(toolCall.id, block);
+                }
+                return block;
             };
             for await (const chunk of openaiStream) {
+                if (!chunk || typeof chunk !== "object")
+                    continue;
+                // OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,
+                // and each chunk in a streamed completion carries the same id.
+                output.responseId ||= chunk.id;
+                if (typeof chunk.model === "string" && chunk.model.length > 0 && chunk.model !== model.id) {
+                    output.responseModel ||= chunk.model;
+                }
                 if (chunk.usage) {
-                    const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
-                    const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
-                    const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
-                    output.usage = {
-                        // OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
-                        input,
-                        output: outputTokens,
-                        cacheRead: cachedTokens,
-                        cacheWrite: 0,
-                        // Compute totalTokens ourselves since we add reasoning_tokens to output
-                        // and some providers (e.g., Groq) don't include them in total_tokens
-                        totalTokens: input + outputTokens + cachedTokens,
-                        cost: {
-                            input: 0,
-                            output: 0,
-                            cacheRead: 0,
-                            cacheWrite: 0,
-                            total: 0,
-                        },
-                    };
-                    calculateCost(model, output.usage);
+                    output.usage = parseChunkUsage(chunk.usage, model);
                 }
-                const choice = chunk.choices
+                const choice = Array.isArray(chunk.choices) ? chunk.choices[0] : undefined;
                 if (!choice)
                     continue;
+                // Fallback: some providers (e.g., Moonshot) return usage
+                // in choice.usage instead of the standard chunk.usage
+                if (!chunk.usage && choice.usage) {
+                    output.usage = parseChunkUsage(choice.usage, model);
+                }
                 if (choice.finish_reason) {
+                    const finishReasonResult = mapStopReason(choice.finish_reason);
+                    output.stopReason = finishReasonResult.stopReason;
+                    if (finishReasonResult.errorMessage) {
+                        output.errorMessage = finishReasonResult.errorMessage;
+                    }
                 }
                 if (choice.delta) {
                     if (choice.delta.content !== null &&
                         choice.delta.content !== undefined &&
                         choice.delta.content.length > 0) {
-                        stream.push({
-                            type: "text_delta",
-                            contentIndex: blockIndex(),
-                            delta: choice.delta.content,
-                            partial: output,
-                        });
-                    }
+                        const block = ensureTextBlock();
+                        block.text += choice.delta.content;
+                        stream.push({
+                            type: "text_delta",
+                            contentIndex: getContentIndex(block),
+                            delta: choice.delta.content,
+                            partial: output,
+                        });
                     }
                     // Some endpoints return reasoning in reasoning_content (llama.cpp),
                     // or reasoning (other openai compatible endpoints)
                     // Use the first non-empty reasoning field to avoid duplication
                     // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
                     const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
+                    const deltaFields = choice.delta;
                     let foundReasoningField = null;
                     for (const field of reasoningFields) {
-                        foundReasoningField = field;
-                        break;
-                    }
+                        const value = deltaFields[field];
+                        if (typeof value === "string" && value.length > 0) {
+                            foundReasoningField = field;
+                            break;
                         }
                     }
                     if (foundReasoningField) {
-                        thinking: "",
-                        thinkingSignature: foundReasoningField,
-                        };
-                        output.content.push(currentBlock);
-                        stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
-                        }
-                        if (currentBlock.type === "thinking") {
-                            const delta = choice.delta[foundReasoningField];
-                            currentBlock.thinking += delta;
+                        const delta = deltaFields[foundReasoningField];
+                        if (typeof delta === "string" && delta.length > 0) {
+                            const block = ensureThinkingBlock(foundReasoningField);
+                            block.thinking += delta;
                             stream.push({
                                 type: "thinking_delta",
-                                contentIndex:
+                                contentIndex: getContentIndex(block),
                                 delta,
                                 partial: output,
                             });
@@ -176,38 +256,26 @@ export const streamOpenAICompletions = (model, context, options) => {
                 }
                 if (choice?.delta?.tool_calls) {
                     for (const toolCall of choice.delta.tool_calls) {
-                        currentBlock = {
-                            type: "toolCall",
-                            id: toolCall.id || "",
-                            name: toolCall.function?.name || "",
-                            arguments: {},
-                            partialArgs: "",
-                        };
-                        output.content.push(currentBlock);
-                        stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
+                        const block = ensureToolCallBlock(toolCall);
+                        if (!block.id && toolCall.id) {
+                            block.id = toolCall.id;
+                            toolCallBlocksById.set(toolCall.id, block);
                         }
-                            currentBlock.partialArgs += toolCall.function.arguments;
-                            currentBlock.arguments = parseStreamingJson(currentBlock.partialArgs);
-                        }
-                        stream.push({
-                            type: "toolcall_delta",
-                            contentIndex: blockIndex(),
-                            delta,
-                            partial: output,
-                        });
+                        if (!block.name && toolCall.function?.name) {
+                            block.name = toolCall.function.name;
+                        }
+                        let delta = "";
+                        if (toolCall.function?.arguments) {
+                            delta = toolCall.function.arguments;
+                            block.partialArgs = (block.partialArgs ?? "") + toolCall.function.arguments;
+                            block.arguments = parseStreamingJson(block.partialArgs);
                         }
+                        stream.push({
+                            type: "toolcall_delta",
+                            contentIndex: getContentIndex(block),
+                            delta,
+                            partial: output,
+                        });
                     }
                 }
                 const reasoningDetails = choice.delta.reasoning_details;
@@ -223,19 +291,28 @@ export const streamOpenAICompletions = (model, context, options) => {
                     }
                 }
             }
+            for (const block of blocks) {
+                finishBlock(block);
+            }
             if (options?.signal?.aborted) {
                 throw new Error("Request was aborted");
            }
-            if (output.stopReason === "aborted"
-                throw new Error("
+            if (output.stopReason === "aborted") {
+                throw new Error("Request was aborted");
+            }
+            if (output.stopReason === "error") {
+                throw new Error(output.errorMessage || "Provider returned an error stop reason");
             }
             stream.push({ type: "done", reason: output.stopReason, message: output });
             stream.end();
         }
         catch (error) {
-            for (const block of output.content)
+            for (const block of output.content) {
                 delete block.index;
+                // Streaming scratch buffers are only used during parsing; never persist them.
+                delete block.partialArgs;
+                delete block.streamIndex;
+            }
             output.stopReason = options?.signal?.aborted ? "aborted" : "error";
             output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
             // Some providers via OpenRouter give additional information in this field.
@@ -254,7 +331,8 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
         throw new Error(`No API key for provider: ${model.provider}`);
     }
     const base = buildBaseOptions(model, options, apiKey);
+    const clampedReasoning = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined;
+    const reasoningEffort = clampedReasoning === "off" ? undefined : clampedReasoning;
     const toolChoice = options?.toolChoice;
     return streamOpenAICompletions(model, context, {
         ...base,
@@ -262,7 +340,7 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
         toolChoice,
     });
 };
-function createClient(model, context, apiKey, optionsHeaders) {
+function createClient(model, context, apiKey, optionsHeaders, sessionId, compat = getCompat(model)) {
     if (!apiKey) {
         if (!process.env.OPENAI_API_KEY) {
             throw new Error("OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.");
@@ -278,25 +356,41 @@ function createClient(model, context, apiKey, optionsHeaders) {
         });
         Object.assign(headers, copilotHeaders);
     }
+    if (sessionId && compat.sendSessionAffinityHeaders) {
+        headers.session_id = sessionId;
+        headers["x-client-request-id"] = sessionId;
+        headers["x-session-affinity"] = sessionId;
+    }
     // Merge options headers last so they can override defaults
     if (optionsHeaders) {
         Object.assign(headers, optionsHeaders);
     }
+    const defaultHeaders = model.provider === "cloudflare-ai-gateway"
+        ? {
+            ...headers,
+            Authorization: headers.Authorization ?? null,
+            "cf-aig-authorization": `Bearer ${apiKey}`,
+        }
+        : headers;
     return new OpenAI({
         apiKey,
-        baseURL: model.baseUrl,
+        baseURL: isCloudflareProvider(model.provider) ? resolveCloudflareBaseUrl(model) : model.baseUrl,
         dangerouslyAllowBrowser: true,
-        defaultHeaders
+        defaultHeaders,
     });
 }
-function buildParams(model, context, options) {
-    const compat = getCompat(model);
+function buildParams(model, context, options, compat = getCompat(model), cacheRetention = resolveCacheRetention(options?.cacheRetention)) {
     const messages = convertMessages(model, context, compat);
+    const cacheControl = getCompatCacheControl(compat, cacheRetention);
     const params = {
         model: model.id,
         messages,
         stream: true,
+        prompt_cache_key: (model.baseUrl.includes("api.openai.com") && cacheRetention !== "none") ||
+            (cacheRetention === "long" && compat.supportsLongCacheRetention)
+            ? options?.sessionId
+            : undefined,
+        prompt_cache_retention: cacheRetention === "long" && compat.supportsLongCacheRetention ? "24h" : undefined,
     };
     if (compat.supportsUsageInStreaming !== false) {
         params.stream_options = { include_usage: true };
@@ -315,23 +409,69 @@ function buildParams(model, context, options) {
     if (options?.temperature !== undefined) {
         params.temperature = options.temperature;
     }
-    if (context.tools) {
+    if (context.tools && context.tools.length > 0) {
         params.tools = convertTools(context.tools, compat);
+        if (compat.zaiToolStream) {
+            params.tool_stream = true;
+        }
     }
     else if (hasToolHistory(context.messages)) {
         // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
         params.tools = [];
     }
+    if (cacheControl) {
+        applyAnthropicCacheControl(messages, params.tools, cacheControl);
+    }
     if (options?.toolChoice) {
         params.tool_choice = options.toolChoice;
     }
+    if (compat.thinkingFormat === "zai" && model.reasoning) {
+        params.enable_thinking = !!options?.reasoningEffort;
+    }
+    else if (compat.thinkingFormat === "qwen" && model.reasoning) {
         params.enable_thinking = !!options?.reasoningEffort;
     }
+    else if (compat.thinkingFormat === "qwen-chat-template" && model.reasoning) {
+        params.chat_template_kwargs = {
+            enable_thinking: !!options?.reasoningEffort,
+            preserve_thinking: true,
+        };
+    }
+    else if (compat.thinkingFormat === "deepseek" && model.reasoning) {
+        params.thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
+        if (options?.reasoningEffort) {
+            params.reasoning_effort =
+                model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
+        }
+    }
+    else if (compat.thinkingFormat === "openrouter" && model.reasoning) {
+        // OpenRouter normalizes reasoning across providers via a nested reasoning object.
+        const openRouterParams = params;
+        if (options?.reasoningEffort) {
+            openRouterParams.reasoning = {
+                effort: model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort,
+            };
+        }
+        else if (model.thinkingLevelMap?.off !== null) {
+            openRouterParams.reasoning = { effort: model.thinkingLevelMap?.off ?? "none" };
+        }
+    }
+    else if (compat.thinkingFormat === "together" && model.reasoning) {
+        const togetherParams = params;
+        togetherParams.reasoning = { enabled: !!options?.reasoningEffort };
+        if (options?.reasoningEffort && compat.supportsReasoningEffort) {
+            togetherParams.reasoning_effort = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
+        }
+    }
     else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
         // OpenAI-style reasoning_effort
-        params.reasoning_effort =
+        params.reasoning_effort = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;
+    }
+    else if (!options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
+        const offValue = model.thinkingLevelMap?.off;
+        if (typeof offValue === "string") {
+            params.reasoning_effort = offValue;
+        }
     }
     // OpenRouter provider routing preferences
     if (model.baseUrl.includes("openrouter.ai") && model.compat?.openRouterRouting) {
@@ -351,37 +491,80 @@ function buildParams(model, context, options) {
     }
     return params;
 }
+function getCompatCacheControl(compat, cacheRetention) {
+    if (compat.cacheControlFormat !== "anthropic" || cacheRetention === "none") {
+        return undefined;
+    }
+    const ttl = cacheRetention === "long" && compat.supportsLongCacheRetention ? "1h" : undefined;
+    return { type: "ephemeral", ...(ttl ? { ttl } : {}) };
 }
-        const content = msg.content;
-        if (typeof content === "string") {
-            msg.content = [
-                Object.assign({ type: "text", text: content }, { cache_control: { type: "ephemeral" } }),
-            ];
+function applyAnthropicCacheControl(messages, tools, cacheControl) {
+    addCacheControlToSystemPrompt(messages, cacheControl);
+    addCacheControlToLastTool(tools, cacheControl);
+    addCacheControlToLastConversationMessage(messages, cacheControl);
+}
+function addCacheControlToSystemPrompt(messages, cacheControl) {
+    for (const message of messages) {
+        if (message.role === "system" || message.role === "developer") {
+            addCacheControlToInstructionMessage(message, cacheControl);
             return;
         }
+    }
+}
+function addCacheControlToLastConversationMessage(messages, cacheControl) {
+    for (let i = messages.length - 1; i >= 0; i--) {
+        const message = messages[i];
+        if (message.role === "user" || message.role === "assistant") {
+            if (addCacheControlToMessage(message, cacheControl)) {
                 return;
             }
         }
     }
 }
+function addCacheControlToLastTool(tools, cacheControl) {
+    if (!tools || tools.length === 0) {
+        return;
+    }
+    const lastTool = tools[tools.length - 1];
+    lastTool.cache_control = cacheControl;
+}
+function addCacheControlToInstructionMessage(message, cacheControl) {
+    return addCacheControlToTextContent(message, cacheControl);
+}
+function addCacheControlToMessage(message, cacheControl) {
+    if (message.role === "user" || message.role === "assistant") {
+        return addCacheControlToTextContent(message, cacheControl);
+    }
+    return false;
+}
+function addCacheControlToTextContent(message, cacheControl) {
+    const content = message.content;
+    if (typeof content === "string") {
+        if (content.length === 0) {
+            return false;
+        }
+        message.content = [
+            {
+                type: "text",
+                text: content,
+                cache_control: cacheControl,
+            },
+        ];
+        return true;
+    }
+    if (!Array.isArray(content)) {
+        return false;
+    }
+    for (let i = content.length - 1; i >= 0; i--) {
+        const part = content[i];
+        if (part?.type === "text") {
+            const textPart = part;
+            textPart.cache_control = cacheControl;
+            return true;
+        }
+    }
+    return false;
+}
 export function convertMessages(model, context, compat) {
     const params = [];
     const normalizeToolCallId = (id) => {
@@ -439,14 +622,11 @@ export function convertMessages(model, context, compat) {
                 };
             }
         });
-            ? content.filter((c) => c.type !== "image_url")
-            : content;
-        if (filteredContent.length === 0)
+        if (content.length === 0)
             continue;
         params.push({
             role: "user",
-            content
+            content,
         });
     }
 }
@@ -456,46 +636,50 @@ export function convertMessages(model, context, compat) {
             role: "assistant",
             content: compat.requiresAssistantAfterToolResult ? "" : null,
         };
-                    return { type: "text", text: sanitizeSurrogates(b.text) };
-                });
-            }
-        }
-        // Handle thinking blocks
-        const thinkingBlocks = msg.content.filter((b) => b.type === "thinking");
-        // Filter out empty thinking blocks to avoid API validation errors
-        const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
+        const assistantTextParts = msg.content
+            .filter(isTextContentBlock)
+            .filter((block) => block.text.trim().length > 0)
+            .map((block) => ({
+                type: "text",
+                text: sanitizeSurrogates(block.text),
+            }));
+        const assistantText = assistantTextParts.map((part) => part.text).join("");
+        const nonEmptyThinkingBlocks = msg.content
+            .filter(isThinkingContentBlock)
+            .filter((block) => block.thinking.trim().length > 0);
         if (nonEmptyThinkingBlocks.length > 0) {
             if (compat.requiresThinkingAsText) {
                 // Convert thinking blocks to plain text (no tags to avoid model mimicking them)
-                const thinkingText = nonEmptyThinkingBlocks
-                }
-                else {
-                    assistantMsg.content = [{ type: "text", text: thinkingText }];
-                }
+                const thinkingText = nonEmptyThinkingBlocks
+                    .map((block) => sanitizeSurrogates(block.thinking))
+                    .join("\n\n");
+                assistantMsg.content = [{ type: "text", text: thinkingText }, ...assistantTextParts];
             }
             else {
+                // Always send assistant content as a plain string (OpenAI Chat Completions
+                // API standard format). Sending as an array of {type:"text", text:"..."}
+                // objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
+                // NVIDIA NIM) to mirror the content-block structure literally in their
+                // output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
+                if (assistantText.length > 0) {
+                    assistantMsg.content = assistantText;
+                }
                 // Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
                 const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
                 if (signature && signature.length > 0) {
-                    assistantMsg[signature] = nonEmptyThinkingBlocks.map((
+                    assistantMsg[signature] = nonEmptyThinkingBlocks.map((block) => block.thinking).join("\n");
                 }
             }
         }
+        else if (assistantText.length > 0) {
+            // Always send assistant content as a plain string (OpenAI Chat Completions
+            // API standard format). Sending as an array of {type:"text", text:"..."}
+            // objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
+            // NVIDIA NIM) to mirror the content-block structure literally in their
+            // output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
+            assistantMsg.content = assistantText;
+        }
+        const toolCalls = msg.content.filter(isToolCallBlock);
         if (toolCalls.length > 0) {
             assistantMsg.tool_calls = toolCalls.map((tc) => ({
                 id: tc.id,
@@ -520,6 +704,11 @@ export function convertMessages(model, context, compat) {
                 assistantMsg.reasoning_details = reasoningDetails;
             }
         }
+        if (compat.requiresReasoningContentOnAssistantMessages &&
+            model.reasoning &&
+            assistantMsg.reasoning_content === undefined) {
+            assistantMsg.reasoning_content = "";
+        }
         // Skip assistant messages that have no content and no tool calls.
         // Some providers require "either content or tool_calls, but not none".
         // Other providers also don't accept empty assistant messages.
@@ -540,8 +729,8 @@ export function convertMessages(model, context, compat) {
                 const toolMsg = transformedMessages[j];
                 // Extract text and image content
                 const textResult = toolMsg.content
-                    .filter(
-                    .map((
+                    .filter(isTextContentBlock)
+                    .map((block) => block.text)
                     .join("\n");
                 const hasImages = toolMsg.content.some((c) => c.type === "image");
                 // Always send tool result with text (or placeholder if only images)
@@ -558,7 +747,7 @@ export function convertMessages(model, context, compat) {
                 params.push(toolResultMsg);
                 if (hasImages && model.input.includes("image")) {
                     for (const block of toolMsg.content) {
-                        if (block
+                        if (isImageContentBlock(block)) {
                             imageBlocks.push({
                                 type: "image_url",
                                 image_url: {
@@ -610,23 +799,51 @@ function convertTools(tools, compat) {
         },
     }));
 }
+function parseChunkUsage(rawUsage, model) {
+    const promptTokens = rawUsage.prompt_tokens || 0;
+    const reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens ?? rawUsage.prompt_cache_hit_tokens ?? 0;
+    const cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;
+    // Normalize to pi-ai semantics:
+    // - cacheRead: hits from cache created by previous requests only
+    // - cacheWrite: tokens written to cache in this request
+    // Some OpenAI-compatible providers (observed on OpenRouter) report cached_tokens
+    // as (previous hits + current writes). In that case, remove cacheWrite from cacheRead.
+    const cacheReadTokens = cacheWriteTokens > 0 ? Math.max(0, reportedCachedTokens - cacheWriteTokens) : reportedCachedTokens;
+    const input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens);
+    // OpenAI completion_tokens already includes reasoning_tokens.
+    const outputTokens = rawUsage.completion_tokens || 0;
+    const usage = {
+        input,
+        output: outputTokens,
+        cacheRead: cacheReadTokens,
+        cacheWrite: cacheWriteTokens,
+        totalTokens: input + outputTokens + cacheReadTokens + cacheWriteTokens,
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+    };
+    calculateCost(model, usage);
+    return usage;
+}
 function mapStopReason(reason) {
     if (reason === null)
-        return "stop";
+        return { stopReason: "stop" };
     switch (reason) {
         case "stop":
+        case "end":
+            return { stopReason: "stop" };
         case "length":
-            return "length";
+            return { stopReason: "length" };
         case "function_call":
         case "tool_calls":
-            return "toolUse";
+            return { stopReason: "toolUse" };
         case "content_filter":
-            return "error";
+            return { stopReason: "error", errorMessage: "Provider finish_reason: content_filter" };
+        case "network_error":
+            return { stopReason: "error", errorMessage: "Provider finish_reason: network_error" };
+        default:
+            return {
+                stopReason: "error",
+                errorMessage: `Provider finish_reason: ${reason}`,
+            };
     }
 }
 /**
@@ -638,41 +855,54 @@ function detectCompat(model) {
     const provider = model.provider;
     const baseUrl = model.baseUrl;
     const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
+    const isTogether = provider === "together" || baseUrl.includes("api.together.ai") || baseUrl.includes("api.together.xyz");
+    const isMoonshot = provider === "moonshotai" || provider === "moonshotai-cn" || baseUrl.includes("api.moonshot.");
+    const isCloudflareWorkersAI = provider === "cloudflare-workers-ai" || baseUrl.includes("api.cloudflare.com");
+    const isCloudflareAiGateway = provider === "cloudflare-ai-gateway" || baseUrl.includes("gateway.ai.cloudflare.com");
     const isNonStandard = provider === "cerebras" ||
         baseUrl.includes("cerebras.ai") ||
         provider === "xai" ||
         baseUrl.includes("api.x.ai") ||
+        isTogether ||
         baseUrl.includes("chutes.ai") ||
         baseUrl.includes("deepseek.com") ||
+        baseUrl.includes("dashscope.aliyuncs.com") ||
         isZai ||
+        isMoonshot ||
         provider === "opencode" ||
-        baseUrl.includes("opencode.ai")
+        baseUrl.includes("opencode.ai") ||
+        isCloudflareWorkersAI ||
+        isCloudflareAiGateway;
+    const useMaxTokens = baseUrl.includes("chutes.ai") || isMoonshot || isCloudflareAiGateway || isTogether;
     const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
-    const
-    const
-        ? {
-            minimal: "default",
-            low: "default",
-            medium: "default",
-            high: "default",
-            xhigh: "default",
-        }
-        : {};
+    const isDeepSeek = provider === "deepseek" || baseUrl.includes("deepseek.com");
+    const cacheControlFormat = provider === "openrouter" && model.id.startsWith("anthropic/") ? "anthropic" : undefined;
     return {
         supportsStore: !isNonStandard,
         supportsDeveloperRole: !isNonStandard,
-        supportsReasoningEffort: !isGrok && !isZai,
-        reasoningEffortMap,
+        supportsReasoningEffort: !isGrok && !isZai && !isMoonshot && !isTogether && !isCloudflareAiGateway,
         supportsUsageInStreaming: true,
         maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
         requiresToolResultName: false,
         requiresAssistantAfterToolResult: false,
         requiresThinkingAsText: false,
+        requiresReasoningContentOnAssistantMessages: isDeepSeek,
+        thinkingFormat: isDeepSeek
+            ? "deepseek"
+            : isZai
+                ? "zai"
+                : isTogether
+                    ? "together"
+                    : provider === "openrouter" || baseUrl.includes("openrouter.ai")
+                        ? "openrouter"
+                        : "openai",
         openRouterRouting: {},
         vercelGatewayRouting: {},
+        zaiToolStream: false,
+        supportsStrictMode: !isMoonshot && !isTogether && !isCloudflareAiGateway,
+        cacheControlFormat,
+        sendSessionAffinityHeaders: false,
+        supportsLongCacheRetention: !(isTogether || isCloudflareWorkersAI || isCloudflareAiGateway),
     };
 }
 /**
@@ -687,16 +917,21 @@ function getCompat(model) {
         supportsStore: model.compat.supportsStore ?? detected.supportsStore,
         supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
         supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
-        reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
         supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
         maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
         requiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,
         requiresAssistantAfterToolResult: model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
         requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
+        requiresReasoningContentOnAssistantMessages: model.compat.requiresReasoningContentOnAssistantMessages ??
+            detected.requiresReasoningContentOnAssistantMessages,
         thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
         openRouterRouting: model.compat.openRouterRouting ?? {},
         vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
+        zaiToolStream: model.compat.zaiToolStream ?? detected.zaiToolStream,
         supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
+        cacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,
+        sendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,
+        supportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,
     };
 }
 //# sourceMappingURL=openai-completions.js.map