@oh-my-pi/pi-ai 8.2.2 → 8.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/index.ts +1 -0
- package/src/models.generated.ts +4 -38
- package/src/providers/azure-openai-responses.ts +688 -0
- package/src/providers/google.ts +1 -1
- package/src/providers/openai-codex-responses.ts +16 -16
- package/src/providers/openai-completions.ts +14 -3
- package/src/providers/openai-responses.ts +8 -16
- package/src/providers/transform-messages.ts +5 -16
- package/src/stream.ts +11 -0
- package/src/types.ts +17 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@oh-my-pi/pi-ai",
|
|
3
|
-
"version": "8.2.2",
|
|
3
|
+
"version": "8.4.0",
|
|
4
4
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.ts",
|
|
@@ -56,7 +56,7 @@
|
|
|
56
56
|
"test": "bun test"
|
|
57
57
|
},
|
|
58
58
|
"dependencies": {
|
|
59
|
-
"@oh-my-pi/pi-utils": "8.
|
|
59
|
+
"@oh-my-pi/pi-utils": "8.4.0",
|
|
60
60
|
"@anthropic-ai/sdk": "^0.71.2",
|
|
61
61
|
"@aws-sdk/client-bedrock-runtime": "^3.975.0",
|
|
62
62
|
"@bufbuild/protobuf": "^2.10.2",
|
package/src/index.ts
CHANGED
|
@@ -2,6 +2,7 @@ import "./utils/migrate-env";
|
|
|
2
2
|
|
|
3
3
|
export * from "./models";
|
|
4
4
|
export * from "./providers/anthropic";
|
|
5
|
+
export * from "./providers/azure-openai-responses";
|
|
5
6
|
export * from "./providers/cursor";
|
|
6
7
|
export * from "./providers/google";
|
|
7
8
|
export * from "./providers/google-gemini-cli";
|
package/src/models.generated.ts
CHANGED
|
@@ -4092,40 +4092,6 @@ export const MODELS = {
|
|
|
4092
4092
|
} satisfies Model<"openai-codex-responses">,
|
|
4093
4093
|
},
|
|
4094
4094
|
"opencode": {
|
|
4095
|
-
"alpha-gd4": {
|
|
4096
|
-
id: "alpha-gd4",
|
|
4097
|
-
name: "Alpha GD4",
|
|
4098
|
-
api: "anthropic-messages",
|
|
4099
|
-
provider: "opencode",
|
|
4100
|
-
baseUrl: "https://opencode.ai/zen",
|
|
4101
|
-
reasoning: true,
|
|
4102
|
-
input: ["text"],
|
|
4103
|
-
cost: {
|
|
4104
|
-
input: 0.5,
|
|
4105
|
-
output: 2,
|
|
4106
|
-
cacheRead: 0.15,
|
|
4107
|
-
cacheWrite: 0,
|
|
4108
|
-
},
|
|
4109
|
-
contextWindow: 262144,
|
|
4110
|
-
maxTokens: 32768,
|
|
4111
|
-
} satisfies Model<"anthropic-messages">,
|
|
4112
|
-
"alpha-glm-4.7": {
|
|
4113
|
-
id: "alpha-glm-4.7",
|
|
4114
|
-
name: "Alpha GLM-4.7",
|
|
4115
|
-
api: "openai-completions",
|
|
4116
|
-
provider: "opencode",
|
|
4117
|
-
baseUrl: "https://opencode.ai/zen/v1",
|
|
4118
|
-
reasoning: true,
|
|
4119
|
-
input: ["text"],
|
|
4120
|
-
cost: {
|
|
4121
|
-
input: 0.6,
|
|
4122
|
-
output: 2.2,
|
|
4123
|
-
cacheRead: 0.6,
|
|
4124
|
-
cacheWrite: 0,
|
|
4125
|
-
},
|
|
4126
|
-
contextWindow: 204800,
|
|
4127
|
-
maxTokens: 131072,
|
|
4128
|
-
} satisfies Model<"openai-completions">,
|
|
4129
4095
|
"big-pickle": {
|
|
4130
4096
|
id: "big-pickle",
|
|
4131
4097
|
name: "Big Pickle",
|
|
@@ -5310,10 +5276,10 @@ export const MODELS = {
|
|
|
5310
5276
|
reasoning: false,
|
|
5311
5277
|
input: ["text", "image"],
|
|
5312
5278
|
cost: {
|
|
5313
|
-
input: 0
|
|
5314
|
-
output: 0
|
|
5315
|
-
cacheRead: 0
|
|
5316
|
-
cacheWrite: 0
|
|
5279
|
+
input: 0,
|
|
5280
|
+
output: 0,
|
|
5281
|
+
cacheRead: 0,
|
|
5282
|
+
cacheWrite: 0,
|
|
5317
5283
|
},
|
|
5318
5284
|
contextWindow: 1048576,
|
|
5319
5285
|
maxTokens: 8192,
|
|
@@ -0,0 +1,688 @@
|
|
|
1
|
+
import type OpenAI from "openai";
|
|
2
|
+
import { AzureOpenAI } from "openai";
|
|
3
|
+
import type {
|
|
4
|
+
Tool as OpenAITool,
|
|
5
|
+
ResponseCreateParamsStreaming,
|
|
6
|
+
ResponseFunctionToolCall,
|
|
7
|
+
ResponseInput,
|
|
8
|
+
ResponseInputContent,
|
|
9
|
+
ResponseInputImage,
|
|
10
|
+
ResponseInputText,
|
|
11
|
+
ResponseOutputMessage,
|
|
12
|
+
ResponseReasoningItem,
|
|
13
|
+
} from "openai/resources/responses/responses";
|
|
14
|
+
import { calculateCost } from "../models";
|
|
15
|
+
import { getEnvApiKey } from "../stream";
|
|
16
|
+
import type {
|
|
17
|
+
Api,
|
|
18
|
+
AssistantMessage,
|
|
19
|
+
Context,
|
|
20
|
+
ImageContent,
|
|
21
|
+
Model,
|
|
22
|
+
StopReason,
|
|
23
|
+
StreamFunction,
|
|
24
|
+
StreamOptions,
|
|
25
|
+
TextContent,
|
|
26
|
+
ThinkingContent,
|
|
27
|
+
Tool,
|
|
28
|
+
ToolCall,
|
|
29
|
+
} from "../types";
|
|
30
|
+
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
31
|
+
import { parseStreamingJson } from "../utils/json-parse";
|
|
32
|
+
import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
|
|
33
|
+
import { sanitizeSurrogates } from "../utils/sanitize-unicode";
|
|
34
|
+
import { transformMessages } from "./transform-messages";
|
|
35
|
+
|
|
36
|
+
// API version used by resolveAzureConfig when neither options.azureApiVersion
// nor the AZURE_OPENAI_API_VERSION environment variable is set.
const DEFAULT_AZURE_API_VERSION = "v1";
|
|
37
|
+
|
|
38
|
+
/**
 * Parse a comma-separated list of `modelId=deploymentName` pairs into a lookup map.
 *
 * Each entry is split on its FIRST `=` only, so a deployment name that itself
 * contains `=` is kept intact instead of being truncated. Both sides are trimmed
 * before validation; entries without `=`, or with an empty model id or an empty
 * deployment name, are skipped. Later duplicates overwrite earlier ones.
 *
 * @param value Raw mapping string (e.g. the AZURE_OPENAI_DEPLOYMENT_NAME_MAP env value), or undefined.
 * @returns Map from model id to deployment name; empty when `value` is empty or undefined.
 */
function parseDeploymentNameMap(value: string | undefined): Map<string, string> {
	const map = new Map<string, string>();
	if (!value) return map;
	for (const entry of value.split(",")) {
		const separator = entry.indexOf("=");
		if (separator === -1) continue;
		// Trim before checking emptiness so "id=   " is rejected rather than mapped to "".
		const modelId = entry.slice(0, separator).trim();
		const deploymentName = entry.slice(separator + 1).trim();
		if (!modelId || !deploymentName) continue;
		map.set(modelId, deploymentName);
	}
	return map;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Pick the Azure deployment name to send as the request's `model`.
 *
 * Precedence: `options.azureDeploymentName`, then the entry for `model.id` in the
 * AZURE_OPENAI_DEPLOYMENT_NAME_MAP environment variable, then `model.id` itself.
 */
function resolveDeploymentName(model: Model<"azure-openai-responses">, options?: AzureOpenAIResponsesOptions): string {
	const explicit = options?.azureDeploymentName;
	if (explicit) return explicit;

	const fromEnv = parseDeploymentNameMap(process.env.AZURE_OPENAI_DEPLOYMENT_NAME_MAP);
	const mapped = fromEnv.get(model.id);
	return mapped ? mapped : model.id;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Stream options specific to the Azure OpenAI Responses provider.
 */
export interface AzureOpenAIResponsesOptions extends StreamOptions {
	/** Reasoning effort; buildParams falls back to "medium" when only `reasoningSummary` is given. */
	reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
	/** Reasoning summary mode; buildParams falls back to "auto" when only `reasoningEffort` is given. */
	reasoningSummary?: "auto" | "detailed" | "concise" | null;
	/** API version; takes precedence over AZURE_OPENAI_API_VERSION and the built-in default. */
	azureApiVersion?: string;
	/** Azure resource name, used to derive a base URL when no explicit base URL is configured. */
	azureResourceName?: string;
	/** Explicit base URL; takes precedence over AZURE_OPENAI_BASE_URL, the resource name, and model.baseUrl. */
	azureBaseUrl?: string;
	/** Deployment name; takes precedence over AZURE_OPENAI_DEPLOYMENT_NAME_MAP and model.id. */
	azureDeploymentName?: string;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Stream a completion from the Azure OpenAI Responses API.
 *
 * Returns an AssistantMessageEventStream immediately and fills it asynchronously:
 * a `start` event, then `thinking_*` / `text_*` / `toolcall_*` events as output items
 * arrive, and finally either `done` (with the finished message) or `error`
 * (with `stopReason` set to "aborted" or "error" and `errorMessage` populated).
 * The stream is always ended, on success and on failure.
 *
 * Tool call ids are emitted as `${call_id}|${item.id}` so both halves can be
 * recovered when the message is sent back to the API.
 *
 * @param model Model descriptor; `model.id` is mapped to an Azure deployment name.
 * @param context System prompt, message history and tools.
 * @param options Optional API key, abort signal, headers and Azure/reasoning settings.
 */
export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"> = (
	model: Model<"azure-openai-responses">,
	context: Context,
	options?: AzureOpenAIResponsesOptions,
): AssistantMessageEventStream => {
	const stream = new AssistantMessageEventStream();

	// Start async processing
	(async () => {
		const startTime = Date.now();
		let firstTokenTime: number | undefined;
		const deploymentName = resolveDeploymentName(model, options);

		const output: AssistantMessage = {
			role: "assistant",
			content: [],
			api: "azure-openai-responses" as Api,
			provider: model.provider,
			model: model.id,
			usage: {
				input: 0,
				output: 0,
				cacheRead: 0,
				cacheWrite: 0,
				totalTokens: 0,
				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
			},
			stopReason: "stop",
			timestamp: Date.now(),
		};

		try {
			// Create Azure OpenAI client
			const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
			const client = createClient(model, apiKey, options);
			const params = buildParams(model, context, options, deploymentName);
			options?.onPayload?.(params);
			const openaiStream = await client.responses.create(
				params,
				options?.signal ? { signal: options.signal } : undefined,
			);
			stream.push({ type: "start", partial: output });

			// The API item currently being streamed and the content block mirroring it.
			let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
			let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
			const blocks = output.content;
			// Blocks are only ever appended, so the active block is always the last one.
			const blockIndex = () => blocks.length - 1;

			for await (const event of openaiStream) {
				// Handle output item start
				if (event.type === "response.output_item.added") {
					if (!firstTokenTime) firstTokenTime = Date.now();
					const item = event.item;
					if (item.type === "reasoning") {
						currentItem = item;
						currentBlock = { type: "thinking", thinking: "" };
						output.content.push(currentBlock);
						stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
					} else if (item.type === "message") {
						currentItem = item;
						currentBlock = { type: "text", text: "" };
						output.content.push(currentBlock);
						stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
					} else if (item.type === "function_call") {
						currentItem = item;
						currentBlock = {
							type: "toolCall",
							id: `${item.call_id}|${item.id}`,
							name: item.name,
							arguments: {},
							partialJson: item.arguments || "",
						};
						output.content.push(currentBlock);
						stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
					}
				}
				// Handle reasoning summary deltas
				else if (event.type === "response.reasoning_summary_part.added") {
					if (currentItem && currentItem.type === "reasoning") {
						currentItem.summary = currentItem.summary || [];
						currentItem.summary.push(event.part);
					}
				} else if (event.type === "response.reasoning_summary_text.delta") {
					if (
						currentItem &&
						currentItem.type === "reasoning" &&
						currentBlock &&
						currentBlock.type === "thinking"
					) {
						currentItem.summary = currentItem.summary || [];
						const lastPart = currentItem.summary[currentItem.summary.length - 1];
						if (lastPart) {
							currentBlock.thinking += event.delta;
							lastPart.text += event.delta;
							stream.push({
								type: "thinking_delta",
								contentIndex: blockIndex(),
								delta: event.delta,
								partial: output,
							});
						}
					}
				}
				// Add a new line between summary parts (hack...)
				else if (event.type === "response.reasoning_summary_part.done") {
					if (
						currentItem &&
						currentItem.type === "reasoning" &&
						currentBlock &&
						currentBlock.type === "thinking"
					) {
						currentItem.summary = currentItem.summary || [];
						const lastPart = currentItem.summary[currentItem.summary.length - 1];
						if (lastPart) {
							currentBlock.thinking += "\n\n";
							lastPart.text += "\n\n";
							stream.push({
								type: "thinking_delta",
								contentIndex: blockIndex(),
								delta: "\n\n",
								partial: output,
							});
						}
					}
				}
				// Handle text output deltas
				else if (event.type === "response.content_part.added") {
					if (currentItem && currentItem.type === "message") {
						currentItem.content = currentItem.content || [];
						// Filter out ReasoningText, only accept output_text and refusal
						if (event.part.type === "output_text" || event.part.type === "refusal") {
							currentItem.content.push(event.part);
						}
					}
				} else if (event.type === "response.output_text.delta") {
					if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
						if (!currentItem.content || currentItem.content.length === 0) {
							continue;
						}
						const lastPart = currentItem.content[currentItem.content.length - 1];
						if (lastPart && lastPart.type === "output_text") {
							currentBlock.text += event.delta;
							lastPart.text += event.delta;
							stream.push({
								type: "text_delta",
								contentIndex: blockIndex(),
								delta: event.delta,
								partial: output,
							});
						}
					}
				} else if (event.type === "response.refusal.delta") {
					if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
						if (!currentItem.content || currentItem.content.length === 0) {
							continue;
						}
						const lastPart = currentItem.content[currentItem.content.length - 1];
						if (lastPart && lastPart.type === "refusal") {
							currentBlock.text += event.delta;
							lastPart.refusal += event.delta;
							stream.push({
								type: "text_delta",
								contentIndex: blockIndex(),
								delta: event.delta,
								partial: output,
							});
						}
					}
				}
				// Handle function call argument deltas
				else if (event.type === "response.function_call_arguments.delta") {
					if (
						currentItem &&
						currentItem.type === "function_call" &&
						currentBlock &&
						currentBlock.type === "toolCall"
					) {
						currentBlock.partialJson += event.delta;
						currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
						stream.push({
							type: "toolcall_delta",
							contentIndex: blockIndex(),
							delta: event.delta,
							partial: output,
						});
					}
				}
				// Handle function call arguments done (some providers send this instead of deltas)
				else if (event.type === "response.function_call_arguments.done") {
					if (currentItem?.type === "function_call" && currentBlock?.type === "toolCall") {
						currentBlock.partialJson = event.arguments;
						currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
					}
				}
				// Handle output item completion
				else if (event.type === "response.output_item.done") {
					const item = event.item;

					if (item.type === "reasoning" && currentBlock && currentBlock.type === "thinking") {
						currentBlock.thinking = item.summary?.map(s => s.text).join("\n\n") || "";
						// The whole reasoning item is kept so it can be replayed verbatim on the next request.
						currentBlock.thinkingSignature = JSON.stringify(item);
						stream.push({
							type: "thinking_end",
							contentIndex: blockIndex(),
							content: currentBlock.thinking,
							partial: output,
						});
						currentBlock = null;
					} else if (item.type === "message" && currentBlock && currentBlock.type === "text") {
						currentBlock.text = item.content.map(c => (c.type === "output_text" ? c.text : c.refusal)).join("");
						currentBlock.textSignature = item.id;
						stream.push({
							type: "text_end",
							contentIndex: blockIndex(),
							content: currentBlock.text,
							partial: output,
						});
						currentBlock = null;
					} else if (item.type === "function_call") {
						// Prefer the streamed JSON, then the item's final arguments; fall back to an
						// empty object so an empty arguments string does not make JSON.parse throw.
						const rawArgs =
							(currentBlock?.type === "toolCall" && currentBlock.partialJson) || item.arguments || "{}";
						const args = JSON.parse(rawArgs);
						const toolCall: ToolCall = {
							type: "toolCall",
							id: `${item.call_id}|${item.id}`,
							name: item.name,
							arguments: args,
						};
						currentBlock = null;
						stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
					}
				}
				// Handle completion
				else if (event.type === "response.completed") {
					const response = event.response;
					if (response?.usage) {
						const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
						output.usage = {
							// OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
							input: (response.usage.input_tokens || 0) - cachedTokens,
							output: response.usage.output_tokens || 0,
							cacheRead: cachedTokens,
							cacheWrite: 0,
							totalTokens: response.usage.total_tokens || 0,
							cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
						};
					}
					calculateCost(model, output.usage);
					// Map status to stop reason
					output.stopReason = mapStopReason(response?.status);
					if (output.content.some(b => b.type === "toolCall") && output.stopReason === "stop") {
						output.stopReason = "toolUse";
					}
				}
				// Handle errors
				else if (event.type === "error") {
					// A template literal is always truthy, so the fallback has to be applied to the
					// individual fields rather than to the formatted string.
					const detail = event.message || "Unknown error";
					throw new Error(event.code ? `Error Code ${event.code}: ${detail}` : detail);
				} else if (event.type === "response.failed") {
					// Surface the server-provided failure reason when there is one.
					const failure = event.response?.error;
					throw new Error(failure ? `Error Code ${failure.code}: ${failure.message}` : "Unknown error");
				}
			}

			if (options?.signal?.aborted) {
				throw new Error("Request was aborted");
			}

			if (output.stopReason === "aborted" || output.stopReason === "error") {
				throw new Error("An unknown error occurred");
			}

			output.duration = Date.now() - startTime;
			if (firstTokenTime) output.ttft = firstTokenTime - startTime;
			stream.push({ type: "done", reason: output.stopReason, message: output });
			stream.end();
		} catch (error) {
			for (const block of output.content) delete (block as { index?: number }).index;
			output.stopReason = options?.signal?.aborted ? "aborted" : "error";
			output.errorMessage = formatErrorMessageWithRetryAfter(error);
			output.duration = Date.now() - startTime;
			if (firstTokenTime) output.ttft = firstTokenTime - startTime;
			stream.push({ type: "error", reason: output.stopReason, error: output });
			stream.end();
		}
	})();

	return stream;
};
|
|
360
|
+
|
|
361
|
+
/**
 * Strip every trailing `/` from a base URL; the rest of the string is returned unchanged.
 */
function normalizeAzureBaseUrl(baseUrl: string): string {
	let end = baseUrl.length;
	while (end > 0 && baseUrl[end - 1] === "/") {
		end -= 1;
	}
	return baseUrl.slice(0, end);
}
|
|
364
|
+
|
|
365
|
+
/**
 * Build the base URL for an Azure resource name: `https://<resourceName>.openai.azure.com/openai/v1`.
 */
function buildDefaultBaseUrl(resourceName: string): string {
	const host = resourceName + ".openai.azure.com";
	return "https://" + host + "/openai/v1";
}
|
|
368
|
+
|
|
369
|
+
/**
 * Resolve the base URL and API version for an Azure OpenAI request.
 *
 * API version: `options.azureApiVersion`, then AZURE_OPENAI_API_VERSION, then the default.
 * Base URL: `options.azureBaseUrl`, then AZURE_OPENAI_BASE_URL (both trimmed), then a URL
 * derived from `options.azureResourceName` / AZURE_OPENAI_RESOURCE_NAME, then `model.baseUrl`.
 * Trailing slashes are removed from the chosen base URL.
 *
 * @throws Error when none of the base URL sources yields a value.
 */
function resolveAzureConfig(
	model: Model<"azure-openai-responses">,
	options?: AzureOpenAIResponsesOptions,
): { baseUrl: string; apiVersion: string } {
	const apiVersion = options?.azureApiVersion || process.env.AZURE_OPENAI_API_VERSION || DEFAULT_AZURE_API_VERSION;

	const explicitUrl = options?.azureBaseUrl?.trim() || process.env.AZURE_OPENAI_BASE_URL?.trim() || undefined;
	const resourceName = options?.azureResourceName || process.env.AZURE_OPENAI_RESOURCE_NAME;

	// First non-empty source wins: explicit URL, then resource-derived URL, then the model's own URL.
	const candidate = explicitUrl || (resourceName ? buildDefaultBaseUrl(resourceName) : undefined) || model.baseUrl;

	if (!candidate) {
		throw new Error(
			"Azure OpenAI base URL is required. Set AZURE_OPENAI_BASE_URL or AZURE_OPENAI_RESOURCE_NAME, or pass azureBaseUrl, azureResourceName, or model.baseUrl.",
		);
	}

	return { baseUrl: normalizeAzureBaseUrl(candidate), apiVersion };
}
|
|
399
|
+
|
|
400
|
+
/**
 * Create the AzureOpenAI SDK client for a request.
 *
 * The API key comes from `apiKey`, falling back to AZURE_OPENAI_API_KEY. Default headers
 * are `model.headers` overlaid with `options.headers`. Base URL and API version come from
 * resolveAzureConfig.
 *
 * @throws Error when no API key is available, or when no base URL can be resolved.
 */
function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
	const resolvedKey = apiKey || process.env.AZURE_OPENAI_API_KEY;
	if (!resolvedKey) {
		throw new Error(
			"Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument.",
		);
	}

	// Per-request headers override the model's defaults.
	const defaultHeaders = { ...(model.headers ?? {}), ...(options?.headers ?? {}) };
	const config = resolveAzureConfig(model, options);

	return new AzureOpenAI({
		apiKey: resolvedKey,
		apiVersion: config.apiVersion,
		dangerouslyAllowBrowser: true,
		defaultHeaders,
		baseURL: config.baseUrl,
	});
}
|
|
426
|
+
|
|
427
|
+
/**
 * Build the streaming Responses API request for a context.
 *
 * `deploymentName` is sent as `model` and `options.sessionId` as `prompt_cache_key`.
 * `max_output_tokens`, `temperature` and `tools` are set only when provided. For reasoning
 * models, a `reasoning` config (effort default "medium", summary default "auto") plus
 * encrypted reasoning content is requested when either reasoning option is set; otherwise,
 * for models whose name starts with "gpt-5", a trailing developer message is appended.
 */
function buildParams(
	model: Model<"azure-openai-responses">,
	context: Context,
	options: AzureOpenAIResponsesOptions | undefined,
	deploymentName: string,
) {
	const input = convertMessages(model, context, true);
	const reasoningRequested = Boolean(options?.reasoningEffort || options?.reasoningSummary);

	const params: ResponseCreateParamsStreaming = {
		model: deploymentName,
		input,
		stream: true,
		prompt_cache_key: options?.sessionId,
	};

	if (options?.maxTokens) params.max_output_tokens = options.maxTokens;
	if (options?.temperature !== undefined) params.temperature = options.temperature;
	if (context.tools) params.tools = convertTools(context.tools);

	if (!model.reasoning) return params;

	if (reasoningRequested) {
		params.reasoning = {
			effort: options?.reasoningEffort || "medium",
			summary: options?.reasoningSummary || "auto",
		};
		params.include = ["reasoning.encrypted_content"];
		return params;
	}

	if (model.name.toLowerCase().startsWith("gpt-5")) {
		// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
		// `input` is the same array referenced by params.input, so this extends the request.
		input.push({
			role: "developer",
			content: [{ type: "input_text", text: "# Juice: 0 !important" }],
		});
	}

	return params;
}
|
|
479
|
+
|
|
480
|
+
/**
 * Split a combined tool call id of the form `callId|itemId` into its two parts.
 *
 * Only the first two `|`-separated segments are used. When either segment is missing
 * or empty, both ids are synthesized from an xxHash64 of the whole input as
 * `call_<hash>` and `item_<hash>`.
 */
function normalizeResponsesToolCallId(id: string): { callId: string; itemId: string } {
	const segments = id.split("|");
	const callId = segments[0];
	const itemId = segments[1];
	if (!callId || !itemId) {
		const digest = Bun.hash.xxHash64(id).toString(36);
		return { callId: `call_${digest}`, itemId: `item_${digest}` };
	}
	return { callId, itemId };
}
|
|
488
|
+
|
|
489
|
+
/**
 * Convert a context's system prompt and message history into Responses API input items.
 *
 * - The system prompt becomes a "developer" message for reasoning models, "system" otherwise.
 * - User messages: empty text is dropped, images are dropped when the model has no image input,
 *   and a message left with no content is skipped.
 * - Assistant messages: thinking blocks are replayed from their stored signature, text blocks
 *   become completed output messages, tool calls become `function_call` items. Thinking and
 *   tool-call blocks are omitted when the message's stop reason is "error".
 * - Tool results become `function_call_output` items, followed by a user message carrying any
 *   images when the model accepts image input.
 *
 * @param strictResponsesPairing When true, tool results whose call id was not produced by an
 *   earlier converted tool call are skipped.
 */
function convertMessages(
	model: Model<"azure-openai-responses">,
	context: Context,
	strictResponsesPairing: boolean,
): ResponseInput {
	const input: ResponseInput = [];
	const seenCallIds = new Set<string>();
	const history = transformMessages(context.messages, model);
	const supportsImages = model.input.includes("image");

	if (context.systemPrompt) {
		input.push({
			role: model.reasoning ? "developer" : "system",
			content: sanitizeSurrogates(context.systemPrompt),
		});
	}

	// Only advanced for messages that were not skipped; used for fallback message ids.
	let msgIndex = 0;
	for (const msg of history) {
		switch (msg.role) {
			case "user": {
				if (typeof msg.content === "string") {
					// Skip empty user messages
					if (!msg.content || msg.content.trim() === "") continue;
					input.push({
						role: "user",
						content: [{ type: "input_text", text: sanitizeSurrogates(msg.content) }],
					});
					break;
				}

				const parts: ResponseInputContent[] = [];
				for (const item of msg.content) {
					if (item.type === "text") {
						const text = sanitizeSurrogates(item.text);
						// Drop text blocks that are empty after sanitizing.
						if (text.trim().length > 0) {
							parts.push({ type: "input_text", text } satisfies ResponseInputText);
						}
					} else if (supportsImages) {
						parts.push({
							type: "input_image",
							detail: "auto",
							image_url: `data:${item.mimeType};base64,${item.data}`,
						} satisfies ResponseInputImage);
					}
				}
				if (parts.length === 0) continue;
				input.push({ role: "user", content: parts });
				break;
			}

			case "assistant": {
				const assistantMsg = msg as AssistantMessage;
				// Same provider and API but a different model id: fc_ item ids must be dropped
				// to avoid OpenAI's reasoning/function_call pairing validation errors.
				const fromOtherModel =
					assistantMsg.api === model.api &&
					assistantMsg.provider === model.provider &&
					assistantMsg.model !== model.id;
				const errored = msg.stopReason === "error";
				const items: ResponseInput = [];

				for (const block of msg.content) {
					if (block.type === "text") {
						const textBlock = block as TextContent;
						const signature = textBlock.textSignature;
						// OpenAI requires id to be max 64 characters
						let msgId: string;
						if (!signature) {
							msgId = `msg_${msgIndex}`;
						} else if (signature.length > 64) {
							msgId = `msg_${Bun.hash.xxHash64(signature).toString(36)}`;
						} else {
							msgId = signature;
						}
						items.push({
							type: "message",
							role: "assistant",
							content: [{ type: "output_text", text: sanitizeSurrogates(textBlock.text), annotations: [] }],
							status: "completed",
							id: msgId,
						} satisfies ResponseOutputMessage);
						continue;
					}

					// Do not submit thinking or toolcall blocks if the completion had an error (i.e. abort)
					if (errored) continue;

					if (block.type === "thinking") {
						if (block.thinkingSignature) {
							items.push(JSON.parse(block.thinkingSignature));
						}
					} else if (block.type === "toolCall") {
						const toolCall = block as ToolCall;
						const { callId, itemId } = normalizeResponsesToolCallId(toolCall.id);
						seenCallIds.add(callId);
						items.push({
							type: "function_call",
							// Omitting the id sidesteps the pairing validation for other-model messages.
							id: fromOtherModel && itemId.startsWith("fc_") ? undefined : itemId,
							call_id: callId,
							name: toolCall.name,
							arguments: JSON.stringify(toolCall.arguments),
						});
					}
				}

				if (items.length === 0) continue;
				input.push(...items);
				break;
			}

			case "toolResult": {
				const { callId } = normalizeResponsesToolCallId(msg.toolCallId);
				if (strictResponsesPairing && !seenCallIds.has(callId)) {
					continue;
				}

				// Extract text and image content
				const texts: string[] = [];
				const images: ResponseInputContent[] = [];
				for (const part of msg.content) {
					if (part.type === "text") {
						texts.push((part as { text: string }).text);
					} else if (part.type === "image") {
						images.push({
							type: "input_image",
							detail: "auto",
							image_url: `data:${(part as ImageContent).mimeType};base64,${(part as ImageContent).data}`,
						} satisfies ResponseInputImage);
					}
				}
				const textResult = texts.join("\n");

				// Always send function_call_output with text (or placeholder if only images)
				input.push({
					type: "function_call_output",
					call_id: callId,
					output: sanitizeSurrogates(textResult.length > 0 ? textResult : "(see attached image)"),
				});

				// If there are images and model supports them, send a follow-up user message with images
				if (images.length > 0 && supportsImages) {
					const prefix = {
						type: "input_text",
						text: "Attached image(s) from tool result:",
					} satisfies ResponseInputText;
					input.push({ role: "user", content: [prefix, ...images] });
				}
				break;
			}
		}
		msgIndex++;
	}

	return input;
}
|
|
658
|
+
|
|
659
|
+
/**
 * Convert tool definitions into Responses API function tools.
 * Name, description and parameters are copied through; `strict` is always false.
 */
function convertTools(tools: Tool[]): OpenAITool[] {
	const converted: OpenAITool[] = [];
	for (const { name, description, parameters } of tools) {
		converted.push({
			type: "function",
			name,
			description,
			parameters: parameters as Record<string, unknown>,
			strict: false,
		});
	}
	return converted;
}
|
|
668
|
+
|
|
669
|
+
/**
 * Map a Responses API status to a stop reason.
 *
 * Missing status, "completed", "in_progress" and "queued" map to "stop";
 * "incomplete" maps to "length"; "failed" and "cancelled" map to "error".
 *
 * @throws Error for any status value not covered above.
 */
function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
	if (!status) return "stop";

	// "in_progress" and "queued" are wonky for a finished stream; they are treated as a normal stop.
	if (status === "completed" || status === "in_progress" || status === "queued") return "stop";
	if (status === "incomplete") return "length";
	if (status === "failed" || status === "cancelled") return "error";

	// Compile-time exhaustiveness check: every known status is handled above.
	const _exhaustive: never = status;
	throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}
|
package/src/providers/google.ts
CHANGED
|
@@ -179,7 +179,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
|
|
|
179
179
|
type: "toolCall",
|
|
180
180
|
id: toolCallId,
|
|
181
181
|
name: part.functionCall.name || "",
|
|
182
|
-
arguments: part.functionCall.args as Record<string, any>,
|
|
182
|
+
arguments: (part.functionCall.args ?? {}) as Record<string, any>,
|
|
183
183
|
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
|
|
184
184
|
};
|
|
185
185
|
|
|
package/src/providers/openai-codex-responses.ts
CHANGED
|
@@ -74,26 +74,12 @@ const CODEX_MAX_RETRIES = 2;
|
|
|
74
74
|
const CODEX_RETRYABLE_STATUS = new Set([408, 429, 500, 502, 503, 504]);
|
|
75
75
|
const CODEX_RETRY_DELAY_MS = 500;
|
|
76
76
|
|
|
77
|
-
/** Fast deterministic hash to shorten long strings */
|
|
78
|
-
function shortHash(str: string): string {
|
|
79
|
-
let h1 = 0xdeadbeef;
|
|
80
|
-
let h2 = 0x41c6ce57;
|
|
81
|
-
for (let i = 0; i < str.length; i++) {
|
|
82
|
-
const ch = str.charCodeAt(i);
|
|
83
|
-
h1 = Math.imul(h1 ^ ch, 2654435761);
|
|
84
|
-
h2 = Math.imul(h2 ^ ch, 1597334677);
|
|
85
|
-
}
|
|
86
|
-
h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
|
|
87
|
-
h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
|
|
88
|
-
return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
|
|
89
|
-
}
|
|
90
|
-
|
|
91
77
|
function normalizeResponsesToolCallId(id: string): { callId: string; itemId: string } {
|
|
92
78
|
const [callId, itemId] = id.split("|");
|
|
93
79
|
if (callId && itemId) {
|
|
94
80
|
return { callId, itemId };
|
|
95
81
|
}
|
|
96
|
-
const hash = shortHash(id);
|
|
82
|
+
const hash = Bun.hash.xxHash64(id).toString(36);
|
|
97
83
|
return { callId: `call_${hash}`, itemId: `item_${hash}` };
|
|
98
84
|
}
|
|
99
85
|
|
|
@@ -298,6 +284,9 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
298
284
|
}
|
|
299
285
|
} else if (eventType === "response.output_text.delta") {
|
|
300
286
|
if (currentItem && currentItem.type === "message" && currentBlock?.type === "text") {
|
|
287
|
+
if (!currentItem.content || currentItem.content.length === 0) {
|
|
288
|
+
continue;
|
|
289
|
+
}
|
|
301
290
|
const lastPart = currentItem.content[currentItem.content.length - 1];
|
|
302
291
|
if (lastPart && lastPart.type === "output_text") {
|
|
303
292
|
const delta = (rawEvent as { delta?: string }).delta || "";
|
|
@@ -313,6 +302,9 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
313
302
|
}
|
|
314
303
|
} else if (eventType === "response.refusal.delta") {
|
|
315
304
|
if (currentItem && currentItem.type === "message" && currentBlock?.type === "text") {
|
|
305
|
+
if (!currentItem.content || currentItem.content.length === 0) {
|
|
306
|
+
continue;
|
|
307
|
+
}
|
|
316
308
|
const lastPart = currentItem.content[currentItem.content.length - 1];
|
|
317
309
|
if (lastPart && lastPart.type === "refusal") {
|
|
318
310
|
const delta = (rawEvent as { delta?: string }).delta || "";
|
|
@@ -338,6 +330,14 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
338
330
|
partial: output,
|
|
339
331
|
});
|
|
340
332
|
}
|
|
333
|
+
} else if (eventType === "response.function_call_arguments.done") {
|
|
334
|
+
if (currentItem?.type === "function_call" && currentBlock?.type === "toolCall") {
|
|
335
|
+
const args = (rawEvent as { arguments?: string }).arguments;
|
|
336
|
+
if (typeof args === "string") {
|
|
337
|
+
currentBlock.partialJson = args;
|
|
338
|
+
currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
341
|
} else if (eventType === "response.output_item.done") {
|
|
342
342
|
const item = rawEvent.item as ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall;
|
|
343
343
|
if (item.type === "reasoning" && currentBlock?.type === "thinking") {
|
|
@@ -622,7 +622,7 @@ function convertMessages(model: Model<"openai-codex-responses">, context: Contex
|
|
|
622
622
|
if (!msgId) {
|
|
623
623
|
msgId = `msg_${msgIndex}`;
|
|
624
624
|
} else if (msgId.length > 64) {
|
|
625
|
-
msgId = `msg_${shortHash(msgId)}`;
|
|
625
|
+
msgId = `msg_${Bun.hash.xxHash64(msgId).toString(36)}`;
|
|
626
626
|
}
|
|
627
627
|
output.push({
|
|
628
628
|
type: "message",
|
|
package/src/providers/openai-completions.ts
CHANGED
|
@@ -50,6 +50,10 @@ function normalizeMistralToolId(id: string, isMistral: boolean): string {
|
|
|
50
50
|
return normalized;
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
+
type ResolvedOpenAICompat = Required<Omit<OpenAICompat, "openRouterRouting">> & {
|
|
54
|
+
openRouterRouting?: OpenAICompat["openRouterRouting"];
|
|
55
|
+
};
|
|
56
|
+
|
|
53
57
|
/**
|
|
54
58
|
* Check if conversation messages contain tool calls or tool results.
|
|
55
59
|
* This is needed because Anthropic (via proxy) requires the tools param
|
|
@@ -429,6 +433,11 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
|
|
429
433
|
params.reasoning_effort = options.reasoningEffort;
|
|
430
434
|
}
|
|
431
435
|
|
|
436
|
+
// OpenRouter provider routing preferences
|
|
437
|
+
if (model.baseUrl.includes("openrouter.ai") && compat.openRouterRouting) {
|
|
438
|
+
(params as { provider?: unknown }).provider = compat.openRouterRouting;
|
|
439
|
+
}
|
|
440
|
+
|
|
432
441
|
return params;
|
|
433
442
|
}
|
|
434
443
|
|
|
@@ -468,7 +477,7 @@ function maybeAddOpenRouterAnthropicCacheControl(
|
|
|
468
477
|
export function convertMessages(
|
|
469
478
|
model: Model<"openai-completions">,
|
|
470
479
|
context: Context,
|
|
471
|
-
compat: Required<OpenAICompat>,
|
|
480
|
+
compat: ResolvedOpenAICompat,
|
|
472
481
|
): ChatCompletionMessageParam[] {
|
|
473
482
|
const params: ChatCompletionMessageParam[] = [];
|
|
474
483
|
|
|
@@ -718,7 +727,7 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto
|
|
|
718
727
|
* Provider takes precedence over URL-based detection since it's explicitly configured.
|
|
719
728
|
* Returns a fully resolved OpenAICompat object with all fields set.
|
|
720
729
|
*/
|
|
721
|
-
function detectCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
|
|
730
|
+
function detectCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
|
|
722
731
|
const provider = model.provider;
|
|
723
732
|
const baseUrl = model.baseUrl;
|
|
724
733
|
|
|
@@ -753,6 +762,7 @@ function detectCompat(model: Model<"openai-completions">): Required<OpenAICompat
|
|
|
753
762
|
requiresThinkingAsText: isMistral,
|
|
754
763
|
requiresMistralToolIds: isMistral,
|
|
755
764
|
thinkingFormat: isZai ? "zai" : "openai",
|
|
765
|
+
openRouterRouting: undefined,
|
|
756
766
|
};
|
|
757
767
|
}
|
|
758
768
|
|
|
@@ -760,7 +770,7 @@ function detectCompat(model: Model<"openai-completions">): Required<OpenAICompat
|
|
|
760
770
|
* Get resolved compatibility settings for a model.
|
|
761
771
|
* Uses explicit model.compat if provided, otherwise auto-detects from provider/URL.
|
|
762
772
|
*/
|
|
763
|
-
function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
|
|
773
|
+
function getCompat(model: Model<"openai-completions">): ResolvedOpenAICompat {
|
|
764
774
|
const detected = detectCompat(model);
|
|
765
775
|
if (!model.compat) return detected;
|
|
766
776
|
|
|
@@ -776,5 +786,6 @@ function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
|
|
|
776
786
|
requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
|
|
777
787
|
requiresMistralToolIds: model.compat.requiresMistralToolIds ?? detected.requiresMistralToolIds,
|
|
778
788
|
thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
|
|
789
|
+
openRouterRouting: model.compat.openRouterRouting ?? detected.openRouterRouting,
|
|
779
790
|
};
|
|
780
791
|
}
|
|
package/src/providers/openai-responses.ts
CHANGED
|
@@ -31,20 +31,6 @@ import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
|
|
|
31
31
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode";
|
|
32
32
|
import { transformMessages } from "./transform-messages";
|
|
33
33
|
|
|
34
|
-
/** Fast deterministic hash to shorten long strings */
|
|
35
|
-
function shortHash(str: string): string {
|
|
36
|
-
let h1 = 0xdeadbeef;
|
|
37
|
-
let h2 = 0x41c6ce57;
|
|
38
|
-
for (let i = 0; i < str.length; i++) {
|
|
39
|
-
const ch = str.charCodeAt(i);
|
|
40
|
-
h1 = Math.imul(h1 ^ ch, 2654435761);
|
|
41
|
-
h2 = Math.imul(h2 ^ ch, 1597334677);
|
|
42
|
-
}
|
|
43
|
-
h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
|
|
44
|
-
h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
|
|
45
|
-
return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
|
|
46
|
-
}
|
|
47
|
-
|
|
48
34
|
// OpenAI Responses-specific options
|
|
49
35
|
export interface OpenAIResponsesOptions extends StreamOptions {
|
|
50
36
|
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
@@ -195,6 +181,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|
|
195
181
|
}
|
|
196
182
|
} else if (event.type === "response.output_text.delta") {
|
|
197
183
|
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
|
|
184
|
+
if (!currentItem.content || currentItem.content.length === 0) {
|
|
185
|
+
continue;
|
|
186
|
+
}
|
|
198
187
|
const lastPart = currentItem.content[currentItem.content.length - 1];
|
|
199
188
|
if (lastPart && lastPart.type === "output_text") {
|
|
200
189
|
currentBlock.text += event.delta;
|
|
@@ -209,6 +198,9 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|
|
209
198
|
}
|
|
210
199
|
} else if (event.type === "response.refusal.delta") {
|
|
211
200
|
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
|
|
201
|
+
if (!currentItem.content || currentItem.content.length === 0) {
|
|
202
|
+
continue;
|
|
203
|
+
}
|
|
212
204
|
const lastPart = currentItem.content[currentItem.content.length - 1];
|
|
213
205
|
if (lastPart && lastPart.type === "refusal") {
|
|
214
206
|
currentBlock.text += event.delta;
|
|
@@ -449,7 +441,7 @@ function normalizeResponsesToolCallId(id: string): { callId: string; itemId: str
|
|
|
449
441
|
if (callId && itemId) {
|
|
450
442
|
return { callId, itemId };
|
|
451
443
|
}
|
|
452
|
-
const hash = shortHash(id);
|
|
444
|
+
const hash = Bun.hash.xxHash64(id).toString(36);
|
|
453
445
|
return { callId: `call_${hash}`, itemId: `item_${hash}` };
|
|
454
446
|
}
|
|
455
447
|
|
|
@@ -542,7 +534,7 @@ function convertMessages(
|
|
|
542
534
|
if (!msgId) {
|
|
543
535
|
msgId = `msg_${msgIndex}`;
|
|
544
536
|
} else if (msgId.length > 64) {
|
|
545
|
-
msgId = `msg_${shortHash(msgId)}`;
|
|
537
|
+
msgId = `msg_${Bun.hash.xxHash64(msgId).toString(36)}`;
|
|
546
538
|
}
|
|
547
539
|
output.push({
|
|
548
540
|
type: "message",
|
|
package/src/providers/transform-messages.ts
CHANGED
|
@@ -9,26 +9,12 @@ function normalizeToolCallId(id: string): string {
|
|
|
9
9
|
return id.replace(/[^a-zA-Z0-9_-]/g, "").slice(0, 40);
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
-
/** Fast deterministic hash to shorten long strings */
|
|
13
|
-
function shortHash(str: string): string {
|
|
14
|
-
let h1 = 0xdeadbeef;
|
|
15
|
-
let h2 = 0x41c6ce57;
|
|
16
|
-
for (let i = 0; i < str.length; i++) {
|
|
17
|
-
const ch = str.charCodeAt(i);
|
|
18
|
-
h1 = Math.imul(h1 ^ ch, 2654435761);
|
|
19
|
-
h2 = Math.imul(h2 ^ ch, 1597334677);
|
|
20
|
-
}
|
|
21
|
-
h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
|
|
22
|
-
h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
|
|
23
|
-
return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
|
|
24
|
-
}
|
|
25
|
-
|
|
26
12
|
function normalizeResponsesToolCallId(id: string): string {
|
|
27
13
|
const [callId, itemId] = id.split("|");
|
|
28
14
|
if (callId && itemId) {
|
|
29
15
|
return id;
|
|
30
16
|
}
|
|
31
|
-
const hash = shortHash(id);
|
|
17
|
+
const hash = Bun.hash.xxHash64(id).toString(36);
|
|
32
18
|
return `call_${hash}|item_${hash}`;
|
|
33
19
|
}
|
|
34
20
|
|
|
@@ -36,7 +22,10 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
|
|
|
36
22
|
// Build a map of original tool call IDs to normalized IDs for github-copilot cross-API switches
|
|
37
23
|
const toolCallIdMap = new Map<string, string>();
|
|
38
24
|
const skippedToolCallIds = new Set<string>();
|
|
39
|
-
const needsResponsesToolCallIds = model.api === "openai-responses" || model.api === "openai-codex-responses";
|
|
25
|
+
const needsResponsesToolCallIds =
|
|
26
|
+
model.api === "openai-responses" ||
|
|
27
|
+
model.api === "openai-codex-responses" ||
|
|
28
|
+
model.api === "azure-openai-responses";
|
|
40
29
|
|
|
41
30
|
// First pass: transform messages (thinking blocks, tool call ID normalization)
|
|
42
31
|
const transformed = messages.flatMap<Message>((msg): Message[] => {
|
package/src/stream.ts
CHANGED
|
@@ -4,6 +4,7 @@ import * as path from "node:path";
|
|
|
4
4
|
import { supportsXhigh } from "./models";
|
|
5
5
|
import { type BedrockOptions, streamBedrock } from "./providers/amazon-bedrock";
|
|
6
6
|
import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic";
|
|
7
|
+
import { type AzureOpenAIResponsesOptions, streamAzureOpenAIResponses } from "./providers/azure-openai-responses";
|
|
7
8
|
import { type CursorOptions, streamCursor } from "./providers/cursor";
|
|
8
9
|
import { type GoogleOptions, streamGoogle } from "./providers/google";
|
|
9
10
|
import {
|
|
@@ -108,6 +109,7 @@ export function getEnvApiKey(provider: any): string | undefined {
|
|
|
108
109
|
minimax: "MINIMAX_API_KEY",
|
|
109
110
|
opencode: "OPENCODE_API_KEY",
|
|
110
111
|
cursor: "CURSOR_ACCESS_TOKEN",
|
|
112
|
+
"azure-openai-responses": "AZURE_OPENAI_API_KEY",
|
|
111
113
|
};
|
|
112
114
|
|
|
113
115
|
const envVar = envMap[provider];
|
|
@@ -144,6 +146,9 @@ export function stream<TApi extends Api>(
|
|
|
144
146
|
case "openai-responses":
|
|
145
147
|
return streamOpenAIResponses(model as Model<"openai-responses">, context, providerOptions as any);
|
|
146
148
|
|
|
149
|
+
case "azure-openai-responses":
|
|
150
|
+
return streamAzureOpenAIResponses(model as Model<"azure-openai-responses">, context, providerOptions as any);
|
|
151
|
+
|
|
147
152
|
case "openai-codex-responses":
|
|
148
153
|
return streamOpenAICodexResponses(model as Model<"openai-codex-responses">, context, providerOptions as any);
|
|
149
154
|
|
|
@@ -345,6 +350,12 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
345
350
|
reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
|
|
346
351
|
} satisfies OpenAIResponsesOptions;
|
|
347
352
|
|
|
353
|
+
case "azure-openai-responses":
|
|
354
|
+
return {
|
|
355
|
+
...base,
|
|
356
|
+
reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
|
|
357
|
+
} satisfies AzureOpenAIResponsesOptions;
|
|
358
|
+
|
|
348
359
|
case "openai-codex-responses":
|
|
349
360
|
return {
|
|
350
361
|
...base,
|
package/src/types.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { TSchema } from "@sinclair/typebox";
|
|
2
2
|
import type { BedrockOptions } from "./providers/amazon-bedrock";
|
|
3
3
|
import type { AnthropicOptions } from "./providers/anthropic";
|
|
4
|
+
import type { AzureOpenAIResponsesOptions } from "./providers/azure-openai-responses";
|
|
4
5
|
import type { CursorOptions } from "./providers/cursor";
|
|
5
6
|
import type {
|
|
6
7
|
DeleteArgs,
|
|
@@ -33,6 +34,7 @@ export type Api =
|
|
|
33
34
|
| "openai-completions"
|
|
34
35
|
| "openai-responses"
|
|
35
36
|
| "openai-codex-responses"
|
|
37
|
+
| "azure-openai-responses"
|
|
36
38
|
| "anthropic-messages"
|
|
37
39
|
| "bedrock-converse-stream"
|
|
38
40
|
| "google-generative-ai"
|
|
@@ -46,6 +48,7 @@ export interface ApiOptionsMap {
|
|
|
46
48
|
"openai-completions": OpenAICompletionsOptions;
|
|
47
49
|
"openai-responses": OpenAIResponsesOptions;
|
|
48
50
|
"openai-codex-responses": OpenAICodexResponsesOptions;
|
|
51
|
+
"azure-openai-responses": AzureOpenAIResponsesOptions;
|
|
49
52
|
"google-generative-ai": GoogleOptions;
|
|
50
53
|
"google-gemini-cli": GoogleGeminiCliOptions;
|
|
51
54
|
"google-vertex": GoogleVertexOptions;
|
|
@@ -289,6 +292,20 @@ export interface OpenAICompat {
|
|
|
289
292
|
requiresMistralToolIds?: boolean;
|
|
290
293
|
/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "zai" uses thinking: { type: "enabled" }. Default: "openai". */
|
|
291
294
|
thinkingFormat?: "openai" | "zai";
|
|
295
|
+
/** OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. */
|
|
296
|
+
openRouterRouting?: OpenRouterRouting;
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
/**
|
|
300
|
+
* OpenRouter provider routing preferences.
|
|
301
|
+
* Controls which upstream providers OpenRouter routes requests to.
|
|
302
|
+
* @see https://openrouter.ai/docs/provider-routing
|
|
303
|
+
*/
|
|
304
|
+
export interface OpenRouterRouting {
|
|
305
|
+
/** List of provider slugs to exclusively use for this request (e.g., ["amazon-bedrock", "anthropic"]). */
|
|
306
|
+
only?: string[];
|
|
307
|
+
/** List of provider slugs to try in order (e.g., ["anthropic", "openai"]). */
|
|
308
|
+
order?: string[];
|
|
292
309
|
}
|
|
293
310
|
|
|
294
311
|
// Model interface for the unified model system
|