veryfront 0.1.207 → 0.1.209
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/deno.js +1 -1
- package/esm/src/provider/runtime-loader.d.ts +47 -0
- package/esm/src/provider/runtime-loader.d.ts.map +1 -1
- package/esm/src/provider/runtime-loader.js +1386 -70
- package/esm/src/provider/types.d.ts +2 -0
- package/esm/src/provider/types.d.ts.map +1 -1
- package/esm/src/utils/version-constant.d.ts +1 -1
- package/esm/src/utils/version-constant.js +1 -1
- package/package.json +1 -1
- package/src/deno.js +1 -1
- package/src/src/provider/runtime-loader.ts +1872 -102
- package/src/src/provider/types.ts +2 -0
- package/src/src/utils/version-constant.ts +1 -1
|
@@ -13,6 +13,9 @@ function getAnthropicMessagesUrl(baseURL) {
|
|
|
13
13
|
/**
 * Build the Chat Completions endpoint URL.
 *
 * @param baseURL Optional base URL; the module default is used when nullish.
 * @returns Whatever `joinUrl` produces for the base and "chat/completions".
 */
function getOpenAIChatCompletionsUrl(baseURL) {
    const root = baseURL ?? DEFAULT_OPENAI_BASE_URL;
    return joinUrl(root, "chat/completions");
}
|
|
16
|
+
/**
 * Build the Responses endpoint URL.
 *
 * @param baseURL Optional base URL; the module default is used when nullish.
 * @returns Whatever `joinUrl` produces for the base and "responses".
 */
function getOpenAIResponsesUrl(baseURL) {
    const root = baseURL ?? DEFAULT_OPENAI_BASE_URL;
    return joinUrl(root, "responses");
}
|
|
16
19
|
/**
 * Build the `generateContent` endpoint URL for a given model.
 *
 * @param baseURL Optional base URL; the module default is used when nullish.
 * @param modelId Model identifier; percent-encoded before being placed in the path.
 * @returns Whatever `joinUrl` produces for the base and `models/<id>:generateContent`.
 */
function getGoogleGenerateContentUrl(baseURL, modelId) {
    const root = baseURL ?? DEFAULT_GOOGLE_BASE_URL;
    const encodedModel = encodeURIComponent(modelId);
    return joinUrl(root, `models/${encodedModel}:generateContent`);
}
|
|
@@ -77,26 +80,184 @@ function extractGoogleUsageTokens(payload) {
|
|
|
77
80
|
const promptTokenCount = usageMetadata?.promptTokenCount;
|
|
78
81
|
return typeof promptTokenCount === "number" ? promptTokenCount : undefined;
|
|
79
82
|
}
|
|
80
|
-
|
|
81
|
-
const
|
|
82
|
-
return
|
|
83
|
+
/**
 * Create an append-only collector for warnings.
 *
 * `push` records a warning; `drain` returns a shallow copy of everything
 * recorded so far. Despite its name, `drain` does not empty the collector.
 */
function createWarningCollector() {
    const collected = [];
    const push = (warning) => {
        collected.push(warning);
    };
    // Hand back a copy so callers cannot mutate the internal list.
    const drain = () => [...collected];
    return { push, drain };
}
|
|
94
|
+
/**
 * Base class for typed provider errors. The `retryable` flag is the
 * primary signal for callers (or a retry wrapper) to decide whether to
 * re-issue the request. `retryAfterMs` is set when the provider gave an
 * explicit delay hint (Retry-After header, Retry-Info trailer).
 *
 * Constructor options:
 *  - `message`      human-readable description, forwarded to `Error`
 *  - `provider`     provider identifier the failure came from
 *  - `status`       HTTP status code of the failed response
 *  - `retryable`    whether re-issuing the request may succeed
 *  - `retryAfterMs` optional delay hint in milliseconds
 */
export class ProviderError extends Error {
    provider;
    status;
    retryable;
    retryAfterMs;
    constructor(options) {
        super(options.message);
        // `new.target` is the class actually being constructed, so subclasses
        // report their own name in stack traces and `String(err)`. The previous
        // expression read `.name` off an import.meta ponyfill, which is never a
        // class name and throws when the ponyfill is not registered.
        this.name = new.target.name;
        this.provider = options.provider;
        this.status = options.status;
        this.retryable = options.retryable;
        if (options.retryAfterMs !== undefined) {
            this.retryAfterMs = options.retryAfterMs;
        }
    }
}
|
|
116
|
+
/**
 * Raised when the provider signals that it is overloaded
 * (Anthropic 529, OpenAI/Google 503).
 */
export class ProviderOverloadedError extends ProviderError {}
|
|
119
|
+
/**
 * Raised when the provider is rate limiting this API key
 * (OpenAI/Google 429 with Retry-After).
 */
export class ProviderRateLimitError extends ProviderError {}
|
|
122
|
+
/**
 * Raised when the provider account quota is exhausted; retrying cannot help.
 */
export class ProviderQuotaError extends ProviderError {}
|
|
125
|
+
/**
 * Catch-all for non-retryable 4xx/5xx failures that fit no other bucket.
 */
export class ProviderRequestError extends ProviderError {}
|
|
128
|
+
/**
 * Convert a `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are understood:
 *  - delay-seconds: an unsigned decimal number ("120", or "1.5" as some
 *    providers send fractional seconds);
 *  - HTTP-date: an absolute timestamp, converted to a non-negative delay
 *    relative to the current clock.
 *
 * @param header Raw header value, or null/undefined/"" when absent.
 * @returns Delay in milliseconds, or `undefined` when the header is missing
 *   or not in a recognised form.
 */
function parseRetryAfterMs(header) {
    if (!header) {
        return undefined;
    }
    const value = String(header).trim();
    if (value === "") {
        return undefined;
    }
    // Match the numeric form strictly. `Number()` alone would also accept
    // whitespace-only strings (as 0), hex and exponent notation.
    if (/^\d+(?:\.\d+)?$/.test(value)) {
        const delayMs = Math.round(Number(value) * 1000);
        return Number.isFinite(delayMs) ? delayMs : undefined;
    }
    // Every HTTP-date contains day/month names. Requiring a letter keeps
    // values such as "-5" away from Date.parse, whose legacy heuristics may
    // interpret bare numbers as dates.
    if (!/[a-z]/i.test(value)) {
        return undefined;
    }
    const retryAt = Date.parse(value);
    if (Number.isNaN(retryAt)) {
        return undefined;
    }
    // A timestamp in the past means "retry now", never a negative delay.
    return Math.max(0, retryAt - Date.now());
}
|
|
142
|
+
/**
 * Inspect a non-2xx response and build the most specific ProviderError
 * subclass we can. Reads the response body as text. Body classification
 * handles the cases where HTTP status alone is ambiguous, notably OpenAI
 * `insufficient_quota` vs `rate_limit_exceeded` both arriving as 429.
 *
 * Classification, in order:
 *  - Anthropic 529, OpenAI/Google 503        -> ProviderOverloadedError (retryable)
 *  - OpenAI 429 with `insufficient_quota`    -> ProviderQuotaError (not retryable)
 *  - Google 429 with `RESOURCE_EXHAUSTED`    -> ProviderQuotaError (not retryable)
 *  - any other OpenAI/Google/Anthropic 429   -> ProviderRateLimitError (retryable)
 *  - everything else                         -> ProviderRequestError (not retryable)
 *
 * @param provider Provider kind ("anthropic", "openai", "google", ...).
 * @param response The failed fetch Response; its body is consumed here.
 * @returns The constructed error; the caller is responsible for throwing it.
 */
async function buildProviderError(provider, response) {
    const rawBody = await response.text();
    const status = response.status;
    // Prefer the provider's own body text; fall back to the status line.
    const message = rawBody.trim() || `${status} ${response.statusText}`.trim();
    const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));
    let parsedBody;
    try {
        parsedBody = JSON.parse(rawBody);
    }
    catch {
        // Non-JSON bodies (HTML error pages, plain text) carry no error code.
        parsedBody = undefined;
    }
    const errorRecord = readRecord(parsedBody?.error);
    // Providers disagree on where the machine-readable code lives: take the
    // first string among `code`, `type` and `status`.
    const errorCode = [errorRecord?.code, errorRecord?.type, errorRecord?.status]
        .find((candidate) => typeof candidate === "string");
    const common = { provider, status, message };
    const retryHint = retryAfterMs !== undefined ? { retryAfterMs } : {};
    // Anthropic 529 = overloaded ({ error: { type: "overloaded_error" } }).
    // OpenAI / Google 503 = overloaded.
    const overloaded = (provider === "anthropic" && status === 529) ||
        ((provider === "openai" || provider === "google") && status === 503);
    if (overloaded) {
        return new ProviderOverloadedError({ ...common, retryable: true, ...retryHint });
    }
    if (status === 429) {
        // OpenAI `insufficient_quota` is a hard quota. Google RESOURCE_EXHAUSTED
        // is almost always the daily free-tier quota, so surface it as a hard
        // quota error to stop callers hot-looping on retries that cannot succeed.
        const quotaExhausted = (provider === "openai" && errorCode === "insufficient_quota") ||
            (provider === "google" && errorCode === "RESOURCE_EXHAUSTED");
        if (quotaExhausted) {
            return new ProviderQuotaError({ ...common, retryable: false });
        }
        // Anthropic also answers 429 (`rate_limit_error`) with a Retry-After
        // header; without this branch it fell through as a non-retryable
        // request error and lost the delay hint.
        if (provider === "openai" || provider === "google" || provider === "anthropic") {
            return new ProviderRateLimitError({ ...common, retryable: true, ...retryHint });
        }
    }
    return new ProviderRequestError({ ...common, retryable: false });
}
|
|
84
238
|
/**
 * Issue a request through `options.fetchImpl` and return the parsed JSON body.
 *
 * On a non-OK response a provider error is built from the response, its
 * message is prefixed with `"<providerLabel> request failed: "`, and it is
 * thrown.
 *
 * @param options Reads `fetchImpl`, `url`, `init`, `providerKind`, `providerLabel`.
 * @returns The value of `response.json()`.
 */
async function requestJson(options) {
    const reply = await options.fetchImpl(options.url, options.init);
    if (reply.ok) {
        return reply.json();
    }
    const failure = await buildProviderError(options.providerKind, reply);
    failure.message = `${options.providerLabel} request failed: ${failure.message}`;
    throw failure;
}
|
|
92
247
|
/**
 * Issue a request through `options.fetchImpl` and return the raw response
 * body for streaming consumption.
 *
 * A non-OK response is turned into a provider error whose message is
 * prefixed with `"<providerLabel> request failed: "`. An OK response without
 * a body raises a non-retryable ProviderRequestError.
 *
 * @param options Reads `fetchImpl`, `url`, `init`, `providerKind`, `providerLabel`.
 * @returns `response.body` of the successful response.
 */
async function requestStream(options) {
    const reply = await options.fetchImpl(options.url, options.init);
    if (!reply.ok) {
        const failure = await buildProviderError(options.providerKind, reply);
        failure.message = `${options.providerLabel} request failed: ${failure.message}`;
        throw failure;
    }
    const stream = reply.body;
    if (stream) {
        return stream;
    }
    throw new ProviderRequestError({
        provider: options.providerKind,
        status: reply.status,
        message: `${options.providerLabel} request failed: stream body missing`,
        retryable: false,
    });
}
|
|
@@ -133,6 +294,11 @@ function toOpenAICompatibleMessages(prompt) {
|
|
|
133
294
|
text += part.text;
|
|
134
295
|
continue;
|
|
135
296
|
}
|
|
297
|
+
// OpenAI Chat Completions has no roundtrip slot for Anthropic
|
|
298
|
+
// thinking blocks — they get dropped on replay. Anthropic-only.
|
|
299
|
+
if (part.type === "reasoning") {
|
|
300
|
+
continue;
|
|
301
|
+
}
|
|
136
302
|
toolCalls.push({
|
|
137
303
|
id: part.toolCallId,
|
|
138
304
|
type: "function",
|
|
@@ -222,6 +388,8 @@ function extractAnthropicUsage(payload) {
|
|
|
222
388
|
}
|
|
223
389
|
const inputTokens = usage.input_tokens;
|
|
224
390
|
const outputTokens = usage.output_tokens;
|
|
391
|
+
const cacheCreationInputTokens = usage.cache_creation_input_tokens;
|
|
392
|
+
const cacheReadInputTokens = usage.cache_read_input_tokens;
|
|
225
393
|
return {
|
|
226
394
|
inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
|
|
227
395
|
outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
|
|
@@ -229,6 +397,8 @@ function extractAnthropicUsage(payload) {
|
|
|
229
397
|
? (typeof inputTokens === "number" ? inputTokens : 0) +
|
|
230
398
|
(typeof outputTokens === "number" ? outputTokens : 0)
|
|
231
399
|
: undefined,
|
|
400
|
+
...(typeof cacheCreationInputTokens === "number" ? { cacheCreationInputTokens } : {}),
|
|
401
|
+
...(typeof cacheReadInputTokens === "number" ? { cacheReadInputTokens } : {}),
|
|
232
402
|
};
|
|
233
403
|
}
|
|
234
404
|
function mergeUsage(current, next) {
|
|
@@ -240,10 +410,15 @@ function mergeUsage(current, next) {
|
|
|
240
410
|
}
|
|
241
411
|
const inputTokens = next.inputTokens ?? current.inputTokens;
|
|
242
412
|
const outputTokens = next.outputTokens ?? current.outputTokens;
|
|
413
|
+
const cacheCreationInputTokens = next.cacheCreationInputTokens ??
|
|
414
|
+
current.cacheCreationInputTokens;
|
|
415
|
+
const cacheReadInputTokens = next.cacheReadInputTokens ?? current.cacheReadInputTokens;
|
|
243
416
|
return {
|
|
244
417
|
inputTokens,
|
|
245
418
|
outputTokens,
|
|
246
419
|
totalTokens: (inputTokens ?? 0) + (outputTokens ?? 0),
|
|
420
|
+
...(cacheCreationInputTokens !== undefined ? { cacheCreationInputTokens } : {}),
|
|
421
|
+
...(cacheReadInputTokens !== undefined ? { cacheReadInputTokens } : {}),
|
|
247
422
|
};
|
|
248
423
|
}
|
|
249
424
|
function normalizeAnthropicToolChoice(toolChoice) {
|
|
@@ -258,6 +433,23 @@ function toSnakeCaseRecord(record) {
|
|
|
258
433
|
value,
|
|
259
434
|
]));
|
|
260
435
|
}
|
|
436
|
+
/**
 * Recursively rewrite object keys from camelCase to snake_case.
 *
 * Arrays are mapped element by element, objects are rebuilt with converted
 * keys, and every other value (primitives, null) is returned untouched.
 * Used for nested config such as Anthropic mcp_servers, where
 * authorizationToken / toolConfiguration / allowedTools all need conversion.
 */
function deepSnakeCase(value) {
    if (value === null || typeof value !== "object") {
        return value;
    }
    if (Array.isArray(value)) {
        return value.map((item) => deepSnakeCase(item));
    }
    const pairs = [];
    for (const [name, inner] of Object.entries(value)) {
        // Each capital letter becomes "_" plus its lowercase form.
        const snakeName = name.replace(/[A-Z]/g, (upper) => `_${upper.toLowerCase()}`);
        pairs.push([snakeName, deepSnakeCase(inner)]);
    }
    return Object.fromEntries(pairs);
}
|
|
261
453
|
function pushAnthropicUserContent(messages, content) {
|
|
262
454
|
if (content.length === 0) {
|
|
263
455
|
return;
|
|
@@ -272,7 +464,23 @@ function pushAnthropicUserContent(messages, content) {
|
|
|
272
464
|
content,
|
|
273
465
|
});
|
|
274
466
|
}
|
|
275
|
-
|
|
467
|
+
/**
 * Translate a {@link ProviderCacheTtl} into Anthropic's `cache_control` shape.
 *
 *  - `undefined` / `false` -> `undefined` (caching not requested)
 *  - `"1h"`                -> `{ type: "ephemeral", ttl: "1h" }` (extended cache)
 *  - anything else         -> `{ type: "ephemeral" }` (5-minute default,
 *                             e.g. `true` or `"5m"`)
 */
function resolveAnthropicCacheControlBlock(ttl) {
    const cachingRequested = ttl !== undefined && ttl !== false;
    if (!cachingRequested) {
        return undefined;
    }
    return ttl === "1h"
        ? { type: "ephemeral", ttl: "1h" }
        : { type: "ephemeral" };
}
|
|
483
|
+
function toAnthropicMessages(prompt, systemCacheControl) {
|
|
276
484
|
const systemParts = [];
|
|
277
485
|
const messages = [];
|
|
278
486
|
for (const message of prompt) {
|
|
@@ -291,11 +499,32 @@ function toAnthropicMessages(prompt) {
|
|
|
291
499
|
case "assistant":
|
|
292
500
|
messages.push({
|
|
293
501
|
role: "assistant",
|
|
294
|
-
content: message.content.map((part) =>
|
|
295
|
-
type
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
502
|
+
content: message.content.map((part) => {
|
|
503
|
+
if (part.type === "text") {
|
|
504
|
+
return { type: "text", text: part.text };
|
|
505
|
+
}
|
|
506
|
+
if (part.type === "reasoning") {
|
|
507
|
+
// Redacted thinking blocks roundtrip as the encrypted blob
|
|
508
|
+
// form Anthropic gave us. Plain thinking blocks need the
|
|
509
|
+
// signature to verify on the server.
|
|
510
|
+
if (typeof part.redactedData === "string") {
|
|
511
|
+
return {
|
|
512
|
+
type: "redacted_thinking",
|
|
513
|
+
data: part.redactedData,
|
|
514
|
+
};
|
|
515
|
+
}
|
|
516
|
+
return {
|
|
517
|
+
type: "thinking",
|
|
518
|
+
thinking: part.text ?? "",
|
|
519
|
+
...(typeof part.signature === "string" ? { signature: part.signature } : {}),
|
|
520
|
+
};
|
|
521
|
+
}
|
|
522
|
+
return {
|
|
523
|
+
type: "tool_use",
|
|
524
|
+
id: part.toolCallId,
|
|
525
|
+
name: part.toolName,
|
|
526
|
+
input: part.input,
|
|
527
|
+
};
|
|
299
528
|
}),
|
|
300
529
|
});
|
|
301
530
|
break;
|
|
@@ -308,12 +537,55 @@ function toAnthropicMessages(prompt) {
|
|
|
308
537
|
break;
|
|
309
538
|
}
|
|
310
539
|
}
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
540
|
+
if (systemParts.length === 0) {
|
|
541
|
+
return { messages };
|
|
542
|
+
}
|
|
543
|
+
const joined = systemParts.join("\n\n");
|
|
544
|
+
// Cache-controlled system prompts must use the array-of-blocks form so the
|
|
545
|
+
// breakpoint lands on an individual content block. Callers that don't opt
|
|
546
|
+
// in keep the legacy raw-string form for backward compatibility.
|
|
547
|
+
if (systemCacheControl) {
|
|
548
|
+
return {
|
|
549
|
+
system: [{
|
|
550
|
+
type: "text",
|
|
551
|
+
text: joined,
|
|
552
|
+
cache_control: systemCacheControl,
|
|
553
|
+
}],
|
|
554
|
+
messages,
|
|
555
|
+
};
|
|
556
|
+
}
|
|
557
|
+
return { system: joined, messages };
|
|
558
|
+
}
|
|
559
|
+
/**
 * Short-name → latest-versioned-type alias map for Anthropic provider tools.
 *
 * Anthropic tool types are date-stamped (e.g. `code_execution_20260120`) so
 * callers either pin a version or get the latest. We accept both: a caller
 * can pass `anthropic.code_execution` and we map to the latest known version,
 * or pass `anthropic.code_execution_20250522` and we forward verbatim.
 *
 * Versions chosen here are the latest documented releases as of 2026-04-15
 * — see https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview.
 * When Anthropic ships newer versions, update this map.
 */
const ANTHROPIC_TOOL_VERSION_ALIASES = {
    code_execution: "code_execution_20260120",
    computer_use: "computer_20250124",
    computer: "computer_20250124",
    text_editor: "text_editor_20250728",
    bash: "bash_20250124",
    memory: "memory_20250818",
    web_search: "web_search_20250305",
    web_fetch: "web_fetch_20250910",
};
/**
 * Resolve a provider tool type (the part after `anthropic.`) to the string
 * sent on the wire.
 *
 * @param rawType Tool type, either a short alias or an already-versioned name.
 * @returns `rawType` unchanged when it ends in an 8-digit date stamp or has
 *   no alias; otherwise the versioned name from the alias map.
 */
function resolveAnthropicProviderType(rawType) {
    // Already-versioned types (contain a date stamp suffix) pass through verbatim.
    if (/_\d{8}$/.test(rawType)) {
        return rawType;
    }
    // Own-property check: a plain index lookup would walk the prototype chain
    // and return e.g. Object.prototype.constructor for "constructor".
    return Object.hasOwn(ANTHROPIC_TOOL_VERSION_ALIASES, rawType)
        ? ANTHROPIC_TOOL_VERSION_ALIASES[rawType]
        : rawType;
}
|
|
316
|
-
function toAnthropicTools(tools) {
|
|
588
|
+
function toAnthropicTools(tools, toolsCacheControl) {
|
|
317
589
|
if (!tools) {
|
|
318
590
|
return undefined;
|
|
319
591
|
}
|
|
@@ -330,17 +602,31 @@ function toAnthropicTools(tools) {
|
|
|
330
602
|
if (!tool.id.startsWith("anthropic.")) {
|
|
331
603
|
continue;
|
|
332
604
|
}
|
|
333
|
-
const
|
|
334
|
-
if (
|
|
605
|
+
const rawType = tool.id.slice("anthropic.".length);
|
|
606
|
+
if (rawType.length === 0) {
|
|
335
607
|
continue;
|
|
336
608
|
}
|
|
337
609
|
normalized.push({
|
|
338
|
-
type:
|
|
610
|
+
type: resolveAnthropicProviderType(rawType),
|
|
339
611
|
name: tool.name,
|
|
340
612
|
...toSnakeCaseRecord(tool.args),
|
|
341
613
|
});
|
|
342
614
|
}
|
|
343
|
-
|
|
615
|
+
if (normalized.length === 0) {
|
|
616
|
+
return undefined;
|
|
617
|
+
}
|
|
618
|
+
// Attach the cache breakpoint to the final tool entry so Anthropic caches
|
|
619
|
+
// the entire tools block up to and including that definition. Earlier tool
|
|
620
|
+
// entries are implicitly covered by the same breakpoint per Anthropic's
|
|
621
|
+
// walk-backward cache lookup behaviour.
|
|
622
|
+
if (toolsCacheControl) {
|
|
623
|
+
const lastIndex = normalized.length - 1;
|
|
624
|
+
normalized[lastIndex] = {
|
|
625
|
+
...normalized[lastIndex],
|
|
626
|
+
cache_control: toolsCacheControl,
|
|
627
|
+
};
|
|
628
|
+
}
|
|
629
|
+
return normalized;
|
|
344
630
|
}
|
|
345
631
|
function createAnthropicRequestHeaders(options) {
|
|
346
632
|
const headers = new Headers(options.extraHeaders);
|
|
@@ -397,27 +683,185 @@ function resolveAnthropicMaxTokens(modelId, callerMaxOutputTokens) {
|
|
|
397
683
|
}
|
|
398
684
|
return requested;
|
|
399
685
|
}
|
|
400
|
-
|
|
401
|
-
|
|
686
|
+
/**
 * Map a unified reasoning option to an Anthropic `thinking.budget_tokens`
 * value.
 *
 * An explicit `budgetTokens` wins when it is a finite number of at least
 * 1024 (the minimum this function accepts); fractional values are rounded
 * down because the budget is sent as a token count. Otherwise the `effort`
 * tier decides: low = 1024, medium (and anything unrecognised) = 4096,
 * high = 16384, max = 32768.
 *
 * @param option Reasoning option; reads `enabled`, `budgetTokens`, `effort`.
 * @returns The thinking budget in tokens, or `undefined` when thinking is
 *   not enabled.
 */
function resolveAnthropicThinkingBudget(option) {
    if (!option || option.enabled !== true) {
        return undefined;
    }
    const requested = option.budgetTokens;
    // Reject NaN/Infinity (they serialize to `null` in JSON) and drop any
    // fractional part before comparing against the minimum.
    if (typeof requested === "number" && Number.isFinite(requested)) {
        const wholeTokens = Math.floor(requested);
        if (wholeTokens >= 1024) {
            return wholeTokens;
        }
    }
    switch (option.effort) {
        case "low":
            return 1024;
        case "high":
            return 16_384;
        case "max":
            return 32_768;
        case "medium":
        default:
            return 4096;
    }
}
|
|
712
|
+
/**
 * Assemble the JSON request body for an Anthropic Messages call.
 *
 * Unified options that Anthropic cannot honour are reported through
 * `warnings.push(...)` as "unsupported-setting" entries instead of being
 * dropped without notice. Provider-specific options are merged over the
 * finished body last, so they can override any field set here.
 *
 * @param modelId      Model identifier placed in `model`.
 * @param providerName Provider name passed to `readProviderOptions`.
 * @param options      Unified call options (prompt, tools, sampling, ...).
 * @param stream       When truthy, `stream: true` is added to the body.
 * @param warnings     Collector exposing `push(warning)`.
 * @returns The request body object.
 */
function buildAnthropicMessagesRequest(modelId, providerName, options, stream, warnings) {
    const systemCacheControl = resolveAnthropicCacheControlBlock(options.cacheControl?.system);
    const toolsCacheControl = resolveAnthropicCacheControlBlock(options.cacheControl?.tools);
    const { system, messages } = toAnthropicMessages(options.prompt, systemCacheControl);
    const anthropicTools = toAnthropicTools(options.tools, toolsCacheControl);
    const thinkingBudget = resolveAnthropicThinkingBudget(options.reasoning);
    const thinkingEnabled = thinkingBudget !== undefined;
    // Every warning below has the same envelope; only setting/details vary.
    const warnUnsupported = (setting, details) => {
        warnings.push({
            type: "unsupported-setting",
            provider: "anthropic",
            setting,
            details,
        });
    };
    // Options with no Anthropic counterpart: warn so callers don't quietly
    // pass values that have zero effect.
    if (options.presencePenalty !== undefined) {
        warnUnsupported("presencePenalty", "Anthropic Messages API has no equivalent and the value was dropped.");
    }
    if (options.frequencyPenalty !== undefined) {
        warnUnsupported("frequencyPenalty", "Anthropic Messages API has no equivalent and the value was dropped.");
    }
    if (options.seed !== undefined) {
        warnUnsupported("seed", "Anthropic Messages API does not support deterministic seeding.");
    }
    if (options.topK !== undefined) {
        warnUnsupported("topK", "Anthropic Messages API does not expose top_k on this surface.");
    }
    if (options.stopSequences && options.stopSequences.length > 4) {
        warnUnsupported("stopSequences", `Anthropic accepts at most 4 stop sequences; ${options.stopSequences.length} were provided and the extras were truncated.`);
    }
    if (thinkingEnabled && options.temperature !== undefined) {
        warnUnsupported("temperature", "Dropped because Anthropic rejects sampling params when extended thinking is enabled.");
    }
    if (thinkingEnabled && options.topP !== undefined) {
        warnUnsupported("topP", "Dropped because Anthropic rejects sampling params when extended thinking is enabled.");
    }
    if (options.responseFormat && options.responseFormat.type !== "text") {
        warnUnsupported("responseFormat", "Anthropic Messages API does not have a structured-output response_format equivalent. Use a tool with the schema as input_schema instead.");
    }
    // Anthropic requires max_tokens > budget_tokens when thinking is enabled.
    // Adding the thinking budget on top keeps the caller's intended output
    // budget; the sum is clamped at the model's advertised maximum.
    const baseMaxTokens = resolveAnthropicMaxTokens(modelId, options.maxOutputTokens);
    let maxTokens = baseMaxTokens;
    if (thinkingEnabled) {
        maxTokens = Math.min(baseMaxTokens + (thinkingBudget ?? 0), getAnthropicModelCapabilities(modelId).maxOutputTokens);
    }
    // Fields are assigned in a fixed order so the serialized body is stable.
    const body = {
        model: modelId,
        messages,
        max_tokens: maxTokens,
    };
    if (stream) {
        body.stream = true;
    }
    if (system) {
        body.system = system;
    }
    // Sampling params are mutually exclusive with thinking on Anthropic, so
    // they are left out of the body when thinking is on (the warnings above
    // tell the caller).
    if (!thinkingEnabled) {
        if (options.temperature !== undefined) {
            body.temperature = options.temperature;
        }
        if (options.topP !== undefined) {
            body.top_p = options.topP;
        }
    }
    if (options.stopSequences && options.stopSequences.length > 0) {
        body.stop_sequences = options.stopSequences.slice(0, 4);
    }
    if (anthropicTools) {
        body.tools = anthropicTools;
    }
    if (options.toolChoice !== undefined) {
        body.tool_choice = normalizeAnthropicToolChoice(options.toolChoice);
    }
    if (thinkingEnabled) {
        body.thinking = { type: "enabled", budget_tokens: thinkingBudget };
    }
    if (typeof options.userId === "string" && options.userId.length > 0) {
        body.metadata = { user_id: options.userId };
    }
    if (options.mcpServers && options.mcpServers.length > 0) {
        body.mcp_servers = deepSnakeCase(options.mcpServers);
    }
    if (options.anthropicContainer !== undefined) {
        body.container = options.anthropicContainer;
    }
    Object.assign(body, readProviderOptions(options.providerOptions, "anthropic", providerName));
    return body;
}
|
|
826
|
+
/**
 * Best-effort camelCase normalization of a single Anthropic citation record.
 *
 * Covers the union of fields across the web_search_result_location,
 * web_fetch_result_location, char_location, page_location and
 * content_block_location citation kinds — see
 * https://docs.claude.com/en/docs/build-with-claude/citations
 *
 * A field is copied only when it is present with the expected primitive type.
 *
 * @param raw Untrusted citation value from the response payload.
 * @returns The normalized citation, or `undefined` when `raw` is not a
 *   record or lacks a non-empty string `type`.
 */
function normalizeAnthropicCitation(raw) {
    const source = readRecord(raw);
    if (!source) {
        return undefined;
    }
    if (typeof source.type !== "string" || source.type === "") {
        return undefined;
    }
    // [wire field, output field, required typeof], in output key order.
    const fieldMap = [
        ["cited_text", "citedText", "string"],
        ["url", "url", "string"],
        ["title", "title", "string"],
        ["start_char_index", "startCharIndex", "number"],
        ["end_char_index", "endCharIndex", "number"],
        ["start_block_index", "startBlockIndex", "number"],
        ["end_block_index", "endBlockIndex", "number"],
        ["start_page_number", "startPageNumber", "number"],
        ["end_page_number", "endPageNumber", "number"],
        ["document_index", "documentIndex", "number"],
        ["document_title", "documentTitle", "string"],
    ];
    const citation = { type: source.type };
    for (const [wireName, outputName, expectedType] of fieldMap) {
        const fieldValue = source[wireName];
        if (typeof fieldValue === expectedType) {
            citation[outputName] = fieldValue;
        }
    }
    return citation;
}
|
|
421
865
|
function buildAnthropicGenerateResult(payload) {
|
|
422
866
|
const record = readRecord(payload);
|
|
423
867
|
const content = Array.isArray(record?.content) ? record.content : [];
|
|
@@ -426,7 +870,40 @@ function buildAnthropicGenerateResult(payload) {
|
|
|
426
870
|
const block = readRecord(blockValue);
|
|
427
871
|
const blockType = typeof block?.type === "string" ? block.type : undefined;
|
|
428
872
|
if (blockType === "text" && typeof block?.text === "string" && block.text.length > 0) {
|
|
429
|
-
|
|
873
|
+
const citationsRaw = Array.isArray(block.citations) ? block.citations : undefined;
|
|
874
|
+
const citations = citationsRaw
|
|
875
|
+
?.flatMap((c) => {
|
|
876
|
+
const normalizedCitation = normalizeAnthropicCitation(c);
|
|
877
|
+
return normalizedCitation ? [normalizedCitation] : [];
|
|
878
|
+
});
|
|
879
|
+
normalized.push({
|
|
880
|
+
type: "text",
|
|
881
|
+
text: block.text,
|
|
882
|
+
...(citations && citations.length > 0 ? { citations } : {}),
|
|
883
|
+
});
|
|
884
|
+
continue;
|
|
885
|
+
}
|
|
886
|
+
// Thinking blocks carry the cleartext trace plus a signature that
|
|
887
|
+
// Anthropic uses to verify on subsequent turns. Surfacing both lets
|
|
888
|
+
// callers persist them as `reasoning` content parts and replay on
|
|
889
|
+
// the next turn so Claude can continue from the same thinking.
|
|
890
|
+
if (blockType === "thinking") {
|
|
891
|
+
normalized.push({
|
|
892
|
+
type: "reasoning",
|
|
893
|
+
...(typeof block?.thinking === "string" ? { text: block.thinking } : {}),
|
|
894
|
+
...(typeof block?.signature === "string" ? { signature: block.signature } : {}),
|
|
895
|
+
});
|
|
896
|
+
continue;
|
|
897
|
+
}
|
|
898
|
+
// Redacted thinking blocks arrive when Claude's safety classifier
|
|
899
|
+
// hides the trace. Pass the encrypted blob through opaquely so the
|
|
900
|
+
// caller can replay it on the next turn (Anthropic still needs the
|
|
901
|
+
// blob to verify continuity even though it can't read it).
|
|
902
|
+
if (blockType === "redacted_thinking" && typeof block?.data === "string") {
|
|
903
|
+
normalized.push({
|
|
904
|
+
type: "reasoning",
|
|
905
|
+
redactedData: block.data,
|
|
906
|
+
});
|
|
430
907
|
continue;
|
|
431
908
|
}
|
|
432
909
|
if ((blockType === "tool_use" || blockType === "server_tool_use") &&
|
|
@@ -537,6 +1014,19 @@ async function* streamAnthropicCompatibleParts(stream) {
|
|
|
537
1014
|
}
|
|
538
1015
|
continue;
|
|
539
1016
|
}
|
|
1017
|
+
// Redacted thinking blocks arrive as opaque encrypted payloads when
|
|
1018
|
+
// Claude's safety classifier flags the reasoning trace. Surface them
|
|
1019
|
+
// as a zero-length reasoning block so callers know thinking happened
|
|
1020
|
+
// without leaking the (legitimately hidden) contents.
|
|
1021
|
+
if (blockType === "redacted_thinking") {
|
|
1022
|
+
const reasoningId = `thinking-${index}`;
|
|
1023
|
+
reasoningBlocks.set(index, { id: reasoningId });
|
|
1024
|
+
yield {
|
|
1025
|
+
type: "reasoning-start",
|
|
1026
|
+
id: reasoningId,
|
|
1027
|
+
};
|
|
1028
|
+
continue;
|
|
1029
|
+
}
|
|
540
1030
|
if ((blockType === "tool_use" || blockType === "server_tool_use") &&
|
|
541
1031
|
typeof contentBlock?.id === "string" &&
|
|
542
1032
|
typeof contentBlock?.name === "string") {
|
|
@@ -696,10 +1186,13 @@ function extractOpenAIUsage(payload) {
|
|
|
696
1186
|
const inputTokens = usage.prompt_tokens;
|
|
697
1187
|
const outputTokens = usage.completion_tokens;
|
|
698
1188
|
const totalTokens = usage.total_tokens;
|
|
1189
|
+
const promptTokensDetails = readRecord(usage.prompt_tokens_details);
|
|
1190
|
+
const cachedTokens = promptTokensDetails?.cached_tokens;
|
|
699
1191
|
return {
|
|
700
1192
|
inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
|
|
701
1193
|
outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
|
|
702
1194
|
totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
|
|
1195
|
+
...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
|
|
703
1196
|
};
|
|
704
1197
|
}
|
|
705
1198
|
function extractOpenAIContentText(content) {
|
|
@@ -742,14 +1235,81 @@ function extractOpenAIToolCalls(message) {
|
|
|
742
1235
|
}
|
|
743
1236
|
return normalized;
|
|
744
1237
|
}
|
|
745
|
-
|
|
1238
|
+
/**
 * Reports whether a model id belongs to the OpenAI reasoning family
 * (o1 / o3 / o4).
 *
 * Detection is purely by id prefix: the id must start with `o1`, `o3` or
 * `o4`, followed immediately by either a hyphen or the end of the string
 * (so `o3-mini` matches, while `o1preview` and `gpt-4o` do not).
 *
 * @param {string} modelId - Model identifier to classify.
 * @returns {boolean} `true` when the id matches the reasoning-model prefix.
 */
function isOpenAIReasoningModel(modelId) {
    const reasoningFamilyPattern = /^o[134](-|$)/;
    return reasoningFamilyPattern.test(modelId);
}
|
|
1247
|
+
/**
 * Translates the unified reasoning option into a value for OpenAI's
 * `reasoning_effort` field.
 *
 * - Returns `undefined` when the option is missing or `enabled` is not
 *   strictly `true`.
 * - "low" stays "low"; "high" and "max" both become "high" ("max" is
 *   collapsed because this mapping has no higher level to send).
 * - Every other effort value, including an absent one, becomes "medium".
 *
 * @param {{ enabled?: boolean, effort?: string } | null | undefined} option
 * @returns {"low" | "medium" | "high" | undefined}
 */
function resolveOpenAIReasoningEffort(option) {
    const isActive = Boolean(option) && option.enabled === true;
    if (!isActive) {
        return undefined;
    }
    const requested = option.effort;
    if (requested === "low") {
        return "low";
    }
    if (requested === "high" || requested === "max") {
        return "high";
    }
    // "medium" and anything unrecognized share the same fallback.
    return "medium";
}
|
|
1266
|
+
function buildOpenAIChatRequest(modelId, providerName, options, stream, warnings) {
|
|
1267
|
+
const isReasoningModel = isOpenAIReasoningModel(modelId);
|
|
1268
|
+
const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
|
|
1269
|
+
const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
|
|
1270
|
+
// OpenAI Chat Completions has no top_k surface (it's exposed only on the
|
|
1271
|
+
// Responses API for some reasoning models). Quietly accepting it would
|
|
1272
|
+
// mislead callers into thinking it took effect.
|
|
1273
|
+
if (options.topK !== undefined) {
|
|
1274
|
+
warnings.push({
|
|
1275
|
+
type: "unsupported-setting",
|
|
1276
|
+
provider: "openai",
|
|
1277
|
+
setting: "topK",
|
|
1278
|
+
details: "OpenAI Chat Completions does not expose top_k; the value was dropped.",
|
|
1279
|
+
});
|
|
1280
|
+
}
|
|
1281
|
+
// Reasoning models (o1 / o3 / o4) reject sampling params outright. Emit
|
|
1282
|
+
// warnings at build time so callers see *why* the value didn't apply
|
|
1283
|
+
// rather than a 400 from the API.
|
|
1284
|
+
if (reasoningEnabled) {
|
|
1285
|
+
const dropped = [
|
|
1286
|
+
["temperature", "temperature"],
|
|
1287
|
+
["topP", "top_p"],
|
|
1288
|
+
["presencePenalty", "presence_penalty"],
|
|
1289
|
+
["frequencyPenalty", "frequency_penalty"],
|
|
1290
|
+
];
|
|
1291
|
+
for (const [key, openaiName] of dropped) {
|
|
1292
|
+
if (options[key] !== undefined) {
|
|
1293
|
+
warnings.push({
|
|
1294
|
+
type: "unsupported-setting",
|
|
1295
|
+
provider: "openai",
|
|
1296
|
+
setting: key,
|
|
1297
|
+
details: `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
|
|
1298
|
+
});
|
|
1299
|
+
}
|
|
1300
|
+
}
|
|
1301
|
+
}
|
|
746
1302
|
const body = {
|
|
747
1303
|
model: modelId,
|
|
748
1304
|
messages: toOpenAICompatibleMessages(options.prompt),
|
|
749
1305
|
...(stream ? { stream: true, stream_options: { include_usage: true } } : {}),
|
|
750
1306
|
...(options.maxOutputTokens !== undefined ? { max_tokens: options.maxOutputTokens } : {}),
|
|
751
|
-
|
|
752
|
-
|
|
1307
|
+
// OpenAI reasoning models reject temperature / top_p / frequency / presence.
|
|
1308
|
+
// Drop them here (the warnings recorded above say which ones) rather than letting the API bounce the request.
|
|
1309
|
+
...(!reasoningEnabled && options.temperature !== undefined
|
|
1310
|
+
? { temperature: options.temperature }
|
|
1311
|
+
: {}),
|
|
1312
|
+
...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
|
|
753
1313
|
...(options.stopSequences && options.stopSequences.length > 0
|
|
754
1314
|
? { stop: options.stopSequences }
|
|
755
1315
|
: {}),
|
|
@@ -758,10 +1318,37 @@ function buildOpenAIChatRequest(modelId, providerName, options, stream) {
|
|
|
758
1318
|
: {}),
|
|
759
1319
|
...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
|
|
760
1320
|
...(options.seed !== undefined ? { seed: options.seed } : {}),
|
|
761
|
-
...(options.presencePenalty !== undefined
|
|
762
|
-
|
|
1321
|
+
...(!reasoningEnabled && options.presencePenalty !== undefined
|
|
1322
|
+
? { presence_penalty: options.presencePenalty }
|
|
1323
|
+
: {}),
|
|
1324
|
+
...(!reasoningEnabled && options.frequencyPenalty !== undefined
|
|
763
1325
|
? { frequency_penalty: options.frequencyPenalty }
|
|
764
1326
|
: {}),
|
|
1327
|
+
...(reasoningEffort !== undefined ? { reasoning_effort: reasoningEffort } : {}),
|
|
1328
|
+
...(typeof options.userId === "string" && options.userId.length > 0
|
|
1329
|
+
? { user: options.userId }
|
|
1330
|
+
: {}),
|
|
1331
|
+
...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
|
|
1332
|
+
...(options.parallelToolCalls !== undefined
|
|
1333
|
+
? { parallel_tool_calls: options.parallelToolCalls }
|
|
1334
|
+
: {}),
|
|
1335
|
+
...(options.responseFormat && options.responseFormat.type !== "text"
|
|
1336
|
+
? {
|
|
1337
|
+
response_format: options.responseFormat.type === "json" ? { type: "json_object" } : {
|
|
1338
|
+
type: "json_schema",
|
|
1339
|
+
json_schema: {
|
|
1340
|
+
name: options.responseFormat.name,
|
|
1341
|
+
...(typeof options.responseFormat.description === "string"
|
|
1342
|
+
? { description: options.responseFormat.description }
|
|
1343
|
+
: {}),
|
|
1344
|
+
schema: unwrapToolInputSchema(options.responseFormat.schema),
|
|
1345
|
+
...(options.responseFormat.strict !== undefined
|
|
1346
|
+
? { strict: options.responseFormat.strict }
|
|
1347
|
+
: {}),
|
|
1348
|
+
},
|
|
1349
|
+
},
|
|
1350
|
+
}
|
|
1351
|
+
: {}),
|
|
765
1352
|
};
|
|
766
1353
|
Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
|
|
767
1354
|
return body;
|
|
@@ -791,10 +1378,14 @@ function extractGoogleUsage(payload) {
|
|
|
791
1378
|
const inputTokens = usage.promptTokenCount;
|
|
792
1379
|
const outputTokens = usage.candidatesTokenCount;
|
|
793
1380
|
const totalTokens = usage.totalTokenCount;
|
|
1381
|
+
const cachedContentTokenCount = usage.cachedContentTokenCount;
|
|
794
1382
|
return {
|
|
795
1383
|
inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
|
|
796
1384
|
outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
|
|
797
1385
|
totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
|
|
1386
|
+
...(typeof cachedContentTokenCount === "number"
|
|
1387
|
+
? { cacheReadInputTokens: cachedContentTokenCount }
|
|
1388
|
+
: {}),
|
|
798
1389
|
};
|
|
799
1390
|
}
|
|
800
1391
|
function toGoogleContents(prompt) {
|
|
@@ -813,18 +1404,29 @@ function toGoogleContents(prompt) {
|
|
|
813
1404
|
parts: [{ text: readTextParts(message.content) }],
|
|
814
1405
|
});
|
|
815
1406
|
break;
|
|
816
|
-
case "assistant":
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
1407
|
+
case "assistant": {
|
|
1408
|
+
// Anthropic-only `reasoning` parts have no Gemini equivalent
|
|
1409
|
+
// and are dropped on replay.
|
|
1410
|
+
const parts = [];
|
|
1411
|
+
for (const part of message.content) {
|
|
1412
|
+
if (part.type === "text") {
|
|
1413
|
+
parts.push({ text: part.text });
|
|
1414
|
+
continue;
|
|
1415
|
+
}
|
|
1416
|
+
if (part.type === "reasoning") {
|
|
1417
|
+
continue;
|
|
1418
|
+
}
|
|
1419
|
+
parts.push({
|
|
820
1420
|
functionCall: {
|
|
821
1421
|
id: part.toolCallId,
|
|
822
1422
|
name: part.toolName,
|
|
823
1423
|
args: part.input,
|
|
824
1424
|
},
|
|
825
|
-
})
|
|
826
|
-
}
|
|
1425
|
+
});
|
|
1426
|
+
}
|
|
1427
|
+
contents.push({ role: "model", parts });
|
|
827
1428
|
break;
|
|
1429
|
+
}
|
|
828
1430
|
case "tool":
|
|
829
1431
|
contents.push({
|
|
830
1432
|
role: "user",
|
|
@@ -852,14 +1454,37 @@ function toGoogleTools(tools) {
|
|
|
852
1454
|
if (!tools) {
|
|
853
1455
|
return undefined;
|
|
854
1456
|
}
|
|
855
|
-
const functionDeclarations =
|
|
856
|
-
|
|
1457
|
+
const functionDeclarations = [];
|
|
1458
|
+
const providerEntries = [];
|
|
1459
|
+
for (const tool of tools) {
|
|
1460
|
+
if (tool.type === "function") {
|
|
1461
|
+
functionDeclarations.push({
|
|
857
1462
|
name: tool.name,
|
|
858
1463
|
...(typeof tool.description === "string" ? { description: tool.description } : {}),
|
|
859
1464
|
parameters: unwrapToolInputSchema(tool.inputSchema),
|
|
860
|
-
}
|
|
861
|
-
|
|
862
|
-
|
|
1465
|
+
});
|
|
1466
|
+
continue;
|
|
1467
|
+
}
|
|
1468
|
+
// Gemini provider tools — code_execution, google_search,
|
|
1469
|
+
// google_search_retrieval — each lives in its own tools[] entry
|
|
1470
|
+
// with a single key keyed by the camelCase tool name and an
|
|
1471
|
+
// optional config payload (caller-provided tool.args).
|
|
1472
|
+
if (!tool.id.startsWith("google.")) {
|
|
1473
|
+
continue;
|
|
1474
|
+
}
|
|
1475
|
+
const providerType = tool.id.slice("google.".length);
|
|
1476
|
+
if (providerType.length === 0) {
|
|
1477
|
+
continue;
|
|
1478
|
+
}
|
|
1479
|
+
const camelKey = providerType.replace(/_([a-z])/g, (_, ch) => ch.toUpperCase());
|
|
1480
|
+
providerEntries.push({ [camelKey]: tool.args ?? {} });
|
|
1481
|
+
}
|
|
1482
|
+
const result = [];
|
|
1483
|
+
if (functionDeclarations.length > 0) {
|
|
1484
|
+
result.push({ functionDeclarations });
|
|
1485
|
+
}
|
|
1486
|
+
result.push(...providerEntries);
|
|
1487
|
+
return result.length > 0 ? result : undefined;
|
|
863
1488
|
}
|
|
864
1489
|
function unwrapToolInputSchema(inputSchema) {
|
|
865
1490
|
if (typeof inputSchema !== "object" || inputSchema === null || Array.isArray(inputSchema)) {
|
|
@@ -884,7 +1509,11 @@ function normalizeGoogleToolChoice(toolChoice) {
|
|
|
884
1509
|
}
|
|
885
1510
|
}
|
|
886
1511
|
const record = readRecord(toolChoice);
|
|
887
|
-
if (record
|
|
1512
|
+
if (!record)
|
|
1513
|
+
return undefined;
|
|
1514
|
+
// Single-tool restriction: { type: "tool", name } — pin to one
|
|
1515
|
+
// function via mode: ANY + allowedFunctionNames: [name].
|
|
1516
|
+
if (record.type === "tool" && typeof record.name === "string") {
|
|
888
1517
|
return {
|
|
889
1518
|
functionCallingConfig: {
|
|
890
1519
|
mode: "ANY",
|
|
@@ -892,9 +1521,66 @@ function normalizeGoogleToolChoice(toolChoice) {
|
|
|
892
1521
|
},
|
|
893
1522
|
};
|
|
894
1523
|
}
|
|
1524
|
+
// Multi-tool restriction: { type: "tools", names: string[] } — pin
|
|
1525
|
+
// to a subset via mode: ANY + the full allowedFunctionNames array.
|
|
1526
|
+
if (record.type === "tools" && Array.isArray(record.names)) {
|
|
1527
|
+
const names = record.names.filter((n) => typeof n === "string");
|
|
1528
|
+
if (names.length > 0) {
|
|
1529
|
+
return {
|
|
1530
|
+
functionCallingConfig: {
|
|
1531
|
+
mode: "ANY",
|
|
1532
|
+
allowedFunctionNames: names,
|
|
1533
|
+
},
|
|
1534
|
+
};
|
|
1535
|
+
}
|
|
1536
|
+
}
|
|
1537
|
+
// Explicit mode forms: { type: "auto" | "none" | "any" }.
|
|
1538
|
+
if (record.type === "auto") {
|
|
1539
|
+
return { functionCallingConfig: { mode: "AUTO" } };
|
|
1540
|
+
}
|
|
1541
|
+
if (record.type === "none") {
|
|
1542
|
+
return { functionCallingConfig: { mode: "NONE" } };
|
|
1543
|
+
}
|
|
1544
|
+
if (record.type === "any" || record.type === "required") {
|
|
1545
|
+
return { functionCallingConfig: { mode: "ANY" } };
|
|
1546
|
+
}
|
|
895
1547
|
return undefined;
|
|
896
1548
|
}
|
|
1549
|
+
/**
 * Translates the unified reasoning option into a Gemini `thinkingConfig`
 * object.
 *
 * Returns `undefined` when the option is missing or `enabled` is not
 * strictly `true`. Otherwise the result always carries
 * `includeThoughts: true` plus a `thinkingBudget` chosen as follows:
 *
 * - a numeric `budgetTokens` is used verbatim and takes precedence over
 *   `effort`;
 * - otherwise `effort` maps low -> 512, high -> 8192, max -> -1, and
 *   "medium" or any other value -> 2048.
 *
 * @param {{ enabled?: boolean, budgetTokens?: number, effort?: string } | null | undefined} option
 * @returns {{ includeThoughts: true, thinkingBudget: number } | undefined}
 */
function resolveGoogleThinkingConfig(option) {
    if (!option || option.enabled !== true) {
        return undefined;
    }
    const explicitBudget = option.budgetTokens;
    if (typeof explicitBudget === "number") {
        return { includeThoughts: true, thinkingBudget: explicitBudget };
    }
    // A Map (rather than a plain object) keeps inherited keys such as
    // "constructor" from ever being treated as a known effort level.
    const budgetByEffort = new Map([
        ["low", 512],
        ["high", 8192],
        ["max", -1],
        ["medium", 2048],
    ]);
    const thinkingBudget = budgetByEffort.get(option.effort) ?? 2048;
    return { includeThoughts: true, thinkingBudget };
}
|
|
897
1582
|
function buildGoogleGenerationConfig(options) {
|
|
1583
|
+
const thinkingConfig = resolveGoogleThinkingConfig(options.reasoning);
|
|
898
1584
|
const config = {
|
|
899
1585
|
...(options.maxOutputTokens !== undefined ? { maxOutputTokens: options.maxOutputTokens } : {}),
|
|
900
1586
|
...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
|
|
@@ -904,11 +1590,48 @@ function buildGoogleGenerationConfig(options) {
|
|
|
904
1590
|
? { stopSequences: options.stopSequences }
|
|
905
1591
|
: {}),
|
|
906
1592
|
...(options.seed !== undefined ? { seed: options.seed } : {}),
|
|
1593
|
+
...(thinkingConfig ? { thinkingConfig } : {}),
|
|
907
1594
|
};
|
|
908
1595
|
return Object.keys(config).length > 0 ? config : undefined;
|
|
909
1596
|
}
|
|
910
|
-
function buildGoogleGenerateContentRequest(providerName, options) {
|
|
1597
|
+
function buildGoogleGenerateContentRequest(providerName, options, warnings) {
|
|
1598
|
+
// Google generate-content surface doesn't accept presence/frequency
|
|
1599
|
+
// penalties on most current models. Emit warnings and let the request
|
|
1600
|
+
// through without them.
|
|
1601
|
+
if (options.presencePenalty !== undefined) {
|
|
1602
|
+
warnings.push({
|
|
1603
|
+
type: "unsupported-setting",
|
|
1604
|
+
provider: "google",
|
|
1605
|
+
setting: "presencePenalty",
|
|
1606
|
+
details: "Gemini generateContent does not accept presencePenalty; the value was dropped.",
|
|
1607
|
+
});
|
|
1608
|
+
}
|
|
1609
|
+
if (options.frequencyPenalty !== undefined) {
|
|
1610
|
+
warnings.push({
|
|
1611
|
+
type: "unsupported-setting",
|
|
1612
|
+
provider: "google",
|
|
1613
|
+
setting: "frequencyPenalty",
|
|
1614
|
+
details: "Gemini generateContent does not accept frequencyPenalty; the value was dropped.",
|
|
1615
|
+
});
|
|
1616
|
+
}
|
|
1617
|
+
if (options.responseFormat && options.responseFormat.type !== "text") {
|
|
1618
|
+
warnings.push({
|
|
1619
|
+
type: "unsupported-setting",
|
|
1620
|
+
provider: "google",
|
|
1621
|
+
setting: "responseFormat",
|
|
1622
|
+
details: "Gemini uses generationConfig.responseMimeType + responseSchema for structured outputs, which is a separate surface and not yet wired through this option.",
|
|
1623
|
+
});
|
|
1624
|
+
}
|
|
911
1625
|
const { systemInstruction, contents } = toGoogleContents(options.prompt);
|
|
1626
|
+
const generationConfig = buildGoogleGenerationConfig(options);
|
|
1627
|
+
// requestLabels wins over userId-derived labels: when callers explicitly
|
|
1628
|
+
// provide a label map, that's the source of truth. Otherwise fall back
|
|
1629
|
+
// to {user_id} derived from the unified userId option.
|
|
1630
|
+
const labels = options.requestLabels && Object.keys(options.requestLabels).length > 0
|
|
1631
|
+
? options.requestLabels
|
|
1632
|
+
: typeof options.userId === "string" && options.userId.length > 0
|
|
1633
|
+
? { user_id: options.userId }
|
|
1634
|
+
: undefined;
|
|
912
1635
|
const body = {
|
|
913
1636
|
contents,
|
|
914
1637
|
...(systemInstruction ? { systemInstruction } : {}),
|
|
@@ -916,8 +1639,13 @@ function buildGoogleGenerateContentRequest(providerName, options) {
|
|
|
916
1639
|
...(normalizeGoogleToolChoice(options.toolChoice)
|
|
917
1640
|
? { toolConfig: normalizeGoogleToolChoice(options.toolChoice) }
|
|
918
1641
|
: {}),
|
|
919
|
-
...(
|
|
920
|
-
|
|
1642
|
+
...(generationConfig ? { generationConfig } : {}),
|
|
1643
|
+
...(labels ? { labels } : {}),
|
|
1644
|
+
...(typeof options.googleCachedContent === "string" && options.googleCachedContent.length > 0
|
|
1645
|
+
? { cachedContent: options.googleCachedContent }
|
|
1646
|
+
: {}),
|
|
1647
|
+
...(options.googleSafetySettings && options.googleSafetySettings.length > 0
|
|
1648
|
+
? { safetySettings: options.googleSafetySettings }
|
|
921
1649
|
: {}),
|
|
922
1650
|
};
|
|
923
1651
|
Object.assign(body, readProviderOptions(options.providerOptions, "google", providerName));
|
|
@@ -961,10 +1689,18 @@ function buildGoogleGenerateResult(payload) {
|
|
|
961
1689
|
});
|
|
962
1690
|
}
|
|
963
1691
|
}
|
|
1692
|
+
// Gemini grounding (google_search / google_search_retrieval) returns
|
|
1693
|
+
// a per-candidate groundingMetadata object with web search queries,
|
|
1694
|
+
// grounding chunks, and citation indices into the response text.
|
|
1695
|
+
// Pass it through opaquely so callers can render footnotes / source
|
|
1696
|
+
// chips / "Search results" UI without parsing the wire shape.
|
|
1697
|
+
const candidate = extractFirstGoogleCandidate(payload);
|
|
1698
|
+
const groundingMetadata = readRecord(candidate?.groundingMetadata);
|
|
964
1699
|
return {
|
|
965
1700
|
content,
|
|
966
|
-
finishReason: normalizeGoogleFinishReason(
|
|
1701
|
+
finishReason: normalizeGoogleFinishReason(candidate?.finishReason),
|
|
967
1702
|
usage: extractGoogleUsage(payload),
|
|
1703
|
+
...(groundingMetadata ? { groundingMetadata } : {}),
|
|
968
1704
|
};
|
|
969
1705
|
}
|
|
970
1706
|
async function* streamGoogleCompatibleParts(stream) {
|
|
@@ -1238,11 +1974,13 @@ export function createOpenAIModelRuntime(config, modelId) {
|
|
|
1238
1974
|
doGenerate(optionsForRuntime) {
|
|
1239
1975
|
const options = optionsForRuntime;
|
|
1240
1976
|
const url = getOpenAIChatCompletionsUrl(config.baseURL);
|
|
1241
|
-
const
|
|
1977
|
+
const warnings = createWarningCollector();
|
|
1978
|
+
const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, false, warnings);
|
|
1242
1979
|
return requestJson({
|
|
1243
1980
|
url,
|
|
1244
1981
|
fetchImpl,
|
|
1245
1982
|
providerLabel: config.name ?? "openai",
|
|
1983
|
+
providerKind: "openai",
|
|
1246
1984
|
init: {
|
|
1247
1985
|
method: "POST",
|
|
1248
1986
|
headers: createRequestHeaders({
|
|
@@ -1253,16 +1991,24 @@ export function createOpenAIModelRuntime(config, modelId) {
|
|
|
1253
1991
|
body: JSON.stringify(body),
|
|
1254
1992
|
signal: options.abortSignal,
|
|
1255
1993
|
},
|
|
1256
|
-
}).then(
|
|
1994
|
+
}).then((payload) => {
|
|
1995
|
+
const drained = warnings.drain();
|
|
1996
|
+
return {
|
|
1997
|
+
...buildOpenAIGenerateResult(payload),
|
|
1998
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
1999
|
+
};
|
|
2000
|
+
});
|
|
1257
2001
|
},
|
|
1258
2002
|
doStream(optionsForRuntime) {
|
|
1259
2003
|
const options = optionsForRuntime;
|
|
1260
2004
|
const url = getOpenAIChatCompletionsUrl(config.baseURL);
|
|
1261
|
-
const
|
|
2005
|
+
const warnings = createWarningCollector();
|
|
2006
|
+
const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, true, warnings);
|
|
1262
2007
|
return requestStream({
|
|
1263
2008
|
url,
|
|
1264
2009
|
fetchImpl,
|
|
1265
2010
|
providerLabel: config.name ?? "openai",
|
|
2011
|
+
providerKind: "openai",
|
|
1266
2012
|
init: {
|
|
1267
2013
|
method: "POST",
|
|
1268
2014
|
headers: createRequestHeaders({
|
|
@@ -1273,9 +2019,549 @@ export function createOpenAIModelRuntime(config, modelId) {
|
|
|
1273
2019
|
body: JSON.stringify(body),
|
|
1274
2020
|
signal: options.abortSignal,
|
|
1275
2021
|
},
|
|
1276
|
-
}).then((responseStream) =>
|
|
1277
|
-
|
|
1278
|
-
|
|
2022
|
+
}).then((responseStream) => {
|
|
2023
|
+
const drained = warnings.drain();
|
|
2024
|
+
return {
|
|
2025
|
+
stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
|
|
2026
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2027
|
+
};
|
|
2028
|
+
});
|
|
2029
|
+
},
|
|
2030
|
+
};
|
|
2031
|
+
}
|
|
2032
|
+
/**
 * Converts the unified prompt message list into the pieces the Responses
 * API expects: an optional `instructions` string and an `input` array.
 *
 * Mapping rules, per message role:
 * - system: non-empty contents are collected and joined with a blank line
 *   into `instructions`; nothing is added to `input`.
 * - user: one `{ role: "user" }` item with a single `input_text` part.
 * - assistant: consecutive text parts are grouped into one
 *   `{ role: "assistant" }` item made of `output_text` parts. A reasoning
 *   part or a tool call closes the current group and is emitted as its own
 *   top-level item (`reasoning` / `function_call`), so the original part
 *   order is preserved.
 * - tool: each part becomes a `function_call_output` item.
 * Messages with any other role are ignored.
 *
 * @param {Array<object>} prompt - Unified prompt messages.
 * @returns {{ instructions?: string, input: Array<object> }}
 */
function toOpenAIResponsesInput(prompt) {
    const systemTexts = [];
    const items = [];
    for (const message of prompt) {
        const { role } = message;
        if (role === "system") {
            if (message.content.length > 0) {
                systemTexts.push(message.content);
            }
        }
        else if (role === "user") {
            const text = readTextParts(message.content);
            items.push({ role: "user", content: [{ type: "input_text", text }] });
        }
        else if (role === "assistant") {
            let pendingText = [];
            // Emits the text parts gathered so far as one assistant message.
            const flushPendingText = () => {
                if (pendingText.length === 0) {
                    return;
                }
                items.push({ role: "assistant", content: pendingText });
                pendingText = [];
            };
            for (const part of message.content) {
                if (part.type === "text") {
                    pendingText.push({ type: "output_text", text: part.text });
                }
                else if (part.type === "reasoning") {
                    // Reasoning lives at the top level of `input`, never
                    // inside the assistant message.
                    flushPendingText();
                    const hasText = typeof part.text === "string" && part.text.length > 0;
                    const reasoningItem = { type: "reasoning" };
                    if (typeof part.signature === "string") {
                        reasoningItem.encrypted_content = part.signature;
                    }
                    reasoningItem.summary = hasText ? [{ type: "summary_text", text: part.text }] : [];
                    items.push(reasoningItem);
                }
                else {
                    // Anything else is treated as a tool call and becomes
                    // a standalone function_call item.
                    flushPendingText();
                    items.push({
                        type: "function_call",
                        call_id: part.toolCallId,
                        name: part.toolName,
                        arguments: stringifyJsonValue(part.input),
                    });
                }
            }
            flushPendingText();
        }
        else if (role === "tool") {
            for (const part of message.content) {
                items.push({
                    type: "function_call_output",
                    call_id: part.toolCallId,
                    output: stringifyJsonValue(part.output.value),
                });
            }
        }
    }
    const result = {};
    if (systemTexts.length > 0) {
        result.instructions = systemTexts.join("\n\n");
    }
    result.input = items;
    return result;
}
|
|
2120
|
+
/**
 * Converts unified tool definitions into the Responses API `tools` array.
 *
 * Function tools are emitted in the flat Responses shape
 * `{ type: "function", name, description?, parameters }` (no nested
 * `function` object as in Chat Completions). Provider tools are recognized
 * by an `openai.` id prefix: the rest of the id becomes the entry's `type`
 * and `tool.args` (run through `toSnakeCaseRecord`) is spread next to it.
 *
 * Every other entry is skipped: ids with a different prefix, a bare
 * `openai.` prefix, and entries whose `id` is missing or not a string.
 * The last case previously threw a TypeError on `tool.id.startsWith`.
 *
 * @param {Array<object> | null | undefined} tools - Unified tool definitions.
 * @returns {Array<object> | undefined} The converted tools, or `undefined`
 *   when there is nothing to send.
 */
function toOpenAIResponsesTools(tools) {
    if (!tools) {
        return undefined;
    }
    const normalized = [];
    for (const tool of tools) {
        if (tool.type === "function") {
            normalized.push({
                type: "function",
                name: tool.name,
                ...(typeof tool.description === "string" ? { description: tool.description } : {}),
                parameters: unwrapToolInputSchema(tool.inputSchema),
            });
            continue;
        }
        // A non-function entry can only be matched through its id; without
        // a string id it is treated like any other unrecognized tool.
        if (typeof tool.id !== "string" || !tool.id.startsWith("openai.")) {
            continue;
        }
        const providerType = tool.id.slice("openai.".length);
        if (providerType.length === 0) {
            continue;
        }
        normalized.push({
            type: providerType,
            ...toSnakeCaseRecord(tool.args),
        });
    }
    return normalized.length > 0 ? normalized : undefined;
}
|
|
2153
|
+
/**
 * Builds the JSON request body for the OpenAI Responses API.
 *
 * Unified options that this builder does not place in the body are reported
 * through `warnings` as `unsupported-setting` entries instead of vanishing:
 * - `topK` is never forwarded.
 * - `temperature`, `topP`, `presencePenalty` and `frequencyPenalty` are
 *   dropped whenever reasoning is active (reasoning model id, or a resolved
 *   reasoning effort).
 * - `presencePenalty`, `frequencyPenalty`, `seed` and `stopSequences` are
 *   never forwarded by this builder. Earlier versions dropped them without
 *   any notice when reasoning was off; they now produce a warning.
 *
 * Whatever `readProviderOptions` returns is merged into the body last via
 * `Object.assign`, so those keys override the ones computed here.
 *
 * @param {string} modelId - Sent as `model`; also used to detect reasoning models.
 * @param {string} providerName - Passed to `readProviderOptions` together with "openai".
 * @param {object} options - Unified call options (prompt, tools, sampling settings, ...).
 * @param {boolean} stream - When truthy, `stream: true` is added to the body.
 * @param {{ push(warning: object): void }} warnings - Collector receiving dropped-setting notices.
 * @returns {object} The request body, ready to be serialized.
 */
function buildOpenAIResponsesRequest(modelId, providerName, options, stream, warnings) {
    const isReasoningModel = isOpenAIReasoningModel(modelId);
    const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
    const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
    const warnDropped = (setting, details) => {
        warnings.push({
            type: "unsupported-setting",
            provider: "openai",
            setting,
            details,
        });
    };
    if (options.topK !== undefined) {
        warnDropped("topK", "OpenAI Responses API does not expose top_k; the value was dropped.");
    }
    if (reasoningEnabled) {
        // Same param-sanitization rules as Chat Completions: reasoning
        // models reject sampling params. Drop with a warning.
        const dropped = [
            ["temperature", "temperature"],
            ["topP", "top_p"],
            ["presencePenalty", "presence_penalty"],
            ["frequencyPenalty", "frequency_penalty"],
        ];
        for (const [key, openaiName] of dropped) {
            if (options[key] !== undefined) {
                warnDropped(key, `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`);
            }
        }
    }
    else {
        // The penalties are not part of the body built below even when
        // reasoning is off, so tell the caller they had no effect.
        const neverForwarded = [
            ["presencePenalty", "presence_penalty"],
            ["frequencyPenalty", "frequency_penalty"],
        ];
        for (const [key, openaiName] of neverForwarded) {
            if (options[key] !== undefined) {
                warnDropped(key, `OpenAI Responses API does not expose ${openaiName}; the value was dropped.`);
            }
        }
    }
    if (options.seed !== undefined) {
        warnDropped("seed", "OpenAI Responses API does not expose seed; the value was dropped.");
    }
    if (options.stopSequences && options.stopSequences.length > 0) {
        warnDropped("stopSequences", "OpenAI Responses API does not expose stop sequences; the value was dropped.");
    }
    const { instructions, input } = toOpenAIResponsesInput(options.prompt);
    const responsesTools = toOpenAIResponsesTools(options.tools);
    const body = {
        model: modelId,
        input,
        ...(instructions !== undefined ? { instructions } : {}),
        ...(stream ? { stream: true } : {}),
        ...(options.maxOutputTokens !== undefined
            ? { max_output_tokens: options.maxOutputTokens }
            : {}),
        ...(!reasoningEnabled && options.temperature !== undefined
            ? { temperature: options.temperature }
            : {}),
        ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
        ...(responsesTools ? { tools: responsesTools } : {}),
        ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
        // The Responses API surfaces reasoning effort + summary verbosity
        // in a structured `reasoning` object instead of a flat field. We
        // request "auto" summary so callers see structured summary parts
        // without having to opt into them per request.
        ...(reasoningEffort !== undefined
            ? { reasoning: { effort: reasoningEffort, summary: "auto" } }
            : {}),
        ...(typeof options.userId === "string" && options.userId.length > 0
            ? { user: options.userId }
            : {}),
        ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
        ...(options.parallelToolCalls !== undefined
            ? { parallel_tool_calls: options.parallelToolCalls }
            : {}),
        // Responses API uses `text.format` instead of Chat Completions'
        // `response_format`. The shape is similar but nested under `text`.
        ...(options.responseFormat && options.responseFormat.type !== "text"
            ? {
                text: {
                    format: options.responseFormat.type === "json" ? { type: "json_object" } : {
                        type: "json_schema",
                        name: options.responseFormat.name,
                        ...(typeof options.responseFormat.description === "string"
                            ? { description: options.responseFormat.description }
                            : {}),
                        schema: unwrapToolInputSchema(options.responseFormat.schema),
                        ...(options.responseFormat.strict !== undefined
                            ? { strict: options.responseFormat.strict }
                            : {}),
                    },
                },
            }
            : {}),
    };
    Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
    return body;
}
|
|
2238
|
+
/**
 * Reads token usage from a Responses API payload.
 *
 * The usage record is looked up at `payload.response.usage` first (where
 * the streaming `response.completed` event carries it) and then at
 * `payload.usage` (non-streaming responses). Field names follow the
 * Responses API: `input_tokens`, `output_tokens`, `total_tokens`, and
 * `input_tokens_details.cached_tokens`.
 *
 * @param {unknown} payload - Parsed response body or stream event.
 * @returns {{ inputTokens?: number, outputTokens?: number, totalTokens?: number, cacheReadInputTokens?: number } | undefined}
 *   `undefined` when no usage record is found. `totalTokens` falls back to
 *   the sum of whichever of input/output counts are present, and
 *   `cacheReadInputTokens` appears only when the cached count is numeric.
 */
function extractOpenAIResponsesUsage(payload) {
    const asNumber = (value) => (typeof value === "number" ? value : undefined);
    const root = readRecord(payload);
    const nestedResponse = readRecord(root?.response);
    const usage = readRecord(nestedResponse?.usage) ?? readRecord(root?.usage);
    if (!usage) {
        return undefined;
    }
    const inputTokens = asNumber(usage.input_tokens);
    const outputTokens = asNumber(usage.output_tokens);
    let totalTokens = asNumber(usage.total_tokens);
    if (totalTokens === undefined && (inputTokens !== undefined || outputTokens !== undefined)) {
        // No explicit total: derive it from the counts that are available.
        totalTokens = (inputTokens ?? 0) + (outputTokens ?? 0);
    }
    const result = { inputTokens, outputTokens, totalTokens };
    const cachedTokens = readRecord(usage.input_tokens_details)?.cached_tokens;
    if (typeof cachedTokens === "number") {
        result.cacheReadInputTokens = cachedTokens;
    }
    return result;
}
|
|
2268
|
+
/**
 * Map a Responses API `status` value onto a `{ unified, raw }` finish reason.
 *
 * @param {unknown} raw - The `status` field of a response object.
 * @returns {{unified: string, raw: string}|null} `null` when `raw` is not a
 *   string or names a non-terminal status (`in_progress`, `queued`);
 *   otherwise an object carrying the unified reason plus the original status.
 */
function normalizeOpenAIResponsesFinishReason(raw) {
    if (typeof raw !== "string") {
        return null;
    }
    switch (raw) {
        case "completed":
            return { unified: "stop", raw };
        case "incomplete":
            return { unified: "length", raw };
        case "failed":
            return { unified: "error", raw };
        case "in_progress":
        case "queued":
            // Not terminal yet: there is no finish reason to report.
            return null;
        default:
            // Unknown or uncommon terminal statuses (e.g. "cancelled") keep the
            // same object shape as every other branch instead of leaking a bare
            // string to callers that read `.unified`.
            return { unified: "other", raw };
    }
}
|
|
2284
|
+
/**
 * Convert a non-streaming Responses API payload into a generate result.
 *
 * Output items are mapped as follows; any other item type is ignored:
 * - `message`: the `text` of every `output_text` part is concatenated into a
 *   single `text` entry (omitted when the concatenation is empty).
 * - `function_call`: one `tool-call` entry. The id prefers `call_id` over
 *   `id`; string arguments pass through, anything else is serialized with
 *   `stringifyJsonValue`.
 * - `reasoning`: one `reasoning` entry carrying the non-empty summaries and,
 *   when present, `encrypted_content` as `signature`.
 *
 * The finish reason starts from the response `status` and is then refined:
 * a completed response that contains tool calls reports `tool-calls`, and an
 * incomplete response whose `incomplete_details.reason` is `content_filter`
 * reports `content-filter` rather than `length`.
 *
 * @param {unknown} payload - Parsed JSON body of a Responses API call.
 * @returns {{content: Array<object>, finishReason: unknown, usage: unknown}}
 */
function buildOpenAIResponsesGenerateResult(payload) {
    const record = readRecord(payload);
    const output = Array.isArray(record?.output) ? record.output : [];
    const content = [];
    let hasToolCall = false;
    for (const item of output) {
        const itemRecord = readRecord(item);
        const itemType = typeof itemRecord?.type === "string" ? itemRecord.type : undefined;
        if (itemType === "message" && Array.isArray(itemRecord?.content)) {
            // A message item bundles one or more output_text parts. Concat
            // their texts into a single text content entry.
            let text = "";
            for (const part of itemRecord.content) {
                const p = readRecord(part);
                if (typeof p?.type === "string" && p.type === "output_text" && typeof p.text === "string") {
                    text += p.text;
                }
            }
            if (text.length > 0) {
                content.push({ type: "text", text });
            }
            continue;
        }
        if (itemType === "function_call") {
            hasToolCall = true;
            content.push({
                type: "tool-call",
                toolCallId: typeof itemRecord?.call_id === "string"
                    ? itemRecord.call_id
                    : (typeof itemRecord?.id === "string" ? itemRecord.id : ""),
                toolName: typeof itemRecord?.name === "string" ? itemRecord.name : "",
                input: typeof itemRecord?.arguments === "string"
                    ? itemRecord.arguments
                    : stringifyJsonValue(itemRecord?.arguments ?? {}),
            });
            continue;
        }
        if (itemType === "reasoning") {
            const summary = Array.isArray(itemRecord?.summary) ? itemRecord.summary : [];
            const summaries = [];
            for (const s of summary) {
                const sr = readRecord(s);
                if (typeof sr?.text === "string" && sr.text.length > 0) {
                    summaries.push({
                        ...(typeof sr?.id === "string" ? { id: sr.id } : {}),
                        text: sr.text,
                    });
                }
            }
            content.push({
                type: "reasoning",
                ...(summaries.length > 0 ? { summaries } : {}),
                ...(typeof itemRecord?.encrypted_content === "string"
                    ? { signature: itemRecord.encrypted_content }
                    : {}),
            });
            continue;
        }
    }
    let finishReason = normalizeOpenAIResponsesFinishReason(record?.status);
    // `status` alone cannot distinguish "model stopped" from "model asked for
    // tools", nor "hit the token limit" from "blocked by the content filter".
    // Refine only object-shaped reasons so any other value passes through.
    if (finishReason !== null && typeof finishReason === "object") {
        const incompleteReason = readRecord(record?.incomplete_details)?.reason;
        if (finishReason.unified === "stop" && hasToolCall) {
            finishReason = { unified: "tool-calls", raw: finishReason.raw };
        }
        else if (finishReason.unified === "length" && incompleteReason === "content_filter") {
            finishReason = { unified: "content-filter", raw: finishReason.raw };
        }
    }
    return {
        content,
        finishReason,
        usage: extractOpenAIResponsesUsage(payload),
    };
}
|
|
2347
|
+
/**
 * Parse the Responses API streaming event grammar into the same UI part
 * shapes the existing OpenAI / Anthropic / Google streams emit. The
 * Responses API uses a strict event-typed protocol (every event has a
 * `type` field naming the lifecycle phase) instead of the loose
 * `delta`-based shape Chat Completions uses.
 *
 * Emitted parts: `text-delta`, `reasoning-start` / `reasoning-delta` /
 * `reasoning-end`, `tool-input-start` / `tool-input-delta`, `tool-call`, and
 * exactly one trailing `finish` part.
 *
 * The finish reason starts from the terminal event's `response.status` and
 * is refined before it is emitted: a completed stream that produced tool
 * calls reports `tool-calls`, and an incomplete stream whose
 * `incomplete_details.reason` is `content_filter` reports `content-filter`.
 *
 * @param {AsyncIterable<Uint8Array>} stream - Raw SSE bytes of the response.
 * @yields {object} One UI part per recognised event, then the `finish` part.
 */
async function* streamOpenAIResponsesParts(stream) {
    const decoder = new TextDecoder();
    let buffer = "";
    const reasoningBlocks = new Map();
    const functionCalls = new Map();
    const startedToolCalls = new Set();
    let finishReason = null;
    let usage;
    let reasoningCounter = 0;
    let sawToolCall = false;
    let incompleteReason;
    for await (const chunk of stream) {
        buffer += decoder.decode(chunk, { stream: true });
        const parsed = parseSseChunk(buffer);
        buffer = parsed.remainder;
        for (const event of parsed.events) {
            if (event === "[DONE]")
                continue;
            const record = readRecord(event);
            const type = typeof record?.type === "string" ? record.type : undefined;
            if (!type)
                continue;
            // response.output_item.added: a new output item begins. Track
            // function_call items so their argument deltas can be attributed,
            // and reasoning items so summary deltas can group correctly.
            if (type === "response.output_item.added") {
                const item = readRecord(record?.item);
                const itemType = typeof item?.type === "string" ? item.type : undefined;
                const itemId = typeof item?.id === "string" ? item.id : undefined;
                if (itemType === "function_call" && itemId) {
                    const callId = typeof item?.call_id === "string" ? item.call_id : itemId;
                    const name = typeof item?.name === "string" ? item.name : "";
                    functionCalls.set(itemId, {
                        id: itemId,
                        toolCallId: callId,
                        name,
                        arguments: "",
                    });
                }
                if (itemType === "reasoning" && itemId) {
                    reasoningBlocks.set(itemId, {
                        id: `reasoning-${reasoningCounter++}`,
                        emittedStart: false,
                    });
                }
                continue;
            }
            // response.output_text.delta: text chunk for a message item.
            if (type === "response.output_text.delta" && typeof record?.delta === "string") {
                if (record.delta.length > 0) {
                    yield { type: "text-delta", delta: record.delta };
                }
                continue;
            }
            // response.reasoning_summary_text.delta: reasoning summary text
            // chunk. The first delta on an item lazily emits the
            // reasoning-start event so callers can group deltas into a part.
            if (type === "response.reasoning_summary_text.delta" && typeof record?.delta === "string") {
                const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
                const state = itemId ? reasoningBlocks.get(itemId) : undefined;
                if (state && record.delta.length > 0) {
                    if (!state.emittedStart) {
                        yield { type: "reasoning-start", id: state.id };
                        state.emittedStart = true;
                    }
                    yield { type: "reasoning-delta", id: state.id, delta: record.delta };
                }
                continue;
            }
            // response.function_call_arguments.delta: tool call argument
            // chunk. The first delta lazily emits tool-input-start.
            if (type === "response.function_call_arguments.delta" && typeof record?.delta === "string") {
                const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
                const state = itemId ? functionCalls.get(itemId) : undefined;
                if (state && record.delta.length > 0) {
                    if (!startedToolCalls.has(state.id)) {
                        yield {
                            type: "tool-input-start",
                            id: state.toolCallId,
                            toolName: state.name,
                        };
                        startedToolCalls.add(state.id);
                    }
                    state.arguments += record.delta;
                    yield {
                        type: "tool-input-delta",
                        id: state.toolCallId,
                        delta: record.delta,
                    };
                }
                continue;
            }
            // response.output_item.done: an item has finished emitting deltas.
            // Close any reasoning or function-call streams that were open.
            if (type === "response.output_item.done") {
                const item = readRecord(record?.item);
                const itemType = typeof item?.type === "string" ? item.type : undefined;
                const itemId = typeof item?.id === "string" ? item.id : undefined;
                if (itemType === "reasoning" && itemId) {
                    const state = reasoningBlocks.get(itemId);
                    if (state?.emittedStart) {
                        yield { type: "reasoning-end", id: state.id };
                    }
                    reasoningBlocks.delete(itemId);
                }
                if (itemType === "function_call" && itemId) {
                    const state = functionCalls.get(itemId);
                    if (state) {
                        // The finished item repeats the complete argument string.
                        // Use it when no deltas were streamed so the tool call is
                        // not emitted with an empty input.
                        const finalArguments = typeof item?.arguments === "string" ? item.arguments : "";
                        yield {
                            type: "tool-call",
                            toolCallId: state.toolCallId,
                            toolName: state.name,
                            input: state.arguments.length > 0 ? state.arguments : finalArguments,
                        };
                        sawToolCall = true;
                    }
                    functionCalls.delete(itemId);
                }
                continue;
            }
            // response.completed: terminal event with the final response object
            // (status + usage). Capture both for the final finish part.
            if (type === "response.completed") {
                usage = extractOpenAIResponsesUsage(record) ?? usage;
                const responseRecord = readRecord(record?.response);
                finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status);
                incompleteReason = readRecord(responseRecord?.incomplete_details)?.reason;
                continue;
            }
            if (type === "response.failed" || type === "response.incomplete") {
                const responseRecord = readRecord(record?.response);
                finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status) ??
                    (type === "response.failed"
                        ? { unified: "error", raw: "failed" }
                        : { unified: "length", raw: "incomplete" });
                incompleteReason = readRecord(responseRecord?.incomplete_details)?.reason;
                usage = extractOpenAIResponsesUsage(record) ?? usage;
                continue;
            }
        }
    }
    // Close any reasoning streams still open at end-of-stream (defensive:
    // a clean Responses API stream always closes them via output_item.done).
    for (const state of reasoningBlocks.values()) {
        if (state.emittedStart) {
            yield { type: "reasoning-end", id: state.id };
        }
    }
    // `status` alone cannot distinguish "model stopped" from "model asked for
    // tools", nor "hit the token limit" from "blocked by the content filter".
    // Refine only object-shaped reasons so any other value passes through.
    if (finishReason !== null && typeof finishReason === "object") {
        if (finishReason.unified === "stop" && sawToolCall) {
            finishReason = { unified: "tool-calls", raw: finishReason.raw };
        }
        else if (finishReason.unified === "length" && incompleteReason === "content_filter") {
            finishReason = { unified: "content-filter", raw: finishReason.raw };
        }
    }
    yield {
        type: "finish",
        finishReason,
        ...(usage ? { usage } : {}),
    };
}
|
|
2503
|
+
/**
 * Build a model runtime that talks to the OpenAI Responses endpoint.
 *
 * @param {object} config - Provider configuration. Reads `fetch` (falls back
 *   to `globalThis.fetch`), `name` (falls back to `"openai"`), `baseURL` and
 *   `apiKey`.
 * @param {string} modelId - Model identifier forwarded to the request builder.
 * @returns {object} Runtime exposing `provider`, `modelId`,
 *   `specificationVersion`, `supportedUrls`, `doGenerate` and `doStream`.
 *   Both methods return a promise and attach `warnings` to the result only
 *   when the request builder collected at least one.
 */
export function createOpenAIResponsesRuntime(config, modelId) {
    const fetchImpl = config.fetch ?? globalThis.fetch;
    // Shared request descriptor for both the JSON and the streaming call.
    const describeRequest = (url, providerName, options, body) => ({
        url,
        fetchImpl,
        providerLabel: providerName,
        providerKind: "openai",
        init: {
            method: "POST",
            headers: createRequestHeaders({
                apiKeyHeaderName: "authorization",
                apiKey: `Bearer ${config.apiKey}`,
                extraHeaders: options.headers,
            }),
            body: JSON.stringify(body),
            signal: options.abortSignal,
        },
    });
    // Drain the collector first, then build the result and append warnings
    // only when there are any.
    const attachWarnings = (collector, makeResult) => {
        const drained = collector.drain();
        const result = { ...makeResult() };
        if (drained.length > 0) {
            result.warnings = drained;
        }
        return result;
    };
    return {
        provider: config.name ?? "openai",
        modelId,
        specificationVersion: "v3",
        supportedUrls: {},
        doGenerate(optionsForRuntime) {
            const options = optionsForRuntime;
            const url = getOpenAIResponsesUrl(config.baseURL);
            const collector = createWarningCollector();
            const providerName = config.name ?? "openai";
            const body = buildOpenAIResponsesRequest(modelId, providerName, options, false, collector);
            return requestJson(describeRequest(url, providerName, options, body))
                .then((payload) => attachWarnings(collector, () => buildOpenAIResponsesGenerateResult(payload)));
        },
        doStream(optionsForRuntime) {
            const options = optionsForRuntime;
            const url = getOpenAIResponsesUrl(config.baseURL);
            const collector = createWarningCollector();
            const providerName = config.name ?? "openai";
            const body = buildOpenAIResponsesRequest(modelId, providerName, options, true, collector);
            return requestStream(describeRequest(url, providerName, options, body))
                .then((responseStream) => attachWarnings(collector, () => ({
                    stream: ReadableStream.from(streamOpenAIResponsesParts(responseStream)),
                })));
        },
    };
}
|
|
@@ -1289,11 +2575,13 @@ export function createAnthropicModelRuntime(config, modelId) {
|
|
|
1289
2575
|
doGenerate(optionsForRuntime) {
|
|
1290
2576
|
const options = optionsForRuntime;
|
|
1291
2577
|
const url = getAnthropicMessagesUrl(config.baseURL);
|
|
1292
|
-
const
|
|
2578
|
+
const warnings = createWarningCollector();
|
|
2579
|
+
const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, false, warnings);
|
|
1293
2580
|
return requestJson({
|
|
1294
2581
|
url,
|
|
1295
2582
|
fetchImpl,
|
|
1296
2583
|
providerLabel: config.name ?? "anthropic",
|
|
2584
|
+
providerKind: "anthropic",
|
|
1297
2585
|
init: {
|
|
1298
2586
|
method: "POST",
|
|
1299
2587
|
headers: createAnthropicRequestHeaders({
|
|
@@ -1304,16 +2592,24 @@ export function createAnthropicModelRuntime(config, modelId) {
|
|
|
1304
2592
|
body: JSON.stringify(body),
|
|
1305
2593
|
signal: options.abortSignal,
|
|
1306
2594
|
},
|
|
1307
|
-
}).then(
|
|
2595
|
+
}).then((payload) => {
|
|
2596
|
+
const drained = warnings.drain();
|
|
2597
|
+
return {
|
|
2598
|
+
...buildAnthropicGenerateResult(payload),
|
|
2599
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2600
|
+
};
|
|
2601
|
+
});
|
|
1308
2602
|
},
|
|
1309
2603
|
doStream(optionsForRuntime) {
|
|
1310
2604
|
const options = optionsForRuntime;
|
|
1311
2605
|
const url = getAnthropicMessagesUrl(config.baseURL);
|
|
1312
|
-
const
|
|
2606
|
+
const warnings = createWarningCollector();
|
|
2607
|
+
const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, true, warnings);
|
|
1313
2608
|
return requestStream({
|
|
1314
2609
|
url,
|
|
1315
2610
|
fetchImpl,
|
|
1316
2611
|
providerLabel: config.name ?? "anthropic",
|
|
2612
|
+
providerKind: "anthropic",
|
|
1317
2613
|
init: {
|
|
1318
2614
|
method: "POST",
|
|
1319
2615
|
headers: createAnthropicRequestHeaders({
|
|
@@ -1324,9 +2620,13 @@ export function createAnthropicModelRuntime(config, modelId) {
|
|
|
1324
2620
|
body: JSON.stringify(body),
|
|
1325
2621
|
signal: options.abortSignal,
|
|
1326
2622
|
},
|
|
1327
|
-
}).then((responseStream) =>
|
|
1328
|
-
|
|
1329
|
-
|
|
2623
|
+
}).then((responseStream) => {
|
|
2624
|
+
const drained = warnings.drain();
|
|
2625
|
+
return {
|
|
2626
|
+
stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
|
|
2627
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2628
|
+
};
|
|
2629
|
+
});
|
|
1330
2630
|
},
|
|
1331
2631
|
};
|
|
1332
2632
|
}
|
|
@@ -1340,11 +2640,13 @@ export function createGoogleModelRuntime(config, modelId) {
|
|
|
1340
2640
|
doGenerate(optionsForRuntime) {
|
|
1341
2641
|
const options = optionsForRuntime;
|
|
1342
2642
|
const url = getGoogleGenerateContentUrl(config.baseURL, modelId);
|
|
1343
|
-
const
|
|
2643
|
+
const warnings = createWarningCollector();
|
|
2644
|
+
const body = buildGoogleGenerateContentRequest(config.name ?? "google", options, warnings);
|
|
1344
2645
|
return requestJson({
|
|
1345
2646
|
url,
|
|
1346
2647
|
fetchImpl,
|
|
1347
2648
|
providerLabel: config.name ?? "google",
|
|
2649
|
+
providerKind: "google",
|
|
1348
2650
|
init: {
|
|
1349
2651
|
method: "POST",
|
|
1350
2652
|
headers: createRequestHeaders({
|
|
@@ -1355,16 +2657,24 @@ export function createGoogleModelRuntime(config, modelId) {
|
|
|
1355
2657
|
body: JSON.stringify(body),
|
|
1356
2658
|
signal: options.abortSignal,
|
|
1357
2659
|
},
|
|
1358
|
-
}).then(
|
|
2660
|
+
}).then((payload) => {
|
|
2661
|
+
const drained = warnings.drain();
|
|
2662
|
+
return {
|
|
2663
|
+
...buildGoogleGenerateResult(payload),
|
|
2664
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2665
|
+
};
|
|
2666
|
+
});
|
|
1359
2667
|
},
|
|
1360
2668
|
doStream(optionsForRuntime) {
|
|
1361
2669
|
const options = optionsForRuntime;
|
|
1362
2670
|
const url = getGoogleStreamGenerateContentUrl(config.baseURL, modelId);
|
|
1363
|
-
const
|
|
2671
|
+
const warnings = createWarningCollector();
|
|
2672
|
+
const body = buildGoogleGenerateContentRequest(config.name ?? "google", options, warnings);
|
|
1364
2673
|
return requestStream({
|
|
1365
2674
|
url,
|
|
1366
2675
|
fetchImpl,
|
|
1367
2676
|
providerLabel: config.name ?? "google",
|
|
2677
|
+
providerKind: "google",
|
|
1368
2678
|
init: {
|
|
1369
2679
|
method: "POST",
|
|
1370
2680
|
headers: createRequestHeaders({
|
|
@@ -1375,9 +2685,13 @@ export function createGoogleModelRuntime(config, modelId) {
|
|
|
1375
2685
|
body: JSON.stringify(body),
|
|
1376
2686
|
signal: options.abortSignal,
|
|
1377
2687
|
},
|
|
1378
|
-
}).then((responseStream) =>
|
|
1379
|
-
|
|
1380
|
-
|
|
2688
|
+
}).then((responseStream) => {
|
|
2689
|
+
const drained = warnings.drain();
|
|
2690
|
+
return {
|
|
2691
|
+
stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
|
|
2692
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2693
|
+
};
|
|
2694
|
+
});
|
|
1381
2695
|
},
|
|
1382
2696
|
};
|
|
1383
2697
|
}
|
|
@@ -1400,6 +2714,7 @@ export function createOpenAIEmbeddingRuntime(config, modelId) {
|
|
|
1400
2714
|
url,
|
|
1401
2715
|
fetchImpl,
|
|
1402
2716
|
providerLabel: config.name ?? "openai",
|
|
2717
|
+
providerKind: "openai",
|
|
1403
2718
|
init: {
|
|
1404
2719
|
method: "POST",
|
|
1405
2720
|
headers: {
|
|
@@ -1442,6 +2757,7 @@ export function createGoogleEmbeddingRuntime(config, modelId) {
|
|
|
1442
2757
|
url,
|
|
1443
2758
|
fetchImpl,
|
|
1444
2759
|
providerLabel: config.name ?? "google",
|
|
2760
|
+
providerKind: "google",
|
|
1445
2761
|
init: {
|
|
1446
2762
|
method: "POST",
|
|
1447
2763
|
headers: {
|