veryfront 0.1.207 → 0.1.209
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/deno.js +1 -1
- package/esm/src/provider/runtime-loader.d.ts +47 -0
- package/esm/src/provider/runtime-loader.d.ts.map +1 -1
- package/esm/src/provider/runtime-loader.js +1386 -70
- package/esm/src/provider/types.d.ts +2 -0
- package/esm/src/provider/types.d.ts.map +1 -1
- package/esm/src/utils/version-constant.d.ts +1 -1
- package/esm/src/utils/version-constant.js +1 -1
- package/package.json +1 -1
- package/src/deno.js +1 -1
- package/src/src/provider/runtime-loader.ts +1872 -102
- package/src/src/provider/types.ts +2 -0
- package/src/src/utils/version-constant.ts +1 -1
|
@@ -36,6 +36,18 @@ type RuntimePromptMessage =
|
|
|
36
36
|
input: unknown;
|
|
37
37
|
providerExecuted?: boolean;
|
|
38
38
|
}
|
|
39
|
+
| {
|
|
40
|
+
// Anthropic thinking block replay. Carries the original signed
|
|
41
|
+
// thinking trace so that on the next turn Anthropic can verify
|
|
42
|
+
// the signature and let Claude continue reasoning from the same
|
|
43
|
+
// point. `text` + `signature` are the normal pair for an
|
|
44
|
+
// un-redacted thinking block; `redactedData` is set instead of
|
|
45
|
+
// both when Anthropic returned an encrypted opaque payload.
|
|
46
|
+
type: "reasoning";
|
|
47
|
+
text?: string;
|
|
48
|
+
signature?: string;
|
|
49
|
+
redactedData?: string;
|
|
50
|
+
}
|
|
39
51
|
>;
|
|
40
52
|
}
|
|
41
53
|
| {
|
|
@@ -60,6 +72,67 @@ type RuntimeToolDefinition =
|
|
|
60
72
|
id: `${string}.${string}`;
|
|
61
73
|
args: Record<string, unknown>;
|
|
62
74
|
};
|
|
75
|
+
/**
|
|
76
|
+
* TTL for a single prompt-cache breakpoint.
|
|
77
|
+
*
|
|
78
|
+
* `true` and `"5m"` both map to Anthropic's default ephemeral (5-minute) cache.
|
|
79
|
+
* `"1h"` maps to the extended 1-hour cache at a 2x write cost. Callers can
|
|
80
|
+
* pick per breakpoint target.
|
|
81
|
+
*/
|
|
82
|
+
type ProviderCacheTtl = boolean | "5m" | "1h";
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Per-provider prompt / context caching controls.
|
|
86
|
+
*
|
|
87
|
+
* For Anthropic, flipping these on emits `cache_control: { type: "ephemeral" }`
|
|
88
|
+
* breakpoints on the assembled system prompt and/or the last tool definition
|
|
89
|
+
* sent to the Messages API, enabling Anthropic's explicit prompt cache.
|
|
90
|
+
*
|
|
91
|
+
* OpenAI's prompt cache is automatic on gpt-4o+ and has no request-side
|
|
92
|
+
* directive to emit, so this option is a no-op for the OpenAI runtime. Google
|
|
93
|
+
* uses a separate `cachedContent` resource model that is intentionally not
|
|
94
|
+
* covered by this option (it belongs on a dedicated Gemini-specific surface).
|
|
95
|
+
*/
|
|
96
|
+
type ProviderCacheControlOption = {
|
|
97
|
+
/**
|
|
98
|
+
* Attach a cache breakpoint to the final system-prompt text block.
|
|
99
|
+
* Use when the system prompt is large and reused across requests.
|
|
100
|
+
*/
|
|
101
|
+
system?: ProviderCacheTtl;
|
|
102
|
+
/**
|
|
103
|
+
* Attach a cache breakpoint to the last tool definition in `tools`.
|
|
104
|
+
* Use when the tool schemas are large and identical across requests.
|
|
105
|
+
*/
|
|
106
|
+
tools?: ProviderCacheTtl;
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Unified effort level for extended reasoning / thinking. Maps to
|
|
111
|
+
* per-provider knobs: Anthropic `thinking.budget_tokens`, OpenAI
|
|
112
|
+
* `reasoning_effort`, Gemini `thinkingConfig.thinkingBudget`.
|
|
113
|
+
*/
|
|
114
|
+
type ProviderReasoningEffort = "low" | "medium" | "high" | "max";
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* Unified reasoning / thinking request option.
|
|
118
|
+
*
|
|
119
|
+
* Setting `enabled: true` turns on extended thinking on providers that
|
|
120
|
+
* support it (Anthropic Claude 4.x, OpenAI o-series, Gemini 2.5+). The
|
|
121
|
+
* `effort` field picks a coarse budget; when `budgetTokens` is set it
|
|
122
|
+
* wins for providers that take a numeric budget (Anthropic, Gemini).
|
|
123
|
+
*
|
|
124
|
+
* Providers that do not support reasoning treat this as a no-op. On
|
|
125
|
+
* Anthropic + OpenAI, enabling reasoning also disables sampling params
|
|
126
|
+
* that the providers reject in combination (`temperature`, `topP`,
|
|
127
|
+
* `topK`, `presencePenalty`, `frequencyPenalty`) — silently dropping
|
|
128
|
+
* them rather than failing the request.
|
|
129
|
+
*/
|
|
130
|
+
type ProviderReasoningOption = {
|
|
131
|
+
enabled?: boolean;
|
|
132
|
+
effort?: ProviderReasoningEffort;
|
|
133
|
+
budgetTokens?: number;
|
|
134
|
+
};
|
|
135
|
+
|
|
63
136
|
type OpenAICompatibleLanguageOptions = {
|
|
64
137
|
prompt: RuntimePromptMessage[];
|
|
65
138
|
maxOutputTokens?: number;
|
|
@@ -76,6 +149,128 @@ type OpenAICompatibleLanguageOptions = {
|
|
|
76
149
|
providerOptions?: Record<string, unknown>;
|
|
77
150
|
includeRawChunks?: boolean;
|
|
78
151
|
abortSignal?: AbortSignal;
|
|
152
|
+
/**
|
|
153
|
+
* Per-provider prompt / context caching controls. See
|
|
154
|
+
* {@link ProviderCacheControlOption}. When unset, caching behaviour is
|
|
155
|
+
* unchanged on every provider.
|
|
156
|
+
*/
|
|
157
|
+
cacheControl?: ProviderCacheControlOption;
|
|
158
|
+
/**
|
|
159
|
+
* Enable extended reasoning / thinking on providers that support it.
|
|
160
|
+
* See {@link ProviderReasoningOption}. When unset, reasoning behaviour
|
|
161
|
+
* is unchanged on every provider.
|
|
162
|
+
*/
|
|
163
|
+
reasoning?: ProviderReasoningOption;
|
|
164
|
+
/**
|
|
165
|
+
* Stable per-user identifier for rate-limiting, abuse detection, and
|
|
166
|
+
* billing attribution. Maps to:
|
|
167
|
+
* - Anthropic: `metadata.user_id`
|
|
168
|
+
* - OpenAI: `user`
|
|
169
|
+
* - Google: `labels.user_id` (when {@link requestLabels} is unset)
|
|
170
|
+
*/
|
|
171
|
+
userId?: string;
|
|
172
|
+
/**
|
|
173
|
+
* Provider-specific label map for Google Gemini's `labels` field.
|
|
174
|
+
* Anthropic and OpenAI don't have an arbitrary-label equivalent, so
|
|
175
|
+
* this is intentionally Google-only. When unset, no labels are sent.
|
|
176
|
+
*/
|
|
177
|
+
requestLabels?: Record<string, string>;
|
|
178
|
+
/**
|
|
179
|
+
* OpenAI-specific. Maps to the `service_tier` field on Chat Completions
|
|
180
|
+
* which trades latency for cost. Documented values:
|
|
181
|
+
*
|
|
182
|
+
* - `default` — standard processing (default if unset)
|
|
183
|
+
* - `flex` — lower-priority queue, lower per-token cost, longer
|
|
184
|
+
* expected latency. Useful for batchy or non-interactive workloads.
|
|
185
|
+
* - `scale` — reserved-capacity tier with strict latency SLOs.
|
|
186
|
+
* - `auto` — let OpenAI pick.
|
|
187
|
+
*
|
|
188
|
+
* Forwarded verbatim. Anthropic and Google have no equivalent and
|
|
189
|
+
* the field is silently omitted on those providers.
|
|
190
|
+
*/
|
|
191
|
+
serviceTier?: "auto" | "default" | "flex" | "scale";
|
|
192
|
+
/**
|
|
193
|
+
* OpenAI-specific. When `false`, OpenAI runs tool calls sequentially
|
|
194
|
+
* instead of in parallel. Useful for ordered side effects where
|
|
195
|
+
* concurrent calls would race. Default behaviour (unset) is parallel.
|
|
196
|
+
*/
|
|
197
|
+
parallelToolCalls?: boolean;
|
|
198
|
+
/**
|
|
199
|
+
* Structured-output response format. Maps to OpenAI's `response_format`
|
|
200
|
+
* field on Chat Completions (and Responses). Three variants:
|
|
201
|
+
*
|
|
202
|
+
* - `{ type: "text" }` — the default (no constraint).
|
|
203
|
+
* - `{ type: "json" }` — emits OpenAI's `response_format:
|
|
204
|
+
* { type: "json_object" }` to force the model to return valid JSON.
|
|
205
|
+
* - `{ type: "json_schema", name, schema, strict? }` — emits
|
|
206
|
+
* OpenAI's `response_format: { type: "json_schema", json_schema: {
|
|
207
|
+
* name, schema, strict } }` for fully constrained structured
|
|
208
|
+
* outputs (gpt-4o-2024-08-06+).
|
|
209
|
+
*
|
|
210
|
+
* On Anthropic and Google this option emits an "unsupported-setting"
|
|
211
|
+
* warning when set to anything other than `text` (those providers
|
|
212
|
+
* have their own structured-output surfaces and need a dedicated
|
|
213
|
+
* follow-up to wire them in).
|
|
214
|
+
*/
|
|
215
|
+
responseFormat?:
|
|
216
|
+
| { type: "text" }
|
|
217
|
+
| { type: "json" }
|
|
218
|
+
| {
|
|
219
|
+
type: "json_schema";
|
|
220
|
+
name: string;
|
|
221
|
+
schema: unknown;
|
|
222
|
+
description?: string;
|
|
223
|
+
strict?: boolean;
|
|
224
|
+
};
|
|
225
|
+
/**
|
|
226
|
+
* Anthropic-specific. `container` field for programmatic tool calling
|
|
227
|
+
* and agent skills. Anthropic uses this to scope a session to a
|
|
228
|
+
* sandboxed container (e.g. for Computer Use, code execution
|
|
229
|
+
* sandboxes, or skills loaded from a container). Forwarded verbatim.
|
|
230
|
+
*
|
|
231
|
+
* The shape varies — string container id or a structured object
|
|
232
|
+
* depending on the feature. Caller passes whatever Anthropic's docs
|
|
233
|
+
* specify for the target feature.
|
|
234
|
+
*/
|
|
235
|
+
anthropicContainer?: unknown;
|
|
236
|
+
/**
|
|
237
|
+
* Google-specific. Reference to a previously-created Gemini cached
|
|
238
|
+
* content resource (created via the separate caches API) to attach
|
|
239
|
+
* to this request. Resource name format:
|
|
240
|
+
* `cachedContents/<id>`. See https://ai.google.dev/gemini-api/docs/caching.
|
|
241
|
+
*
|
|
242
|
+
* Cache creation itself is out of scope for the runtime — callers
|
|
243
|
+
* use the Gemini REST API or SDK to create the cache, then pass the
|
|
244
|
+
* resource name here on each subsequent generate call to attach the
|
|
245
|
+
* cached prefix and avoid re-paying for it.
|
|
246
|
+
*/
|
|
247
|
+
googleCachedContent?: string;
|
|
248
|
+
/**
|
|
249
|
+
* Google-specific. Per-request safety filter configuration for
|
|
250
|
+
* Gemini. Each entry pairs a HARM_CATEGORY_* with a threshold
|
|
251
|
+
* (BLOCK_NONE / BLOCK_LOW_AND_ABOVE / BLOCK_MEDIUM_AND_ABOVE /
|
|
252
|
+
* BLOCK_ONLY_HIGH). Forwarded verbatim as the `safetySettings`
|
|
253
|
+
* field. See https://ai.google.dev/gemini-api/docs/safety-settings.
|
|
254
|
+
*/
|
|
255
|
+
googleSafetySettings?: Array<{
|
|
256
|
+
category: string;
|
|
257
|
+
threshold: string;
|
|
258
|
+
}>;
|
|
259
|
+
/**
|
|
260
|
+
* Anthropic-specific. Native MCP server definitions to pass directly
|
|
261
|
+
* on the Messages API request body. Lets callers register MCP servers
|
|
262
|
+
* server-side instead of reloading them into local function tools.
|
|
263
|
+
*
|
|
264
|
+
* Caller must opt into the MCP beta by adding the matching header to
|
|
265
|
+
* `headers`, e.g. `{ "anthropic-beta": "mcp-client-2025-04-04" }`.
|
|
266
|
+
* Without that header Anthropic will reject the request.
|
|
267
|
+
*
|
|
268
|
+
* Each entry is forwarded with camelCase keys converted to snake_case
|
|
269
|
+
* so `authorizationToken` → `authorization_token`,
|
|
270
|
+
* `toolConfiguration.allowedTools` → `tool_configuration.allowed_tools`,
|
|
271
|
+
* etc.
|
|
272
|
+
*/
|
|
273
|
+
mcpServers?: Array<Record<string, unknown>>;
|
|
79
274
|
};
|
|
80
275
|
type OpenAICompatibleChatMessage =
|
|
81
276
|
| { role: "system"; content: string }
|
|
@@ -142,7 +337,12 @@ type AnthropicCompatibleRequest = {
|
|
|
142
337
|
messages: AnthropicCompatibleMessage[];
|
|
143
338
|
max_tokens: number;
|
|
144
339
|
stream?: boolean;
|
|
145
|
-
|
|
340
|
+
/**
|
|
341
|
+
* String form is the classic shorthand. Array-of-blocks form is required
|
|
342
|
+
* when the system prompt carries a cache_control breakpoint, because
|
|
343
|
+
* cache_control lives on an individual content block, not on a raw string.
|
|
344
|
+
*/
|
|
345
|
+
system?: string | Array<Record<string, unknown>>;
|
|
146
346
|
temperature?: number;
|
|
147
347
|
top_p?: number;
|
|
148
348
|
stop_sequences?: string[];
|
|
@@ -168,9 +368,7 @@ type GoogleCompatibleRequest = {
|
|
|
168
368
|
systemInstruction?: {
|
|
169
369
|
parts: Array<{ text: string }>;
|
|
170
370
|
};
|
|
171
|
-
tools?: Array<
|
|
172
|
-
functionDeclarations: Array<Record<string, unknown>>;
|
|
173
|
-
}>;
|
|
371
|
+
tools?: Array<Record<string, unknown>>;
|
|
174
372
|
toolConfig?: {
|
|
175
373
|
functionCallingConfig: Record<string, unknown>;
|
|
176
374
|
};
|
|
@@ -198,6 +396,10 @@ function getOpenAIChatCompletionsUrl(baseURL?: string): string {
|
|
|
198
396
|
return joinUrl(baseURL ?? DEFAULT_OPENAI_BASE_URL, "chat/completions");
|
|
199
397
|
}
|
|
200
398
|
|
|
399
|
+
/**
 * Builds the URL of the OpenAI `responses` endpoint.
 *
 * @param baseURL Optional base URL; `DEFAULT_OPENAI_BASE_URL` is used when it
 *   is `undefined` (or `null`).
 * @returns The base URL joined with the `responses` path segment.
 */
function getOpenAIResponsesUrl(baseURL?: string): string {
  const root = baseURL ?? DEFAULT_OPENAI_BASE_URL;
  return joinUrl(root, "responses");
}
|
|
402
|
+
|
|
201
403
|
function getGoogleGenerateContentUrl(baseURL: string | undefined, modelId: string): string {
|
|
202
404
|
return joinUrl(
|
|
203
405
|
baseURL ?? DEFAULT_GOOGLE_BASE_URL,
|
|
@@ -287,9 +489,203 @@ function extractGoogleUsageTokens(payload: unknown): number | undefined {
|
|
|
287
489
|
return typeof promptTokenCount === "number" ? promptTokenCount : undefined;
|
|
288
490
|
}
|
|
289
491
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
492
|
+
type ProviderKind = "anthropic" | "openai" | "google";
|
|
493
|
+
|
|
494
|
+
/**
|
|
495
|
+
* Structured warning emitted when a provider runtime drops or rewrites a
|
|
496
|
+
* caller-provided option. Mirrors the AI ecosystem convention (Vercel AI
|
|
497
|
+
* SDK, LangChain) of returning `unsupported-setting` warnings on the
|
|
498
|
+
* runtime result so callers can discover silently-dropped fields without
|
|
499
|
+
* having to read the source.
|
|
500
|
+
*/
|
|
501
|
+
export type ProviderWarning = {
|
|
502
|
+
type: "unsupported-setting" | "other";
|
|
503
|
+
setting?: string;
|
|
504
|
+
details?: string;
|
|
505
|
+
provider: ProviderKind;
|
|
506
|
+
};
|
|
507
|
+
|
|
508
|
+
/**
|
|
509
|
+
* Mutable warning collector handed to per-provider request builders so
|
|
510
|
+
* they can append entries during the build pass instead of plumbing a
|
|
511
|
+
* return-tuple shape through every helper.
|
|
512
|
+
*/
|
|
513
|
+
type WarningCollector = {
|
|
514
|
+
push(warning: ProviderWarning): void;
|
|
515
|
+
drain(): ProviderWarning[];
|
|
516
|
+
};
|
|
517
|
+
|
|
518
|
+
/**
 * Creates a warning collector backed by a private array.
 *
 * `push` appends one warning. `drain` returns a shallow copy of everything
 * collected so far; it does NOT empty the collector, so repeated calls return
 * the same entries (plus anything pushed in between).
 */
function createWarningCollector(): WarningCollector {
  const collected: ProviderWarning[] = [];

  const push = (warning: ProviderWarning): void => {
    collected.push(warning);
  };

  // Hand out a copy so callers cannot mutate the internal list.
  const drain = (): ProviderWarning[] => [...collected];

  return { push, drain };
}
|
|
529
|
+
|
|
530
|
+
/**
 * Common base for the typed errors raised on failed provider requests.
 *
 * Callers (or a retry wrapper) should look at `retryable` first to decide
 * whether re-issuing the request makes sense. `retryAfterMs` is only assigned
 * when the provider supplied an explicit delay hint (Retry-After header,
 * Retry-Info trailer); otherwise the property is left unset.
 */
export class ProviderError extends Error {
  readonly provider: ProviderKind;
  readonly status: number;
  readonly retryable: boolean;
  readonly retryAfterMs?: number;

  /**
   * @param options.provider Provider the failed request was sent to.
   * @param options.status HTTP status code of the failed response.
   * @param options.message Human-readable error message.
   * @param options.retryable Whether re-issuing the request may succeed.
   * @param options.retryAfterMs Optional delay hint in milliseconds.
   */
  constructor(options: {
    provider: ProviderKind;
    status: number;
    message: string;
    retryable: boolean;
    retryAfterMs?: number;
  }) {
    super(options.message);

    const { provider, status, retryable, retryAfterMs } = options;

    // Report the concrete subclass name (e.g. "ProviderQuotaError").
    this.name = new.target.name;
    this.provider = provider;
    this.status = status;
    this.retryable = retryable;
    if (retryAfterMs !== undefined) {
      this.retryAfterMs = retryAfterMs;
    }
  }
}

/** The provider reports it is overloaded (Anthropic 529, OpenAI/Google 503). */
export class ProviderOverloadedError extends ProviderError {}

/** The provider is rate limiting this API key (a 429 that is not a hard quota failure). */
export class ProviderRateLimitError extends ProviderError {}

/** The provider account quota is exhausted — non-retryable. */
export class ProviderQuotaError extends ProviderError {}

/** Non-retryable 4xx/5xx that doesn't fit another bucket. */
export class ProviderRequestError extends ProviderError {}
|
|
571
|
+
|
|
572
|
+
/**
 * Converts a `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are understood:
 * - a non-negative number of seconds (fractions are rounded to the nearest
 *   millisecond);
 * - a date string accepted by `Date.parse`, converted to the time remaining
 *   from now and clamped to zero for dates already in the past.
 *
 * @param header Raw header value, or `null` when the header is absent.
 * @returns The delay in milliseconds, or `undefined` when the header is
 *   missing, blank, or not a usable value.
 */
function parseRetryAfterMs(header: string | null): number | undefined {
  // Trim first: `Number("   ")` is 0, which would otherwise turn a blank
  // header into an explicit "retry immediately" hint.
  const value = header?.trim();
  if (!value) return undefined;

  const asNumber = Number(value);
  if (!Number.isNaN(asNumber)) {
    // A numeric value must never reach the date parser: lenient engines may
    // read strings such as "-1" as a calendar date in the past, which would
    // turn an invalid header into a bogus 0 ms delay.
    return Number.isFinite(asNumber) && asNumber >= 0
      ? Math.round(asNumber * 1000)
      : undefined;
  }

  // HTTP-date form (rare in practice for LLM providers).
  const parsed = Date.parse(value);
  if (!Number.isNaN(parsed)) {
    return Math.max(0, parsed - Date.now());
  }
  return undefined;
}
|
|
585
|
+
|
|
586
|
+
/**
 * Inspects a non-2xx response and builds the most specific
 * {@link ProviderError} subclass available.
 *
 * The response body is read as text and used as the error message (falling
 * back to `"<status> <statusText>"` when it is empty or unreadable). When the
 * body is JSON with an `error` object, its string `code`, `type` or `status`
 * (first one present, in that order) is used to disambiguate cases where the
 * HTTP status alone is not enough — notably OpenAI `insufficient_quota` vs
 * `rate_limit_exceeded`, which both arrive as 429.
 *
 * Classification:
 * - Anthropic 529, OpenAI/Google 503 → {@link ProviderOverloadedError} (retryable)
 * - OpenAI 429 `insufficient_quota`, Google 429 `RESOURCE_EXHAUSTED`
 *   → {@link ProviderQuotaError} (non-retryable)
 * - any other 429 (OpenAI, Google, Anthropic) → {@link ProviderRateLimitError}
 *   (retryable)
 * - everything else → {@link ProviderRequestError} (non-retryable)
 *
 * Retryable errors carry `retryAfterMs` when the `retry-after` header parsed
 * to a usable delay.
 *
 * @param provider Provider the request was sent to.
 * @param response The failed (non-2xx) response; its body is consumed here.
 * @returns The classified error. This function never throws for an
 *   unreadable or non-JSON body.
 */
async function buildProviderError(
  provider: ProviderKind,
  response: Response,
): Promise<ProviderError> {
  const status = response.status;

  // A failed body read (already consumed, connection dropped mid-body) must
  // not replace the classified provider error with an unrelated exception.
  let rawBody = "";
  try {
    rawBody = await response.text();
  } catch {
    rawBody = "";
  }

  const message = rawBody.trim() || `${response.status} ${response.statusText}`.trim();
  const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));
  // Only retryable errors get the hint; keep the property absent when unknown.
  const retryHint = retryAfterMs !== undefined ? { retryAfterMs } : {};

  const parsedBody = (() => {
    try {
      return JSON.parse(rawBody) as Record<string, unknown>;
    } catch {
      return undefined;
    }
  })();
  // NOTE(review): readRecord is defined elsewhere in this file; presumably it
  // yields undefined for anything that is not an object — confirm.
  const errorRecord = readRecord(parsedBody?.error);
  const errorCode = typeof errorRecord?.code === "string"
    ? errorRecord.code
    : typeof errorRecord?.type === "string"
    ? errorRecord.type
    : typeof errorRecord?.status === "string"
    ? errorRecord.status
    : undefined;

  // Anthropic 529 = overloaded. Anthropic surfaces this with
  // { error: { type: "overloaded_error" } } in the body.
  if (provider === "anthropic" && status === 529) {
    return new ProviderOverloadedError({
      provider,
      status,
      message,
      retryable: true,
      ...retryHint,
    });
  }

  // OpenAI / Google 503 = overloaded.
  if ((provider === "openai" || provider === "google") && status === 503) {
    return new ProviderOverloadedError({
      provider,
      status,
      message,
      retryable: true,
      ...retryHint,
    });
  }

  if (status === 429) {
    // Hard quota failures cannot succeed on retry:
    //   - OpenAI `insufficient_quota`
    //   - Google `RESOURCE_EXHAUSTED` (almost always the daily free-tier
    //     quota, so retries can't succeed until it resets)
    const quotaExhausted =
      (provider === "openai" && errorCode === "insufficient_quota") ||
      (provider === "google" && errorCode === "RESOURCE_EXHAUSTED");
    if (quotaExhausted) {
      return new ProviderQuotaError({
        provider,
        status,
        message,
        retryable: false,
      });
    }

    // Every other 429 is transient rate limiting. This includes Anthropic's
    // 429 `rate_limit_error`, which previously fell through to the generic
    // non-retryable bucket and lost its Retry-After hint.
    return new ProviderRateLimitError({
      provider,
      status,
      message,
      retryable: true,
      ...retryHint,
    });
  }

  return new ProviderRequestError({
    provider,
    status,
    message,
    retryable: false,
  });
}
|
|
294
690
|
|
|
295
691
|
async function requestJson(options: {
|
|
@@ -297,11 +693,13 @@ async function requestJson(options: {
|
|
|
297
693
|
fetchImpl: typeof globalThis.fetch;
|
|
298
694
|
init: RequestInit;
|
|
299
695
|
providerLabel: string;
|
|
696
|
+
providerKind: ProviderKind;
|
|
300
697
|
}): Promise<unknown> {
|
|
301
698
|
const response = await options.fetchImpl(options.url, options.init);
|
|
302
699
|
if (!response.ok) {
|
|
303
|
-
const
|
|
304
|
-
|
|
700
|
+
const err = await buildProviderError(options.providerKind, response);
|
|
701
|
+
err.message = `${options.providerLabel} request failed: ${err.message}`;
|
|
702
|
+
throw err;
|
|
305
703
|
}
|
|
306
704
|
|
|
307
705
|
return response.json();
|
|
@@ -312,15 +710,22 @@ async function requestStream(options: {
|
|
|
312
710
|
fetchImpl: typeof globalThis.fetch;
|
|
313
711
|
init: RequestInit;
|
|
314
712
|
providerLabel: string;
|
|
713
|
+
providerKind: ProviderKind;
|
|
315
714
|
}): Promise<ReadableStream<Uint8Array>> {
|
|
316
715
|
const response = await options.fetchImpl(options.url, options.init);
|
|
317
716
|
if (!response.ok) {
|
|
318
|
-
const
|
|
319
|
-
|
|
717
|
+
const err = await buildProviderError(options.providerKind, response);
|
|
718
|
+
err.message = `${options.providerLabel} request failed: ${err.message}`;
|
|
719
|
+
throw err;
|
|
320
720
|
}
|
|
321
721
|
|
|
322
722
|
if (!response.body) {
|
|
323
|
-
throw new
|
|
723
|
+
throw new ProviderRequestError({
|
|
724
|
+
provider: options.providerKind,
|
|
725
|
+
status: response.status,
|
|
726
|
+
message: `${options.providerLabel} request failed: stream body missing`,
|
|
727
|
+
retryable: false,
|
|
728
|
+
});
|
|
324
729
|
}
|
|
325
730
|
|
|
326
731
|
return response.body;
|
|
@@ -366,6 +771,11 @@ function toOpenAICompatibleMessages(prompt: RuntimePromptMessage[]): OpenAICompa
|
|
|
366
771
|
text += part.text;
|
|
367
772
|
continue;
|
|
368
773
|
}
|
|
774
|
+
// OpenAI Chat Completions has no roundtrip slot for Anthropic
|
|
775
|
+
// thinking blocks — they get dropped on replay. Anthropic-only.
|
|
776
|
+
if (part.type === "reasoning") {
|
|
777
|
+
continue;
|
|
778
|
+
}
|
|
369
779
|
|
|
370
780
|
toolCalls.push({
|
|
371
781
|
id: part.toolCallId,
|
|
@@ -473,9 +883,15 @@ function normalizeAnthropicFinishReason(
|
|
|
473
883
|
}
|
|
474
884
|
}
|
|
475
885
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
886
|
+
type RuntimeUsage = {
|
|
887
|
+
inputTokens?: number;
|
|
888
|
+
outputTokens?: number;
|
|
889
|
+
totalTokens?: number;
|
|
890
|
+
cacheCreationInputTokens?: number;
|
|
891
|
+
cacheReadInputTokens?: number;
|
|
892
|
+
};
|
|
893
|
+
|
|
894
|
+
function extractAnthropicUsage(payload: unknown): RuntimeUsage | undefined {
|
|
479
895
|
const record = readRecord(payload);
|
|
480
896
|
const usage = readRecord(record?.usage);
|
|
481
897
|
if (!usage) {
|
|
@@ -484,6 +900,8 @@ function extractAnthropicUsage(payload: unknown):
|
|
|
484
900
|
|
|
485
901
|
const inputTokens = usage.input_tokens;
|
|
486
902
|
const outputTokens = usage.output_tokens;
|
|
903
|
+
const cacheCreationInputTokens = usage.cache_creation_input_tokens;
|
|
904
|
+
const cacheReadInputTokens = usage.cache_read_input_tokens;
|
|
487
905
|
|
|
488
906
|
return {
|
|
489
907
|
inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
|
|
@@ -492,17 +910,15 @@ function extractAnthropicUsage(payload: unknown):
|
|
|
492
910
|
? (typeof inputTokens === "number" ? inputTokens : 0) +
|
|
493
911
|
(typeof outputTokens === "number" ? outputTokens : 0)
|
|
494
912
|
: undefined,
|
|
913
|
+
...(typeof cacheCreationInputTokens === "number" ? { cacheCreationInputTokens } : {}),
|
|
914
|
+
...(typeof cacheReadInputTokens === "number" ? { cacheReadInputTokens } : {}),
|
|
495
915
|
};
|
|
496
916
|
}
|
|
497
917
|
|
|
498
918
|
function mergeUsage(
|
|
499
|
-
current:
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
next:
|
|
503
|
-
| { inputTokens?: number; outputTokens?: number; totalTokens?: number }
|
|
504
|
-
| undefined,
|
|
505
|
-
): { inputTokens?: number; outputTokens?: number; totalTokens?: number } | undefined {
|
|
919
|
+
current: RuntimeUsage | undefined,
|
|
920
|
+
next: RuntimeUsage | undefined,
|
|
921
|
+
): RuntimeUsage | undefined {
|
|
506
922
|
if (!current) {
|
|
507
923
|
return next;
|
|
508
924
|
}
|
|
@@ -513,11 +929,16 @@ function mergeUsage(
|
|
|
513
929
|
|
|
514
930
|
const inputTokens = next.inputTokens ?? current.inputTokens;
|
|
515
931
|
const outputTokens = next.outputTokens ?? current.outputTokens;
|
|
932
|
+
const cacheCreationInputTokens = next.cacheCreationInputTokens ??
|
|
933
|
+
current.cacheCreationInputTokens;
|
|
934
|
+
const cacheReadInputTokens = next.cacheReadInputTokens ?? current.cacheReadInputTokens;
|
|
516
935
|
|
|
517
936
|
return {
|
|
518
937
|
inputTokens,
|
|
519
938
|
outputTokens,
|
|
520
939
|
totalTokens: (inputTokens ?? 0) + (outputTokens ?? 0),
|
|
940
|
+
...(cacheCreationInputTokens !== undefined ? { cacheCreationInputTokens } : {}),
|
|
941
|
+
...(cacheReadInputTokens !== undefined ? { cacheReadInputTokens } : {}),
|
|
521
942
|
};
|
|
522
943
|
}
|
|
523
944
|
|
|
@@ -538,6 +959,26 @@ function toSnakeCaseRecord(record: Record<string, unknown>): Record<string, unkn
|
|
|
538
959
|
);
|
|
539
960
|
}
|
|
540
961
|
|
|
962
|
+
/**
 * Recursive snake_case key converter for nested config objects (used for
 * Anthropic mcp_servers, where authorizationToken / toolConfiguration /
 * allowedTools all need conversion).
 *
 * Arrays are mapped element by element, and objects have every own enumerable
 * key rewritten by replacing each uppercase ASCII letter with `_` plus its
 * lowercase form. Primitives, `null`, functions and built-in exotic objects
 * (`Date`, `Map`, `Set`, `RegExp`, …) are returned untouched.
 *
 * @param value Any value; the input is never mutated.
 * @returns A converted copy for arrays and ordinary objects, otherwise `value`.
 */
function deepSnakeCase(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map((item) => deepSnakeCase(item));
  }

  // Only ordinary objects are rebuilt. Rebuilding a Date/Map/Set from
  // Object.entries would silently collapse it to `{}` and lose its payload.
  const isOrdinaryObject = value !== null &&
    typeof value === "object" &&
    Object.prototype.toString.call(value) === "[object Object]";

  if (isOrdinaryObject) {
    return Object.fromEntries(
      Object.entries(value as Record<string, unknown>).map(([key, v]) => [
        key.replace(/[A-Z]/g, (match) => `_${match.toLowerCase()}`),
        deepSnakeCase(v),
      ]),
    );
  }

  return value;
}
|
|
981
|
+
|
|
541
982
|
function pushAnthropicUserContent(
|
|
542
983
|
messages: AnthropicCompatibleMessage[],
|
|
543
984
|
content: Array<Record<string, unknown>>,
|
|
@@ -558,9 +999,32 @@ function pushAnthropicUserContent(
|
|
|
558
999
|
});
|
|
559
1000
|
}
|
|
560
1001
|
|
|
1002
|
+
/**
 * Translates a {@link ProviderCacheTtl} into the `cache_control` object sent
 * to Anthropic.
 *
 * - `undefined` / `false` → `undefined` (no caching requested)
 * - `"1h"` → `{ type: "ephemeral", ttl: "1h" }` (extended 1-hour cache)
 * - anything else (`true`, `"5m"`) → `{ type: "ephemeral" }` (5-minute default)
 *
 * @param ttl The caller's cache setting for one breakpoint target.
 * @returns The `cache_control` value, or `undefined` when caching is off.
 */
function resolveAnthropicCacheControlBlock(
  ttl: ProviderCacheTtl | undefined,
): { type: "ephemeral"; ttl?: "1h" } | undefined {
  switch (ttl) {
    case undefined:
    case false:
      return undefined;
    case "1h":
      return { type: "ephemeral", ttl: "1h" };
    default:
      return { type: "ephemeral" };
  }
}
|
|
1020
|
+
|
|
561
1021
|
function toAnthropicMessages(
|
|
562
1022
|
prompt: RuntimePromptMessage[],
|
|
563
|
-
|
|
1023
|
+
systemCacheControl?: { type: "ephemeral"; ttl?: "1h" },
|
|
1024
|
+
): {
|
|
1025
|
+
system?: string | Array<Record<string, unknown>>;
|
|
1026
|
+
messages: AnthropicCompatibleMessage[];
|
|
1027
|
+
} {
|
|
564
1028
|
const systemParts: string[] = [];
|
|
565
1029
|
const messages: AnthropicCompatibleMessage[] = [];
|
|
566
1030
|
|
|
@@ -580,14 +1044,33 @@ function toAnthropicMessages(
|
|
|
580
1044
|
case "assistant":
|
|
581
1045
|
messages.push({
|
|
582
1046
|
role: "assistant",
|
|
583
|
-
content: message.content.map((part) =>
|
|
584
|
-
part.type === "text"
|
|
1047
|
+
content: message.content.map((part) => {
|
|
1048
|
+
if (part.type === "text") {
|
|
1049
|
+
return { type: "text", text: part.text };
|
|
1050
|
+
}
|
|
1051
|
+
if (part.type === "reasoning") {
|
|
1052
|
+
// Redacted thinking blocks roundtrip as the encrypted blob
|
|
1053
|
+
// form Anthropic gave us. Plain thinking blocks need the
|
|
1054
|
+
// signature to verify on the server.
|
|
1055
|
+
if (typeof part.redactedData === "string") {
|
|
1056
|
+
return {
|
|
1057
|
+
type: "redacted_thinking",
|
|
1058
|
+
data: part.redactedData,
|
|
1059
|
+
};
|
|
1060
|
+
}
|
|
1061
|
+
return {
|
|
1062
|
+
type: "thinking",
|
|
1063
|
+
thinking: part.text ?? "",
|
|
1064
|
+
...(typeof part.signature === "string" ? { signature: part.signature } : {}),
|
|
1065
|
+
};
|
|
1066
|
+
}
|
|
1067
|
+
return {
|
|
585
1068
|
type: "tool_use",
|
|
586
1069
|
id: part.toolCallId,
|
|
587
1070
|
name: part.toolName,
|
|
588
1071
|
input: part.input,
|
|
589
|
-
}
|
|
590
|
-
),
|
|
1072
|
+
};
|
|
1073
|
+
}),
|
|
591
1074
|
});
|
|
592
1075
|
break;
|
|
593
1076
|
case "tool":
|
|
@@ -603,14 +1086,63 @@ function toAnthropicMessages(
|
|
|
603
1086
|
}
|
|
604
1087
|
}
|
|
605
1088
|
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
1089
|
+
if (systemParts.length === 0) {
|
|
1090
|
+
return { messages };
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1093
|
+
const joined = systemParts.join("\n\n");
|
|
1094
|
+
|
|
1095
|
+
// Cache-controlled system prompts must use the array-of-blocks form so the
|
|
1096
|
+
// breakpoint lands on an individual content block. Callers that don't opt
|
|
1097
|
+
// in keep the legacy raw-string form for backward compatibility.
|
|
1098
|
+
if (systemCacheControl) {
|
|
1099
|
+
return {
|
|
1100
|
+
system: [{
|
|
1101
|
+
type: "text",
|
|
1102
|
+
text: joined,
|
|
1103
|
+
cache_control: systemCacheControl,
|
|
1104
|
+
}],
|
|
1105
|
+
messages,
|
|
1106
|
+
};
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
return { system: joined, messages };
|
|
1110
|
+
}
|
|
1111
|
+
|
|
1112
|
+
/**
 * Short-name → latest-versioned-type alias map for Anthropic provider tools.
 *
 * Anthropic tool types are date-stamped (e.g. `code_execution_20260120`) so
 * callers either pin a version or get the latest. We accept both: a caller
 * can pass `anthropic.code_execution` and we map to the latest known version,
 * or pass `anthropic.code_execution_20250522` and we forward verbatim.
 *
 * Versions chosen here are the latest documented releases as of 2026-04-15
 * — see https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview.
 * When Anthropic ships newer versions, update this map.
 */
const ANTHROPIC_TOOL_VERSION_ALIASES: Record<string, string> = {
  code_execution: "code_execution_20260120",
  computer_use: "computer_20250124",
  computer: "computer_20250124",
  text_editor: "text_editor_20250728",
  bash: "bash_20250124",
  memory: "memory_20250818",
  web_search: "web_search_20250305",
  web_fetch: "web_fetch_20250910",
};

/**
 * Resolves the tool type sent to Anthropic from the part of a tool id that
 * follows the `anthropic.` prefix.
 *
 * @param rawType Tool type as written by the caller, versioned or not.
 * @returns `rawType` unchanged when it already ends in an 8-digit date stamp
 *   or has no alias; otherwise the versioned type from
 *   `ANTHROPIC_TOOL_VERSION_ALIASES`.
 */
function resolveAnthropicProviderType(rawType: string): string {
  // Already-versioned types (contain a date stamp suffix) pass through verbatim.
  if (/_\d{8}$/.test(rawType)) {
    return rawType;
  }

  // Own-property check: a bare index lookup would also hit Object.prototype,
  // so names like "constructor" or "toString" would resolve to a function
  // instead of falling back to the caller's string.
  const hasAlias = Object.prototype.hasOwnProperty.call(
    ANTHROPIC_TOOL_VERSION_ALIASES,
    rawType,
  );
  return hasAlias ? (ANTHROPIC_TOOL_VERSION_ALIASES[rawType] ?? rawType) : rawType;
}
|
|
611
1142
|
|
|
612
1143
|
function toAnthropicTools(
|
|
613
1144
|
tools: RuntimeToolDefinition[] | undefined,
|
|
1145
|
+
toolsCacheControl?: { type: "ephemeral"; ttl?: "1h" },
|
|
614
1146
|
): Array<Record<string, unknown>> | undefined {
|
|
615
1147
|
if (!tools) {
|
|
616
1148
|
return undefined;
|
|
@@ -632,19 +1164,35 @@ function toAnthropicTools(
|
|
|
632
1164
|
continue;
|
|
633
1165
|
}
|
|
634
1166
|
|
|
635
|
-
const
|
|
636
|
-
if (
|
|
1167
|
+
const rawType = tool.id.slice("anthropic.".length);
|
|
1168
|
+
if (rawType.length === 0) {
|
|
637
1169
|
continue;
|
|
638
1170
|
}
|
|
639
1171
|
|
|
640
1172
|
normalized.push({
|
|
641
|
-
type:
|
|
1173
|
+
type: resolveAnthropicProviderType(rawType),
|
|
642
1174
|
name: tool.name,
|
|
643
1175
|
...toSnakeCaseRecord(tool.args),
|
|
644
1176
|
});
|
|
645
1177
|
}
|
|
646
1178
|
|
|
647
|
-
|
|
1179
|
+
if (normalized.length === 0) {
|
|
1180
|
+
return undefined;
|
|
1181
|
+
}
|
|
1182
|
+
|
|
1183
|
+
// Attach the cache breakpoint to the final tool entry so Anthropic caches
|
|
1184
|
+
// the entire tools block up to and including that definition. Earlier tool
|
|
1185
|
+
// entries are implicitly covered by the same breakpoint per Anthropic's
|
|
1186
|
+
// walk-backward cache lookup behaviour.
|
|
1187
|
+
if (toolsCacheControl) {
|
|
1188
|
+
const lastIndex = normalized.length - 1;
|
|
1189
|
+
normalized[lastIndex] = {
|
|
1190
|
+
...normalized[lastIndex],
|
|
1191
|
+
cache_control: toolsCacheControl,
|
|
1192
|
+
};
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
return normalized;
|
|
648
1196
|
}
|
|
649
1197
|
|
|
650
1198
|
function createAnthropicRequestHeaders(options: {
|
|
@@ -717,47 +1265,244 @@ function resolveAnthropicMaxTokens(
|
|
|
717
1265
|
return requested;
|
|
718
1266
|
}
|
|
719
1267
|
|
|
1268
|
+
/**
 * Map the unified reasoning option to an Anthropic `thinking.budget_tokens`
 * value.
 *
 * An explicit `budgetTokens` wins when it is a finite number of at least 1024
 * (Anthropic's minimum accepted budget); it is floored to a whole token
 * count. Anything else falls back to the effort tier: low = 1024,
 * medium (or unspecified) = 4096, high = 16384, max = 32768.
 *
 * @param option Unified reasoning option, possibly absent.
 * @returns The budget in tokens, or `undefined` when reasoning is not
 *   explicitly enabled.
 */
function resolveAnthropicThinkingBudget(
  option: ProviderReasoningOption | undefined,
): number | undefined {
  if (option?.enabled !== true) {
    return undefined;
  }

  const requested = option.budgetTokens;
  // Require a finite value: Infinity would pass a plain `>= 1024` check and
  // then serialise to `null` in the JSON body. Fractions are floored because
  // a token budget is a whole count.
  if (typeof requested === "number" && Number.isFinite(requested) && requested >= 1024) {
    return Math.floor(requested);
  }

  switch (option.effort) {
    case "low":
      return 1024;
    case "high":
      return 16_384;
    case "max":
      return 32_768;
    case "medium":
    default:
      return 4096;
  }
}
|
|
1296
|
+
|
|
720
1297
|
/**
 * Build the JSON body for an Anthropic Messages request from the unified
 * language-model options.
 *
 * Unified settings that are not forwarded to Anthropic here (penalties, seed,
 * topK, non-text responseFormat, stop sequences beyond the fourth, and
 * sampling params while thinking is enabled) are reported through `warnings`
 * rather than failing the call.
 *
 * @param modelId Model identifier placed in the body; also used to look up
 *   the model's output-token cap.
 * @param providerName Passed to `readProviderOptions` when merging
 *   caller-supplied provider options into the body.
 * @param options Unified request options.
 * @param stream When true the body asks for a streamed response.
 * @param warnings Collector that receives one entry per dropped or adjusted
 *   setting.
 * @returns The request body, with provider options merged in last.
 */
function buildAnthropicMessagesRequest(
  modelId: string,
  providerName: string,
  options: OpenAICompatibleLanguageOptions,
  stream: boolean,
  warnings: WarningCollector,
): AnthropicCompatibleRequest {
  const systemCacheControl = resolveAnthropicCacheControlBlock(
    options.cacheControl?.system,
  );
  const toolsCacheControl = resolveAnthropicCacheControlBlock(
    options.cacheControl?.tools,
  );

  const { system, messages } = toAnthropicMessages(options.prompt, systemCacheControl);
  const anthropicTools = toAnthropicTools(options.tools, toolsCacheControl);
  const requestedThinkingBudget = resolveAnthropicThinkingBudget(options.reasoning);
  const thinkingEnabled = requestedThinkingBudget !== undefined;

  // Anthropic doesn't support these unified options at all — emit warnings
  // so callers don't quietly pass values that have zero effect.
  if (options.presencePenalty !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "presencePenalty",
      details: "Anthropic Messages API has no equivalent and the value was dropped.",
    });
  }
  if (options.frequencyPenalty !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "frequencyPenalty",
      details: "Anthropic Messages API has no equivalent and the value was dropped.",
    });
  }
  if (options.seed !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "seed",
      details: "Anthropic Messages API does not support deterministic seeding.",
    });
  }
  if (options.topK !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "topK",
      details: "Anthropic Messages API does not expose top_k on this surface.",
    });
  }
  if (
    options.stopSequences && options.stopSequences.length > 4
  ) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "stopSequences",
      details:
        `Anthropic accepts at most 4 stop sequences; ${options.stopSequences.length} were provided and the extras were truncated.`,
    });
  }
  if (thinkingEnabled && options.temperature !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "temperature",
      details:
        "Dropped because Anthropic rejects sampling params when extended thinking is enabled.",
    });
  }
  if (thinkingEnabled && options.topP !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "topP",
      details:
        "Dropped because Anthropic rejects sampling params when extended thinking is enabled.",
    });
  }
  if (options.responseFormat && options.responseFormat.type !== "text") {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "responseFormat",
      details:
        "Anthropic Messages API does not have a structured-output response_format equivalent. Use a tool with the schema as input_schema instead.",
    });
  }

  // Anthropic requires max_tokens > budget_tokens when thinking is enabled.
  // Growing max_tokens by the thinking budget preserves the caller's intended
  // output budget, and we clamp the sum at the model's advertised maximum so
  // the request never exceeds the API's hard cap.
  const baseMaxTokens = resolveAnthropicMaxTokens(modelId, options.maxOutputTokens);
  const maxTokens = thinkingEnabled
    ? Math.min(
      baseMaxTokens + (requestedThinkingBudget ?? 0),
      getAnthropicModelCapabilities(modelId).maxOutputTokens,
    )
    : baseMaxTokens;

  // The clamp above can leave max_tokens at or below the requested budget
  // (e.g. a caller-supplied budgetTokens larger than the model cap), which
  // breaks the max_tokens > budget_tokens requirement. Lower the budget to
  // fit and tell the caller instead of sending a request that cannot succeed.
  // NOTE(review): assumes the model cap is well above Anthropic's 1024-token
  // minimum budget, so the lowered value stays valid — confirm.
  let thinkingBudget = requestedThinkingBudget;
  if (thinkingBudget !== undefined && thinkingBudget >= maxTokens) {
    thinkingBudget = maxTokens - 1;
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "reasoning",
      details:
        `Requested thinking budget of ${requestedThinkingBudget} tokens does not fit under max_tokens (${maxTokens}); budget_tokens was lowered to ${thinkingBudget}.`,
    });
  }

  const body: AnthropicCompatibleRequest = {
    model: modelId,
    messages,
    max_tokens: maxTokens,
    ...(stream ? { stream: true } : {}),
    ...(system ? { system } : {}),
    // Sampling params are mutually exclusive with thinking on Anthropic — the
    // API rejects the combo outright. Omit them from the body when thinking
    // is on; the matching warnings were recorded above.
    ...(!thinkingEnabled && options.temperature !== undefined
      ? { temperature: options.temperature }
      : {}),
    ...(!thinkingEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
    ...(options.stopSequences && options.stopSequences.length > 0
      ? { stop_sequences: options.stopSequences.slice(0, 4) }
      : {}),
    ...(anthropicTools ? { tools: anthropicTools } : {}),
    ...(options.toolChoice !== undefined
      ? { tool_choice: normalizeAnthropicToolChoice(options.toolChoice) }
      : {}),
    ...(thinkingBudget !== undefined
      ? { thinking: { type: "enabled", budget_tokens: thinkingBudget } }
      : {}),
    ...(typeof options.userId === "string" && options.userId.length > 0
      ? { metadata: { user_id: options.userId } }
      : {}),
    ...(options.mcpServers && options.mcpServers.length > 0
      ? { mcp_servers: deepSnakeCase(options.mcpServers) as unknown[] }
      : {}),
    ...(options.anthropicContainer !== undefined ? { container: options.anthropicContainer } : {}),
  };

  // Caller-supplied provider options are merged last and therefore override
  // any field computed above.
  Object.assign(body, readProviderOptions(options.providerOptions, "anthropic", providerName));
  return body;
}
|
|
747
1435
|
|
|
1436
|
+
/**
 * Normalized reasoning part produced from an Anthropic `thinking` or
 * `redacted_thinking` content block.
 */
type AnthropicReasoningContent = {
  type: "reasoning";
  /** Cleartext trace, copied from a `thinking` block's `thinking` field. */
  text?: string;
  /** Signature copied from a `thinking` block's `signature` field. */
  signature?: string;
  /** Opaque payload copied from a `redacted_thinking` block's `data` field. */
  redactedData?: string;
};
|
|
1442
|
+
|
|
1443
|
+
/**
 * camelCase view of one Anthropic citation record. Only `type` is always
 * present; every other field is set only when the corresponding snake_case
 * field exists on the wire record with the expected primitive type.
 */
type AnthropicCitation = {
  /** Citation kind as reported by Anthropic. */
  type: string;

  // Shared textual fields.
  citedText?: string;
  url?: string;
  title?: string;

  // Location ranges — which pair is populated depends on the citation kind.
  startCharIndex?: number;
  endCharIndex?: number;
  startBlockIndex?: number;
  endBlockIndex?: number;
  startPageNumber?: number;
  endPageNumber?: number;

  // Source document reference.
  documentIndex?: number;
  documentTitle?: string;
};
|
|
1457
|
+
|
|
1458
|
+
/** Normalized text part, optionally carrying the citations attached to it. */
type AnthropicTextContent = {
  type: "text";
  /** The text of the content block. */
  text: string;
  /** Normalized citations; omitted when the block had none. */
  citations?: AnthropicCitation[];
};
|
|
1463
|
+
|
|
1464
|
+
/**
 * Convert one raw Anthropic citation record into its camelCase form on a
 * best-effort basis.
 *
 * Covers the union of fields used by the web_search_result_location,
 * web_fetch_result_location, char_location, page_location, and
 * content_block_location citation kinds — see
 * https://docs.claude.com/en/docs/build-with-claude/citations
 *
 * @param raw Untrusted value taken from a text block's `citations` array.
 * @returns The normalized citation, or `undefined` when `raw` is not a
 *   record or lacks a non-empty string `type`.
 */
function normalizeAnthropicCitation(raw: unknown): AnthropicCitation | undefined {
  const record = readRecord(raw);
  if (!record) {
    return undefined;
  }

  const kind = record.type;
  if (typeof kind !== "string" || kind.length === 0) {
    return undefined;
  }

  // Each field is copied only when present with the expected primitive type;
  // the spread order fixes the key order of the result.
  return {
    type: kind,
    ...(typeof record.cited_text === "string" ? { citedText: record.cited_text } : {}),
    ...(typeof record.url === "string" ? { url: record.url } : {}),
    ...(typeof record.title === "string" ? { title: record.title } : {}),
    ...(typeof record.start_char_index === "number"
      ? { startCharIndex: record.start_char_index }
      : {}),
    ...(typeof record.end_char_index === "number" ? { endCharIndex: record.end_char_index } : {}),
    ...(typeof record.start_block_index === "number"
      ? { startBlockIndex: record.start_block_index }
      : {}),
    ...(typeof record.end_block_index === "number"
      ? { endBlockIndex: record.end_block_index }
      : {}),
    ...(typeof record.start_page_number === "number"
      ? { startPageNumber: record.start_page_number }
      : {}),
    ...(typeof record.end_page_number === "number"
      ? { endPageNumber: record.end_page_number }
      : {}),
    ...(typeof record.document_index === "number"
      ? { documentIndex: record.document_index }
      : {}),
    ...(typeof record.document_title === "string"
      ? { documentTitle: record.document_title }
      : {}),
  };
}
|
|
1490
|
+
|
|
748
1491
|
function buildAnthropicGenerateResult(payload: unknown): {
|
|
749
1492
|
content: Array<
|
|
750
|
-
|
|
|
1493
|
+
| AnthropicTextContent
|
|
1494
|
+
| AnthropicReasoningContent
|
|
751
1495
|
| { type: "tool-call"; toolCallId: string; toolName: string; input: string }
|
|
752
1496
|
| { type: "tool-result"; toolCallId: string; toolName: string; result: unknown }
|
|
753
1497
|
>;
|
|
754
1498
|
finishReason?: string | { unified: string; raw: string } | null;
|
|
755
|
-
usage?:
|
|
1499
|
+
usage?: RuntimeUsage;
|
|
756
1500
|
} {
|
|
757
1501
|
const record = readRecord(payload);
|
|
758
1502
|
const content = Array.isArray(record?.content) ? record.content : [];
|
|
759
1503
|
const normalized: Array<
|
|
760
|
-
|
|
|
1504
|
+
| AnthropicTextContent
|
|
1505
|
+
| AnthropicReasoningContent
|
|
761
1506
|
| { type: "tool-call"; toolCallId: string; toolName: string; input: string }
|
|
762
1507
|
| { type: "tool-result"; toolCallId: string; toolName: string; result: unknown }
|
|
763
1508
|
> = [];
|
|
@@ -767,7 +1512,42 @@ function buildAnthropicGenerateResult(payload: unknown): {
|
|
|
767
1512
|
const blockType = typeof block?.type === "string" ? block.type : undefined;
|
|
768
1513
|
|
|
769
1514
|
if (blockType === "text" && typeof block?.text === "string" && block.text.length > 0) {
|
|
770
|
-
|
|
1515
|
+
const citationsRaw = Array.isArray(block.citations) ? block.citations : undefined;
|
|
1516
|
+
const citations = citationsRaw
|
|
1517
|
+
?.flatMap((c) => {
|
|
1518
|
+
const normalizedCitation = normalizeAnthropicCitation(c);
|
|
1519
|
+
return normalizedCitation ? [normalizedCitation] : [];
|
|
1520
|
+
});
|
|
1521
|
+
normalized.push({
|
|
1522
|
+
type: "text",
|
|
1523
|
+
text: block.text,
|
|
1524
|
+
...(citations && citations.length > 0 ? { citations } : {}),
|
|
1525
|
+
});
|
|
1526
|
+
continue;
|
|
1527
|
+
}
|
|
1528
|
+
|
|
1529
|
+
// Thinking blocks carry the cleartext trace plus a signature that
|
|
1530
|
+
// Anthropic uses to verify on subsequent turns. Surfacing both lets
|
|
1531
|
+
// callers persist them as `reasoning` content parts and replay on
|
|
1532
|
+
// the next turn so Claude can continue from the same thinking.
|
|
1533
|
+
if (blockType === "thinking") {
|
|
1534
|
+
normalized.push({
|
|
1535
|
+
type: "reasoning",
|
|
1536
|
+
...(typeof block?.thinking === "string" ? { text: block.thinking } : {}),
|
|
1537
|
+
...(typeof block?.signature === "string" ? { signature: block.signature } : {}),
|
|
1538
|
+
});
|
|
1539
|
+
continue;
|
|
1540
|
+
}
|
|
1541
|
+
|
|
1542
|
+
// Redacted thinking blocks arrive when Claude's safety classifier
|
|
1543
|
+
// hides the trace. Pass the encrypted blob through opaquely so the
|
|
1544
|
+
// caller can replay it on the next turn (Anthropic still needs the
|
|
1545
|
+
// blob to verify continuity even though it can't read it).
|
|
1546
|
+
if (blockType === "redacted_thinking" && typeof block?.data === "string") {
|
|
1547
|
+
normalized.push({
|
|
1548
|
+
type: "reasoning",
|
|
1549
|
+
redactedData: block.data,
|
|
1550
|
+
});
|
|
771
1551
|
continue;
|
|
772
1552
|
}
|
|
773
1553
|
|
|
@@ -857,7 +1637,7 @@ async function* streamAnthropicCompatibleParts(
|
|
|
857
1637
|
const toolCalls = new Map<number, AnthropicStreamToolCallState>();
|
|
858
1638
|
const reasoningBlocks = new Map<number, AnthropicStreamReasoningState>();
|
|
859
1639
|
let finishReason: string | { unified: string; raw: string } | null = null;
|
|
860
|
-
let usage:
|
|
1640
|
+
let usage: RuntimeUsage | undefined;
|
|
861
1641
|
|
|
862
1642
|
for await (const chunk of stream) {
|
|
863
1643
|
buffer += decoder.decode(chunk, { stream: true });
|
|
@@ -909,6 +1689,20 @@ async function* streamAnthropicCompatibleParts(
|
|
|
909
1689
|
continue;
|
|
910
1690
|
}
|
|
911
1691
|
|
|
1692
|
+
// Redacted thinking blocks arrive as opaque encrypted payloads when
|
|
1693
|
+
// Claude's safety classifier flags the reasoning trace. Surface them
|
|
1694
|
+
// as a zero-length reasoning block so callers know thinking happened
|
|
1695
|
+
// without leaking the (legitimately hidden) contents.
|
|
1696
|
+
if (blockType === "redacted_thinking") {
|
|
1697
|
+
const reasoningId = `thinking-${index}`;
|
|
1698
|
+
reasoningBlocks.set(index, { id: reasoningId });
|
|
1699
|
+
yield {
|
|
1700
|
+
type: "reasoning-start",
|
|
1701
|
+
id: reasoningId,
|
|
1702
|
+
};
|
|
1703
|
+
continue;
|
|
1704
|
+
}
|
|
1705
|
+
|
|
912
1706
|
if (
|
|
913
1707
|
(blockType === "tool_use" || blockType === "server_tool_use") &&
|
|
914
1708
|
typeof contentBlock?.id === "string" &&
|
|
@@ -1094,9 +1888,7 @@ function normalizeOpenAIFinishReason(
|
|
|
1094
1888
|
return raw;
|
|
1095
1889
|
}
|
|
1096
1890
|
|
|
1097
|
-
function extractOpenAIUsage(payload: unknown):
|
|
1098
|
-
| { inputTokens?: number; outputTokens?: number; totalTokens?: number }
|
|
1099
|
-
| undefined {
|
|
1891
|
+
function extractOpenAIUsage(payload: unknown): RuntimeUsage | undefined {
|
|
1100
1892
|
const record = readRecord(payload);
|
|
1101
1893
|
const usage = readRecord(record?.usage);
|
|
1102
1894
|
if (!usage) {
|
|
@@ -1106,11 +1898,14 @@ function extractOpenAIUsage(payload: unknown):
|
|
|
1106
1898
|
const inputTokens = usage.prompt_tokens;
|
|
1107
1899
|
const outputTokens = usage.completion_tokens;
|
|
1108
1900
|
const totalTokens = usage.total_tokens;
|
|
1901
|
+
const promptTokensDetails = readRecord(usage.prompt_tokens_details);
|
|
1902
|
+
const cachedTokens = promptTokensDetails?.cached_tokens;
|
|
1109
1903
|
|
|
1110
1904
|
return {
|
|
1111
1905
|
inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
|
|
1112
1906
|
outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
|
|
1113
1907
|
totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
|
|
1908
|
+
...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
|
|
1114
1909
|
};
|
|
1115
1910
|
}
|
|
1116
1911
|
|
|
@@ -1165,19 +1960,95 @@ function extractOpenAIToolCalls(message: Record<string, unknown>): Array<{
|
|
|
1165
1960
|
return normalized;
|
|
1166
1961
|
}
|
|
1167
1962
|
|
|
1963
|
+
/**
 * Tell whether a model id belongs to OpenAI's reasoning family
 * (o1 / o3 / o4).
 *
 * These models go through the same completion path as chat models but
 * reject sampling params and accept `reasoning_effort`, so detection is by
 * id prefix and callers need no per-runtime configuration.
 *
 * @param modelId Model identifier to classify.
 * @returns `true` when the id is exactly `o1`, `o3` or `o4`, or starts with
 *   one of those followed by `-`.
 */
function isOpenAIReasoningModel(modelId: string): boolean {
  const reasoningFamilyPattern = /^o[134](-|$)/;
  return reasoningFamilyPattern.test(modelId);
}
|
|
1972
|
+
|
|
1973
|
+
/**
 * Translate the unified reasoning effort into OpenAI's `reasoning_effort`
 * value. OpenAI has no "max" tier, so it is folded into "high"; a missing or
 * unrecognized effort becomes "medium".
 *
 * @param option Unified reasoning option, possibly absent.
 * @returns The OpenAI effort level, or `undefined` when reasoning is not
 *   explicitly enabled.
 */
function resolveOpenAIReasoningEffort(
  option: ProviderReasoningOption | undefined,
): "low" | "medium" | "high" | undefined {
  if (option?.enabled !== true) {
    return undefined;
  }

  const requestedEffort = option.effort;
  if (requestedEffort === "low") {
    return "low";
  }
  if (requestedEffort === "high" || requestedEffort === "max") {
    return "high";
  }
  return "medium";
}
|
|
1994
|
+
|
|
1168
1995
|
function buildOpenAIChatRequest(
|
|
1169
1996
|
modelId: string,
|
|
1170
1997
|
providerName: string,
|
|
1171
1998
|
options: OpenAICompatibleLanguageOptions,
|
|
1172
1999
|
stream: boolean,
|
|
2000
|
+
warnings: WarningCollector,
|
|
1173
2001
|
): OpenAICompatibleChatRequest {
|
|
2002
|
+
const isReasoningModel = isOpenAIReasoningModel(modelId);
|
|
2003
|
+
const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
|
|
2004
|
+
const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
|
|
2005
|
+
|
|
2006
|
+
// OpenAI Chat Completions has no top_k surface (it's exposed only on the
|
|
2007
|
+
// Responses API for some reasoning models). Quietly accepting it would
|
|
2008
|
+
// mislead callers into thinking it took effect.
|
|
2009
|
+
if (options.topK !== undefined) {
|
|
2010
|
+
warnings.push({
|
|
2011
|
+
type: "unsupported-setting",
|
|
2012
|
+
provider: "openai",
|
|
2013
|
+
setting: "topK",
|
|
2014
|
+
details: "OpenAI Chat Completions does not expose top_k; the value was dropped.",
|
|
2015
|
+
});
|
|
2016
|
+
}
|
|
2017
|
+
|
|
2018
|
+
// Reasoning models (o1 / o3 / o4) reject sampling params outright. Emit
|
|
2019
|
+
// warnings at build time so callers see *why* the value didn't apply
|
|
2020
|
+
// rather than a 400 from the API.
|
|
2021
|
+
if (reasoningEnabled) {
|
|
2022
|
+
const dropped: Array<[keyof typeof options, string]> = [
|
|
2023
|
+
["temperature", "temperature"],
|
|
2024
|
+
["topP", "top_p"],
|
|
2025
|
+
["presencePenalty", "presence_penalty"],
|
|
2026
|
+
["frequencyPenalty", "frequency_penalty"],
|
|
2027
|
+
];
|
|
2028
|
+
for (const [key, openaiName] of dropped) {
|
|
2029
|
+
if (options[key] !== undefined) {
|
|
2030
|
+
warnings.push({
|
|
2031
|
+
type: "unsupported-setting",
|
|
2032
|
+
provider: "openai",
|
|
2033
|
+
setting: key,
|
|
2034
|
+
details:
|
|
2035
|
+
`Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
|
|
2036
|
+
});
|
|
2037
|
+
}
|
|
2038
|
+
}
|
|
2039
|
+
}
|
|
2040
|
+
|
|
1174
2041
|
const body: OpenAICompatibleChatRequest = {
|
|
1175
2042
|
model: modelId,
|
|
1176
2043
|
messages: toOpenAICompatibleMessages(options.prompt),
|
|
1177
2044
|
...(stream ? { stream: true, stream_options: { include_usage: true } } : {}),
|
|
1178
2045
|
...(options.maxOutputTokens !== undefined ? { max_tokens: options.maxOutputTokens } : {}),
|
|
1179
|
-
|
|
1180
|
-
|
|
2046
|
+
// OpenAI reasoning models reject temperature / top_p / frequency / presence.
|
|
2047
|
+
// Omit them from the body (the warnings above record each drop) rather than letting the API bounce the request.
|
|
2048
|
+
...(!reasoningEnabled && options.temperature !== undefined
|
|
2049
|
+
? { temperature: options.temperature }
|
|
2050
|
+
: {}),
|
|
2051
|
+
...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
|
|
1181
2052
|
...(options.stopSequences && options.stopSequences.length > 0
|
|
1182
2053
|
? { stop: options.stopSequences }
|
|
1183
2054
|
: {}),
|
|
@@ -1186,10 +2057,37 @@ function buildOpenAIChatRequest(
|
|
|
1186
2057
|
: {}),
|
|
1187
2058
|
...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
|
|
1188
2059
|
...(options.seed !== undefined ? { seed: options.seed } : {}),
|
|
1189
|
-
...(options.presencePenalty !== undefined
|
|
1190
|
-
|
|
2060
|
+
...(!reasoningEnabled && options.presencePenalty !== undefined
|
|
2061
|
+
? { presence_penalty: options.presencePenalty }
|
|
2062
|
+
: {}),
|
|
2063
|
+
...(!reasoningEnabled && options.frequencyPenalty !== undefined
|
|
1191
2064
|
? { frequency_penalty: options.frequencyPenalty }
|
|
1192
2065
|
: {}),
|
|
2066
|
+
...(reasoningEffort !== undefined ? { reasoning_effort: reasoningEffort } : {}),
|
|
2067
|
+
...(typeof options.userId === "string" && options.userId.length > 0
|
|
2068
|
+
? { user: options.userId }
|
|
2069
|
+
: {}),
|
|
2070
|
+
...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
|
|
2071
|
+
...(options.parallelToolCalls !== undefined
|
|
2072
|
+
? { parallel_tool_calls: options.parallelToolCalls }
|
|
2073
|
+
: {}),
|
|
2074
|
+
...(options.responseFormat && options.responseFormat.type !== "text"
|
|
2075
|
+
? {
|
|
2076
|
+
response_format: options.responseFormat.type === "json" ? { type: "json_object" } : {
|
|
2077
|
+
type: "json_schema",
|
|
2078
|
+
json_schema: {
|
|
2079
|
+
name: options.responseFormat.name,
|
|
2080
|
+
...(typeof options.responseFormat.description === "string"
|
|
2081
|
+
? { description: options.responseFormat.description }
|
|
2082
|
+
: {}),
|
|
2083
|
+
schema: unwrapToolInputSchema(options.responseFormat.schema),
|
|
2084
|
+
...(options.responseFormat.strict !== undefined
|
|
2085
|
+
? { strict: options.responseFormat.strict }
|
|
2086
|
+
: {}),
|
|
2087
|
+
},
|
|
2088
|
+
},
|
|
2089
|
+
}
|
|
2090
|
+
: {}),
|
|
1193
2091
|
};
|
|
1194
2092
|
|
|
1195
2093
|
Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
|
|
@@ -1216,9 +2114,7 @@ function normalizeGoogleFinishReason(
|
|
|
1216
2114
|
}
|
|
1217
2115
|
}
|
|
1218
2116
|
|
|
1219
|
-
function extractGoogleUsage(payload: unknown):
|
|
1220
|
-
| { inputTokens?: number; outputTokens?: number; totalTokens?: number }
|
|
1221
|
-
| undefined {
|
|
2117
|
+
function extractGoogleUsage(payload: unknown): RuntimeUsage | undefined {
|
|
1222
2118
|
const record = readRecord(payload);
|
|
1223
2119
|
const usage = readRecord(record?.usageMetadata);
|
|
1224
2120
|
if (!usage) {
|
|
@@ -1228,11 +2124,15 @@ function extractGoogleUsage(payload: unknown):
|
|
|
1228
2124
|
const inputTokens = usage.promptTokenCount;
|
|
1229
2125
|
const outputTokens = usage.candidatesTokenCount;
|
|
1230
2126
|
const totalTokens = usage.totalTokenCount;
|
|
2127
|
+
const cachedContentTokenCount = usage.cachedContentTokenCount;
|
|
1231
2128
|
|
|
1232
2129
|
return {
|
|
1233
2130
|
inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
|
|
1234
2131
|
outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
|
|
1235
2132
|
totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
|
|
2133
|
+
...(typeof cachedContentTokenCount === "number"
|
|
2134
|
+
? { cacheReadInputTokens: cachedContentTokenCount }
|
|
2135
|
+
: {}),
|
|
1236
2136
|
};
|
|
1237
2137
|
}
|
|
1238
2138
|
|
|
@@ -1258,20 +2158,29 @@ function toGoogleContents(
|
|
|
1258
2158
|
parts: [{ text: readTextParts(message.content) }],
|
|
1259
2159
|
});
|
|
1260
2160
|
break;
|
|
1261
|
-
case "assistant":
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
2161
|
+
case "assistant": {
|
|
2162
|
+
// Anthropic-only `reasoning` parts have no Gemini equivalent
|
|
2163
|
+
// and are dropped on replay.
|
|
2164
|
+
const parts: Array<Record<string, unknown>> = [];
|
|
2165
|
+
for (const part of message.content) {
|
|
2166
|
+
if (part.type === "text") {
|
|
2167
|
+
parts.push({ text: part.text });
|
|
2168
|
+
continue;
|
|
2169
|
+
}
|
|
2170
|
+
if (part.type === "reasoning") {
|
|
2171
|
+
continue;
|
|
2172
|
+
}
|
|
2173
|
+
parts.push({
|
|
2174
|
+
functionCall: {
|
|
2175
|
+
id: part.toolCallId,
|
|
2176
|
+
name: part.toolName,
|
|
2177
|
+
args: part.input,
|
|
2178
|
+
},
|
|
2179
|
+
});
|
|
2180
|
+
}
|
|
2181
|
+
contents.push({ role: "model", parts });
|
|
1274
2182
|
break;
|
|
2183
|
+
}
|
|
1275
2184
|
case "tool":
|
|
1276
2185
|
contents.push({
|
|
1277
2186
|
role: "user",
|
|
@@ -1299,22 +2208,45 @@ function toGoogleContents(
|
|
|
1299
2208
|
|
|
1300
2209
|
/**
 * Convert unified tool definitions into Gemini's `tools[]` array.
 *
 * Function tools are gathered into a single `{ functionDeclarations }` entry
 * placed first. Provider tools whose id starts with `google.` each become
 * their own entry, keyed by the camelCased remainder of the id and carrying
 * the caller-provided `args` (or `{}`). Provider tools for other vendors,
 * and ids with nothing after the prefix, are skipped.
 *
 * @param tools Unified tool definitions, possibly absent.
 * @returns The Gemini tools array, or `undefined` when there is nothing to
 *   send.
 */
function toGoogleTools(
  tools: RuntimeToolDefinition[] | undefined,
): Array<Record<string, unknown>> | undefined {
  if (!tools) {
    return undefined;
  }

  const googlePrefix = "google.";
  const declarations: Array<Record<string, unknown>> = [];
  const providerTools: Array<Record<string, unknown>> = [];

  for (const tool of tools) {
    if (tool.type !== "function") {
      // Gemini provider tools — code_execution, google_search,
      // google_search_retrieval — each occupy their own tools[] entry.
      const suffix = tool.id.startsWith(googlePrefix) ? tool.id.slice(googlePrefix.length) : "";
      if (suffix.length > 0) {
        const entryKey = suffix.replace(/_([a-z])/g, (_match, letter) => letter.toUpperCase());
        providerTools.push({ [entryKey]: tool.args ?? {} });
      }
      continue;
    }

    declarations.push({
      name: tool.name,
      ...(typeof tool.description === "string" ? { description: tool.description } : {}),
      parameters: unwrapToolInputSchema(tool.inputSchema),
    });
  }

  const entries: Array<Record<string, unknown>> = declarations.length > 0
    ? [{ functionDeclarations: declarations }, ...providerTools]
    : [...providerTools];

  return entries.length > 0 ? entries : undefined;
}
|
|
1319
2251
|
|
|
1320
2252
|
function unwrapToolInputSchema(inputSchema: unknown): unknown {
|
|
@@ -1346,7 +2278,11 @@ function normalizeGoogleToolChoice(toolChoice: unknown):
|
|
|
1346
2278
|
}
|
|
1347
2279
|
|
|
1348
2280
|
const record = readRecord(toolChoice);
|
|
1349
|
-
if (record
|
|
2281
|
+
if (!record) return undefined;
|
|
2282
|
+
|
|
2283
|
+
// Single-tool restriction: { type: "tool", name } — pin to one
|
|
2284
|
+
// function via mode: ANY + allowedFunctionNames: [name].
|
|
2285
|
+
if (record.type === "tool" && typeof record.name === "string") {
|
|
1350
2286
|
return {
|
|
1351
2287
|
functionCallingConfig: {
|
|
1352
2288
|
mode: "ANY",
|
|
@@ -1355,12 +2291,74 @@ function normalizeGoogleToolChoice(toolChoice: unknown):
|
|
|
1355
2291
|
};
|
|
1356
2292
|
}
|
|
1357
2293
|
|
|
2294
|
+
// Multi-tool restriction: { type: "tools", names: string[] } — pin
|
|
2295
|
+
// to a subset via mode: ANY + the full allowedFunctionNames array.
|
|
2296
|
+
if (record.type === "tools" && Array.isArray(record.names)) {
|
|
2297
|
+
const names = record.names.filter((n): n is string => typeof n === "string");
|
|
2298
|
+
if (names.length > 0) {
|
|
2299
|
+
return {
|
|
2300
|
+
functionCallingConfig: {
|
|
2301
|
+
mode: "ANY",
|
|
2302
|
+
allowedFunctionNames: names,
|
|
2303
|
+
},
|
|
2304
|
+
};
|
|
2305
|
+
}
|
|
2306
|
+
}
|
|
2307
|
+
|
|
2308
|
+
// Explicit mode forms: { type: "auto" | "none" | "any" }.
|
|
2309
|
+
if (record.type === "auto") {
|
|
2310
|
+
return { functionCallingConfig: { mode: "AUTO" } };
|
|
2311
|
+
}
|
|
2312
|
+
if (record.type === "none") {
|
|
2313
|
+
return { functionCallingConfig: { mode: "NONE" } };
|
|
2314
|
+
}
|
|
2315
|
+
if (record.type === "any" || record.type === "required") {
|
|
2316
|
+
return { functionCallingConfig: { mode: "ANY" } };
|
|
2317
|
+
}
|
|
2318
|
+
|
|
1358
2319
|
return undefined;
|
|
1359
2320
|
}
|
|
1360
2321
|
|
|
2322
|
+
/**
 * Map the unified reasoning option to Gemini's `thinkingConfig`.
 *
 * Gemini 2.5+ accepts `includeThoughts: true` to stream back `thought`
 * parts, and `thinkingBudget: N` to cap the thinking token count.
 *
 * Resolution order:
 *  1. Reasoning not enabled (`option` missing or `enabled !== true`)
 *     yields `undefined`, so no `thinkingConfig` is sent.
 *  2. A finite numeric `budgetTokens` is forwarded verbatim.
 *  3. Otherwise `effort` picks a budget: low = 512, high = 8192,
 *     max = -1 ("dynamic / no cap"), medium or anything else = 2048.
 *
 * @param option Unified reasoning option, if any.
 * @returns The `thinkingConfig` record, or `undefined` when reasoning is off.
 */
function resolveGoogleThinkingConfig(
  option: ProviderReasoningOption | undefined,
): Record<string, unknown> | undefined {
  if (!option || option.enabled !== true) {
    return undefined;
  }

  const config: Record<string, unknown> = { includeThoughts: true };

  // NaN and +/-Infinity pass a bare `typeof === "number"` check but are
  // serialized as `null` by JSON.stringify, which would put
  // `thinkingBudget: null` on the wire. Only honour finite budgets and
  // otherwise fall through to the effort-based mapping below.
  if (typeof option.budgetTokens === "number" && Number.isFinite(option.budgetTokens)) {
    config.thinkingBudget = option.budgetTokens;
    return config;
  }

  switch (option.effort) {
    case "low":
      config.thinkingBudget = 512;
      break;
    case "high":
      config.thinkingBudget = 8192;
      break;
    case "max":
      config.thinkingBudget = -1;
      break;
    case "medium":
    default:
      config.thinkingBudget = 2048;
      break;
  }
  return config;
}
|
|
2357
|
+
|
|
1361
2358
|
function buildGoogleGenerationConfig(
|
|
1362
2359
|
options: OpenAICompatibleLanguageOptions,
|
|
1363
2360
|
): Record<string, unknown> | undefined {
|
|
2361
|
+
const thinkingConfig = resolveGoogleThinkingConfig(options.reasoning);
|
|
1364
2362
|
const config: Record<string, unknown> = {
|
|
1365
2363
|
...(options.maxOutputTokens !== undefined ? { maxOutputTokens: options.maxOutputTokens } : {}),
|
|
1366
2364
|
...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
|
|
@@ -1370,6 +2368,7 @@ function buildGoogleGenerationConfig(
|
|
|
1370
2368
|
? { stopSequences: options.stopSequences }
|
|
1371
2369
|
: {}),
|
|
1372
2370
|
...(options.seed !== undefined ? { seed: options.seed } : {}),
|
|
2371
|
+
...(thinkingConfig ? { thinkingConfig } : {}),
|
|
1373
2372
|
};
|
|
1374
2373
|
|
|
1375
2374
|
return Object.keys(config).length > 0 ? config : undefined;
|
|
@@ -1378,8 +2377,47 @@ function buildGoogleGenerationConfig(
|
|
|
1378
2377
|
function buildGoogleGenerateContentRequest(
|
|
1379
2378
|
providerName: string,
|
|
1380
2379
|
options: OpenAICompatibleLanguageOptions,
|
|
2380
|
+
warnings: WarningCollector,
|
|
1381
2381
|
): GoogleCompatibleRequest {
|
|
2382
|
+
// Google generate-content surface doesn't accept presence/frequency
|
|
2383
|
+
// penalties on most current models. Emit warnings and let the request
|
|
2384
|
+
// through without them.
|
|
2385
|
+
if (options.presencePenalty !== undefined) {
|
|
2386
|
+
warnings.push({
|
|
2387
|
+
type: "unsupported-setting",
|
|
2388
|
+
provider: "google",
|
|
2389
|
+
setting: "presencePenalty",
|
|
2390
|
+
details: "Gemini generateContent does not accept presencePenalty; the value was dropped.",
|
|
2391
|
+
});
|
|
2392
|
+
}
|
|
2393
|
+
if (options.frequencyPenalty !== undefined) {
|
|
2394
|
+
warnings.push({
|
|
2395
|
+
type: "unsupported-setting",
|
|
2396
|
+
provider: "google",
|
|
2397
|
+
setting: "frequencyPenalty",
|
|
2398
|
+
details: "Gemini generateContent does not accept frequencyPenalty; the value was dropped.",
|
|
2399
|
+
});
|
|
2400
|
+
}
|
|
2401
|
+
if (options.responseFormat && options.responseFormat.type !== "text") {
|
|
2402
|
+
warnings.push({
|
|
2403
|
+
type: "unsupported-setting",
|
|
2404
|
+
provider: "google",
|
|
2405
|
+
setting: "responseFormat",
|
|
2406
|
+
details:
|
|
2407
|
+
"Gemini uses generationConfig.responseMimeType + responseSchema for structured outputs, which is a separate surface and not yet wired through this option.",
|
|
2408
|
+
});
|
|
2409
|
+
}
|
|
2410
|
+
|
|
1382
2411
|
const { systemInstruction, contents } = toGoogleContents(options.prompt);
|
|
2412
|
+
const generationConfig = buildGoogleGenerationConfig(options);
|
|
2413
|
+
// requestLabels wins over userId-derived labels: when callers explicitly
|
|
2414
|
+
// provide a label map, that's the source of truth. Otherwise fall back
|
|
2415
|
+
// to {user_id} derived from the unified userId option.
|
|
2416
|
+
const labels = options.requestLabels && Object.keys(options.requestLabels).length > 0
|
|
2417
|
+
? options.requestLabels
|
|
2418
|
+
: typeof options.userId === "string" && options.userId.length > 0
|
|
2419
|
+
? { user_id: options.userId }
|
|
2420
|
+
: undefined;
|
|
1383
2421
|
const body: GoogleCompatibleRequest = {
|
|
1384
2422
|
contents,
|
|
1385
2423
|
...(systemInstruction ? { systemInstruction } : {}),
|
|
@@ -1387,8 +2425,13 @@ function buildGoogleGenerateContentRequest(
|
|
|
1387
2425
|
...(normalizeGoogleToolChoice(options.toolChoice)
|
|
1388
2426
|
? { toolConfig: normalizeGoogleToolChoice(options.toolChoice) }
|
|
1389
2427
|
: {}),
|
|
1390
|
-
...(
|
|
1391
|
-
|
|
2428
|
+
...(generationConfig ? { generationConfig } : {}),
|
|
2429
|
+
...(labels ? { labels } : {}),
|
|
2430
|
+
...(typeof options.googleCachedContent === "string" && options.googleCachedContent.length > 0
|
|
2431
|
+
? { cachedContent: options.googleCachedContent }
|
|
2432
|
+
: {}),
|
|
2433
|
+
...(options.googleSafetySettings && options.googleSafetySettings.length > 0
|
|
2434
|
+
? { safetySettings: options.googleSafetySettings }
|
|
1392
2435
|
: {}),
|
|
1393
2436
|
};
|
|
1394
2437
|
|
|
@@ -1426,7 +2469,8 @@ function buildGoogleGenerateResult(payload: unknown): {
|
|
|
1426
2469
|
| { type: "tool-call"; toolCallId: string; toolName: string; input: string }
|
|
1427
2470
|
>;
|
|
1428
2471
|
finishReason?: string | { unified: string; raw: string } | null;
|
|
1429
|
-
usage?:
|
|
2472
|
+
usage?: RuntimeUsage;
|
|
2473
|
+
groundingMetadata?: Record<string, unknown>;
|
|
1430
2474
|
} {
|
|
1431
2475
|
const parts = extractGoogleCandidateParts(payload);
|
|
1432
2476
|
const content: Array<
|
|
@@ -1451,10 +2495,19 @@ function buildGoogleGenerateResult(payload: unknown): {
|
|
|
1451
2495
|
}
|
|
1452
2496
|
}
|
|
1453
2497
|
|
|
2498
|
+
// Gemini grounding (google_search / google_search_retrieval) returns
|
|
2499
|
+
// a per-candidate groundingMetadata object with web search queries,
|
|
2500
|
+
// grounding chunks, and citation indices into the response text.
|
|
2501
|
+
// Pass it through opaquely so callers can render footnotes / source
|
|
2502
|
+
// chips / "Search results" UI without parsing the wire shape.
|
|
2503
|
+
const candidate = extractFirstGoogleCandidate(payload);
|
|
2504
|
+
const groundingMetadata = readRecord(candidate?.groundingMetadata);
|
|
2505
|
+
|
|
1454
2506
|
return {
|
|
1455
2507
|
content,
|
|
1456
|
-
finishReason: normalizeGoogleFinishReason(
|
|
2508
|
+
finishReason: normalizeGoogleFinishReason(candidate?.finishReason),
|
|
1457
2509
|
usage: extractGoogleUsage(payload),
|
|
2510
|
+
...(groundingMetadata ? { groundingMetadata } : {}),
|
|
1458
2511
|
};
|
|
1459
2512
|
}
|
|
1460
2513
|
|
|
@@ -1467,7 +2520,7 @@ async function* streamGoogleCompatibleParts(
|
|
|
1467
2520
|
let reasoningId: string | null = null;
|
|
1468
2521
|
let reasoningIndex = 0;
|
|
1469
2522
|
let finishReason: string | { unified: string; raw: string } | null = null;
|
|
1470
|
-
let usage:
|
|
2523
|
+
let usage: RuntimeUsage | undefined;
|
|
1471
2524
|
|
|
1472
2525
|
for await (const chunk of stream) {
|
|
1473
2526
|
buffer += decoder.decode(chunk, { stream: true });
|
|
@@ -1599,7 +2652,7 @@ function buildOpenAIGenerateResult(payload: unknown): {
|
|
|
1599
2652
|
}
|
|
1600
2653
|
>;
|
|
1601
2654
|
finishReason?: string | { unified: string; raw: string } | null;
|
|
1602
|
-
usage?:
|
|
2655
|
+
usage?: RuntimeUsage;
|
|
1603
2656
|
} {
|
|
1604
2657
|
const choice = extractFirstChoice(payload);
|
|
1605
2658
|
const message = readRecord(choice?.message);
|
|
@@ -1630,7 +2683,7 @@ async function* streamOpenAICompatibleParts(
|
|
|
1630
2683
|
let reasoningId: string | null = null;
|
|
1631
2684
|
let reasoningIndex = 0;
|
|
1632
2685
|
let finishReason: string | { unified: string; raw: string } | null = null;
|
|
1633
|
-
let usage:
|
|
2686
|
+
let usage: RuntimeUsage | undefined;
|
|
1634
2687
|
|
|
1635
2688
|
for await (const chunk of stream) {
|
|
1636
2689
|
buffer += decoder.decode(chunk, { stream: true });
|
|
@@ -1788,11 +2841,19 @@ export function createOpenAIModelRuntime(
|
|
|
1788
2841
|
doGenerate(optionsForRuntime: unknown) {
|
|
1789
2842
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1790
2843
|
const url = getOpenAIChatCompletionsUrl(config.baseURL);
|
|
1791
|
-
const
|
|
2844
|
+
const warnings = createWarningCollector();
|
|
2845
|
+
const body = buildOpenAIChatRequest(
|
|
2846
|
+
modelId,
|
|
2847
|
+
config.name ?? "openai",
|
|
2848
|
+
options,
|
|
2849
|
+
false,
|
|
2850
|
+
warnings,
|
|
2851
|
+
);
|
|
1792
2852
|
return requestJson({
|
|
1793
2853
|
url,
|
|
1794
2854
|
fetchImpl,
|
|
1795
2855
|
providerLabel: config.name ?? "openai",
|
|
2856
|
+
providerKind: "openai",
|
|
1796
2857
|
init: {
|
|
1797
2858
|
method: "POST",
|
|
1798
2859
|
headers: createRequestHeaders({
|
|
@@ -1803,16 +2864,30 @@ export function createOpenAIModelRuntime(
|
|
|
1803
2864
|
body: JSON.stringify(body),
|
|
1804
2865
|
signal: options.abortSignal,
|
|
1805
2866
|
},
|
|
1806
|
-
}).then(
|
|
2867
|
+
}).then((payload) => {
|
|
2868
|
+
const drained = warnings.drain();
|
|
2869
|
+
return {
|
|
2870
|
+
...buildOpenAIGenerateResult(payload),
|
|
2871
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2872
|
+
};
|
|
2873
|
+
});
|
|
1807
2874
|
},
|
|
1808
2875
|
doStream(optionsForRuntime: unknown) {
|
|
1809
2876
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1810
2877
|
const url = getOpenAIChatCompletionsUrl(config.baseURL);
|
|
1811
|
-
const
|
|
2878
|
+
const warnings = createWarningCollector();
|
|
2879
|
+
const body = buildOpenAIChatRequest(
|
|
2880
|
+
modelId,
|
|
2881
|
+
config.name ?? "openai",
|
|
2882
|
+
options,
|
|
2883
|
+
true,
|
|
2884
|
+
warnings,
|
|
2885
|
+
);
|
|
1812
2886
|
return requestStream({
|
|
1813
2887
|
url,
|
|
1814
2888
|
fetchImpl,
|
|
1815
2889
|
providerLabel: config.name ?? "openai",
|
|
2890
|
+
providerKind: "openai",
|
|
1816
2891
|
init: {
|
|
1817
2892
|
method: "POST",
|
|
1818
2893
|
headers: createRequestHeaders({
|
|
@@ -1823,9 +2898,664 @@ export function createOpenAIModelRuntime(
|
|
|
1823
2898
|
body: JSON.stringify(body),
|
|
1824
2899
|
signal: options.abortSignal,
|
|
1825
2900
|
},
|
|
1826
|
-
}).then((responseStream) =>
|
|
1827
|
-
|
|
1828
|
-
|
|
2901
|
+
}).then((responseStream) => {
|
|
2902
|
+
const drained = warnings.drain();
|
|
2903
|
+
return {
|
|
2904
|
+
stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
|
|
2905
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2906
|
+
};
|
|
2907
|
+
});
|
|
2908
|
+
},
|
|
2909
|
+
};
|
|
2910
|
+
}
|
|
2911
|
+
|
|
2912
|
+
// =============================================================================
|
|
2913
|
+
// OpenAI Responses API runtime (#1077, deferred from #1052 C4)
|
|
2914
|
+
// =============================================================================
|
|
2915
|
+
//
|
|
2916
|
+
// The Responses API (/v1/responses) is a different surface than Chat
|
|
2917
|
+
// Completions. Same provider, different request shape, different streaming
|
|
2918
|
+
// event grammar, different response shape, and different reasoning-summary
|
|
2919
|
+
// surface. This runtime is parallel to createOpenAIModelRuntime so each
|
|
2920
|
+
// path stays focused on one wire format.
|
|
2921
|
+
//
|
|
2922
|
+
// Why parallel runtimes instead of a flag? See the rationale in #1077.
|
|
2923
|
+
//
|
|
2924
|
+
// docs: https://platform.openai.com/docs/api-reference/responses
|
|
2925
|
+
|
|
2926
|
+
/**
 * A single entry of the Responses API `input` array. Entries are built as
 * plain records (messages, `function_call`, `function_call_output`,
 * `reasoning`), so the type is intentionally left open.
 */
type OpenAIResponsesInputItem = Record<string, unknown>;

/**
 * JSON body posted to the OpenAI Responses API. Only `model` and `input`
 * are mandatory; the index signature lets provider options add further
 * top-level keys.
 */
type OpenAIResponsesRequest = {
  /** Model identifier. */
  model: string;
  /** Conversation items in order. */
  input: OpenAIResponsesInputItem[];
  /** System prompt text, carried outside the `input` array. */
  instructions?: string;
  /** Set when a streaming response is requested. */
  stream?: boolean;
  max_output_tokens?: number;
  temperature?: number;
  top_p?: number;
  /** Function tools and provider-native tools share one array. */
  tools?: Array<Record<string, unknown>>;
  tool_choice?: unknown;
  /** Structured reasoning settings (effort and summary verbosity). */
  reasoning?: { effort?: string; summary?: string };
  metadata?: Record<string, string>;
  user?: string;
  service_tier?: string;
  parallel_tool_calls?: boolean;
  /** Structured-output format, nested under `text`. */
  text?: { format: Record<string, unknown> };
  [key: string]: unknown;
};
|
|
2946
|
+
|
|
2947
|
+
/**
 * Translate the unified prompt into the Responses API request pieces.
 *
 * How each role is mapped:
 *  - `system`: non-empty contents are collected and joined with a blank
 *    line into the top-level `instructions` string (omitted when empty).
 *  - `user`: one `{ role: "user" }` item holding a single `input_text`
 *    part with the text produced by `readTextParts`.
 *  - `assistant`: consecutive text parts are batched into one
 *    `{ role: "assistant" }` item of `output_text` parts. Any non-text
 *    part first flushes the pending batch, then is emitted as its own
 *    top-level item: `reasoning` (summary text plus `encrypted_content`
 *    taken from the part's `signature`) or `function_call`.
 *  - `tool`: every result becomes a `function_call_output` item.
 *
 * Messages with any other role are ignored.
 *
 * @param prompt Unified prompt messages in conversation order.
 * @returns The optional `instructions` string and the ordered `input` items.
 */
function toOpenAIResponsesInput(
  prompt: RuntimePromptMessage[],
): { instructions?: string; input: OpenAIResponsesInputItem[] } {
  const systemTexts: string[] = [];
  const items: OpenAIResponsesInputItem[] = [];

  // Text parts of the assistant message currently being assembled.
  let pendingContent: Array<Record<string, unknown>> = [];
  const flushAssistantMessage = (): void => {
    if (pendingContent.length === 0) return;
    items.push({ role: "assistant", content: pendingContent });
    pendingContent = [];
  };

  for (const message of prompt) {
    switch (message.role) {
      case "system": {
        if (message.content.length > 0) {
          systemTexts.push(message.content);
        }
        break;
      }

      case "user": {
        const text = readTextParts(message.content);
        items.push({ role: "user", content: [{ type: "input_text", text }] });
        break;
      }

      case "tool": {
        for (const result of message.content) {
          items.push({
            type: "function_call_output",
            call_id: result.toolCallId,
            output: stringifyJsonValue(result.output.value),
          });
        }
        break;
      }

      case "assistant": {
        for (const part of message.content) {
          if (part.type === "text") {
            pendingContent.push({ type: "output_text", text: part.text });
            continue;
          }

          // Reasoning and tool-call parts are standalone input items, so
          // whatever text preceded them has to be emitted first to keep
          // the original ordering.
          flushAssistantMessage();

          if (part.type === "reasoning") {
            const summary: Array<Record<string, unknown>> =
              typeof part.text === "string" && part.text.length > 0
                ? [{ type: "summary_text", text: part.text }]
                : [];
            items.push({
              type: "reasoning",
              ...(typeof part.signature === "string" ? { encrypted_content: part.signature } : {}),
              summary,
            });
            continue;
          }

          items.push({
            type: "function_call",
            call_id: part.toolCallId,
            name: part.toolName,
            arguments: stringifyJsonValue(part.input),
          });
        }
        flushAssistantMessage();
        break;
      }
    }
  }

  if (systemTexts.length === 0) {
    return { input: items };
  }
  return { instructions: systemTexts.join("\n\n"), input: items };
}
|
|
3039
|
+
|
|
3040
|
+
/**
 * Convert unified tool definitions to the Responses API `tools` array.
 *
 * Function tools are flattened: `name`, optional `description` and
 * `parameters` sit at the top level of the entry rather than under a
 * nested `function` key. Provider-defined tools are accepted only when
 * their id is `openai.<type>` with a non-empty `<type>`; the entry is
 * `{ type: <type> }` merged with the snake-cased tool args. Every other
 * provider tool is skipped.
 *
 * @param tools Unified tool definitions, if any.
 * @returns The converted entries, or `undefined` when there are none.
 */
function toOpenAIResponsesTools(
  tools: RuntimeToolDefinition[] | undefined,
): Array<Record<string, unknown>> | undefined {
  if (!tools) {
    return undefined;
  }

  const providerPrefix = "openai.";
  const entries = tools.flatMap((tool): Array<Record<string, unknown>> => {
    if (tool.type === "function") {
      return [{
        type: "function",
        name: tool.name,
        ...(typeof tool.description === "string" ? { description: tool.description } : {}),
        parameters: unwrapToolInputSchema(tool.inputSchema),
      }];
    }

    // Only OpenAI-native tools with a non-empty type suffix are forwarded.
    const isOpenAITool = tool.id.startsWith(providerPrefix) &&
      tool.id.length > providerPrefix.length;
    if (!isOpenAITool) {
      return [];
    }
    return [{
      type: tool.id.slice(providerPrefix.length),
      ...toSnakeCaseRecord(tool.args),
    }];
  });

  return entries.length > 0 ? entries : undefined;
}
|
|
3072
|
+
|
|
3073
|
+
/**
 * Assemble the JSON body for an OpenAI Responses API call from the
 * unified language-model options.
 *
 * Every unified setting that is not forwarded is reported through
 * `warnings` as an `unsupported-setting` entry so callers never lose a
 * value silently:
 *  - `topK`, `stopSequences` and `seed` are never written to the body.
 *  - `presencePenalty` / `frequencyPenalty` are never written to the body
 *    either; when reasoning is active they are reported with the
 *    reasoning-specific message, otherwise with the generic one.
 *  - `temperature` / `topP` are dropped only when reasoning is active
 *    (reasoning model id, or an explicit reasoning effort).
 *
 * Provider options for "openai" / `providerName` are merged last and may
 * therefore override any field set here.
 *
 * @param modelId Model identifier placed in `model`.
 * @param providerName Provider label used to look up provider options.
 * @param options Unified call options.
 * @param stream Whether to request a streaming response.
 * @param warnings Collector receiving one entry per dropped setting.
 * @returns The request body.
 */
function buildOpenAIResponsesRequest(
  modelId: string,
  providerName: string,
  options: OpenAICompatibleLanguageOptions,
  stream: boolean,
  warnings: WarningCollector,
): OpenAIResponsesRequest {
  const isReasoningModel = isOpenAIReasoningModel(modelId);
  const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
  const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;

  type OptionKey = keyof typeof options;

  // Report a setting that has no counterpart in the request body built
  // below. Unset values and empty arrays are not worth a warning.
  const warnIfUnexposed = (key: OptionKey, openaiName: string): void => {
    const value: unknown = options[key];
    if (value === undefined) return;
    if (Array.isArray(value) && value.length === 0) return;
    warnings.push({
      type: "unsupported-setting",
      provider: "openai",
      setting: key,
      details: `OpenAI Responses API does not expose ${openaiName}; the value was dropped.`,
    });
  };

  warnIfUnexposed("topK", "top_k");

  // Same param-sanitization rules as Chat Completions: reasoning models
  // reject sampling params. Drop with a warning.
  if (reasoningEnabled) {
    const dropped: Array<[OptionKey, string]> = [
      ["temperature", "temperature"],
      ["topP", "top_p"],
      ["presencePenalty", "presence_penalty"],
      ["frequencyPenalty", "frequency_penalty"],
    ];
    for (const [key, openaiName] of dropped) {
      if (options[key] !== undefined) {
        warnings.push({
          type: "unsupported-setting",
          provider: "openai",
          setting: key,
          details:
            `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
        });
      }
    }
  } else {
    // The body below never carries the penalties, so without these the
    // values would vanish silently on non-reasoning requests.
    warnIfUnexposed("presencePenalty", "presence_penalty");
    warnIfUnexposed("frequencyPenalty", "frequency_penalty");
  }

  // Neither of these is written to the body in any mode.
  warnIfUnexposed("stopSequences", "stop");
  warnIfUnexposed("seed", "seed");

  const { instructions, input } = toOpenAIResponsesInput(options.prompt);
  const responsesTools = toOpenAIResponsesTools(options.tools);

  const body: OpenAIResponsesRequest = {
    model: modelId,
    input,
    ...(instructions !== undefined ? { instructions } : {}),
    ...(stream ? { stream: true } : {}),
    ...(options.maxOutputTokens !== undefined
      ? { max_output_tokens: options.maxOutputTokens }
      : {}),
    ...(!reasoningEnabled && options.temperature !== undefined
      ? { temperature: options.temperature }
      : {}),
    ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
    ...(responsesTools ? { tools: responsesTools } : {}),
    ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
    // The Responses API surfaces reasoning effort + summary verbosity
    // in a structured `reasoning` object instead of a flat field. We
    // request "auto" summary so callers see structured summary parts
    // without having to opt into them per request.
    ...(reasoningEffort !== undefined
      ? { reasoning: { effort: reasoningEffort, summary: "auto" } }
      : {}),
    ...(typeof options.userId === "string" && options.userId.length > 0
      ? { user: options.userId }
      : {}),
    ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
    ...(options.parallelToolCalls !== undefined
      ? { parallel_tool_calls: options.parallelToolCalls }
      : {}),
    // Responses API uses `text.format` instead of Chat Completions'
    // `response_format`. The shape is similar but nested under `text`.
    ...(options.responseFormat && options.responseFormat.type !== "text"
      ? {
        text: {
          format: options.responseFormat.type === "json" ? { type: "json_object" } : {
            type: "json_schema",
            name: options.responseFormat.name,
            ...(typeof options.responseFormat.description === "string"
              ? { description: options.responseFormat.description }
              : {}),
            schema: unwrapToolInputSchema(options.responseFormat.schema),
            ...(options.responseFormat.strict !== undefined
              ? { strict: options.responseFormat.strict }
              : {}),
          },
        },
      }
      : {}),
  };

  // Provider options are applied last so callers can override or extend
  // any top-level field.
  Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
  return body;
}
|
|
3169
|
+
|
|
3170
|
+
/**
 * Read token usage from a Responses API payload.
 *
 * The usage record is looked up at `payload.response.usage` first (the
 * shape of the streaming `response.completed` event) and then at
 * `payload.usage` (the non-streaming response). The Responses API names
 * the counters `input_tokens` / `output_tokens` rather than Chat
 * Completions' `prompt_tokens` / `completion_tokens`.
 *
 * `totalTokens` uses the reported `total_tokens` when numeric; otherwise
 * it is the sum of whichever of input/output is known, or `undefined`
 * when neither is. Cached input tokens
 * (`input_tokens_details.cached_tokens`) are surfaced as
 * `cacheReadInputTokens` when numeric. Reasoning tokens are not read here.
 *
 * @param payload Parsed response body or stream event.
 * @returns The usage, or `undefined` when the payload carries none.
 */
function extractOpenAIResponsesUsage(payload: unknown): RuntimeUsage | undefined {
  const root = readRecord(payload);
  const nested = readRecord(root?.response);
  const usage = readRecord(nested?.usage) ?? readRecord(root?.usage);
  if (!usage) {
    return undefined;
  }

  const numberOrUndefined = (value: unknown): number | undefined =>
    typeof value === "number" ? value : undefined;

  const inputTokens = numberOrUndefined(usage.input_tokens);
  const outputTokens = numberOrUndefined(usage.output_tokens);

  let totalTokens = numberOrUndefined(usage.total_tokens);
  if (totalTokens === undefined && (inputTokens !== undefined || outputTokens !== undefined)) {
    totalTokens = (inputTokens ?? 0) + (outputTokens ?? 0);
  }

  const cachedTokens = numberOrUndefined(readRecord(usage.input_tokens_details)?.cached_tokens);

  return {
    inputTokens,
    outputTokens,
    totalTokens,
    ...(cachedTokens !== undefined ? { cacheReadInputTokens: cachedTokens } : {}),
  };
}
|
|
3201
|
+
|
|
3202
|
+
/**
 * Convert a Responses API `status` value into the runtime's finish reason.
 *
 *  - non-string input or `"in_progress"` yields `null` (no reason yet)
 *  - `"completed"` maps to unified `"stop"`
 *  - `"incomplete"` maps to unified `"length"`
 *  - `"failed"` maps to unified `"error"`
 *  - any other string is returned unchanged
 *
 * @param raw The `status` field as received.
 * @returns A `{ unified, raw }` pair for known statuses, the raw string
 *   for unknown ones, or `null` when there is nothing to report.
 */
function normalizeOpenAIResponsesFinishReason(
  raw: unknown,
): string | { unified: string; raw: string } | null {
  if (typeof raw !== "string" || raw === "in_progress") {
    return null;
  }
  if (raw === "completed") {
    return { unified: "stop", raw };
  }
  if (raw === "incomplete") {
    return { unified: "length", raw };
  }
  if (raw === "failed") {
    return { unified: "error", raw };
  }
  return raw;
}
|
|
3219
|
+
|
|
3220
|
+
/**
 * Content entries produced from a non-streaming Responses API result:
 * plain text, a reasoning item (summary texts plus an optional opaque
 * signature), or a tool call whose `input` is the JSON argument string.
 */
type OpenAIResponsesContentPart =
  | {
    type: "text";
    text: string;
  }
  | {
    type: "reasoning";
    /** Non-empty summary texts, each with its id when one was present. */
    summaries?: Array<{ id?: string; text: string }>;
    /** Copied from the item's `encrypted_content` when it is a string. */
    signature?: string;
  }
  | {
    type: "tool-call";
    toolCallId: string;
    toolName: string;
    input: string;
  };
|
|
3228
|
+
|
|
3229
|
+
/**
 * Convert a non-streaming Responses API payload into the runtime's
 * generate result.
 *
 * Each entry of `payload.output` is handled by its `type`:
 *  - `message`: the texts of all `output_text` parts are concatenated
 *    into one `text` entry (skipped when the result is empty).
 *  - `function_call`: becomes a `tool-call`; the id prefers `call_id`,
 *    then `id`, then `""`; non-string arguments are JSON-stringified.
 *  - `reasoning`: becomes a `reasoning` entry carrying the non-empty
 *    summary texts and, when present, `encrypted_content` as `signature`.
 * Items of any other type are ignored.
 *
 * @param payload Parsed response body.
 * @returns Content parts in output order, the finish reason derived from
 *   `payload.status`, and the extracted usage.
 */
function buildOpenAIResponsesGenerateResult(payload: unknown): {
  content: OpenAIResponsesContentPart[];
  finishReason?: string | { unified: string; raw: string } | null;
  usage?: RuntimeUsage;
} {
  const root = readRecord(payload);
  const outputItems = Array.isArray(root?.output) ? root.output : [];
  const content: OpenAIResponsesContentPart[] = [];

  const stringOrUndefined = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  for (const rawItem of outputItems) {
    const item = readRecord(rawItem);

    switch (stringOrUndefined(item?.type)) {
      case "message": {
        const parts = item?.content;
        if (!Array.isArray(parts)) {
          break;
        }
        // One message item may bundle several output_text parts; merge
        // them into a single text entry.
        let merged = "";
        for (const rawPart of parts) {
          const part = readRecord(rawPart);
          if (part?.type === "output_text" && typeof part.text === "string") {
            merged += part.text;
          }
        }
        if (merged.length > 0) {
          content.push({ type: "text", text: merged });
        }
        break;
      }

      case "function_call": {
        content.push({
          type: "tool-call",
          toolCallId: stringOrUndefined(item?.call_id) ?? stringOrUndefined(item?.id) ?? "",
          toolName: stringOrUndefined(item?.name) ?? "",
          input: stringOrUndefined(item?.arguments) ??
            stringifyJsonValue(item?.arguments ?? {}),
        });
        break;
      }

      case "reasoning": {
        const rawSummaries = Array.isArray(item?.summary) ? item.summary : [];
        const summaries: Array<{ id?: string; text: string }> = [];
        for (const rawSummary of rawSummaries) {
          const entry = readRecord(rawSummary);
          const text = stringOrUndefined(entry?.text);
          if (text === undefined || text.length === 0) {
            continue;
          }
          const id = stringOrUndefined(entry?.id);
          summaries.push(id !== undefined ? { id, text } : { text });
        }

        const signature = stringOrUndefined(item?.encrypted_content);
        content.push({
          type: "reasoning",
          ...(summaries.length > 0 ? { summaries } : {}),
          ...(signature !== undefined ? { signature } : {}),
        });
        break;
      }
    }
  }

  return {
    content,
    finishReason: normalizeOpenAIResponsesFinishReason(root?.status),
    usage: extractOpenAIResponsesUsage(payload),
  };
}
|
|
3301
|
+
|
|
3302
|
+
/** Per-reasoning-item bookkeeping kept while parsing a Responses stream. */
type OpenAIResponsesStreamReasoningState = {
  /** Id used on the emitted `reasoning-start` / `reasoning-delta` parts. */
  id: string;
  /** True once `reasoning-start` has been emitted for this item. */
  emittedStart: boolean;
};
|
|
3306
|
+
|
|
3307
|
+
/** Per-function-call bookkeeping kept while parsing a Responses stream. */
type OpenAIResponsesStreamFunctionCallState = {
  /** Output item id the call was registered under. */
  id: string;
  /** The item's `call_id`, falling back to the item id when absent. */
  toolCallId: string;
  /** Function name from the item, or `""` when it was not a string. */
  name: string;
  /** Argument text; starts empty (presumably grows with argument deltas). */
  arguments: string;
};
|
|
3313
|
+
|
|
3314
|
+
/**
|
|
3315
|
+
* Parse the Responses API streaming event grammar into the same UI part
|
|
3316
|
+
* shapes the existing OpenAI / Anthropic / Google streams emit. The
|
|
3317
|
+
* Responses API uses a strict event-typed protocol — every event has a
|
|
3318
|
+
* `type` field naming the lifecycle phase — instead of the loose
|
|
3319
|
+
* `delta`-based shape Chat Completions uses.
|
|
3320
|
+
*/
|
|
3321
|
+
async function* streamOpenAIResponsesParts(
  stream: ReadableStream<Uint8Array>,
): AsyncIterable<unknown> {
  // Decodes the byte stream as SSE, dispatches on each event's `type`
  // field, and yields runtime stream parts: text-delta, reasoning-start /
  // reasoning-delta / reasoning-end, tool-input-start / tool-input-delta,
  // tool-call, and exactly one trailing `finish` part.
  const decoder = new TextDecoder();
  let buffer = "";

  // Per-item state, keyed by the provider's output item id.
  const reasoningBlocks = new Map<string, OpenAIResponsesStreamReasoningState>();
  const functionCalls = new Map<string, OpenAIResponsesStreamFunctionCallState>();
  // Item ids for which tool-input-start has already been yielded.
  const startedToolCalls = new Set<string>();

  // Stays null when the stream ends without a terminal response.* event.
  let finishReason: string | { unified: string; raw: string } | null = null;
  let usage: RuntimeUsage | undefined;
  // Source of the locally generated `reasoning-N` ids exposed to callers.
  let reasoningCounter = 0;

  for await (const chunk of stream) {
    // `stream: true` keeps multi-byte characters that straddle chunk
    // boundaries intact; the unparsed tail is carried over in `buffer`.
    buffer += decoder.decode(chunk, { stream: true });
    const parsed = parseSseChunk(buffer);
    buffer = parsed.remainder;

    for (const event of parsed.events) {
      if (event === "[DONE]") continue;
      const record = readRecord(event);
      const type = typeof record?.type === "string" ? record.type : undefined;
      if (!type) continue;

      // response.output_item.added: a new output item begins. Track
      // function_call items so their argument deltas can be attributed,
      // and reasoning items so summary deltas can group correctly.
      if (type === "response.output_item.added") {
        const item = readRecord(record?.item);
        const itemType = typeof item?.type === "string" ? item.type : undefined;
        const itemId = typeof item?.id === "string" ? item.id : undefined;
        if (itemType === "function_call" && itemId) {
          // Prefer the provider's call_id as the tool call id; fall back to
          // the item id when it is absent.
          const callId = typeof item?.call_id === "string" ? item.call_id : itemId;
          const name = typeof item?.name === "string" ? item.name : "";
          functionCalls.set(itemId, {
            id: itemId,
            toolCallId: callId,
            name,
            arguments: "",
          });
        }
        if (itemType === "reasoning" && itemId) {
          reasoningBlocks.set(itemId, {
            id: `reasoning-${reasoningCounter++}`,
            emittedStart: false,
          });
        }
        continue;
      }

      // response.output_text.delta: text chunk for a message item.
      if (type === "response.output_text.delta" && typeof record?.delta === "string") {
        if (record.delta.length > 0) {
          yield { type: "text-delta", delta: record.delta };
        }
        continue;
      }

      // response.reasoning_summary_text.delta: reasoning summary text
      // chunk. The first delta on an item lazily emits the
      // reasoning-start event so callers can group deltas into a part.
      // Deltas for items that were never announced are dropped.
      if (type === "response.reasoning_summary_text.delta" && typeof record?.delta === "string") {
        const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
        const state = itemId ? reasoningBlocks.get(itemId) : undefined;
        if (state && record.delta.length > 0) {
          if (!state.emittedStart) {
            yield { type: "reasoning-start", id: state.id };
            state.emittedStart = true;
          }
          yield { type: "reasoning-delta", id: state.id, delta: record.delta };
        }
        continue;
      }

      // response.function_call_arguments.delta: tool call argument
      // chunk. The first delta lazily emits tool-input-start.
      if (type === "response.function_call_arguments.delta" && typeof record?.delta === "string") {
        const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
        const state = itemId ? functionCalls.get(itemId) : undefined;
        if (state && record.delta.length > 0) {
          if (!startedToolCalls.has(state.id)) {
            yield {
              type: "tool-input-start",
              id: state.toolCallId,
              toolName: state.name,
            };
            startedToolCalls.add(state.id);
          }
          state.arguments += record.delta;
          yield {
            type: "tool-input-delta",
            id: state.toolCallId,
            delta: record.delta,
          };
        }
        continue;
      }

      // response.output_item.done: an item has finished emitting deltas.
      // Close an open reasoning part, or emit the completed tool call.
      if (type === "response.output_item.done") {
        const item = readRecord(record?.item);
        const itemType = typeof item?.type === "string" ? item.type : undefined;
        const itemId = typeof item?.id === "string" ? item.id : undefined;
        if (itemType === "reasoning" && itemId) {
          const state = reasoningBlocks.get(itemId);
          if (state?.emittedStart) {
            yield { type: "reasoning-end", id: state.id };
          }
          reasoningBlocks.delete(itemId);
        }
        if (itemType === "function_call" && itemId) {
          const state = functionCalls.get(itemId);
          if (state) {
            // The accumulated deltas stay authoritative. When none were
            // streamed for this item, fall back to the `arguments` string on
            // the completed item so the tool call is not emitted with an
            // empty input.
            const completedArguments = typeof item?.arguments === "string" ? item.arguments : "";
            yield {
              type: "tool-call",
              toolCallId: state.toolCallId,
              toolName: state.name,
              input: state.arguments.length > 0 ? state.arguments : completedArguments,
            };
          }
          functionCalls.delete(itemId);
        }
        continue;
      }

      // response.completed: terminal event with the final response object
      // (status + usage). Capture both for the final finish part.
      if (type === "response.completed") {
        usage = extractOpenAIResponsesUsage(record) ?? usage;
        const responseRecord = readRecord(record?.response);
        finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status);
        continue;
      }

      // response.failed / response.incomplete: terminal events for an
      // unsuccessful run. Use the normalized status when available,
      // otherwise a fixed fallback per event type.
      if (type === "response.failed" || type === "response.incomplete") {
        const responseRecord = readRecord(record?.response);
        finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status) ??
          (type === "response.failed"
            ? { unified: "error", raw: "failed" }
            : { unified: "length", raw: "incomplete" });
        usage = extractOpenAIResponsesUsage(record) ?? usage;
        continue;
      }
    }
  }

  // Close any reasoning streams still open at end-of-stream (defensive
  // — a clean Responses API stream always closes them via output_item.done).
  for (const state of reasoningBlocks.values()) {
    if (state.emittedStart) {
      yield { type: "reasoning-end", id: state.id };
    }
  }

  // Always terminate with a finish part; `usage` is only attached when a
  // terminal event supplied it.
  yield {
    type: "finish",
    finishReason,
    ...(usage ? { usage } : {}),
  };
}
|
|
3481
|
+
|
|
3482
|
+
/**
 * Creates a `ModelRuntime` that talks to the OpenAI Responses endpoint.
 *
 * Both `doGenerate` and `doStream` build the request body with
 * `buildOpenAIResponsesRequest`, POST it with a bearer `authorization`
 * header, and attach any warnings collected while building the request to
 * the result (the `warnings` key is omitted when none were collected).
 *
 * @param config Runtime configuration; `fetch` defaults to `globalThis.fetch`
 *   and `name` defaults to `"openai"`.
 * @param modelId Model identifier forwarded to the request builder.
 * @returns The runtime object exposing `doGenerate` and `doStream`.
 */
export function createOpenAIResponsesRuntime(
  config: OpenAIRuntimeConfig,
  modelId: string,
): ModelRuntime {
  const fetchImpl = config.fetch ?? globalThis.fetch;

  // Shared request preparation for both call styles. Runs synchronously, so
  // a throwing request builder surfaces as a thrown error rather than a
  // rejected promise, exactly as when the steps are written inline.
  const prepareRequest = (optionsForRuntime: unknown, stream: boolean) => {
    const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
    const url = getOpenAIResponsesUrl(config.baseURL);
    const warnings = createWarningCollector();
    const body = buildOpenAIResponsesRequest(
      modelId,
      config.name ?? "openai",
      options,
      stream,
      warnings,
    );
    const request = {
      url,
      fetchImpl,
      providerLabel: config.name ?? "openai",
      providerKind: "openai" as const,
      init: {
        method: "POST",
        headers: createRequestHeaders({
          apiKeyHeaderName: "authorization",
          apiKey: `Bearer ${config.apiKey}`,
          extraHeaders: options.headers,
        }),
        body: JSON.stringify(body),
        signal: options.abortSignal,
      },
    };
    return { request, warnings };
  };

  return {
    provider: config.name ?? "openai",
    modelId,
    specificationVersion: "v3",
    supportedUrls: {},

    // Non-streaming call: one JSON payload mapped to a generate result.
    doGenerate(optionsForRuntime: unknown) {
      const { request, warnings } = prepareRequest(optionsForRuntime, false);
      return requestJson(request).then((payload) => {
        const drained = warnings.drain();
        const result = buildOpenAIResponsesGenerateResult(payload);
        return drained.length > 0 ? { ...result, warnings: drained } : { ...result };
      });
    },

    // Streaming call: the SSE byte stream is adapted into runtime parts.
    doStream(optionsForRuntime: unknown) {
      const { request, warnings } = prepareRequest(optionsForRuntime, true);
      return requestStream(request).then((responseStream) => {
        const drained = warnings.drain();
        const parts = ReadableStream.from(streamOpenAIResponsesParts(responseStream));
        return drained.length > 0 ? { stream: parts, warnings: drained } : { stream: parts };
      });
    },
  };
}
|
|
@@ -1843,16 +3573,19 @@ export function createAnthropicModelRuntime(
|
|
|
1843
3573
|
doGenerate(optionsForRuntime: unknown) {
|
|
1844
3574
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1845
3575
|
const url = getAnthropicMessagesUrl(config.baseURL);
|
|
3576
|
+
const warnings = createWarningCollector();
|
|
1846
3577
|
const body = buildAnthropicMessagesRequest(
|
|
1847
3578
|
modelId,
|
|
1848
3579
|
config.name ?? "anthropic",
|
|
1849
3580
|
options,
|
|
1850
3581
|
false,
|
|
3582
|
+
warnings,
|
|
1851
3583
|
);
|
|
1852
3584
|
return requestJson({
|
|
1853
3585
|
url,
|
|
1854
3586
|
fetchImpl,
|
|
1855
3587
|
providerLabel: config.name ?? "anthropic",
|
|
3588
|
+
providerKind: "anthropic",
|
|
1856
3589
|
init: {
|
|
1857
3590
|
method: "POST",
|
|
1858
3591
|
headers: createAnthropicRequestHeaders({
|
|
@@ -1863,21 +3596,30 @@ export function createAnthropicModelRuntime(
|
|
|
1863
3596
|
body: JSON.stringify(body),
|
|
1864
3597
|
signal: options.abortSignal,
|
|
1865
3598
|
},
|
|
1866
|
-
}).then(
|
|
3599
|
+
}).then((payload) => {
|
|
3600
|
+
const drained = warnings.drain();
|
|
3601
|
+
return {
|
|
3602
|
+
...buildAnthropicGenerateResult(payload),
|
|
3603
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
3604
|
+
};
|
|
3605
|
+
});
|
|
1867
3606
|
},
|
|
1868
3607
|
doStream(optionsForRuntime: unknown) {
|
|
1869
3608
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1870
3609
|
const url = getAnthropicMessagesUrl(config.baseURL);
|
|
3610
|
+
const warnings = createWarningCollector();
|
|
1871
3611
|
const body = buildAnthropicMessagesRequest(
|
|
1872
3612
|
modelId,
|
|
1873
3613
|
config.name ?? "anthropic",
|
|
1874
3614
|
options,
|
|
1875
3615
|
true,
|
|
3616
|
+
warnings,
|
|
1876
3617
|
);
|
|
1877
3618
|
return requestStream({
|
|
1878
3619
|
url,
|
|
1879
3620
|
fetchImpl,
|
|
1880
3621
|
providerLabel: config.name ?? "anthropic",
|
|
3622
|
+
providerKind: "anthropic",
|
|
1881
3623
|
init: {
|
|
1882
3624
|
method: "POST",
|
|
1883
3625
|
headers: createAnthropicRequestHeaders({
|
|
@@ -1888,9 +3630,13 @@ export function createAnthropicModelRuntime(
|
|
|
1888
3630
|
body: JSON.stringify(body),
|
|
1889
3631
|
signal: options.abortSignal,
|
|
1890
3632
|
},
|
|
1891
|
-
}).then((responseStream) =>
|
|
1892
|
-
|
|
1893
|
-
|
|
3633
|
+
}).then((responseStream) => {
|
|
3634
|
+
const drained = warnings.drain();
|
|
3635
|
+
return {
|
|
3636
|
+
stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
|
|
3637
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
3638
|
+
};
|
|
3639
|
+
});
|
|
1894
3640
|
},
|
|
1895
3641
|
};
|
|
1896
3642
|
}
|
|
@@ -1908,11 +3654,17 @@ export function createGoogleModelRuntime(
|
|
|
1908
3654
|
doGenerate(optionsForRuntime: unknown) {
|
|
1909
3655
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1910
3656
|
const url = getGoogleGenerateContentUrl(config.baseURL, modelId);
|
|
1911
|
-
const
|
|
3657
|
+
const warnings = createWarningCollector();
|
|
3658
|
+
const body = buildGoogleGenerateContentRequest(
|
|
3659
|
+
config.name ?? "google",
|
|
3660
|
+
options,
|
|
3661
|
+
warnings,
|
|
3662
|
+
);
|
|
1912
3663
|
return requestJson({
|
|
1913
3664
|
url,
|
|
1914
3665
|
fetchImpl,
|
|
1915
3666
|
providerLabel: config.name ?? "google",
|
|
3667
|
+
providerKind: "google",
|
|
1916
3668
|
init: {
|
|
1917
3669
|
method: "POST",
|
|
1918
3670
|
headers: createRequestHeaders({
|
|
@@ -1923,16 +3675,28 @@ export function createGoogleModelRuntime(
|
|
|
1923
3675
|
body: JSON.stringify(body),
|
|
1924
3676
|
signal: options.abortSignal,
|
|
1925
3677
|
},
|
|
1926
|
-
}).then(
|
|
3678
|
+
}).then((payload) => {
|
|
3679
|
+
const drained = warnings.drain();
|
|
3680
|
+
return {
|
|
3681
|
+
...buildGoogleGenerateResult(payload),
|
|
3682
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
3683
|
+
};
|
|
3684
|
+
});
|
|
1927
3685
|
},
|
|
1928
3686
|
doStream(optionsForRuntime: unknown) {
|
|
1929
3687
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1930
3688
|
const url = getGoogleStreamGenerateContentUrl(config.baseURL, modelId);
|
|
1931
|
-
const
|
|
3689
|
+
const warnings = createWarningCollector();
|
|
3690
|
+
const body = buildGoogleGenerateContentRequest(
|
|
3691
|
+
config.name ?? "google",
|
|
3692
|
+
options,
|
|
3693
|
+
warnings,
|
|
3694
|
+
);
|
|
1932
3695
|
return requestStream({
|
|
1933
3696
|
url,
|
|
1934
3697
|
fetchImpl,
|
|
1935
3698
|
providerLabel: config.name ?? "google",
|
|
3699
|
+
providerKind: "google",
|
|
1936
3700
|
init: {
|
|
1937
3701
|
method: "POST",
|
|
1938
3702
|
headers: createRequestHeaders({
|
|
@@ -1943,9 +3707,13 @@ export function createGoogleModelRuntime(
|
|
|
1943
3707
|
body: JSON.stringify(body),
|
|
1944
3708
|
signal: options.abortSignal,
|
|
1945
3709
|
},
|
|
1946
|
-
}).then((responseStream) =>
|
|
1947
|
-
|
|
1948
|
-
|
|
3710
|
+
}).then((responseStream) => {
|
|
3711
|
+
const drained = warnings.drain();
|
|
3712
|
+
return {
|
|
3713
|
+
stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
|
|
3714
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
3715
|
+
};
|
|
3716
|
+
});
|
|
1949
3717
|
},
|
|
1950
3718
|
};
|
|
1951
3719
|
}
|
|
@@ -1973,6 +3741,7 @@ export function createOpenAIEmbeddingRuntime(
|
|
|
1973
3741
|
url,
|
|
1974
3742
|
fetchImpl,
|
|
1975
3743
|
providerLabel: config.name ?? "openai",
|
|
3744
|
+
providerKind: "openai",
|
|
1976
3745
|
init: {
|
|
1977
3746
|
method: "POST",
|
|
1978
3747
|
headers: {
|
|
@@ -2021,6 +3790,7 @@ export function createGoogleEmbeddingRuntime(
|
|
|
2021
3790
|
url,
|
|
2022
3791
|
fetchImpl,
|
|
2023
3792
|
providerLabel: config.name ?? "google",
|
|
3793
|
+
providerKind: "google",
|
|
2024
3794
|
init: {
|
|
2025
3795
|
method: "POST",
|
|
2026
3796
|
headers: {
|