veryfront 0.1.207 → 0.1.208
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/deno.js +1 -1
- package/esm/src/provider/runtime-loader.d.ts +46 -0
- package/esm/src/provider/runtime-loader.d.ts.map +1 -1
- package/esm/src/provider/runtime-loader.js +847 -70
- package/esm/src/provider/types.d.ts +2 -0
- package/esm/src/provider/types.d.ts.map +1 -1
- package/esm/src/utils/version-constant.d.ts +1 -1
- package/esm/src/utils/version-constant.js +1 -1
- package/package.json +1 -1
- package/src/deno.js +1 -1
- package/src/src/provider/runtime-loader.ts +1216 -101
- package/src/src/provider/types.ts +2 -0
- package/src/src/utils/version-constant.ts +1 -1
|
@@ -36,6 +36,18 @@ type RuntimePromptMessage =
|
|
|
36
36
|
input: unknown;
|
|
37
37
|
providerExecuted?: boolean;
|
|
38
38
|
}
|
|
39
|
+
| {
|
|
40
|
+
// Anthropic thinking block replay. Carries the original signed
|
|
41
|
+
// thinking trace so that on the next turn Anthropic can verify
|
|
42
|
+
// the signature and let Claude continue reasoning from the same
|
|
43
|
+
// point. `text` + `signature` are the normal pair for an
|
|
44
|
+
// un-redacted thinking block; `redactedData` is set instead of
|
|
45
|
+
// both when Anthropic returned an encrypted opaque payload.
|
|
46
|
+
type: "reasoning";
|
|
47
|
+
text?: string;
|
|
48
|
+
signature?: string;
|
|
49
|
+
redactedData?: string;
|
|
50
|
+
}
|
|
39
51
|
>;
|
|
40
52
|
}
|
|
41
53
|
| {
|
|
@@ -60,6 +72,67 @@ type RuntimeToolDefinition =
|
|
|
60
72
|
id: `${string}.${string}`;
|
|
61
73
|
args: Record<string, unknown>;
|
|
62
74
|
};
|
|
75
|
+
/**
|
|
76
|
+
* TTL for a single prompt-cache breakpoint.
|
|
77
|
+
*
|
|
78
|
+
* `true` and `"5m"` both map to Anthropic's default ephemeral (5-minute) cache.
|
|
79
|
+
* `"1h"` maps to the extended 1-hour cache at a 2x write cost. Callers can
|
|
80
|
+
* pick per breakpoint target.
|
|
81
|
+
*/
|
|
82
|
+
type ProviderCacheTtl = boolean | "5m" | "1h";
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Per-provider prompt / context caching controls.
|
|
86
|
+
*
|
|
87
|
+
* For Anthropic, flipping these on emits `cache_control: { type: "ephemeral" }`
|
|
88
|
+
* breakpoints on the assembled system prompt and/or the last tool definition
|
|
89
|
+
* sent to the Messages API, enabling Anthropic's explicit prompt cache.
|
|
90
|
+
*
|
|
91
|
+
* OpenAI's prompt cache is automatic on gpt-4o+ and has no request-side
|
|
92
|
+
* directive to emit, so this option is a no-op for the OpenAI runtime. Google
|
|
93
|
+
* uses a separate `cachedContent` resource model that is intentionally not
|
|
94
|
+
* covered by this option (it belongs on a dedicated Gemini-specific surface).
|
|
95
|
+
*/
|
|
96
|
+
type ProviderCacheControlOption = {
|
|
97
|
+
/**
|
|
98
|
+
* Attach a cache breakpoint to the final system-prompt text block.
|
|
99
|
+
* Use when the system prompt is large and reused across requests.
|
|
100
|
+
*/
|
|
101
|
+
system?: ProviderCacheTtl;
|
|
102
|
+
/**
|
|
103
|
+
* Attach a cache breakpoint to the last tool definition in `tools`.
|
|
104
|
+
* Use when the tool schemas are large and identical across requests.
|
|
105
|
+
*/
|
|
106
|
+
tools?: ProviderCacheTtl;
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Unified effort level for extended reasoning / thinking. Maps to
|
|
111
|
+
* per-provider knobs: Anthropic `thinking.budget_tokens`, OpenAI
|
|
112
|
+
* `reasoning_effort`, Gemini `thinkingConfig.thinkingBudget`.
|
|
113
|
+
*/
|
|
114
|
+
type ProviderReasoningEffort = "low" | "medium" | "high" | "max";
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* Unified reasoning / thinking request option.
|
|
118
|
+
*
|
|
119
|
+
* Setting `enabled: true` turns on extended thinking on providers that
|
|
120
|
+
* support it (Anthropic Claude 4.x, OpenAI o-series, Gemini 2.5+). The
|
|
121
|
+
* `effort` field picks a coarse budget; when `budgetTokens` is set it
|
|
122
|
+
* wins for providers that take a numeric budget (Anthropic, Gemini).
|
|
123
|
+
*
|
|
124
|
+
* Providers that do not support reasoning treat this as a no-op. On
|
|
125
|
+
* Anthropic + OpenAI, enabling reasoning also disables sampling params
|
|
126
|
+
* that the providers reject in combination (`temperature`, `topP`,
|
|
127
|
+
* `topK`, `presencePenalty`, `frequencyPenalty`) — silently dropping
|
|
128
|
+
* them rather than failing the request.
|
|
129
|
+
*/
|
|
130
|
+
type ProviderReasoningOption = {
|
|
131
|
+
enabled?: boolean;
|
|
132
|
+
effort?: ProviderReasoningEffort;
|
|
133
|
+
budgetTokens?: number;
|
|
134
|
+
};
|
|
135
|
+
|
|
63
136
|
type OpenAICompatibleLanguageOptions = {
|
|
64
137
|
prompt: RuntimePromptMessage[];
|
|
65
138
|
maxOutputTokens?: number;
|
|
@@ -76,6 +149,128 @@ type OpenAICompatibleLanguageOptions = {
|
|
|
76
149
|
providerOptions?: Record<string, unknown>;
|
|
77
150
|
includeRawChunks?: boolean;
|
|
78
151
|
abortSignal?: AbortSignal;
|
|
152
|
+
/**
|
|
153
|
+
* Per-provider prompt / context caching controls. See
|
|
154
|
+
* {@link ProviderCacheControlOption}. When unset, caching behaviour is
|
|
155
|
+
* unchanged on every provider.
|
|
156
|
+
*/
|
|
157
|
+
cacheControl?: ProviderCacheControlOption;
|
|
158
|
+
/**
|
|
159
|
+
* Enable extended reasoning / thinking on providers that support it.
|
|
160
|
+
* See {@link ProviderReasoningOption}. When unset, reasoning behaviour
|
|
161
|
+
* is unchanged on every provider.
|
|
162
|
+
*/
|
|
163
|
+
reasoning?: ProviderReasoningOption;
|
|
164
|
+
/**
|
|
165
|
+
* Stable per-user identifier for rate-limiting, abuse detection, and
|
|
166
|
+
* billing attribution. Maps to:
|
|
167
|
+
* - Anthropic: `metadata.user_id`
|
|
168
|
+
* - OpenAI: `user`
|
|
169
|
+
* - Google: `labels.user_id` (when {@link requestLabels} is unset)
|
|
170
|
+
*/
|
|
171
|
+
userId?: string;
|
|
172
|
+
/**
|
|
173
|
+
* Provider-specific label map for Google Gemini's `labels` field.
|
|
174
|
+
* Anthropic and OpenAI don't have an arbitrary-label equivalent, so
|
|
175
|
+
* this is intentionally Google-only. When unset, no labels are sent.
|
|
176
|
+
*/
|
|
177
|
+
requestLabels?: Record<string, string>;
|
|
178
|
+
/**
|
|
179
|
+
* OpenAI-specific. Maps to the `service_tier` field on Chat Completions
|
|
180
|
+
* which trades latency for cost. Documented values:
|
|
181
|
+
*
|
|
182
|
+
* - `default` — standard processing (default if unset)
|
|
183
|
+
* - `flex` — lower-priority queue, lower per-token cost, longer
|
|
184
|
+
* expected latency. Useful for batchy or non-interactive workloads.
|
|
185
|
+
* - `scale` — reserved-capacity tier with strict latency SLOs.
|
|
186
|
+
* - `auto` — let OpenAI pick.
|
|
187
|
+
*
|
|
188
|
+
* Forwarded verbatim. Anthropic and Google have no equivalent and
|
|
189
|
+
* the field is silently omitted on those providers.
|
|
190
|
+
*/
|
|
191
|
+
serviceTier?: "auto" | "default" | "flex" | "scale";
|
|
192
|
+
/**
|
|
193
|
+
* OpenAI-specific. When `false`, OpenAI runs tool calls sequentially
|
|
194
|
+
* instead of in parallel. Useful for ordered side effects where
|
|
195
|
+
* concurrent calls would race. Default behaviour (unset) is parallel.
|
|
196
|
+
*/
|
|
197
|
+
parallelToolCalls?: boolean;
|
|
198
|
+
/**
|
|
199
|
+
* Structured-output response format. Maps to OpenAI's `response_format`
|
|
200
|
+
* field on Chat Completions (and Responses). Three variants:
|
|
201
|
+
*
|
|
202
|
+
* - `{ type: "text" }` — the default (no constraint).
|
|
203
|
+
* - `{ type: "json" }` — emits OpenAI's `response_format:
|
|
204
|
+
* { type: "json_object" }` to force the model to return valid JSON.
|
|
205
|
+
* - `{ type: "json_schema", name, schema, strict? }` — emits
|
|
206
|
+
* OpenAI's `response_format: { type: "json_schema", json_schema: {
|
|
207
|
+
* name, schema, strict } }` for fully constrained structured
|
|
208
|
+
* outputs (gpt-4o-2024-08-06+).
|
|
209
|
+
*
|
|
210
|
+
* On Anthropic and Google this option emits an "unsupported-setting"
|
|
211
|
+
* warning when set to anything other than `text` (those providers
|
|
212
|
+
* have their own structured-output surfaces and need a dedicated
|
|
213
|
+
* follow-up to wire them in).
|
|
214
|
+
*/
|
|
215
|
+
responseFormat?:
|
|
216
|
+
| { type: "text" }
|
|
217
|
+
| { type: "json" }
|
|
218
|
+
| {
|
|
219
|
+
type: "json_schema";
|
|
220
|
+
name: string;
|
|
221
|
+
schema: unknown;
|
|
222
|
+
description?: string;
|
|
223
|
+
strict?: boolean;
|
|
224
|
+
};
|
|
225
|
+
/**
|
|
226
|
+
* Anthropic-specific. `container` field for programmatic tool calling
|
|
227
|
+
* and agent skills. Anthropic uses this to scope a session to a
|
|
228
|
+
* sandboxed container (e.g. for Computer Use, code execution
|
|
229
|
+
* sandboxes, or skills loaded from a container). Forwarded verbatim.
|
|
230
|
+
*
|
|
231
|
+
* The shape varies — string container id or a structured object
|
|
232
|
+
* depending on the feature. Caller passes whatever Anthropic's docs
|
|
233
|
+
* specify for the target feature.
|
|
234
|
+
*/
|
|
235
|
+
anthropicContainer?: unknown;
|
|
236
|
+
/**
|
|
237
|
+
* Google-specific. Reference to a previously-created Gemini cached
|
|
238
|
+
* content resource (created via the separate caches API) to attach
|
|
239
|
+
* to this request. Resource name format:
|
|
240
|
+
* `cachedContents/<id>`. See https://ai.google.dev/gemini-api/docs/caching.
|
|
241
|
+
*
|
|
242
|
+
* Cache creation itself is out of scope for the runtime — callers
|
|
243
|
+
* use the Gemini REST API or SDK to create the cache, then pass the
|
|
244
|
+
* resource name here on each subsequent generate call to attach the
|
|
245
|
+
* cached prefix and avoid re-paying for it.
|
|
246
|
+
*/
|
|
247
|
+
googleCachedContent?: string;
|
|
248
|
+
/**
|
|
249
|
+
* Google-specific. Per-request safety filter configuration for
|
|
250
|
+
* Gemini. Each entry pairs a HARM_CATEGORY_* with a threshold
|
|
251
|
+
* (BLOCK_NONE / BLOCK_LOW_AND_ABOVE / BLOCK_MEDIUM_AND_ABOVE /
|
|
252
|
+
* BLOCK_ONLY_HIGH). Forwarded verbatim as the `safetySettings`
|
|
253
|
+
* field. See https://ai.google.dev/gemini-api/docs/safety-settings.
|
|
254
|
+
*/
|
|
255
|
+
googleSafetySettings?: Array<{
|
|
256
|
+
category: string;
|
|
257
|
+
threshold: string;
|
|
258
|
+
}>;
|
|
259
|
+
/**
|
|
260
|
+
* Anthropic-specific. Native MCP server definitions to pass directly
|
|
261
|
+
* on the Messages API request body. Lets callers register MCP servers
|
|
262
|
+
* server-side instead of reloading them into local function tools.
|
|
263
|
+
*
|
|
264
|
+
* Caller must opt into the MCP beta by adding the matching header to
|
|
265
|
+
* `headers`, e.g. `{ "anthropic-beta": "mcp-client-2025-04-04" }`.
|
|
266
|
+
* Without that header Anthropic will reject the request.
|
|
267
|
+
*
|
|
268
|
+
* Each entry is forwarded with camelCase keys converted to snake_case
|
|
269
|
+
* so `authorizationToken` → `authorization_token`,
|
|
270
|
+
* `toolConfiguration.allowedTools` → `tool_configuration.allowed_tools`,
|
|
271
|
+
* etc.
|
|
272
|
+
*/
|
|
273
|
+
mcpServers?: Array<Record<string, unknown>>;
|
|
79
274
|
};
|
|
80
275
|
type OpenAICompatibleChatMessage =
|
|
81
276
|
| { role: "system"; content: string }
|
|
@@ -142,7 +337,12 @@ type AnthropicCompatibleRequest = {
|
|
|
142
337
|
messages: AnthropicCompatibleMessage[];
|
|
143
338
|
max_tokens: number;
|
|
144
339
|
stream?: boolean;
|
|
145
|
-
|
|
340
|
+
/**
|
|
341
|
+
* String form is the classic shorthand. Array-of-blocks form is required
|
|
342
|
+
* when the system prompt carries a cache_control breakpoint, because
|
|
343
|
+
* cache_control lives on an individual content block, not on a raw string.
|
|
344
|
+
*/
|
|
345
|
+
system?: string | Array<Record<string, unknown>>;
|
|
146
346
|
temperature?: number;
|
|
147
347
|
top_p?: number;
|
|
148
348
|
stop_sequences?: string[];
|
|
@@ -168,9 +368,7 @@ type GoogleCompatibleRequest = {
|
|
|
168
368
|
systemInstruction?: {
|
|
169
369
|
parts: Array<{ text: string }>;
|
|
170
370
|
};
|
|
171
|
-
tools?: Array<
|
|
172
|
-
functionDeclarations: Array<Record<string, unknown>>;
|
|
173
|
-
}>;
|
|
371
|
+
tools?: Array<Record<string, unknown>>;
|
|
174
372
|
toolConfig?: {
|
|
175
373
|
functionCallingConfig: Record<string, unknown>;
|
|
176
374
|
};
|
|
@@ -287,9 +485,203 @@ function extractGoogleUsageTokens(payload: unknown): number | undefined {
|
|
|
287
485
|
return typeof promptTokenCount === "number" ? promptTokenCount : undefined;
|
|
288
486
|
}
|
|
289
487
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
488
|
+
type ProviderKind = "anthropic" | "openai" | "google";
|
|
489
|
+
|
|
490
|
+
/**
|
|
491
|
+
* Structured warning emitted when a provider runtime drops or rewrites a
|
|
492
|
+
* caller-provided option. Mirrors the AI ecosystem convention (Vercel AI
|
|
493
|
+
* SDK, LangChain) of returning `unsupported-setting` warnings on the
|
|
494
|
+
* runtime result so callers can discover silently-dropped fields without
|
|
495
|
+
* having to read the source.
|
|
496
|
+
*/
|
|
497
|
+
export type ProviderWarning = {
|
|
498
|
+
type: "unsupported-setting" | "other";
|
|
499
|
+
setting?: string;
|
|
500
|
+
details?: string;
|
|
501
|
+
provider: ProviderKind;
|
|
502
|
+
};
|
|
503
|
+
|
|
504
|
+
/**
|
|
505
|
+
* Mutable warning collector handed to per-provider request builders so
|
|
506
|
+
* they can append entries during the build pass instead of plumbing a
|
|
507
|
+
* return-tuple shape through every helper.
|
|
508
|
+
*/
|
|
509
|
+
type WarningCollector = {
|
|
510
|
+
push(warning: ProviderWarning): void;
|
|
511
|
+
drain(): ProviderWarning[];
|
|
512
|
+
};
|
|
513
|
+
|
|
514
|
+
function createWarningCollector(): WarningCollector {
|
|
515
|
+
const list: ProviderWarning[] = [];
|
|
516
|
+
return {
|
|
517
|
+
push(warning) {
|
|
518
|
+
list.push(warning);
|
|
519
|
+
},
|
|
520
|
+
drain() {
|
|
521
|
+
return list.slice();
|
|
522
|
+
},
|
|
523
|
+
};
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
/**
|
|
527
|
+
* Base class for typed provider errors. The `retryable` flag is the
|
|
528
|
+
* primary signal for callers (or a retry wrapper) to decide whether to
|
|
529
|
+
* re-issue the request. `retryAfterMs` is set when the provider gave an
|
|
530
|
+
* explicit delay hint (Retry-After header, Retry-Info trailer).
|
|
531
|
+
*/
|
|
532
|
+
export class ProviderError extends Error {
|
|
533
|
+
readonly provider: ProviderKind;
|
|
534
|
+
readonly status: number;
|
|
535
|
+
readonly retryable: boolean;
|
|
536
|
+
readonly retryAfterMs?: number;
|
|
537
|
+
|
|
538
|
+
constructor(options: {
|
|
539
|
+
provider: ProviderKind;
|
|
540
|
+
status: number;
|
|
541
|
+
message: string;
|
|
542
|
+
retryable: boolean;
|
|
543
|
+
retryAfterMs?: number;
|
|
544
|
+
}) {
|
|
545
|
+
super(options.message);
|
|
546
|
+
this.name = new.target.name;
|
|
547
|
+
this.provider = options.provider;
|
|
548
|
+
this.status = options.status;
|
|
549
|
+
this.retryable = options.retryable;
|
|
550
|
+
if (options.retryAfterMs !== undefined) {
|
|
551
|
+
this.retryAfterMs = options.retryAfterMs;
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
/** Provider reports it is overloaded (Anthropic 529, OpenAI/Google 503). */
|
|
557
|
+
export class ProviderOverloadedError extends ProviderError {}
|
|
558
|
+
|
|
559
|
+
/** Provider is rate limiting this API key (OpenAI/Google 429 with Retry-After). */
|
|
560
|
+
export class ProviderRateLimitError extends ProviderError {}
|
|
561
|
+
|
|
562
|
+
/** Provider account quota is exhausted — non-retryable. */
|
|
563
|
+
export class ProviderQuotaError extends ProviderError {}
|
|
564
|
+
|
|
565
|
+
/** Non-retryable 4xx/5xx that doesn't fit another bucket. */
|
|
566
|
+
export class ProviderRequestError extends ProviderError {}
|
|
567
|
+
|
|
568
|
+
/**
 * Parse an HTTP `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are recognised:
 *  - delay-seconds: a non-negative decimal number (a fractional part is
 *    tolerated and rounded to the nearest millisecond);
 *  - a date string understood by `Date.parse`, converted to "milliseconds
 *    from now" and clamped at 0 for dates in the past.
 *
 * @param header Raw header value, or `null` when the header is absent.
 * @returns The delay in milliseconds, or `undefined` when the header is
 *   missing, blank, or not in one of the recognised forms.
 */
function parseRetryAfterMs(header: string | null): number | undefined {
  const value = header?.trim();
  if (!value) return undefined;

  // Only plain decimal digits count as delay-seconds. `Number()` on its own
  // would also accept whitespace-only input (as 0), hex ("0x10"), exponent
  // notation ("1e3") and signed values, none of which are valid here.
  if (/^\d+(?:\.\d+)?$/.test(value)) {
    const delayMs = Math.round(Number(value) * 1000);
    return Number.isFinite(delayMs) ? delayMs : undefined;
  }

  // Anything else that still coerces to a finite number is a malformed
  // delay (e.g. "-5"). Reject it instead of handing it to `Date.parse`,
  // whose treatment of such strings is engine-specific.
  if (Number.isFinite(Number(value))) return undefined;

  // Date form (rare in practice for LLM providers).
  const retryAt = Date.parse(value);
  if (!Number.isNaN(retryAt)) {
    return Math.max(0, retryAt - Date.now());
  }
  return undefined;
}
|
|
581
|
+
|
|
582
|
+
/**
 * Inspect a non-2xx response and build the most specific
 * {@link ProviderError} subclass available.
 *
 * The response body is consumed as text here, so the caller cannot read
 * it again afterwards. The body is used both as the error message and,
 * when it parses as JSON, to disambiguate cases where the HTTP status
 * alone is not enough (notably OpenAI `insufficient_quota` vs
 * `rate_limit_exceeded`, which both arrive as 429).
 *
 * Classification:
 *  - Anthropic 529, OpenAI/Google 503 → `ProviderOverloadedError` (retryable)
 *  - OpenAI 429 with `insufficient_quota`, Google 429 with
 *    `RESOURCE_EXHAUSTED` → `ProviderQuotaError` (non-retryable)
 *  - any other 429, including Anthropic's `rate_limit_error` →
 *    `ProviderRateLimitError` (retryable)
 *  - everything else → `ProviderRequestError` (non-retryable)
 *
 * Retryable errors carry `retryAfterMs` when the response had a usable
 * `Retry-After` header.
 *
 * @param provider Which provider produced the response.
 * @param response The failed (non-2xx) fetch response.
 * @returns The constructed error; this function does not throw it.
 */
async function buildProviderError(
  provider: ProviderKind,
  response: Response,
): Promise<ProviderError> {
  const rawBody = await response.text();
  const message = rawBody.trim() || `${response.status} ${response.statusText}`.trim();
  const status = response.status;
  const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));

  // Error bodies are not guaranteed to be JSON (proxies and gateways may
  // return HTML or plain text), so a parse failure just means "no code".
  const parsedBody = (() => {
    try {
      return JSON.parse(rawBody) as Record<string, unknown>;
    } catch {
      return undefined;
    }
  })();
  const errorRecord = readRecord(parsedBody?.error);

  // The first string-valued field among `code`, `type` and `status` wins.
  // Google's `error.code` is numeric, so its string `status` is what gets
  // compared against RESOURCE_EXHAUSTED below.
  const errorCode = typeof errorRecord?.code === "string"
    ? errorRecord.code
    : typeof errorRecord?.type === "string"
    ? errorRecord.type
    : typeof errorRecord?.status === "string"
    ? errorRecord.status
    : undefined;

  // Shared constructor payloads. Only the retryable variant carries the
  // Retry-After hint, and only when the header was present and parseable.
  const retryableOptions = {
    provider,
    status,
    message,
    retryable: true,
    ...(retryAfterMs !== undefined ? { retryAfterMs } : {}),
  };
  const terminalOptions = {
    provider,
    status,
    message,
    retryable: false,
  };

  // Anthropic 529 = overloaded ({ error: { type: "overloaded_error" } }).
  // OpenAI / Google 503 = overloaded.
  const isOverloaded = (provider === "anthropic" && status === 529) ||
    ((provider === "openai" || provider === "google") && status === 503);
  if (isOverloaded) {
    return new ProviderOverloadedError(retryableOptions);
  }

  if (status === 429) {
    // OpenAI `insufficient_quota` is a hard quota. Google RESOURCE_EXHAUSTED
    // is almost always the daily free-tier quota; it is surfaced as a hard
    // quota error so callers don't hot-loop on retries that can't succeed
    // until the quota resets. Anthropic has no such code here.
    const hardQuotaCode = provider === "openai"
      ? "insufficient_quota"
      : provider === "google"
      ? "RESOURCE_EXHAUSTED"
      : undefined;
    if (hardQuotaCode !== undefined && errorCode === hardQuotaCode) {
      return new ProviderQuotaError(terminalOptions);
    }

    // Every remaining 429 is an ordinary rate limit. This now includes
    // Anthropic, which previously fell through to the non-retryable
    // request error and lost its Retry-After hint.
    return new ProviderRateLimitError(retryableOptions);
  }

  return new ProviderRequestError(terminalOptions);
}
|
|
294
686
|
|
|
295
687
|
async function requestJson(options: {
|
|
@@ -297,11 +689,13 @@ async function requestJson(options: {
|
|
|
297
689
|
fetchImpl: typeof globalThis.fetch;
|
|
298
690
|
init: RequestInit;
|
|
299
691
|
providerLabel: string;
|
|
692
|
+
providerKind: ProviderKind;
|
|
300
693
|
}): Promise<unknown> {
|
|
301
694
|
const response = await options.fetchImpl(options.url, options.init);
|
|
302
695
|
if (!response.ok) {
|
|
303
|
-
const
|
|
304
|
-
|
|
696
|
+
const err = await buildProviderError(options.providerKind, response);
|
|
697
|
+
err.message = `${options.providerLabel} request failed: ${err.message}`;
|
|
698
|
+
throw err;
|
|
305
699
|
}
|
|
306
700
|
|
|
307
701
|
return response.json();
|
|
@@ -312,15 +706,22 @@ async function requestStream(options: {
|
|
|
312
706
|
fetchImpl: typeof globalThis.fetch;
|
|
313
707
|
init: RequestInit;
|
|
314
708
|
providerLabel: string;
|
|
709
|
+
providerKind: ProviderKind;
|
|
315
710
|
}): Promise<ReadableStream<Uint8Array>> {
|
|
316
711
|
const response = await options.fetchImpl(options.url, options.init);
|
|
317
712
|
if (!response.ok) {
|
|
318
|
-
const
|
|
319
|
-
|
|
713
|
+
const err = await buildProviderError(options.providerKind, response);
|
|
714
|
+
err.message = `${options.providerLabel} request failed: ${err.message}`;
|
|
715
|
+
throw err;
|
|
320
716
|
}
|
|
321
717
|
|
|
322
718
|
if (!response.body) {
|
|
323
|
-
throw new
|
|
719
|
+
throw new ProviderRequestError({
|
|
720
|
+
provider: options.providerKind,
|
|
721
|
+
status: response.status,
|
|
722
|
+
message: `${options.providerLabel} request failed: stream body missing`,
|
|
723
|
+
retryable: false,
|
|
724
|
+
});
|
|
324
725
|
}
|
|
325
726
|
|
|
326
727
|
return response.body;
|
|
@@ -366,6 +767,11 @@ function toOpenAICompatibleMessages(prompt: RuntimePromptMessage[]): OpenAICompa
|
|
|
366
767
|
text += part.text;
|
|
367
768
|
continue;
|
|
368
769
|
}
|
|
770
|
+
// OpenAI Chat Completions has no roundtrip slot for Anthropic
|
|
771
|
+
// thinking blocks — they get dropped on replay. Anthropic-only.
|
|
772
|
+
if (part.type === "reasoning") {
|
|
773
|
+
continue;
|
|
774
|
+
}
|
|
369
775
|
|
|
370
776
|
toolCalls.push({
|
|
371
777
|
id: part.toolCallId,
|
|
@@ -473,9 +879,15 @@ function normalizeAnthropicFinishReason(
|
|
|
473
879
|
}
|
|
474
880
|
}
|
|
475
881
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
882
|
+
type RuntimeUsage = {
|
|
883
|
+
inputTokens?: number;
|
|
884
|
+
outputTokens?: number;
|
|
885
|
+
totalTokens?: number;
|
|
886
|
+
cacheCreationInputTokens?: number;
|
|
887
|
+
cacheReadInputTokens?: number;
|
|
888
|
+
};
|
|
889
|
+
|
|
890
|
+
function extractAnthropicUsage(payload: unknown): RuntimeUsage | undefined {
|
|
479
891
|
const record = readRecord(payload);
|
|
480
892
|
const usage = readRecord(record?.usage);
|
|
481
893
|
if (!usage) {
|
|
@@ -484,6 +896,8 @@ function extractAnthropicUsage(payload: unknown):
|
|
|
484
896
|
|
|
485
897
|
const inputTokens = usage.input_tokens;
|
|
486
898
|
const outputTokens = usage.output_tokens;
|
|
899
|
+
const cacheCreationInputTokens = usage.cache_creation_input_tokens;
|
|
900
|
+
const cacheReadInputTokens = usage.cache_read_input_tokens;
|
|
487
901
|
|
|
488
902
|
return {
|
|
489
903
|
inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
|
|
@@ -492,17 +906,15 @@ function extractAnthropicUsage(payload: unknown):
|
|
|
492
906
|
? (typeof inputTokens === "number" ? inputTokens : 0) +
|
|
493
907
|
(typeof outputTokens === "number" ? outputTokens : 0)
|
|
494
908
|
: undefined,
|
|
909
|
+
...(typeof cacheCreationInputTokens === "number" ? { cacheCreationInputTokens } : {}),
|
|
910
|
+
...(typeof cacheReadInputTokens === "number" ? { cacheReadInputTokens } : {}),
|
|
495
911
|
};
|
|
496
912
|
}
|
|
497
913
|
|
|
498
914
|
function mergeUsage(
|
|
499
|
-
current:
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
next:
|
|
503
|
-
| { inputTokens?: number; outputTokens?: number; totalTokens?: number }
|
|
504
|
-
| undefined,
|
|
505
|
-
): { inputTokens?: number; outputTokens?: number; totalTokens?: number } | undefined {
|
|
915
|
+
current: RuntimeUsage | undefined,
|
|
916
|
+
next: RuntimeUsage | undefined,
|
|
917
|
+
): RuntimeUsage | undefined {
|
|
506
918
|
if (!current) {
|
|
507
919
|
return next;
|
|
508
920
|
}
|
|
@@ -513,11 +925,16 @@ function mergeUsage(
|
|
|
513
925
|
|
|
514
926
|
const inputTokens = next.inputTokens ?? current.inputTokens;
|
|
515
927
|
const outputTokens = next.outputTokens ?? current.outputTokens;
|
|
928
|
+
const cacheCreationInputTokens = next.cacheCreationInputTokens ??
|
|
929
|
+
current.cacheCreationInputTokens;
|
|
930
|
+
const cacheReadInputTokens = next.cacheReadInputTokens ?? current.cacheReadInputTokens;
|
|
516
931
|
|
|
517
932
|
return {
|
|
518
933
|
inputTokens,
|
|
519
934
|
outputTokens,
|
|
520
935
|
totalTokens: (inputTokens ?? 0) + (outputTokens ?? 0),
|
|
936
|
+
...(cacheCreationInputTokens !== undefined ? { cacheCreationInputTokens } : {}),
|
|
937
|
+
...(cacheReadInputTokens !== undefined ? { cacheReadInputTokens } : {}),
|
|
521
938
|
};
|
|
522
939
|
}
|
|
523
940
|
|
|
@@ -538,6 +955,26 @@ function toSnakeCaseRecord(record: Record<string, unknown>): Record<string, unkn
|
|
|
538
955
|
);
|
|
539
956
|
}
|
|
540
957
|
|
|
958
|
+
/**
 * Recursive snake_case key converter for nested config objects (used for
 * Anthropic mcp_servers, where authorizationToken / toolConfiguration /
 * allowedTools all need conversion).
 *
 * Arrays are mapped element by element and plain objects are rebuilt with
 * converted keys. Everything else is returned as-is, by reference:
 * primitives, `null`, and non-plain objects such as `Date`, `Map` or class
 * instances. Rebuilding a non-plain object from its enumerable entries
 * would replace it with an empty or partial plain object.
 *
 * Key conversion inserts `_` before every ASCII capital and lower-cases
 * it, so `allowedTools` becomes `allowed_tools`. Consecutive capitals are
 * split individually (`baseURL` becomes `base_u_r_l`).
 *
 * @param value Any JSON-like value.
 * @returns A structurally equivalent value with snake_case object keys.
 */
function deepSnakeCase(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map((item) => deepSnakeCase(item));
  }
  if (value === null || typeof value !== "object") {
    return value;
  }

  // Only recurse into plain objects (object literals or null-prototype
  // dictionaries). Anything with a custom prototype is opaque data.
  const proto: unknown = Object.getPrototypeOf(value);
  if (proto !== Object.prototype && proto !== null) {
    return value;
  }

  return Object.fromEntries(
    Object.entries(value as Record<string, unknown>).map(([key, nested]) => [
      key.replace(/[A-Z]/g, (match) => `_${match.toLowerCase()}`),
      deepSnakeCase(nested),
    ]),
  );
}
|
|
977
|
+
|
|
541
978
|
function pushAnthropicUserContent(
|
|
542
979
|
messages: AnthropicCompatibleMessage[],
|
|
543
980
|
content: Array<Record<string, unknown>>,
|
|
@@ -558,9 +995,32 @@ function pushAnthropicUserContent(
|
|
|
558
995
|
});
|
|
559
996
|
}
|
|
560
997
|
|
|
998
|
+
/**
|
|
999
|
+
* Resolves a {@link ProviderCacheTtl} into Anthropic's `cache_control` shape.
|
|
1000
|
+
*
|
|
1001
|
+
* Returns `undefined` when caching is not requested (`false` / `undefined`),
|
|
1002
|
+
* `{ type: "ephemeral" }` for the 5-minute default (`true` / `"5m"`), or
|
|
1003
|
+
* `{ type: "ephemeral", ttl: "1h" }` for the extended 1-hour cache.
|
|
1004
|
+
*/
|
|
1005
|
+
function resolveAnthropicCacheControlBlock(
|
|
1006
|
+
ttl: ProviderCacheTtl | undefined,
|
|
1007
|
+
): { type: "ephemeral"; ttl?: "1h" } | undefined {
|
|
1008
|
+
if (ttl === undefined || ttl === false) {
|
|
1009
|
+
return undefined;
|
|
1010
|
+
}
|
|
1011
|
+
if (ttl === "1h") {
|
|
1012
|
+
return { type: "ephemeral", ttl: "1h" };
|
|
1013
|
+
}
|
|
1014
|
+
return { type: "ephemeral" };
|
|
1015
|
+
}
|
|
1016
|
+
|
|
561
1017
|
function toAnthropicMessages(
|
|
562
1018
|
prompt: RuntimePromptMessage[],
|
|
563
|
-
|
|
1019
|
+
systemCacheControl?: { type: "ephemeral"; ttl?: "1h" },
|
|
1020
|
+
): {
|
|
1021
|
+
system?: string | Array<Record<string, unknown>>;
|
|
1022
|
+
messages: AnthropicCompatibleMessage[];
|
|
1023
|
+
} {
|
|
564
1024
|
const systemParts: string[] = [];
|
|
565
1025
|
const messages: AnthropicCompatibleMessage[] = [];
|
|
566
1026
|
|
|
@@ -580,14 +1040,33 @@ function toAnthropicMessages(
|
|
|
580
1040
|
case "assistant":
|
|
581
1041
|
messages.push({
|
|
582
1042
|
role: "assistant",
|
|
583
|
-
content: message.content.map((part) =>
|
|
584
|
-
part.type === "text"
|
|
1043
|
+
content: message.content.map((part) => {
|
|
1044
|
+
if (part.type === "text") {
|
|
1045
|
+
return { type: "text", text: part.text };
|
|
1046
|
+
}
|
|
1047
|
+
if (part.type === "reasoning") {
|
|
1048
|
+
// Redacted thinking blocks roundtrip as the encrypted blob
|
|
1049
|
+
// form Anthropic gave us. Plain thinking blocks need the
|
|
1050
|
+
// signature to verify on the server.
|
|
1051
|
+
if (typeof part.redactedData === "string") {
|
|
1052
|
+
return {
|
|
1053
|
+
type: "redacted_thinking",
|
|
1054
|
+
data: part.redactedData,
|
|
1055
|
+
};
|
|
1056
|
+
}
|
|
1057
|
+
return {
|
|
1058
|
+
type: "thinking",
|
|
1059
|
+
thinking: part.text ?? "",
|
|
1060
|
+
...(typeof part.signature === "string" ? { signature: part.signature } : {}),
|
|
1061
|
+
};
|
|
1062
|
+
}
|
|
1063
|
+
return {
|
|
585
1064
|
type: "tool_use",
|
|
586
1065
|
id: part.toolCallId,
|
|
587
1066
|
name: part.toolName,
|
|
588
1067
|
input: part.input,
|
|
589
|
-
}
|
|
590
|
-
),
|
|
1068
|
+
};
|
|
1069
|
+
}),
|
|
591
1070
|
});
|
|
592
1071
|
break;
|
|
593
1072
|
case "tool":
|
|
@@ -603,14 +1082,63 @@ function toAnthropicMessages(
|
|
|
603
1082
|
}
|
|
604
1083
|
}
|
|
605
1084
|
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
1085
|
+
if (systemParts.length === 0) {
|
|
1086
|
+
return { messages };
|
|
1087
|
+
}
|
|
1088
|
+
|
|
1089
|
+
const joined = systemParts.join("\n\n");
|
|
1090
|
+
|
|
1091
|
+
// Cache-controlled system prompts must use the array-of-blocks form so the
|
|
1092
|
+
// breakpoint lands on an individual content block. Callers that don't opt
|
|
1093
|
+
// in keep the legacy raw-string form for backward compatibility.
|
|
1094
|
+
if (systemCacheControl) {
|
|
1095
|
+
return {
|
|
1096
|
+
system: [{
|
|
1097
|
+
type: "text",
|
|
1098
|
+
text: joined,
|
|
1099
|
+
cache_control: systemCacheControl,
|
|
1100
|
+
}],
|
|
1101
|
+
messages,
|
|
1102
|
+
};
|
|
1103
|
+
}
|
|
1104
|
+
|
|
1105
|
+
return { system: joined, messages };
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
/**
 * Short-name → latest-versioned-type alias map for Anthropic provider tools.
 *
 * Anthropic tool types are date-stamped (e.g. `code_execution_20260120`) so
 * callers either pin a version or get the latest. We accept both: a caller
 * can pass `anthropic.code_execution` and we map to the latest known version,
 * or pass `anthropic.code_execution_20250522` and we forward verbatim.
 *
 * Versions chosen here are the latest documented releases as of 2026-04-15
 * — see https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview.
 * When Anthropic ships newer versions, update this map.
 */
const ANTHROPIC_TOOL_VERSION_ALIASES: Record<string, string> = {
  code_execution: "code_execution_20260120",
  computer_use: "computer_20250124",
  computer: "computer_20250124",
  text_editor: "text_editor_20250728",
  bash: "bash_20250124",
  memory: "memory_20250818",
  web_search: "web_search_20250305",
  web_fetch: "web_fetch_20250910",
};

/**
 * Resolve the tool type taken from an `anthropic.<type>` tool id into the
 * string sent as the tool's `type`.
 *
 * @param rawType The part of the tool id after the `anthropic.` prefix.
 * @returns `rawType` unchanged when it already ends in an 8-digit date
 *   stamp or has no known alias; otherwise the versioned type from
 *   {@link ANTHROPIC_TOOL_VERSION_ALIASES}.
 */
function resolveAnthropicProviderType(rawType: string): string {
  // Already-versioned types (contain a date stamp suffix) pass through verbatim.
  if (/_\d{8}$/.test(rawType)) {
    return rawType;
  }

  // Look up own keys only. A bare index on a plain object also resolves
  // inherited members, so a raw type such as "constructor" or "toString"
  // would return a function rather than falling back to the raw type.
  const alias = Object.prototype.hasOwnProperty.call(ANTHROPIC_TOOL_VERSION_ALIASES, rawType)
    ? ANTHROPIC_TOOL_VERSION_ALIASES[rawType]
    : undefined;
  return alias ?? rawType;
}
|
|
611
1138
|
|
|
612
1139
|
function toAnthropicTools(
|
|
613
1140
|
tools: RuntimeToolDefinition[] | undefined,
|
|
1141
|
+
toolsCacheControl?: { type: "ephemeral"; ttl?: "1h" },
|
|
614
1142
|
): Array<Record<string, unknown>> | undefined {
|
|
615
1143
|
if (!tools) {
|
|
616
1144
|
return undefined;
|
|
@@ -632,19 +1160,35 @@ function toAnthropicTools(
|
|
|
632
1160
|
continue;
|
|
633
1161
|
}
|
|
634
1162
|
|
|
635
|
-
const
|
|
636
|
-
if (
|
|
1163
|
+
const rawType = tool.id.slice("anthropic.".length);
|
|
1164
|
+
if (rawType.length === 0) {
|
|
637
1165
|
continue;
|
|
638
1166
|
}
|
|
639
1167
|
|
|
640
1168
|
normalized.push({
|
|
641
|
-
type:
|
|
1169
|
+
type: resolveAnthropicProviderType(rawType),
|
|
642
1170
|
name: tool.name,
|
|
643
1171
|
...toSnakeCaseRecord(tool.args),
|
|
644
1172
|
});
|
|
645
1173
|
}
|
|
646
1174
|
|
|
647
|
-
|
|
1175
|
+
if (normalized.length === 0) {
|
|
1176
|
+
return undefined;
|
|
1177
|
+
}
|
|
1178
|
+
|
|
1179
|
+
// Attach the cache breakpoint to the final tool entry so Anthropic caches
|
|
1180
|
+
// the entire tools block up to and including that definition. Earlier tool
|
|
1181
|
+
// entries are implicitly covered by the same breakpoint per Anthropic's
|
|
1182
|
+
// walk-backward cache lookup behaviour.
|
|
1183
|
+
if (toolsCacheControl) {
|
|
1184
|
+
const lastIndex = normalized.length - 1;
|
|
1185
|
+
normalized[lastIndex] = {
|
|
1186
|
+
...normalized[lastIndex],
|
|
1187
|
+
cache_control: toolsCacheControl,
|
|
1188
|
+
};
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
return normalized;
|
|
648
1192
|
}
|
|
649
1193
|
|
|
650
1194
|
function createAnthropicRequestHeaders(options: {
|
|
@@ -717,47 +1261,244 @@ function resolveAnthropicMaxTokens(
|
|
|
717
1261
|
return requested;
|
|
718
1262
|
}
|
|
719
1263
|
|
|
1264
|
+
/**
 * Map a unified reasoning option to an Anthropic `thinking.budget_tokens`
 * value.
 *
 * An explicit `budgetTokens` wins when it is a finite number of at least 1024
 * (the minimum this function accepts); it is floored so the wire value is
 * always an integer. Anything else (missing, below 1024, `NaN`, `Infinity`)
 * falls back to the effort tier: low = 1024, medium/unspecified = 4096,
 * high = 16_384, max = 32_768.
 *
 * @param option Unified reasoning option, or `undefined` when not configured.
 * @returns The thinking budget in tokens, or `undefined` when reasoning is not
 *   explicitly enabled (`enabled !== true`).
 */
function resolveAnthropicThinkingBudget(
  option: ProviderReasoningOption | undefined,
): number | undefined {
  if (!option || option.enabled !== true) {
    return undefined;
  }

  const explicitBudget = option.budgetTokens;
  if (
    typeof explicitBudget === "number" &&
    Number.isFinite(explicitBudget) &&
    explicitBudget >= 1024
  ) {
    // Floor rather than forward: a fractional value would otherwise be
    // serialized as-is into `budget_tokens`.
    return Math.floor(explicitBudget);
  }

  switch (option.effort) {
    case "low":
      return 1024;
    case "high":
      return 16_384;
    case "max":
      return 32_768;
    case "medium":
    default:
      return 4096;
  }
}
|
|
1292
|
+
|
|
720
1293
|
/**
 * Build the JSON body for an Anthropic Messages API call from the unified
 * language-model options.
 *
 * Unified settings that are not forwarded (penalties, seed, topK, non-text
 * responseFormat), that are truncated (more than 4 stop sequences), or that
 * are dropped because thinking is enabled (temperature, topP) are reported
 * through `warnings`. Provider-specific options read via
 * `readProviderOptions` are merged last and therefore override any field
 * computed here.
 *
 * @param modelId Model identifier placed in `model` and used to resolve token limits.
 * @param providerName Passed to `readProviderOptions` alongside `"anthropic"`.
 * @param options Unified call options (prompt, tools, sampling, reasoning, ...).
 * @param stream When true, `stream: true` is added to the body.
 * @param warnings Collector that receives one entry per dropped or adjusted setting.
 * @returns The request body.
 */
function buildAnthropicMessagesRequest(
  modelId: string,
  providerName: string,
  options: OpenAICompatibleLanguageOptions,
  stream: boolean,
  warnings: WarningCollector,
): AnthropicCompatibleRequest {
  const systemCacheControl = resolveAnthropicCacheControlBlock(
    options.cacheControl?.system,
  );
  const toolsCacheControl = resolveAnthropicCacheControlBlock(
    options.cacheControl?.tools,
  );

  const { system, messages } = toAnthropicMessages(options.prompt, systemCacheControl);
  const anthropicTools = toAnthropicTools(options.tools, toolsCacheControl);
  const requestedThinkingBudget = resolveAnthropicThinkingBudget(options.reasoning);
  const thinkingEnabled = requestedThinkingBudget !== undefined;

  // Anthropic doesn't support these unified options at all — emit warnings
  // so callers don't quietly pass values that have zero effect.
  if (options.presencePenalty !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "presencePenalty",
      details: "Anthropic Messages API has no equivalent and the value was dropped.",
    });
  }
  if (options.frequencyPenalty !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "frequencyPenalty",
      details: "Anthropic Messages API has no equivalent and the value was dropped.",
    });
  }
  if (options.seed !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "seed",
      details: "Anthropic Messages API does not support deterministic seeding.",
    });
  }
  if (options.topK !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "topK",
      details: "Anthropic Messages API does not expose top_k on this surface.",
    });
  }
  if (
    options.stopSequences && options.stopSequences.length > 4
  ) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "stopSequences",
      details:
        `Anthropic accepts at most 4 stop sequences; ${options.stopSequences.length} were provided and the extras were truncated.`,
    });
  }
  if (thinkingEnabled && options.temperature !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "temperature",
      details:
        "Dropped because Anthropic rejects sampling params when extended thinking is enabled.",
    });
  }
  if (thinkingEnabled && options.topP !== undefined) {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "topP",
      details:
        "Dropped because Anthropic rejects sampling params when extended thinking is enabled.",
    });
  }
  if (options.responseFormat && options.responseFormat.type !== "text") {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting: "responseFormat",
      details:
        "Anthropic Messages API does not have a structured-output response_format equivalent. Use a tool with the schema as input_schema instead.",
    });
  }

  // Anthropic requires max_tokens > budget_tokens when thinking is enabled.
  // Growing max_tokens by the thinking budget preserves the caller's intended
  // output budget, and we clamp the sum at the model's advertised maximum so
  // the request never exceeds the API's hard cap.
  const baseMaxTokens = resolveAnthropicMaxTokens(modelId, options.maxOutputTokens);
  let maxTokens = baseMaxTokens;
  let thinkingBudget = requestedThinkingBudget;
  if (requestedThinkingBudget !== undefined) {
    maxTokens = Math.min(
      baseMaxTokens + requestedThinkingBudget,
      getAnthropicModelCapabilities(modelId).maxOutputTokens,
    );

    // The clamp above can leave max_tokens at or below the requested budget
    // (e.g. a caller-supplied budget larger than the model cap). Shrink the
    // budget to whatever the cap leaves after the output allowance, but never
    // below the 1024 minimum, so the max_tokens > budget_tokens rule holds.
    if (requestedThinkingBudget >= maxTokens) {
      const fittedBudget = Math.max(1024, maxTokens - baseMaxTokens);
      if (fittedBudget < maxTokens) {
        thinkingBudget = fittedBudget;
        warnings.push({
          type: "unsupported-setting",
          provider: "anthropic",
          setting: "reasoning",
          details:
            `Thinking budget of ${requestedThinkingBudget} tokens does not fit below max_tokens (${maxTokens}); reduced to ${fittedBudget} because Anthropic requires max_tokens > budget_tokens.`,
        });
      }
    }
  }

  const body: AnthropicCompatibleRequest = {
    model: modelId,
    messages,
    max_tokens: maxTokens,
    ...(stream ? { stream: true } : {}),
    ...(system ? { system } : {}),
    // Sampling params are mutually exclusive with thinking on Anthropic — the
    // API rejects the combo outright. They are omitted here when thinking is
    // on; the corresponding warnings were already pushed above.
    ...(!thinkingEnabled && options.temperature !== undefined
      ? { temperature: options.temperature }
      : {}),
    ...(!thinkingEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
    ...(options.stopSequences && options.stopSequences.length > 0
      ? { stop_sequences: options.stopSequences.slice(0, 4) }
      : {}),
    ...(anthropicTools ? { tools: anthropicTools } : {}),
    ...(options.toolChoice !== undefined
      ? { tool_choice: normalizeAnthropicToolChoice(options.toolChoice) }
      : {}),
    ...(thinkingEnabled ? { thinking: { type: "enabled", budget_tokens: thinkingBudget } } : {}),
    ...(typeof options.userId === "string" && options.userId.length > 0
      ? { metadata: { user_id: options.userId } }
      : {}),
    ...(options.mcpServers && options.mcpServers.length > 0
      ? { mcp_servers: deepSnakeCase(options.mcpServers) as unknown[] }
      : {}),
    ...(options.anthropicContainer !== undefined ? { container: options.anthropicContainer } : {}),
  };

  // Provider options are applied last so callers can override computed fields.
  Object.assign(body, readProviderOptions(options.providerOptions, "anthropic", providerName));
  return body;
}
|
|
747
1431
|
|
|
1432
|
+
/**
 * Normalized reasoning part emitted for Anthropic thinking blocks.
 *
 * A `thinking` block contributes `text` and `signature` (each only when the
 * payload carries a string for it); a `redacted_thinking` block contributes
 * `redactedData` instead.
 */
type AnthropicReasoningContent = {
  type: "reasoning";
  // Cleartext thinking trace.
  text?: string;
  // Signature that accompanied the thinking trace.
  signature?: string;
  // Opaque payload of a redacted thinking block, passed through unchanged.
  redactedData?: string;
};
|
|
1438
|
+
|
|
1439
|
+
/**
 * camelCase view of a single Anthropic citation record.
 *
 * Only `type` is required; every other field is the camelCase counterpart of
 * a snake_case wire field (e.g. `cited_text` → `citedText`) and is present
 * only when the wire value had the expected primitive type.
 */
type AnthropicCitation = {
  type: string;
  citedText?: string;
  url?: string;
  title?: string;
  startCharIndex?: number;
  endCharIndex?: number;
  startBlockIndex?: number;
  endBlockIndex?: number;
  startPageNumber?: number;
  endPageNumber?: number;
  documentIndex?: number;
  documentTitle?: string;
};
|
|
1453
|
+
|
|
1454
|
+
/**
 * Normalized text part of an Anthropic response, optionally carrying the
 * citations that were attached to the originating text block.
 */
type AnthropicTextContent = {
  type: "text";
  text: string;
  // Normalized citations for this text block, when any are attached.
  citations?: AnthropicCitation[];
};
|
|
1459
|
+
|
|
1460
|
+
/**
 * Convert one raw Anthropic citation object into its camelCase form.
 *
 * Covers the combined field set of the web_search_result_location,
 * web_fetch_result_location, char_location, page_location, and
 * content_block_location citation kinds — see
 * https://docs.claude.com/en/docs/build-with-claude/citations
 *
 * @param raw Untrusted citation value taken from a response block.
 * @returns The normalized citation, or `undefined` when `raw` is not a record
 *   or has no non-empty string `type`. Fields whose wire value has the wrong
 *   primitive type are omitted rather than coerced.
 */
function normalizeAnthropicCitation(raw: unknown): AnthropicCitation | undefined {
  const source = readRecord(raw);
  if (!source) {
    return undefined;
  }

  const kind = typeof source.type === "string" ? source.type : "";
  if (kind.length === 0) {
    return undefined;
  }

  // Typed readers: yield the wire value only when it has the expected type.
  const readString = (key: string): string | undefined => {
    const value = source[key];
    return typeof value === "string" ? value : undefined;
  };
  const readNumber = (key: string): number | undefined => {
    const value = source[key];
    return typeof value === "number" ? value : undefined;
  };

  const citedText = readString("cited_text");
  const url = readString("url");
  const title = readString("title");
  const startCharIndex = readNumber("start_char_index");
  const endCharIndex = readNumber("end_char_index");
  const startBlockIndex = readNumber("start_block_index");
  const endBlockIndex = readNumber("end_block_index");
  const startPageNumber = readNumber("start_page_number");
  const endPageNumber = readNumber("end_page_number");
  const documentIndex = readNumber("document_index");
  const documentTitle = readString("document_title");

  // Spread order fixes the property order of the resulting object.
  return {
    type: kind,
    ...(citedText !== undefined ? { citedText } : {}),
    ...(url !== undefined ? { url } : {}),
    ...(title !== undefined ? { title } : {}),
    ...(startCharIndex !== undefined ? { startCharIndex } : {}),
    ...(endCharIndex !== undefined ? { endCharIndex } : {}),
    ...(startBlockIndex !== undefined ? { startBlockIndex } : {}),
    ...(endBlockIndex !== undefined ? { endBlockIndex } : {}),
    ...(startPageNumber !== undefined ? { startPageNumber } : {}),
    ...(endPageNumber !== undefined ? { endPageNumber } : {}),
    ...(documentIndex !== undefined ? { documentIndex } : {}),
    ...(documentTitle !== undefined ? { documentTitle } : {}),
  };
}
|
|
1486
|
+
|
|
748
1487
|
function buildAnthropicGenerateResult(payload: unknown): {
|
|
749
1488
|
content: Array<
|
|
750
|
-
|
|
|
1489
|
+
| AnthropicTextContent
|
|
1490
|
+
| AnthropicReasoningContent
|
|
751
1491
|
| { type: "tool-call"; toolCallId: string; toolName: string; input: string }
|
|
752
1492
|
| { type: "tool-result"; toolCallId: string; toolName: string; result: unknown }
|
|
753
1493
|
>;
|
|
754
1494
|
finishReason?: string | { unified: string; raw: string } | null;
|
|
755
|
-
usage?:
|
|
1495
|
+
usage?: RuntimeUsage;
|
|
756
1496
|
} {
|
|
757
1497
|
const record = readRecord(payload);
|
|
758
1498
|
const content = Array.isArray(record?.content) ? record.content : [];
|
|
759
1499
|
const normalized: Array<
|
|
760
|
-
|
|
|
1500
|
+
| AnthropicTextContent
|
|
1501
|
+
| AnthropicReasoningContent
|
|
761
1502
|
| { type: "tool-call"; toolCallId: string; toolName: string; input: string }
|
|
762
1503
|
| { type: "tool-result"; toolCallId: string; toolName: string; result: unknown }
|
|
763
1504
|
> = [];
|
|
@@ -767,7 +1508,42 @@ function buildAnthropicGenerateResult(payload: unknown): {
|
|
|
767
1508
|
const blockType = typeof block?.type === "string" ? block.type : undefined;
|
|
768
1509
|
|
|
769
1510
|
if (blockType === "text" && typeof block?.text === "string" && block.text.length > 0) {
|
|
770
|
-
|
|
1511
|
+
const citationsRaw = Array.isArray(block.citations) ? block.citations : undefined;
|
|
1512
|
+
const citations = citationsRaw
|
|
1513
|
+
?.flatMap((c) => {
|
|
1514
|
+
const normalizedCitation = normalizeAnthropicCitation(c);
|
|
1515
|
+
return normalizedCitation ? [normalizedCitation] : [];
|
|
1516
|
+
});
|
|
1517
|
+
normalized.push({
|
|
1518
|
+
type: "text",
|
|
1519
|
+
text: block.text,
|
|
1520
|
+
...(citations && citations.length > 0 ? { citations } : {}),
|
|
1521
|
+
});
|
|
1522
|
+
continue;
|
|
1523
|
+
}
|
|
1524
|
+
|
|
1525
|
+
// Thinking blocks carry the cleartext trace plus a signature that
|
|
1526
|
+
// Anthropic uses to verify on subsequent turns. Surfacing both lets
|
|
1527
|
+
// callers persist them as `reasoning` content parts and replay on
|
|
1528
|
+
// the next turn so Claude can continue from the same thinking.
|
|
1529
|
+
if (blockType === "thinking") {
|
|
1530
|
+
normalized.push({
|
|
1531
|
+
type: "reasoning",
|
|
1532
|
+
...(typeof block?.thinking === "string" ? { text: block.thinking } : {}),
|
|
1533
|
+
...(typeof block?.signature === "string" ? { signature: block.signature } : {}),
|
|
1534
|
+
});
|
|
1535
|
+
continue;
|
|
1536
|
+
}
|
|
1537
|
+
|
|
1538
|
+
// Redacted thinking blocks arrive when Claude's safety classifier
|
|
1539
|
+
// hides the trace. Pass the encrypted blob through opaquely so the
|
|
1540
|
+
// caller can replay it on the next turn (Anthropic still needs the
|
|
1541
|
+
// blob to verify continuity even though it can't read it).
|
|
1542
|
+
if (blockType === "redacted_thinking" && typeof block?.data === "string") {
|
|
1543
|
+
normalized.push({
|
|
1544
|
+
type: "reasoning",
|
|
1545
|
+
redactedData: block.data,
|
|
1546
|
+
});
|
|
771
1547
|
continue;
|
|
772
1548
|
}
|
|
773
1549
|
|
|
@@ -857,7 +1633,7 @@ async function* streamAnthropicCompatibleParts(
|
|
|
857
1633
|
const toolCalls = new Map<number, AnthropicStreamToolCallState>();
|
|
858
1634
|
const reasoningBlocks = new Map<number, AnthropicStreamReasoningState>();
|
|
859
1635
|
let finishReason: string | { unified: string; raw: string } | null = null;
|
|
860
|
-
let usage:
|
|
1636
|
+
let usage: RuntimeUsage | undefined;
|
|
861
1637
|
|
|
862
1638
|
for await (const chunk of stream) {
|
|
863
1639
|
buffer += decoder.decode(chunk, { stream: true });
|
|
@@ -909,6 +1685,20 @@ async function* streamAnthropicCompatibleParts(
|
|
|
909
1685
|
continue;
|
|
910
1686
|
}
|
|
911
1687
|
|
|
1688
|
+
// Redacted thinking blocks arrive as opaque encrypted payloads when
|
|
1689
|
+
// Claude's safety classifier flags the reasoning trace. Surface them
|
|
1690
|
+
// as a zero-length reasoning block so callers know thinking happened
|
|
1691
|
+
// without leaking the (legitimately hidden) contents.
|
|
1692
|
+
if (blockType === "redacted_thinking") {
|
|
1693
|
+
const reasoningId = `thinking-${index}`;
|
|
1694
|
+
reasoningBlocks.set(index, { id: reasoningId });
|
|
1695
|
+
yield {
|
|
1696
|
+
type: "reasoning-start",
|
|
1697
|
+
id: reasoningId,
|
|
1698
|
+
};
|
|
1699
|
+
continue;
|
|
1700
|
+
}
|
|
1701
|
+
|
|
912
1702
|
if (
|
|
913
1703
|
(blockType === "tool_use" || blockType === "server_tool_use") &&
|
|
914
1704
|
typeof contentBlock?.id === "string" &&
|
|
@@ -1094,9 +1884,7 @@ function normalizeOpenAIFinishReason(
|
|
|
1094
1884
|
return raw;
|
|
1095
1885
|
}
|
|
1096
1886
|
|
|
1097
|
-
function extractOpenAIUsage(payload: unknown):
|
|
1098
|
-
| { inputTokens?: number; outputTokens?: number; totalTokens?: number }
|
|
1099
|
-
| undefined {
|
|
1887
|
+
function extractOpenAIUsage(payload: unknown): RuntimeUsage | undefined {
|
|
1100
1888
|
const record = readRecord(payload);
|
|
1101
1889
|
const usage = readRecord(record?.usage);
|
|
1102
1890
|
if (!usage) {
|
|
@@ -1106,11 +1894,14 @@ function extractOpenAIUsage(payload: unknown):
|
|
|
1106
1894
|
const inputTokens = usage.prompt_tokens;
|
|
1107
1895
|
const outputTokens = usage.completion_tokens;
|
|
1108
1896
|
const totalTokens = usage.total_tokens;
|
|
1897
|
+
const promptTokensDetails = readRecord(usage.prompt_tokens_details);
|
|
1898
|
+
const cachedTokens = promptTokensDetails?.cached_tokens;
|
|
1109
1899
|
|
|
1110
1900
|
return {
|
|
1111
1901
|
inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
|
|
1112
1902
|
outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
|
|
1113
1903
|
totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
|
|
1904
|
+
...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
|
|
1114
1905
|
};
|
|
1115
1906
|
}
|
|
1116
1907
|
|
|
@@ -1165,19 +1956,95 @@ function extractOpenAIToolCalls(message: Record<string, unknown>): Array<{
|
|
|
1165
1956
|
return normalized;
|
|
1166
1957
|
}
|
|
1167
1958
|
|
|
1959
|
+
/**
 * Detect OpenAI reasoning models (o1 / o3 / o4 family) from the model id.
 *
 * These models go through the completion path but reject sampling params and
 * accept a `reasoning_effort` field, so the request builder needs to know
 * which kind it is talking to without per-runtime configuration.
 *
 * @param modelId Model identifier as passed by the caller.
 * @returns `true` when the id is exactly `o1`, `o3` or `o4`, or starts with
 *   one of those followed by `-`.
 */
function isOpenAIReasoningModel(modelId: string): boolean {
  // Everything before the first hyphen (or the whole id when there is none).
  const [family] = modelId.split("-", 1);
  return family === "o1" || family === "o3" || family === "o4";
}
|
|
1968
|
+
|
|
1969
|
+
/**
 * Translate the unified reasoning option into OpenAI's `reasoning_effort`
 * value. OpenAI has no "max" tier, so it is folded into "high"; a missing or
 * unrecognized effort resolves to "medium".
 *
 * @param option Unified reasoning option, or `undefined` when not configured.
 * @returns The effort string, or `undefined` when reasoning is not explicitly
 *   enabled (`enabled !== true`).
 */
function resolveOpenAIReasoningEffort(
  option: ProviderReasoningOption | undefined,
): "low" | "medium" | "high" | undefined {
  if (option?.enabled !== true) {
    return undefined;
  }

  const requested = option.effort;
  if (requested === "low") {
    return "low";
  }
  if (requested === "high" || requested === "max") {
    return "high";
  }
  return "medium";
}
|
|
1990
|
+
|
|
1168
1991
|
function buildOpenAIChatRequest(
|
|
1169
1992
|
modelId: string,
|
|
1170
1993
|
providerName: string,
|
|
1171
1994
|
options: OpenAICompatibleLanguageOptions,
|
|
1172
1995
|
stream: boolean,
|
|
1996
|
+
warnings: WarningCollector,
|
|
1173
1997
|
): OpenAICompatibleChatRequest {
|
|
1998
|
+
const isReasoningModel = isOpenAIReasoningModel(modelId);
|
|
1999
|
+
const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
|
|
2000
|
+
const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
|
|
2001
|
+
|
|
2002
|
+
// OpenAI Chat Completions has no top_k surface (it's exposed only on the
|
|
2003
|
+
// Responses API for some reasoning models). Quietly accepting it would
|
|
2004
|
+
// mislead callers into thinking it took effect.
|
|
2005
|
+
if (options.topK !== undefined) {
|
|
2006
|
+
warnings.push({
|
|
2007
|
+
type: "unsupported-setting",
|
|
2008
|
+
provider: "openai",
|
|
2009
|
+
setting: "topK",
|
|
2010
|
+
details: "OpenAI Chat Completions does not expose top_k; the value was dropped.",
|
|
2011
|
+
});
|
|
2012
|
+
}
|
|
2013
|
+
|
|
2014
|
+
// Reasoning models (o1 / o3 / o4) reject sampling params outright. Emit
|
|
2015
|
+
// warnings at build time so callers see *why* the value didn't apply
|
|
2016
|
+
// rather than a 400 from the API.
|
|
2017
|
+
if (reasoningEnabled) {
|
|
2018
|
+
const dropped: Array<[keyof typeof options, string]> = [
|
|
2019
|
+
["temperature", "temperature"],
|
|
2020
|
+
["topP", "top_p"],
|
|
2021
|
+
["presencePenalty", "presence_penalty"],
|
|
2022
|
+
["frequencyPenalty", "frequency_penalty"],
|
|
2023
|
+
];
|
|
2024
|
+
for (const [key, openaiName] of dropped) {
|
|
2025
|
+
if (options[key] !== undefined) {
|
|
2026
|
+
warnings.push({
|
|
2027
|
+
type: "unsupported-setting",
|
|
2028
|
+
provider: "openai",
|
|
2029
|
+
setting: key,
|
|
2030
|
+
details:
|
|
2031
|
+
`Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
|
|
2032
|
+
});
|
|
2033
|
+
}
|
|
2034
|
+
}
|
|
2035
|
+
}
|
|
2036
|
+
|
|
1174
2037
|
const body: OpenAICompatibleChatRequest = {
|
|
1175
2038
|
model: modelId,
|
|
1176
2039
|
messages: toOpenAICompatibleMessages(options.prompt),
|
|
1177
2040
|
...(stream ? { stream: true, stream_options: { include_usage: true } } : {}),
|
|
1178
2041
|
...(options.maxOutputTokens !== undefined ? { max_tokens: options.maxOutputTokens } : {}),
|
|
1179
|
-
|
|
1180
|
-
|
|
2042
|
+
// OpenAI reasoning models reject temperature / top_p / frequency / presence.
|
|
2043
|
+
// Drop them silently rather than letting the API bounce the request.
|
|
2044
|
+
...(!reasoningEnabled && options.temperature !== undefined
|
|
2045
|
+
? { temperature: options.temperature }
|
|
2046
|
+
: {}),
|
|
2047
|
+
...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
|
|
1181
2048
|
...(options.stopSequences && options.stopSequences.length > 0
|
|
1182
2049
|
? { stop: options.stopSequences }
|
|
1183
2050
|
: {}),
|
|
@@ -1186,10 +2053,37 @@ function buildOpenAIChatRequest(
|
|
|
1186
2053
|
: {}),
|
|
1187
2054
|
...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
|
|
1188
2055
|
...(options.seed !== undefined ? { seed: options.seed } : {}),
|
|
1189
|
-
...(options.presencePenalty !== undefined
|
|
1190
|
-
|
|
2056
|
+
...(!reasoningEnabled && options.presencePenalty !== undefined
|
|
2057
|
+
? { presence_penalty: options.presencePenalty }
|
|
2058
|
+
: {}),
|
|
2059
|
+
...(!reasoningEnabled && options.frequencyPenalty !== undefined
|
|
1191
2060
|
? { frequency_penalty: options.frequencyPenalty }
|
|
1192
2061
|
: {}),
|
|
2062
|
+
...(reasoningEffort !== undefined ? { reasoning_effort: reasoningEffort } : {}),
|
|
2063
|
+
...(typeof options.userId === "string" && options.userId.length > 0
|
|
2064
|
+
? { user: options.userId }
|
|
2065
|
+
: {}),
|
|
2066
|
+
...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
|
|
2067
|
+
...(options.parallelToolCalls !== undefined
|
|
2068
|
+
? { parallel_tool_calls: options.parallelToolCalls }
|
|
2069
|
+
: {}),
|
|
2070
|
+
...(options.responseFormat && options.responseFormat.type !== "text"
|
|
2071
|
+
? {
|
|
2072
|
+
response_format: options.responseFormat.type === "json" ? { type: "json_object" } : {
|
|
2073
|
+
type: "json_schema",
|
|
2074
|
+
json_schema: {
|
|
2075
|
+
name: options.responseFormat.name,
|
|
2076
|
+
...(typeof options.responseFormat.description === "string"
|
|
2077
|
+
? { description: options.responseFormat.description }
|
|
2078
|
+
: {}),
|
|
2079
|
+
schema: unwrapToolInputSchema(options.responseFormat.schema),
|
|
2080
|
+
...(options.responseFormat.strict !== undefined
|
|
2081
|
+
? { strict: options.responseFormat.strict }
|
|
2082
|
+
: {}),
|
|
2083
|
+
},
|
|
2084
|
+
},
|
|
2085
|
+
}
|
|
2086
|
+
: {}),
|
|
1193
2087
|
};
|
|
1194
2088
|
|
|
1195
2089
|
Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
|
|
@@ -1216,9 +2110,7 @@ function normalizeGoogleFinishReason(
|
|
|
1216
2110
|
}
|
|
1217
2111
|
}
|
|
1218
2112
|
|
|
1219
|
-
function extractGoogleUsage(payload: unknown):
|
|
1220
|
-
| { inputTokens?: number; outputTokens?: number; totalTokens?: number }
|
|
1221
|
-
| undefined {
|
|
2113
|
+
function extractGoogleUsage(payload: unknown): RuntimeUsage | undefined {
|
|
1222
2114
|
const record = readRecord(payload);
|
|
1223
2115
|
const usage = readRecord(record?.usageMetadata);
|
|
1224
2116
|
if (!usage) {
|
|
@@ -1228,11 +2120,15 @@ function extractGoogleUsage(payload: unknown):
|
|
|
1228
2120
|
const inputTokens = usage.promptTokenCount;
|
|
1229
2121
|
const outputTokens = usage.candidatesTokenCount;
|
|
1230
2122
|
const totalTokens = usage.totalTokenCount;
|
|
2123
|
+
const cachedContentTokenCount = usage.cachedContentTokenCount;
|
|
1231
2124
|
|
|
1232
2125
|
return {
|
|
1233
2126
|
inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
|
|
1234
2127
|
outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
|
|
1235
2128
|
totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
|
|
2129
|
+
...(typeof cachedContentTokenCount === "number"
|
|
2130
|
+
? { cacheReadInputTokens: cachedContentTokenCount }
|
|
2131
|
+
: {}),
|
|
1236
2132
|
};
|
|
1237
2133
|
}
|
|
1238
2134
|
|
|
@@ -1258,20 +2154,29 @@ function toGoogleContents(
|
|
|
1258
2154
|
parts: [{ text: readTextParts(message.content) }],
|
|
1259
2155
|
});
|
|
1260
2156
|
break;
|
|
1261
|
-
case "assistant":
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
2157
|
+
case "assistant": {
|
|
2158
|
+
// Anthropic-only `reasoning` parts have no Gemini equivalent
|
|
2159
|
+
// and are dropped on replay.
|
|
2160
|
+
const parts: Array<Record<string, unknown>> = [];
|
|
2161
|
+
for (const part of message.content) {
|
|
2162
|
+
if (part.type === "text") {
|
|
2163
|
+
parts.push({ text: part.text });
|
|
2164
|
+
continue;
|
|
2165
|
+
}
|
|
2166
|
+
if (part.type === "reasoning") {
|
|
2167
|
+
continue;
|
|
2168
|
+
}
|
|
2169
|
+
parts.push({
|
|
2170
|
+
functionCall: {
|
|
2171
|
+
id: part.toolCallId,
|
|
2172
|
+
name: part.toolName,
|
|
2173
|
+
args: part.input,
|
|
2174
|
+
},
|
|
2175
|
+
});
|
|
2176
|
+
}
|
|
2177
|
+
contents.push({ role: "model", parts });
|
|
1274
2178
|
break;
|
|
2179
|
+
}
|
|
1275
2180
|
case "tool":
|
|
1276
2181
|
contents.push({
|
|
1277
2182
|
role: "user",
|
|
@@ -1299,22 +2204,45 @@ function toGoogleContents(
|
|
|
1299
2204
|
|
|
1300
2205
|
/**
 * Convert unified tool definitions into the Gemini `tools[]` array.
 *
 * Function tools are gathered into a single `{ functionDeclarations }` entry
 * placed first. Provider tools whose id starts with `google.` each become
 * their own entry, keyed by the camelCased remainder of the id and carrying
 * the caller-provided `args` (or `{}`). Provider tools for other providers
 * are ignored.
 *
 * @param tools Unified tool definitions, or `undefined`.
 * @returns The Gemini tools array, or `undefined` when `tools` is missing or
 *   nothing in it maps to a Gemini entry.
 */
function toGoogleTools(
  tools: RuntimeToolDefinition[] | undefined,
): Array<Record<string, unknown>> | undefined {
  if (!tools) {
    return undefined;
  }

  const googlePrefix = "google.";
  const declarations: Array<Record<string, unknown>> = [];
  const builtinTools: Array<Record<string, unknown>> = [];

  for (const definition of tools) {
    if (definition.type === "function") {
      const declaration: Record<string, unknown> = { name: definition.name };
      if (typeof definition.description === "string") {
        declaration.description = definition.description;
      }
      declaration.parameters = unwrapToolInputSchema(definition.inputSchema);
      declarations.push(declaration);
      continue;
    }

    // Gemini provider tools (code_execution, google_search,
    // google_search_retrieval): one tools[] entry each, with a single
    // camelCase key and the optional config payload as its value.
    if (!definition.id.startsWith(googlePrefix)) {
      continue;
    }
    const snakeName = definition.id.slice(googlePrefix.length);
    if (snakeName.length === 0) {
      continue;
    }
    const camelName = snakeName.replace(/_([a-z])/g, (_match, letter) => letter.toUpperCase());
    builtinTools.push({ [camelName]: definition.args ?? {} });
  }

  const entries: Array<Record<string, unknown>> = declarations.length > 0
    ? [{ functionDeclarations: declarations }, ...builtinTools]
    : [...builtinTools];

  return entries.length > 0 ? entries : undefined;
}
|
|
1319
2247
|
|
|
1320
2248
|
function unwrapToolInputSchema(inputSchema: unknown): unknown {
|
|
@@ -1346,7 +2274,11 @@ function normalizeGoogleToolChoice(toolChoice: unknown):
|
|
|
1346
2274
|
}
|
|
1347
2275
|
|
|
1348
2276
|
const record = readRecord(toolChoice);
|
|
1349
|
-
if (record
|
|
2277
|
+
if (!record) return undefined;
|
|
2278
|
+
|
|
2279
|
+
// Single-tool restriction: { type: "tool", name } — pin to one
|
|
2280
|
+
// function via mode: ANY + allowedFunctionNames: [name].
|
|
2281
|
+
if (record.type === "tool" && typeof record.name === "string") {
|
|
1350
2282
|
return {
|
|
1351
2283
|
functionCallingConfig: {
|
|
1352
2284
|
mode: "ANY",
|
|
@@ -1355,12 +2287,74 @@ function normalizeGoogleToolChoice(toolChoice: unknown):
|
|
|
1355
2287
|
};
|
|
1356
2288
|
}
|
|
1357
2289
|
|
|
2290
|
+
// Multi-tool restriction: { type: "tools", names: string[] } — pin
|
|
2291
|
+
// to a subset via mode: ANY + the full allowedFunctionNames array.
|
|
2292
|
+
if (record.type === "tools" && Array.isArray(record.names)) {
|
|
2293
|
+
const names = record.names.filter((n): n is string => typeof n === "string");
|
|
2294
|
+
if (names.length > 0) {
|
|
2295
|
+
return {
|
|
2296
|
+
functionCallingConfig: {
|
|
2297
|
+
mode: "ANY",
|
|
2298
|
+
allowedFunctionNames: names,
|
|
2299
|
+
},
|
|
2300
|
+
};
|
|
2301
|
+
}
|
|
2302
|
+
}
|
|
2303
|
+
|
|
2304
|
+
// Explicit mode forms: { type: "auto" | "none" | "any" }.
|
|
2305
|
+
if (record.type === "auto") {
|
|
2306
|
+
return { functionCallingConfig: { mode: "AUTO" } };
|
|
2307
|
+
}
|
|
2308
|
+
if (record.type === "none") {
|
|
2309
|
+
return { functionCallingConfig: { mode: "NONE" } };
|
|
2310
|
+
}
|
|
2311
|
+
if (record.type === "any" || record.type === "required") {
|
|
2312
|
+
return { functionCallingConfig: { mode: "ANY" } };
|
|
2313
|
+
}
|
|
2314
|
+
|
|
1358
2315
|
return undefined;
|
|
1359
2316
|
}
|
|
1360
2317
|
|
|
2318
|
+
/**
 * Translate the unified reasoning option into a Gemini `thinkingConfig`.
 *
 * The result always sets `includeThoughts: true`. `thinkingBudget` is the
 * caller's numeric `budgetTokens` when one is given (forwarded as-is),
 * otherwise a value chosen from the effort tier: low = 512,
 * medium/unspecified = 2048, high = 8192, max = -1 (described by the original
 * author as "dynamic/no cap").
 *
 * @param option Unified reasoning option, or `undefined` when not configured.
 * @returns The thinking config, or `undefined` when reasoning is not
 *   explicitly enabled (`enabled !== true`).
 */
function resolveGoogleThinkingConfig(
  option: ProviderReasoningOption | undefined,
): Record<string, unknown> | undefined {
  if (!option || option.enabled !== true) {
    return undefined;
  }

  // An explicit numeric budget takes precedence over the effort tier.
  if (typeof option.budgetTokens === "number") {
    return { includeThoughts: true, thinkingBudget: option.budgetTokens };
  }

  let tierBudget: number;
  if (option.effort === "low") {
    tierBudget = 512;
  } else if (option.effort === "high") {
    tierBudget = 8192;
  } else if (option.effort === "max") {
    tierBudget = -1;
  } else {
    tierBudget = 2048;
  }

  return { includeThoughts: true, thinkingBudget: tierBudget };
}
|
|
2353
|
+
|
|
1361
2354
|
function buildGoogleGenerationConfig(
|
|
1362
2355
|
options: OpenAICompatibleLanguageOptions,
|
|
1363
2356
|
): Record<string, unknown> | undefined {
|
|
2357
|
+
const thinkingConfig = resolveGoogleThinkingConfig(options.reasoning);
|
|
1364
2358
|
const config: Record<string, unknown> = {
|
|
1365
2359
|
...(options.maxOutputTokens !== undefined ? { maxOutputTokens: options.maxOutputTokens } : {}),
|
|
1366
2360
|
...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
|
|
@@ -1370,6 +2364,7 @@ function buildGoogleGenerationConfig(
|
|
|
1370
2364
|
? { stopSequences: options.stopSequences }
|
|
1371
2365
|
: {}),
|
|
1372
2366
|
...(options.seed !== undefined ? { seed: options.seed } : {}),
|
|
2367
|
+
...(thinkingConfig ? { thinkingConfig } : {}),
|
|
1373
2368
|
};
|
|
1374
2369
|
|
|
1375
2370
|
return Object.keys(config).length > 0 ? config : undefined;
|
|
@@ -1378,8 +2373,47 @@ function buildGoogleGenerationConfig(
|
|
|
1378
2373
|
function buildGoogleGenerateContentRequest(
|
|
1379
2374
|
providerName: string,
|
|
1380
2375
|
options: OpenAICompatibleLanguageOptions,
|
|
2376
|
+
warnings: WarningCollector,
|
|
1381
2377
|
): GoogleCompatibleRequest {
|
|
2378
|
+
// Google generate-content surface doesn't accept presence/frequency
|
|
2379
|
+
// penalties on most current models. Emit warnings and let the request
|
|
2380
|
+
// through without them.
|
|
2381
|
+
if (options.presencePenalty !== undefined) {
|
|
2382
|
+
warnings.push({
|
|
2383
|
+
type: "unsupported-setting",
|
|
2384
|
+
provider: "google",
|
|
2385
|
+
setting: "presencePenalty",
|
|
2386
|
+
details: "Gemini generateContent does not accept presencePenalty; the value was dropped.",
|
|
2387
|
+
});
|
|
2388
|
+
}
|
|
2389
|
+
if (options.frequencyPenalty !== undefined) {
|
|
2390
|
+
warnings.push({
|
|
2391
|
+
type: "unsupported-setting",
|
|
2392
|
+
provider: "google",
|
|
2393
|
+
setting: "frequencyPenalty",
|
|
2394
|
+
details: "Gemini generateContent does not accept frequencyPenalty; the value was dropped.",
|
|
2395
|
+
});
|
|
2396
|
+
}
|
|
2397
|
+
if (options.responseFormat && options.responseFormat.type !== "text") {
|
|
2398
|
+
warnings.push({
|
|
2399
|
+
type: "unsupported-setting",
|
|
2400
|
+
provider: "google",
|
|
2401
|
+
setting: "responseFormat",
|
|
2402
|
+
details:
|
|
2403
|
+
"Gemini uses generationConfig.responseMimeType + responseSchema for structured outputs, which is a separate surface and not yet wired through this option.",
|
|
2404
|
+
});
|
|
2405
|
+
}
|
|
2406
|
+
|
|
1382
2407
|
const { systemInstruction, contents } = toGoogleContents(options.prompt);
|
|
2408
|
+
const generationConfig = buildGoogleGenerationConfig(options);
|
|
2409
|
+
// requestLabels wins over userId-derived labels: when callers explicitly
|
|
2410
|
+
// provide a label map, that's the source of truth. Otherwise fall back
|
|
2411
|
+
// to {user_id} derived from the unified userId option.
|
|
2412
|
+
const labels = options.requestLabels && Object.keys(options.requestLabels).length > 0
|
|
2413
|
+
? options.requestLabels
|
|
2414
|
+
: typeof options.userId === "string" && options.userId.length > 0
|
|
2415
|
+
? { user_id: options.userId }
|
|
2416
|
+
: undefined;
|
|
1383
2417
|
const body: GoogleCompatibleRequest = {
|
|
1384
2418
|
contents,
|
|
1385
2419
|
...(systemInstruction ? { systemInstruction } : {}),
|
|
@@ -1387,8 +2421,13 @@ function buildGoogleGenerateContentRequest(
|
|
|
1387
2421
|
...(normalizeGoogleToolChoice(options.toolChoice)
|
|
1388
2422
|
? { toolConfig: normalizeGoogleToolChoice(options.toolChoice) }
|
|
1389
2423
|
: {}),
|
|
1390
|
-
...(
|
|
1391
|
-
|
|
2424
|
+
...(generationConfig ? { generationConfig } : {}),
|
|
2425
|
+
...(labels ? { labels } : {}),
|
|
2426
|
+
...(typeof options.googleCachedContent === "string" && options.googleCachedContent.length > 0
|
|
2427
|
+
? { cachedContent: options.googleCachedContent }
|
|
2428
|
+
: {}),
|
|
2429
|
+
...(options.googleSafetySettings && options.googleSafetySettings.length > 0
|
|
2430
|
+
? { safetySettings: options.googleSafetySettings }
|
|
1392
2431
|
: {}),
|
|
1393
2432
|
};
|
|
1394
2433
|
|
|
@@ -1426,7 +2465,8 @@ function buildGoogleGenerateResult(payload: unknown): {
|
|
|
1426
2465
|
| { type: "tool-call"; toolCallId: string; toolName: string; input: string }
|
|
1427
2466
|
>;
|
|
1428
2467
|
finishReason?: string | { unified: string; raw: string } | null;
|
|
1429
|
-
usage?:
|
|
2468
|
+
usage?: RuntimeUsage;
|
|
2469
|
+
groundingMetadata?: Record<string, unknown>;
|
|
1430
2470
|
} {
|
|
1431
2471
|
const parts = extractGoogleCandidateParts(payload);
|
|
1432
2472
|
const content: Array<
|
|
@@ -1451,10 +2491,19 @@ function buildGoogleGenerateResult(payload: unknown): {
|
|
|
1451
2491
|
}
|
|
1452
2492
|
}
|
|
1453
2493
|
|
|
2494
|
+
// Gemini grounding (google_search / google_search_retrieval) returns
|
|
2495
|
+
// a per-candidate groundingMetadata object with web search queries,
|
|
2496
|
+
// grounding chunks, and citation indices into the response text.
|
|
2497
|
+
// Pass it through opaquely so callers can render footnotes / source
|
|
2498
|
+
// chips / "Search results" UI without parsing the wire shape.
|
|
2499
|
+
const candidate = extractFirstGoogleCandidate(payload);
|
|
2500
|
+
const groundingMetadata = readRecord(candidate?.groundingMetadata);
|
|
2501
|
+
|
|
1454
2502
|
return {
|
|
1455
2503
|
content,
|
|
1456
|
-
finishReason: normalizeGoogleFinishReason(
|
|
2504
|
+
finishReason: normalizeGoogleFinishReason(candidate?.finishReason),
|
|
1457
2505
|
usage: extractGoogleUsage(payload),
|
|
2506
|
+
...(groundingMetadata ? { groundingMetadata } : {}),
|
|
1458
2507
|
};
|
|
1459
2508
|
}
|
|
1460
2509
|
|
|
@@ -1467,7 +2516,7 @@ async function* streamGoogleCompatibleParts(
|
|
|
1467
2516
|
let reasoningId: string | null = null;
|
|
1468
2517
|
let reasoningIndex = 0;
|
|
1469
2518
|
let finishReason: string | { unified: string; raw: string } | null = null;
|
|
1470
|
-
let usage:
|
|
2519
|
+
let usage: RuntimeUsage | undefined;
|
|
1471
2520
|
|
|
1472
2521
|
for await (const chunk of stream) {
|
|
1473
2522
|
buffer += decoder.decode(chunk, { stream: true });
|
|
@@ -1599,7 +2648,7 @@ function buildOpenAIGenerateResult(payload: unknown): {
|
|
|
1599
2648
|
}
|
|
1600
2649
|
>;
|
|
1601
2650
|
finishReason?: string | { unified: string; raw: string } | null;
|
|
1602
|
-
usage?:
|
|
2651
|
+
usage?: RuntimeUsage;
|
|
1603
2652
|
} {
|
|
1604
2653
|
const choice = extractFirstChoice(payload);
|
|
1605
2654
|
const message = readRecord(choice?.message);
|
|
@@ -1630,7 +2679,7 @@ async function* streamOpenAICompatibleParts(
|
|
|
1630
2679
|
let reasoningId: string | null = null;
|
|
1631
2680
|
let reasoningIndex = 0;
|
|
1632
2681
|
let finishReason: string | { unified: string; raw: string } | null = null;
|
|
1633
|
-
let usage:
|
|
2682
|
+
let usage: RuntimeUsage | undefined;
|
|
1634
2683
|
|
|
1635
2684
|
for await (const chunk of stream) {
|
|
1636
2685
|
buffer += decoder.decode(chunk, { stream: true });
|
|
@@ -1788,11 +2837,19 @@ export function createOpenAIModelRuntime(
|
|
|
1788
2837
|
doGenerate(optionsForRuntime: unknown) {
|
|
1789
2838
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1790
2839
|
const url = getOpenAIChatCompletionsUrl(config.baseURL);
|
|
1791
|
-
const
|
|
2840
|
+
const warnings = createWarningCollector();
|
|
2841
|
+
const body = buildOpenAIChatRequest(
|
|
2842
|
+
modelId,
|
|
2843
|
+
config.name ?? "openai",
|
|
2844
|
+
options,
|
|
2845
|
+
false,
|
|
2846
|
+
warnings,
|
|
2847
|
+
);
|
|
1792
2848
|
return requestJson({
|
|
1793
2849
|
url,
|
|
1794
2850
|
fetchImpl,
|
|
1795
2851
|
providerLabel: config.name ?? "openai",
|
|
2852
|
+
providerKind: "openai",
|
|
1796
2853
|
init: {
|
|
1797
2854
|
method: "POST",
|
|
1798
2855
|
headers: createRequestHeaders({
|
|
@@ -1803,16 +2860,30 @@ export function createOpenAIModelRuntime(
|
|
|
1803
2860
|
body: JSON.stringify(body),
|
|
1804
2861
|
signal: options.abortSignal,
|
|
1805
2862
|
},
|
|
1806
|
-
}).then(
|
|
2863
|
+
}).then((payload) => {
|
|
2864
|
+
const drained = warnings.drain();
|
|
2865
|
+
return {
|
|
2866
|
+
...buildOpenAIGenerateResult(payload),
|
|
2867
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2868
|
+
};
|
|
2869
|
+
});
|
|
1807
2870
|
},
|
|
1808
2871
|
doStream(optionsForRuntime: unknown) {
|
|
1809
2872
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1810
2873
|
const url = getOpenAIChatCompletionsUrl(config.baseURL);
|
|
1811
|
-
const
|
|
2874
|
+
const warnings = createWarningCollector();
|
|
2875
|
+
const body = buildOpenAIChatRequest(
|
|
2876
|
+
modelId,
|
|
2877
|
+
config.name ?? "openai",
|
|
2878
|
+
options,
|
|
2879
|
+
true,
|
|
2880
|
+
warnings,
|
|
2881
|
+
);
|
|
1812
2882
|
return requestStream({
|
|
1813
2883
|
url,
|
|
1814
2884
|
fetchImpl,
|
|
1815
2885
|
providerLabel: config.name ?? "openai",
|
|
2886
|
+
providerKind: "openai",
|
|
1816
2887
|
init: {
|
|
1817
2888
|
method: "POST",
|
|
1818
2889
|
headers: createRequestHeaders({
|
|
@@ -1823,9 +2894,13 @@ export function createOpenAIModelRuntime(
|
|
|
1823
2894
|
body: JSON.stringify(body),
|
|
1824
2895
|
signal: options.abortSignal,
|
|
1825
2896
|
},
|
|
1826
|
-
}).then((responseStream) =>
|
|
1827
|
-
|
|
1828
|
-
|
|
2897
|
+
}).then((responseStream) => {
|
|
2898
|
+
const drained = warnings.drain();
|
|
2899
|
+
return {
|
|
2900
|
+
stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
|
|
2901
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2902
|
+
};
|
|
2903
|
+
});
|
|
1829
2904
|
},
|
|
1830
2905
|
};
|
|
1831
2906
|
}
|
|
@@ -1843,16 +2918,19 @@ export function createAnthropicModelRuntime(
|
|
|
1843
2918
|
doGenerate(optionsForRuntime: unknown) {
|
|
1844
2919
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1845
2920
|
const url = getAnthropicMessagesUrl(config.baseURL);
|
|
2921
|
+
const warnings = createWarningCollector();
|
|
1846
2922
|
const body = buildAnthropicMessagesRequest(
|
|
1847
2923
|
modelId,
|
|
1848
2924
|
config.name ?? "anthropic",
|
|
1849
2925
|
options,
|
|
1850
2926
|
false,
|
|
2927
|
+
warnings,
|
|
1851
2928
|
);
|
|
1852
2929
|
return requestJson({
|
|
1853
2930
|
url,
|
|
1854
2931
|
fetchImpl,
|
|
1855
2932
|
providerLabel: config.name ?? "anthropic",
|
|
2933
|
+
providerKind: "anthropic",
|
|
1856
2934
|
init: {
|
|
1857
2935
|
method: "POST",
|
|
1858
2936
|
headers: createAnthropicRequestHeaders({
|
|
@@ -1863,21 +2941,30 @@ export function createAnthropicModelRuntime(
|
|
|
1863
2941
|
body: JSON.stringify(body),
|
|
1864
2942
|
signal: options.abortSignal,
|
|
1865
2943
|
},
|
|
1866
|
-
}).then(
|
|
2944
|
+
}).then((payload) => {
|
|
2945
|
+
const drained = warnings.drain();
|
|
2946
|
+
return {
|
|
2947
|
+
...buildAnthropicGenerateResult(payload),
|
|
2948
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2949
|
+
};
|
|
2950
|
+
});
|
|
1867
2951
|
},
|
|
1868
2952
|
doStream(optionsForRuntime: unknown) {
|
|
1869
2953
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1870
2954
|
const url = getAnthropicMessagesUrl(config.baseURL);
|
|
2955
|
+
const warnings = createWarningCollector();
|
|
1871
2956
|
const body = buildAnthropicMessagesRequest(
|
|
1872
2957
|
modelId,
|
|
1873
2958
|
config.name ?? "anthropic",
|
|
1874
2959
|
options,
|
|
1875
2960
|
true,
|
|
2961
|
+
warnings,
|
|
1876
2962
|
);
|
|
1877
2963
|
return requestStream({
|
|
1878
2964
|
url,
|
|
1879
2965
|
fetchImpl,
|
|
1880
2966
|
providerLabel: config.name ?? "anthropic",
|
|
2967
|
+
providerKind: "anthropic",
|
|
1881
2968
|
init: {
|
|
1882
2969
|
method: "POST",
|
|
1883
2970
|
headers: createAnthropicRequestHeaders({
|
|
@@ -1888,9 +2975,13 @@ export function createAnthropicModelRuntime(
|
|
|
1888
2975
|
body: JSON.stringify(body),
|
|
1889
2976
|
signal: options.abortSignal,
|
|
1890
2977
|
},
|
|
1891
|
-
}).then((responseStream) =>
|
|
1892
|
-
|
|
1893
|
-
|
|
2978
|
+
}).then((responseStream) => {
|
|
2979
|
+
const drained = warnings.drain();
|
|
2980
|
+
return {
|
|
2981
|
+
stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
|
|
2982
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
2983
|
+
};
|
|
2984
|
+
});
|
|
1894
2985
|
},
|
|
1895
2986
|
};
|
|
1896
2987
|
}
|
|
@@ -1908,11 +2999,17 @@ export function createGoogleModelRuntime(
|
|
|
1908
2999
|
doGenerate(optionsForRuntime: unknown) {
|
|
1909
3000
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1910
3001
|
const url = getGoogleGenerateContentUrl(config.baseURL, modelId);
|
|
1911
|
-
const
|
|
3002
|
+
const warnings = createWarningCollector();
|
|
3003
|
+
const body = buildGoogleGenerateContentRequest(
|
|
3004
|
+
config.name ?? "google",
|
|
3005
|
+
options,
|
|
3006
|
+
warnings,
|
|
3007
|
+
);
|
|
1912
3008
|
return requestJson({
|
|
1913
3009
|
url,
|
|
1914
3010
|
fetchImpl,
|
|
1915
3011
|
providerLabel: config.name ?? "google",
|
|
3012
|
+
providerKind: "google",
|
|
1916
3013
|
init: {
|
|
1917
3014
|
method: "POST",
|
|
1918
3015
|
headers: createRequestHeaders({
|
|
@@ -1923,16 +3020,28 @@ export function createGoogleModelRuntime(
|
|
|
1923
3020
|
body: JSON.stringify(body),
|
|
1924
3021
|
signal: options.abortSignal,
|
|
1925
3022
|
},
|
|
1926
|
-
}).then(
|
|
3023
|
+
}).then((payload) => {
|
|
3024
|
+
const drained = warnings.drain();
|
|
3025
|
+
return {
|
|
3026
|
+
...buildGoogleGenerateResult(payload),
|
|
3027
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
3028
|
+
};
|
|
3029
|
+
});
|
|
1927
3030
|
},
|
|
1928
3031
|
doStream(optionsForRuntime: unknown) {
|
|
1929
3032
|
const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
|
|
1930
3033
|
const url = getGoogleStreamGenerateContentUrl(config.baseURL, modelId);
|
|
1931
|
-
const
|
|
3034
|
+
const warnings = createWarningCollector();
|
|
3035
|
+
const body = buildGoogleGenerateContentRequest(
|
|
3036
|
+
config.name ?? "google",
|
|
3037
|
+
options,
|
|
3038
|
+
warnings,
|
|
3039
|
+
);
|
|
1932
3040
|
return requestStream({
|
|
1933
3041
|
url,
|
|
1934
3042
|
fetchImpl,
|
|
1935
3043
|
providerLabel: config.name ?? "google",
|
|
3044
|
+
providerKind: "google",
|
|
1936
3045
|
init: {
|
|
1937
3046
|
method: "POST",
|
|
1938
3047
|
headers: createRequestHeaders({
|
|
@@ -1943,9 +3052,13 @@ export function createGoogleModelRuntime(
|
|
|
1943
3052
|
body: JSON.stringify(body),
|
|
1944
3053
|
signal: options.abortSignal,
|
|
1945
3054
|
},
|
|
1946
|
-
}).then((responseStream) =>
|
|
1947
|
-
|
|
1948
|
-
|
|
3055
|
+
}).then((responseStream) => {
|
|
3056
|
+
const drained = warnings.drain();
|
|
3057
|
+
return {
|
|
3058
|
+
stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
|
|
3059
|
+
...(drained.length > 0 ? { warnings: drained } : {}),
|
|
3060
|
+
};
|
|
3061
|
+
});
|
|
1949
3062
|
},
|
|
1950
3063
|
};
|
|
1951
3064
|
}
|
|
@@ -1973,6 +3086,7 @@ export function createOpenAIEmbeddingRuntime(
|
|
|
1973
3086
|
url,
|
|
1974
3087
|
fetchImpl,
|
|
1975
3088
|
providerLabel: config.name ?? "openai",
|
|
3089
|
+
providerKind: "openai",
|
|
1976
3090
|
init: {
|
|
1977
3091
|
method: "POST",
|
|
1978
3092
|
headers: {
|
|
@@ -2021,6 +3135,7 @@ export function createGoogleEmbeddingRuntime(
|
|
|
2021
3135
|
url,
|
|
2022
3136
|
fetchImpl,
|
|
2023
3137
|
providerLabel: config.name ?? "google",
|
|
3138
|
+
providerKind: "google",
|
|
2024
3139
|
init: {
|
|
2025
3140
|
method: "POST",
|
|
2026
3141
|
headers: {
|