veryfront 0.1.207 → 0.1.208

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,6 +36,18 @@ type RuntimePromptMessage =
36
36
  input: unknown;
37
37
  providerExecuted?: boolean;
38
38
  }
39
+ | {
40
+ // Anthropic thinking block replay. Carries the original signed
41
+ // thinking trace so that on the next turn Anthropic can verify
42
+ // the signature and let Claude continue reasoning from the same
43
+ // point. `text` + `signature` are the normal pair for an
44
+ // un-redacted thinking block; `redactedData` is set instead of
45
+ // both when Anthropic returned an encrypted opaque payload.
46
+ type: "reasoning";
47
+ text?: string;
48
+ signature?: string;
49
+ redactedData?: string;
50
+ }
39
51
  >;
40
52
  }
41
53
  | {
@@ -60,6 +72,67 @@ type RuntimeToolDefinition =
60
72
  id: `${string}.${string}`;
61
73
  args: Record<string, unknown>;
62
74
  };
75
+ /**
76
+ * TTL for a single prompt-cache breakpoint.
77
+ *
78
+ * `true` and `"5m"` both map to Anthropic's default ephemeral (5-minute) cache.
79
+ * `"1h"` maps to the extended 1-hour cache at a 2x write cost. Callers can
80
+ * pick per breakpoint target.
81
+ */
82
+ type ProviderCacheTtl = boolean | "5m" | "1h";
83
+
84
+ /**
85
+ * Per-provider prompt / context caching controls.
86
+ *
87
+ * For Anthropic, flipping these on emits `cache_control: { type: "ephemeral" }`
88
+ * breakpoints on the assembled system prompt and/or the last tool definition
89
+ * sent to the Messages API, enabling Anthropic's explicit prompt cache.
90
+ *
91
+ * OpenAI's prompt cache is automatic on gpt-4o+ and has no request-side
92
+ * directive to emit, so this option is a no-op for the OpenAI runtime. Google
93
+ * uses a separate `cachedContent` resource model that is intentionally not
94
+ * covered by this option (it belongs on a dedicated Gemini-specific surface).
95
+ */
96
+ type ProviderCacheControlOption = {
97
+ /**
98
+ * Attach a cache breakpoint to the final system-prompt text block.
99
+ * Use when the system prompt is large and reused across requests.
100
+ */
101
+ system?: ProviderCacheTtl;
102
+ /**
103
+ * Attach a cache breakpoint to the last tool definition in `tools`.
104
+ * Use when the tool schemas are large and identical across requests.
105
+ */
106
+ tools?: ProviderCacheTtl;
107
+ };
108
+
109
+ /**
110
+ * Unified effort level for extended reasoning / thinking. Maps to
111
+ * per-provider knobs: Anthropic `thinking.budget_tokens`, OpenAI
112
+ * `reasoning_effort`, Gemini `thinkingConfig.thinkingBudget`.
113
+ */
114
+ type ProviderReasoningEffort = "low" | "medium" | "high" | "max";
115
+
116
+ /**
117
+ * Unified reasoning / thinking request option.
118
+ *
119
+ * Setting `enabled: true` turns on extended thinking on providers that
120
+ * support it (Anthropic Claude 4.x, OpenAI o-series, Gemini 2.5+). The
121
+ * `effort` field picks a coarse budget; when `budgetTokens` is set it
122
+ * wins for providers that take a numeric budget (Anthropic, Gemini).
123
+ *
124
+ * Providers that do not support reasoning treat this as a no-op. On
125
+ * Anthropic + OpenAI, enabling reasoning also disables sampling params
126
+ * that the providers reject in combination (`temperature`, `topP`,
127
+ * `topK`, `presencePenalty`, `frequencyPenalty`) — silently dropping
128
+ * them rather than failing the request.
129
+ */
130
+ type ProviderReasoningOption = {
131
+ enabled?: boolean;
132
+ effort?: ProviderReasoningEffort;
133
+ budgetTokens?: number;
134
+ };
135
+
63
136
  type OpenAICompatibleLanguageOptions = {
64
137
  prompt: RuntimePromptMessage[];
65
138
  maxOutputTokens?: number;
@@ -76,6 +149,128 @@ type OpenAICompatibleLanguageOptions = {
76
149
  providerOptions?: Record<string, unknown>;
77
150
  includeRawChunks?: boolean;
78
151
  abortSignal?: AbortSignal;
152
+ /**
153
+ * Per-provider prompt / context caching controls. See
154
+ * {@link ProviderCacheControlOption}. When unset, caching behaviour is
155
+ * unchanged on every provider.
156
+ */
157
+ cacheControl?: ProviderCacheControlOption;
158
+ /**
159
+ * Enable extended reasoning / thinking on providers that support it.
160
+ * See {@link ProviderReasoningOption}. When unset, reasoning behaviour
161
+ * is unchanged on every provider.
162
+ */
163
+ reasoning?: ProviderReasoningOption;
164
+ /**
165
+ * Stable per-user identifier for rate-limiting, abuse detection, and
166
+ * billing attribution. Maps to:
167
+ * - Anthropic: `metadata.user_id`
168
+ * - OpenAI: `user`
169
+ * - Google: `labels.user_id` (when {@link requestLabels} is unset)
170
+ */
171
+ userId?: string;
172
+ /**
173
+ * Provider-specific label map for Google Gemini's `labels` field.
174
+ * Anthropic and OpenAI don't have an arbitrary-label equivalent, so
175
+ * this is intentionally Google-only. When unset, no labels are sent.
176
+ */
177
+ requestLabels?: Record<string, string>;
178
+ /**
179
+ * OpenAI-specific. Maps to the `service_tier` field on Chat Completions
180
+ * which trades latency for cost. Documented values:
181
+ *
182
+ * - `default` — standard processing (default if unset)
183
+ * - `flex` — lower-priority queue, lower per-token cost, longer
184
+ * expected latency. Useful for batchy or non-interactive workloads.
185
+ * - `scale` — reserved-capacity tier with strict latency SLOs.
186
+ * - `auto` — let OpenAI pick.
187
+ *
188
+ * Forwarded verbatim. Anthropic and Google have no equivalent and
189
+ * the field is silently omitted on those providers.
190
+ */
191
+ serviceTier?: "auto" | "default" | "flex" | "scale";
192
+ /**
193
+ * OpenAI-specific. When `false`, OpenAI runs tool calls sequentially
194
+ * instead of in parallel. Useful for ordered side effects where
195
+ * concurrent calls would race. Default behaviour (unset) is parallel.
196
+ */
197
+ parallelToolCalls?: boolean;
198
+ /**
199
+ * Structured-output response format. Maps to OpenAI's `response_format`
200
+ * field on Chat Completions (and Responses). Three variants:
201
+ *
202
+ * - `{ type: "text" }` — the default (no constraint).
203
+ * - `{ type: "json" }` — emits OpenAI's `response_format:
204
+ * { type: "json_object" }` to force the model to return valid JSON.
205
+ * - `{ type: "json_schema", name, schema, strict? }` — emits
206
+ * OpenAI's `response_format: { type: "json_schema", json_schema: {
207
+ * name, schema, strict } }` for fully constrained structured
208
+ * outputs (gpt-4o-2024-08-06+).
209
+ *
210
+ * On Anthropic and Google this option emits an "unsupported-setting"
211
+ * warning when set to anything other than `text` (those providers
212
+ * have their own structured-output surfaces and need a dedicated
213
+ * follow-up to wire them in).
214
+ */
215
+ responseFormat?:
216
+ | { type: "text" }
217
+ | { type: "json" }
218
+ | {
219
+ type: "json_schema";
220
+ name: string;
221
+ schema: unknown;
222
+ description?: string;
223
+ strict?: boolean;
224
+ };
225
+ /**
226
+ * Anthropic-specific. `container` field for programmatic tool calling
227
+ * and agent skills. Anthropic uses this to scope a session to a
228
+ * sandboxed container (e.g. for Computer Use, code execution
229
+ * sandboxes, or skills loaded from a container). Forwarded verbatim.
230
+ *
231
+ * The shape varies — string container id or a structured object
232
+ * depending on the feature. Caller passes whatever Anthropic's docs
233
+ * specify for the target feature.
234
+ */
235
+ anthropicContainer?: unknown;
236
+ /**
237
+ * Google-specific. Reference to a previously-created Gemini cached
238
+ * content resource (created via the separate caches API) to attach
239
+ * to this request. Resource name format:
240
+ * `cachedContents/<id>`. See https://ai.google.dev/gemini-api/docs/caching.
241
+ *
242
+ * Cache creation itself is out of scope for the runtime — callers
243
+ * use the Gemini REST API or SDK to create the cache, then pass the
244
+ * resource name here on each subsequent generate call to attach the
245
+ * cached prefix and avoid re-paying for it.
246
+ */
247
+ googleCachedContent?: string;
248
+ /**
249
+ * Google-specific. Per-request safety filter configuration for
250
+ * Gemini. Each entry pairs a HARM_CATEGORY_* with a threshold
251
+ * (BLOCK_NONE / BLOCK_LOW_AND_ABOVE / BLOCK_MEDIUM_AND_ABOVE /
252
+ * BLOCK_ONLY_HIGH). Forwarded verbatim as the `safetySettings`
253
+ * field. See https://ai.google.dev/gemini-api/docs/safety-settings.
254
+ */
255
+ googleSafetySettings?: Array<{
256
+ category: string;
257
+ threshold: string;
258
+ }>;
259
+ /**
260
+ * Anthropic-specific. Native MCP server definitions to pass directly
261
+ * on the Messages API request body. Lets callers register MCP servers
262
+ * server-side instead of reloading them into local function tools.
263
+ *
264
+ * Caller must opt into the MCP beta by adding the matching header to
265
+ * `headers`, e.g. `{ "anthropic-beta": "mcp-client-2025-04-04" }`.
266
+ * Without that header Anthropic will reject the request.
267
+ *
268
+ * Each entry is forwarded with camelCase keys converted to snake_case
269
+ * so `authorizationToken` → `authorization_token`,
270
+ * `toolConfiguration.allowedTools` → `tool_configuration.allowed_tools`,
271
+ * etc.
272
+ */
273
+ mcpServers?: Array<Record<string, unknown>>;
79
274
  };
80
275
  type OpenAICompatibleChatMessage =
81
276
  | { role: "system"; content: string }
@@ -142,7 +337,12 @@ type AnthropicCompatibleRequest = {
142
337
  messages: AnthropicCompatibleMessage[];
143
338
  max_tokens: number;
144
339
  stream?: boolean;
145
- system?: string;
340
+ /**
341
+ * String form is the classic shorthand. Array-of-blocks form is required
342
+ * when the system prompt carries a cache_control breakpoint, because
343
+ * cache_control lives on an individual content block, not on a raw string.
344
+ */
345
+ system?: string | Array<Record<string, unknown>>;
146
346
  temperature?: number;
147
347
  top_p?: number;
148
348
  stop_sequences?: string[];
@@ -168,9 +368,7 @@ type GoogleCompatibleRequest = {
168
368
  systemInstruction?: {
169
369
  parts: Array<{ text: string }>;
170
370
  };
171
- tools?: Array<{
172
- functionDeclarations: Array<Record<string, unknown>>;
173
- }>;
371
+ tools?: Array<Record<string, unknown>>;
174
372
  toolConfig?: {
175
373
  functionCallingConfig: Record<string, unknown>;
176
374
  };
@@ -287,9 +485,203 @@ function extractGoogleUsageTokens(payload: unknown): number | undefined {
287
485
  return typeof promptTokenCount === "number" ? promptTokenCount : undefined;
288
486
  }
289
487
 
290
- async function readErrorMessage(response: Response): Promise<string> {
291
- const text = await response.text();
292
- return text.trim() || `${response.status} ${response.statusText}`.trim();
488
+ type ProviderKind = "anthropic" | "openai" | "google";
489
+
490
+ /**
491
+ * Structured warning emitted when a provider runtime drops or rewrites a
492
+ * caller-provided option. Mirrors the AI ecosystem convention (Vercel AI
493
+ * SDK, LangChain) of returning `unsupported-setting` warnings on the
494
+ * runtime result so callers can discover silently-dropped fields without
495
+ * having to read the source.
496
+ */
497
+ export type ProviderWarning = {
498
+ type: "unsupported-setting" | "other";
499
+ setting?: string;
500
+ details?: string;
501
+ provider: ProviderKind;
502
+ };
503
+
504
+ /**
505
+ * Mutable warning collector handed to per-provider request builders so
506
+ * they can append entries during the build pass instead of plumbing a
507
+ * return-tuple shape through every helper.
508
+ */
509
+ type WarningCollector = {
510
+ push(warning: ProviderWarning): void;
511
+ drain(): ProviderWarning[];
512
+ };
513
+
514
/**
 * Create a fresh, empty {@link WarningCollector} backed by a private array.
 *
 * `push` appends one warning. `drain` returns a shallow copy of everything
 * collected so far; the internal list is NOT cleared, so repeated calls
 * return the same entries and mutating a returned array does not affect
 * the collector.
 */
function createWarningCollector(): WarningCollector {
  const collected: ProviderWarning[] = [];

  const push = (warning: ProviderWarning): void => {
    collected.push(warning);
  };

  const drain = (): ProviderWarning[] => [...collected];

  return { push, drain };
}
525
+
526
/**
 * Common base for all typed provider failures.
 *
 * - `provider`: which provider runtime produced the error.
 * - `status`: HTTP status code of the failed response.
 * - `retryable`: primary signal for callers (or a retry wrapper) deciding
 *   whether to re-issue the request.
 * - `retryAfterMs`: only assigned when the provider supplied an explicit
 *   delay hint; otherwise the property is left unset.
 *
 * `name` is taken from the concrete class being constructed, so subclasses
 * report their own class name without overriding anything.
 */
export class ProviderError extends Error {
  readonly provider: ProviderKind;
  readonly status: number;
  readonly retryable: boolean;
  readonly retryAfterMs?: number;

  constructor(options: {
    provider: ProviderKind;
    status: number;
    message: string;
    retryable: boolean;
    retryAfterMs?: number;
  }) {
    super(options.message);

    const { provider, status, retryable, retryAfterMs } = options;
    this.name = new.target.name;
    this.provider = provider;
    this.status = status;
    this.retryable = retryable;

    // Leave the optional field absent (rather than explicitly undefined)
    // when no delay hint was provided.
    if (retryAfterMs !== undefined) {
      this.retryAfterMs = retryAfterMs;
    }
  }
}
555
+
556
+ /** Provider reports it is overloaded (Anthropic 529, OpenAI/Google 503). */
557
+ export class ProviderOverloadedError extends ProviderError {}
558
+
559
+ /** Provider is rate limiting this API key (OpenAI/Google 429 with Retry-After). */
560
+ export class ProviderRateLimitError extends ProviderError {}
561
+
562
+ /** Provider account quota is exhausted — non-retryable. */
563
+ export class ProviderQuotaError extends ProviderError {}
564
+
565
+ /** Non-retryable 4xx/5xx that doesn't fit another bucket. */
566
+ export class ProviderRequestError extends ProviderError {}
567
+
568
/**
 * Parse an HTTP `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are recognised:
 * - delay-seconds: a non-negative decimal number of seconds (a fractional
 *   part is tolerated and rounded to the nearest millisecond);
 * - HTTP-date: an absolute timestamp, converted to a delay relative to
 *   `Date.now()` and clamped at 0 for dates in the past.
 *
 * @param header Raw header value, or `null` when the header is absent.
 * @returns The delay in milliseconds, or `undefined` when the header is
 *   missing, blank, or in neither recognised form.
 */
function parseRetryAfterMs(header: string | null): number | undefined {
  const value = header?.trim();
  if (!value) return undefined;

  // Match the seconds form strictly. `Number()` alone is too lenient: it
  // turns a whitespace-only header into 0 and accepts hex / exponent
  // notation, none of which are valid delay-seconds values.
  if (/^\d+(?:\.\d+)?$/.test(value)) {
    const delayMs = Math.round(Number(value) * 1000);
    return Number.isFinite(delayMs) ? delayMs : undefined;
  }

  // HTTP-date form (rare in practice for LLM providers). Every HTTP-date
  // contains day/month names, so only attempt date parsing when letters
  // are present; this keeps stray numeric garbage such as "-5" away from
  // engine-specific `Date.parse` leniency.
  if (/[a-z]/i.test(value)) {
    const parsed = Date.parse(value);
    if (!Number.isNaN(parsed)) {
      return Math.max(0, parsed - Date.now());
    }
  }

  return undefined;
}
581
+
582
/**
 * Inspect a non-2xx response and build the most specific ProviderError
 * subclass we can.
 *
 * The response body is consumed as text and used verbatim (trimmed) as the
 * error message; when the body is empty the message falls back to
 * `"<status> <statusText>"`. If the body parses as JSON, an error code is
 * taken from the first string-valued field among `error.code`,
 * `error.type`, `error.status`. This body classification handles the cases
 * where the HTTP status alone is ambiguous, notably OpenAI
 * `insufficient_quota` vs `rate_limit_exceeded` both arriving as 429.
 *
 * Classification:
 * - Anthropic 529, OpenAI/Google 503 → {@link ProviderOverloadedError} (retryable)
 * - 429 with a hard-quota code (OpenAI `insufficient_quota`, Google
 *   `RESOURCE_EXHAUSTED`) → {@link ProviderQuotaError} (non-retryable)
 * - any other 429, including Anthropic → {@link ProviderRateLimitError} (retryable)
 * - everything else → {@link ProviderRequestError} (non-retryable)
 *
 * `retryAfterMs` is attached to retryable errors only when the response
 * carried a parseable `Retry-After` header.
 *
 * @param provider Which provider produced the response.
 * @param response The failed response; its body is read by this function.
 * @returns The constructed error (never throws it).
 */
async function buildProviderError(
  provider: ProviderKind,
  response: Response,
): Promise<ProviderError> {
  const rawBody = await response.text();
  const message = rawBody.trim() || `${response.status} ${response.statusText}`.trim();
  const status = response.status;
  const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));

  // Error bodies are not guaranteed to be JSON (proxies, gateways, HTML
  // error pages), so a parse failure simply means "no structured code".
  let parsedBody: Record<string, unknown> | undefined;
  try {
    parsedBody = JSON.parse(rawBody) as Record<string, unknown>;
  } catch {
    parsedBody = undefined;
  }

  const errorRecord = readRecord(parsedBody?.error);
  const errorCode = typeof errorRecord?.code === "string"
    ? errorRecord.code
    : typeof errorRecord?.type === "string"
    ? errorRecord.type
    : typeof errorRecord?.status === "string"
    ? errorRecord.status
    : undefined;

  const base = { provider, status, message };
  // Spread-in form keeps `retryAfterMs` absent (not `undefined`) when the
  // provider gave no delay hint.
  const retryHint = retryAfterMs !== undefined ? { retryAfterMs } : {};

  // Anthropic 529 = overloaded (body carries
  // { error: { type: "overloaded_error" } }); OpenAI / Google 503 = overloaded.
  const overloaded = (provider === "anthropic" && status === 529) ||
    ((provider === "openai" || provider === "google") && status === 503);
  if (overloaded) {
    return new ProviderOverloadedError({ ...base, retryable: true, ...retryHint });
  }

  if (status === 429) {
    // Hard quota exhaustion cannot succeed on retry:
    //   - OpenAI: `insufficient_quota`
    //   - Google: `RESOURCE_EXHAUSTED` is almost always the daily free-tier
    //     quota, so surface it as a hard quota error rather than letting
    //     callers hot-loop on retries that can't succeed until the reset.
    // NOTE(review): Google's error envelope appears to always report
    // `status: "RESOURCE_EXHAUSTED"` on 429, which would make the Google
    // rate-limit path below unreachable in practice — confirm against the
    // Gemini API before relying on it.
    const quotaExhausted =
      (provider === "openai" && errorCode === "insufficient_quota") ||
      (provider === "google" && errorCode === "RESOURCE_EXHAUSTED");
    if (quotaExhausted) {
      return new ProviderQuotaError({ ...base, retryable: false });
    }

    // Every remaining 429 is ordinary rate limiting and is retryable. This
    // includes Anthropic (`rate_limit_error`), which previously fell through
    // to the non-retryable bucket and lost its Retry-After hint.
    return new ProviderRateLimitError({ ...base, retryable: true, ...retryHint });
  }

  return new ProviderRequestError({ ...base, retryable: false });
}
294
686
 
295
687
  async function requestJson(options: {
@@ -297,11 +689,13 @@ async function requestJson(options: {
297
689
  fetchImpl: typeof globalThis.fetch;
298
690
  init: RequestInit;
299
691
  providerLabel: string;
692
+ providerKind: ProviderKind;
300
693
  }): Promise<unknown> {
301
694
  const response = await options.fetchImpl(options.url, options.init);
302
695
  if (!response.ok) {
303
- const message = await readErrorMessage(response);
304
- throw new Error(`${options.providerLabel} request failed: ${message}`);
696
+ const err = await buildProviderError(options.providerKind, response);
697
+ err.message = `${options.providerLabel} request failed: ${err.message}`;
698
+ throw err;
305
699
  }
306
700
 
307
701
  return response.json();
@@ -312,15 +706,22 @@ async function requestStream(options: {
312
706
  fetchImpl: typeof globalThis.fetch;
313
707
  init: RequestInit;
314
708
  providerLabel: string;
709
+ providerKind: ProviderKind;
315
710
  }): Promise<ReadableStream<Uint8Array>> {
316
711
  const response = await options.fetchImpl(options.url, options.init);
317
712
  if (!response.ok) {
318
- const message = await readErrorMessage(response);
319
- throw new Error(`${options.providerLabel} request failed: ${message}`);
713
+ const err = await buildProviderError(options.providerKind, response);
714
+ err.message = `${options.providerLabel} request failed: ${err.message}`;
715
+ throw err;
320
716
  }
321
717
 
322
718
  if (!response.body) {
323
- throw new Error(`${options.providerLabel} request failed: stream body missing`);
719
+ throw new ProviderRequestError({
720
+ provider: options.providerKind,
721
+ status: response.status,
722
+ message: `${options.providerLabel} request failed: stream body missing`,
723
+ retryable: false,
724
+ });
324
725
  }
325
726
 
326
727
  return response.body;
@@ -366,6 +767,11 @@ function toOpenAICompatibleMessages(prompt: RuntimePromptMessage[]): OpenAICompa
366
767
  text += part.text;
367
768
  continue;
368
769
  }
770
+ // OpenAI Chat Completions has no roundtrip slot for Anthropic
771
+ // thinking blocks — they get dropped on replay. Anthropic-only.
772
+ if (part.type === "reasoning") {
773
+ continue;
774
+ }
369
775
 
370
776
  toolCalls.push({
371
777
  id: part.toolCallId,
@@ -473,9 +879,15 @@ function normalizeAnthropicFinishReason(
473
879
  }
474
880
  }
475
881
 
476
- function extractAnthropicUsage(payload: unknown):
477
- | { inputTokens?: number; outputTokens?: number; totalTokens?: number }
478
- | undefined {
882
+ type RuntimeUsage = {
883
+ inputTokens?: number;
884
+ outputTokens?: number;
885
+ totalTokens?: number;
886
+ cacheCreationInputTokens?: number;
887
+ cacheReadInputTokens?: number;
888
+ };
889
+
890
+ function extractAnthropicUsage(payload: unknown): RuntimeUsage | undefined {
479
891
  const record = readRecord(payload);
480
892
  const usage = readRecord(record?.usage);
481
893
  if (!usage) {
@@ -484,6 +896,8 @@ function extractAnthropicUsage(payload: unknown):
484
896
 
485
897
  const inputTokens = usage.input_tokens;
486
898
  const outputTokens = usage.output_tokens;
899
+ const cacheCreationInputTokens = usage.cache_creation_input_tokens;
900
+ const cacheReadInputTokens = usage.cache_read_input_tokens;
487
901
 
488
902
  return {
489
903
  inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
@@ -492,17 +906,15 @@ function extractAnthropicUsage(payload: unknown):
492
906
  ? (typeof inputTokens === "number" ? inputTokens : 0) +
493
907
  (typeof outputTokens === "number" ? outputTokens : 0)
494
908
  : undefined,
909
+ ...(typeof cacheCreationInputTokens === "number" ? { cacheCreationInputTokens } : {}),
910
+ ...(typeof cacheReadInputTokens === "number" ? { cacheReadInputTokens } : {}),
495
911
  };
496
912
  }
497
913
 
498
914
  function mergeUsage(
499
- current:
500
- | { inputTokens?: number; outputTokens?: number; totalTokens?: number }
501
- | undefined,
502
- next:
503
- | { inputTokens?: number; outputTokens?: number; totalTokens?: number }
504
- | undefined,
505
- ): { inputTokens?: number; outputTokens?: number; totalTokens?: number } | undefined {
915
+ current: RuntimeUsage | undefined,
916
+ next: RuntimeUsage | undefined,
917
+ ): RuntimeUsage | undefined {
506
918
  if (!current) {
507
919
  return next;
508
920
  }
@@ -513,11 +925,16 @@ function mergeUsage(
513
925
 
514
926
  const inputTokens = next.inputTokens ?? current.inputTokens;
515
927
  const outputTokens = next.outputTokens ?? current.outputTokens;
928
+ const cacheCreationInputTokens = next.cacheCreationInputTokens ??
929
+ current.cacheCreationInputTokens;
930
+ const cacheReadInputTokens = next.cacheReadInputTokens ?? current.cacheReadInputTokens;
516
931
 
517
932
  return {
518
933
  inputTokens,
519
934
  outputTokens,
520
935
  totalTokens: (inputTokens ?? 0) + (outputTokens ?? 0),
936
+ ...(cacheCreationInputTokens !== undefined ? { cacheCreationInputTokens } : {}),
937
+ ...(cacheReadInputTokens !== undefined ? { cacheReadInputTokens } : {}),
521
938
  };
522
939
  }
523
940
 
@@ -538,6 +955,26 @@ function toSnakeCaseRecord(record: Record<string, unknown>): Record<string, unkn
538
955
  );
539
956
  }
540
957
 
958
/**
 * Recursive snake_case key converter for nested config objects (used for
 * Anthropic mcp_servers, where authorizationToken / toolConfiguration /
 * allowedTools all need conversion).
 *
 * - Arrays are mapped element by element.
 * - Plain objects (prototype `Object.prototype` or `null`) have every own
 *   enumerable key converted and every value converted recursively.
 * - Everything else — primitives, `null`, and non-plain objects such as
 *   `Date` — is returned unchanged, so values with their own serialization
 *   are not flattened into empty objects.
 *
 * Each uppercase ASCII letter becomes `_` + its lowercase form, except at
 * the start of a key where it is only lowercased, so `Type` becomes `type`
 * rather than `_type`.
 *
 * @param value Any JSON-like value.
 * @returns A converted copy for arrays / plain objects; otherwise `value`.
 */
function deepSnakeCase(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map((item) => deepSnakeCase(item));
  }
  if (value === null || typeof value !== "object") {
    return value;
  }

  // Only rewrite plain data containers; class instances pass through.
  const prototype: unknown = Object.getPrototypeOf(value);
  if (prototype !== Object.prototype && prototype !== null) {
    return value;
  }

  return Object.fromEntries(
    Object.entries(value as Record<string, unknown>).map(([key, nested]) => [
      key.replace(
        /[A-Z]/g,
        (match: string, offset: number) =>
          offset === 0 ? match.toLowerCase() : `_${match.toLowerCase()}`,
      ),
      deepSnakeCase(nested),
    ]),
  );
}
977
+
541
978
  function pushAnthropicUserContent(
542
979
  messages: AnthropicCompatibleMessage[],
543
980
  content: Array<Record<string, unknown>>,
@@ -558,9 +995,32 @@ function pushAnthropicUserContent(
558
995
  });
559
996
  }
560
997
 
998
+ /**
999
+ * Resolves a {@link ProviderCacheTtl} into Anthropic's `cache_control` shape.
1000
+ *
1001
+ * Returns `undefined` when caching is not requested (`false` / `undefined`),
1002
+ * `{ type: "ephemeral" }` for the 5-minute default (`true` / `"5m"`), or
1003
+ * `{ type: "ephemeral", ttl: "1h" }` for the extended 1-hour cache.
1004
+ */
1005
+ function resolveAnthropicCacheControlBlock(
1006
+ ttl: ProviderCacheTtl | undefined,
1007
+ ): { type: "ephemeral"; ttl?: "1h" } | undefined {
1008
+ if (ttl === undefined || ttl === false) {
1009
+ return undefined;
1010
+ }
1011
+ if (ttl === "1h") {
1012
+ return { type: "ephemeral", ttl: "1h" };
1013
+ }
1014
+ return { type: "ephemeral" };
1015
+ }
1016
+
561
1017
  function toAnthropicMessages(
562
1018
  prompt: RuntimePromptMessage[],
563
- ): { system?: string; messages: AnthropicCompatibleMessage[] } {
1019
+ systemCacheControl?: { type: "ephemeral"; ttl?: "1h" },
1020
+ ): {
1021
+ system?: string | Array<Record<string, unknown>>;
1022
+ messages: AnthropicCompatibleMessage[];
1023
+ } {
564
1024
  const systemParts: string[] = [];
565
1025
  const messages: AnthropicCompatibleMessage[] = [];
566
1026
 
@@ -580,14 +1040,33 @@ function toAnthropicMessages(
580
1040
  case "assistant":
581
1041
  messages.push({
582
1042
  role: "assistant",
583
- content: message.content.map((part) =>
584
- part.type === "text" ? { type: "text", text: part.text } : {
1043
+ content: message.content.map((part) => {
1044
+ if (part.type === "text") {
1045
+ return { type: "text", text: part.text };
1046
+ }
1047
+ if (part.type === "reasoning") {
1048
+ // Redacted thinking blocks roundtrip as the encrypted blob
1049
+ // form Anthropic gave us. Plain thinking blocks need the
1050
+ // signature to verify on the server.
1051
+ if (typeof part.redactedData === "string") {
1052
+ return {
1053
+ type: "redacted_thinking",
1054
+ data: part.redactedData,
1055
+ };
1056
+ }
1057
+ return {
1058
+ type: "thinking",
1059
+ thinking: part.text ?? "",
1060
+ ...(typeof part.signature === "string" ? { signature: part.signature } : {}),
1061
+ };
1062
+ }
1063
+ return {
585
1064
  type: "tool_use",
586
1065
  id: part.toolCallId,
587
1066
  name: part.toolName,
588
1067
  input: part.input,
589
- }
590
- ),
1068
+ };
1069
+ }),
591
1070
  });
592
1071
  break;
593
1072
  case "tool":
@@ -603,14 +1082,63 @@ function toAnthropicMessages(
603
1082
  }
604
1083
  }
605
1084
 
606
- return {
607
- ...(systemParts.length > 0 ? { system: systemParts.join("\n\n") } : {}),
608
- messages,
609
- };
1085
+ if (systemParts.length === 0) {
1086
+ return { messages };
1087
+ }
1088
+
1089
+ const joined = systemParts.join("\n\n");
1090
+
1091
+ // Cache-controlled system prompts must use the array-of-blocks form so the
1092
+ // breakpoint lands on an individual content block. Callers that don't opt
1093
+ // in keep the legacy raw-string form for backward compatibility.
1094
+ if (systemCacheControl) {
1095
+ return {
1096
+ system: [{
1097
+ type: "text",
1098
+ text: joined,
1099
+ cache_control: systemCacheControl,
1100
+ }],
1101
+ messages,
1102
+ };
1103
+ }
1104
+
1105
+ return { system: joined, messages };
1106
+ }
1107
+
1108
/**
 * Short-name → latest-versioned-type alias map for Anthropic provider tools.
 *
 * Anthropic tool types are date-stamped (e.g. `code_execution_20260120`) so
 * callers either pin a version or get the latest. We accept both: a caller
 * can pass `anthropic.code_execution` and we map to the latest known version,
 * or pass `anthropic.code_execution_20250522` and we forward verbatim.
 *
 * Versions chosen here are the latest documented releases as of 2026-04-15
 * — see https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview.
 * When Anthropic ships newer versions, update this map.
 */
const ANTHROPIC_TOOL_VERSION_ALIASES: Record<string, string> = {
  code_execution: "code_execution_20260120",
  computer_use: "computer_20250124",
  computer: "computer_20250124",
  text_editor: "text_editor_20250728",
  bash: "bash_20250124",
  memory: "memory_20250818",
  web_search: "web_search_20250305",
  web_fetch: "web_fetch_20250910",
};

/**
 * Resolve the tool type to send to Anthropic for a caller-supplied name.
 *
 * @param rawType Tool type with the `anthropic.` prefix already removed.
 * @returns `rawType` unchanged when it already ends in an 8-digit date
 *   stamp or has no alias; otherwise the versioned type from
 *   {@link ANTHROPIC_TOOL_VERSION_ALIASES}.
 */
function resolveAnthropicProviderType(rawType: string): string {
  // Already-versioned types (contain a date stamp suffix) pass through verbatim.
  if (/_\d{8}$/.test(rawType)) {
    return rawType;
  }

  // Own-property check: a bare index lookup on an object literal also
  // finds inherited members, so names like "constructor" or "toString"
  // would resolve to functions from Object.prototype instead of falling
  // back to the caller's string.
  const alias = Object.prototype.hasOwnProperty.call(ANTHROPIC_TOOL_VERSION_ALIASES, rawType)
    ? ANTHROPIC_TOOL_VERSION_ALIASES[rawType]
    : undefined;
  return alias ?? rawType;
}
611
1138
 
612
1139
  function toAnthropicTools(
613
1140
  tools: RuntimeToolDefinition[] | undefined,
1141
+ toolsCacheControl?: { type: "ephemeral"; ttl?: "1h" },
614
1142
  ): Array<Record<string, unknown>> | undefined {
615
1143
  if (!tools) {
616
1144
  return undefined;
@@ -632,19 +1160,35 @@ function toAnthropicTools(
632
1160
  continue;
633
1161
  }
634
1162
 
635
- const providerType = tool.id.slice("anthropic.".length);
636
- if (providerType.length === 0) {
1163
+ const rawType = tool.id.slice("anthropic.".length);
1164
+ if (rawType.length === 0) {
637
1165
  continue;
638
1166
  }
639
1167
 
640
1168
  normalized.push({
641
- type: providerType,
1169
+ type: resolveAnthropicProviderType(rawType),
642
1170
  name: tool.name,
643
1171
  ...toSnakeCaseRecord(tool.args),
644
1172
  });
645
1173
  }
646
1174
 
647
- return normalized.length > 0 ? normalized : undefined;
1175
+ if (normalized.length === 0) {
1176
+ return undefined;
1177
+ }
1178
+
1179
+ // Attach the cache breakpoint to the final tool entry so Anthropic caches
1180
+ // the entire tools block up to and including that definition. Earlier tool
1181
+ // entries are implicitly covered by the same breakpoint per Anthropic's
1182
+ // walk-backward cache lookup behaviour.
1183
+ if (toolsCacheControl) {
1184
+ const lastIndex = normalized.length - 1;
1185
+ normalized[lastIndex] = {
1186
+ ...normalized[lastIndex],
1187
+ cache_control: toolsCacheControl,
1188
+ };
1189
+ }
1190
+
1191
+ return normalized;
648
1192
  }
649
1193
 
650
1194
  function createAnthropicRequestHeaders(options: {
@@ -717,47 +1261,244 @@ function resolveAnthropicMaxTokens(
717
1261
  return requested;
718
1262
  }
719
1263
 
1264
+ /**
1265
+ * Map a unified reasoning effort level to an Anthropic `thinking.budget_tokens`
1266
+ * value. Anthropic's minimum accepted budget is 1024; higher tiers give Claude
1267
+ * more headroom to explore. `max` maps to the upper bound documented for
1268
+ * Claude 4.x family (32k tokens of thinking — caller can override via
1269
+ * `budgetTokens` if they need more).
1270
+ */
1271
+ function resolveAnthropicThinkingBudget(
1272
+ option: ProviderReasoningOption | undefined,
1273
+ ): number | undefined {
1274
+ if (!option || option.enabled !== true) {
1275
+ return undefined;
1276
+ }
1277
+ if (typeof option.budgetTokens === "number" && option.budgetTokens >= 1024) {
1278
+ return option.budgetTokens;
1279
+ }
1280
+ switch (option.effort) {
1281
+ case "low":
1282
+ return 1024;
1283
+ case "high":
1284
+ return 16_384;
1285
+ case "max":
1286
+ return 32_768;
1287
+ case "medium":
1288
+ default:
1289
+ return 4096;
1290
+ }
1291
+ }
1292
+
720
1293
  function buildAnthropicMessagesRequest(
721
1294
  modelId: string,
722
1295
  providerName: string,
723
1296
  options: OpenAICompatibleLanguageOptions,
724
1297
  stream: boolean,
1298
+ warnings: WarningCollector,
725
1299
  ): AnthropicCompatibleRequest {
726
- const { system, messages } = toAnthropicMessages(options.prompt);
1300
+ const systemCacheControl = resolveAnthropicCacheControlBlock(
1301
+ options.cacheControl?.system,
1302
+ );
1303
+ const toolsCacheControl = resolveAnthropicCacheControlBlock(
1304
+ options.cacheControl?.tools,
1305
+ );
1306
+
1307
+ const { system, messages } = toAnthropicMessages(options.prompt, systemCacheControl);
1308
+ const anthropicTools = toAnthropicTools(options.tools, toolsCacheControl);
1309
+ const thinkingBudget = resolveAnthropicThinkingBudget(options.reasoning);
1310
+ const thinkingEnabled = thinkingBudget !== undefined;
1311
+
1312
+ // Anthropic doesn't support these unified options at all — emit warnings
1313
+ // so callers don't quietly pass values that have zero effect.
1314
+ if (options.presencePenalty !== undefined) {
1315
+ warnings.push({
1316
+ type: "unsupported-setting",
1317
+ provider: "anthropic",
1318
+ setting: "presencePenalty",
1319
+ details: "Anthropic Messages API has no equivalent and the value was dropped.",
1320
+ });
1321
+ }
1322
+ if (options.frequencyPenalty !== undefined) {
1323
+ warnings.push({
1324
+ type: "unsupported-setting",
1325
+ provider: "anthropic",
1326
+ setting: "frequencyPenalty",
1327
+ details: "Anthropic Messages API has no equivalent and the value was dropped.",
1328
+ });
1329
+ }
1330
+ if (options.seed !== undefined) {
1331
+ warnings.push({
1332
+ type: "unsupported-setting",
1333
+ provider: "anthropic",
1334
+ setting: "seed",
1335
+ details: "Anthropic Messages API does not support deterministic seeding.",
1336
+ });
1337
+ }
1338
+ if (options.topK !== undefined) {
1339
+ warnings.push({
1340
+ type: "unsupported-setting",
1341
+ provider: "anthropic",
1342
+ setting: "topK",
1343
+ details: "Anthropic Messages API does not expose top_k on this surface.",
1344
+ });
1345
+ }
1346
+ if (
1347
+ options.stopSequences && options.stopSequences.length > 4
1348
+ ) {
1349
+ warnings.push({
1350
+ type: "unsupported-setting",
1351
+ provider: "anthropic",
1352
+ setting: "stopSequences",
1353
+ details:
1354
+ `Anthropic accepts at most 4 stop sequences; ${options.stopSequences.length} were provided and the extras were truncated.`,
1355
+ });
1356
+ }
1357
+ if (thinkingEnabled && options.temperature !== undefined) {
1358
+ warnings.push({
1359
+ type: "unsupported-setting",
1360
+ provider: "anthropic",
1361
+ setting: "temperature",
1362
+ details:
1363
+ "Dropped because Anthropic rejects sampling params when extended thinking is enabled.",
1364
+ });
1365
+ }
1366
+ if (thinkingEnabled && options.topP !== undefined) {
1367
+ warnings.push({
1368
+ type: "unsupported-setting",
1369
+ provider: "anthropic",
1370
+ setting: "topP",
1371
+ details:
1372
+ "Dropped because Anthropic rejects sampling params when extended thinking is enabled.",
1373
+ });
1374
+ }
1375
+ if (options.responseFormat && options.responseFormat.type !== "text") {
1376
+ warnings.push({
1377
+ type: "unsupported-setting",
1378
+ provider: "anthropic",
1379
+ setting: "responseFormat",
1380
+ details:
1381
+ "Anthropic Messages API does not have a structured-output response_format equivalent. Use a tool with the schema as input_schema instead.",
1382
+ });
1383
+ }
1384
+
1385
+ // Anthropic requires max_tokens > budget_tokens when thinking is enabled.
1386
+ // Growing max_tokens by the thinking budget preserves the caller's intended
1387
+ // output budget, and we clamp the sum at the model's advertised maximum so
1388
+ // the request never exceeds the API's hard cap.
1389
+ const baseMaxTokens = resolveAnthropicMaxTokens(modelId, options.maxOutputTokens);
1390
+ const maxTokens = thinkingEnabled
1391
+ ? Math.min(
1392
+ baseMaxTokens + (thinkingBudget ?? 0),
1393
+ getAnthropicModelCapabilities(modelId).maxOutputTokens,
1394
+ )
1395
+ : baseMaxTokens;
1396
+
727
1397
  const body: AnthropicCompatibleRequest = {
728
1398
  model: modelId,
729
1399
  messages,
730
- max_tokens: resolveAnthropicMaxTokens(modelId, options.maxOutputTokens),
1400
+ max_tokens: maxTokens,
731
1401
  ...(stream ? { stream: true } : {}),
732
1402
  ...(system ? { system } : {}),
733
- ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
734
- ...(options.topP !== undefined ? { top_p: options.topP } : {}),
1403
+ // Sampling params are mutually exclusive with thinking on Anthropic — the
1404
+ // API rejects the combo outright. Drop them silently when thinking is on
1405
+ // (callers see thinking's output instead of what they'd have gotten from
1406
+ // custom sampling, which is the documented tradeoff).
1407
+ ...(!thinkingEnabled && options.temperature !== undefined
1408
+ ? { temperature: options.temperature }
1409
+ : {}),
1410
+ ...(!thinkingEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
735
1411
  ...(options.stopSequences && options.stopSequences.length > 0
736
- ? { stop_sequences: options.stopSequences }
1412
+ ? { stop_sequences: options.stopSequences.slice(0, 4) }
737
1413
  : {}),
738
- ...(toAnthropicTools(options.tools) ? { tools: toAnthropicTools(options.tools) } : {}),
1414
+ ...(anthropicTools ? { tools: anthropicTools } : {}),
739
1415
  ...(options.toolChoice !== undefined
740
1416
  ? { tool_choice: normalizeAnthropicToolChoice(options.toolChoice) }
741
1417
  : {}),
1418
+ ...(thinkingEnabled ? { thinking: { type: "enabled", budget_tokens: thinkingBudget } } : {}),
1419
+ ...(typeof options.userId === "string" && options.userId.length > 0
1420
+ ? { metadata: { user_id: options.userId } }
1421
+ : {}),
1422
+ ...(options.mcpServers && options.mcpServers.length > 0
1423
+ ? { mcp_servers: deepSnakeCase(options.mcpServers) as unknown[] }
1424
+ : {}),
1425
+ ...(options.anthropicContainer !== undefined ? { container: options.anthropicContainer } : {}),
742
1426
  };
743
1427
 
744
1428
  Object.assign(body, readProviderOptions(options.providerOptions, "anthropic", providerName));
745
1429
  return body;
746
1430
  }
747
1431
 
1432
+ type AnthropicReasoningContent = {
1433
+ type: "reasoning";
1434
+ text?: string;
1435
+ signature?: string;
1436
+ redactedData?: string;
1437
+ };
1438
+
1439
+ type AnthropicCitation = {
1440
+ type: string;
1441
+ citedText?: string;
1442
+ url?: string;
1443
+ title?: string;
1444
+ startCharIndex?: number;
1445
+ endCharIndex?: number;
1446
+ startBlockIndex?: number;
1447
+ endBlockIndex?: number;
1448
+ startPageNumber?: number;
1449
+ endPageNumber?: number;
1450
+ documentIndex?: number;
1451
+ documentTitle?: string;
1452
+ };
1453
+
1454
+ type AnthropicTextContent = {
1455
+ type: "text";
1456
+ text: string;
1457
+ citations?: AnthropicCitation[];
1458
+ };
1459
+
1460
+ /**
1461
+ * Best-effort camelCase normalization of a single Anthropic citation
1462
+ * record. Handles the union of fields across web_search_result_location,
1463
+ * web_fetch_result_location, char_location, page_location, and
1464
+ * content_block_location citation kinds — see
1465
+ * https://docs.claude.com/en/docs/build-with-claude/citations
1466
+ */
1467
+ function normalizeAnthropicCitation(raw: unknown): AnthropicCitation | undefined {
1468
+ const r = readRecord(raw);
1469
+ if (!r) return undefined;
1470
+ const typeStr = typeof r.type === "string" ? r.type : undefined;
1471
+ if (!typeStr) return undefined;
1472
+ const out: AnthropicCitation = { type: typeStr };
1473
+ if (typeof r.cited_text === "string") out.citedText = r.cited_text;
1474
+ if (typeof r.url === "string") out.url = r.url;
1475
+ if (typeof r.title === "string") out.title = r.title;
1476
+ if (typeof r.start_char_index === "number") out.startCharIndex = r.start_char_index;
1477
+ if (typeof r.end_char_index === "number") out.endCharIndex = r.end_char_index;
1478
+ if (typeof r.start_block_index === "number") out.startBlockIndex = r.start_block_index;
1479
+ if (typeof r.end_block_index === "number") out.endBlockIndex = r.end_block_index;
1480
+ if (typeof r.start_page_number === "number") out.startPageNumber = r.start_page_number;
1481
+ if (typeof r.end_page_number === "number") out.endPageNumber = r.end_page_number;
1482
+ if (typeof r.document_index === "number") out.documentIndex = r.document_index;
1483
+ if (typeof r.document_title === "string") out.documentTitle = r.document_title;
1484
+ return out;
1485
+ }
1486
+
748
1487
  function buildAnthropicGenerateResult(payload: unknown): {
749
1488
  content: Array<
750
- | { type: "text"; text: string }
1489
+ | AnthropicTextContent
1490
+ | AnthropicReasoningContent
751
1491
  | { type: "tool-call"; toolCallId: string; toolName: string; input: string }
752
1492
  | { type: "tool-result"; toolCallId: string; toolName: string; result: unknown }
753
1493
  >;
754
1494
  finishReason?: string | { unified: string; raw: string } | null;
755
- usage?: { inputTokens?: number; outputTokens?: number; totalTokens?: number };
1495
+ usage?: RuntimeUsage;
756
1496
  } {
757
1497
  const record = readRecord(payload);
758
1498
  const content = Array.isArray(record?.content) ? record.content : [];
759
1499
  const normalized: Array<
760
- | { type: "text"; text: string }
1500
+ | AnthropicTextContent
1501
+ | AnthropicReasoningContent
761
1502
  | { type: "tool-call"; toolCallId: string; toolName: string; input: string }
762
1503
  | { type: "tool-result"; toolCallId: string; toolName: string; result: unknown }
763
1504
  > = [];
@@ -767,7 +1508,42 @@ function buildAnthropicGenerateResult(payload: unknown): {
767
1508
  const blockType = typeof block?.type === "string" ? block.type : undefined;
768
1509
 
769
1510
  if (blockType === "text" && typeof block?.text === "string" && block.text.length > 0) {
770
- normalized.push({ type: "text", text: block.text });
1511
+ const citationsRaw = Array.isArray(block.citations) ? block.citations : undefined;
1512
+ const citations = citationsRaw
1513
+ ?.flatMap((c) => {
1514
+ const normalizedCitation = normalizeAnthropicCitation(c);
1515
+ return normalizedCitation ? [normalizedCitation] : [];
1516
+ });
1517
+ normalized.push({
1518
+ type: "text",
1519
+ text: block.text,
1520
+ ...(citations && citations.length > 0 ? { citations } : {}),
1521
+ });
1522
+ continue;
1523
+ }
1524
+
1525
+ // Thinking blocks carry the cleartext trace plus a signature that
1526
+ // Anthropic uses to verify on subsequent turns. Surfacing both lets
1527
+ // callers persist them as `reasoning` content parts and replay on
1528
+ // the next turn so Claude can continue from the same thinking.
1529
+ if (blockType === "thinking") {
1530
+ normalized.push({
1531
+ type: "reasoning",
1532
+ ...(typeof block?.thinking === "string" ? { text: block.thinking } : {}),
1533
+ ...(typeof block?.signature === "string" ? { signature: block.signature } : {}),
1534
+ });
1535
+ continue;
1536
+ }
1537
+
1538
+ // Redacted thinking blocks arrive when Claude's safety classifier
1539
+ // hides the trace. Pass the encrypted blob through opaquely so the
1540
+ // caller can replay it on the next turn (Anthropic still needs the
1541
+ // blob to verify continuity even though it can't read it).
1542
+ if (blockType === "redacted_thinking" && typeof block?.data === "string") {
1543
+ normalized.push({
1544
+ type: "reasoning",
1545
+ redactedData: block.data,
1546
+ });
771
1547
  continue;
772
1548
  }
773
1549
 
@@ -857,7 +1633,7 @@ async function* streamAnthropicCompatibleParts(
857
1633
  const toolCalls = new Map<number, AnthropicStreamToolCallState>();
858
1634
  const reasoningBlocks = new Map<number, AnthropicStreamReasoningState>();
859
1635
  let finishReason: string | { unified: string; raw: string } | null = null;
860
- let usage: { inputTokens?: number; outputTokens?: number; totalTokens?: number } | undefined;
1636
+ let usage: RuntimeUsage | undefined;
861
1637
 
862
1638
  for await (const chunk of stream) {
863
1639
  buffer += decoder.decode(chunk, { stream: true });
@@ -909,6 +1685,20 @@ async function* streamAnthropicCompatibleParts(
909
1685
  continue;
910
1686
  }
911
1687
 
1688
+ // Redacted thinking blocks arrive as opaque encrypted payloads when
1689
+ // Claude's safety classifier flags the reasoning trace. Surface them
1690
+ // as a zero-length reasoning block so callers know thinking happened
1691
+ // without leaking the (legitimately hidden) contents.
1692
+ if (blockType === "redacted_thinking") {
1693
+ const reasoningId = `thinking-${index}`;
1694
+ reasoningBlocks.set(index, { id: reasoningId });
1695
+ yield {
1696
+ type: "reasoning-start",
1697
+ id: reasoningId,
1698
+ };
1699
+ continue;
1700
+ }
1701
+
912
1702
  if (
913
1703
  (blockType === "tool_use" || blockType === "server_tool_use") &&
914
1704
  typeof contentBlock?.id === "string" &&
@@ -1094,9 +1884,7 @@ function normalizeOpenAIFinishReason(
1094
1884
  return raw;
1095
1885
  }
1096
1886
 
1097
- function extractOpenAIUsage(payload: unknown):
1098
- | { inputTokens?: number; outputTokens?: number; totalTokens?: number }
1099
- | undefined {
1887
+ function extractOpenAIUsage(payload: unknown): RuntimeUsage | undefined {
1100
1888
  const record = readRecord(payload);
1101
1889
  const usage = readRecord(record?.usage);
1102
1890
  if (!usage) {
@@ -1106,11 +1894,14 @@ function extractOpenAIUsage(payload: unknown):
1106
1894
  const inputTokens = usage.prompt_tokens;
1107
1895
  const outputTokens = usage.completion_tokens;
1108
1896
  const totalTokens = usage.total_tokens;
1897
+ const promptTokensDetails = readRecord(usage.prompt_tokens_details);
1898
+ const cachedTokens = promptTokensDetails?.cached_tokens;
1109
1899
 
1110
1900
  return {
1111
1901
  inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
1112
1902
  outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
1113
1903
  totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
1904
+ ...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
1114
1905
  };
1115
1906
  }
1116
1907
 
@@ -1165,19 +1956,95 @@ function extractOpenAIToolCalls(message: Record<string, unknown>): Array<{
1165
1956
  return normalized;
1166
1957
  }
1167
1958
 
1959
+ /**
1960
+ * OpenAI reasoning models (o1 / o3 / o4 family) use the completion path but
1961
+ * have different constraints than chat models: sampling params are rejected,
1962
+ * and they accept a `reasoning_effort` field. We detect them by model id
1963
+ * prefix so callers don't have to configure it per runtime.
1964
+ */
1965
+ function isOpenAIReasoningModel(modelId: string): boolean {
1966
+ return /^o[134](-|$)/.test(modelId);
1967
+ }
1968
+
1969
+ /**
1970
+ * Map the unified reasoning effort to OpenAI's `reasoning_effort` enum.
1971
+ * OpenAI doesn't accept "max" — we collapse it to "high".
1972
+ */
1973
+ function resolveOpenAIReasoningEffort(
1974
+ option: ProviderReasoningOption | undefined,
1975
+ ): "low" | "medium" | "high" | undefined {
1976
+ if (!option || option.enabled !== true) {
1977
+ return undefined;
1978
+ }
1979
+ switch (option.effort) {
1980
+ case "low":
1981
+ return "low";
1982
+ case "high":
1983
+ case "max":
1984
+ return "high";
1985
+ case "medium":
1986
+ default:
1987
+ return "medium";
1988
+ }
1989
+ }
1990
+
1168
1991
  function buildOpenAIChatRequest(
1169
1992
  modelId: string,
1170
1993
  providerName: string,
1171
1994
  options: OpenAICompatibleLanguageOptions,
1172
1995
  stream: boolean,
1996
+ warnings: WarningCollector,
1173
1997
  ): OpenAICompatibleChatRequest {
1998
+ const isReasoningModel = isOpenAIReasoningModel(modelId);
1999
+ const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
2000
+ const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
2001
+
2002
+ // OpenAI Chat Completions has no top_k surface (it's exposed only on the
2003
+ // Responses API for some reasoning models). Quietly accepting it would
2004
+ // mislead callers into thinking it took effect.
2005
+ if (options.topK !== undefined) {
2006
+ warnings.push({
2007
+ type: "unsupported-setting",
2008
+ provider: "openai",
2009
+ setting: "topK",
2010
+ details: "OpenAI Chat Completions does not expose top_k; the value was dropped.",
2011
+ });
2012
+ }
2013
+
2014
+ // Reasoning models (o1 / o3 / o4) reject sampling params outright. Emit
2015
+ // warnings at build time so callers see *why* the value didn't apply
2016
+ // rather than a 400 from the API.
2017
+ if (reasoningEnabled) {
2018
+ const dropped: Array<[keyof typeof options, string]> = [
2019
+ ["temperature", "temperature"],
2020
+ ["topP", "top_p"],
2021
+ ["presencePenalty", "presence_penalty"],
2022
+ ["frequencyPenalty", "frequency_penalty"],
2023
+ ];
2024
+ for (const [key, openaiName] of dropped) {
2025
+ if (options[key] !== undefined) {
2026
+ warnings.push({
2027
+ type: "unsupported-setting",
2028
+ provider: "openai",
2029
+ setting: key,
2030
+ details:
2031
+ `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
2032
+ });
2033
+ }
2034
+ }
2035
+ }
2036
+
1174
2037
  const body: OpenAICompatibleChatRequest = {
1175
2038
  model: modelId,
1176
2039
  messages: toOpenAICompatibleMessages(options.prompt),
1177
2040
  ...(stream ? { stream: true, stream_options: { include_usage: true } } : {}),
1178
2041
  ...(options.maxOutputTokens !== undefined ? { max_tokens: options.maxOutputTokens } : {}),
1179
- ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
1180
- ...(options.topP !== undefined ? { top_p: options.topP } : {}),
2042
+ // OpenAI reasoning models reject temperature / top_p / frequency / presence.
2043
+ // Drop them silently rather than letting the API bounce the request.
2044
+ ...(!reasoningEnabled && options.temperature !== undefined
2045
+ ? { temperature: options.temperature }
2046
+ : {}),
2047
+ ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
1181
2048
  ...(options.stopSequences && options.stopSequences.length > 0
1182
2049
  ? { stop: options.stopSequences }
1183
2050
  : {}),
@@ -1186,10 +2053,37 @@ function buildOpenAIChatRequest(
1186
2053
  : {}),
1187
2054
  ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
1188
2055
  ...(options.seed !== undefined ? { seed: options.seed } : {}),
1189
- ...(options.presencePenalty !== undefined ? { presence_penalty: options.presencePenalty } : {}),
1190
- ...(options.frequencyPenalty !== undefined
2056
+ ...(!reasoningEnabled && options.presencePenalty !== undefined
2057
+ ? { presence_penalty: options.presencePenalty }
2058
+ : {}),
2059
+ ...(!reasoningEnabled && options.frequencyPenalty !== undefined
1191
2060
  ? { frequency_penalty: options.frequencyPenalty }
1192
2061
  : {}),
2062
+ ...(reasoningEffort !== undefined ? { reasoning_effort: reasoningEffort } : {}),
2063
+ ...(typeof options.userId === "string" && options.userId.length > 0
2064
+ ? { user: options.userId }
2065
+ : {}),
2066
+ ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
2067
+ ...(options.parallelToolCalls !== undefined
2068
+ ? { parallel_tool_calls: options.parallelToolCalls }
2069
+ : {}),
2070
+ ...(options.responseFormat && options.responseFormat.type !== "text"
2071
+ ? {
2072
+ response_format: options.responseFormat.type === "json" ? { type: "json_object" } : {
2073
+ type: "json_schema",
2074
+ json_schema: {
2075
+ name: options.responseFormat.name,
2076
+ ...(typeof options.responseFormat.description === "string"
2077
+ ? { description: options.responseFormat.description }
2078
+ : {}),
2079
+ schema: unwrapToolInputSchema(options.responseFormat.schema),
2080
+ ...(options.responseFormat.strict !== undefined
2081
+ ? { strict: options.responseFormat.strict }
2082
+ : {}),
2083
+ },
2084
+ },
2085
+ }
2086
+ : {}),
1193
2087
  };
1194
2088
 
1195
2089
  Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
@@ -1216,9 +2110,7 @@ function normalizeGoogleFinishReason(
1216
2110
  }
1217
2111
  }
1218
2112
 
1219
- function extractGoogleUsage(payload: unknown):
1220
- | { inputTokens?: number; outputTokens?: number; totalTokens?: number }
1221
- | undefined {
2113
+ function extractGoogleUsage(payload: unknown): RuntimeUsage | undefined {
1222
2114
  const record = readRecord(payload);
1223
2115
  const usage = readRecord(record?.usageMetadata);
1224
2116
  if (!usage) {
@@ -1228,11 +2120,15 @@ function extractGoogleUsage(payload: unknown):
1228
2120
  const inputTokens = usage.promptTokenCount;
1229
2121
  const outputTokens = usage.candidatesTokenCount;
1230
2122
  const totalTokens = usage.totalTokenCount;
2123
+ const cachedContentTokenCount = usage.cachedContentTokenCount;
1231
2124
 
1232
2125
  return {
1233
2126
  inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
1234
2127
  outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
1235
2128
  totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
2129
+ ...(typeof cachedContentTokenCount === "number"
2130
+ ? { cacheReadInputTokens: cachedContentTokenCount }
2131
+ : {}),
1236
2132
  };
1237
2133
  }
1238
2134
 
@@ -1258,20 +2154,29 @@ function toGoogleContents(
1258
2154
  parts: [{ text: readTextParts(message.content) }],
1259
2155
  });
1260
2156
  break;
1261
- case "assistant":
1262
- contents.push({
1263
- role: "model",
1264
- parts: message.content.map((part) =>
1265
- part.type === "text" ? { text: part.text } : {
1266
- functionCall: {
1267
- id: part.toolCallId,
1268
- name: part.toolName,
1269
- args: part.input,
1270
- },
1271
- }
1272
- ),
1273
- });
2157
+ case "assistant": {
2158
+ // Anthropic-only `reasoning` parts have no Gemini equivalent
2159
+ // and are dropped on replay.
2160
+ const parts: Array<Record<string, unknown>> = [];
2161
+ for (const part of message.content) {
2162
+ if (part.type === "text") {
2163
+ parts.push({ text: part.text });
2164
+ continue;
2165
+ }
2166
+ if (part.type === "reasoning") {
2167
+ continue;
2168
+ }
2169
+ parts.push({
2170
+ functionCall: {
2171
+ id: part.toolCallId,
2172
+ name: part.toolName,
2173
+ args: part.input,
2174
+ },
2175
+ });
2176
+ }
2177
+ contents.push({ role: "model", parts });
1274
2178
  break;
2179
+ }
1275
2180
  case "tool":
1276
2181
  contents.push({
1277
2182
  role: "user",
@@ -1299,22 +2204,45 @@ function toGoogleContents(
1299
2204
 
1300
2205
  function toGoogleTools(
1301
2206
  tools: RuntimeToolDefinition[] | undefined,
1302
- ): GoogleCompatibleRequest["tools"] | undefined {
2207
+ ): Array<Record<string, unknown>> | undefined {
1303
2208
  if (!tools) {
1304
2209
  return undefined;
1305
2210
  }
1306
2211
 
1307
- const functionDeclarations = tools.flatMap((tool) =>
1308
- tool.type === "function"
1309
- ? [{
2212
+ const functionDeclarations: Array<Record<string, unknown>> = [];
2213
+ const providerEntries: Array<Record<string, unknown>> = [];
2214
+
2215
+ for (const tool of tools) {
2216
+ if (tool.type === "function") {
2217
+ functionDeclarations.push({
1310
2218
  name: tool.name,
1311
2219
  ...(typeof tool.description === "string" ? { description: tool.description } : {}),
1312
2220
  parameters: unwrapToolInputSchema(tool.inputSchema),
1313
- }]
1314
- : []
1315
- );
2221
+ });
2222
+ continue;
2223
+ }
1316
2224
 
1317
- return functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined;
2225
+ // Gemini provider tools code_execution, google_search,
2226
+ // google_search_retrieval — each lives in its own tools[] entry
2227
+ // with a single key keyed by the camelCase tool name and an
2228
+ // optional config payload (caller-provided tool.args).
2229
+ if (!tool.id.startsWith("google.")) {
2230
+ continue;
2231
+ }
2232
+ const providerType = tool.id.slice("google.".length);
2233
+ if (providerType.length === 0) {
2234
+ continue;
2235
+ }
2236
+ const camelKey = providerType.replace(/_([a-z])/g, (_, ch) => ch.toUpperCase());
2237
+ providerEntries.push({ [camelKey]: tool.args ?? {} });
2238
+ }
2239
+
2240
+ const result: Array<Record<string, unknown>> = [];
2241
+ if (functionDeclarations.length > 0) {
2242
+ result.push({ functionDeclarations });
2243
+ }
2244
+ result.push(...providerEntries);
2245
+ return result.length > 0 ? result : undefined;
1318
2246
  }
1319
2247
 
1320
2248
  function unwrapToolInputSchema(inputSchema: unknown): unknown {
@@ -1346,7 +2274,11 @@ function normalizeGoogleToolChoice(toolChoice: unknown):
1346
2274
  }
1347
2275
 
1348
2276
  const record = readRecord(toolChoice);
1349
- if (record?.type === "tool" && typeof record.name === "string") {
2277
+ if (!record) return undefined;
2278
+
2279
+ // Single-tool restriction: { type: "tool", name } — pin to one
2280
+ // function via mode: ANY + allowedFunctionNames: [name].
2281
+ if (record.type === "tool" && typeof record.name === "string") {
1350
2282
  return {
1351
2283
  functionCallingConfig: {
1352
2284
  mode: "ANY",
@@ -1355,12 +2287,74 @@ function normalizeGoogleToolChoice(toolChoice: unknown):
1355
2287
  };
1356
2288
  }
1357
2289
 
2290
+ // Multi-tool restriction: { type: "tools", names: string[] } — pin
2291
+ // to a subset via mode: ANY + the full allowedFunctionNames array.
2292
+ if (record.type === "tools" && Array.isArray(record.names)) {
2293
+ const names = record.names.filter((n): n is string => typeof n === "string");
2294
+ if (names.length > 0) {
2295
+ return {
2296
+ functionCallingConfig: {
2297
+ mode: "ANY",
2298
+ allowedFunctionNames: names,
2299
+ },
2300
+ };
2301
+ }
2302
+ }
2303
+
2304
+ // Explicit mode forms: { type: "auto" | "none" | "any" }.
2305
+ if (record.type === "auto") {
2306
+ return { functionCallingConfig: { mode: "AUTO" } };
2307
+ }
2308
+ if (record.type === "none") {
2309
+ return { functionCallingConfig: { mode: "NONE" } };
2310
+ }
2311
+ if (record.type === "any" || record.type === "required") {
2312
+ return { functionCallingConfig: { mode: "ANY" } };
2313
+ }
2314
+
1358
2315
  return undefined;
1359
2316
  }
1360
2317
 
2318
+ /**
2319
+ * Map the unified reasoning option to Gemini's thinkingConfig. Gemini 2.5+
2320
+ * accepts `includeThoughts: true` to stream back `thought` parts, and
2321
+ * `thinkingBudget: N` to cap the thinking token count. The effort levels
2322
+ * here follow Google's own guidance (low ~= 512, medium ~= 2048,
2323
+ * high ~= 8192, max = -1 means "dynamic/no cap").
2324
+ */
2325
+ function resolveGoogleThinkingConfig(
2326
+ option: ProviderReasoningOption | undefined,
2327
+ ): Record<string, unknown> | undefined {
2328
+ if (!option || option.enabled !== true) {
2329
+ return undefined;
2330
+ }
2331
+ const config: Record<string, unknown> = { includeThoughts: true };
2332
+ if (typeof option.budgetTokens === "number") {
2333
+ config.thinkingBudget = option.budgetTokens;
2334
+ return config;
2335
+ }
2336
+ switch (option.effort) {
2337
+ case "low":
2338
+ config.thinkingBudget = 512;
2339
+ break;
2340
+ case "high":
2341
+ config.thinkingBudget = 8192;
2342
+ break;
2343
+ case "max":
2344
+ config.thinkingBudget = -1;
2345
+ break;
2346
+ case "medium":
2347
+ default:
2348
+ config.thinkingBudget = 2048;
2349
+ break;
2350
+ }
2351
+ return config;
2352
+ }
2353
+
1361
2354
  function buildGoogleGenerationConfig(
1362
2355
  options: OpenAICompatibleLanguageOptions,
1363
2356
  ): Record<string, unknown> | undefined {
2357
+ const thinkingConfig = resolveGoogleThinkingConfig(options.reasoning);
1364
2358
  const config: Record<string, unknown> = {
1365
2359
  ...(options.maxOutputTokens !== undefined ? { maxOutputTokens: options.maxOutputTokens } : {}),
1366
2360
  ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
@@ -1370,6 +2364,7 @@ function buildGoogleGenerationConfig(
1370
2364
  ? { stopSequences: options.stopSequences }
1371
2365
  : {}),
1372
2366
  ...(options.seed !== undefined ? { seed: options.seed } : {}),
2367
+ ...(thinkingConfig ? { thinkingConfig } : {}),
1373
2368
  };
1374
2369
 
1375
2370
  return Object.keys(config).length > 0 ? config : undefined;
@@ -1378,8 +2373,47 @@ function buildGoogleGenerationConfig(
1378
2373
  function buildGoogleGenerateContentRequest(
1379
2374
  providerName: string,
1380
2375
  options: OpenAICompatibleLanguageOptions,
2376
+ warnings: WarningCollector,
1381
2377
  ): GoogleCompatibleRequest {
2378
+ // Google generate-content surface doesn't accept presence/frequency
2379
+ // penalties on most current models. Emit warnings and let the request
2380
+ // through without them.
2381
+ if (options.presencePenalty !== undefined) {
2382
+ warnings.push({
2383
+ type: "unsupported-setting",
2384
+ provider: "google",
2385
+ setting: "presencePenalty",
2386
+ details: "Gemini generateContent does not accept presencePenalty; the value was dropped.",
2387
+ });
2388
+ }
2389
+ if (options.frequencyPenalty !== undefined) {
2390
+ warnings.push({
2391
+ type: "unsupported-setting",
2392
+ provider: "google",
2393
+ setting: "frequencyPenalty",
2394
+ details: "Gemini generateContent does not accept frequencyPenalty; the value was dropped.",
2395
+ });
2396
+ }
2397
+ if (options.responseFormat && options.responseFormat.type !== "text") {
2398
+ warnings.push({
2399
+ type: "unsupported-setting",
2400
+ provider: "google",
2401
+ setting: "responseFormat",
2402
+ details:
2403
+ "Gemini uses generationConfig.responseMimeType + responseSchema for structured outputs, which is a separate surface and not yet wired through this option.",
2404
+ });
2405
+ }
2406
+
1382
2407
  const { systemInstruction, contents } = toGoogleContents(options.prompt);
2408
+ const generationConfig = buildGoogleGenerationConfig(options);
2409
+ // requestLabels wins over userId-derived labels: when callers explicitly
2410
+ // provide a label map, that's the source of truth. Otherwise fall back
2411
+ // to {user_id} derived from the unified userId option.
2412
+ const labels = options.requestLabels && Object.keys(options.requestLabels).length > 0
2413
+ ? options.requestLabels
2414
+ : typeof options.userId === "string" && options.userId.length > 0
2415
+ ? { user_id: options.userId }
2416
+ : undefined;
1383
2417
  const body: GoogleCompatibleRequest = {
1384
2418
  contents,
1385
2419
  ...(systemInstruction ? { systemInstruction } : {}),
@@ -1387,8 +2421,13 @@ function buildGoogleGenerateContentRequest(
1387
2421
  ...(normalizeGoogleToolChoice(options.toolChoice)
1388
2422
  ? { toolConfig: normalizeGoogleToolChoice(options.toolChoice) }
1389
2423
  : {}),
1390
- ...(buildGoogleGenerationConfig(options)
1391
- ? { generationConfig: buildGoogleGenerationConfig(options) }
2424
+ ...(generationConfig ? { generationConfig } : {}),
2425
+ ...(labels ? { labels } : {}),
2426
+ ...(typeof options.googleCachedContent === "string" && options.googleCachedContent.length > 0
2427
+ ? { cachedContent: options.googleCachedContent }
2428
+ : {}),
2429
+ ...(options.googleSafetySettings && options.googleSafetySettings.length > 0
2430
+ ? { safetySettings: options.googleSafetySettings }
1392
2431
  : {}),
1393
2432
  };
1394
2433
 
@@ -1426,7 +2465,8 @@ function buildGoogleGenerateResult(payload: unknown): {
1426
2465
  | { type: "tool-call"; toolCallId: string; toolName: string; input: string }
1427
2466
  >;
1428
2467
  finishReason?: string | { unified: string; raw: string } | null;
1429
- usage?: { inputTokens?: number; outputTokens?: number; totalTokens?: number };
2468
+ usage?: RuntimeUsage;
2469
+ groundingMetadata?: Record<string, unknown>;
1430
2470
  } {
1431
2471
  const parts = extractGoogleCandidateParts(payload);
1432
2472
  const content: Array<
@@ -1451,10 +2491,19 @@ function buildGoogleGenerateResult(payload: unknown): {
1451
2491
  }
1452
2492
  }
1453
2493
 
2494
+ // Gemini grounding (google_search / google_search_retrieval) returns
2495
+ // a per-candidate groundingMetadata object with web search queries,
2496
+ // grounding chunks, and citation indices into the response text.
2497
+ // Pass it through opaquely so callers can render footnotes / source
2498
+ // chips / "Search results" UI without parsing the wire shape.
2499
+ const candidate = extractFirstGoogleCandidate(payload);
2500
+ const groundingMetadata = readRecord(candidate?.groundingMetadata);
2501
+
1454
2502
  return {
1455
2503
  content,
1456
- finishReason: normalizeGoogleFinishReason(extractFirstGoogleCandidate(payload)?.finishReason),
2504
+ finishReason: normalizeGoogleFinishReason(candidate?.finishReason),
1457
2505
  usage: extractGoogleUsage(payload),
2506
+ ...(groundingMetadata ? { groundingMetadata } : {}),
1458
2507
  };
1459
2508
  }
1460
2509
 
@@ -1467,7 +2516,7 @@ async function* streamGoogleCompatibleParts(
1467
2516
  let reasoningId: string | null = null;
1468
2517
  let reasoningIndex = 0;
1469
2518
  let finishReason: string | { unified: string; raw: string } | null = null;
1470
- let usage: { inputTokens?: number; outputTokens?: number; totalTokens?: number } | undefined;
2519
+ let usage: RuntimeUsage | undefined;
1471
2520
 
1472
2521
  for await (const chunk of stream) {
1473
2522
  buffer += decoder.decode(chunk, { stream: true });
@@ -1599,7 +2648,7 @@ function buildOpenAIGenerateResult(payload: unknown): {
1599
2648
  }
1600
2649
  >;
1601
2650
  finishReason?: string | { unified: string; raw: string } | null;
1602
- usage?: { inputTokens?: number; outputTokens?: number; totalTokens?: number };
2651
+ usage?: RuntimeUsage;
1603
2652
  } {
1604
2653
  const choice = extractFirstChoice(payload);
1605
2654
  const message = readRecord(choice?.message);
@@ -1630,7 +2679,7 @@ async function* streamOpenAICompatibleParts(
1630
2679
  let reasoningId: string | null = null;
1631
2680
  let reasoningIndex = 0;
1632
2681
  let finishReason: string | { unified: string; raw: string } | null = null;
1633
- let usage: { inputTokens?: number; outputTokens?: number; totalTokens?: number } | undefined;
2682
+ let usage: RuntimeUsage | undefined;
1634
2683
 
1635
2684
  for await (const chunk of stream) {
1636
2685
  buffer += decoder.decode(chunk, { stream: true });
@@ -1788,11 +2837,19 @@ export function createOpenAIModelRuntime(
1788
2837
  doGenerate(optionsForRuntime: unknown) {
1789
2838
  const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
1790
2839
  const url = getOpenAIChatCompletionsUrl(config.baseURL);
1791
- const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, false);
2840
+ const warnings = createWarningCollector();
2841
+ const body = buildOpenAIChatRequest(
2842
+ modelId,
2843
+ config.name ?? "openai",
2844
+ options,
2845
+ false,
2846
+ warnings,
2847
+ );
1792
2848
  return requestJson({
1793
2849
  url,
1794
2850
  fetchImpl,
1795
2851
  providerLabel: config.name ?? "openai",
2852
+ providerKind: "openai",
1796
2853
  init: {
1797
2854
  method: "POST",
1798
2855
  headers: createRequestHeaders({
@@ -1803,16 +2860,30 @@ export function createOpenAIModelRuntime(
1803
2860
  body: JSON.stringify(body),
1804
2861
  signal: options.abortSignal,
1805
2862
  },
1806
- }).then(buildOpenAIGenerateResult);
2863
+ }).then((payload) => {
2864
+ const drained = warnings.drain();
2865
+ return {
2866
+ ...buildOpenAIGenerateResult(payload),
2867
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2868
+ };
2869
+ });
1807
2870
  },
1808
2871
  doStream(optionsForRuntime: unknown) {
1809
2872
  const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
1810
2873
  const url = getOpenAIChatCompletionsUrl(config.baseURL);
1811
- const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, true);
2874
+ const warnings = createWarningCollector();
2875
+ const body = buildOpenAIChatRequest(
2876
+ modelId,
2877
+ config.name ?? "openai",
2878
+ options,
2879
+ true,
2880
+ warnings,
2881
+ );
1812
2882
  return requestStream({
1813
2883
  url,
1814
2884
  fetchImpl,
1815
2885
  providerLabel: config.name ?? "openai",
2886
+ providerKind: "openai",
1816
2887
  init: {
1817
2888
  method: "POST",
1818
2889
  headers: createRequestHeaders({
@@ -1823,9 +2894,13 @@ export function createOpenAIModelRuntime(
1823
2894
  body: JSON.stringify(body),
1824
2895
  signal: options.abortSignal,
1825
2896
  },
1826
- }).then((responseStream) => ({
1827
- stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
1828
- }));
2897
+ }).then((responseStream) => {
2898
+ const drained = warnings.drain();
2899
+ return {
2900
+ stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
2901
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2902
+ };
2903
+ });
1829
2904
  },
1830
2905
  };
1831
2906
  }
@@ -1843,16 +2918,19 @@ export function createAnthropicModelRuntime(
1843
2918
  doGenerate(optionsForRuntime: unknown) {
1844
2919
  const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
1845
2920
  const url = getAnthropicMessagesUrl(config.baseURL);
2921
+ const warnings = createWarningCollector();
1846
2922
  const body = buildAnthropicMessagesRequest(
1847
2923
  modelId,
1848
2924
  config.name ?? "anthropic",
1849
2925
  options,
1850
2926
  false,
2927
+ warnings,
1851
2928
  );
1852
2929
  return requestJson({
1853
2930
  url,
1854
2931
  fetchImpl,
1855
2932
  providerLabel: config.name ?? "anthropic",
2933
+ providerKind: "anthropic",
1856
2934
  init: {
1857
2935
  method: "POST",
1858
2936
  headers: createAnthropicRequestHeaders({
@@ -1863,21 +2941,30 @@ export function createAnthropicModelRuntime(
1863
2941
  body: JSON.stringify(body),
1864
2942
  signal: options.abortSignal,
1865
2943
  },
1866
- }).then(buildAnthropicGenerateResult);
2944
+ }).then((payload) => {
2945
+ const drained = warnings.drain();
2946
+ return {
2947
+ ...buildAnthropicGenerateResult(payload),
2948
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2949
+ };
2950
+ });
1867
2951
  },
1868
2952
  doStream(optionsForRuntime: unknown) {
1869
2953
  const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
1870
2954
  const url = getAnthropicMessagesUrl(config.baseURL);
2955
+ const warnings = createWarningCollector();
1871
2956
  const body = buildAnthropicMessagesRequest(
1872
2957
  modelId,
1873
2958
  config.name ?? "anthropic",
1874
2959
  options,
1875
2960
  true,
2961
+ warnings,
1876
2962
  );
1877
2963
  return requestStream({
1878
2964
  url,
1879
2965
  fetchImpl,
1880
2966
  providerLabel: config.name ?? "anthropic",
2967
+ providerKind: "anthropic",
1881
2968
  init: {
1882
2969
  method: "POST",
1883
2970
  headers: createAnthropicRequestHeaders({
@@ -1888,9 +2975,13 @@ export function createAnthropicModelRuntime(
1888
2975
  body: JSON.stringify(body),
1889
2976
  signal: options.abortSignal,
1890
2977
  },
1891
- }).then((responseStream) => ({
1892
- stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
1893
- }));
2978
+ }).then((responseStream) => {
2979
+ const drained = warnings.drain();
2980
+ return {
2981
+ stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
2982
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2983
+ };
2984
+ });
1894
2985
  },
1895
2986
  };
1896
2987
  }
@@ -1908,11 +2999,17 @@ export function createGoogleModelRuntime(
1908
2999
  doGenerate(optionsForRuntime: unknown) {
1909
3000
  const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
1910
3001
  const url = getGoogleGenerateContentUrl(config.baseURL, modelId);
1911
- const body = buildGoogleGenerateContentRequest(config.name ?? "google", options);
3002
+ const warnings = createWarningCollector();
3003
+ const body = buildGoogleGenerateContentRequest(
3004
+ config.name ?? "google",
3005
+ options,
3006
+ warnings,
3007
+ );
1912
3008
  return requestJson({
1913
3009
  url,
1914
3010
  fetchImpl,
1915
3011
  providerLabel: config.name ?? "google",
3012
+ providerKind: "google",
1916
3013
  init: {
1917
3014
  method: "POST",
1918
3015
  headers: createRequestHeaders({
@@ -1923,16 +3020,28 @@ export function createGoogleModelRuntime(
1923
3020
  body: JSON.stringify(body),
1924
3021
  signal: options.abortSignal,
1925
3022
  },
1926
- }).then(buildGoogleGenerateResult);
3023
+ }).then((payload) => {
3024
+ const drained = warnings.drain();
3025
+ return {
3026
+ ...buildGoogleGenerateResult(payload),
3027
+ ...(drained.length > 0 ? { warnings: drained } : {}),
3028
+ };
3029
+ });
1927
3030
  },
1928
3031
  doStream(optionsForRuntime: unknown) {
1929
3032
  const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
1930
3033
  const url = getGoogleStreamGenerateContentUrl(config.baseURL, modelId);
1931
- const body = buildGoogleGenerateContentRequest(config.name ?? "google", options);
3034
+ const warnings = createWarningCollector();
3035
+ const body = buildGoogleGenerateContentRequest(
3036
+ config.name ?? "google",
3037
+ options,
3038
+ warnings,
3039
+ );
1932
3040
  return requestStream({
1933
3041
  url,
1934
3042
  fetchImpl,
1935
3043
  providerLabel: config.name ?? "google",
3044
+ providerKind: "google",
1936
3045
  init: {
1937
3046
  method: "POST",
1938
3047
  headers: createRequestHeaders({
@@ -1943,9 +3052,13 @@ export function createGoogleModelRuntime(
1943
3052
  body: JSON.stringify(body),
1944
3053
  signal: options.abortSignal,
1945
3054
  },
1946
- }).then((responseStream) => ({
1947
- stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
1948
- }));
3055
+ }).then((responseStream) => {
3056
+ const drained = warnings.drain();
3057
+ return {
3058
+ stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
3059
+ ...(drained.length > 0 ? { warnings: drained } : {}),
3060
+ };
3061
+ });
1949
3062
  },
1950
3063
  };
1951
3064
  }
@@ -1973,6 +3086,7 @@ export function createOpenAIEmbeddingRuntime(
1973
3086
  url,
1974
3087
  fetchImpl,
1975
3088
  providerLabel: config.name ?? "openai",
3089
+ providerKind: "openai",
1976
3090
  init: {
1977
3091
  method: "POST",
1978
3092
  headers: {
@@ -2021,6 +3135,7 @@ export function createGoogleEmbeddingRuntime(
2021
3135
  url,
2022
3136
  fetchImpl,
2023
3137
  providerLabel: config.name ?? "google",
3138
+ providerKind: "google",
2024
3139
  init: {
2025
3140
  method: "POST",
2026
3141
  headers: {