veryfront 0.1.206 → 0.1.208

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -77,26 +77,184 @@ function extractGoogleUsageTokens(payload) {
77
77
  const promptTokenCount = usageMetadata?.promptTokenCount;
78
78
  return typeof promptTokenCount === "number" ? promptTokenCount : undefined;
79
79
  }
80
- async function readErrorMessage(response) {
81
- const text = await response.text();
82
- return text.trim() || `${response.status} ${response.statusText}`.trim();
80
/**
 * Creates a small accumulator for warnings raised while a request is built.
 *
 * @returns {{ push: (warning: unknown) => void, drain: () => unknown[] }}
 *   `push` appends one warning. `drain` returns a shallow copy of everything
 *   collected so far; the internal buffer is left untouched, so repeated
 *   calls return the same entries and callers cannot mutate the buffer
 *   through the returned array.
 */
function createWarningCollector() {
  const collected = [];
  const push = (warning) => {
    collected.push(warning);
  };
  const drain = () => [...collected];
  return { push, drain };
}
91
/**
 * Base class for typed provider errors. The `retryable` flag is the
 * primary signal for callers (or a retry wrapper) to decide whether to
 * re-issue the request. `retryAfterMs` is set when the provider gave an
 * explicit delay hint (Retry-After header, Retry-Info trailer).
 *
 * Fields:
 *   - `provider`     provider identifier passed by the caller
 *   - `status`       HTTP status of the failed response
 *   - `retryable`    whether re-issuing the request may succeed
 *   - `retryAfterMs` optional delay hint in milliseconds
 */
export class ProviderError extends Error {
  provider;
  status;
  retryable;
  retryAfterMs;
  /**
   * @param {{ message: string, provider: string, status: number,
   *   retryable: boolean, retryAfterMs?: number }} options
   */
  constructor(options) {
    super(options.message);
    // `new.target` is the constructor that was actually invoked, so every
    // subclass reports its own name (e.g. "ProviderQuotaError") in logs and
    // stack traces without having to override the constructor.
    this.name = new.target.name;
    this.provider = options.provider;
    this.status = options.status;
    this.retryable = options.retryable;
    if (options.retryAfterMs !== undefined) {
      this.retryAfterMs = options.retryAfterMs;
    }
  }
}
/** Provider reports it is overloaded (Anthropic 529, OpenAI/Google 503). */
export class ProviderOverloadedError extends ProviderError {
}
/** Provider is rate limiting this API key (HTTP 429, optionally with a Retry-After hint). */
export class ProviderRateLimitError extends ProviderError {
}
/** Provider account quota is exhausted — non-retryable. */
export class ProviderQuotaError extends ProviderError {
}
/** Non-retryable 4xx/5xx that doesn't fit another bucket. */
export class ProviderRequestError extends ProviderError {
}
125
/**
 * Converts a `Retry-After` header value into a delay in milliseconds.
 *
 * Two forms are understood:
 *   - delay-seconds: a non-negative decimal number ("2", "1.5"), returned
 *     as rounded milliseconds;
 *   - HTTP-date: anything `Date.parse` accepts, returned as the time left
 *     until that instant (never negative).
 *
 * @param {string | null | undefined} header Raw header value.
 * @returns {number | undefined} Delay in milliseconds, or `undefined` when
 *   the header is missing, blank, or unparseable.
 */
function parseRetryAfterMs(header) {
  if (header === null || header === undefined) {
    return undefined;
  }
  const value = String(header).trim();
  if (value === "") {
    // A blank header carries no hint. Without this guard `Number("  ")`
    // would evaluate to 0 and be reported as "retry immediately".
    return undefined;
  }
  // Only plain decimals count as delay-seconds; this keeps hex ("0x10") and
  // exponent ("1e3") spellings from being misread as delays.
  if (/^\d+(?:\.\d+)?$/.test(value)) {
    const seconds = Number(value);
    return Number.isFinite(seconds) ? Math.round(seconds * 1000) : undefined;
  }
  // HTTP-date form (rare in practice for LLM providers).
  const parsed = Date.parse(value);
  if (!Number.isNaN(parsed)) {
    return Math.max(0, parsed - Date.now());
  }
  return undefined;
}
139
/**
 * Inspect a non-2xx response and build the most specific ProviderError
 * subclass we can. Reads the response body as text (it's already dead
 * on the wire by this point). Body classification handles the cases
 * where HTTP status alone is ambiguous — notably OpenAI
 * `insufficient_quota` vs `rate_limit_exceeded` both arriving as 429.
 *
 * Classification, in order:
 *   - anthropic 529, openai/google 503      -> ProviderOverloadedError (retryable)
 *   - openai 429 with `insufficient_quota`  -> ProviderQuotaError (non-retryable)
 *   - google 429 with `RESOURCE_EXHAUSTED`  -> ProviderQuotaError (non-retryable)
 *   - any other openai/google/anthropic 429 -> ProviderRateLimitError (retryable)
 *   - everything else                       -> ProviderRequestError (non-retryable)
 *
 * @param {string} provider Provider kind ("anthropic", "openai", "google", ...).
 * @param {Response} response The failed fetch response; its body is consumed.
 * @returns {Promise<ProviderError>} The error to throw; this function never throws it itself.
 */
async function buildProviderError(provider, response) {
  const rawBody = await response.text();
  const message = rawBody.trim() || `${response.status} ${response.statusText}`.trim();
  const status = response.status;
  const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));
  // Error bodies are usually JSON but may be plain text or HTML from a proxy.
  let parsedBody;
  try {
    parsedBody = JSON.parse(rawBody);
  }
  catch {
    parsedBody = undefined;
  }
  const errorRecord = readRecord(parsedBody?.error);
  // First string among code / type / status: the visible branches below
  // match against OpenAI-style codes and Google-style status strings.
  const errorCode = [errorRecord?.code, errorRecord?.type, errorRecord?.status]
    .find((candidate) => typeof candidate === "string");
  const retryableOptions = () => ({
    provider,
    status,
    message,
    retryable: true,
    ...(retryAfterMs !== undefined ? { retryAfterMs } : {}),
  });
  const terminalOptions = () => ({
    provider,
    status,
    message,
    retryable: false,
  });
  // Anthropic 529 = overloaded. Anthropic surfaces this with
  // { error: { type: "overloaded_error" } } in the body.
  // OpenAI / Google 503 = overloaded.
  const overloaded = (provider === "anthropic" && status === 529) ||
    ((provider === "openai" || provider === "google") && status === 503);
  if (overloaded) {
    return new ProviderOverloadedError(retryableOptions());
  }
  if (status === 429) {
    // OpenAI 429 splits based on the error code in the body:
    //   - insufficient_quota → hard quota, non-retryable
    //   - rate_limit_exceeded / tokens_per_min_exceeded → retry with Retry-After
    if (provider === "openai") {
      return errorCode === "insufficient_quota"
        ? new ProviderQuotaError(terminalOptions())
        : new ProviderRateLimitError(retryableOptions());
    }
    // Google 429 RESOURCE_EXHAUSTED is almost always the daily free-tier
    // quota — surface as a hard quota error so callers don't hot-loop on
    // retries that can't possibly succeed until midnight UTC.
    if (provider === "google") {
      return errorCode === "RESOURCE_EXHAUSTED"
        ? new ProviderQuotaError(terminalOptions())
        : new ProviderRateLimitError(retryableOptions());
    }
    // Anthropic 429 is its rate_limit_error; without this branch it fell
    // through to the non-retryable bucket and retry wrappers gave up on a
    // transient condition.
    if (provider === "anthropic") {
      return new ProviderRateLimitError(retryableOptions());
    }
  }
  return new ProviderRequestError(terminalOptions());
}
84
235
  async function requestJson(options) {
85
236
  const response = await options.fetchImpl(options.url, options.init);
86
237
  if (!response.ok) {
87
- const message = await readErrorMessage(response);
88
- throw new Error(`${options.providerLabel} request failed: ${message}`);
238
+ const err = await buildProviderError(options.providerKind, response);
239
+ err.message = `${options.providerLabel} request failed: ${err.message}`;
240
+ throw err;
89
241
  }
90
242
  return response.json();
91
243
  }
92
244
  async function requestStream(options) {
93
245
  const response = await options.fetchImpl(options.url, options.init);
94
246
  if (!response.ok) {
95
- const message = await readErrorMessage(response);
96
- throw new Error(`${options.providerLabel} request failed: ${message}`);
247
+ const err = await buildProviderError(options.providerKind, response);
248
+ err.message = `${options.providerLabel} request failed: ${err.message}`;
249
+ throw err;
97
250
  }
98
251
  if (!response.body) {
99
- throw new Error(`${options.providerLabel} request failed: stream body missing`);
252
+ throw new ProviderRequestError({
253
+ provider: options.providerKind,
254
+ status: response.status,
255
+ message: `${options.providerLabel} request failed: stream body missing`,
256
+ retryable: false,
257
+ });
100
258
  }
101
259
  return response.body;
102
260
  }
@@ -133,6 +291,11 @@ function toOpenAICompatibleMessages(prompt) {
133
291
  text += part.text;
134
292
  continue;
135
293
  }
294
+ // OpenAI Chat Completions has no roundtrip slot for Anthropic
295
+ // thinking blocks — they get dropped on replay. Anthropic-only.
296
+ if (part.type === "reasoning") {
297
+ continue;
298
+ }
136
299
  toolCalls.push({
137
300
  id: part.toolCallId,
138
301
  type: "function",
@@ -222,6 +385,8 @@ function extractAnthropicUsage(payload) {
222
385
  }
223
386
  const inputTokens = usage.input_tokens;
224
387
  const outputTokens = usage.output_tokens;
388
+ const cacheCreationInputTokens = usage.cache_creation_input_tokens;
389
+ const cacheReadInputTokens = usage.cache_read_input_tokens;
225
390
  return {
226
391
  inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
227
392
  outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
@@ -229,6 +394,8 @@ function extractAnthropicUsage(payload) {
229
394
  ? (typeof inputTokens === "number" ? inputTokens : 0) +
230
395
  (typeof outputTokens === "number" ? outputTokens : 0)
231
396
  : undefined,
397
+ ...(typeof cacheCreationInputTokens === "number" ? { cacheCreationInputTokens } : {}),
398
+ ...(typeof cacheReadInputTokens === "number" ? { cacheReadInputTokens } : {}),
232
399
  };
233
400
  }
234
401
  function mergeUsage(current, next) {
@@ -240,10 +407,15 @@ function mergeUsage(current, next) {
240
407
  }
241
408
  const inputTokens = next.inputTokens ?? current.inputTokens;
242
409
  const outputTokens = next.outputTokens ?? current.outputTokens;
410
+ const cacheCreationInputTokens = next.cacheCreationInputTokens ??
411
+ current.cacheCreationInputTokens;
412
+ const cacheReadInputTokens = next.cacheReadInputTokens ?? current.cacheReadInputTokens;
243
413
  return {
244
414
  inputTokens,
245
415
  outputTokens,
246
416
  totalTokens: (inputTokens ?? 0) + (outputTokens ?? 0),
417
+ ...(cacheCreationInputTokens !== undefined ? { cacheCreationInputTokens } : {}),
418
+ ...(cacheReadInputTokens !== undefined ? { cacheReadInputTokens } : {}),
247
419
  };
248
420
  }
249
421
  function normalizeAnthropicToolChoice(toolChoice) {
@@ -258,6 +430,23 @@ function toSnakeCaseRecord(record) {
258
430
  value,
259
431
  ]));
260
432
  }
433
/**
 * Recursive snake_case key converter for nested config objects (used for
 * Anthropic mcp_servers, where authorizationToken / toolConfiguration /
 * allowedTools all need conversion).
 *
 * Arrays are mapped element-wise and plain objects get every key rewritten
 * (each uppercase ASCII letter becomes `_` + its lowercase form). Everything
 * else — primitives, `null`, and non-plain objects such as `URL`, `Date` or
 * `Map` — is returned as-is. Walking those with `Object.entries` would
 * flatten them to `{}` and silently lose the value.
 *
 * @param {unknown} value Value to convert.
 * @returns {unknown} A converted copy for arrays/plain objects, otherwise `value` itself.
 */
function deepSnakeCase(value) {
  if (Array.isArray(value)) {
    return value.map((item) => deepSnakeCase(item));
  }
  if (value === null || typeof value !== "object") {
    return value;
  }
  const prototype = Object.getPrototypeOf(value);
  if (prototype !== Object.prototype && prototype !== null) {
    // Class instances and built-ins keep their identity and own serialization.
    return value;
  }
  return Object.fromEntries(Object.entries(value).map(([key, nested]) => [
    key.replace(/[A-Z]/g, (match) => `_${match.toLowerCase()}`),
    deepSnakeCase(nested),
  ]));
}
261
450
  function pushAnthropicUserContent(messages, content) {
262
451
  if (content.length === 0) {
263
452
  return;
@@ -272,7 +461,23 @@ function pushAnthropicUserContent(messages, content) {
272
461
  content,
273
462
  });
274
463
  }
275
- function toAnthropicMessages(prompt) {
464
/**
 * Translates a cache TTL setting into the `cache_control` block attached to
 * Anthropic request content.
 *
 * @param {unknown} ttl Cache setting supplied by the caller.
 * @returns {{ type: "ephemeral", ttl?: "1h" } | undefined}
 *   `undefined` when `ttl` is `undefined` or `false` (no caching requested);
 *   `{ type: "ephemeral", ttl: "1h" }` when `ttl` is exactly `"1h"`;
 *   `{ type: "ephemeral" }` for every other value (e.g. `true` or `"5m"`).
 */
function resolveAnthropicCacheControlBlock(ttl) {
  const cachingRequested = !(ttl === undefined || ttl === false);
  if (!cachingRequested) {
    return undefined;
  }
  return ttl === "1h" ? { type: "ephemeral", ttl: "1h" } : { type: "ephemeral" };
}
480
+ function toAnthropicMessages(prompt, systemCacheControl) {
276
481
  const systemParts = [];
277
482
  const messages = [];
278
483
  for (const message of prompt) {
@@ -291,11 +496,32 @@ function toAnthropicMessages(prompt) {
291
496
  case "assistant":
292
497
  messages.push({
293
498
  role: "assistant",
294
- content: message.content.map((part) => part.type === "text" ? { type: "text", text: part.text } : {
295
- type: "tool_use",
296
- id: part.toolCallId,
297
- name: part.toolName,
298
- input: part.input,
499
+ content: message.content.map((part) => {
500
+ if (part.type === "text") {
501
+ return { type: "text", text: part.text };
502
+ }
503
+ if (part.type === "reasoning") {
504
+ // Redacted thinking blocks roundtrip as the encrypted blob
505
+ // form Anthropic gave us. Plain thinking blocks need the
506
+ // signature to verify on the server.
507
+ if (typeof part.redactedData === "string") {
508
+ return {
509
+ type: "redacted_thinking",
510
+ data: part.redactedData,
511
+ };
512
+ }
513
+ return {
514
+ type: "thinking",
515
+ thinking: part.text ?? "",
516
+ ...(typeof part.signature === "string" ? { signature: part.signature } : {}),
517
+ };
518
+ }
519
+ return {
520
+ type: "tool_use",
521
+ id: part.toolCallId,
522
+ name: part.toolName,
523
+ input: part.input,
524
+ };
299
525
  }),
300
526
  });
301
527
  break;
@@ -308,12 +534,55 @@ function toAnthropicMessages(prompt) {
308
534
  break;
309
535
  }
310
536
  }
311
- return {
312
- ...(systemParts.length > 0 ? { system: systemParts.join("\n\n") } : {}),
313
- messages,
314
- };
537
+ if (systemParts.length === 0) {
538
+ return { messages };
539
+ }
540
+ const joined = systemParts.join("\n\n");
541
+ // Cache-controlled system prompts must use the array-of-blocks form so the
542
+ // breakpoint lands on an individual content block. Callers that don't opt
543
+ // in keep the legacy raw-string form for backward compatibility.
544
+ if (systemCacheControl) {
545
+ return {
546
+ system: [{
547
+ type: "text",
548
+ text: joined,
549
+ cache_control: systemCacheControl,
550
+ }],
551
+ messages,
552
+ };
553
+ }
554
+ return { system: joined, messages };
315
555
  }
316
- function toAnthropicTools(tools) {
556
/**
 * Short-name → latest-versioned-type alias map for Anthropic provider tools.
 *
 * Anthropic tool types are date-stamped (e.g. `code_execution_20260120`) so
 * callers either pin a version or get the latest. We accept both: a caller
 * can pass `anthropic.code_execution` and we map to the latest known version,
 * or pass `anthropic.code_execution_20250522` and we forward verbatim.
 *
 * Versions chosen here are the latest documented releases as of 2026-04-15
 * — see https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview.
 * When Anthropic ships newer versions, update this map.
 */
const ANTHROPIC_TOOL_VERSION_ALIASES = {
  code_execution: "code_execution_20260120",
  computer_use: "computer_20250124",
  computer: "computer_20250124",
  text_editor: "text_editor_20250728",
  bash: "bash_20250124",
  memory: "memory_20250818",
  web_search: "web_search_20250305",
  web_fetch: "web_fetch_20250910",
};
/**
 * Resolves the part of a tool id after `anthropic.` to the tool `type` sent
 * to the API.
 *
 * @param {string} rawType Short alias or already-versioned tool type.
 * @returns {string} `rawType` unchanged when it ends in an 8-digit date
 *   stamp or is not a known alias; otherwise the aliased versioned type.
 */
function resolveAnthropicProviderType(rawType) {
  // Already-versioned types (contain a date stamp suffix) pass through verbatim.
  if (/_\d{8}$/.test(rawType)) {
    return rawType;
  }
  // Own-property check only: a bare index lookup would also resolve inherited
  // keys such as "constructor" or "toString" and return a function, not a
  // tool type string.
  return Object.hasOwn(ANTHROPIC_TOOL_VERSION_ALIASES, rawType)
    ? ANTHROPIC_TOOL_VERSION_ALIASES[rawType]
    : rawType;
}
585
+ function toAnthropicTools(tools, toolsCacheControl) {
317
586
  if (!tools) {
318
587
  return undefined;
319
588
  }
@@ -330,17 +599,31 @@ function toAnthropicTools(tools) {
330
599
  if (!tool.id.startsWith("anthropic.")) {
331
600
  continue;
332
601
  }
333
- const providerType = tool.id.slice("anthropic.".length);
334
- if (providerType.length === 0) {
602
+ const rawType = tool.id.slice("anthropic.".length);
603
+ if (rawType.length === 0) {
335
604
  continue;
336
605
  }
337
606
  normalized.push({
338
- type: providerType,
607
+ type: resolveAnthropicProviderType(rawType),
339
608
  name: tool.name,
340
609
  ...toSnakeCaseRecord(tool.args),
341
610
  });
342
611
  }
343
- return normalized.length > 0 ? normalized : undefined;
612
+ if (normalized.length === 0) {
613
+ return undefined;
614
+ }
615
+ // Attach the cache breakpoint to the final tool entry so Anthropic caches
616
+ // the entire tools block up to and including that definition. Earlier tool
617
+ // entries are implicitly covered by the same breakpoint per Anthropic's
618
+ // walk-backward cache lookup behaviour.
619
+ if (toolsCacheControl) {
620
+ const lastIndex = normalized.length - 1;
621
+ normalized[lastIndex] = {
622
+ ...normalized[lastIndex],
623
+ cache_control: toolsCacheControl,
624
+ };
625
+ }
626
+ return normalized;
344
627
  }
345
628
  function createAnthropicRequestHeaders(options) {
346
629
  const headers = new Headers(options.extraHeaders);
@@ -397,27 +680,185 @@ function resolveAnthropicMaxTokens(modelId, callerMaxOutputTokens) {
397
680
  }
398
681
  return requested;
399
682
  }
400
- function buildAnthropicMessagesRequest(modelId, providerName, options, stream) {
401
- const { system, messages } = toAnthropicMessages(options.prompt);
683
/**
 * Map a unified reasoning option to an Anthropic `thinking.budget_tokens`
 * value. Anthropic's minimum accepted budget is 1024; higher tiers give Claude
 * more headroom to explore. `max` maps to the upper bound documented for
 * Claude 4.x family (32k tokens of thinking — caller can override via
 * `budgetTokens` if they need more).
 *
 * @param {{ enabled?: boolean, budgetTokens?: number, effort?: string } | undefined} option
 * @returns {number | undefined} `undefined` unless `option.enabled === true`.
 *   Otherwise an explicit `budgetTokens` wins when it is a finite number of
 *   at least 1024 (fractions are floored, since a token count must be a
 *   whole number). Any other `budgetTokens` is ignored and the effort tier
 *   decides: low 1024, medium/unspecified 4096, high 16384, max 32768.
 */
function resolveAnthropicThinkingBudget(option) {
  if (!option || option.enabled !== true) {
    return undefined;
  }
  const { budgetTokens, effort } = option;
  // `Infinity` would serialize to `null` and a fraction is not a valid token
  // count, so only finite values are accepted and then truncated.
  if (typeof budgetTokens === "number" && Number.isFinite(budgetTokens) && budgetTokens >= 1024) {
    return Math.floor(budgetTokens);
  }
  switch (effort) {
    case "low":
      return 1024;
    case "high":
      return 16_384;
    case "max":
      return 32_768;
    case "medium":
    default:
      return 4096;
  }
}
709
/**
 * Builds the JSON body for an Anthropic Messages API request from the
 * unified call options.
 *
 * Unified settings that are not forwarded (or are only partly honoured) are
 * reported through `warnings` instead of being ignored silently.
 *
 * @param {string} modelId Model identifier placed in `model`.
 * @param {string} providerName Passed to `readProviderOptions` together with
 *   "anthropic" to look up provider-specific overrides.
 * @param {object} options Unified call options (prompt, tools, sampling
 *   settings, reasoning, cacheControl, userId, mcpServers, ...).
 * @param {boolean} stream When truthy, `stream: true` is added to the body.
 * @param {{ push(warning: object): void }} warnings Sink for
 *   `unsupported-setting` warnings.
 * @returns {object} Request body. Provider options are assigned last, so
 *   they override any generated field of the same name.
 */
function buildAnthropicMessagesRequest(modelId, providerName, options, stream, warnings) {
  const systemCacheControl = resolveAnthropicCacheControlBlock(options.cacheControl?.system);
  const toolsCacheControl = resolveAnthropicCacheControlBlock(options.cacheControl?.tools);
  const { system, messages } = toAnthropicMessages(options.prompt, systemCacheControl);
  const anthropicTools = toAnthropicTools(options.tools, toolsCacheControl);
  const requestedThinkingBudget = resolveAnthropicThinkingBudget(options.reasoning);
  const thinkingEnabled = requestedThinkingBudget !== undefined;
  const warnUnsupported = (setting, details) => {
    warnings.push({
      type: "unsupported-setting",
      provider: "anthropic",
      setting,
      details,
    });
  };
  // Anthropic doesn't support these unified options at all — emit warnings
  // so callers don't quietly pass values that have zero effect.
  const neverSupported = [
    ["presencePenalty", "Anthropic Messages API has no equivalent and the value was dropped."],
    ["frequencyPenalty", "Anthropic Messages API has no equivalent and the value was dropped."],
    ["seed", "Anthropic Messages API does not support deterministic seeding."],
    ["topK", "Anthropic Messages API does not expose top_k on this surface."],
  ];
  for (const [setting, details] of neverSupported) {
    if (options[setting] !== undefined) {
      warnUnsupported(setting, details);
    }
  }
  if (options.stopSequences && options.stopSequences.length > 4) {
    warnUnsupported("stopSequences", `Anthropic accepts at most 4 stop sequences; ${options.stopSequences.length} were provided and the extras were truncated.`);
  }
  if (thinkingEnabled && options.temperature !== undefined) {
    warnUnsupported("temperature", "Dropped because Anthropic rejects sampling params when extended thinking is enabled.");
  }
  if (thinkingEnabled && options.topP !== undefined) {
    warnUnsupported("topP", "Dropped because Anthropic rejects sampling params when extended thinking is enabled.");
  }
  if (options.responseFormat && options.responseFormat.type !== "text") {
    warnUnsupported("responseFormat", "Anthropic Messages API does not have a structured-output response_format equivalent. Use a tool with the schema as input_schema instead.");
  }
  // Anthropic requires max_tokens > budget_tokens when thinking is enabled.
  // Growing max_tokens by the thinking budget preserves the caller's intended
  // output budget, and we clamp the sum at the model's advertised maximum so
  // the request never exceeds the API's hard cap.
  const baseMaxTokens = resolveAnthropicMaxTokens(modelId, options.maxOutputTokens);
  let maxTokens = baseMaxTokens;
  let thinkingBudget = requestedThinkingBudget;
  if (thinkingEnabled) {
    const modelMaxTokens = getAnthropicModelCapabilities(modelId).maxOutputTokens;
    maxTokens = Math.min(baseMaxTokens + requestedThinkingBudget, modelMaxTokens);
    // The clamp above can push max_tokens down to (or below) the budget,
    // which breaks the max_tokens > budget_tokens rule. Shrink the budget so
    // it fits under the cap. If the result is under Anthropic's 1024
    // minimum the model cannot host thinking at this cap and the API will
    // still reject the request, but with an accurate error.
    if (thinkingBudget >= maxTokens) {
      thinkingBudget = maxTokens - 1;
      warnUnsupported("reasoning", `Thinking budget of ${requestedThinkingBudget} tokens does not fit under the max_tokens cap of ${maxTokens}; it was reduced to ${thinkingBudget}.`);
    }
  }
  const body = {
    model: modelId,
    messages,
    max_tokens: maxTokens,
    ...(stream ? { stream: true } : {}),
    ...(system ? { system } : {}),
    // Sampling params are mutually exclusive with thinking on Anthropic — the
    // API rejects the combo outright. They are left out when thinking is on;
    // the matching warning was recorded above.
    ...(!thinkingEnabled && options.temperature !== undefined
      ? { temperature: options.temperature }
      : {}),
    ...(!thinkingEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
    ...(options.stopSequences && options.stopSequences.length > 0
      ? { stop_sequences: options.stopSequences.slice(0, 4) }
      : {}),
    ...(anthropicTools ? { tools: anthropicTools } : {}),
    ...(options.toolChoice !== undefined
      ? { tool_choice: normalizeAnthropicToolChoice(options.toolChoice) }
      : {}),
    ...(thinkingEnabled ? { thinking: { type: "enabled", budget_tokens: thinkingBudget } } : {}),
    ...(typeof options.userId === "string" && options.userId.length > 0
      ? { metadata: { user_id: options.userId } }
      : {}),
    ...(options.mcpServers && options.mcpServers.length > 0
      ? { mcp_servers: deepSnakeCase(options.mcpServers) }
      : {}),
    ...(options.anthropicContainer !== undefined ? { container: options.anthropicContainer } : {}),
  };
  Object.assign(body, readProviderOptions(options.providerOptions, "anthropic", providerName));
  return body;
}
823
/**
 * Best-effort camelCase normalization of a single Anthropic citation
 * record. Handles the union of fields across web_search_result_location,
 * web_fetch_result_location, char_location, page_location, and
 * content_block_location citation kinds — see
 * https://docs.claude.com/en/docs/build-with-claude/citations
 *
 * @param {unknown} raw Citation entry as received from the API.
 * @returns {object | undefined} `{ type, ...knownFields }` with camelCase
 *   keys, or `undefined` when `readRecord` yields nothing or `type` is not a
 *   non-empty string. A field is copied only when its value has the listed
 *   primitive type; anything else is dropped.
 */
function normalizeAnthropicCitation(raw) {
  const record = readRecord(raw);
  if (!record || typeof record.type !== "string" || record.type.length === 0) {
    return undefined;
  }
  // [wire name, normalized name, required typeof]. Order fixes the key order
  // of the returned object.
  const fieldTable = [
    ["cited_text", "citedText", "string"],
    ["url", "url", "string"],
    ["title", "title", "string"],
    ["start_char_index", "startCharIndex", "number"],
    ["end_char_index", "endCharIndex", "number"],
    ["start_block_index", "startBlockIndex", "number"],
    ["end_block_index", "endBlockIndex", "number"],
    ["start_page_number", "startPageNumber", "number"],
    ["end_page_number", "endPageNumber", "number"],
    ["document_index", "documentIndex", "number"],
    ["document_title", "documentTitle", "string"],
  ];
  const normalized = { type: record.type };
  for (const [wireName, normalizedName, expectedType] of fieldTable) {
    const fieldValue = record[wireName];
    if (typeof fieldValue === expectedType) {
      normalized[normalizedName] = fieldValue;
    }
  }
  return normalized;
}
421
862
  function buildAnthropicGenerateResult(payload) {
422
863
  const record = readRecord(payload);
423
864
  const content = Array.isArray(record?.content) ? record.content : [];
@@ -426,7 +867,40 @@ function buildAnthropicGenerateResult(payload) {
426
867
  const block = readRecord(blockValue);
427
868
  const blockType = typeof block?.type === "string" ? block.type : undefined;
428
869
  if (blockType === "text" && typeof block?.text === "string" && block.text.length > 0) {
429
- normalized.push({ type: "text", text: block.text });
870
+ const citationsRaw = Array.isArray(block.citations) ? block.citations : undefined;
871
+ const citations = citationsRaw
872
+ ?.flatMap((c) => {
873
+ const normalizedCitation = normalizeAnthropicCitation(c);
874
+ return normalizedCitation ? [normalizedCitation] : [];
875
+ });
876
+ normalized.push({
877
+ type: "text",
878
+ text: block.text,
879
+ ...(citations && citations.length > 0 ? { citations } : {}),
880
+ });
881
+ continue;
882
+ }
883
+ // Thinking blocks carry the cleartext trace plus a signature that
884
+ // Anthropic uses to verify on subsequent turns. Surfacing both lets
885
+ // callers persist them as `reasoning` content parts and replay on
886
+ // the next turn so Claude can continue from the same thinking.
887
+ if (blockType === "thinking") {
888
+ normalized.push({
889
+ type: "reasoning",
890
+ ...(typeof block?.thinking === "string" ? { text: block.thinking } : {}),
891
+ ...(typeof block?.signature === "string" ? { signature: block.signature } : {}),
892
+ });
893
+ continue;
894
+ }
895
+ // Redacted thinking blocks arrive when Claude's safety classifier
896
+ // hides the trace. Pass the encrypted blob through opaquely so the
897
+ // caller can replay it on the next turn (Anthropic still needs the
898
+ // blob to verify continuity even though it can't read it).
899
+ if (blockType === "redacted_thinking" && typeof block?.data === "string") {
900
+ normalized.push({
901
+ type: "reasoning",
902
+ redactedData: block.data,
903
+ });
430
904
  continue;
431
905
  }
432
906
  if ((blockType === "tool_use" || blockType === "server_tool_use") &&
@@ -537,6 +1011,19 @@ async function* streamAnthropicCompatibleParts(stream) {
537
1011
  }
538
1012
  continue;
539
1013
  }
1014
+ // Redacted thinking blocks arrive as opaque encrypted payloads when
1015
+ // Claude's safety classifier flags the reasoning trace. Surface them
1016
+ // as a zero-length reasoning block so callers know thinking happened
1017
+ // without leaking the (legitimately hidden) contents.
1018
+ if (blockType === "redacted_thinking") {
1019
+ const reasoningId = `thinking-${index}`;
1020
+ reasoningBlocks.set(index, { id: reasoningId });
1021
+ yield {
1022
+ type: "reasoning-start",
1023
+ id: reasoningId,
1024
+ };
1025
+ continue;
1026
+ }
540
1027
  if ((blockType === "tool_use" || blockType === "server_tool_use") &&
541
1028
  typeof contentBlock?.id === "string" &&
542
1029
  typeof contentBlock?.name === "string") {
@@ -696,10 +1183,13 @@ function extractOpenAIUsage(payload) {
696
1183
  const inputTokens = usage.prompt_tokens;
697
1184
  const outputTokens = usage.completion_tokens;
698
1185
  const totalTokens = usage.total_tokens;
1186
+ const promptTokensDetails = readRecord(usage.prompt_tokens_details);
1187
+ const cachedTokens = promptTokensDetails?.cached_tokens;
699
1188
  return {
700
1189
  inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
701
1190
  outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
702
1191
  totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
1192
+ ...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
703
1193
  };
704
1194
  }
705
1195
  function extractOpenAIContentText(content) {
@@ -742,14 +1232,81 @@ function extractOpenAIToolCalls(message) {
742
1232
  }
743
1233
  return normalized;
744
1234
  }
745
- function buildOpenAIChatRequest(modelId, providerName, options, stream) {
1235
/**
 * Reports whether a model id names an OpenAI reasoning model.
 *
 * OpenAI reasoning models (o1 / o3 / o4 family) use the completion path but
 * have different constraints than chat models: sampling params are rejected,
 * and they accept a `reasoning_effort` field. Detection is by id prefix so
 * callers don't have to configure it per runtime.
 *
 * @param {string} modelId Model identifier, e.g. "o3-mini".
 * @returns {boolean} `true` when the id is exactly "o1", "o3" or "o4", or
 *   starts with one of those followed by "-".
 */
function isOpenAIReasoningModel(modelId) {
  // Everything before the first "-" is the family segment; an id without a
  // dash is its own family segment.
  const [family] = `${modelId}`.split("-", 1);
  return family === "o1" || family === "o3" || family === "o4";
}
1244
/**
 * Map the unified reasoning effort to OpenAI's `reasoning_effort` enum.
 * OpenAI doesn't accept "max" — we collapse it to "high".
 *
 * @param {{ enabled?: boolean, effort?: string } | undefined} option
 * @returns {"low" | "medium" | "high" | undefined} `undefined` unless
 *   `option.enabled === true`; otherwise "low" for "low", "high" for
 *   "high"/"max", and "medium" for "medium" or any other/missing effort.
 */
function resolveOpenAIReasoningEffort(option) {
  const reasoningRequested = Boolean(option) && option.enabled === true;
  if (!reasoningRequested) {
    return undefined;
  }
  const { effort } = option;
  if (effort === "low") {
    return "low";
  }
  return effort === "high" || effort === "max" ? "high" : "medium";
}
1263
+ function buildOpenAIChatRequest(modelId, providerName, options, stream, warnings) {
1264
+ const isReasoningModel = isOpenAIReasoningModel(modelId);
1265
+ const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
1266
+ const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
1267
+ // OpenAI Chat Completions has no top_k surface (it's exposed only on the
1268
+ // Responses API for some reasoning models). Quietly accepting it would
1269
+ // mislead callers into thinking it took effect.
1270
+ if (options.topK !== undefined) {
1271
+ warnings.push({
1272
+ type: "unsupported-setting",
1273
+ provider: "openai",
1274
+ setting: "topK",
1275
+ details: "OpenAI Chat Completions does not expose top_k; the value was dropped.",
1276
+ });
1277
+ }
1278
+ // Reasoning models (o1 / o3 / o4) reject sampling params outright. Emit
1279
+ // warnings at build time so callers see *why* the value didn't apply
1280
+ // rather than a 400 from the API.
1281
+ if (reasoningEnabled) {
1282
+ const dropped = [
1283
+ ["temperature", "temperature"],
1284
+ ["topP", "top_p"],
1285
+ ["presencePenalty", "presence_penalty"],
1286
+ ["frequencyPenalty", "frequency_penalty"],
1287
+ ];
1288
+ for (const [key, openaiName] of dropped) {
1289
+ if (options[key] !== undefined) {
1290
+ warnings.push({
1291
+ type: "unsupported-setting",
1292
+ provider: "openai",
1293
+ setting: key,
1294
+ details: `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
1295
+ });
1296
+ }
1297
+ }
1298
+ }
746
1299
  const body = {
747
1300
  model: modelId,
748
1301
  messages: toOpenAICompatibleMessages(options.prompt),
749
1302
  ...(stream ? { stream: true, stream_options: { include_usage: true } } : {}),
750
1303
  ...(options.maxOutputTokens !== undefined ? { max_tokens: options.maxOutputTokens } : {}),
751
- ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
752
- ...(options.topP !== undefined ? { top_p: options.topP } : {}),
1304
+ // OpenAI reasoning models reject temperature / top_p / frequency / presence.
1305
+ // Drop them silently rather than letting the API bounce the request.
1306
+ ...(!reasoningEnabled && options.temperature !== undefined
1307
+ ? { temperature: options.temperature }
1308
+ : {}),
1309
+ ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
753
1310
  ...(options.stopSequences && options.stopSequences.length > 0
754
1311
  ? { stop: options.stopSequences }
755
1312
  : {}),
@@ -758,10 +1315,37 @@ function buildOpenAIChatRequest(modelId, providerName, options, stream) {
758
1315
  : {}),
759
1316
  ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
760
1317
  ...(options.seed !== undefined ? { seed: options.seed } : {}),
761
- ...(options.presencePenalty !== undefined ? { presence_penalty: options.presencePenalty } : {}),
762
- ...(options.frequencyPenalty !== undefined
1318
+ ...(!reasoningEnabled && options.presencePenalty !== undefined
1319
+ ? { presence_penalty: options.presencePenalty }
1320
+ : {}),
1321
+ ...(!reasoningEnabled && options.frequencyPenalty !== undefined
763
1322
  ? { frequency_penalty: options.frequencyPenalty }
764
1323
  : {}),
1324
+ ...(reasoningEffort !== undefined ? { reasoning_effort: reasoningEffort } : {}),
1325
+ ...(typeof options.userId === "string" && options.userId.length > 0
1326
+ ? { user: options.userId }
1327
+ : {}),
1328
+ ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
1329
+ ...(options.parallelToolCalls !== undefined
1330
+ ? { parallel_tool_calls: options.parallelToolCalls }
1331
+ : {}),
1332
+ ...(options.responseFormat && options.responseFormat.type !== "text"
1333
+ ? {
1334
+ response_format: options.responseFormat.type === "json" ? { type: "json_object" } : {
1335
+ type: "json_schema",
1336
+ json_schema: {
1337
+ name: options.responseFormat.name,
1338
+ ...(typeof options.responseFormat.description === "string"
1339
+ ? { description: options.responseFormat.description }
1340
+ : {}),
1341
+ schema: unwrapToolInputSchema(options.responseFormat.schema),
1342
+ ...(options.responseFormat.strict !== undefined
1343
+ ? { strict: options.responseFormat.strict }
1344
+ : {}),
1345
+ },
1346
+ },
1347
+ }
1348
+ : {}),
765
1349
  };
766
1350
  Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
767
1351
  return body;
@@ -791,10 +1375,14 @@ function extractGoogleUsage(payload) {
791
1375
  const inputTokens = usage.promptTokenCount;
792
1376
  const outputTokens = usage.candidatesTokenCount;
793
1377
  const totalTokens = usage.totalTokenCount;
1378
+ const cachedContentTokenCount = usage.cachedContentTokenCount;
794
1379
  return {
795
1380
  inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
796
1381
  outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
797
1382
  totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
1383
+ ...(typeof cachedContentTokenCount === "number"
1384
+ ? { cacheReadInputTokens: cachedContentTokenCount }
1385
+ : {}),
798
1386
  };
799
1387
  }
800
1388
  function toGoogleContents(prompt) {
@@ -813,18 +1401,29 @@ function toGoogleContents(prompt) {
813
1401
  parts: [{ text: readTextParts(message.content) }],
814
1402
  });
815
1403
  break;
816
- case "assistant":
817
- contents.push({
818
- role: "model",
819
- parts: message.content.map((part) => part.type === "text" ? { text: part.text } : {
1404
+ case "assistant": {
1405
+ // Anthropic-only `reasoning` parts have no Gemini equivalent
1406
+ // and are dropped on replay.
1407
+ const parts = [];
1408
+ for (const part of message.content) {
1409
+ if (part.type === "text") {
1410
+ parts.push({ text: part.text });
1411
+ continue;
1412
+ }
1413
+ if (part.type === "reasoning") {
1414
+ continue;
1415
+ }
1416
+ parts.push({
820
1417
  functionCall: {
821
1418
  id: part.toolCallId,
822
1419
  name: part.toolName,
823
1420
  args: part.input,
824
1421
  },
825
- }),
826
- });
1422
+ });
1423
+ }
1424
+ contents.push({ role: "model", parts });
827
1425
  break;
1426
+ }
828
1427
  case "tool":
829
1428
  contents.push({
830
1429
  role: "user",
@@ -852,14 +1451,37 @@ function toGoogleTools(tools) {
852
1451
  if (!tools) {
853
1452
  return undefined;
854
1453
  }
855
- const functionDeclarations = tools.flatMap((tool) => tool.type === "function"
856
- ? [{
1454
+ const functionDeclarations = [];
1455
+ const providerEntries = [];
1456
+ for (const tool of tools) {
1457
+ if (tool.type === "function") {
1458
+ functionDeclarations.push({
857
1459
  name: tool.name,
858
1460
  ...(typeof tool.description === "string" ? { description: tool.description } : {}),
859
1461
  parameters: unwrapToolInputSchema(tool.inputSchema),
860
- }]
861
- : []);
862
- return functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined;
1462
+ });
1463
+ continue;
1464
+ }
1465
+ // Gemini provider tools — code_execution, google_search,
1466
+ // google_search_retrieval — each lives in its own tools[] entry
1467
+ // with a single key keyed by the camelCase tool name and an
1468
+ // optional config payload (caller-provided tool.args).
1469
+ if (!tool.id.startsWith("google.")) {
1470
+ continue;
1471
+ }
1472
+ const providerType = tool.id.slice("google.".length);
1473
+ if (providerType.length === 0) {
1474
+ continue;
1475
+ }
1476
+ const camelKey = providerType.replace(/_([a-z])/g, (_, ch) => ch.toUpperCase());
1477
+ providerEntries.push({ [camelKey]: tool.args ?? {} });
1478
+ }
1479
+ const result = [];
1480
+ if (functionDeclarations.length > 0) {
1481
+ result.push({ functionDeclarations });
1482
+ }
1483
+ result.push(...providerEntries);
1484
+ return result.length > 0 ? result : undefined;
863
1485
  }
864
1486
  function unwrapToolInputSchema(inputSchema) {
865
1487
  if (typeof inputSchema !== "object" || inputSchema === null || Array.isArray(inputSchema)) {
@@ -884,7 +1506,11 @@ function normalizeGoogleToolChoice(toolChoice) {
884
1506
  }
885
1507
  }
886
1508
  const record = readRecord(toolChoice);
887
- if (record?.type === "tool" && typeof record.name === "string") {
1509
+ if (!record)
1510
+ return undefined;
1511
+ // Single-tool restriction: { type: "tool", name } — pin to one
1512
+ // function via mode: ANY + allowedFunctionNames: [name].
1513
+ if (record.type === "tool" && typeof record.name === "string") {
888
1514
  return {
889
1515
  functionCallingConfig: {
890
1516
  mode: "ANY",
@@ -892,9 +1518,66 @@ function normalizeGoogleToolChoice(toolChoice) {
892
1518
  },
893
1519
  };
894
1520
  }
1521
+ // Multi-tool restriction: { type: "tools", names: string[] } — pin
1522
+ // to a subset via mode: ANY + the full allowedFunctionNames array.
1523
+ if (record.type === "tools" && Array.isArray(record.names)) {
1524
+ const names = record.names.filter((n) => typeof n === "string");
1525
+ if (names.length > 0) {
1526
+ return {
1527
+ functionCallingConfig: {
1528
+ mode: "ANY",
1529
+ allowedFunctionNames: names,
1530
+ },
1531
+ };
1532
+ }
1533
+ }
1534
+ // Explicit mode forms: { type: "auto" | "none" | "any" }.
1535
+ if (record.type === "auto") {
1536
+ return { functionCallingConfig: { mode: "AUTO" } };
1537
+ }
1538
+ if (record.type === "none") {
1539
+ return { functionCallingConfig: { mode: "NONE" } };
1540
+ }
1541
+ if (record.type === "any" || record.type === "required") {
1542
+ return { functionCallingConfig: { mode: "ANY" } };
1543
+ }
895
1544
  return undefined;
896
1545
  }
1546
/**
 * Map the unified reasoning option to Gemini's thinkingConfig. Gemini 2.5+
 * accepts `includeThoughts: true` to stream back `thought` parts, and
 * `thinkingBudget: N` to cap the thinking token count. The effort levels
 * here follow Google's own guidance (low ~= 512, medium ~= 2048,
 * high ~= 8192, max = -1 means "dynamic/no cap").
 *
 * Resolution order:
 *   1. `option` missing or `option.enabled !== true` -> `undefined`
 *      (no thinkingConfig is produced).
 *   2. A finite numeric `option.budgetTokens` wins and is truncated to an
 *      integer.
 *   3. Otherwise `option.effort` is mapped to a fixed budget; unknown or
 *      missing effort values fall back to the "medium" budget.
 *
 * @param {{ enabled?: boolean, budgetTokens?: number, effort?: string } | null | undefined} option
 *   The unified reasoning option.
 * @returns {{ includeThoughts: boolean, thinkingBudget: number } | undefined}
 *   The thinkingConfig payload, or `undefined` when reasoning is not enabled.
 */
function resolveGoogleThinkingConfig(option) {
    if (!option || option.enabled !== true) {
        return undefined;
    }
    const config = { includeThoughts: true };
    // `typeof NaN` and `typeof Infinity` are both "number", but JSON.stringify
    // serialises them as `null`. Only accept finite budgets; anything else
    // falls through to the effort mapping below.
    if (Number.isFinite(option.budgetTokens)) {
        // Truncate so a fractional value is never sent as the budget.
        config.thinkingBudget = Math.trunc(option.budgetTokens);
        return config;
    }
    switch (option.effort) {
        case "low":
            config.thinkingBudget = 512;
            break;
        case "high":
            config.thinkingBudget = 8192;
            break;
        case "max":
            // -1 is the "dynamic / no cap" sentinel described above.
            config.thinkingBudget = -1;
            break;
        case "medium":
        default:
            config.thinkingBudget = 2048;
            break;
    }
    return config;
}
897
1579
  function buildGoogleGenerationConfig(options) {
1580
+ const thinkingConfig = resolveGoogleThinkingConfig(options.reasoning);
898
1581
  const config = {
899
1582
  ...(options.maxOutputTokens !== undefined ? { maxOutputTokens: options.maxOutputTokens } : {}),
900
1583
  ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
@@ -904,11 +1587,48 @@ function buildGoogleGenerationConfig(options) {
904
1587
  ? { stopSequences: options.stopSequences }
905
1588
  : {}),
906
1589
  ...(options.seed !== undefined ? { seed: options.seed } : {}),
1590
+ ...(thinkingConfig ? { thinkingConfig } : {}),
907
1591
  };
908
1592
  return Object.keys(config).length > 0 ? config : undefined;
909
1593
  }
910
- function buildGoogleGenerateContentRequest(providerName, options) {
1594
+ function buildGoogleGenerateContentRequest(providerName, options, warnings) {
1595
+ // Google generate-content surface doesn't accept presence/frequency
1596
+ // penalties on most current models. Emit warnings and let the request
1597
+ // through without them.
1598
+ if (options.presencePenalty !== undefined) {
1599
+ warnings.push({
1600
+ type: "unsupported-setting",
1601
+ provider: "google",
1602
+ setting: "presencePenalty",
1603
+ details: "Gemini generateContent does not accept presencePenalty; the value was dropped.",
1604
+ });
1605
+ }
1606
+ if (options.frequencyPenalty !== undefined) {
1607
+ warnings.push({
1608
+ type: "unsupported-setting",
1609
+ provider: "google",
1610
+ setting: "frequencyPenalty",
1611
+ details: "Gemini generateContent does not accept frequencyPenalty; the value was dropped.",
1612
+ });
1613
+ }
1614
+ if (options.responseFormat && options.responseFormat.type !== "text") {
1615
+ warnings.push({
1616
+ type: "unsupported-setting",
1617
+ provider: "google",
1618
+ setting: "responseFormat",
1619
+ details: "Gemini uses generationConfig.responseMimeType + responseSchema for structured outputs, which is a separate surface and not yet wired through this option.",
1620
+ });
1621
+ }
911
1622
  const { systemInstruction, contents } = toGoogleContents(options.prompt);
1623
+ const generationConfig = buildGoogleGenerationConfig(options);
1624
+ // requestLabels wins over userId-derived labels: when callers explicitly
1625
+ // provide a label map, that's the source of truth. Otherwise fall back
1626
+ // to {user_id} derived from the unified userId option.
1627
+ const labels = options.requestLabels && Object.keys(options.requestLabels).length > 0
1628
+ ? options.requestLabels
1629
+ : typeof options.userId === "string" && options.userId.length > 0
1630
+ ? { user_id: options.userId }
1631
+ : undefined;
912
1632
  const body = {
913
1633
  contents,
914
1634
  ...(systemInstruction ? { systemInstruction } : {}),
@@ -916,8 +1636,13 @@ function buildGoogleGenerateContentRequest(providerName, options) {
916
1636
  ...(normalizeGoogleToolChoice(options.toolChoice)
917
1637
  ? { toolConfig: normalizeGoogleToolChoice(options.toolChoice) }
918
1638
  : {}),
919
- ...(buildGoogleGenerationConfig(options)
920
- ? { generationConfig: buildGoogleGenerationConfig(options) }
1639
+ ...(generationConfig ? { generationConfig } : {}),
1640
+ ...(labels ? { labels } : {}),
1641
+ ...(typeof options.googleCachedContent === "string" && options.googleCachedContent.length > 0
1642
+ ? { cachedContent: options.googleCachedContent }
1643
+ : {}),
1644
+ ...(options.googleSafetySettings && options.googleSafetySettings.length > 0
1645
+ ? { safetySettings: options.googleSafetySettings }
921
1646
  : {}),
922
1647
  };
923
1648
  Object.assign(body, readProviderOptions(options.providerOptions, "google", providerName));
@@ -961,10 +1686,18 @@ function buildGoogleGenerateResult(payload) {
961
1686
  });
962
1687
  }
963
1688
  }
1689
+ // Gemini grounding (google_search / google_search_retrieval) returns
1690
+ // a per-candidate groundingMetadata object with web search queries,
1691
+ // grounding chunks, and citation indices into the response text.
1692
+ // Pass it through opaquely so callers can render footnotes / source
1693
+ // chips / "Search results" UI without parsing the wire shape.
1694
+ const candidate = extractFirstGoogleCandidate(payload);
1695
+ const groundingMetadata = readRecord(candidate?.groundingMetadata);
964
1696
  return {
965
1697
  content,
966
- finishReason: normalizeGoogleFinishReason(extractFirstGoogleCandidate(payload)?.finishReason),
1698
+ finishReason: normalizeGoogleFinishReason(candidate?.finishReason),
967
1699
  usage: extractGoogleUsage(payload),
1700
+ ...(groundingMetadata ? { groundingMetadata } : {}),
968
1701
  };
969
1702
  }
970
1703
  async function* streamGoogleCompatibleParts(stream) {
@@ -1238,11 +1971,13 @@ export function createOpenAIModelRuntime(config, modelId) {
1238
1971
  doGenerate(optionsForRuntime) {
1239
1972
  const options = optionsForRuntime;
1240
1973
  const url = getOpenAIChatCompletionsUrl(config.baseURL);
1241
- const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, false);
1974
+ const warnings = createWarningCollector();
1975
+ const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, false, warnings);
1242
1976
  return requestJson({
1243
1977
  url,
1244
1978
  fetchImpl,
1245
1979
  providerLabel: config.name ?? "openai",
1980
+ providerKind: "openai",
1246
1981
  init: {
1247
1982
  method: "POST",
1248
1983
  headers: createRequestHeaders({
@@ -1253,16 +1988,24 @@ export function createOpenAIModelRuntime(config, modelId) {
1253
1988
  body: JSON.stringify(body),
1254
1989
  signal: options.abortSignal,
1255
1990
  },
1256
- }).then(buildOpenAIGenerateResult);
1991
+ }).then((payload) => {
1992
+ const drained = warnings.drain();
1993
+ return {
1994
+ ...buildOpenAIGenerateResult(payload),
1995
+ ...(drained.length > 0 ? { warnings: drained } : {}),
1996
+ };
1997
+ });
1257
1998
  },
1258
1999
  doStream(optionsForRuntime) {
1259
2000
  const options = optionsForRuntime;
1260
2001
  const url = getOpenAIChatCompletionsUrl(config.baseURL);
1261
- const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, true);
2002
+ const warnings = createWarningCollector();
2003
+ const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, true, warnings);
1262
2004
  return requestStream({
1263
2005
  url,
1264
2006
  fetchImpl,
1265
2007
  providerLabel: config.name ?? "openai",
2008
+ providerKind: "openai",
1266
2009
  init: {
1267
2010
  method: "POST",
1268
2011
  headers: createRequestHeaders({
@@ -1273,9 +2016,13 @@ export function createOpenAIModelRuntime(config, modelId) {
1273
2016
  body: JSON.stringify(body),
1274
2017
  signal: options.abortSignal,
1275
2018
  },
1276
- }).then((responseStream) => ({
1277
- stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
1278
- }));
2019
+ }).then((responseStream) => {
2020
+ const drained = warnings.drain();
2021
+ return {
2022
+ stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
2023
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2024
+ };
2025
+ });
1279
2026
  },
1280
2027
  };
1281
2028
  }
@@ -1289,11 +2036,13 @@ export function createAnthropicModelRuntime(config, modelId) {
1289
2036
  doGenerate(optionsForRuntime) {
1290
2037
  const options = optionsForRuntime;
1291
2038
  const url = getAnthropicMessagesUrl(config.baseURL);
1292
- const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, false);
2039
+ const warnings = createWarningCollector();
2040
+ const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, false, warnings);
1293
2041
  return requestJson({
1294
2042
  url,
1295
2043
  fetchImpl,
1296
2044
  providerLabel: config.name ?? "anthropic",
2045
+ providerKind: "anthropic",
1297
2046
  init: {
1298
2047
  method: "POST",
1299
2048
  headers: createAnthropicRequestHeaders({
@@ -1304,16 +2053,24 @@ export function createAnthropicModelRuntime(config, modelId) {
1304
2053
  body: JSON.stringify(body),
1305
2054
  signal: options.abortSignal,
1306
2055
  },
1307
- }).then(buildAnthropicGenerateResult);
2056
+ }).then((payload) => {
2057
+ const drained = warnings.drain();
2058
+ return {
2059
+ ...buildAnthropicGenerateResult(payload),
2060
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2061
+ };
2062
+ });
1308
2063
  },
1309
2064
  doStream(optionsForRuntime) {
1310
2065
  const options = optionsForRuntime;
1311
2066
  const url = getAnthropicMessagesUrl(config.baseURL);
1312
- const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, true);
2067
+ const warnings = createWarningCollector();
2068
+ const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, true, warnings);
1313
2069
  return requestStream({
1314
2070
  url,
1315
2071
  fetchImpl,
1316
2072
  providerLabel: config.name ?? "anthropic",
2073
+ providerKind: "anthropic",
1317
2074
  init: {
1318
2075
  method: "POST",
1319
2076
  headers: createAnthropicRequestHeaders({
@@ -1324,9 +2081,13 @@ export function createAnthropicModelRuntime(config, modelId) {
1324
2081
  body: JSON.stringify(body),
1325
2082
  signal: options.abortSignal,
1326
2083
  },
1327
- }).then((responseStream) => ({
1328
- stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
1329
- }));
2084
+ }).then((responseStream) => {
2085
+ const drained = warnings.drain();
2086
+ return {
2087
+ stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
2088
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2089
+ };
2090
+ });
1330
2091
  },
1331
2092
  };
1332
2093
  }
@@ -1340,11 +2101,13 @@ export function createGoogleModelRuntime(config, modelId) {
1340
2101
  doGenerate(optionsForRuntime) {
1341
2102
  const options = optionsForRuntime;
1342
2103
  const url = getGoogleGenerateContentUrl(config.baseURL, modelId);
1343
- const body = buildGoogleGenerateContentRequest(config.name ?? "google", options);
2104
+ const warnings = createWarningCollector();
2105
+ const body = buildGoogleGenerateContentRequest(config.name ?? "google", options, warnings);
1344
2106
  return requestJson({
1345
2107
  url,
1346
2108
  fetchImpl,
1347
2109
  providerLabel: config.name ?? "google",
2110
+ providerKind: "google",
1348
2111
  init: {
1349
2112
  method: "POST",
1350
2113
  headers: createRequestHeaders({
@@ -1355,16 +2118,24 @@ export function createGoogleModelRuntime(config, modelId) {
1355
2118
  body: JSON.stringify(body),
1356
2119
  signal: options.abortSignal,
1357
2120
  },
1358
- }).then(buildGoogleGenerateResult);
2121
+ }).then((payload) => {
2122
+ const drained = warnings.drain();
2123
+ return {
2124
+ ...buildGoogleGenerateResult(payload),
2125
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2126
+ };
2127
+ });
1359
2128
  },
1360
2129
  doStream(optionsForRuntime) {
1361
2130
  const options = optionsForRuntime;
1362
2131
  const url = getGoogleStreamGenerateContentUrl(config.baseURL, modelId);
1363
- const body = buildGoogleGenerateContentRequest(config.name ?? "google", options);
2132
+ const warnings = createWarningCollector();
2133
+ const body = buildGoogleGenerateContentRequest(config.name ?? "google", options, warnings);
1364
2134
  return requestStream({
1365
2135
  url,
1366
2136
  fetchImpl,
1367
2137
  providerLabel: config.name ?? "google",
2138
+ providerKind: "google",
1368
2139
  init: {
1369
2140
  method: "POST",
1370
2141
  headers: createRequestHeaders({
@@ -1375,9 +2146,13 @@ export function createGoogleModelRuntime(config, modelId) {
1375
2146
  body: JSON.stringify(body),
1376
2147
  signal: options.abortSignal,
1377
2148
  },
1378
- }).then((responseStream) => ({
1379
- stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
1380
- }));
2149
+ }).then((responseStream) => {
2150
+ const drained = warnings.drain();
2151
+ return {
2152
+ stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
2153
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2154
+ };
2155
+ });
1381
2156
  },
1382
2157
  };
1383
2158
  }
@@ -1400,6 +2175,7 @@ export function createOpenAIEmbeddingRuntime(config, modelId) {
1400
2175
  url,
1401
2176
  fetchImpl,
1402
2177
  providerLabel: config.name ?? "openai",
2178
+ providerKind: "openai",
1403
2179
  init: {
1404
2180
  method: "POST",
1405
2181
  headers: {
@@ -1442,6 +2218,7 @@ export function createGoogleEmbeddingRuntime(config, modelId) {
1442
2218
  url,
1443
2219
  fetchImpl,
1444
2220
  providerLabel: config.name ?? "google",
2221
+ providerKind: "google",
1445
2222
  init: {
1446
2223
  method: "POST",
1447
2224
  headers: {