veryfront 0.1.207 → 0.1.209

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,9 @@ function getAnthropicMessagesUrl(baseURL) {
13
13
  function getOpenAIChatCompletionsUrl(baseURL) {
14
14
  return joinUrl(baseURL ?? DEFAULT_OPENAI_BASE_URL, "chat/completions");
15
15
  }
16
+ function getOpenAIResponsesUrl(baseURL) {
17
+ return joinUrl(baseURL ?? DEFAULT_OPENAI_BASE_URL, "responses");
18
+ }
16
19
  function getGoogleGenerateContentUrl(baseURL, modelId) {
17
20
  return joinUrl(baseURL ?? DEFAULT_GOOGLE_BASE_URL, `models/${encodeURIComponent(modelId)}:generateContent`);
18
21
  }
@@ -77,26 +80,184 @@ function extractGoogleUsageTokens(payload) {
77
80
  const promptTokenCount = usageMetadata?.promptTokenCount;
78
81
  return typeof promptTokenCount === "number" ? promptTokenCount : undefined;
79
82
  }
80
- async function readErrorMessage(response) {
81
- const text = await response.text();
82
- return text.trim() || `${response.status} ${response.statusText}`.trim();
83
+ function createWarningCollector() {
84
+ const list = [];
85
+ return {
86
+ push(warning) {
87
+ list.push(warning);
88
+ },
89
+ drain() {
90
+ return list.slice();
91
+ },
92
+ };
93
+ }
94
+ /**
95
+ * Base class for typed provider errors. The `retryable` flag is the
96
+ * primary signal for callers (or a retry wrapper) to decide whether to
97
+ * re-issue the request. `retryAfterMs` is set when the provider gave an
98
+ * explicit delay hint (Retry-After header, Retry-Info trailer).
99
+ */
100
+ export class ProviderError extends Error {
101
+ provider;
102
+ status;
103
+ retryable;
104
+ retryAfterMs;
105
+ constructor(options) {
106
+ super(options.message);
107
+ this.name = globalThis[Symbol.for("import-meta-ponyfill-esmodule")](import.meta).name;
108
+ this.provider = options.provider;
109
+ this.status = options.status;
110
+ this.retryable = options.retryable;
111
+ if (options.retryAfterMs !== undefined) {
112
+ this.retryAfterMs = options.retryAfterMs;
113
+ }
114
+ }
115
+ }
116
+ /** Provider reports it is overloaded (Anthropic 529, OpenAI/Google 503). */
117
+ export class ProviderOverloadedError extends ProviderError {
118
+ }
119
+ /** Provider is rate limiting this API key (OpenAI/Google 429 with Retry-After). */
120
+ export class ProviderRateLimitError extends ProviderError {
121
+ }
122
+ /** Provider account quota is exhausted — non-retryable. */
123
+ export class ProviderQuotaError extends ProviderError {
124
+ }
125
+ /** Non-retryable 4xx/5xx that doesn't fit another bucket. */
126
+ export class ProviderRequestError extends ProviderError {
127
+ }
128
+ function parseRetryAfterMs(header) {
129
+ if (!header)
130
+ return undefined;
131
+ const asNumber = Number(header);
132
+ if (Number.isFinite(asNumber) && asNumber >= 0) {
133
+ return Math.round(asNumber * 1000);
134
+ }
135
+ // HTTP-date form (rare in practice for LLM providers).
136
+ const parsed = Date.parse(header);
137
+ if (!Number.isNaN(parsed)) {
138
+ return Math.max(0, parsed - Date.now());
139
+ }
140
+ return undefined;
141
+ }
142
+ /**
143
+ * Inspect a non-2xx response and build the most specific ProviderError
144
+ * subclass we can. Reads the response body as text (it's already dead
145
+ * on the wire by this point). Body classification handles the cases
146
+ * where HTTP status alone is ambiguous — notably OpenAI
147
+ * `insufficient_quota` vs `rate_limit_exceeded` both arriving as 429.
148
+ */
149
+ async function buildProviderError(provider, response) {
150
+ const rawBody = await response.text();
151
+ const message = rawBody.trim() || `${response.status} ${response.statusText}`.trim();
152
+ const status = response.status;
153
+ const retryAfterMs = parseRetryAfterMs(response.headers.get("retry-after"));
154
+ const parsedBody = (() => {
155
+ try {
156
+ return JSON.parse(rawBody);
157
+ }
158
+ catch {
159
+ return undefined;
160
+ }
161
+ })();
162
+ const errorRecord = readRecord(parsedBody?.error);
163
+ const errorCode = typeof errorRecord?.code === "string"
164
+ ? errorRecord.code
165
+ : typeof errorRecord?.type === "string"
166
+ ? errorRecord.type
167
+ : typeof errorRecord?.status === "string"
168
+ ? errorRecord.status
169
+ : undefined;
170
+ // Anthropic 529 = overloaded. Anthropic surfaces this with
171
+ // { error: { type: "overloaded_error" } } in the body.
172
+ if (provider === "anthropic" && status === 529) {
173
+ return new ProviderOverloadedError({
174
+ provider,
175
+ status,
176
+ message,
177
+ retryable: true,
178
+ ...(retryAfterMs !== undefined ? { retryAfterMs } : {}),
179
+ });
180
+ }
181
+ // OpenAI / Google 503 = overloaded.
182
+ if ((provider === "openai" || provider === "google") && status === 503) {
183
+ return new ProviderOverloadedError({
184
+ provider,
185
+ status,
186
+ message,
187
+ retryable: true,
188
+ ...(retryAfterMs !== undefined ? { retryAfterMs } : {}),
189
+ });
190
+ }
191
+ // OpenAI 429 splits based on the error code in the body:
192
+ // - insufficient_quota → hard quota, non-retryable
193
+ // - rate_limit_exceeded / tokens_per_min_exceeded → retry with Retry-After
194
+ if (provider === "openai" && status === 429) {
195
+ if (errorCode === "insufficient_quota") {
196
+ return new ProviderQuotaError({
197
+ provider,
198
+ status,
199
+ message,
200
+ retryable: false,
201
+ });
202
+ }
203
+ return new ProviderRateLimitError({
204
+ provider,
205
+ status,
206
+ message,
207
+ retryable: true,
208
+ ...(retryAfterMs !== undefined ? { retryAfterMs } : {}),
209
+ });
210
+ }
211
+ // Google 429 RESOURCE_EXHAUSTED is almost always the daily free-tier
212
+ // quota — surface as a hard quota error so callers don't hot-loop on
213
+ // retries that can't possibly succeed until midnight UTC.
214
+ if (provider === "google" && status === 429) {
215
+ if (errorCode === "RESOURCE_EXHAUSTED") {
216
+ return new ProviderQuotaError({
217
+ provider,
218
+ status,
219
+ message,
220
+ retryable: false,
221
+ });
222
+ }
223
+ return new ProviderRateLimitError({
224
+ provider,
225
+ status,
226
+ message,
227
+ retryable: true,
228
+ ...(retryAfterMs !== undefined ? { retryAfterMs } : {}),
229
+ });
230
+ }
231
+ return new ProviderRequestError({
232
+ provider,
233
+ status,
234
+ message,
235
+ retryable: false,
236
+ });
83
237
  }
84
238
  async function requestJson(options) {
85
239
  const response = await options.fetchImpl(options.url, options.init);
86
240
  if (!response.ok) {
87
- const message = await readErrorMessage(response);
88
- throw new Error(`${options.providerLabel} request failed: ${message}`);
241
+ const err = await buildProviderError(options.providerKind, response);
242
+ err.message = `${options.providerLabel} request failed: ${err.message}`;
243
+ throw err;
89
244
  }
90
245
  return response.json();
91
246
  }
92
247
  async function requestStream(options) {
93
248
  const response = await options.fetchImpl(options.url, options.init);
94
249
  if (!response.ok) {
95
- const message = await readErrorMessage(response);
96
- throw new Error(`${options.providerLabel} request failed: ${message}`);
250
+ const err = await buildProviderError(options.providerKind, response);
251
+ err.message = `${options.providerLabel} request failed: ${err.message}`;
252
+ throw err;
97
253
  }
98
254
  if (!response.body) {
99
- throw new Error(`${options.providerLabel} request failed: stream body missing`);
255
+ throw new ProviderRequestError({
256
+ provider: options.providerKind,
257
+ status: response.status,
258
+ message: `${options.providerLabel} request failed: stream body missing`,
259
+ retryable: false,
260
+ });
100
261
  }
101
262
  return response.body;
102
263
  }
@@ -133,6 +294,11 @@ function toOpenAICompatibleMessages(prompt) {
133
294
  text += part.text;
134
295
  continue;
135
296
  }
297
+ // OpenAI Chat Completions has no roundtrip slot for Anthropic
298
+ // thinking blocks — they get dropped on replay. Anthropic-only.
299
+ if (part.type === "reasoning") {
300
+ continue;
301
+ }
136
302
  toolCalls.push({
137
303
  id: part.toolCallId,
138
304
  type: "function",
@@ -222,6 +388,8 @@ function extractAnthropicUsage(payload) {
222
388
  }
223
389
  const inputTokens = usage.input_tokens;
224
390
  const outputTokens = usage.output_tokens;
391
+ const cacheCreationInputTokens = usage.cache_creation_input_tokens;
392
+ const cacheReadInputTokens = usage.cache_read_input_tokens;
225
393
  return {
226
394
  inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
227
395
  outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
@@ -229,6 +397,8 @@ function extractAnthropicUsage(payload) {
229
397
  ? (typeof inputTokens === "number" ? inputTokens : 0) +
230
398
  (typeof outputTokens === "number" ? outputTokens : 0)
231
399
  : undefined,
400
+ ...(typeof cacheCreationInputTokens === "number" ? { cacheCreationInputTokens } : {}),
401
+ ...(typeof cacheReadInputTokens === "number" ? { cacheReadInputTokens } : {}),
232
402
  };
233
403
  }
234
404
  function mergeUsage(current, next) {
@@ -240,10 +410,15 @@ function mergeUsage(current, next) {
240
410
  }
241
411
  const inputTokens = next.inputTokens ?? current.inputTokens;
242
412
  const outputTokens = next.outputTokens ?? current.outputTokens;
413
+ const cacheCreationInputTokens = next.cacheCreationInputTokens ??
414
+ current.cacheCreationInputTokens;
415
+ const cacheReadInputTokens = next.cacheReadInputTokens ?? current.cacheReadInputTokens;
243
416
  return {
244
417
  inputTokens,
245
418
  outputTokens,
246
419
  totalTokens: (inputTokens ?? 0) + (outputTokens ?? 0),
420
+ ...(cacheCreationInputTokens !== undefined ? { cacheCreationInputTokens } : {}),
421
+ ...(cacheReadInputTokens !== undefined ? { cacheReadInputTokens } : {}),
247
422
  };
248
423
  }
249
424
  function normalizeAnthropicToolChoice(toolChoice) {
@@ -258,6 +433,23 @@ function toSnakeCaseRecord(record) {
258
433
  value,
259
434
  ]));
260
435
  }
436
+ /**
437
+ * Recursive snake_case key converter for nested config objects (used for
438
+ * Anthropic mcp_servers, where authorizationToken / toolConfiguration /
439
+ * allowedTools all need conversion).
440
+ */
441
+ function deepSnakeCase(value) {
442
+ if (Array.isArray(value)) {
443
+ return value.map(deepSnakeCase);
444
+ }
445
+ if (value !== null && typeof value === "object") {
446
+ return Object.fromEntries(Object.entries(value).map(([key, v]) => [
447
+ key.replace(/[A-Z]/g, (match) => `_${match.toLowerCase()}`),
448
+ deepSnakeCase(v),
449
+ ]));
450
+ }
451
+ return value;
452
+ }
261
453
  function pushAnthropicUserContent(messages, content) {
262
454
  if (content.length === 0) {
263
455
  return;
@@ -272,7 +464,23 @@ function pushAnthropicUserContent(messages, content) {
272
464
  content,
273
465
  });
274
466
  }
275
- function toAnthropicMessages(prompt) {
467
+ /**
468
+ * Resolves a {@link ProviderCacheTtl} into Anthropic's `cache_control` shape.
469
+ *
470
+ * Returns `undefined` when caching is not requested (`false` / `undefined`),
471
+ * `{ type: "ephemeral" }` for the 5-minute default (`true` / `"5m"`), or
472
+ * `{ type: "ephemeral", ttl: "1h" }` for the extended 1-hour cache.
473
+ */
474
+ function resolveAnthropicCacheControlBlock(ttl) {
475
+ if (ttl === undefined || ttl === false) {
476
+ return undefined;
477
+ }
478
+ if (ttl === "1h") {
479
+ return { type: "ephemeral", ttl: "1h" };
480
+ }
481
+ return { type: "ephemeral" };
482
+ }
483
+ function toAnthropicMessages(prompt, systemCacheControl) {
276
484
  const systemParts = [];
277
485
  const messages = [];
278
486
  for (const message of prompt) {
@@ -291,11 +499,32 @@ function toAnthropicMessages(prompt) {
291
499
  case "assistant":
292
500
  messages.push({
293
501
  role: "assistant",
294
- content: message.content.map((part) => part.type === "text" ? { type: "text", text: part.text } : {
295
- type: "tool_use",
296
- id: part.toolCallId,
297
- name: part.toolName,
298
- input: part.input,
502
+ content: message.content.map((part) => {
503
+ if (part.type === "text") {
504
+ return { type: "text", text: part.text };
505
+ }
506
+ if (part.type === "reasoning") {
507
+ // Redacted thinking blocks roundtrip as the encrypted blob
508
+ // form Anthropic gave us. Plain thinking blocks need the
509
+ // signature to verify on the server.
510
+ if (typeof part.redactedData === "string") {
511
+ return {
512
+ type: "redacted_thinking",
513
+ data: part.redactedData,
514
+ };
515
+ }
516
+ return {
517
+ type: "thinking",
518
+ thinking: part.text ?? "",
519
+ ...(typeof part.signature === "string" ? { signature: part.signature } : {}),
520
+ };
521
+ }
522
+ return {
523
+ type: "tool_use",
524
+ id: part.toolCallId,
525
+ name: part.toolName,
526
+ input: part.input,
527
+ };
299
528
  }),
300
529
  });
301
530
  break;
@@ -308,12 +537,55 @@ function toAnthropicMessages(prompt) {
308
537
  break;
309
538
  }
310
539
  }
311
- return {
312
- ...(systemParts.length > 0 ? { system: systemParts.join("\n\n") } : {}),
313
- messages,
314
- };
540
+ if (systemParts.length === 0) {
541
+ return { messages };
542
+ }
543
+ const joined = systemParts.join("\n\n");
544
+ // Cache-controlled system prompts must use the array-of-blocks form so the
545
+ // breakpoint lands on an individual content block. Callers that don't opt
546
+ // in keep the legacy raw-string form for backward compatibility.
547
+ if (systemCacheControl) {
548
+ return {
549
+ system: [{
550
+ type: "text",
551
+ text: joined,
552
+ cache_control: systemCacheControl,
553
+ }],
554
+ messages,
555
+ };
556
+ }
557
+ return { system: joined, messages };
558
+ }
559
+ /**
560
+ * Short-name → latest-versioned-type alias map for Anthropic provider tools.
561
+ *
562
+ * Anthropic tool types are date-stamped (e.g. `code_execution_20260120`) so
563
+ * callers either pin a version or get the latest. We accept both: a caller
564
+ * can pass `anthropic.code_execution` and we map to the latest known version,
565
+ * or pass `anthropic.code_execution_20250522` and we forward verbatim.
566
+ *
567
+ * Versions chosen here are the latest documented releases as of 2026-04-15
568
+ * — see https://docs.claude.com/en/docs/agents-and-tools/tool-use/overview.
569
+ * When Anthropic ships newer versions, update this map.
570
+ */
571
+ const ANTHROPIC_TOOL_VERSION_ALIASES = {
572
+ code_execution: "code_execution_20260120",
573
+ computer_use: "computer_20250124",
574
+ computer: "computer_20250124",
575
+ text_editor: "text_editor_20250728",
576
+ bash: "bash_20250124",
577
+ memory: "memory_20250818",
578
+ web_search: "web_search_20250305",
579
+ web_fetch: "web_fetch_20250910",
580
+ };
581
+ function resolveAnthropicProviderType(rawType) {
582
+ // Already-versioned types (contain a date stamp suffix) pass through verbatim.
583
+ if (/_\d{8}$/.test(rawType)) {
584
+ return rawType;
585
+ }
586
+ return ANTHROPIC_TOOL_VERSION_ALIASES[rawType] ?? rawType;
315
587
  }
316
- function toAnthropicTools(tools) {
588
+ function toAnthropicTools(tools, toolsCacheControl) {
317
589
  if (!tools) {
318
590
  return undefined;
319
591
  }
@@ -330,17 +602,31 @@ function toAnthropicTools(tools) {
330
602
  if (!tool.id.startsWith("anthropic.")) {
331
603
  continue;
332
604
  }
333
- const providerType = tool.id.slice("anthropic.".length);
334
- if (providerType.length === 0) {
605
+ const rawType = tool.id.slice("anthropic.".length);
606
+ if (rawType.length === 0) {
335
607
  continue;
336
608
  }
337
609
  normalized.push({
338
- type: providerType,
610
+ type: resolveAnthropicProviderType(rawType),
339
611
  name: tool.name,
340
612
  ...toSnakeCaseRecord(tool.args),
341
613
  });
342
614
  }
343
- return normalized.length > 0 ? normalized : undefined;
615
+ if (normalized.length === 0) {
616
+ return undefined;
617
+ }
618
+ // Attach the cache breakpoint to the final tool entry so Anthropic caches
619
+ // the entire tools block up to and including that definition. Earlier tool
620
+ // entries are implicitly covered by the same breakpoint per Anthropic's
621
+ // walk-backward cache lookup behaviour.
622
+ if (toolsCacheControl) {
623
+ const lastIndex = normalized.length - 1;
624
+ normalized[lastIndex] = {
625
+ ...normalized[lastIndex],
626
+ cache_control: toolsCacheControl,
627
+ };
628
+ }
629
+ return normalized;
344
630
  }
345
631
  function createAnthropicRequestHeaders(options) {
346
632
  const headers = new Headers(options.extraHeaders);
@@ -397,27 +683,185 @@ function resolveAnthropicMaxTokens(modelId, callerMaxOutputTokens) {
397
683
  }
398
684
  return requested;
399
685
  }
400
- function buildAnthropicMessagesRequest(modelId, providerName, options, stream) {
401
- const { system, messages } = toAnthropicMessages(options.prompt);
686
+ /**
687
+ * Map a unified reasoning effort level to an Anthropic `thinking.budget_tokens`
688
+ * value. Anthropic's minimum accepted budget is 1024; higher tiers give Claude
689
+ * more headroom to explore. `max` maps to the upper bound documented for
690
+ * Claude 4.x family (32k tokens of thinking — caller can override via
691
+ * `budgetTokens` if they need more).
692
+ */
693
+ function resolveAnthropicThinkingBudget(option) {
694
+ if (!option || option.enabled !== true) {
695
+ return undefined;
696
+ }
697
+ if (typeof option.budgetTokens === "number" && option.budgetTokens >= 1024) {
698
+ return option.budgetTokens;
699
+ }
700
+ switch (option.effort) {
701
+ case "low":
702
+ return 1024;
703
+ case "high":
704
+ return 16_384;
705
+ case "max":
706
+ return 32_768;
707
+ case "medium":
708
+ default:
709
+ return 4096;
710
+ }
711
+ }
712
+ function buildAnthropicMessagesRequest(modelId, providerName, options, stream, warnings) {
713
+ const systemCacheControl = resolveAnthropicCacheControlBlock(options.cacheControl?.system);
714
+ const toolsCacheControl = resolveAnthropicCacheControlBlock(options.cacheControl?.tools);
715
+ const { system, messages } = toAnthropicMessages(options.prompt, systemCacheControl);
716
+ const anthropicTools = toAnthropicTools(options.tools, toolsCacheControl);
717
+ const thinkingBudget = resolveAnthropicThinkingBudget(options.reasoning);
718
+ const thinkingEnabled = thinkingBudget !== undefined;
719
+ // Anthropic doesn't support these unified options at all — emit warnings
720
+ // so callers don't quietly pass values that have zero effect.
721
+ if (options.presencePenalty !== undefined) {
722
+ warnings.push({
723
+ type: "unsupported-setting",
724
+ provider: "anthropic",
725
+ setting: "presencePenalty",
726
+ details: "Anthropic Messages API has no equivalent and the value was dropped.",
727
+ });
728
+ }
729
+ if (options.frequencyPenalty !== undefined) {
730
+ warnings.push({
731
+ type: "unsupported-setting",
732
+ provider: "anthropic",
733
+ setting: "frequencyPenalty",
734
+ details: "Anthropic Messages API has no equivalent and the value was dropped.",
735
+ });
736
+ }
737
+ if (options.seed !== undefined) {
738
+ warnings.push({
739
+ type: "unsupported-setting",
740
+ provider: "anthropic",
741
+ setting: "seed",
742
+ details: "Anthropic Messages API does not support deterministic seeding.",
743
+ });
744
+ }
745
+ if (options.topK !== undefined) {
746
+ warnings.push({
747
+ type: "unsupported-setting",
748
+ provider: "anthropic",
749
+ setting: "topK",
750
+ details: "Anthropic Messages API does not expose top_k on this surface.",
751
+ });
752
+ }
753
+ if (options.stopSequences && options.stopSequences.length > 4) {
754
+ warnings.push({
755
+ type: "unsupported-setting",
756
+ provider: "anthropic",
757
+ setting: "stopSequences",
758
+ details: `Anthropic accepts at most 4 stop sequences; ${options.stopSequences.length} were provided and the extras were truncated.`,
759
+ });
760
+ }
761
+ if (thinkingEnabled && options.temperature !== undefined) {
762
+ warnings.push({
763
+ type: "unsupported-setting",
764
+ provider: "anthropic",
765
+ setting: "temperature",
766
+ details: "Dropped because Anthropic rejects sampling params when extended thinking is enabled.",
767
+ });
768
+ }
769
+ if (thinkingEnabled && options.topP !== undefined) {
770
+ warnings.push({
771
+ type: "unsupported-setting",
772
+ provider: "anthropic",
773
+ setting: "topP",
774
+ details: "Dropped because Anthropic rejects sampling params when extended thinking is enabled.",
775
+ });
776
+ }
777
+ if (options.responseFormat && options.responseFormat.type !== "text") {
778
+ warnings.push({
779
+ type: "unsupported-setting",
780
+ provider: "anthropic",
781
+ setting: "responseFormat",
782
+ details: "Anthropic Messages API does not have a structured-output response_format equivalent. Use a tool with the schema as input_schema instead.",
783
+ });
784
+ }
785
+ // Anthropic requires max_tokens > budget_tokens when thinking is enabled.
786
+ // Growing max_tokens by the thinking budget preserves the caller's intended
787
+ // output budget, and we clamp the sum at the model's advertised maximum so
788
+ // the request never exceeds the API's hard cap.
789
+ const baseMaxTokens = resolveAnthropicMaxTokens(modelId, options.maxOutputTokens);
790
+ const maxTokens = thinkingEnabled
791
+ ? Math.min(baseMaxTokens + (thinkingBudget ?? 0), getAnthropicModelCapabilities(modelId).maxOutputTokens)
792
+ : baseMaxTokens;
402
793
  const body = {
403
794
  model: modelId,
404
795
  messages,
405
- max_tokens: resolveAnthropicMaxTokens(modelId, options.maxOutputTokens),
796
+ max_tokens: maxTokens,
406
797
  ...(stream ? { stream: true } : {}),
407
798
  ...(system ? { system } : {}),
408
- ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
409
- ...(options.topP !== undefined ? { top_p: options.topP } : {}),
799
+ // Sampling params are mutually exclusive with thinking on Anthropic — the
800
+ // API rejects the combo outright. Drop them silently when thinking is on
801
+ // (callers see thinking's output instead of what they'd have gotten from
802
+ // custom sampling, which is the documented tradeoff).
803
+ ...(!thinkingEnabled && options.temperature !== undefined
804
+ ? { temperature: options.temperature }
805
+ : {}),
806
+ ...(!thinkingEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
410
807
  ...(options.stopSequences && options.stopSequences.length > 0
411
- ? { stop_sequences: options.stopSequences }
808
+ ? { stop_sequences: options.stopSequences.slice(0, 4) }
412
809
  : {}),
413
- ...(toAnthropicTools(options.tools) ? { tools: toAnthropicTools(options.tools) } : {}),
810
+ ...(anthropicTools ? { tools: anthropicTools } : {}),
414
811
  ...(options.toolChoice !== undefined
415
812
  ? { tool_choice: normalizeAnthropicToolChoice(options.toolChoice) }
416
813
  : {}),
814
+ ...(thinkingEnabled ? { thinking: { type: "enabled", budget_tokens: thinkingBudget } } : {}),
815
+ ...(typeof options.userId === "string" && options.userId.length > 0
816
+ ? { metadata: { user_id: options.userId } }
817
+ : {}),
818
+ ...(options.mcpServers && options.mcpServers.length > 0
819
+ ? { mcp_servers: deepSnakeCase(options.mcpServers) }
820
+ : {}),
821
+ ...(options.anthropicContainer !== undefined ? { container: options.anthropicContainer } : {}),
417
822
  };
418
823
  Object.assign(body, readProviderOptions(options.providerOptions, "anthropic", providerName));
419
824
  return body;
420
825
  }
826
+ /**
827
+ * Best-effort camelCase normalization of a single Anthropic citation
828
+ * record. Handles the union of fields across web_search_result_location,
829
+ * web_fetch_result_location, char_location, page_location, and
830
+ * content_block_location citation kinds — see
831
+ * https://docs.claude.com/en/docs/build-with-claude/citations
832
+ */
833
+ function normalizeAnthropicCitation(raw) {
834
+ const r = readRecord(raw);
835
+ if (!r)
836
+ return undefined;
837
+ const typeStr = typeof r.type === "string" ? r.type : undefined;
838
+ if (!typeStr)
839
+ return undefined;
840
+ const out = { type: typeStr };
841
+ if (typeof r.cited_text === "string")
842
+ out.citedText = r.cited_text;
843
+ if (typeof r.url === "string")
844
+ out.url = r.url;
845
+ if (typeof r.title === "string")
846
+ out.title = r.title;
847
+ if (typeof r.start_char_index === "number")
848
+ out.startCharIndex = r.start_char_index;
849
+ if (typeof r.end_char_index === "number")
850
+ out.endCharIndex = r.end_char_index;
851
+ if (typeof r.start_block_index === "number")
852
+ out.startBlockIndex = r.start_block_index;
853
+ if (typeof r.end_block_index === "number")
854
+ out.endBlockIndex = r.end_block_index;
855
+ if (typeof r.start_page_number === "number")
856
+ out.startPageNumber = r.start_page_number;
857
+ if (typeof r.end_page_number === "number")
858
+ out.endPageNumber = r.end_page_number;
859
+ if (typeof r.document_index === "number")
860
+ out.documentIndex = r.document_index;
861
+ if (typeof r.document_title === "string")
862
+ out.documentTitle = r.document_title;
863
+ return out;
864
+ }
421
865
  function buildAnthropicGenerateResult(payload) {
422
866
  const record = readRecord(payload);
423
867
  const content = Array.isArray(record?.content) ? record.content : [];
@@ -426,7 +870,40 @@ function buildAnthropicGenerateResult(payload) {
426
870
  const block = readRecord(blockValue);
427
871
  const blockType = typeof block?.type === "string" ? block.type : undefined;
428
872
  if (blockType === "text" && typeof block?.text === "string" && block.text.length > 0) {
429
- normalized.push({ type: "text", text: block.text });
873
+ const citationsRaw = Array.isArray(block.citations) ? block.citations : undefined;
874
+ const citations = citationsRaw
875
+ ?.flatMap((c) => {
876
+ const normalizedCitation = normalizeAnthropicCitation(c);
877
+ return normalizedCitation ? [normalizedCitation] : [];
878
+ });
879
+ normalized.push({
880
+ type: "text",
881
+ text: block.text,
882
+ ...(citations && citations.length > 0 ? { citations } : {}),
883
+ });
884
+ continue;
885
+ }
886
+ // Thinking blocks carry the cleartext trace plus a signature that
887
+ // Anthropic uses to verify on subsequent turns. Surfacing both lets
888
+ // callers persist them as `reasoning` content parts and replay on
889
+ // the next turn so Claude can continue from the same thinking.
890
+ if (blockType === "thinking") {
891
+ normalized.push({
892
+ type: "reasoning",
893
+ ...(typeof block?.thinking === "string" ? { text: block.thinking } : {}),
894
+ ...(typeof block?.signature === "string" ? { signature: block.signature } : {}),
895
+ });
896
+ continue;
897
+ }
898
+ // Redacted thinking blocks arrive when Claude's safety classifier
899
+ // hides the trace. Pass the encrypted blob through opaquely so the
900
+ // caller can replay it on the next turn (Anthropic still needs the
901
+ // blob to verify continuity even though it can't read it).
902
+ if (blockType === "redacted_thinking" && typeof block?.data === "string") {
903
+ normalized.push({
904
+ type: "reasoning",
905
+ redactedData: block.data,
906
+ });
430
907
  continue;
431
908
  }
432
909
  if ((blockType === "tool_use" || blockType === "server_tool_use") &&
@@ -537,6 +1014,19 @@ async function* streamAnthropicCompatibleParts(stream) {
537
1014
  }
538
1015
  continue;
539
1016
  }
1017
+ // Redacted thinking blocks arrive as opaque encrypted payloads when
1018
+ // Claude's safety classifier flags the reasoning trace. Surface them
1019
+ // as a zero-length reasoning block so callers know thinking happened
1020
+ // without leaking the (legitimately hidden) contents.
1021
+ if (blockType === "redacted_thinking") {
1022
+ const reasoningId = `thinking-${index}`;
1023
+ reasoningBlocks.set(index, { id: reasoningId });
1024
+ yield {
1025
+ type: "reasoning-start",
1026
+ id: reasoningId,
1027
+ };
1028
+ continue;
1029
+ }
540
1030
  if ((blockType === "tool_use" || blockType === "server_tool_use") &&
541
1031
  typeof contentBlock?.id === "string" &&
542
1032
  typeof contentBlock?.name === "string") {
@@ -696,10 +1186,13 @@ function extractOpenAIUsage(payload) {
696
1186
  const inputTokens = usage.prompt_tokens;
697
1187
  const outputTokens = usage.completion_tokens;
698
1188
  const totalTokens = usage.total_tokens;
1189
+ const promptTokensDetails = readRecord(usage.prompt_tokens_details);
1190
+ const cachedTokens = promptTokensDetails?.cached_tokens;
699
1191
  return {
700
1192
  inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
701
1193
  outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
702
1194
  totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
1195
+ ...(typeof cachedTokens === "number" ? { cacheReadInputTokens: cachedTokens } : {}),
703
1196
  };
704
1197
  }
705
1198
  function extractOpenAIContentText(content) {
@@ -742,14 +1235,81 @@ function extractOpenAIToolCalls(message) {
742
1235
  }
743
1236
  return normalized;
744
1237
  }
745
- function buildOpenAIChatRequest(modelId, providerName, options, stream) {
1238
+ /**
1239
+ * OpenAI reasoning models (o1 / o3 / o4 family) use the completion path but
1240
+ * have different constraints than chat models: sampling params are rejected,
1241
+ * and they accept a `reasoning_effort` field. We detect them by model id
1242
+ * prefix so callers don't have to configure it per runtime.
1243
+ */
1244
+ function isOpenAIReasoningModel(modelId) {
1245
+ return /^o[134](-|$)/.test(modelId);
1246
+ }
1247
+ /**
1248
+ * Map the unified reasoning effort to OpenAI's `reasoning_effort` enum.
1249
+ * OpenAI doesn't accept "max" — we collapse it to "high".
1250
+ */
1251
+ function resolveOpenAIReasoningEffort(option) {
1252
+ if (!option || option.enabled !== true) {
1253
+ return undefined;
1254
+ }
1255
+ switch (option.effort) {
1256
+ case "low":
1257
+ return "low";
1258
+ case "high":
1259
+ case "max":
1260
+ return "high";
1261
+ case "medium":
1262
+ default:
1263
+ return "medium";
1264
+ }
1265
+ }
1266
+ function buildOpenAIChatRequest(modelId, providerName, options, stream, warnings) {
1267
+ const isReasoningModel = isOpenAIReasoningModel(modelId);
1268
+ const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
1269
+ const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
1270
+ // OpenAI Chat Completions has no top_k surface (it's exposed only on the
1271
+ // Responses API for some reasoning models). Quietly accepting it would
1272
+ // mislead callers into thinking it took effect.
1273
+ if (options.topK !== undefined) {
1274
+ warnings.push({
1275
+ type: "unsupported-setting",
1276
+ provider: "openai",
1277
+ setting: "topK",
1278
+ details: "OpenAI Chat Completions does not expose top_k; the value was dropped.",
1279
+ });
1280
+ }
1281
+ // Reasoning models (o1 / o3 / o4) reject sampling params outright. Emit
1282
+ // warnings at build time so callers see *why* the value didn't apply
1283
+ // rather than a 400 from the API.
1284
+ if (reasoningEnabled) {
1285
+ const dropped = [
1286
+ ["temperature", "temperature"],
1287
+ ["topP", "top_p"],
1288
+ ["presencePenalty", "presence_penalty"],
1289
+ ["frequencyPenalty", "frequency_penalty"],
1290
+ ];
1291
+ for (const [key, openaiName] of dropped) {
1292
+ if (options[key] !== undefined) {
1293
+ warnings.push({
1294
+ type: "unsupported-setting",
1295
+ provider: "openai",
1296
+ setting: key,
1297
+ details: `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
1298
+ });
1299
+ }
1300
+ }
1301
+ }
746
1302
  const body = {
747
1303
  model: modelId,
748
1304
  messages: toOpenAICompatibleMessages(options.prompt),
749
1305
  ...(stream ? { stream: true, stream_options: { include_usage: true } } : {}),
750
1306
  ...(options.maxOutputTokens !== undefined ? { max_tokens: options.maxOutputTokens } : {}),
751
- ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
752
- ...(options.topP !== undefined ? { top_p: options.topP } : {}),
1307
+ // OpenAI reasoning models reject temperature / top_p / frequency / presence.
1308
+ // Drop them silently rather than letting the API bounce the request.
1309
+ ...(!reasoningEnabled && options.temperature !== undefined
1310
+ ? { temperature: options.temperature }
1311
+ : {}),
1312
+ ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
753
1313
  ...(options.stopSequences && options.stopSequences.length > 0
754
1314
  ? { stop: options.stopSequences }
755
1315
  : {}),
@@ -758,10 +1318,37 @@ function buildOpenAIChatRequest(modelId, providerName, options, stream) {
758
1318
  : {}),
759
1319
  ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
760
1320
  ...(options.seed !== undefined ? { seed: options.seed } : {}),
761
- ...(options.presencePenalty !== undefined ? { presence_penalty: options.presencePenalty } : {}),
762
- ...(options.frequencyPenalty !== undefined
1321
+ ...(!reasoningEnabled && options.presencePenalty !== undefined
1322
+ ? { presence_penalty: options.presencePenalty }
1323
+ : {}),
1324
+ ...(!reasoningEnabled && options.frequencyPenalty !== undefined
763
1325
  ? { frequency_penalty: options.frequencyPenalty }
764
1326
  : {}),
1327
+ ...(reasoningEffort !== undefined ? { reasoning_effort: reasoningEffort } : {}),
1328
+ ...(typeof options.userId === "string" && options.userId.length > 0
1329
+ ? { user: options.userId }
1330
+ : {}),
1331
+ ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
1332
+ ...(options.parallelToolCalls !== undefined
1333
+ ? { parallel_tool_calls: options.parallelToolCalls }
1334
+ : {}),
1335
+ ...(options.responseFormat && options.responseFormat.type !== "text"
1336
+ ? {
1337
+ response_format: options.responseFormat.type === "json" ? { type: "json_object" } : {
1338
+ type: "json_schema",
1339
+ json_schema: {
1340
+ name: options.responseFormat.name,
1341
+ ...(typeof options.responseFormat.description === "string"
1342
+ ? { description: options.responseFormat.description }
1343
+ : {}),
1344
+ schema: unwrapToolInputSchema(options.responseFormat.schema),
1345
+ ...(options.responseFormat.strict !== undefined
1346
+ ? { strict: options.responseFormat.strict }
1347
+ : {}),
1348
+ },
1349
+ },
1350
+ }
1351
+ : {}),
765
1352
  };
766
1353
  Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
767
1354
  return body;
@@ -791,10 +1378,14 @@ function extractGoogleUsage(payload) {
791
1378
  const inputTokens = usage.promptTokenCount;
792
1379
  const outputTokens = usage.candidatesTokenCount;
793
1380
  const totalTokens = usage.totalTokenCount;
1381
+ const cachedContentTokenCount = usage.cachedContentTokenCount;
794
1382
  return {
795
1383
  inputTokens: typeof inputTokens === "number" ? inputTokens : undefined,
796
1384
  outputTokens: typeof outputTokens === "number" ? outputTokens : undefined,
797
1385
  totalTokens: typeof totalTokens === "number" ? totalTokens : undefined,
1386
+ ...(typeof cachedContentTokenCount === "number"
1387
+ ? { cacheReadInputTokens: cachedContentTokenCount }
1388
+ : {}),
798
1389
  };
799
1390
  }
800
1391
  function toGoogleContents(prompt) {
@@ -813,18 +1404,29 @@ function toGoogleContents(prompt) {
813
1404
  parts: [{ text: readTextParts(message.content) }],
814
1405
  });
815
1406
  break;
816
- case "assistant":
817
- contents.push({
818
- role: "model",
819
- parts: message.content.map((part) => part.type === "text" ? { text: part.text } : {
1407
+ case "assistant": {
1408
+ // Anthropic-only `reasoning` parts have no Gemini equivalent
1409
+ // and are dropped on replay.
1410
+ const parts = [];
1411
+ for (const part of message.content) {
1412
+ if (part.type === "text") {
1413
+ parts.push({ text: part.text });
1414
+ continue;
1415
+ }
1416
+ if (part.type === "reasoning") {
1417
+ continue;
1418
+ }
1419
+ parts.push({
820
1420
  functionCall: {
821
1421
  id: part.toolCallId,
822
1422
  name: part.toolName,
823
1423
  args: part.input,
824
1424
  },
825
- }),
826
- });
1425
+ });
1426
+ }
1427
+ contents.push({ role: "model", parts });
827
1428
  break;
1429
+ }
828
1430
  case "tool":
829
1431
  contents.push({
830
1432
  role: "user",
@@ -852,14 +1454,37 @@ function toGoogleTools(tools) {
852
1454
  if (!tools) {
853
1455
  return undefined;
854
1456
  }
855
- const functionDeclarations = tools.flatMap((tool) => tool.type === "function"
856
- ? [{
1457
+ const functionDeclarations = [];
1458
+ const providerEntries = [];
1459
+ for (const tool of tools) {
1460
+ if (tool.type === "function") {
1461
+ functionDeclarations.push({
857
1462
  name: tool.name,
858
1463
  ...(typeof tool.description === "string" ? { description: tool.description } : {}),
859
1464
  parameters: unwrapToolInputSchema(tool.inputSchema),
860
- }]
861
- : []);
862
- return functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined;
1465
+ });
1466
+ continue;
1467
+ }
1468
+ // Gemini provider tools — code_execution, google_search,
1469
+ // google_search_retrieval — each lives in its own tools[] entry
1470
+ // with a single key keyed by the camelCase tool name and an
1471
+ // optional config payload (caller-provided tool.args).
1472
+ if (!tool.id.startsWith("google.")) {
1473
+ continue;
1474
+ }
1475
+ const providerType = tool.id.slice("google.".length);
1476
+ if (providerType.length === 0) {
1477
+ continue;
1478
+ }
1479
+ const camelKey = providerType.replace(/_([a-z])/g, (_, ch) => ch.toUpperCase());
1480
+ providerEntries.push({ [camelKey]: tool.args ?? {} });
1481
+ }
1482
+ const result = [];
1483
+ if (functionDeclarations.length > 0) {
1484
+ result.push({ functionDeclarations });
1485
+ }
1486
+ result.push(...providerEntries);
1487
+ return result.length > 0 ? result : undefined;
863
1488
  }
864
1489
  function unwrapToolInputSchema(inputSchema) {
865
1490
  if (typeof inputSchema !== "object" || inputSchema === null || Array.isArray(inputSchema)) {
@@ -884,7 +1509,11 @@ function normalizeGoogleToolChoice(toolChoice) {
884
1509
  }
885
1510
  }
886
1511
  const record = readRecord(toolChoice);
887
- if (record?.type === "tool" && typeof record.name === "string") {
1512
+ if (!record)
1513
+ return undefined;
1514
+ // Single-tool restriction: { type: "tool", name } — pin to one
1515
+ // function via mode: ANY + allowedFunctionNames: [name].
1516
+ if (record.type === "tool" && typeof record.name === "string") {
888
1517
  return {
889
1518
  functionCallingConfig: {
890
1519
  mode: "ANY",
@@ -892,9 +1521,66 @@ function normalizeGoogleToolChoice(toolChoice) {
892
1521
  },
893
1522
  };
894
1523
  }
1524
+ // Multi-tool restriction: { type: "tools", names: string[] } — pin
1525
+ // to a subset via mode: ANY + the full allowedFunctionNames array.
1526
+ if (record.type === "tools" && Array.isArray(record.names)) {
1527
+ const names = record.names.filter((n) => typeof n === "string");
1528
+ if (names.length > 0) {
1529
+ return {
1530
+ functionCallingConfig: {
1531
+ mode: "ANY",
1532
+ allowedFunctionNames: names,
1533
+ },
1534
+ };
1535
+ }
1536
+ }
1537
+ // Explicit mode forms: { type: "auto" | "none" | "any" }.
1538
+ if (record.type === "auto") {
1539
+ return { functionCallingConfig: { mode: "AUTO" } };
1540
+ }
1541
+ if (record.type === "none") {
1542
+ return { functionCallingConfig: { mode: "NONE" } };
1543
+ }
1544
+ if (record.type === "any" || record.type === "required") {
1545
+ return { functionCallingConfig: { mode: "ANY" } };
1546
+ }
895
1547
  return undefined;
896
1548
  }
1549
/**
 * Translate the unified reasoning option into a Gemini `thinkingConfig`.
 *
 * Returns `undefined` unless `option.enabled` is strictly `true`. Otherwise
 * the result always carries `includeThoughts: true` plus a `thinkingBudget`:
 *   - a numeric `option.budgetTokens` is used verbatim and takes precedence
 *     over `option.effort`;
 *   - otherwise `option.effort` selects a preset (low = 512, medium = 2048,
 *     high = 8192, max = -1). A missing or unrecognised effort falls back to
 *     the medium preset.
 *
 * NOTE(review): the preset sizes and the meaning of -1 ("dynamic / no cap")
 * come from the previous comment on this function — confirm against the
 * current Gemini documentation.
 *
 * @param {object | undefined} option - Unified reasoning option.
 * @returns {{ includeThoughts: true, thinkingBudget: number } | undefined}
 */
function resolveGoogleThinkingConfig(option) {
    if (option?.enabled !== true) {
        return undefined;
    }
    const explicitBudget = option.budgetTokens;
    if (typeof explicitBudget === "number") {
        return { includeThoughts: true, thinkingBudget: explicitBudget };
    }
    // A Map (rather than a plain object) keeps inherited keys such as
    // "constructor" from ever being treated as a known effort level.
    const presetBudgets = new Map([
        ["low", 512],
        ["medium", 2048],
        ["high", 8192],
        ["max", -1],
    ]);
    const presetBudget = presetBudgets.get(option.effort);
    return {
        includeThoughts: true,
        thinkingBudget: presetBudget === undefined ? 2048 : presetBudget,
    };
}
897
1582
  function buildGoogleGenerationConfig(options) {
1583
+ const thinkingConfig = resolveGoogleThinkingConfig(options.reasoning);
898
1584
  const config = {
899
1585
  ...(options.maxOutputTokens !== undefined ? { maxOutputTokens: options.maxOutputTokens } : {}),
900
1586
  ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
@@ -904,11 +1590,48 @@ function buildGoogleGenerationConfig(options) {
904
1590
  ? { stopSequences: options.stopSequences }
905
1591
  : {}),
906
1592
  ...(options.seed !== undefined ? { seed: options.seed } : {}),
1593
+ ...(thinkingConfig ? { thinkingConfig } : {}),
907
1594
  };
908
1595
  return Object.keys(config).length > 0 ? config : undefined;
909
1596
  }
910
- function buildGoogleGenerateContentRequest(providerName, options) {
1597
+ function buildGoogleGenerateContentRequest(providerName, options, warnings) {
1598
+ // Google generate-content surface doesn't accept presence/frequency
1599
+ // penalties on most current models. Emit warnings and let the request
1600
+ // through without them.
1601
+ if (options.presencePenalty !== undefined) {
1602
+ warnings.push({
1603
+ type: "unsupported-setting",
1604
+ provider: "google",
1605
+ setting: "presencePenalty",
1606
+ details: "Gemini generateContent does not accept presencePenalty; the value was dropped.",
1607
+ });
1608
+ }
1609
+ if (options.frequencyPenalty !== undefined) {
1610
+ warnings.push({
1611
+ type: "unsupported-setting",
1612
+ provider: "google",
1613
+ setting: "frequencyPenalty",
1614
+ details: "Gemini generateContent does not accept frequencyPenalty; the value was dropped.",
1615
+ });
1616
+ }
1617
+ if (options.responseFormat && options.responseFormat.type !== "text") {
1618
+ warnings.push({
1619
+ type: "unsupported-setting",
1620
+ provider: "google",
1621
+ setting: "responseFormat",
1622
+ details: "Gemini uses generationConfig.responseMimeType + responseSchema for structured outputs, which is a separate surface and not yet wired through this option.",
1623
+ });
1624
+ }
911
1625
  const { systemInstruction, contents } = toGoogleContents(options.prompt);
1626
+ const generationConfig = buildGoogleGenerationConfig(options);
1627
+ // requestLabels wins over userId-derived labels: when callers explicitly
1628
+ // provide a label map, that's the source of truth. Otherwise fall back
1629
+ // to {user_id} derived from the unified userId option.
1630
+ const labels = options.requestLabels && Object.keys(options.requestLabels).length > 0
1631
+ ? options.requestLabels
1632
+ : typeof options.userId === "string" && options.userId.length > 0
1633
+ ? { user_id: options.userId }
1634
+ : undefined;
912
1635
  const body = {
913
1636
  contents,
914
1637
  ...(systemInstruction ? { systemInstruction } : {}),
@@ -916,8 +1639,13 @@ function buildGoogleGenerateContentRequest(providerName, options) {
916
1639
  ...(normalizeGoogleToolChoice(options.toolChoice)
917
1640
  ? { toolConfig: normalizeGoogleToolChoice(options.toolChoice) }
918
1641
  : {}),
919
- ...(buildGoogleGenerationConfig(options)
920
- ? { generationConfig: buildGoogleGenerationConfig(options) }
1642
+ ...(generationConfig ? { generationConfig } : {}),
1643
+ ...(labels ? { labels } : {}),
1644
+ ...(typeof options.googleCachedContent === "string" && options.googleCachedContent.length > 0
1645
+ ? { cachedContent: options.googleCachedContent }
1646
+ : {}),
1647
+ ...(options.googleSafetySettings && options.googleSafetySettings.length > 0
1648
+ ? { safetySettings: options.googleSafetySettings }
921
1649
  : {}),
922
1650
  };
923
1651
  Object.assign(body, readProviderOptions(options.providerOptions, "google", providerName));
@@ -961,10 +1689,18 @@ function buildGoogleGenerateResult(payload) {
961
1689
  });
962
1690
  }
963
1691
  }
1692
+ // Gemini grounding (google_search / google_search_retrieval) returns
1693
+ // a per-candidate groundingMetadata object with web search queries,
1694
+ // grounding chunks, and citation indices into the response text.
1695
+ // Pass it through opaquely so callers can render footnotes / source
1696
+ // chips / "Search results" UI without parsing the wire shape.
1697
+ const candidate = extractFirstGoogleCandidate(payload);
1698
+ const groundingMetadata = readRecord(candidate?.groundingMetadata);
964
1699
  return {
965
1700
  content,
966
- finishReason: normalizeGoogleFinishReason(extractFirstGoogleCandidate(payload)?.finishReason),
1701
+ finishReason: normalizeGoogleFinishReason(candidate?.finishReason),
967
1702
  usage: extractGoogleUsage(payload),
1703
+ ...(groundingMetadata ? { groundingMetadata } : {}),
968
1704
  };
969
1705
  }
970
1706
  async function* streamGoogleCompatibleParts(stream) {
@@ -1238,11 +1974,13 @@ export function createOpenAIModelRuntime(config, modelId) {
1238
1974
  doGenerate(optionsForRuntime) {
1239
1975
  const options = optionsForRuntime;
1240
1976
  const url = getOpenAIChatCompletionsUrl(config.baseURL);
1241
- const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, false);
1977
+ const warnings = createWarningCollector();
1978
+ const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, false, warnings);
1242
1979
  return requestJson({
1243
1980
  url,
1244
1981
  fetchImpl,
1245
1982
  providerLabel: config.name ?? "openai",
1983
+ providerKind: "openai",
1246
1984
  init: {
1247
1985
  method: "POST",
1248
1986
  headers: createRequestHeaders({
@@ -1253,16 +1991,24 @@ export function createOpenAIModelRuntime(config, modelId) {
1253
1991
  body: JSON.stringify(body),
1254
1992
  signal: options.abortSignal,
1255
1993
  },
1256
- }).then(buildOpenAIGenerateResult);
1994
+ }).then((payload) => {
1995
+ const drained = warnings.drain();
1996
+ return {
1997
+ ...buildOpenAIGenerateResult(payload),
1998
+ ...(drained.length > 0 ? { warnings: drained } : {}),
1999
+ };
2000
+ });
1257
2001
  },
1258
2002
  doStream(optionsForRuntime) {
1259
2003
  const options = optionsForRuntime;
1260
2004
  const url = getOpenAIChatCompletionsUrl(config.baseURL);
1261
- const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, true);
2005
+ const warnings = createWarningCollector();
2006
+ const body = buildOpenAIChatRequest(modelId, config.name ?? "openai", options, true, warnings);
1262
2007
  return requestStream({
1263
2008
  url,
1264
2009
  fetchImpl,
1265
2010
  providerLabel: config.name ?? "openai",
2011
+ providerKind: "openai",
1266
2012
  init: {
1267
2013
  method: "POST",
1268
2014
  headers: createRequestHeaders({
@@ -1273,9 +2019,549 @@ export function createOpenAIModelRuntime(config, modelId) {
1273
2019
  body: JSON.stringify(body),
1274
2020
  signal: options.abortSignal,
1275
2021
  },
1276
- }).then((responseStream) => ({
1277
- stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
1278
- }));
2022
+ }).then((responseStream) => {
2023
+ const drained = warnings.drain();
2024
+ return {
2025
+ stream: ReadableStream.from(streamOpenAICompatibleParts(responseStream)),
2026
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2027
+ };
2028
+ });
2029
+ },
2030
+ };
2031
+ }
2032
/**
 * Build the `summary` array for a Responses API `reasoning` input item.
 *
 * A non-empty `part.text` becomes a single `summary_text` entry. When `text`
 * is absent or empty, the non-empty entries of `part.summaries` (the shape
 * produced when a non-streaming Responses result is parsed) are used instead,
 * so a reasoning part survives a generate -> replay round trip.
 */
function readOpenAIResponsesReasoningSummary(part) {
    if (typeof part.text === "string" && part.text.length > 0) {
        return [{ type: "summary_text", text: part.text }];
    }
    if (!Array.isArray(part.summaries)) {
        return [];
    }
    const summary = [];
    for (const entry of part.summaries) {
        if (typeof entry?.text === "string" && entry.text.length > 0) {
            summary.push({ type: "summary_text", text: entry.text });
        }
    }
    return summary;
}
/**
 * Convert the unified prompt (an array of role-tagged messages) into the
 * Responses API request shape. Differences from Chat Completions:
 *   - System messages are not placed in `input`; their non-empty contents are
 *     joined with a blank line and returned as `instructions`.
 *   - User messages become `{ role: "user", content: [{ type: "input_text" }] }`.
 *   - Assistant text parts become `output_text` entries of an assistant
 *     message.
 *   - Assistant tool calls become standalone `function_call` items and
 *     reasoning parts become standalone `reasoning` items. Any assistant text
 *     gathered so far is flushed first so the original ordering is preserved.
 *   - Tool results become standalone `function_call_output` items.
 *
 * @param {Array<object>} prompt - Unified prompt messages.
 * @returns {{ instructions?: string, input: Array<object> }} `instructions` is
 *   omitted when no non-empty system message was present.
 */
function toOpenAIResponsesInput(prompt) {
    const instructionsParts = [];
    const input = [];
    for (const message of prompt) {
        switch (message.role) {
            case "system":
                if (message.content.length > 0) {
                    instructionsParts.push(message.content);
                }
                break;
            case "user":
                input.push({
                    role: "user",
                    content: [{ type: "input_text", text: readTextParts(message.content) }],
                });
                break;
            case "assistant": {
                let pendingContent = [];
                // Emit the assistant text collected so far as its own message.
                const flushPendingContent = () => {
                    if (pendingContent.length > 0) {
                        input.push({ role: "assistant", content: pendingContent });
                        pendingContent = [];
                    }
                };
                for (const part of message.content) {
                    if (part.type === "text") {
                        pendingContent.push({ type: "output_text", text: part.text });
                        continue;
                    }
                    // Reasoning and tool-call parts are top-level input items,
                    // not nested message content, so text must be flushed first.
                    flushPendingContent();
                    if (part.type === "reasoning") {
                        input.push({
                            type: "reasoning",
                            ...(typeof part.signature === "string"
                                ? { encrypted_content: part.signature }
                                : {}),
                            summary: readOpenAIResponsesReasoningSummary(part),
                        });
                        continue;
                    }
                    input.push({
                        type: "function_call",
                        call_id: part.toolCallId,
                        name: part.toolName,
                        arguments: stringifyJsonValue(part.input),
                    });
                }
                flushPendingContent();
                break;
            }
            case "tool":
                for (const part of message.content) {
                    input.push({
                        type: "function_call_output",
                        call_id: part.toolCallId,
                        output: stringifyJsonValue(part.output.value),
                    });
                }
                break;
        }
    }
    return {
        ...(instructionsParts.length > 0 ? { instructions: instructionsParts.join("\n\n") } : {}),
        input,
    };
}
2120
/**
 * Convert unified tool definitions into the Responses API `tools` array.
 *
 * Function tools are emitted flat — `{ type: "function", name, description?,
 * parameters }` — rather than nested under a `function` key as in Chat
 * Completions. Any other tool is kept only when its `id` starts with
 * "openai." and has a non-empty remainder; that remainder becomes the entry's
 * `type`, and the result of `toSnakeCaseRecord(tool.args)` is spread next to
 * it. Everything else is skipped.
 *
 * @param {Array<object> | undefined} tools - Unified tool definitions.
 * @returns {Array<object> | undefined} `undefined` when `tools` is falsy or no
 *   entry survived the conversion.
 */
function toOpenAIResponsesTools(tools) {
    if (!tools) {
        return undefined;
    }
    const providerPrefix = "openai.";
    const entries = [];
    for (const tool of tools) {
        if (tool.type === "function") {
            entries.push({
                type: "function",
                name: tool.name,
                ...(typeof tool.description === "string" ? { description: tool.description } : {}),
                parameters: unwrapToolInputSchema(tool.inputSchema),
            });
        }
        else if (tool.id.startsWith(providerPrefix)) {
            const nativeType = tool.id.slice(providerPrefix.length);
            if (nativeType.length > 0) {
                entries.push({ type: nativeType, ...toSnakeCaseRecord(tool.args) });
            }
        }
    }
    return entries.length === 0 ? undefined : entries;
}
2153
/**
 * Build the JSON request body for the OpenAI Responses API.
 *
 * Unified options with no counterpart in the body built here are dropped, and
 * each drop is reported through `warnings` as an "unsupported-setting" entry
 * so callers can see why a value did not apply:
 *   - `topK`, `seed` and non-empty `stopSequences` are always dropped;
 *   - `presencePenalty` / `frequencyPenalty` are always dropped;
 *   - `temperature` / `topP` are dropped only while reasoning is active
 *     (`isOpenAIReasoningModel(modelId)` is truthy, or a reasoning effort was
 *     resolved from `options.reasoning`).
 *
 * Provider options returned by `readProviderOptions` are merged last and so
 * override any field computed here.
 *
 * @param {string} modelId - Sent as `model`.
 * @param {string} providerName - Forwarded to `readProviderOptions`.
 * @param {object} options - Unified call options.
 * @param {boolean} stream - When truthy, `stream: true` is added to the body.
 * @param {{ push(warning: object): void }} warnings - Warning collector.
 * @returns {object} The request body.
 */
function buildOpenAIResponsesRequest(modelId, providerName, options, stream, warnings) {
    const isReasoningModel = isOpenAIReasoningModel(modelId);
    const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
    const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
    const warnUnsupported = (setting, details) => {
        warnings.push({
            type: "unsupported-setting",
            provider: "openai",
            setting,
            details,
        });
    };
    if (options.topK !== undefined) {
        warnUnsupported("topK", "OpenAI Responses API does not expose top_k; the value was dropped.");
    }
    if (reasoningEnabled) {
        // Same param-sanitization rules as Chat Completions: reasoning models
        // reject sampling params. Drop with a warning.
        const dropped = [
            ["temperature", "temperature"],
            ["topP", "top_p"],
            ["presencePenalty", "presence_penalty"],
            ["frequencyPenalty", "frequency_penalty"],
        ];
        for (const [key, openaiName] of dropped) {
            if (options[key] !== undefined) {
                warnUnsupported(key, `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`);
            }
        }
    }
    else {
        // The penalties are never written into the body below, reasoning or
        // not, so report the drop here as well instead of losing it silently.
        const neverSent = [
            ["presencePenalty", "presence_penalty"],
            ["frequencyPenalty", "frequency_penalty"],
        ];
        for (const [key, openaiName] of neverSent) {
            if (options[key] !== undefined) {
                warnUnsupported(key, `OpenAI Responses API does not expose ${openaiName}; the value was dropped.`);
            }
        }
    }
    if (options.seed !== undefined) {
        warnUnsupported("seed", "OpenAI Responses API does not expose seed; the value was dropped.");
    }
    if (options.stopSequences && options.stopSequences.length > 0) {
        warnUnsupported("stopSequences", "OpenAI Responses API does not expose stop sequences; the value was dropped.");
    }
    const { instructions, input } = toOpenAIResponsesInput(options.prompt);
    const responsesTools = toOpenAIResponsesTools(options.tools);
    const body = {
        model: modelId,
        input,
        ...(instructions !== undefined ? { instructions } : {}),
        ...(stream ? { stream: true } : {}),
        ...(options.maxOutputTokens !== undefined
            ? { max_output_tokens: options.maxOutputTokens }
            : {}),
        ...(!reasoningEnabled && options.temperature !== undefined
            ? { temperature: options.temperature }
            : {}),
        ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
        ...(responsesTools ? { tools: responsesTools } : {}),
        ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
        // The Responses API takes reasoning effort + summary verbosity in a
        // structured `reasoning` object instead of a flat field. "auto"
        // summaries are requested so callers receive summary parts without
        // opting in per request.
        ...(reasoningEffort !== undefined
            ? { reasoning: { effort: reasoningEffort, summary: "auto" } }
            : {}),
        ...(typeof options.userId === "string" && options.userId.length > 0
            ? { user: options.userId }
            : {}),
        ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
        ...(options.parallelToolCalls !== undefined
            ? { parallel_tool_calls: options.parallelToolCalls }
            : {}),
        // Responses API uses `text.format` instead of Chat Completions'
        // `response_format`; the json_schema fields sit directly on the
        // format object rather than under a nested `json_schema` key.
        ...(options.responseFormat && options.responseFormat.type !== "text"
            ? {
                text: {
                    format: options.responseFormat.type === "json" ? { type: "json_object" } : {
                        type: "json_schema",
                        name: options.responseFormat.name,
                        ...(typeof options.responseFormat.description === "string"
                            ? { description: options.responseFormat.description }
                            : {}),
                        schema: unwrapToolInputSchema(options.responseFormat.schema),
                        ...(options.responseFormat.strict !== undefined
                            ? { strict: options.responseFormat.strict }
                            : {}),
                    },
                },
            }
            : {}),
    };
    Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
    return body;
}
2238
/**
 * Read token usage from a Responses API payload.
 *
 * The usage record is looked up at `payload.response.usage` first (where the
 * streaming terminal events carry it) and then at `payload.usage` (the
 * non-streaming shape). The Responses API names the counters `input_tokens` /
 * `output_tokens` rather than Chat Completions' `prompt_tokens` /
 * `completion_tokens`.
 *
 * `totalTokens` uses `total_tokens` when it is a number; otherwise it is the
 * sum of whichever of input/output is known, or `undefined` when neither is.
 * A numeric `input_tokens_details.cached_tokens` is surfaced as
 * `cacheReadInputTokens`. `output_tokens_details.reasoning_tokens` is not
 * read here.
 *
 * @param {unknown} payload - Parsed response body or streaming event.
 * @returns {object | undefined} `undefined` when no usage record is present.
 */
function extractOpenAIResponsesUsage(payload) {
    const root = readRecord(payload);
    const nestedResponse = readRecord(root?.response);
    const usage = readRecord(nestedResponse?.usage) ?? readRecord(root?.usage);
    if (!usage) {
        return undefined;
    }
    const numberOrUndefined = (value) => (typeof value === "number" ? value : undefined);
    const inputTokens = numberOrUndefined(usage.input_tokens);
    const outputTokens = numberOrUndefined(usage.output_tokens);
    let totalTokens = numberOrUndefined(usage.total_tokens);
    if (totalTokens === undefined && (inputTokens !== undefined || outputTokens !== undefined)) {
        totalTokens = (inputTokens ?? 0) + (outputTokens ?? 0);
    }
    const result = { inputTokens, outputTokens, totalTokens };
    const cachedTokens = readRecord(usage.input_tokens_details)?.cached_tokens;
    if (typeof cachedTokens === "number") {
        result.cacheReadInputTokens = cachedTokens;
    }
    return result;
}
2268
/**
 * Map a Responses API `status` value onto a `{ unified, raw }` finish reason.
 *
 *   - "completed"   -> unified "stop"
 *   - "incomplete"  -> unified "length"
 *   - "failed"      -> unified "error"
 *   - "in_progress" -> `null` (the response has not finished yet)
 *   - any other string -> unified "other", with the status kept in `raw`
 *
 * Non-string input yields `null`.
 *
 * Unrecognised statuses used to be returned as a bare string, which gave this
 * function two different return shapes; they are now wrapped like every other
 * terminal status.
 * NOTE(review): "other" is assumed to be the catch-all member of the unified
 * finish-reason vocabulary — confirm against the type declaration.
 *
 * @param {unknown} raw - The `status` field of a response object.
 * @returns {{ unified: string, raw: string } | null}
 */
function normalizeOpenAIResponsesFinishReason(raw) {
    if (typeof raw !== "string") {
        return null;
    }
    switch (raw) {
        case "completed":
            return { unified: "stop", raw };
        case "incomplete":
            return { unified: "length", raw };
        case "failed":
            return { unified: "error", raw };
        case "in_progress":
            return null;
        default:
            return { unified: "other", raw };
    }
}
2284
/**
 * Convert a non-streaming Responses API payload into the unified generate
 * result `{ content, finishReason, usage }`.
 *
 * Each entry of `payload.output` is handled by its `type`:
 *   - "message": the `text` of every `output_text` part is concatenated into
 *     one `text` content entry (skipped when the result is empty);
 *   - "function_call": becomes a `tool-call` entry. `call_id` is preferred
 *     over `id` for `toolCallId`; non-string `arguments` are serialised with
 *     `stringifyJsonValue`;
 *   - "reasoning": becomes a `reasoning` entry carrying the non-empty summary
 *     texts as `summaries` and `encrypted_content` as `signature`.
 * Items of any other type are ignored.
 *
 * @param {unknown} payload - Parsed response body.
 * @returns {{ content: Array<object>, finishReason: unknown, usage: unknown }}
 */
function buildOpenAIResponsesGenerateResult(payload) {
    const root = readRecord(payload);
    const outputItems = Array.isArray(root?.output) ? root.output : [];
    const content = [];
    for (const rawItem of outputItems) {
        const item = readRecord(rawItem);
        const kind = typeof item?.type === "string" ? item.type : undefined;
        switch (kind) {
            case "message": {
                if (!Array.isArray(item.content)) {
                    break;
                }
                const text = item.content
                    .map((part) => readRecord(part))
                    .filter((part) => part?.type === "output_text" && typeof part.text === "string")
                    .map((part) => part.text)
                    .join("");
                if (text.length > 0) {
                    content.push({ type: "text", text });
                }
                break;
            }
            case "function_call": {
                let toolCallId = "";
                if (typeof item.call_id === "string") {
                    toolCallId = item.call_id;
                }
                else if (typeof item.id === "string") {
                    toolCallId = item.id;
                }
                const toolName = typeof item.name === "string" ? item.name : "";
                const input = typeof item.arguments === "string"
                    ? item.arguments
                    : stringifyJsonValue(item.arguments ?? {});
                content.push({ type: "tool-call", toolCallId, toolName, input });
                break;
            }
            case "reasoning": {
                const rawSummary = Array.isArray(item.summary) ? item.summary : [];
                const summaries = rawSummary
                    .map((entry) => readRecord(entry))
                    .filter((entry) => typeof entry?.text === "string" && entry.text.length > 0)
                    .map((entry) => ({
                        ...(typeof entry.id === "string" ? { id: entry.id } : {}),
                        text: entry.text,
                    }));
                const reasoningPart = { type: "reasoning" };
                if (summaries.length > 0) {
                    reasoningPart.summaries = summaries;
                }
                if (typeof item.encrypted_content === "string") {
                    reasoningPart.signature = item.encrypted_content;
                }
                content.push(reasoningPart);
                break;
            }
            default:
                break;
        }
    }
    return {
        content,
        finishReason: normalizeOpenAIResponsesFinishReason(root?.status),
        usage: extractOpenAIResponsesUsage(payload),
    };
}
2347
+ /**
2348
+ * Parse the Responses API streaming event grammar into the same UI part
2349
+ * shapes the existing OpenAI / Anthropic / Google streams emit. The
2350
+ * Responses API uses a strict event-typed protocol — every event has a
2351
+ * `type` field naming the lifecycle phase — instead of the loose
2352
+ * `delta`-based shape Chat Completions uses.
2353
+ */
2354
+ async function* streamOpenAIResponsesParts(stream) {
2355
+ const decoder = new TextDecoder();
2356
+ let buffer = "";
2357
+ const reasoningBlocks = new Map();
2358
+ const functionCalls = new Map();
2359
+ const startedToolCalls = new Set();
2360
+ let finishReason = null;
2361
+ let usage;
2362
+ let reasoningCounter = 0;
2363
+ for await (const chunk of stream) {
2364
+ buffer += decoder.decode(chunk, { stream: true });
2365
+ const parsed = parseSseChunk(buffer);
2366
+ buffer = parsed.remainder;
2367
+ for (const event of parsed.events) {
2368
+ if (event === "[DONE]")
2369
+ continue;
2370
+ const record = readRecord(event);
2371
+ const type = typeof record?.type === "string" ? record.type : undefined;
2372
+ if (!type)
2373
+ continue;
2374
+ // response.output_item.added: a new output item begins. Track
2375
+ // function_call items so their argument deltas can be attributed,
2376
+ // and reasoning items so summary deltas can group correctly.
2377
+ if (type === "response.output_item.added") {
2378
+ const item = readRecord(record?.item);
2379
+ const itemType = typeof item?.type === "string" ? item.type : undefined;
2380
+ const itemId = typeof item?.id === "string" ? item.id : undefined;
2381
+ if (itemType === "function_call" && itemId) {
2382
+ const callId = typeof item?.call_id === "string" ? item.call_id : itemId;
2383
+ const name = typeof item?.name === "string" ? item.name : "";
2384
+ functionCalls.set(itemId, {
2385
+ id: itemId,
2386
+ toolCallId: callId,
2387
+ name,
2388
+ arguments: "",
2389
+ });
2390
+ }
2391
+ if (itemType === "reasoning" && itemId) {
2392
+ reasoningBlocks.set(itemId, {
2393
+ id: `reasoning-${reasoningCounter++}`,
2394
+ emittedStart: false,
2395
+ });
2396
+ }
2397
+ continue;
2398
+ }
2399
+ // response.output_text.delta: text chunk for a message item.
2400
+ if (type === "response.output_text.delta" && typeof record?.delta === "string") {
2401
+ if (record.delta.length > 0) {
2402
+ yield { type: "text-delta", delta: record.delta };
2403
+ }
2404
+ continue;
2405
+ }
2406
+ // response.reasoning_summary_text.delta: reasoning summary text
2407
+ // chunk. The first delta on an item lazily emits the
2408
+ // reasoning-start event so callers can group deltas into a part.
2409
+ if (type === "response.reasoning_summary_text.delta" && typeof record?.delta === "string") {
2410
+ const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
2411
+ const state = itemId ? reasoningBlocks.get(itemId) : undefined;
2412
+ if (state && record.delta.length > 0) {
2413
+ if (!state.emittedStart) {
2414
+ yield { type: "reasoning-start", id: state.id };
2415
+ state.emittedStart = true;
2416
+ }
2417
+ yield { type: "reasoning-delta", id: state.id, delta: record.delta };
2418
+ }
2419
+ continue;
2420
+ }
2421
+ // response.function_call_arguments.delta: tool call argument
2422
+ // chunk. The first delta lazily emits tool-input-start.
2423
+ if (type === "response.function_call_arguments.delta" && typeof record?.delta === "string") {
2424
+ const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
2425
+ const state = itemId ? functionCalls.get(itemId) : undefined;
2426
+ if (state && record.delta.length > 0) {
2427
+ if (!startedToolCalls.has(state.id)) {
2428
+ yield {
2429
+ type: "tool-input-start",
2430
+ id: state.toolCallId,
2431
+ toolName: state.name,
2432
+ };
2433
+ startedToolCalls.add(state.id);
2434
+ }
2435
+ state.arguments += record.delta;
2436
+ yield {
2437
+ type: "tool-input-delta",
2438
+ id: state.toolCallId,
2439
+ delta: record.delta,
2440
+ };
2441
+ }
2442
+ continue;
2443
+ }
2444
+ // response.output_item.done: an item has finished emitting deltas.
2445
+ // Close any reasoning or function-call streams that were open.
2446
+ if (type === "response.output_item.done") {
2447
+ const item = readRecord(record?.item);
2448
+ const itemType = typeof item?.type === "string" ? item.type : undefined;
2449
+ const itemId = typeof item?.id === "string" ? item.id : undefined;
2450
+ if (itemType === "reasoning" && itemId) {
2451
+ const state = reasoningBlocks.get(itemId);
2452
+ if (state?.emittedStart) {
2453
+ yield { type: "reasoning-end", id: state.id };
2454
+ }
2455
+ reasoningBlocks.delete(itemId);
2456
+ }
2457
+ if (itemType === "function_call" && itemId) {
2458
+ const state = functionCalls.get(itemId);
2459
+ if (state) {
2460
+ yield {
2461
+ type: "tool-call",
2462
+ toolCallId: state.toolCallId,
2463
+ toolName: state.name,
2464
+ input: state.arguments,
2465
+ };
2466
+ }
2467
+ functionCalls.delete(itemId);
2468
+ }
2469
+ continue;
2470
+ }
2471
+ // response.completed: terminal event with the final response object
2472
+ // (status + usage). Capture both for the final finish part.
2473
+ if (type === "response.completed") {
2474
+ usage = extractOpenAIResponsesUsage(record) ?? usage;
2475
+ const responseRecord = readRecord(record?.response);
2476
+ finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status);
2477
+ continue;
2478
+ }
2479
+ if (type === "response.failed" || type === "response.incomplete") {
2480
+ const responseRecord = readRecord(record?.response);
2481
+ finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status) ??
2482
+ (type === "response.failed"
2483
+ ? { unified: "error", raw: "failed" }
2484
+ : { unified: "length", raw: "incomplete" });
2485
+ usage = extractOpenAIResponsesUsage(record) ?? usage;
2486
+ continue;
2487
+ }
2488
+ }
2489
+ }
2490
+ // Close any reasoning streams still open at end-of-stream (defensive
2491
+ // — a clean Responses API stream always closes them via output_item.done).
2492
+ for (const state of reasoningBlocks.values()) {
2493
+ if (state.emittedStart) {
2494
+ yield { type: "reasoning-end", id: state.id };
2495
+ }
2496
+ }
2497
+ yield {
2498
+ type: "finish",
2499
+ finishReason,
2500
+ ...(usage ? { usage } : {}),
2501
+ };
2502
+ }
2503
/**
 * Creates the language-model runtime backed by the OpenAI Responses
 * endpoint (the URL comes from `getOpenAIResponsesUrl(config.baseURL)`).
 *
 * @param {object} config - Provider settings. The fields read here are
 *   `fetch`, `name`, `baseURL` and `apiKey`.
 * @param {string} modelId - Model identifier; passed to the request builder
 *   and exposed on the returned runtime.
 * @returns {object} Runtime with `provider`, `modelId`,
 *   `specificationVersion`, `supportedUrls`, `doGenerate` and `doStream`.
 */
export function createOpenAIResponsesRuntime(config, modelId) {
    const fetchImpl = config.fetch ?? globalThis.fetch;

    // Builds, per call, the warning collector and the argument object handed
    // to requestJson / requestStream. `config` is read on every call (not
    // captured up front), and the request body is built before the headers.
    const prepareCall = (options, stream) => {
        const url = getOpenAIResponsesUrl(config.baseURL);
        const warnings = createWarningCollector();
        const body = buildOpenAIResponsesRequest(modelId, config.name ?? "openai", options, stream, warnings);
        const request = {
            url,
            fetchImpl,
            providerLabel: config.name ?? "openai",
            providerKind: "openai",
            init: {
                method: "POST",
                headers: createRequestHeaders({
                    apiKeyHeaderName: "authorization",
                    apiKey: `Bearer ${config.apiKey}`,
                    extraHeaders: options.headers,
                }),
                body: JSON.stringify(body),
                signal: options.abortSignal,
            },
        };
        return { warnings, request };
    };

    // Drains the collector first, then produces the result and attaches a
    // `warnings` field only when at least one warning was recorded.
    const withWarnings = (warnings, produceResult) => {
        const drained = warnings.drain();
        const result = produceResult();
        if (drained.length > 0) {
            return { ...result, warnings: drained };
        }
        return { ...result };
    };

    return {
        provider: config.name ?? "openai",
        modelId,
        specificationVersion: "v3",
        supportedUrls: {},
        // Non-streaming call: resolves to the generate result built from the
        // JSON payload.
        doGenerate(optionsForRuntime) {
            const { warnings, request } = prepareCall(optionsForRuntime, false);
            return requestJson(request).then((payload) =>
                withWarnings(warnings, () => buildOpenAIResponsesGenerateResult(payload)));
        },
        // Streaming call: resolves to `{ stream }`, where the stream wraps the
        // parts yielded by streamOpenAIResponsesParts.
        doStream(optionsForRuntime) {
            const { warnings, request } = prepareCall(optionsForRuntime, true);
            return requestStream(request).then((responseStream) =>
                withWarnings(warnings, () => ({
                    stream: ReadableStream.from(streamOpenAIResponsesParts(responseStream)),
                })));
        },
    };
}
@@ -1289,11 +2575,13 @@ export function createAnthropicModelRuntime(config, modelId) {
1289
2575
  doGenerate(optionsForRuntime) {
1290
2576
  const options = optionsForRuntime;
1291
2577
  const url = getAnthropicMessagesUrl(config.baseURL);
1292
- const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, false);
2578
+ const warnings = createWarningCollector();
2579
+ const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, false, warnings);
1293
2580
  return requestJson({
1294
2581
  url,
1295
2582
  fetchImpl,
1296
2583
  providerLabel: config.name ?? "anthropic",
2584
+ providerKind: "anthropic",
1297
2585
  init: {
1298
2586
  method: "POST",
1299
2587
  headers: createAnthropicRequestHeaders({
@@ -1304,16 +2592,24 @@ export function createAnthropicModelRuntime(config, modelId) {
1304
2592
  body: JSON.stringify(body),
1305
2593
  signal: options.abortSignal,
1306
2594
  },
1307
- }).then(buildAnthropicGenerateResult);
2595
+ }).then((payload) => {
2596
+ const drained = warnings.drain();
2597
+ return {
2598
+ ...buildAnthropicGenerateResult(payload),
2599
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2600
+ };
2601
+ });
1308
2602
  },
1309
2603
  doStream(optionsForRuntime) {
1310
2604
  const options = optionsForRuntime;
1311
2605
  const url = getAnthropicMessagesUrl(config.baseURL);
1312
- const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, true);
2606
+ const warnings = createWarningCollector();
2607
+ const body = buildAnthropicMessagesRequest(modelId, config.name ?? "anthropic", options, true, warnings);
1313
2608
  return requestStream({
1314
2609
  url,
1315
2610
  fetchImpl,
1316
2611
  providerLabel: config.name ?? "anthropic",
2612
+ providerKind: "anthropic",
1317
2613
  init: {
1318
2614
  method: "POST",
1319
2615
  headers: createAnthropicRequestHeaders({
@@ -1324,9 +2620,13 @@ export function createAnthropicModelRuntime(config, modelId) {
1324
2620
  body: JSON.stringify(body),
1325
2621
  signal: options.abortSignal,
1326
2622
  },
1327
- }).then((responseStream) => ({
1328
- stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
1329
- }));
2623
+ }).then((responseStream) => {
2624
+ const drained = warnings.drain();
2625
+ return {
2626
+ stream: ReadableStream.from(streamAnthropicCompatibleParts(responseStream)),
2627
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2628
+ };
2629
+ });
1330
2630
  },
1331
2631
  };
1332
2632
  }
@@ -1340,11 +2640,13 @@ export function createGoogleModelRuntime(config, modelId) {
1340
2640
  doGenerate(optionsForRuntime) {
1341
2641
  const options = optionsForRuntime;
1342
2642
  const url = getGoogleGenerateContentUrl(config.baseURL, modelId);
1343
- const body = buildGoogleGenerateContentRequest(config.name ?? "google", options);
2643
+ const warnings = createWarningCollector();
2644
+ const body = buildGoogleGenerateContentRequest(config.name ?? "google", options, warnings);
1344
2645
  return requestJson({
1345
2646
  url,
1346
2647
  fetchImpl,
1347
2648
  providerLabel: config.name ?? "google",
2649
+ providerKind: "google",
1348
2650
  init: {
1349
2651
  method: "POST",
1350
2652
  headers: createRequestHeaders({
@@ -1355,16 +2657,24 @@ export function createGoogleModelRuntime(config, modelId) {
1355
2657
  body: JSON.stringify(body),
1356
2658
  signal: options.abortSignal,
1357
2659
  },
1358
- }).then(buildGoogleGenerateResult);
2660
+ }).then((payload) => {
2661
+ const drained = warnings.drain();
2662
+ return {
2663
+ ...buildGoogleGenerateResult(payload),
2664
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2665
+ };
2666
+ });
1359
2667
  },
1360
2668
  doStream(optionsForRuntime) {
1361
2669
  const options = optionsForRuntime;
1362
2670
  const url = getGoogleStreamGenerateContentUrl(config.baseURL, modelId);
1363
- const body = buildGoogleGenerateContentRequest(config.name ?? "google", options);
2671
+ const warnings = createWarningCollector();
2672
+ const body = buildGoogleGenerateContentRequest(config.name ?? "google", options, warnings);
1364
2673
  return requestStream({
1365
2674
  url,
1366
2675
  fetchImpl,
1367
2676
  providerLabel: config.name ?? "google",
2677
+ providerKind: "google",
1368
2678
  init: {
1369
2679
  method: "POST",
1370
2680
  headers: createRequestHeaders({
@@ -1375,9 +2685,13 @@ export function createGoogleModelRuntime(config, modelId) {
1375
2685
  body: JSON.stringify(body),
1376
2686
  signal: options.abortSignal,
1377
2687
  },
1378
- }).then((responseStream) => ({
1379
- stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
1380
- }));
2688
+ }).then((responseStream) => {
2689
+ const drained = warnings.drain();
2690
+ return {
2691
+ stream: ReadableStream.from(streamGoogleCompatibleParts(responseStream)),
2692
+ ...(drained.length > 0 ? { warnings: drained } : {}),
2693
+ };
2694
+ });
1381
2695
  },
1382
2696
  };
1383
2697
  }
@@ -1400,6 +2714,7 @@ export function createOpenAIEmbeddingRuntime(config, modelId) {
1400
2714
  url,
1401
2715
  fetchImpl,
1402
2716
  providerLabel: config.name ?? "openai",
2717
+ providerKind: "openai",
1403
2718
  init: {
1404
2719
  method: "POST",
1405
2720
  headers: {
@@ -1442,6 +2757,7 @@ export function createGoogleEmbeddingRuntime(config, modelId) {
1442
2757
  url,
1443
2758
  fetchImpl,
1444
2759
  providerLabel: config.name ?? "google",
2760
+ providerKind: "google",
1445
2761
  init: {
1446
2762
  method: "POST",
1447
2763
  headers: {