@ai-sdk/gateway 4.0.0-beta.6 → 4.0.0-beta.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,7 +29,7 @@ For most use cases, you can use the AI Gateway directly with a model string:
29
29
  import { generateText } from 'ai';
30
30
 
31
31
  const { text } = await generateText({
32
- model: 'openai/gpt-5',
32
+ model: 'openai/gpt-5.4',
33
33
  prompt: 'Hello world',
34
34
  });
35
35
  ```
@@ -39,7 +39,7 @@ const { text } = await generateText({
39
39
  import { generateText, gateway } from 'ai';
40
40
 
41
41
  const { text } = await generateText({
42
- model: gateway('openai/gpt-5'),
42
+ model: gateway('openai/gpt-5.4'),
43
43
  prompt: 'Hello world',
44
44
  });
45
45
  ```
@@ -80,7 +80,7 @@ You can use the following optional settings to customize the AI Gateway provider
80
80
 
81
81
  - **baseURL** _string_
82
82
 
83
- Use a different URL prefix for API calls. The default prefix is `https://ai-gateway.vercel.sh/v3/ai`.
83
+ Use a different URL prefix for API calls. The default prefix is `https://ai-gateway.vercel.sh/v4/ai`.
84
84
 
85
85
  - **apiKey** _string_
86
86
 
@@ -159,6 +159,8 @@ You can connect your own provider credentials to use with Vercel AI Gateway. Thi
159
159
 
160
160
  To set up BYOK, add your provider credentials in your Vercel team's AI Gateway settings. Once configured, AI Gateway automatically uses your credentials. No code changes are needed.
161
161
 
162
+ For providers like Azure where you can use custom deployment names, you can configure model mappings to map gateway model slugs to your deployment names. See [model mappings](https://vercel.com/docs/ai-gateway/byok#model-mappings) for details.
163
+
162
164
  Learn more in the [BYOK documentation](https://vercel.com/docs/ai-gateway/byok).
163
165
 
164
166
  ## Language Models
@@ -169,13 +171,41 @@ You can create language models using a provider instance. The first argument is
169
171
  import { generateText } from 'ai';
170
172
 
171
173
  const { text } = await generateText({
172
- model: 'openai/gpt-5',
174
+ model: 'openai/gpt-5.4',
173
175
  prompt: 'Explain quantum computing in simple terms',
174
176
  });
175
177
  ```
176
178
 
177
179
  AI Gateway language models can also be used in the `streamText` function and support structured data generation with [`Output`](/docs/reference/ai-sdk-core/output) (see [AI SDK Core](/docs/ai-sdk-core)).
178
180
 
181
+ ## Reranking Models
182
+
183
+ You can create reranking models using the `rerankingModel` method on the provider instance:
184
+
185
+ ```ts
186
+ import { rerank } from 'ai';
187
+ import { gateway } from '@ai-sdk/gateway';
188
+
189
+ const { ranking } = await rerank({
190
+ model: gateway.rerankingModel('cohere/rerank-v3.5'),
191
+ query: 'What is the capital of France?',
192
+ documents: [
193
+ 'Paris is the capital of France.',
194
+ 'Berlin is the capital of Germany.',
195
+ 'Madrid is the capital of Spain.',
196
+ ],
197
+ topN: 2,
198
+ });
199
+
200
+ console.log(ranking);
201
+ // [
202
+ // { originalIndex: 0, score: 0.89, document: 'Paris is the capital of France.' },
203
+ // { originalIndex: 2, score: 0.15, document: 'Madrid is the capital of Spain.' },
204
+ // ]
205
+ ```
206
+
207
+ Reranking models are useful for improving search results in retrieval-augmented generation (RAG) pipelines by re-scoring candidate documents after an initial retrieval step.
208
+
179
209
  ## Available Models
180
210
 
181
211
  The AI Gateway supports models from OpenAI, Anthropic, Google, Meta, xAI, Mistral, DeepSeek, Amazon Bedrock, Cohere, Perplexity, Alibaba, and other providers.
@@ -215,7 +245,7 @@ availableModels.models.forEach(model => {
215
245
 
216
246
  // Use any discovered model with plain string
217
247
  const { text } = await generateText({
218
- model: availableModels.models[0].id, // e.g., 'openai/gpt-4o'
248
+ model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
219
249
  prompt: 'Hello world',
220
250
  });
221
251
  ```
@@ -238,6 +268,86 @@ The `getCredits()` method returns your team's credit information based on the au
238
268
  - **balance** _number_ - Your team's current available credit balance
239
269
  - **total_used** _number_ - Total credits consumed by your team
240
270
 
271
+ ## Generation Lookup
272
+
273
+ Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in `providerMetadata.gateway.generationId` on both `generateText` and `streamText` responses.
274
+
275
+ When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via `getGenerationInfo()`.
276
+
277
+ ```ts
278
+ import { gateway, generateText } from 'ai';
279
+
280
+ // Make a request
281
+ const result = await generateText({
282
+ model: gateway('anthropic/claude-sonnet-4'),
283
+ prompt: 'Explain quantum entanglement briefly',
284
+ });
285
+
286
+ // Get the generation ID from provider metadata
287
+ const generationId = result.providerMetadata?.gateway?.generationId;
288
+
289
+ // Look up detailed generation info
290
+ const generation = await gateway.getGenerationInfo({ id: generationId });
291
+
292
+ console.log(`Model: ${generation.model}`);
293
+ console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
294
+ console.log(`Latency: ${generation.latency}ms`);
295
+ console.log(`Prompt tokens: ${generation.promptTokens}`);
296
+ console.log(`Completion tokens: ${generation.completionTokens}`);
297
+ ```
298
+
299
+ With `streamText`, you can capture the generation ID from the first chunk via `fullStream`:
300
+
301
+ ```ts
302
+ import { gateway, streamText } from 'ai';
303
+
304
+ const result = streamText({
305
+ model: gateway('anthropic/claude-sonnet-4'),
306
+ prompt: 'Explain quantum entanglement briefly',
307
+ });
308
+
309
+ let generationId: string | undefined;
310
+
311
+ for await (const part of result.fullStream) {
312
+ if (!generationId && part.providerMetadata?.gateway?.generationId) {
313
+ generationId = part.providerMetadata.gateway.generationId as string;
314
+ console.log(`Generation ID (early): ${generationId}`);
315
+ }
316
+ }
317
+
318
+ // Look up cost and usage after the stream completes
319
+ if (generationId) {
320
+ const generation = await gateway.getGenerationInfo({ id: generationId });
321
+ console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
322
+ console.log(`Finish reason: ${generation.finishReason}`);
323
+ }
324
+ ```
325
+
326
+ The `getGenerationInfo()` method accepts:
327
+
328
+ - **id** _string_ - The generation ID to look up (format: `gen_<ulid>`, required)
329
+
330
+ It returns a `GatewayGenerationInfo` object with the following fields:
331
+
332
+ - **id** _string_ - The generation ID
333
+ - **totalCost** _number_ - Total cost in USD
334
+ - **upstreamInferenceCost** _number_ - Upstream inference cost in USD (relevant for BYOK)
335
+ - **usage** _number_ - Usage cost in USD (same as totalCost)
336
+ - **createdAt** _string_ - ISO 8601 timestamp when the generation was created
337
+ - **model** _string_ - Model identifier used
338
+ - **isByok** _boolean_ - Whether Bring Your Own Key credentials were used
339
+ - **providerName** _string_ - The provider that served this generation
340
+ - **streamed** _boolean_ - Whether streaming was used
341
+ - **finishReason** _string_ - Finish reason (e.g. `'stop'`)
342
+ - **latency** _number_ - Time to first token in milliseconds
343
+ - **generationTime** _number_ - Total generation time in milliseconds
344
+ - **promptTokens** _number_ - Number of prompt tokens
345
+ - **completionTokens** _number_ - Number of completion tokens
346
+ - **reasoningTokens** _number_ - Reasoning tokens used (if applicable)
347
+ - **cachedTokens** _number_ - Cached tokens used (if applicable)
348
+ - **cacheCreationTokens** _number_ - Cache creation input tokens
349
+ - **billableWebSearchCalls** _number_ - Number of billable web search calls
350
+
241
351
  ## Examples
242
352
 
243
353
  ### Basic Text Generation
@@ -246,7 +356,7 @@ The `getCredits()` method returns your team's credit information based on the au
246
356
  import { generateText } from 'ai';
247
357
 
248
358
  const { text } = await generateText({
249
- model: 'anthropic/claude-sonnet-4',
359
+ model: 'anthropic/claude-sonnet-4.6',
250
360
  prompt: 'Write a haiku about programming',
251
361
  });
252
362
 
@@ -259,7 +369,7 @@ console.log(text);
259
369
  import { streamText } from 'ai';
260
370
 
261
371
  const { textStream } = await streamText({
262
- model: 'openai/gpt-5',
372
+ model: 'openai/gpt-5.4',
263
373
  prompt: 'Explain the benefits of serverless architecture',
264
374
  });
265
375
 
@@ -297,13 +407,13 @@ const { text } = await generateText({
297
407
  Some providers offer tools that are executed by the provider itself, such as [OpenAI's web search tool](/providers/ai-sdk-providers/openai#web-search-tool). To use these tools through AI Gateway, import the provider to access the tool definitions:
298
408
 
299
409
  ```ts
300
- import { generateText, stepCountIs } from 'ai';
410
+ import { generateText, stepCountIs } from 'ai';
301
411
  import { openai } from '@ai-sdk/openai';
302
412
 
303
413
  const result = await generateText({
304
- model: 'openai/gpt-5-mini',
414
+ model: 'openai/gpt-5.4-mini',
305
415
  prompt: 'What is the Vercel AI Gateway?',
306
- stopWhen: stepCountIs(10),
416
+ stopWhen: stepCountIs(10),
307
417
  tools: {
308
418
  web_search: openai.tools.webSearch({}),
309
419
  },
@@ -330,7 +440,7 @@ The Perplexity Search tool enables models to search the web using [Perplexity's
330
440
  import { gateway, generateText } from 'ai';
331
441
 
332
442
  const result = await generateText({
333
- model: 'openai/gpt-5-nano',
443
+ model: 'openai/gpt-5.4-nano',
334
444
  prompt: 'Search for news about AI regulations in January 2025.',
335
445
  tools: {
336
446
  perplexity_search: gateway.tools.perplexitySearch(),
@@ -348,7 +458,7 @@ You can also configure the search with optional parameters:
348
458
  import { gateway, generateText } from 'ai';
349
459
 
350
460
  const result = await generateText({
351
- model: 'openai/gpt-5-nano',
461
+ model: 'openai/gpt-5.4-nano',
352
462
  prompt:
353
463
  'Search for news about AI regulations from the first week of January 2025.',
354
464
  tools: {
@@ -402,7 +512,7 @@ The tool works with both `generateText` and `streamText`:
402
512
  import { gateway, streamText } from 'ai';
403
513
 
404
514
  const result = streamText({
405
- model: 'openai/gpt-5-nano',
515
+ model: 'openai/gpt-5.4-nano',
406
516
  prompt: 'Search for the latest news about AI regulations.',
407
517
  tools: {
408
518
  perplexity_search: gateway.tools.perplexitySearch(),
@@ -432,7 +542,7 @@ The Parallel Search tool enables models to search the web using [Parallel AI's S
432
542
  import { gateway, generateText } from 'ai';
433
543
 
434
544
  const result = await generateText({
435
- model: 'openai/gpt-5-nano',
545
+ model: 'openai/gpt-5.4-nano',
436
546
  prompt: 'Research the latest developments in quantum computing.',
437
547
  tools: {
438
548
  parallel_search: gateway.tools.parallelSearch(),
@@ -450,7 +560,7 @@ You can also configure the search with optional parameters:
450
560
  import { gateway, generateText } from 'ai';
451
561
 
452
562
  const result = await generateText({
453
- model: 'openai/gpt-5-nano',
563
+ model: 'openai/gpt-5.4-nano',
454
564
  prompt: 'Find detailed information about TypeScript 5.0 features.',
455
565
  tools: {
456
566
  parallel_search: gateway.tools.parallelSearch({
@@ -511,7 +621,7 @@ The tool works with both `generateText` and `streamText`:
511
621
  import { gateway, streamText } from 'ai';
512
622
 
513
623
  const result = streamText({
514
- model: 'openai/gpt-5-nano',
624
+ model: 'openai/gpt-5.4-nano',
515
625
  prompt: 'Research the latest AI safety guidelines.',
516
626
  tools: {
517
627
  parallel_search: gateway.tools.parallelSearch(),
@@ -533,22 +643,24 @@ for await (const part of result.fullStream) {
533
643
  }
534
644
  ```
535
645
 
536
- ### Usage Tracking with User and Tags
646
+ ### Custom Reporting
537
647
 
538
- Track usage per end-user and categorize requests with tags:
648
+ Track usage per end-user and categorize requests with tags, then query the data through the reporting API.
649
+
650
+ #### Usage Tracking with User and Tags
539
651
 
540
652
  ```ts
541
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
653
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
542
654
  import { generateText } from 'ai';
543
655
 
544
656
  const { text } = await generateText({
545
- model: 'openai/gpt-5',
657
+ model: 'openai/gpt-5.4',
546
658
  prompt: 'Summarize this document...',
547
659
  providerOptions: {
548
660
  gateway: {
549
661
  user: 'user-abc-123', // Track usage for this specific end-user
550
662
  tags: ['document-summary', 'premium-feature'], // Categorize for reporting
551
- } satisfies GatewayLanguageModelOptions,
663
+ } satisfies GatewayProviderOptions,
552
664
  },
553
665
  });
554
666
  ```
@@ -559,6 +671,77 @@ This allows you to:
559
671
  - Filter and analyze spending by feature or use case using tags
560
672
  - Track which users or features are driving the most AI usage
561
673
 
674
+ #### Querying Spend Reports
675
+
676
+ Use the `getSpendReport()` method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the [Custom Reporting docs](https://vercel.com/docs/ai-gateway/capabilities/custom-reporting).
677
+
678
+ ```ts
679
+ import { gateway } from 'ai';
680
+
681
+ const report = await gateway.getSpendReport({
682
+ startDate: '2026-03-01',
683
+ endDate: '2026-03-25',
684
+ groupBy: 'model',
685
+ });
686
+
687
+ for (const row of report.results) {
688
+ console.log(`${row.model}: $${row.totalCost.toFixed(4)}`);
689
+ }
690
+ ```
691
+
692
+ The `getSpendReport()` method accepts the following parameters:
693
+
694
+ - **startDate** _string_ - Start date in `YYYY-MM-DD` format (inclusive, required)
695
+ - **endDate** _string_ - End date in `YYYY-MM-DD` format (inclusive, required)
696
+ - **groupBy** _string_ - Aggregation dimension: `'day'` (default), `'user'`, `'model'`, `'tag'`, `'provider'`, or `'credential_type'`
697
+ - **datePart** _string_ - Time granularity when `groupBy` is `'day'`: `'day'` or `'hour'`
698
+ - **userId** _string_ - Filter to a specific user
699
+ - **model** _string_ - Filter to a specific model (e.g. `'anthropic/claude-sonnet-4.5'`)
700
+ - **provider** _string_ - Filter to a specific provider (e.g. `'anthropic'`)
701
+ - **credentialType** _string_ - Filter by `'byok'` or `'system'` credentials
702
+ - **tags** _string[]_ - Filter to requests matching these tags
703
+
704
+ Each row in `results` contains a grouping field (matching your `groupBy` choice) and metrics:
705
+
706
+ - **totalCost** _number_ - Total cost in USD
707
+ - **marketCost** _number_ - Market cost in USD
708
+ - **inputTokens** _number_ - Number of input tokens
709
+ - **outputTokens** _number_ - Number of output tokens
710
+ - **cachedInputTokens** _number_ - Number of cached input tokens
711
+ - **cacheCreationInputTokens** _number_ - Number of cache creation input tokens
712
+ - **reasoningTokens** _number_ - Number of reasoning tokens
713
+ - **requestCount** _number_ - Number of requests
714
+
715
+ You can combine tracking and querying to analyze spend by tags you defined:
716
+
717
+ ```ts
718
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
719
+ import { gateway, streamText } from 'ai';
720
+
721
+ // 1. Make requests with tags
722
+ const result = streamText({
723
+ model: gateway('anthropic/claude-haiku-4.5'),
724
+ prompt: "Summarize this quarter's results",
725
+ providerOptions: {
726
+ gateway: {
727
+ tags: ['team:finance', 'feature:summaries'],
728
+ } satisfies GatewayProviderOptions,
729
+ },
730
+ });
731
+
732
+ // 2. Later, query spend filtered by those tags
733
+ const report = await gateway.getSpendReport({
734
+ startDate: '2026-03-01',
735
+ endDate: '2026-03-31',
736
+ groupBy: 'tag',
737
+ tags: ['team:finance'],
738
+ });
739
+
740
+ for (const row of report.results) {
741
+ console.log(`${row.tag}: $${row.totalCost.toFixed(4)} (${row.requestCount} requests)`);
742
+ }
743
+ ```
744
+
562
745
  ## Provider Options
563
746
 
564
747
  The AI Gateway provider accepts provider options that control routing behavior and provider-specific configurations.
@@ -568,17 +751,17 @@ The AI Gateway provider accepts provider options that control routing behavior a
568
751
  You can use the `gateway` key in `providerOptions` to control how AI Gateway routes requests:
569
752
 
570
753
  ```ts
571
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
754
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
572
755
  import { generateText } from 'ai';
573
756
 
574
757
  const { text } = await generateText({
575
- model: 'anthropic/claude-sonnet-4',
758
+ model: 'anthropic/claude-sonnet-4.6',
576
759
  prompt: 'Explain quantum computing',
577
760
  providerOptions: {
578
761
  gateway: {
579
762
  order: ['vertex', 'anthropic'], // Try Vertex AI first, then Anthropic
580
763
  only: ['vertex', 'anthropic'], // Only use these providers
581
- } satisfies GatewayLanguageModelOptions,
764
+ } satisfies GatewayProviderOptions,
582
765
  },
583
766
  });
584
767
  ```
@@ -597,11 +780,25 @@ The following gateway provider options are available:
597
780
 
598
781
  Example: `only: ['anthropic', 'vertex']` will only allow routing to Anthropic or Vertex AI.
599
782
 
783
+ - **sort** _'cost' | 'ttft' | 'tps'_
784
+
785
+ Sorts available providers by a performance or cost metric before routing. The gateway will try the best-scoring provider first and fall back through the rest in sorted order. If unspecified, providers are ordered using the gateway's default system ranking.
786
+
787
+ - `'cost'` — lowest cost first
788
+ - `'ttft'` — lowest time-to-first-token first
789
+ - `'tps'` — highest tokens-per-second first
790
+
791
+ When combined with `order`, the user-specified providers are promoted to the front while remaining providers follow the sorted order.
792
+
793
+ Example: `sort: 'ttft'` will route to the provider with the fastest time-to-first-token.
794
+
795
+ When `sort` is active, the response's `providerMetadata.gateway.routing.sort` object contains the sort option used, the resulting execution order, per-provider metric values, and any providers that were deprioritized.
796
+
600
797
  - **models** _string[]_
601
798
 
602
799
  Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the `model` parameter), then try each model in this array in order until one succeeds.
603
800
 
604
- Example: `models: ['openai/gpt-5-nano', 'gemini-2.0-flash']` will try the fallback models in order if the primary model fails.
801
+ Example: `models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview']` will try the fallback models in order if the primary model fails.
605
802
 
606
803
  - **user** _string_
607
804
 
@@ -621,15 +818,30 @@ The following gateway provider options are available:
621
818
 
622
819
  Each provider can have multiple credentials (tried in order). The structure is a record where keys are provider slugs and values are arrays of credential objects.
623
820
 
821
+ Each credential can optionally include a `modelMappings` array to map AI Gateway model slugs to your deployment names (for example, custom Azure deployment names). If a BYOK request fails, the gateway falls back to system credentials using the default model name.
822
+
624
823
  Examples:
625
824
 
626
825
  - Single provider: `byok: { 'anthropic': [{ apiKey: 'sk-ant-...' }] }`
627
826
  - Multiple credentials: `byok: { 'vertex': [{ project: 'proj-1', googleCredentials: { privateKey: '...', clientEmail: '...' } }, { project: 'proj-2', googleCredentials: { privateKey: '...', clientEmail: '...' } }] }`
628
827
  - Multiple providers: `byok: { 'anthropic': [{ apiKey: '...' }], 'bedrock': [{ accessKeyId: '...', secretAccessKey: '...' }] }`
828
+ - With model mappings: `byok: { 'azure': [{ apiKey: '...', resourceName: '...', modelMappings: [{ gatewayModelSlug: 'openai/gpt-5.4-nano', customModelId: 'my-deployment' }] }] }`
629
829
 
630
830
  - **zeroDataRetention** _boolean_
631
831
 
632
- Restricts routing requests to providers that have zero data retention policies.
832
+ Restricts routing to providers that have zero data retention agreements with Vercel for AI Gateway. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers with zero data retention agreements will be used. If there are no providers available for the model with zero data retention, the request will fail. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
833
+
834
+ - **disallowPromptTraining** _boolean_
835
+
836
+ Restricts routing to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers that do not train on prompt data will be used. If there are no providers available for the model that disallow prompt training, the request will fail.
837
+
838
+ - **hipaaCompliant** _boolean_
839
+
840
+ Restricts routing to models and tools from providers that have signed a BAA with Vercel for the use of AI Gateway (requires Vercel HIPAA BAA add on). BYOK credentials are skipped when `hipaaCompliant` is set to `true` to ensure that requests are only routed to providers that support HIPAA compliance.
841
+
842
+ - **quotaEntityId** _string_
843
+
844
+ The unique identifier for the entity against which quota is tracked. Used for quota management and enforcement purposes.
633
845
 
634
846
  - **providerTimeouts** _object_
635
847
 
@@ -642,17 +854,17 @@ The following gateway provider options are available:
642
854
  You can combine these options to have fine-grained control over routing and tracking:
643
855
 
644
856
  ```ts
645
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
857
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
646
858
  import { generateText } from 'ai';
647
859
 
648
860
  const { text } = await generateText({
649
- model: 'anthropic/claude-sonnet-4',
861
+ model: 'anthropic/claude-sonnet-4.6',
650
862
  prompt: 'Write a haiku about programming',
651
863
  providerOptions: {
652
864
  gateway: {
653
865
  order: ['vertex'], // Prefer Vertex AI
654
866
  only: ['anthropic', 'vertex'], // Only allow these providers
655
- } satisfies GatewayLanguageModelOptions,
867
+ } satisfies GatewayProviderOptions,
656
868
  },
657
869
  });
658
870
  ```
@@ -662,43 +874,98 @@ const { text } = await generateText({
662
874
  The `models` option enables automatic fallback to alternative models when the primary model fails:
663
875
 
664
876
  ```ts
665
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
877
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
666
878
  import { generateText } from 'ai';
667
879
 
668
880
  const { text } = await generateText({
669
- model: 'openai/gpt-4o', // Primary model
881
+ model: 'openai/gpt-5.4', // Primary model
670
882
  prompt: 'Write a TypeScript haiku',
671
883
  providerOptions: {
672
884
  gateway: {
673
- models: ['openai/gpt-5-nano', 'gemini-2.0-flash'], // Fallback models
674
- } satisfies GatewayLanguageModelOptions,
885
+ models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview'], // Fallback models
886
+ } satisfies GatewayProviderOptions,
675
887
  },
676
888
  });
677
889
 
678
890
  // This will:
679
- // 1. Try openai/gpt-4o first
680
- // 2. If it fails, try openai/gpt-5-nano
681
- // 3. If that fails, try gemini-2.0-flash
891
+ // 1. Try openai/gpt-5.4 first
892
+ // 2. If it fails, try openai/gpt-5.4-nano
893
+ // 3. If that fails, try gemini-3-flash-preview
682
894
  // 4. Return the result from the first model that succeeds
683
895
  ```
684
896
 
685
897
  #### Zero Data Retention Example
686
898
 
687
- Set `zeroDataRetention` to true to ensure requests are only routed to providers
688
- that have zero data retention policies. When `zeroDataRetention` is `false` or not
689
- specified, there is no enforcement of restricting routing.
899
+ Set `zeroDataRetention` to true to route requests to providers that have zero data retention agreements with Vercel for AI Gateway. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers with zero data retention agreements will be used. If there are no providers available for the model with zero data retention, the request will fail. When `zeroDataRetention` is `false` or not specified, there is no enforcement of restricting routing. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
690
900
 
691
901
  ```ts
692
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
902
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
693
903
  import { generateText } from 'ai';
694
904
 
695
905
  const { text } = await generateText({
696
- model: 'anthropic/claude-sonnet-4.5',
906
+ model: 'anthropic/claude-sonnet-4.6',
697
907
  prompt: 'Analyze this sensitive document...',
698
908
  providerOptions: {
699
909
  gateway: {
700
910
  zeroDataRetention: true,
701
- } satisfies GatewayLanguageModelOptions,
911
+ } satisfies GatewayProviderOptions,
912
+ },
913
+ });
914
+ ```
915
+
916
+ #### Disallow Prompt Training Example
917
+
918
+ Set `disallowPromptTraining` to true to route requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers that do not train on prompt data will be used. If there are no providers available for the model that disallow prompt training, the request will fail. When `disallowPromptTraining` is `false` or not specified, there is no enforcement of restricting routing.
919
+
920
+ ```ts
921
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
922
+ import { generateText } from 'ai';
923
+
924
+ const { text } = await generateText({
925
+ model: 'anthropic/claude-sonnet-4.6',
926
+ prompt: 'Analyze this proprietary business data...',
927
+ providerOptions: {
928
+ gateway: {
929
+ disallowPromptTraining: true,
930
+ } satisfies GatewayProviderOptions,
931
+ },
932
+ });
933
+ ```
934
+
935
+ #### HIPAA Compliance Example
936
+
937
+ Set `hipaaCompliant` to true to route requests only to models or tools by providers that have signed a BAA with Vercel for the use of AI Gateway. If the model or tool does not have a HIPAA-compliant provider, the request will fail. When `hipaaCompliant` is `false` or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when `hipaaCompliant` is set to `true` to ensure that requests are only routed to providers that support HIPAA compliance.
938
+
939
+ ```ts
940
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
941
+ import { generateText } from 'ai';
942
+
943
+ const { text } = await generateText({
944
+ model: 'anthropic/claude-sonnet-4.6',
945
+ prompt: 'Analyze this patient data...',
946
+ providerOptions: {
947
+ gateway: {
948
+ hipaaCompliant: true,
949
+ } satisfies GatewayProviderOptions,
950
+ },
951
+ });
952
+ ```
953
+
954
+ #### Quota Entity ID Example
955
+
956
+ Set `quotaEntityId` to track and enforce quota against a specific entity. This is useful for multi-tenant applications where you need to manage quota at the entity level (e.g., per organization or team).
957
+
958
+ ```ts
959
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
960
+ import { generateText } from 'ai';
961
+
962
+ const { text } = await generateText({
963
+ model: 'anthropic/claude-sonnet-4.6',
964
+ prompt: 'Summarize this report...',
965
+ providerOptions: {
966
+ gateway: {
967
+ quotaEntityId: 'org-123',
968
+ } satisfies GatewayProviderOptions,
702
969
  },
703
970
  });
704
971
  ```
@@ -709,16 +976,16 @@ When using provider-specific options through AI Gateway, use the actual provider
709
976
 
710
977
  ```ts
711
978
  import type { AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
712
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
979
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
713
980
  import { generateText } from 'ai';
714
981
 
715
982
  const { text } = await generateText({
716
- model: 'anthropic/claude-sonnet-4',
983
+ model: 'anthropic/claude-sonnet-4.6',
717
984
  prompt: 'Explain quantum computing',
718
985
  providerOptions: {
719
986
  gateway: {
720
987
  order: ['vertex', 'anthropic'],
721
- } satisfies GatewayLanguageModelOptions,
988
+ } satisfies GatewayProviderOptions,
722
989
  anthropic: {
723
990
  thinking: { type: 'enabled', budgetTokens: 12000 },
724
991
  } satisfies AnthropicLanguageModelOptions,
package/package.json CHANGED
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "name": "@ai-sdk/gateway",
3
3
  "private": false,
4
- "version": "4.0.0-beta.6",
4
+ "version": "4.0.0-beta.61",
5
+ "type": "module",
5
6
  "license": "Apache-2.0",
6
7
  "sideEffects": false,
7
8
  "main": "./dist/index.js",
8
- "module": "./dist/index.mjs",
9
9
  "types": "./dist/index.d.ts",
10
10
  "files": [
11
11
  "dist/**/*",
@@ -25,14 +25,14 @@
25
25
  "./package.json": "./package.json",
26
26
  ".": {
27
27
  "types": "./dist/index.d.ts",
28
- "import": "./dist/index.mjs",
29
- "require": "./dist/index.js"
28
+ "import": "./dist/index.js",
29
+ "default": "./dist/index.js"
30
30
  }
31
31
  },
32
32
  "dependencies": {
33
- "@vercel/oidc": "3.1.0",
34
- "@ai-sdk/provider": "4.0.0-beta.0",
35
- "@ai-sdk/provider-utils": "5.0.0-beta.1"
33
+ "@vercel/oidc": "3.2.0",
34
+ "@ai-sdk/provider": "4.0.0-beta.12",
35
+ "@ai-sdk/provider-utils": "5.0.0-beta.25"
36
36
  },
37
37
  "devDependencies": {
38
38
  "@types/node": "18.15.11",
@@ -40,7 +40,7 @@
40
40
  "tsx": "4.19.2",
41
41
  "typescript": "5.8.3",
42
42
  "zod": "3.25.76",
43
- "@ai-sdk/test-server": "2.0.0-beta.0",
43
+ "@ai-sdk/test-server": "2.0.0-beta.1",
44
44
  "@vercel/ai-tsconfig": "0.0.0"
45
45
  },
46
46
  "peerDependencies": {
@@ -68,9 +68,7 @@
68
68
  "build:watch": "pnpm clean && tsup --watch",
69
69
  "clean": "del-cli dist docs *.tsbuildinfo",
70
70
  "generate-model-settings": "tsx scripts/generate-model-settings.ts",
71
- "lint": "eslint \"./**/*.ts*\"",
72
71
  "type-check": "tsc --build",
73
- "prettier-check": "prettier --check \"./**/*.ts*\"",
74
72
  "test": "pnpm test:node && pnpm test:edge",
75
73
  "test:update": "pnpm test:node -u",
76
74
  "test:watch": "vitest --config vitest.node.config.js",
@@ -13,7 +13,6 @@ import {
13
13
  InferSchema,
14
14
  lazySchema,
15
15
  safeValidateTypes,
16
- validateTypes,
17
16
  zodSchema,
18
17
  } from '@ai-sdk/provider-utils';
19
18
 
@@ -37,7 +37,6 @@ export class GatewayAuthenticationError extends GatewayError {
37
37
  static createContextualError({
38
38
  apiKeyProvided,
39
39
  oidcTokenProvided,
40
- message = 'Authentication failed',
41
40
  statusCode = 401,
42
41
  cause,
43
42
  generationId,
@@ -2,6 +2,6 @@ import type { FetchFunction, Resolvable } from '@ai-sdk/provider-utils';
2
2
 
3
3
  export type GatewayConfig = {
4
4
  baseURL: string;
5
- headers: () => Resolvable<Record<string, string | undefined>>;
5
+ headers?: Resolvable<Record<string, string | undefined>>;
6
6
  fetch?: FetchFunction;
7
7
  };