@ai-sdk/gateway 4.0.0-beta.4 → 4.0.0-beta.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,7 +29,7 @@ For most use cases, you can use the AI Gateway directly with a model string:
  import { generateText } from 'ai';

  const { text } = await generateText({
- model: 'openai/gpt-5',
+ model: 'openai/gpt-5.4',
  prompt: 'Hello world',
  });
  ```
@@ -39,7 +39,7 @@ const { text } = await generateText({
  import { generateText, gateway } from 'ai';

  const { text } = await generateText({
- model: gateway('openai/gpt-5'),
+ model: gateway('openai/gpt-5.4'),
  prompt: 'Hello world',
  });
  ```
@@ -169,13 +169,41 @@ You can create language models using a provider instance. The first argument is
  import { generateText } from 'ai';

  const { text } = await generateText({
- model: 'openai/gpt-5',
+ model: 'openai/gpt-5.4',
  prompt: 'Explain quantum computing in simple terms',
  });
  ```

  AI Gateway language models can also be used in the `streamText` function and support structured data generation with [`Output`](/docs/reference/ai-sdk-core/output) (see [AI SDK Core](/docs/ai-sdk-core)).

+ ## Reranking Models
+
+ You can create reranking models using the `rerankingModel` method on the provider instance:
+
+ ```ts
+ import { rerank } from 'ai';
+ import { gateway } from '@ai-sdk/gateway';
+
+ const { ranking } = await rerank({
+ model: gateway.rerankingModel('cohere/rerank-v3.5'),
+ query: 'What is the capital of France?',
+ documents: [
+ 'Paris is the capital of France.',
+ 'Berlin is the capital of Germany.',
+ 'Madrid is the capital of Spain.',
+ ],
+ topN: 2,
+ });
+
+ console.log(ranking);
+ // [
+ // { originalIndex: 0, score: 0.89, document: 'Paris is the capital of France.' },
+ // { originalIndex: 2, score: 0.15, document: 'Madrid is the capital of Spain.' },
+ // ]
+ ```
+
+ Reranking models are useful for improving search results in retrieval-augmented generation (RAG) pipelines by re-scoring candidate documents after an initial retrieval step.
+
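To make that concrete, here is a minimal sketch of the retrieval-then-rerank flow, reusing the `rerank` call shape and result fields shown above. The document list, prompt wording, and the `openai/gpt-5.4` model choice are illustrative assumptions, not part of the package README.

```ts
import { generateText, rerank } from 'ai';
import { gateway } from '@ai-sdk/gateway';

// Candidates from an initial retrieval step (illustrative).
const query = 'What is the capital of France?';
const retrieved = [
  'Paris is the capital of France.',
  'Berlin is the capital of Germany.',
  'Madrid is the capital of Spain.',
];

// Re-score the candidates and keep the top two.
const { ranking } = await rerank({
  model: gateway.rerankingModel('cohere/rerank-v3.5'),
  query,
  documents: retrieved,
  topN: 2,
});

// Use the highest-scoring documents as context for the answer.
const context = ranking.map(r => r.document).join('\n');

const { text } = await generateText({
  model: 'openai/gpt-5.4',
  prompt: `Answer using only this context:\n\n${context}\n\nQuestion: ${query}`,
});
```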
  ## Available Models

  The AI Gateway supports models from OpenAI, Anthropic, Google, Meta, xAI, Mistral, DeepSeek, Amazon Bedrock, Cohere, Perplexity, Alibaba, and other providers.
@@ -215,7 +243,7 @@ availableModels.models.forEach(model => {

  // Use any discovered model with plain string
  const { text } = await generateText({
- model: availableModels.models[0].id, // e.g., 'openai/gpt-4o'
+ model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
  prompt: 'Hello world',
  });
  ```
@@ -238,6 +266,86 @@ The `getCredits()` method returns your team's credit information based on the au
  - **balance** _number_ - Your team's current available credit balance
  - **total_used** _number_ - Total credits consumed by your team
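For orientation, a minimal sketch of reading these two fields. The `getCredits()` name comes from the section above; the exact call shape (no arguments, resolving to an object with `balance` and `total_used`) is an assumption.

```ts
import { gateway } from '@ai-sdk/gateway';

// Sketch: fetch the team's credit info and log the two documented fields.
const credits = await gateway.getCredits();

console.log(`Balance: ${credits.balance}`);
console.log(`Total used: ${credits.total_used}`);
```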

+ ## Generation Lookup
+
+ Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in `providerMetadata.gateway.generationId` on both `generateText` and `streamText` responses.
+
+ When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via `getGenerationInfo()`.
+
+ ```ts
+ import { gateway, generateText } from 'ai';
+
+ // Make a request
+ const result = await generateText({
+ model: gateway('anthropic/claude-sonnet-4'),
+ prompt: 'Explain quantum entanglement briefly',
+ });
+
+ // Get the generation ID from provider metadata
+ const generationId = result.providerMetadata?.gateway?.generationId;
+
+ // Look up detailed generation info
+ const generation = await gateway.getGenerationInfo({ id: generationId });
+
+ console.log(`Model: ${generation.model}`);
+ console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
+ console.log(`Latency: ${generation.latency}ms`);
+ console.log(`Prompt tokens: ${generation.promptTokens}`);
+ console.log(`Completion tokens: ${generation.completionTokens}`);
+ ```
+
+ With `streamText`, you can capture the generation ID from the first chunk via `fullStream`:
+
+ ```ts
+ import { gateway, streamText } from 'ai';
+
+ const result = streamText({
+ model: gateway('anthropic/claude-sonnet-4'),
+ prompt: 'Explain quantum entanglement briefly',
+ });
+
+ let generationId: string | undefined;
+
+ for await (const part of result.fullStream) {
+ if (!generationId && part.providerMetadata?.gateway?.generationId) {
+ generationId = part.providerMetadata.gateway.generationId as string;
+ console.log(`Generation ID (early): ${generationId}`);
+ }
+ }
+
+ // Look up cost and usage after the stream completes
+ if (generationId) {
+ const generation = await gateway.getGenerationInfo({ id: generationId });
+ console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
+ console.log(`Finish reason: ${generation.finishReason}`);
+ }
+ ```
+
+ The `getGenerationInfo()` method accepts:
+
+ - **id** _string_ - The generation ID to look up (format: `gen_<ulid>`, required)
+
+ It returns a `GatewayGenerationInfo` object with the following fields:
+
+ - **id** _string_ - The generation ID
+ - **totalCost** _number_ - Total cost in USD
+ - **upstreamInferenceCost** _number_ - Upstream inference cost in USD (relevant for BYOK)
+ - **usage** _number_ - Usage cost in USD (same as totalCost)
+ - **createdAt** _string_ - ISO 8601 timestamp when the generation was created
+ - **model** _string_ - Model identifier used
+ - **isByok** _boolean_ - Whether Bring Your Own Key credentials were used
+ - **providerName** _string_ - The provider that served this generation
+ - **streamed** _boolean_ - Whether streaming was used
+ - **finishReason** _string_ - Finish reason (e.g. `'stop'`)
+ - **latency** _number_ - Time to first token in milliseconds
+ - **generationTime** _number_ - Total generation time in milliseconds
+ - **promptTokens** _number_ - Number of prompt tokens
+ - **completionTokens** _number_ - Number of completion tokens
+ - **reasoningTokens** _number_ - Reasoning tokens used (if applicable)
+ - **cachedTokens** _number_ - Cached tokens used (if applicable)
+ - **cacheCreationTokens** _number_ - Cache creation input tokens
+ - **billableWebSearchCalls** _number_ - Number of billable web search calls
+
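For a compact reference, the field list above maps onto roughly the following TypeScript shape. This is a sketch inferred from the descriptions, not the package's actual exported type, and optionality is guessed.

```ts
// Sketch only: derived from the documented field list above.
interface GatewayGenerationInfoSketch {
  id: string; // e.g. 'gen_<ulid>'
  totalCost: number; // USD
  upstreamInferenceCost: number; // USD, relevant for BYOK
  usage: number; // USD, same as totalCost
  createdAt: string; // ISO 8601
  model: string;
  isByok: boolean;
  providerName: string;
  streamed: boolean;
  finishReason: string; // e.g. 'stop'
  latency: number; // ms to first token
  generationTime: number; // total generation time in ms
  promptTokens: number;
  completionTokens: number;
  reasoningTokens?: number; // if applicable
  cachedTokens?: number; // if applicable
  cacheCreationTokens: number;
  billableWebSearchCalls: number;
}
```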
  ## Examples

  ### Basic Text Generation
@@ -246,7 +354,7 @@ The `getCredits()` method returns your team's credit information based on the au
  import { generateText } from 'ai';

  const { text } = await generateText({
- model: 'anthropic/claude-sonnet-4',
+ model: 'anthropic/claude-sonnet-4.6',
  prompt: 'Write a haiku about programming',
  });

@@ -259,7 +367,7 @@ console.log(text);
  import { streamText } from 'ai';

  const { textStream } = await streamText({
- model: 'openai/gpt-5',
+ model: 'openai/gpt-5.4',
  prompt: 'Explain the benefits of serverless architecture',
  });

@@ -297,13 +405,13 @@ const { text } = await generateText({
  Some providers offer tools that are executed by the provider itself, such as [OpenAI's web search tool](/providers/ai-sdk-providers/openai#web-search-tool). To use these tools through AI Gateway, import the provider to access the tool definitions:

  ```ts
- import { generateText, stepCountIs } from 'ai';
+ import { generateText, isStepCount } from 'ai';
  import { openai } from '@ai-sdk/openai';

  const result = await generateText({
- model: 'openai/gpt-5-mini',
+ model: 'openai/gpt-5.4-mini',
  prompt: 'What is the Vercel AI Gateway?',
- stopWhen: stepCountIs(10),
+ stopWhen: isStepCount(10),
  tools: {
  web_search: openai.tools.webSearch({}),
  },
@@ -330,7 +438,7 @@ The Perplexity Search tool enables models to search the web using [Perplexity's
  import { gateway, generateText } from 'ai';

  const result = await generateText({
- model: 'openai/gpt-5-nano',
+ model: 'openai/gpt-5.4-nano',
  prompt: 'Search for news about AI regulations in January 2025.',
  tools: {
  perplexity_search: gateway.tools.perplexitySearch(),
@@ -348,7 +456,7 @@ You can also configure the search with optional parameters:
  import { gateway, generateText } from 'ai';

  const result = await generateText({
- model: 'openai/gpt-5-nano',
+ model: 'openai/gpt-5.4-nano',
  prompt:
  'Search for news about AI regulations from the first week of January 2025.',
  tools: {
@@ -402,7 +510,7 @@ The tool works with both `generateText` and `streamText`:
  import { gateway, streamText } from 'ai';

  const result = streamText({
- model: 'openai/gpt-5-nano',
+ model: 'openai/gpt-5.4-nano',
  prompt: 'Search for the latest news about AI regulations.',
  tools: {
  perplexity_search: gateway.tools.perplexitySearch(),
@@ -432,7 +540,7 @@ The Parallel Search tool enables models to search the web using [Parallel AI's S
  import { gateway, generateText } from 'ai';

  const result = await generateText({
- model: 'openai/gpt-5-nano',
+ model: 'openai/gpt-5.4-nano',
  prompt: 'Research the latest developments in quantum computing.',
  tools: {
  parallel_search: gateway.tools.parallelSearch(),
@@ -450,7 +558,7 @@ You can also configure the search with optional parameters:
  import { gateway, generateText } from 'ai';

  const result = await generateText({
- model: 'openai/gpt-5-nano',
+ model: 'openai/gpt-5.4-nano',
  prompt: 'Find detailed information about TypeScript 5.0 features.',
  tools: {
  parallel_search: gateway.tools.parallelSearch({
@@ -511,7 +619,7 @@ The tool works with both `generateText` and `streamText`:
  import { gateway, streamText } from 'ai';

  const result = streamText({
- model: 'openai/gpt-5-nano',
+ model: 'openai/gpt-5.4-nano',
  prompt: 'Research the latest AI safety guidelines.',
  tools: {
  parallel_search: gateway.tools.parallelSearch(),
@@ -533,22 +641,24 @@ for await (const part of result.fullStream) {
  }
  ```

- ### Usage Tracking with User and Tags
+ ### Custom Reporting

- Track usage per end-user and categorize requests with tags:
+ Track usage per end-user and categorize requests with tags, then query the data through the reporting API.
+
+ #### Usage Tracking with User and Tags

  ```ts
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
  import { generateText } from 'ai';

  const { text } = await generateText({
- model: 'openai/gpt-5',
+ model: 'openai/gpt-5.4',
  prompt: 'Summarize this document...',
  providerOptions: {
  gateway: {
  user: 'user-abc-123', // Track usage for this specific end-user
  tags: ['document-summary', 'premium-feature'], // Categorize for reporting
- } satisfies GatewayLanguageModelOptions,
+ } satisfies GatewayProviderOptions,
  },
  });
  ```
@@ -559,6 +669,77 @@ This allows you to:
  - Filter and analyze spending by feature or use case using tags
  - Track which users or features are driving the most AI usage

+ #### Querying Spend Reports
+
+ Use the `getSpendReport()` method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the [Custom Reporting docs](https://vercel.com/docs/ai-gateway/capabilities/custom-reporting).
+
+ ```ts
+ import { gateway } from 'ai';
+
+ const report = await gateway.getSpendReport({
+ startDate: '2026-03-01',
+ endDate: '2026-03-25',
+ groupBy: 'model',
+ });
+
+ for (const row of report.results) {
+ console.log(`${row.model}: $${row.totalCost.toFixed(4)}`);
+ }
+ ```
+
+ The `getSpendReport()` method accepts the following parameters:
+
+ - **startDate** _string_ - Start date in `YYYY-MM-DD` format (inclusive, required)
+ - **endDate** _string_ - End date in `YYYY-MM-DD` format (inclusive, required)
+ - **groupBy** _string_ - Aggregation dimension: `'day'` (default), `'user'`, `'model'`, `'tag'`, `'provider'`, or `'credential_type'`
+ - **datePart** _string_ - Time granularity when `groupBy` is `'day'`: `'day'` or `'hour'`
+ - **userId** _string_ - Filter to a specific user
+ - **model** _string_ - Filter to a specific model (e.g. `'anthropic/claude-sonnet-4.5'`)
+ - **provider** _string_ - Filter to a specific provider (e.g. `'anthropic'`)
+ - **credentialType** _string_ - Filter by `'byok'` or `'system'` credentials
+ - **tags** _string[]_ - Filter to requests matching these tags
+
+ Each row in `results` contains a grouping field (matching your `groupBy` choice) and metrics:
+
+ - **totalCost** _number_ - Total cost in USD
+ - **marketCost** _number_ - Market cost in USD
+ - **inputTokens** _number_ - Number of input tokens
+ - **outputTokens** _number_ - Number of output tokens
+ - **cachedInputTokens** _number_ - Number of cached input tokens
+ - **cacheCreationInputTokens** _number_ - Number of cache creation input tokens
+ - **reasoningTokens** _number_ - Number of reasoning tokens
+ - **requestCount** _number_ - Number of requests
+
+ You can combine tracking and querying to analyze spend by tags you defined:
+
+ ```ts
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
+ import { gateway, streamText } from 'ai';
+
+ // 1. Make requests with tags
+ const result = streamText({
+ model: gateway('anthropic/claude-haiku-4.5'),
+ prompt: "Summarize this quarter's results",
+ providerOptions: {
+ gateway: {
+ tags: ['team:finance', 'feature:summaries'],
+ } satisfies GatewayProviderOptions,
+ },
+ });
+
+ // 2. Later, query spend filtered by those tags
+ const report = await gateway.getSpendReport({
+ startDate: '2026-03-01',
+ endDate: '2026-03-31',
+ groupBy: 'tag',
+ tags: ['team:finance'],
+ });
+
+ for (const row of report.results) {
+ console.log(`${row.tag}: $${row.totalCost.toFixed(4)} (${row.requestCount} requests)`);
+ }
+ ```
+
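The same reporting call can also be pointed at the per-end-user tracking shown earlier (`user: 'user-abc-123'`). A sketch of a per-user breakdown follows; it assumes the grouping field on each row is exposed as `user` when `groupBy` is `'user'` (inferred from the row description above), and the date range is illustrative.

```ts
import { gateway } from 'ai';

// Sketch: spend per end-user over one month.
const report = await gateway.getSpendReport({
  startDate: '2026-03-01',
  endDate: '2026-03-31',
  groupBy: 'user',
});

for (const row of report.results) {
  console.log(`${row.user}: $${row.totalCost.toFixed(4)} over ${row.requestCount} requests`);
}
```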
  ## Provider Options

  The AI Gateway provider accepts provider options that control routing behavior and provider-specific configurations.
@@ -568,17 +749,17 @@ The AI Gateway provider accepts provider options that control routing behavior a
  You can use the `gateway` key in `providerOptions` to control how AI Gateway routes requests:

  ```ts
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
  import { generateText } from 'ai';

  const { text } = await generateText({
- model: 'anthropic/claude-sonnet-4',
+ model: 'anthropic/claude-sonnet-4.6',
  prompt: 'Explain quantum computing',
  providerOptions: {
  gateway: {
  order: ['vertex', 'anthropic'], // Try Vertex AI first, then Anthropic
  only: ['vertex', 'anthropic'], // Only use these providers
- } satisfies GatewayLanguageModelOptions,
+ } satisfies GatewayProviderOptions,
  },
  });
  ```
@@ -601,7 +782,7 @@ The following gateway provider options are available:

  Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the `model` parameter), then try each model in this array in order until one succeeds.

- Example: `models: ['openai/gpt-5-nano', 'gemini-2.0-flash']` will try the fallback models in order if the primary model fails.
+ Example: `models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview']` will try the fallback models in order if the primary model fails.

  - **user** _string_

@@ -629,7 +810,19 @@ The following gateway provider options are available:

  - **zeroDataRetention** _boolean_

- Restricts routing requests to providers that have zero data retention policies.
+ Restricts routing to providers that have zero data retention agreements with Vercel for AI Gateway. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers with zero data retention agreements will be used. If there are no providers available for the model with zero data retention, the request will fail. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
+
+ - **disallowPromptTraining** _boolean_
+
+ Restricts routing to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers that do not train on prompt data will be used. If there are no providers available for the model that disallow prompt training, the request will fail.
+
+ - **hipaaCompliant** _boolean_
+
+ Restricts routing to models and tools from providers that have signed a BAA with Vercel for the use of AI Gateway (requires Vercel HIPAA BAA add on). BYOK credentials are skipped when `hipaaCompliant` is set to `true` to ensure that requests are only routed to providers that support HIPAA compliance.
+
+ - **quotaEntityId** _string_
+
+ The unique identifier for the entity against which quota is tracked. Used for quota management and enforcement purposes.

  - **providerTimeouts** _object_

@@ -642,17 +835,17 @@ The following gateway provider options are available:
  You can combine these options to have fine-grained control over routing and tracking:

  ```ts
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
  import { generateText } from 'ai';

  const { text } = await generateText({
- model: 'anthropic/claude-sonnet-4',
+ model: 'anthropic/claude-sonnet-4.6',
  prompt: 'Write a haiku about programming',
  providerOptions: {
  gateway: {
  order: ['vertex'], // Prefer Vertex AI
  only: ['anthropic', 'vertex'], // Only allow these providers
- } satisfies GatewayLanguageModelOptions,
+ } satisfies GatewayProviderOptions,
  },
  });
  ```
@@ -662,43 +855,98 @@ const { text } = await generateText({
  The `models` option enables automatic fallback to alternative models when the primary model fails:

  ```ts
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
  import { generateText } from 'ai';

  const { text } = await generateText({
- model: 'openai/gpt-4o', // Primary model
+ model: 'openai/gpt-5.4', // Primary model
  prompt: 'Write a TypeScript haiku',
  providerOptions: {
  gateway: {
- models: ['openai/gpt-5-nano', 'gemini-2.0-flash'], // Fallback models
- } satisfies GatewayLanguageModelOptions,
+ models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview'], // Fallback models
+ } satisfies GatewayProviderOptions,
  },
  });

  // This will:
- // 1. Try openai/gpt-4o first
- // 2. If it fails, try openai/gpt-5-nano
- // 3. If that fails, try gemini-2.0-flash
+ // 1. Try openai/gpt-5.4 first
+ // 2. If it fails, try openai/gpt-5.4-nano
+ // 3. If that fails, try gemini-3-flash-preview
  // 4. Return the result from the first model that succeeds
  ```

  #### Zero Data Retention Example

- Set `zeroDataRetention` to true to ensure requests are only routed to providers
- that have zero data retention policies. When `zeroDataRetention` is `false` or not
- specified, there is no enforcement of restricting routing.
+ Set `zeroDataRetention` to true to route requests to providers that have zero data retention agreements with Vercel for AI Gateway. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers with zero data retention agreements will be used. If there are no providers available for the model with zero data retention, the request will fail. When `zeroDataRetention` is `false` or not specified, there is no enforcement of restricting routing. Request-level ZDR is only available for Vercel Pro and Enterprise plans.

  ```ts
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
  import { generateText } from 'ai';

  const { text } = await generateText({
- model: 'anthropic/claude-sonnet-4.5',
+ model: 'anthropic/claude-sonnet-4.6',
  prompt: 'Analyze this sensitive document...',
  providerOptions: {
  gateway: {
  zeroDataRetention: true,
- } satisfies GatewayLanguageModelOptions,
+ } satisfies GatewayProviderOptions,
+ },
+ });
+ ```
+
+ #### Disallow Prompt Training Example
+
+ Set `disallowPromptTraining` to true to route requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers that do not train on prompt data will be used. If there are no providers available for the model that disallow prompt training, the request will fail. When `disallowPromptTraining` is `false` or not specified, there is no enforcement of restricting routing.
+
+ ```ts
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
+ import { generateText } from 'ai';
+
+ const { text } = await generateText({
+ model: 'anthropic/claude-sonnet-4.6',
+ prompt: 'Analyze this proprietary business data...',
+ providerOptions: {
+ gateway: {
+ disallowPromptTraining: true,
+ } satisfies GatewayProviderOptions,
+ },
+ });
+ ```
+
+ #### HIPAA Compliance Example
+
+ Set `hipaaCompliant` to true to route requests only to models or tools by providers that have signed a BAA with Vercel for the use of AI Gateway. If the model or tool does not have a HIPAA-compliant provider, the request will fail. When `hipaaCompliant` is `false` or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when `hipaaCompliant` is set to `true` to ensure that requests are only routed to providers that support HIPAA compliance.
+
+ ```ts
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
+ import { generateText } from 'ai';
+
+ const { text } = await generateText({
+ model: 'anthropic/claude-sonnet-4.6',
+ prompt: 'Analyze this patient data...',
+ providerOptions: {
+ gateway: {
+ hipaaCompliant: true,
+ } satisfies GatewayProviderOptions,
+ },
+ });
+ ```
+
+ #### Quota Entity ID Example
+
+ Set `quotaEntityId` to track and enforce quota against a specific entity. This is useful for multi-tenant applications where you need to manage quota at the entity level (e.g., per organization or team).
+
+ ```ts
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
+ import { generateText } from 'ai';
+
+ const { text } = await generateText({
+ model: 'anthropic/claude-sonnet-4.6',
+ prompt: 'Summarize this report...',
+ providerOptions: {
+ gateway: {
+ quotaEntityId: 'org-123',
+ } satisfies GatewayProviderOptions,
  },
  });
  ```
@@ -709,16 +957,16 @@ When using provider-specific options through AI Gateway, use the actual provider

  ```ts
  import type { AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
  import { generateText } from 'ai';

  const { text } = await generateText({
- model: 'anthropic/claude-sonnet-4',
+ model: 'anthropic/claude-sonnet-4.6',
  prompt: 'Explain quantum computing',
  providerOptions: {
  gateway: {
  order: ['vertex', 'anthropic'],
- } satisfies GatewayLanguageModelOptions,
+ } satisfies GatewayProviderOptions,
  anthropic: {
  thinking: { type: 'enabled', budgetTokens: 12000 },
  } satisfies AnthropicLanguageModelOptions,
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@ai-sdk/gateway",
  "private": false,
- "version": "4.0.0-beta.4",
+ "version": "4.0.0-beta.40",
  "license": "Apache-2.0",
  "sideEffects": false,
  "main": "./dist/index.js",
@@ -30,9 +30,9 @@
  }
  },
  "dependencies": {
- "@vercel/oidc": "3.1.0",
- "@ai-sdk/provider": "4.0.0-beta.0",
- "@ai-sdk/provider-utils": "5.0.0-beta.1"
+ "@vercel/oidc": "3.2.0",
+ "@ai-sdk/provider": "4.0.0-beta.8",
+ "@ai-sdk/provider-utils": "5.0.0-beta.14"
  },
  "devDependencies": {
  "@types/node": "18.15.11",
@@ -68,9 +68,7 @@
  "build:watch": "pnpm clean && tsup --watch",
  "clean": "del-cli dist docs *.tsbuildinfo",
  "generate-model-settings": "tsx scripts/generate-model-settings.ts",
- "lint": "eslint \"./**/*.ts*\"",
  "type-check": "tsc --build",
- "prettier-check": "prettier --check \"./**/*.ts*\"",
  "test": "pnpm test:node && pnpm test:edge",
  "test:update": "pnpm test:node -u",
  "test:watch": "vitest --config vitest.node.config.js",
@@ -5,6 +5,7 @@ export type GatewayEmbeddingModelId =
  | 'amazon/titan-embed-text-v2'
  | 'cohere/embed-v4.0'
  | 'google/gemini-embedding-001'
+ | 'google/gemini-embedding-2'
  | 'google/text-embedding-005'
  | 'google/text-multilingual-embedding-002'
  | 'mistral/codestral-embed'
@@ -1,6 +1,6 @@
  import type {
- EmbeddingModelV3,
- SharedV3ProviderMetadata,
+ EmbeddingModelV4,
+ SharedV4ProviderMetadata,
  } from '@ai-sdk/provider';
  import {
  combineHeaders,
@@ -17,8 +17,8 @@ import { asGatewayError } from './errors';
  import { parseAuthMethod } from './errors/parse-auth-method';
  import type { GatewayConfig } from './gateway-config';

- export class GatewayEmbeddingModel implements EmbeddingModelV3 {
- readonly specificationVersion = 'v3';
+ export class GatewayEmbeddingModel implements EmbeddingModelV4 {
+ readonly specificationVersion = 'v4';
  readonly maxEmbeddingsPerCall = 2048;
  readonly supportsParallelCalls = true;

@@ -39,8 +39,8 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
  headers,
  abortSignal,
  providerOptions,
- }: Parameters<EmbeddingModelV3['doEmbed']>[0]): Promise<
- Awaited<ReturnType<EmbeddingModelV3['doEmbed']>>
+ }: Parameters<EmbeddingModelV4['doEmbed']>[0]): Promise<
+ Awaited<ReturnType<EmbeddingModelV4['doEmbed']>>
  > {
  const resolvedHeaders = await resolve(this.config.headers());
  try {
@@ -75,7 +75,7 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
  embeddings: responseBody.embeddings,
  usage: responseBody.usage ?? undefined,
  providerMetadata:
- responseBody.providerMetadata as unknown as SharedV3ProviderMetadata,
+ responseBody.providerMetadata as unknown as SharedV4ProviderMetadata,
  response: { headers: responseHeaders, body: rawValue },
  warnings: [],
  };
@@ -90,7 +90,7 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {

  private getModelConfigHeaders() {
  return {
- 'ai-embedding-model-specification-version': '3',
+ 'ai-embedding-model-specification-version': '4',
  'ai-model-id': this.modelId,
  };
  }
@@ -101,7 +101,7 @@ const gatewayAvailableModelsResponseSchema = lazySchema(() =>
  )
  .nullish(),
  specification: z.object({
- specificationVersion: z.literal('v3'),
+ specificationVersion: z.literal('v4'),
  provider: z.string(),
  modelId: z.string(),
  }),