@ai-sdk/gateway 4.0.0-beta.6 → 4.0.0-beta.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +383 -4
- package/dist/index.d.ts +149 -24
- package/dist/index.js +735 -320
- package/dist/index.js.map +1 -1
- package/docs/00-ai-gateway.mdx +312 -45
- package/package.json +8 -10
- package/src/errors/create-gateway-error.ts +0 -1
- package/src/errors/gateway-authentication-error.ts +0 -1
- package/src/gateway-config.ts +1 -1
- package/src/gateway-embedding-model-settings.ts +1 -1
- package/src/gateway-embedding-model.ts +38 -14
- package/src/gateway-fetch-metadata.ts +51 -37
- package/src/gateway-generation-info.ts +149 -0
- package/src/gateway-image-model-settings.ts +9 -0
- package/src/gateway-image-model.ts +41 -21
- package/src/gateway-language-model-settings.ts +22 -10
- package/src/gateway-language-model.ts +49 -23
- package/src/gateway-model-entry.ts +13 -3
- package/src/gateway-provider-options.ts +35 -8
- package/src/gateway-provider.ts +100 -18
- package/src/gateway-reranking-model-settings.ts +7 -0
- package/src/gateway-reranking-model.ts +119 -0
- package/src/gateway-spend-report.ts +193 -0
- package/src/gateway-video-model-settings.ts +2 -0
- package/src/gateway-video-model.ts +22 -17
- package/src/index.ts +13 -3
- package/dist/index.d.mts +0 -602
- package/dist/index.mjs +0 -1539
- package/dist/index.mjs.map +0 -1
package/docs/00-ai-gateway.mdx
CHANGED
@@ -29,7 +29,7 @@ For most use cases, you can use the AI Gateway directly with a model string:
 import { generateText } from 'ai';
 
 const { text } = await generateText({
-  model: 'openai/gpt-5',
+  model: 'openai/gpt-5.4',
   prompt: 'Hello world',
 });
 ```
@@ -39,7 +39,7 @@ const { text } = await generateText({
 import { generateText, gateway } from 'ai';
 
 const { text } = await generateText({
-  model: gateway('openai/gpt-5'),
+  model: gateway('openai/gpt-5.4'),
   prompt: 'Hello world',
 });
 ```
@@ -80,7 +80,7 @@ You can use the following optional settings to customize the AI Gateway provider
 
 - **baseURL** _string_
 
-  Use a different URL prefix for API calls. The default prefix is `https://ai-gateway.vercel.sh/
+  Use a different URL prefix for API calls. The default prefix is `https://ai-gateway.vercel.sh/v4/ai`.
 
 - **apiKey** _string_
 
@@ -159,6 +159,8 @@ You can connect your own provider credentials to use with Vercel AI Gateway. Thi
 
 To set up BYOK, add your provider credentials in your Vercel team's AI Gateway settings. Once configured, AI Gateway automatically uses your credentials. No code changes are needed.
 
+For providers like Azure where you can use custom deployment names, you can configure model mappings to map gateway model slugs to your deployment names. See [model mappings](https://vercel.com/docs/ai-gateway/byok#model-mappings) for details.
+
 Learn more in the [BYOK documentation](https://vercel.com/docs/ai-gateway/byok).
 
 ## Language Models
@@ -169,13 +171,41 @@ You can create language models using a provider instance. The first argument is
 import { generateText } from 'ai';
 
 const { text } = await generateText({
-  model: 'openai/gpt-5',
+  model: 'openai/gpt-5.4',
   prompt: 'Explain quantum computing in simple terms',
 });
 ```
 
 AI Gateway language models can also be used in the `streamText` function and support structured data generation with [`Output`](/docs/reference/ai-sdk-core/output) (see [AI SDK Core](/docs/ai-sdk-core)).
 
+## Reranking Models
+
+You can create reranking models using the `rerankingModel` method on the provider instance:
+
+```ts
+import { rerank } from 'ai';
+import { gateway } from '@ai-sdk/gateway';
+
+const { ranking } = await rerank({
+  model: gateway.rerankingModel('cohere/rerank-v3.5'),
+  query: 'What is the capital of France?',
+  documents: [
+    'Paris is the capital of France.',
+    'Berlin is the capital of Germany.',
+    'Madrid is the capital of Spain.',
+  ],
+  topN: 2,
+});
+
+console.log(ranking);
+// [
+//   { originalIndex: 0, score: 0.89, document: 'Paris is the capital of France.' },
+//   { originalIndex: 2, score: 0.15, document: 'Madrid is the capital of Spain.' },
+// ]
+```
+
+Reranking models are useful for improving search results in retrieval-augmented generation (RAG) pipelines by re-scoring candidate documents after an initial retrieval step.
+
 ## Available Models
 
 The AI Gateway supports models from OpenAI, Anthropic, Google, Meta, xAI, Mistral, DeepSeek, Amazon Bedrock, Cohere, Perplexity, Alibaba, and other providers.
@@ -215,7 +245,7 @@ availableModels.models.forEach(model => {
 
 // Use any discovered model with plain string
 const { text } = await generateText({
-  model: availableModels.models[0].id, // e.g., 'openai/gpt-
+  model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
   prompt: 'Hello world',
 });
 ```
@@ -238,6 +268,86 @@ The `getCredits()` method returns your team's credit information based on the au
 - **balance** _number_ - Your team's current available credit balance
 - **total_used** _number_ - Total credits consumed by your team
 
+## Generation Lookup
+
+Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in `providerMetadata.gateway.generationId` on both `generateText` and `streamText` responses.
+
+When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via `getGenerationInfo()`.
+
+```ts
+import { gateway, generateText } from 'ai';
+
+// Make a request
+const result = await generateText({
+  model: gateway('anthropic/claude-sonnet-4'),
+  prompt: 'Explain quantum entanglement briefly',
+});
+
+// Get the generation ID from provider metadata
+const generationId = result.providerMetadata?.gateway?.generationId;
+
+// Look up detailed generation info
+const generation = await gateway.getGenerationInfo({ id: generationId });
+
+console.log(`Model: ${generation.model}`);
+console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
+console.log(`Latency: ${generation.latency}ms`);
+console.log(`Prompt tokens: ${generation.promptTokens}`);
+console.log(`Completion tokens: ${generation.completionTokens}`);
+```
+
+With `streamText`, you can capture the generation ID from the first chunk via `fullStream`:
+
+```ts
+import { gateway, streamText } from 'ai';
+
+const result = streamText({
+  model: gateway('anthropic/claude-sonnet-4'),
+  prompt: 'Explain quantum entanglement briefly',
+});
+
+let generationId: string | undefined;
+
+for await (const part of result.fullStream) {
+  if (!generationId && part.providerMetadata?.gateway?.generationId) {
+    generationId = part.providerMetadata.gateway.generationId as string;
+    console.log(`Generation ID (early): ${generationId}`);
+  }
+}
+
+// Look up cost and usage after the stream completes
+if (generationId) {
+  const generation = await gateway.getGenerationInfo({ id: generationId });
+  console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
+  console.log(`Finish reason: ${generation.finishReason}`);
+}
+```
+
+The `getGenerationInfo()` method accepts:
+
+- **id** _string_ - The generation ID to look up (format: `gen_<ulid>`, required)
+
+It returns a `GatewayGenerationInfo` object with the following fields:
+
+- **id** _string_ - The generation ID
+- **totalCost** _number_ - Total cost in USD
+- **upstreamInferenceCost** _number_ - Upstream inference cost in USD (relevant for BYOK)
+- **usage** _number_ - Usage cost in USD (same as totalCost)
+- **createdAt** _string_ - ISO 8601 timestamp when the generation was created
+- **model** _string_ - Model identifier used
+- **isByok** _boolean_ - Whether Bring Your Own Key credentials were used
+- **providerName** _string_ - The provider that served this generation
+- **streamed** _boolean_ - Whether streaming was used
+- **finishReason** _string_ - Finish reason (e.g. `'stop'`)
+- **latency** _number_ - Time to first token in milliseconds
+- **generationTime** _number_ - Total generation time in milliseconds
+- **promptTokens** _number_ - Number of prompt tokens
+- **completionTokens** _number_ - Number of completion tokens
+- **reasoningTokens** _number_ - Reasoning tokens used (if applicable)
+- **cachedTokens** _number_ - Cached tokens used (if applicable)
+- **cacheCreationTokens** _number_ - Cache creation input tokens
+- **billableWebSearchCalls** _number_ - Number of billable web search calls
+
 ## Examples
 
 ### Basic Text Generation
@@ -246,7 +356,7 @@ The `getCredits()` method returns your team's credit information based on the au
 import { generateText } from 'ai';
 
 const { text } = await generateText({
-  model: 'anthropic/claude-sonnet-4',
+  model: 'anthropic/claude-sonnet-4.6',
   prompt: 'Write a haiku about programming',
 });
 
@@ -259,7 +369,7 @@ console.log(text);
 import { streamText } from 'ai';
 
 const { textStream } = await streamText({
-  model: 'openai/gpt-5',
+  model: 'openai/gpt-5.4',
   prompt: 'Explain the benefits of serverless architecture',
 });
 
@@ -297,13 +407,13 @@ const { text } = await generateText({
 Some providers offer tools that are executed by the provider itself, such as [OpenAI's web search tool](/providers/ai-sdk-providers/openai#web-search-tool). To use these tools through AI Gateway, import the provider to access the tool definitions:
 
 ```ts
-import { generateText,
+import { generateText, isStepCount } from 'ai';
 import { openai } from '@ai-sdk/openai';
 
 const result = await generateText({
-  model: 'openai/gpt-5-mini',
+  model: 'openai/gpt-5.4-mini',
   prompt: 'What is the Vercel AI Gateway?',
-  stopWhen:
+  stopWhen: isStepCount(10),
   tools: {
     web_search: openai.tools.webSearch({}),
   },
@@ -330,7 +440,7 @@ The Perplexity Search tool enables models to search the web using [Perplexity's
 import { gateway, generateText } from 'ai';
 
 const result = await generateText({
-  model: 'openai/gpt-5-nano',
+  model: 'openai/gpt-5.4-nano',
   prompt: 'Search for news about AI regulations in January 2025.',
   tools: {
     perplexity_search: gateway.tools.perplexitySearch(),
@@ -348,7 +458,7 @@ You can also configure the search with optional parameters:
 import { gateway, generateText } from 'ai';
 
 const result = await generateText({
-  model: 'openai/gpt-5-nano',
+  model: 'openai/gpt-5.4-nano',
   prompt:
     'Search for news about AI regulations from the first week of January 2025.',
   tools: {
@@ -402,7 +512,7 @@ The tool works with both `generateText` and `streamText`:
 import { gateway, streamText } from 'ai';
 
 const result = streamText({
-  model: 'openai/gpt-5-nano',
+  model: 'openai/gpt-5.4-nano',
   prompt: 'Search for the latest news about AI regulations.',
   tools: {
     perplexity_search: gateway.tools.perplexitySearch(),
@@ -432,7 +542,7 @@ The Parallel Search tool enables models to search the web using [Parallel AI's S
 import { gateway, generateText } from 'ai';
 
 const result = await generateText({
-  model: 'openai/gpt-5-nano',
+  model: 'openai/gpt-5.4-nano',
   prompt: 'Research the latest developments in quantum computing.',
   tools: {
     parallel_search: gateway.tools.parallelSearch(),
@@ -450,7 +560,7 @@ You can also configure the search with optional parameters:
 import { gateway, generateText } from 'ai';
 
 const result = await generateText({
-  model: 'openai/gpt-5-nano',
+  model: 'openai/gpt-5.4-nano',
   prompt: 'Find detailed information about TypeScript 5.0 features.',
   tools: {
     parallel_search: gateway.tools.parallelSearch({
@@ -511,7 +621,7 @@ The tool works with both `generateText` and `streamText`:
 import { gateway, streamText } from 'ai';
 
 const result = streamText({
-  model: 'openai/gpt-5-nano',
+  model: 'openai/gpt-5.4-nano',
   prompt: 'Research the latest AI safety guidelines.',
   tools: {
     parallel_search: gateway.tools.parallelSearch(),
@@ -533,22 +643,24 @@ for await (const part of result.fullStream) {
 }
 ```
 
-###
+### Custom Reporting
 
-Track usage per end-user and categorize requests with tags
+Track usage per end-user and categorize requests with tags, then query the data through the reporting API.
+
+#### Usage Tracking with User and Tags
 
 ```ts
-import type {
+import type { GatewayProviderOptions } from '@ai-sdk/gateway';
 import { generateText } from 'ai';
 
 const { text } = await generateText({
-  model: 'openai/gpt-5',
+  model: 'openai/gpt-5.4',
   prompt: 'Summarize this document...',
   providerOptions: {
     gateway: {
       user: 'user-abc-123', // Track usage for this specific end-user
       tags: ['document-summary', 'premium-feature'], // Categorize for reporting
-    } satisfies
+    } satisfies GatewayProviderOptions,
   },
 });
 ```
@@ -559,6 +671,77 @@ This allows you to:
 - Filter and analyze spending by feature or use case using tags
 - Track which users or features are driving the most AI usage
 
+#### Querying Spend Reports
+
+Use the `getSpendReport()` method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the [Custom Reporting docs](https://vercel.com/docs/ai-gateway/capabilities/custom-reporting).
+
+```ts
+import { gateway } from 'ai';
+
+const report = await gateway.getSpendReport({
+  startDate: '2026-03-01',
+  endDate: '2026-03-25',
+  groupBy: 'model',
+});
+
+for (const row of report.results) {
+  console.log(`${row.model}: $${row.totalCost.toFixed(4)}`);
+}
+```
+
+The `getSpendReport()` method accepts the following parameters:
+
+- **startDate** _string_ - Start date in `YYYY-MM-DD` format (inclusive, required)
+- **endDate** _string_ - End date in `YYYY-MM-DD` format (inclusive, required)
+- **groupBy** _string_ - Aggregation dimension: `'day'` (default), `'user'`, `'model'`, `'tag'`, `'provider'`, or `'credential_type'`
+- **datePart** _string_ - Time granularity when `groupBy` is `'day'`: `'day'` or `'hour'`
+- **userId** _string_ - Filter to a specific user
+- **model** _string_ - Filter to a specific model (e.g. `'anthropic/claude-sonnet-4.5'`)
+- **provider** _string_ - Filter to a specific provider (e.g. `'anthropic'`)
+- **credentialType** _string_ - Filter by `'byok'` or `'system'` credentials
+- **tags** _string[]_ - Filter to requests matching these tags
+
+Each row in `results` contains a grouping field (matching your `groupBy` choice) and metrics:
+
+- **totalCost** _number_ - Total cost in USD
+- **marketCost** _number_ - Market cost in USD
+- **inputTokens** _number_ - Number of input tokens
+- **outputTokens** _number_ - Number of output tokens
+- **cachedInputTokens** _number_ - Number of cached input tokens
+- **cacheCreationInputTokens** _number_ - Number of cache creation input tokens
+- **reasoningTokens** _number_ - Number of reasoning tokens
+- **requestCount** _number_ - Number of requests
+
+You can combine tracking and querying to analyze spend by tags you defined:
+
+```ts
+import type { GatewayProviderOptions } from '@ai-sdk/gateway';
+import { gateway, streamText } from 'ai';
+
+// 1. Make requests with tags
+const result = streamText({
+  model: gateway('anthropic/claude-haiku-4.5'),
+  prompt: 'Summarize this quarter's results',
+  providerOptions: {
+    gateway: {
+      tags: ['team:finance', 'feature:summaries'],
+    } satisfies GatewayProviderOptions,
+  },
+});
+
+// 2. Later, query spend filtered by those tags
+const report = await gateway.getSpendReport({
+  startDate: '2026-03-01',
+  endDate: '2026-03-31',
+  groupBy: 'tag',
+  tags: ['team:finance'],
+});
+
+for (const row of report.results) {
+  console.log(`${row.tag}: $${row.totalCost.toFixed(4)} (${row.requestCount} requests)`);
+}
+```
+
 ## Provider Options
 
 The AI Gateway provider accepts provider options that control routing behavior and provider-specific configurations.
@@ -568,17 +751,17 @@ The AI Gateway provider accepts provider options that control routing behavior a
 You can use the `gateway` key in `providerOptions` to control how AI Gateway routes requests:
 
 ```ts
-import type {
+import type { GatewayProviderOptions } from '@ai-sdk/gateway';
 import { generateText } from 'ai';
 
 const { text } = await generateText({
-  model: 'anthropic/claude-sonnet-4',
+  model: 'anthropic/claude-sonnet-4.6',
   prompt: 'Explain quantum computing',
   providerOptions: {
     gateway: {
       order: ['vertex', 'anthropic'], // Try Vertex AI first, then Anthropic
       only: ['vertex', 'anthropic'], // Only use these providers
-    } satisfies
+    } satisfies GatewayProviderOptions,
   },
 });
 ```
@@ -597,11 +780,25 @@ The following gateway provider options are available:
 
   Example: `only: ['anthropic', 'vertex']` will only allow routing to Anthropic or Vertex AI.
 
+- **sort** _'cost' | 'ttft' | 'tps'_
+
+  Sorts available providers by a performance or cost metric before routing. The gateway will try the best-scoring provider first and fall back through the rest in sorted order. If unspecified, providers are ordered using the gateway's default system ranking.
+
+  - `'cost'` — lowest cost first
+  - `'ttft'` — lowest time-to-first-token first
+  - `'tps'` — highest tokens-per-second first
+
+  When combined with `order`, the user-specified providers are promoted to the front while remaining providers follow the sorted order.
+
+  Example: `sort: 'ttft'` will route to the provider with the fastest time-to-first-token.
+
+  When `sort` is active, the response's `providerMetadata.gateway.routing.sort` object contains the sort option used, the resulting execution order, per-provider metric values, and any providers that were deprioritized.
+
 - **models** _string[]_
 
   Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the `model` parameter), then try each model in this array in order until one succeeds.
 
-  Example: `models: ['openai/gpt-5-nano', 'gemini-
+  Example: `models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview']` will try the fallback models in order if the primary model fails.
 
 - **user** _string_
 
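The hunk above documents the new `sort` routing option without an accompanying snippet. A minimal usage sketch, following the `providerOptions.gateway` pattern used elsewhere in this file; the model slug and prompt are illustrative, and reading the routing metadata at the end assumes the `providerMetadata.gateway.routing` shape described in that hunk:

```ts
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';

const result = await generateText({
  model: 'anthropic/claude-sonnet-4.6', // illustrative model slug
  prompt: 'Explain quantum computing',
  providerOptions: {
    gateway: {
      sort: 'ttft', // try the provider with the lowest time-to-first-token first
    } satisfies GatewayProviderOptions,
  },
});

// Per the option description above, the routing decision is echoed back in provider metadata.
console.log(result.providerMetadata?.gateway?.routing);
```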
@@ -621,15 +818,30 @@ The following gateway provider options are available:
 
   Each provider can have multiple credentials (tried in order). The structure is a record where keys are provider slugs and values are arrays of credential objects.
 
+  Each credential can optionally include a `modelMappings` array to map AI Gateway model slugs to your deployment names (for example, custom Azure deployment names). If a BYOK request fails, the gateway falls back to system credentials using the default model name.
+
   Examples:
 
   - Single provider: `byok: { 'anthropic': [{ apiKey: 'sk-ant-...' }] }`
   - Multiple credentials: `byok: { 'vertex': [{ project: 'proj-1', googleCredentials: { privateKey: '...', clientEmail: '...' } }, { project: 'proj-2', googleCredentials: { privateKey: '...', clientEmail: '...' } }] }`
   - Multiple providers: `byok: { 'anthropic': [{ apiKey: '...' }], 'bedrock': [{ accessKeyId: '...', secretAccessKey: '...' }] }`
+  - With model mappings: `byok: { 'azure': [{ apiKey: '...', resourceName: '...', modelMappings: [{ gatewayModelSlug: 'openai/gpt-5.4-nano', customModelId: 'my-deployment' }] }] }`
 
 - **zeroDataRetention** _boolean_
 
-  Restricts routing
+  Restricts routing to providers that have zero data retention agreements with Vercel for AI Gateway. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers with zero data retention agreements will be used. If there are no providers available for the model with zero data retention, the request will fail. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
+
+- **disallowPromptTraining** _boolean_
+
+  Restricts routing to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers that do not train on prompt data will be used. If there are no providers available for the model that disallow prompt training, the request will fail.
+
+- **hipaaCompliant** _boolean_
+
+  Restricts routing to models and tools from providers that have signed a BAA with Vercel for the use of AI Gateway (requires Vercel HIPAA BAA add on). BYOK credentials are skipped when `hipaaCompliant` is set to `true` to ensure that requests are only routed to providers that support HIPAA compliance.
+
+- **quotaEntityId** _string_
+
+  The unique identifier for the entity against which quota is tracked. Used for quota management and enforcement purposes.
 
 - **providerTimeouts** _object_
 
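The `modelMappings` credential field in the hunk above appears only as an inline snippet. Below is a sketch of a full request-level BYOK call assembled from the bullet examples in that hunk; the Azure API key, resource name, and deployment name are placeholders, not values from the diff:

```ts
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
import { generateText } from 'ai';

const { text } = await generateText({
  model: 'openai/gpt-5.4-nano',
  prompt: 'Hello world',
  providerOptions: {
    gateway: {
      byok: {
        // Placeholder Azure credential; shape taken from the "With model mappings" bullet above.
        azure: [
          {
            apiKey: process.env.AZURE_API_KEY ?? '',
            resourceName: 'my-azure-resource',
            modelMappings: [
              { gatewayModelSlug: 'openai/gpt-5.4-nano', customModelId: 'my-deployment' },
            ],
          },
        ],
      },
    } satisfies GatewayProviderOptions,
  },
});
```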
@@ -642,17 +854,17 @@ The following gateway provider options are available:
 You can combine these options to have fine-grained control over routing and tracking:
 
 ```ts
-import type {
+import type { GatewayProviderOptions } from '@ai-sdk/gateway';
 import { generateText } from 'ai';
 
 const { text } = await generateText({
-  model: 'anthropic/claude-sonnet-4',
+  model: 'anthropic/claude-sonnet-4.6',
   prompt: 'Write a haiku about programming',
   providerOptions: {
     gateway: {
       order: ['vertex'], // Prefer Vertex AI
       only: ['anthropic', 'vertex'], // Only allow these providers
-    } satisfies
+    } satisfies GatewayProviderOptions,
   },
 });
 ```
@@ -662,43 +874,98 @@ const { text } = await generateText({
 The `models` option enables automatic fallback to alternative models when the primary model fails:
 
 ```ts
-import type {
+import type { GatewayProviderOptions } from '@ai-sdk/gateway';
 import { generateText } from 'ai';
 
 const { text } = await generateText({
-  model: 'openai/gpt-
+  model: 'openai/gpt-5.4', // Primary model
   prompt: 'Write a TypeScript haiku',
   providerOptions: {
     gateway: {
-      models: ['openai/gpt-5-nano', 'gemini-
-    } satisfies
+      models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview'], // Fallback models
+    } satisfies GatewayProviderOptions,
   },
 });
 
 // This will:
-// 1. Try openai/gpt-
-// 2. If it fails, try openai/gpt-5-nano
-// 3. If that fails, try gemini-
+// 1. Try openai/gpt-5.4 first
+// 2. If it fails, try openai/gpt-5.4-nano
+// 3. If that fails, try gemini-3-flash-preview
 // 4. Return the result from the first model that succeeds
 ```
 
 #### Zero Data Retention Example
 
-Set `zeroDataRetention` to true to
-that have zero data retention policies. When `zeroDataRetention` is `false` or not
-specified, there is no enforcement of restricting routing.
+Set `zeroDataRetention` to true to route requests to providers that have zero data retention agreements with Vercel for AI Gateway. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers with zero data retention agreements will be used. If there are no providers available for the model with zero data retention, the request will fail. When `zeroDataRetention` is `false` or not specified, there is no enforcement of restricting routing. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
 
 ```ts
-import type {
+import type { GatewayProviderOptions } from '@ai-sdk/gateway';
 import { generateText } from 'ai';
 
 const { text } = await generateText({
-  model: 'anthropic/claude-sonnet-4.
+  model: 'anthropic/claude-sonnet-4.6',
   prompt: 'Analyze this sensitive document...',
   providerOptions: {
     gateway: {
       zeroDataRetention: true,
-    } satisfies
+    } satisfies GatewayProviderOptions,
+  },
+});
+```
+
+#### Disallow Prompt Training Example
+
+Set `disallowPromptTraining` to true to route requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers that do not train on prompt data will be used. If there are no providers available for the model that disallow prompt training, the request will fail. When `disallowPromptTraining` is `false` or not specified, there is no enforcement of restricting routing.
+
+```ts
+import type { GatewayProviderOptions } from '@ai-sdk/gateway';
+import { generateText } from 'ai';
+
+const { text } = await generateText({
+  model: 'anthropic/claude-sonnet-4.6',
+  prompt: 'Analyze this proprietary business data...',
+  providerOptions: {
+    gateway: {
+      disallowPromptTraining: true,
+    } satisfies GatewayProviderOptions,
+  },
+});
+```
+
+#### HIPAA Compliance Example
+
+Set `hipaaCompliant` to true to route requests only to models or tools by providers that have signed a BAA with Vercel for the use of AI Gateway. If the model or tool does not have a HIPAA-compliant provider, the request will fail. When `hipaaCompliant` is `false` or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when `hipaaCompliant` is set to `true` to ensure that requests are only routed to providers that support HIPAA compliance.
+
+```ts
+import type { GatewayProviderOptions } from '@ai-sdk/gateway';
+import { generateText } from 'ai';
+
+const { text } = await generateText({
+  model: 'anthropic/claude-sonnet-4.6',
+  prompt: 'Analyze this patient data...',
+  providerOptions: {
+    gateway: {
+      hipaaCompliant: true,
+    } satisfies GatewayProviderOptions,
+  },
+});
+```
+
+#### Quota Entity ID Example
+
+Set `quotaEntityId` to track and enforce quota against a specific entity. This is useful for multi-tenant applications where you need to manage quota at the entity level (e.g., per organization or team).
+
+```ts
+import type { GatewayProviderOptions } from '@ai-sdk/gateway';
+import { generateText } from 'ai';
+
+const { text } = await generateText({
+  model: 'anthropic/claude-sonnet-4.6',
+  prompt: 'Summarize this report...',
+  providerOptions: {
+    gateway: {
+      quotaEntityId: 'org-123',
+    } satisfies GatewayProviderOptions,
   },
 });
 ```
@@ -709,16 +976,16 @@ When using provider-specific options through AI Gateway, use the actual provider
 
 ```ts
 import type { AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
-import type {
+import type { GatewayProviderOptions } from '@ai-sdk/gateway';
 import { generateText } from 'ai';
 
 const { text } = await generateText({
-  model: 'anthropic/claude-sonnet-4',
+  model: 'anthropic/claude-sonnet-4.6',
  prompt: 'Explain quantum computing',
   providerOptions: {
     gateway: {
       order: ['vertex', 'anthropic'],
-    } satisfies
+    } satisfies GatewayProviderOptions,
     anthropic: {
       thinking: { type: 'enabled', budgetTokens: 12000 },
     } satisfies AnthropicLanguageModelOptions,
package/package.json
CHANGED
@@ -1,11 +1,11 @@
 {
   "name": "@ai-sdk/gateway",
   "private": false,
-  "version": "4.0.0-beta.6",
+  "version": "4.0.0-beta.60",
+  "type": "module",
   "license": "Apache-2.0",
   "sideEffects": false,
   "main": "./dist/index.js",
-  "module": "./dist/index.mjs",
   "types": "./dist/index.d.ts",
   "files": [
     "dist/**/*",
@@ -25,14 +25,14 @@
     "./package.json": "./package.json",
     ".": {
       "types": "./dist/index.d.ts",
-      "import": "./dist/index.
-      "
+      "import": "./dist/index.js",
+      "default": "./dist/index.js"
     }
   },
   "dependencies": {
-    "@vercel/oidc": "3.
-    "@ai-sdk/provider": "4.0.0-beta.
-    "@ai-sdk/provider-utils": "5.0.0-beta.
+    "@vercel/oidc": "3.2.0",
+    "@ai-sdk/provider": "4.0.0-beta.12",
+    "@ai-sdk/provider-utils": "5.0.0-beta.24"
   },
   "devDependencies": {
     "@types/node": "18.15.11",
@@ -40,7 +40,7 @@
     "tsx": "4.19.2",
     "typescript": "5.8.3",
     "zod": "3.25.76",
-    "@ai-sdk/test-server": "2.0.0-beta.
+    "@ai-sdk/test-server": "2.0.0-beta.1",
     "@vercel/ai-tsconfig": "0.0.0"
   },
   "peerDependencies": {
@@ -68,9 +68,7 @@
     "build:watch": "pnpm clean && tsup --watch",
     "clean": "del-cli dist docs *.tsbuildinfo",
     "generate-model-settings": "tsx scripts/generate-model-settings.ts",
-    "lint": "eslint \"./**/*.ts*\"",
     "type-check": "tsc --build",
-    "prettier-check": "prettier --check \"./**/*.ts*\"",
     "test": "pnpm test:node && pnpm test:edge",
     "test:update": "pnpm test:node -u",
     "test:watch": "vitest --config vitest.node.config.js",
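The package.json changes above add `"type": "module"`, drop the separate `"module"`/`.mjs` build, and point both the `import` and `default` export conditions at `./dist/index.js`, so the package now ships a single ESM build. A short sketch of what that implies for consumers, assuming standard Node.js resolution (`loadGateway` is a hypothetical helper, not part of the package):

```ts
// ESM or TypeScript consumers import as before.
import { gateway } from '@ai-sdk/gateway';

// CommonJS consumers that cannot require() an ES module can fall back to a dynamic import.
async function loadGateway() {
  const mod = await import('@ai-sdk/gateway');
  return mod.gateway;
}
```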
package/src/gateway-config.ts
CHANGED
@@ -2,6 +2,6 @@ import type { FetchFunction, Resolvable } from '@ai-sdk/provider-utils';
 
 export type GatewayConfig = {
   baseURL: string;
-  headers
+  headers?: Resolvable<Record<string, string | undefined>>;
   fetch?: FetchFunction;
 };