@ai-sdk/gateway 4.0.0-beta.4 → 4.0.0-beta.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +252 -4
- package/dist/index.d.mts +143 -21
- package/dist/index.d.ts +143 -21
- package/dist/index.js +454 -144
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +518 -186
- package/dist/index.mjs.map +1 -1
- package/docs/00-ai-gateway.mdx +292 -44
- package/package.json +4 -6
- package/src/gateway-embedding-model-settings.ts +1 -0
- package/src/gateway-embedding-model.ts +8 -8
- package/src/gateway-fetch-metadata.ts +1 -1
- package/src/gateway-generation-info.ts +147 -0
- package/src/gateway-image-model-settings.ts +6 -0
- package/src/gateway-image-model.ts +10 -10
- package/src/gateway-language-model-settings.ts +21 -10
- package/src/gateway-language-model.ts +19 -19
- package/src/gateway-model-entry.ts +2 -2
- package/src/gateway-provider-options.ts +27 -8
- package/src/gateway-provider.ts +99 -17
- package/src/gateway-reranking-model-settings.ts +1 -0
- package/src/gateway-reranking-model.ts +114 -0
- package/src/gateway-spend-report.ts +191 -0
- package/src/gateway-video-model.ts +15 -15
- package/src/index.ts +13 -3
package/docs/00-ai-gateway.mdx
CHANGED
|
@@ -29,7 +29,7 @@ For most use cases, you can use the AI Gateway directly with a model string:
|
|
|
29
29
|
import { generateText } from 'ai';
|
|
30
30
|
|
|
31
31
|
const { text } = await generateText({
|
|
32
|
-
model: 'openai/gpt-5',
|
|
32
|
+
model: 'openai/gpt-5.4',
|
|
33
33
|
prompt: 'Hello world',
|
|
34
34
|
});
|
|
35
35
|
```
|
|
@@ -39,7 +39,7 @@ const { text } = await generateText({
|
|
|
39
39
|
import { generateText, gateway } from 'ai';
|
|
40
40
|
|
|
41
41
|
const { text } = await generateText({
|
|
42
|
-
model: gateway('openai/gpt-5'),
|
|
42
|
+
model: gateway('openai/gpt-5.4'),
|
|
43
43
|
prompt: 'Hello world',
|
|
44
44
|
});
|
|
45
45
|
```
|
|
@@ -169,13 +169,41 @@ You can create language models using a provider instance. The first argument is
|
|
|
169
169
|
import { generateText } from 'ai';
|
|
170
170
|
|
|
171
171
|
const { text } = await generateText({
|
|
172
|
-
model: 'openai/gpt-5',
|
|
172
|
+
model: 'openai/gpt-5.4',
|
|
173
173
|
prompt: 'Explain quantum computing in simple terms',
|
|
174
174
|
});
|
|
175
175
|
```
|
|
176
176
|
|
|
177
177
|
AI Gateway language models can also be used in the `streamText` function and support structured data generation with [`Output`](/docs/reference/ai-sdk-core/output) (see [AI SDK Core](/docs/ai-sdk-core)).
|
|
178
178
|
|
|
179
|
+
## Reranking Models
|
|
180
|
+
|
|
181
|
+
You can create reranking models using the `rerankingModel` method on the provider instance:
|
|
182
|
+
|
|
183
|
+
```ts
|
|
184
|
+
import { rerank } from 'ai';
|
|
185
|
+
import { gateway } from '@ai-sdk/gateway';
|
|
186
|
+
|
|
187
|
+
const { ranking } = await rerank({
|
|
188
|
+
model: gateway.rerankingModel('cohere/rerank-v3.5'),
|
|
189
|
+
query: 'What is the capital of France?',
|
|
190
|
+
documents: [
|
|
191
|
+
'Paris is the capital of France.',
|
|
192
|
+
'Berlin is the capital of Germany.',
|
|
193
|
+
'Madrid is the capital of Spain.',
|
|
194
|
+
],
|
|
195
|
+
topN: 2,
|
|
196
|
+
});
|
|
197
|
+
|
|
198
|
+
console.log(ranking);
|
|
199
|
+
// [
|
|
200
|
+
// { originalIndex: 0, score: 0.89, document: 'Paris is the capital of France.' },
|
|
201
|
+
// { originalIndex: 2, score: 0.15, document: 'Madrid is the capital of Spain.' },
|
|
202
|
+
// ]
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
Reranking models are useful for improving search results in retrieval-augmented generation (RAG) pipelines by re-scoring candidate documents after an initial retrieval step.
|
|
206
|
+
|
|
179
207
|
## Available Models
|
|
180
208
|
|
|
181
209
|
The AI Gateway supports models from OpenAI, Anthropic, Google, Meta, xAI, Mistral, DeepSeek, Amazon Bedrock, Cohere, Perplexity, Alibaba, and other providers.
|
|
@@ -215,7 +243,7 @@ availableModels.models.forEach(model => {
|
|
|
215
243
|
|
|
216
244
|
// Use any discovered model with plain string
|
|
217
245
|
const { text } = await generateText({
|
|
218
|
-
model: availableModels.models[0].id, // e.g., 'openai/gpt-
|
|
246
|
+
model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
|
|
219
247
|
prompt: 'Hello world',
|
|
220
248
|
});
|
|
221
249
|
```
|
|
@@ -238,6 +266,86 @@ The `getCredits()` method returns your team's credit information based on the au
|
|
|
238
266
|
- **balance** _number_ - Your team's current available credit balance
|
|
239
267
|
- **total_used** _number_ - Total credits consumed by your team
|
|
240
268
|
|
|
269
|
+
## Generation Lookup
|
|
270
|
+
|
|
271
|
+
Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in `providerMetadata.gateway.generationId` on both `generateText` and `streamText` responses.
|
|
272
|
+
|
|
273
|
+
When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via `getGenerationInfo()`.
|
|
274
|
+
|
|
275
|
+
```ts
|
|
276
|
+
import { gateway, generateText } from 'ai';
|
|
277
|
+
|
|
278
|
+
// Make a request
|
|
279
|
+
const result = await generateText({
|
|
280
|
+
model: gateway('anthropic/claude-sonnet-4'),
|
|
281
|
+
prompt: 'Explain quantum entanglement briefly',
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
// Get the generation ID from provider metadata
|
|
285
|
+
const generationId = result.providerMetadata?.gateway?.generationId;
|
|
286
|
+
|
|
287
|
+
// Look up detailed generation info
|
|
288
|
+
const generation = await gateway.getGenerationInfo({ id: generationId });
|
|
289
|
+
|
|
290
|
+
console.log(`Model: ${generation.model}`);
|
|
291
|
+
console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
|
|
292
|
+
console.log(`Latency: ${generation.latency}ms`);
|
|
293
|
+
console.log(`Prompt tokens: ${generation.promptTokens}`);
|
|
294
|
+
console.log(`Completion tokens: ${generation.completionTokens}`);
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
With `streamText`, you can capture the generation ID from the first chunk via `fullStream`:
|
|
298
|
+
|
|
299
|
+
```ts
|
|
300
|
+
import { gateway, streamText } from 'ai';
|
|
301
|
+
|
|
302
|
+
const result = streamText({
|
|
303
|
+
model: gateway('anthropic/claude-sonnet-4'),
|
|
304
|
+
prompt: 'Explain quantum entanglement briefly',
|
|
305
|
+
});
|
|
306
|
+
|
|
307
|
+
let generationId: string | undefined;
|
|
308
|
+
|
|
309
|
+
for await (const part of result.fullStream) {
|
|
310
|
+
if (!generationId && part.providerMetadata?.gateway?.generationId) {
|
|
311
|
+
generationId = part.providerMetadata.gateway.generationId as string;
|
|
312
|
+
console.log(`Generation ID (early): ${generationId}`);
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
// Look up cost and usage after the stream completes
|
|
317
|
+
if (generationId) {
|
|
318
|
+
const generation = await gateway.getGenerationInfo({ id: generationId });
|
|
319
|
+
console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
|
|
320
|
+
console.log(`Finish reason: ${generation.finishReason}`);
|
|
321
|
+
}
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
The `getGenerationInfo()` method accepts:
|
|
325
|
+
|
|
326
|
+
- **id** _string_ - The generation ID to look up (format: `gen_<ulid>`, required)
|
|
327
|
+
|
|
328
|
+
It returns a `GatewayGenerationInfo` object with the following fields:
|
|
329
|
+
|
|
330
|
+
- **id** _string_ - The generation ID
|
|
331
|
+
- **totalCost** _number_ - Total cost in USD
|
|
332
|
+
- **upstreamInferenceCost** _number_ - Upstream inference cost in USD (relevant for BYOK)
|
|
333
|
+
- **usage** _number_ - Usage cost in USD (same as totalCost)
|
|
334
|
+
- **createdAt** _string_ - ISO 8601 timestamp when the generation was created
|
|
335
|
+
- **model** _string_ - Model identifier used
|
|
336
|
+
- **isByok** _boolean_ - Whether Bring Your Own Key credentials were used
|
|
337
|
+
- **providerName** _string_ - The provider that served this generation
|
|
338
|
+
- **streamed** _boolean_ - Whether streaming was used
|
|
339
|
+
- **finishReason** _string_ - Finish reason (e.g. `'stop'`)
|
|
340
|
+
- **latency** _number_ - Time to first token in milliseconds
|
|
341
|
+
- **generationTime** _number_ - Total generation time in milliseconds
|
|
342
|
+
- **promptTokens** _number_ - Number of prompt tokens
|
|
343
|
+
- **completionTokens** _number_ - Number of completion tokens
|
|
344
|
+
- **reasoningTokens** _number_ - Reasoning tokens used (if applicable)
|
|
345
|
+
- **cachedTokens** _number_ - Cached tokens used (if applicable)
|
|
346
|
+
- **cacheCreationTokens** _number_ - Cache creation input tokens
|
|
347
|
+
- **billableWebSearchCalls** _number_ - Number of billable web search calls
|
|
348
|
+
|
|
241
349
|
## Examples
|
|
242
350
|
|
|
243
351
|
### Basic Text Generation
|
|
@@ -246,7 +354,7 @@ The `getCredits()` method returns your team's credit information based on the au
|
|
|
246
354
|
import { generateText } from 'ai';
|
|
247
355
|
|
|
248
356
|
const { text } = await generateText({
|
|
249
|
-
model: 'anthropic/claude-sonnet-4',
|
|
357
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
250
358
|
prompt: 'Write a haiku about programming',
|
|
251
359
|
});
|
|
252
360
|
|
|
@@ -259,7 +367,7 @@ console.log(text);
|
|
|
259
367
|
import { streamText } from 'ai';
|
|
260
368
|
|
|
261
369
|
const { textStream } = await streamText({
|
|
262
|
-
model: 'openai/gpt-5',
|
|
370
|
+
model: 'openai/gpt-5.4',
|
|
263
371
|
prompt: 'Explain the benefits of serverless architecture',
|
|
264
372
|
});
|
|
265
373
|
|
|
@@ -297,13 +405,13 @@ const { text } = await generateText({
|
|
|
297
405
|
Some providers offer tools that are executed by the provider itself, such as [OpenAI's web search tool](/providers/ai-sdk-providers/openai#web-search-tool). To use these tools through AI Gateway, import the provider to access the tool definitions:
|
|
298
406
|
|
|
299
407
|
```ts
|
|
300
|
-
import { generateText,
|
|
408
|
+
import { generateText, stepCountIs } from 'ai';
|
|
301
409
|
import { openai } from '@ai-sdk/openai';
|
|
302
410
|
|
|
303
411
|
const result = await generateText({
|
|
304
|
-
model: 'openai/gpt-5-mini',
|
|
412
|
+
model: 'openai/gpt-5.4-mini',
|
|
305
413
|
prompt: 'What is the Vercel AI Gateway?',
|
|
306
|
-
stopWhen:
|
|
414
|
+
stopWhen: stepCountIs(10),
|
|
307
415
|
tools: {
|
|
308
416
|
web_search: openai.tools.webSearch({}),
|
|
309
417
|
},
|
|
@@ -330,7 +438,7 @@ The Perplexity Search tool enables models to search the web using [Perplexity's
|
|
|
330
438
|
import { gateway, generateText } from 'ai';
|
|
331
439
|
|
|
332
440
|
const result = await generateText({
|
|
333
|
-
model: 'openai/gpt-5-nano',
|
|
441
|
+
model: 'openai/gpt-5.4-nano',
|
|
334
442
|
prompt: 'Search for news about AI regulations in January 2025.',
|
|
335
443
|
tools: {
|
|
336
444
|
perplexity_search: gateway.tools.perplexitySearch(),
|
|
@@ -348,7 +456,7 @@ You can also configure the search with optional parameters:
|
|
|
348
456
|
import { gateway, generateText } from 'ai';
|
|
349
457
|
|
|
350
458
|
const result = await generateText({
|
|
351
|
-
model: 'openai/gpt-5-nano',
|
|
459
|
+
model: 'openai/gpt-5.4-nano',
|
|
352
460
|
prompt:
|
|
353
461
|
'Search for news about AI regulations from the first week of January 2025.',
|
|
354
462
|
tools: {
|
|
@@ -402,7 +510,7 @@ The tool works with both `generateText` and `streamText`:
|
|
|
402
510
|
import { gateway, streamText } from 'ai';
|
|
403
511
|
|
|
404
512
|
const result = streamText({
|
|
405
|
-
model: 'openai/gpt-5-nano',
|
|
513
|
+
model: 'openai/gpt-5.4-nano',
|
|
406
514
|
prompt: 'Search for the latest news about AI regulations.',
|
|
407
515
|
tools: {
|
|
408
516
|
perplexity_search: gateway.tools.perplexitySearch(),
|
|
@@ -432,7 +540,7 @@ The Parallel Search tool enables models to search the web using [Parallel AI's S
|
|
|
432
540
|
import { gateway, generateText } from 'ai';
|
|
433
541
|
|
|
434
542
|
const result = await generateText({
|
|
435
|
-
model: 'openai/gpt-5-nano',
|
|
543
|
+
model: 'openai/gpt-5.4-nano',
|
|
436
544
|
prompt: 'Research the latest developments in quantum computing.',
|
|
437
545
|
tools: {
|
|
438
546
|
parallel_search: gateway.tools.parallelSearch(),
|
|
@@ -450,7 +558,7 @@ You can also configure the search with optional parameters:
|
|
|
450
558
|
import { gateway, generateText } from 'ai';
|
|
451
559
|
|
|
452
560
|
const result = await generateText({
|
|
453
|
-
model: 'openai/gpt-5-nano',
|
|
561
|
+
model: 'openai/gpt-5.4-nano',
|
|
454
562
|
prompt: 'Find detailed information about TypeScript 5.0 features.',
|
|
455
563
|
tools: {
|
|
456
564
|
parallel_search: gateway.tools.parallelSearch({
|
|
@@ -511,7 +619,7 @@ The tool works with both `generateText` and `streamText`:
|
|
|
511
619
|
import { gateway, streamText } from 'ai';
|
|
512
620
|
|
|
513
621
|
const result = streamText({
|
|
514
|
-
model: 'openai/gpt-5-nano',
|
|
622
|
+
model: 'openai/gpt-5.4-nano',
|
|
515
623
|
prompt: 'Research the latest AI safety guidelines.',
|
|
516
624
|
tools: {
|
|
517
625
|
parallel_search: gateway.tools.parallelSearch(),
|
|
@@ -533,22 +641,24 @@ for await (const part of result.fullStream) {
|
|
|
533
641
|
}
|
|
534
642
|
```
|
|
535
643
|
|
|
536
|
-
###
|
|
644
|
+
### Custom Reporting
|
|
537
645
|
|
|
538
|
-
Track usage per end-user and categorize requests with tags
|
|
646
|
+
Track usage per end-user and categorize requests with tags, then query the data through the reporting API.
|
|
647
|
+
|
|
648
|
+
#### Usage Tracking with User and Tags
|
|
539
649
|
|
|
540
650
|
```ts
|
|
541
|
-
import type {
|
|
651
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
542
652
|
import { generateText } from 'ai';
|
|
543
653
|
|
|
544
654
|
const { text } = await generateText({
|
|
545
|
-
model: 'openai/gpt-5',
|
|
655
|
+
model: 'openai/gpt-5.4',
|
|
546
656
|
prompt: 'Summarize this document...',
|
|
547
657
|
providerOptions: {
|
|
548
658
|
gateway: {
|
|
549
659
|
user: 'user-abc-123', // Track usage for this specific end-user
|
|
550
660
|
tags: ['document-summary', 'premium-feature'], // Categorize for reporting
|
|
551
|
-
} satisfies
|
|
661
|
+
} satisfies GatewayProviderOptions,
|
|
552
662
|
},
|
|
553
663
|
});
|
|
554
664
|
```
|
|
@@ -559,6 +669,77 @@ This allows you to:
|
|
|
559
669
|
- Filter and analyze spending by feature or use case using tags
|
|
560
670
|
- Track which users or features are driving the most AI usage
|
|
561
671
|
|
|
672
|
+
#### Querying Spend Reports
|
|
673
|
+
|
|
674
|
+
Use the `getSpendReport()` method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the [Custom Reporting docs](https://vercel.com/docs/ai-gateway/capabilities/custom-reporting).
|
|
675
|
+
|
|
676
|
+
```ts
|
|
677
|
+
import { gateway } from 'ai';
|
|
678
|
+
|
|
679
|
+
const report = await gateway.getSpendReport({
|
|
680
|
+
startDate: '2026-03-01',
|
|
681
|
+
endDate: '2026-03-25',
|
|
682
|
+
groupBy: 'model',
|
|
683
|
+
});
|
|
684
|
+
|
|
685
|
+
for (const row of report.results) {
|
|
686
|
+
console.log(`${row.model}: $${row.totalCost.toFixed(4)}`);
|
|
687
|
+
}
|
|
688
|
+
```
|
|
689
|
+
|
|
690
|
+
The `getSpendReport()` method accepts the following parameters:
|
|
691
|
+
|
|
692
|
+
- **startDate** _string_ - Start date in `YYYY-MM-DD` format (inclusive, required)
|
|
693
|
+
- **endDate** _string_ - End date in `YYYY-MM-DD` format (inclusive, required)
|
|
694
|
+
- **groupBy** _string_ - Aggregation dimension: `'day'` (default), `'user'`, `'model'`, `'tag'`, `'provider'`, or `'credential_type'`
|
|
695
|
+
- **datePart** _string_ - Time granularity when `groupBy` is `'day'`: `'day'` or `'hour'`
|
|
696
|
+
- **userId** _string_ - Filter to a specific user
|
|
697
|
+
- **model** _string_ - Filter to a specific model (e.g. `'anthropic/claude-sonnet-4.5'`)
|
|
698
|
+
- **provider** _string_ - Filter to a specific provider (e.g. `'anthropic'`)
|
|
699
|
+
- **credentialType** _string_ - Filter by `'byok'` or `'system'` credentials
|
|
700
|
+
- **tags** _string[]_ - Filter to requests matching these tags
|
|
701
|
+
|
|
702
|
+
Each row in `results` contains a grouping field (matching your `groupBy` choice) and metrics:
|
|
703
|
+
|
|
704
|
+
- **totalCost** _number_ - Total cost in USD
|
|
705
|
+
- **marketCost** _number_ - Market cost in USD
|
|
706
|
+
- **inputTokens** _number_ - Number of input tokens
|
|
707
|
+
- **outputTokens** _number_ - Number of output tokens
|
|
708
|
+
- **cachedInputTokens** _number_ - Number of cached input tokens
|
|
709
|
+
- **cacheCreationInputTokens** _number_ - Number of cache creation input tokens
|
|
710
|
+
- **reasoningTokens** _number_ - Number of reasoning tokens
|
|
711
|
+
- **requestCount** _number_ - Number of requests
|
|
712
|
+
|
|
713
|
+
You can combine tracking and querying to analyze spend by tags you defined:
|
|
714
|
+
|
|
715
|
+
```ts
|
|
716
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
717
|
+
import { gateway, streamText } from 'ai';
|
|
718
|
+
|
|
719
|
+
// 1. Make requests with tags
|
|
720
|
+
const result = streamText({
|
|
721
|
+
model: gateway('anthropic/claude-haiku-4.5'),
|
|
722
|
+
prompt: "Summarize this quarter's results",
|
|
723
|
+
providerOptions: {
|
|
724
|
+
gateway: {
|
|
725
|
+
tags: ['team:finance', 'feature:summaries'],
|
|
726
|
+
} satisfies GatewayProviderOptions,
|
|
727
|
+
},
|
|
728
|
+
});
|
|
729
|
+
|
|
730
|
+
// 2. Later, query spend filtered by those tags
|
|
731
|
+
const report = await gateway.getSpendReport({
|
|
732
|
+
startDate: '2026-03-01',
|
|
733
|
+
endDate: '2026-03-31',
|
|
734
|
+
groupBy: 'tag',
|
|
735
|
+
tags: ['team:finance'],
|
|
736
|
+
});
|
|
737
|
+
|
|
738
|
+
for (const row of report.results) {
|
|
739
|
+
console.log(`${row.tag}: $${row.totalCost.toFixed(4)} (${row.requestCount} requests)`);
|
|
740
|
+
}
|
|
741
|
+
```
|
|
742
|
+
|
|
562
743
|
## Provider Options
|
|
563
744
|
|
|
564
745
|
The AI Gateway provider accepts provider options that control routing behavior and provider-specific configurations.
|
|
@@ -568,17 +749,17 @@ The AI Gateway provider accepts provider options that control routing behavior a
|
|
|
568
749
|
You can use the `gateway` key in `providerOptions` to control how AI Gateway routes requests:
|
|
569
750
|
|
|
570
751
|
```ts
|
|
571
|
-
import type {
|
|
752
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
572
753
|
import { generateText } from 'ai';
|
|
573
754
|
|
|
574
755
|
const { text } = await generateText({
|
|
575
|
-
model: 'anthropic/claude-sonnet-4',
|
|
756
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
576
757
|
prompt: 'Explain quantum computing',
|
|
577
758
|
providerOptions: {
|
|
578
759
|
gateway: {
|
|
579
760
|
order: ['vertex', 'anthropic'], // Try Vertex AI first, then Anthropic
|
|
580
761
|
only: ['vertex', 'anthropic'], // Only use these providers
|
|
581
|
-
} satisfies
|
|
762
|
+
} satisfies GatewayProviderOptions,
|
|
582
763
|
},
|
|
583
764
|
});
|
|
584
765
|
```
|
|
@@ -601,7 +782,7 @@ The following gateway provider options are available:
|
|
|
601
782
|
|
|
602
783
|
Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the `model` parameter), then try each model in this array in order until one succeeds.
|
|
603
784
|
|
|
604
|
-
Example: `models: ['openai/gpt-5-nano', 'gemini-
|
|
785
|
+
Example: `models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview']` will try the fallback models in order if the primary model fails.
|
|
605
786
|
|
|
606
787
|
- **user** _string_
|
|
607
788
|
|
|
@@ -629,7 +810,19 @@ The following gateway provider options are available:
|
|
|
629
810
|
|
|
630
811
|
- **zeroDataRetention** _boolean_
|
|
631
812
|
|
|
632
|
-
Restricts routing
|
|
813
|
+
Restricts routing to providers that have zero data retention agreements with Vercel for AI Gateway. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers with zero data retention agreements will be used. If there are no providers available for the model with zero data retention, the request will fail. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
|
|
814
|
+
|
|
815
|
+
- **disallowPromptTraining** _boolean_
|
|
816
|
+
|
|
817
|
+
Restricts routing to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers that do not train on prompt data will be used. If there are no providers available for the model that disallow prompt training, the request will fail.
|
|
818
|
+
|
|
819
|
+
- **hipaaCompliant** _boolean_
|
|
820
|
+
|
|
821
|
+
Restricts routing to models and tools from providers that have signed a BAA with Vercel for the use of AI Gateway (requires Vercel HIPAA BAA add on). BYOK credentials are skipped when `hipaaCompliant` is set to `true` to ensure that requests are only routed to providers that support HIPAA compliance.
|
|
822
|
+
|
|
823
|
+
- **quotaEntityId** _string_
|
|
824
|
+
|
|
825
|
+
The unique identifier for the entity against which quota is tracked. Used for quota management and enforcement purposes.
|
|
633
826
|
|
|
634
827
|
- **providerTimeouts** _object_
|
|
635
828
|
|
|
@@ -642,17 +835,17 @@ The following gateway provider options are available:
|
|
|
642
835
|
You can combine these options to have fine-grained control over routing and tracking:
|
|
643
836
|
|
|
644
837
|
```ts
|
|
645
|
-
import type {
|
|
838
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
646
839
|
import { generateText } from 'ai';
|
|
647
840
|
|
|
648
841
|
const { text } = await generateText({
|
|
649
|
-
model: 'anthropic/claude-sonnet-4',
|
|
842
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
650
843
|
prompt: 'Write a haiku about programming',
|
|
651
844
|
providerOptions: {
|
|
652
845
|
gateway: {
|
|
653
846
|
order: ['vertex'], // Prefer Vertex AI
|
|
654
847
|
only: ['anthropic', 'vertex'], // Only allow these providers
|
|
655
|
-
} satisfies
|
|
848
|
+
} satisfies GatewayProviderOptions,
|
|
656
849
|
},
|
|
657
850
|
});
|
|
658
851
|
```
|
|
@@ -662,43 +855,98 @@ const { text } = await generateText({
|
|
|
662
855
|
The `models` option enables automatic fallback to alternative models when the primary model fails:
|
|
663
856
|
|
|
664
857
|
```ts
|
|
665
|
-
import type {
|
|
858
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
666
859
|
import { generateText } from 'ai';
|
|
667
860
|
|
|
668
861
|
const { text } = await generateText({
|
|
669
|
-
model: 'openai/gpt-
|
|
862
|
+
model: 'openai/gpt-5.4', // Primary model
|
|
670
863
|
prompt: 'Write a TypeScript haiku',
|
|
671
864
|
providerOptions: {
|
|
672
865
|
gateway: {
|
|
673
|
-
models: ['openai/gpt-5-nano', 'gemini-
|
|
674
|
-
} satisfies
|
|
866
|
+
models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview'], // Fallback models
|
|
867
|
+
} satisfies GatewayProviderOptions,
|
|
675
868
|
},
|
|
676
869
|
});
|
|
677
870
|
|
|
678
871
|
// This will:
|
|
679
|
-
// 1. Try openai/gpt-
|
|
680
|
-
// 2. If it fails, try openai/gpt-5-nano
|
|
681
|
-
// 3. If that fails, try gemini-
|
|
872
|
+
// 1. Try openai/gpt-5.4 first
|
|
873
|
+
// 2. If it fails, try openai/gpt-5.4-nano
|
|
874
|
+
// 3. If that fails, try gemini-3-flash-preview
|
|
682
875
|
// 4. Return the result from the first model that succeeds
|
|
683
876
|
```
|
|
684
877
|
|
|
685
878
|
#### Zero Data Retention Example
|
|
686
879
|
|
|
687
|
-
Set `zeroDataRetention` to true to
|
|
688
|
-
that have zero data retention policies. When `zeroDataRetention` is `false` or not
|
|
689
|
-
specified, there is no enforcement of restricting routing.
|
|
880
|
+
Set `zeroDataRetention` to true to route requests to providers that have zero data retention agreements with Vercel for AI Gateway. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers with zero data retention agreements will be used. If there are no providers available for the model with zero data retention, the request will fail. When `zeroDataRetention` is `false` or not specified, there is no enforcement of restricting routing. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
|
|
690
881
|
|
|
691
882
|
```ts
|
|
692
|
-
import type {
|
|
883
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
693
884
|
import { generateText } from 'ai';
|
|
694
885
|
|
|
695
886
|
const { text } = await generateText({
|
|
696
|
-
model: 'anthropic/claude-sonnet-4.
|
|
887
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
697
888
|
prompt: 'Analyze this sensitive document...',
|
|
698
889
|
providerOptions: {
|
|
699
890
|
gateway: {
|
|
700
891
|
zeroDataRetention: true,
|
|
701
|
-
} satisfies
|
|
892
|
+
} satisfies GatewayProviderOptions,
|
|
893
|
+
},
|
|
894
|
+
});
|
|
895
|
+
```
|
|
896
|
+
|
|
897
|
+
#### Disallow Prompt Training Example
|
|
898
|
+
|
|
899
|
+
Set `disallowPromptTraining` to true to route requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. When using BYOK credentials, this filter is not applied. If BYOK credentials fail and the request falls back to system credentials, only providers that do not train on prompt data will be used. If there are no providers available for the model that disallow prompt training, the request will fail. When `disallowPromptTraining` is `false` or not specified, there is no enforcement of restricting routing.
|
|
900
|
+
|
|
901
|
+
```ts
|
|
902
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
903
|
+
import { generateText } from 'ai';
|
|
904
|
+
|
|
905
|
+
const { text } = await generateText({
|
|
906
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
907
|
+
prompt: 'Analyze this proprietary business data...',
|
|
908
|
+
providerOptions: {
|
|
909
|
+
gateway: {
|
|
910
|
+
disallowPromptTraining: true,
|
|
911
|
+
} satisfies GatewayProviderOptions,
|
|
912
|
+
},
|
|
913
|
+
});
|
|
914
|
+
```
|
|
915
|
+
|
|
916
|
+
#### HIPAA Compliance Example
|
|
917
|
+
|
|
918
|
+
Set `hipaaCompliant` to true to route requests only to models or tools by providers that have signed a BAA with Vercel for the use of AI Gateway. If the model or tool does not have a HIPAA-compliant provider, the request will fail. When `hipaaCompliant` is `false` or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when `hipaaCompliant` is set to `true` to ensure that requests are only routed to providers that support HIPAA compliance.
|
|
919
|
+
|
|
920
|
+
```ts
|
|
921
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
922
|
+
import { generateText } from 'ai';
|
|
923
|
+
|
|
924
|
+
const { text } = await generateText({
|
|
925
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
926
|
+
prompt: 'Analyze this patient data...',
|
|
927
|
+
providerOptions: {
|
|
928
|
+
gateway: {
|
|
929
|
+
hipaaCompliant: true,
|
|
930
|
+
} satisfies GatewayProviderOptions,
|
|
931
|
+
},
|
|
932
|
+
});
|
|
933
|
+
```
|
|
934
|
+
|
|
935
|
+
#### Quota Entity ID Example
|
|
936
|
+
|
|
937
|
+
Set `quotaEntityId` to track and enforce quota against a specific entity. This is useful for multi-tenant applications where you need to manage quota at the entity level (e.g., per organization or team).
|
|
938
|
+
|
|
939
|
+
```ts
|
|
940
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
941
|
+
import { generateText } from 'ai';
|
|
942
|
+
|
|
943
|
+
const { text } = await generateText({
|
|
944
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
945
|
+
prompt: 'Summarize this report...',
|
|
946
|
+
providerOptions: {
|
|
947
|
+
gateway: {
|
|
948
|
+
quotaEntityId: 'org-123',
|
|
949
|
+
} satisfies GatewayProviderOptions,
|
|
702
950
|
},
|
|
703
951
|
});
|
|
704
952
|
```
|
|
@@ -709,16 +957,16 @@ When using provider-specific options through AI Gateway, use the actual provider
|
|
|
709
957
|
|
|
710
958
|
```ts
|
|
711
959
|
import type { AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
|
|
712
|
-
import type {
|
|
960
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
713
961
|
import { generateText } from 'ai';
|
|
714
962
|
|
|
715
963
|
const { text } = await generateText({
|
|
716
|
-
model: 'anthropic/claude-sonnet-4',
|
|
964
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
717
965
|
prompt: 'Explain quantum computing',
|
|
718
966
|
providerOptions: {
|
|
719
967
|
gateway: {
|
|
720
968
|
order: ['vertex', 'anthropic'],
|
|
721
|
-
} satisfies
|
|
969
|
+
} satisfies GatewayProviderOptions,
|
|
722
970
|
anthropic: {
|
|
723
971
|
thinking: { type: 'enabled', budgetTokens: 12000 },
|
|
724
972
|
} satisfies AnthropicLanguageModelOptions,
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-sdk/gateway",
|
|
3
3
|
"private": false,
|
|
4
|
-
"version": "4.0.0-beta.
|
|
4
|
+
"version": "4.0.0-beta.41",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"sideEffects": false,
|
|
7
7
|
"main": "./dist/index.js",
|
|
@@ -30,9 +30,9 @@
|
|
|
30
30
|
}
|
|
31
31
|
},
|
|
32
32
|
"dependencies": {
|
|
33
|
-
"@vercel/oidc": "3.
|
|
34
|
-
"@ai-sdk/provider": "4.0.0-beta.
|
|
35
|
-
"@ai-sdk/provider-utils": "5.0.0-beta.
|
|
33
|
+
"@vercel/oidc": "3.2.0",
|
|
34
|
+
"@ai-sdk/provider": "4.0.0-beta.8",
|
|
35
|
+
"@ai-sdk/provider-utils": "5.0.0-beta.14"
|
|
36
36
|
},
|
|
37
37
|
"devDependencies": {
|
|
38
38
|
"@types/node": "18.15.11",
|
|
@@ -68,9 +68,7 @@
|
|
|
68
68
|
"build:watch": "pnpm clean && tsup --watch",
|
|
69
69
|
"clean": "del-cli dist docs *.tsbuildinfo",
|
|
70
70
|
"generate-model-settings": "tsx scripts/generate-model-settings.ts",
|
|
71
|
-
"lint": "eslint \"./**/*.ts*\"",
|
|
72
71
|
"type-check": "tsc --build",
|
|
73
|
-
"prettier-check": "prettier --check \"./**/*.ts*\"",
|
|
74
72
|
"test": "pnpm test:node && pnpm test:edge",
|
|
75
73
|
"test:update": "pnpm test:node -u",
|
|
76
74
|
"test:watch": "vitest --config vitest.node.config.js",
|
|
@@ -5,6 +5,7 @@ export type GatewayEmbeddingModelId =
|
|
|
5
5
|
| 'amazon/titan-embed-text-v2'
|
|
6
6
|
| 'cohere/embed-v4.0'
|
|
7
7
|
| 'google/gemini-embedding-001'
|
|
8
|
+
| 'google/gemini-embedding-2'
|
|
8
9
|
| 'google/text-embedding-005'
|
|
9
10
|
| 'google/text-multilingual-embedding-002'
|
|
10
11
|
| 'mistral/codestral-embed'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type {
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
EmbeddingModelV4,
|
|
3
|
+
SharedV4ProviderMetadata,
|
|
4
4
|
} from '@ai-sdk/provider';
|
|
5
5
|
import {
|
|
6
6
|
combineHeaders,
|
|
@@ -17,8 +17,8 @@ import { asGatewayError } from './errors';
|
|
|
17
17
|
import { parseAuthMethod } from './errors/parse-auth-method';
|
|
18
18
|
import type { GatewayConfig } from './gateway-config';
|
|
19
19
|
|
|
20
|
-
export class GatewayEmbeddingModel implements
|
|
21
|
-
readonly specificationVersion = '
|
|
20
|
+
export class GatewayEmbeddingModel implements EmbeddingModelV4 {
|
|
21
|
+
readonly specificationVersion = 'v4';
|
|
22
22
|
readonly maxEmbeddingsPerCall = 2048;
|
|
23
23
|
readonly supportsParallelCalls = true;
|
|
24
24
|
|
|
@@ -39,8 +39,8 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
|
|
|
39
39
|
headers,
|
|
40
40
|
abortSignal,
|
|
41
41
|
providerOptions,
|
|
42
|
-
}: Parameters<
|
|
43
|
-
Awaited<ReturnType<
|
|
42
|
+
}: Parameters<EmbeddingModelV4['doEmbed']>[0]): Promise<
|
|
43
|
+
Awaited<ReturnType<EmbeddingModelV4['doEmbed']>>
|
|
44
44
|
> {
|
|
45
45
|
const resolvedHeaders = await resolve(this.config.headers());
|
|
46
46
|
try {
|
|
@@ -75,7 +75,7 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
|
|
|
75
75
|
embeddings: responseBody.embeddings,
|
|
76
76
|
usage: responseBody.usage ?? undefined,
|
|
77
77
|
providerMetadata:
|
|
78
|
-
responseBody.providerMetadata as unknown as
|
|
78
|
+
responseBody.providerMetadata as unknown as SharedV4ProviderMetadata,
|
|
79
79
|
response: { headers: responseHeaders, body: rawValue },
|
|
80
80
|
warnings: [],
|
|
81
81
|
};
|
|
@@ -90,7 +90,7 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
|
|
|
90
90
|
|
|
91
91
|
private getModelConfigHeaders() {
|
|
92
92
|
return {
|
|
93
|
-
'ai-embedding-model-specification-version': '
|
|
93
|
+
'ai-embedding-model-specification-version': '4',
|
|
94
94
|
'ai-model-id': this.modelId,
|
|
95
95
|
};
|
|
96
96
|
}
|
|
@@ -101,7 +101,7 @@ const gatewayAvailableModelsResponseSchema = lazySchema(() =>
|
|
|
101
101
|
)
|
|
102
102
|
.nullish(),
|
|
103
103
|
specification: z.object({
|
|
104
|
-
specificationVersion: z.literal('
|
|
104
|
+
specificationVersion: z.literal('v4'),
|
|
105
105
|
provider: z.string(),
|
|
106
106
|
modelId: z.string(),
|
|
107
107
|
}),
|