@ai-sdk/gateway 4.0.0-beta.3 → 4.0.0-beta.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +181 -4
- package/dist/index.d.mts +131 -21
- package/dist/index.d.ts +131 -21
- package/dist/index.js +359 -144
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +415 -186
- package/dist/index.mjs.map +1 -1
- package/docs/00-ai-gateway.mdx +219 -44
- package/package.json +4 -6
- package/src/gateway-embedding-model-settings.ts +1 -0
- package/src/gateway-embedding-model.ts +8 -8
- package/src/gateway-fetch-metadata.ts +1 -1
- package/src/gateway-generation-info.ts +147 -0
- package/src/gateway-image-model-settings.ts +6 -0
- package/src/gateway-image-model.ts +10 -10
- package/src/gateway-language-model-settings.ts +18 -6
- package/src/gateway-language-model.ts +19 -19
- package/src/gateway-model-entry.ts +2 -2
- package/src/gateway-provider-options.ts +8 -4
- package/src/gateway-provider.ts +75 -17
- package/src/gateway-spend-report.ts +191 -0
- package/src/gateway-video-model.ts +15 -15
- package/src/index.ts +12 -3
package/docs/00-ai-gateway.mdx
CHANGED
|
@@ -29,7 +29,7 @@ For most use cases, you can use the AI Gateway directly with a model string:
|
|
|
29
29
|
import { generateText } from 'ai';
|
|
30
30
|
|
|
31
31
|
const { text } = await generateText({
|
|
32
|
-
model: 'openai/gpt-5',
|
|
32
|
+
model: 'openai/gpt-5.4',
|
|
33
33
|
prompt: 'Hello world',
|
|
34
34
|
});
|
|
35
35
|
```
|
|
@@ -39,7 +39,7 @@ const { text } = await generateText({
|
|
|
39
39
|
import { generateText, gateway } from 'ai';
|
|
40
40
|
|
|
41
41
|
const { text } = await generateText({
|
|
42
|
-
model: gateway('openai/gpt-5'),
|
|
42
|
+
model: gateway('openai/gpt-5.4'),
|
|
43
43
|
prompt: 'Hello world',
|
|
44
44
|
});
|
|
45
45
|
```
|
|
@@ -169,7 +169,7 @@ You can create language models using a provider instance. The first argument is
|
|
|
169
169
|
import { generateText } from 'ai';
|
|
170
170
|
|
|
171
171
|
const { text } = await generateText({
|
|
172
|
-
model: 'openai/gpt-5',
|
|
172
|
+
model: 'openai/gpt-5.4',
|
|
173
173
|
prompt: 'Explain quantum computing in simple terms',
|
|
174
174
|
});
|
|
175
175
|
```
|
|
@@ -215,7 +215,7 @@ availableModels.models.forEach(model => {
|
|
|
215
215
|
|
|
216
216
|
// Use any discovered model with plain string
|
|
217
217
|
const { text } = await generateText({
|
|
218
|
-
model: availableModels.models[0].id, // e.g., 'openai/gpt-
|
|
218
|
+
model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
|
|
219
219
|
prompt: 'Hello world',
|
|
220
220
|
});
|
|
221
221
|
```
|
|
@@ -238,6 +238,86 @@ The `getCredits()` method returns your team's credit information based on the au
|
|
|
238
238
|
- **balance** _number_ - Your team's current available credit balance
|
|
239
239
|
- **total_used** _number_ - Total credits consumed by your team
|
|
240
240
|
|
|
241
|
+
## Generation Lookup
|
|
242
|
+
|
|
243
|
+
Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in `providerMetadata.gateway.generationId` on both `generateText` and `streamText` responses.
|
|
244
|
+
|
|
245
|
+
When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via `getGenerationInfo()`.
|
|
246
|
+
|
|
247
|
+
```ts
|
|
248
|
+
import { gateway, generateText } from 'ai';
|
|
249
|
+
|
|
250
|
+
// Make a request
|
|
251
|
+
const result = await generateText({
|
|
252
|
+
model: gateway('anthropic/claude-sonnet-4'),
|
|
253
|
+
prompt: 'Explain quantum entanglement briefly',
|
|
254
|
+
});
|
|
255
|
+
|
|
256
|
+
// Get the generation ID from provider metadata
|
|
257
|
+
const generationId = result.providerMetadata?.gateway?.generationId;
|
|
258
|
+
|
|
259
|
+
// Look up detailed generation info
|
|
260
|
+
const generation = await gateway.getGenerationInfo({ id: generationId });
|
|
261
|
+
|
|
262
|
+
console.log(`Model: ${generation.model}`);
|
|
263
|
+
console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
|
|
264
|
+
console.log(`Latency: ${generation.latency}ms`);
|
|
265
|
+
console.log(`Prompt tokens: ${generation.promptTokens}`);
|
|
266
|
+
console.log(`Completion tokens: ${generation.completionTokens}`);
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
With `streamText`, you can capture the generation ID from the first chunk via `fullStream`:
|
|
270
|
+
|
|
271
|
+
```ts
|
|
272
|
+
import { gateway, streamText } from 'ai';
|
|
273
|
+
|
|
274
|
+
const result = streamText({
|
|
275
|
+
model: gateway('anthropic/claude-sonnet-4'),
|
|
276
|
+
prompt: 'Explain quantum entanglement briefly',
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
let generationId: string | undefined;
|
|
280
|
+
|
|
281
|
+
for await (const part of result.fullStream) {
|
|
282
|
+
if (!generationId && part.providerMetadata?.gateway?.generationId) {
|
|
283
|
+
generationId = part.providerMetadata.gateway.generationId as string;
|
|
284
|
+
console.log(`Generation ID (early): ${generationId}`);
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
// Look up cost and usage after the stream completes
|
|
289
|
+
if (generationId) {
|
|
290
|
+
const generation = await gateway.getGenerationInfo({ id: generationId });
|
|
291
|
+
console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
|
|
292
|
+
console.log(`Finish reason: ${generation.finishReason}`);
|
|
293
|
+
}
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
The `getGenerationInfo()` method accepts:
|
|
297
|
+
|
|
298
|
+
- **id** _string_ - The generation ID to look up (format: `gen_<ulid>`, required)
|
|
299
|
+
|
|
300
|
+
It returns a `GatewayGenerationInfo` object with the following fields:
|
|
301
|
+
|
|
302
|
+
- **id** _string_ - The generation ID
|
|
303
|
+
- **totalCost** _number_ - Total cost in USD
|
|
304
|
+
- **upstreamInferenceCost** _number_ - Upstream inference cost in USD (relevant for BYOK)
|
|
305
|
+
- **usage** _number_ - Usage cost in USD (same as totalCost)
|
|
306
|
+
- **createdAt** _string_ - ISO 8601 timestamp when the generation was created
|
|
307
|
+
- **model** _string_ - Model identifier used
|
|
308
|
+
- **isByok** _boolean_ - Whether Bring Your Own Key credentials were used
|
|
309
|
+
- **providerName** _string_ - The provider that served this generation
|
|
310
|
+
- **streamed** _boolean_ - Whether streaming was used
|
|
311
|
+
- **finishReason** _string_ - Finish reason (e.g. `'stop'`)
|
|
312
|
+
- **latency** _number_ - Time to first token in milliseconds
|
|
313
|
+
- **generationTime** _number_ - Total generation time in milliseconds
|
|
314
|
+
- **promptTokens** _number_ - Number of prompt tokens
|
|
315
|
+
- **completionTokens** _number_ - Number of completion tokens
|
|
316
|
+
- **reasoningTokens** _number_ - Reasoning tokens used (if applicable)
|
|
317
|
+
- **cachedTokens** _number_ - Cached tokens used (if applicable)
|
|
318
|
+
- **cacheCreationTokens** _number_ - Cache creation input tokens
|
|
319
|
+
- **billableWebSearchCalls** _number_ - Number of billable web search calls
|
|
320
|
+
|
|
241
321
|
## Examples
|
|
242
322
|
|
|
243
323
|
### Basic Text Generation
|
|
@@ -246,7 +326,7 @@ The `getCredits()` method returns your team's credit information based on the au
|
|
|
246
326
|
import { generateText } from 'ai';
|
|
247
327
|
|
|
248
328
|
const { text } = await generateText({
|
|
249
|
-
model: 'anthropic/claude-sonnet-4',
|
|
329
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
250
330
|
prompt: 'Write a haiku about programming',
|
|
251
331
|
});
|
|
252
332
|
|
|
@@ -259,7 +339,7 @@ console.log(text);
|
|
|
259
339
|
import { streamText } from 'ai';
|
|
260
340
|
|
|
261
341
|
const { textStream } = await streamText({
|
|
262
|
-
model: 'openai/gpt-5',
|
|
342
|
+
model: 'openai/gpt-5.4',
|
|
263
343
|
prompt: 'Explain the benefits of serverless architecture',
|
|
264
344
|
});
|
|
265
345
|
|
|
@@ -297,13 +377,13 @@ const { text } = await generateText({
|
|
|
297
377
|
Some providers offer tools that are executed by the provider itself, such as [OpenAI's web search tool](/providers/ai-sdk-providers/openai#web-search-tool). To use these tools through AI Gateway, import the provider to access the tool definitions:
|
|
298
378
|
|
|
299
379
|
```ts
|
|
300
|
-
import { generateText,
|
|
380
|
+
import { generateText, isStepCount } from 'ai';
|
|
301
381
|
import { openai } from '@ai-sdk/openai';
|
|
302
382
|
|
|
303
383
|
const result = await generateText({
|
|
304
|
-
model: 'openai/gpt-5-mini',
|
|
384
|
+
model: 'openai/gpt-5.4-mini',
|
|
305
385
|
prompt: 'What is the Vercel AI Gateway?',
|
|
306
|
-
stopWhen:
|
|
386
|
+
stopWhen: isStepCount(10),
|
|
307
387
|
tools: {
|
|
308
388
|
web_search: openai.tools.webSearch({}),
|
|
309
389
|
},
|
|
@@ -330,7 +410,7 @@ The Perplexity Search tool enables models to search the web using [Perplexity's
|
|
|
330
410
|
import { gateway, generateText } from 'ai';
|
|
331
411
|
|
|
332
412
|
const result = await generateText({
|
|
333
|
-
model: 'openai/gpt-5-nano',
|
|
413
|
+
model: 'openai/gpt-5.4-nano',
|
|
334
414
|
prompt: 'Search for news about AI regulations in January 2025.',
|
|
335
415
|
tools: {
|
|
336
416
|
perplexity_search: gateway.tools.perplexitySearch(),
|
|
@@ -348,7 +428,7 @@ You can also configure the search with optional parameters:
|
|
|
348
428
|
import { gateway, generateText } from 'ai';
|
|
349
429
|
|
|
350
430
|
const result = await generateText({
|
|
351
|
-
model: 'openai/gpt-5-nano',
|
|
431
|
+
model: 'openai/gpt-5.4-nano',
|
|
352
432
|
prompt:
|
|
353
433
|
'Search for news about AI regulations from the first week of January 2025.',
|
|
354
434
|
tools: {
|
|
@@ -402,7 +482,7 @@ The tool works with both `generateText` and `streamText`:
|
|
|
402
482
|
import { gateway, streamText } from 'ai';
|
|
403
483
|
|
|
404
484
|
const result = streamText({
|
|
405
|
-
model: 'openai/gpt-5-nano',
|
|
485
|
+
model: 'openai/gpt-5.4-nano',
|
|
406
486
|
prompt: 'Search for the latest news about AI regulations.',
|
|
407
487
|
tools: {
|
|
408
488
|
perplexity_search: gateway.tools.perplexitySearch(),
|
|
@@ -432,7 +512,7 @@ The Parallel Search tool enables models to search the web using [Parallel AI's S
|
|
|
432
512
|
import { gateway, generateText } from 'ai';
|
|
433
513
|
|
|
434
514
|
const result = await generateText({
|
|
435
|
-
model: 'openai/gpt-5-nano',
|
|
515
|
+
model: 'openai/gpt-5.4-nano',
|
|
436
516
|
prompt: 'Research the latest developments in quantum computing.',
|
|
437
517
|
tools: {
|
|
438
518
|
parallel_search: gateway.tools.parallelSearch(),
|
|
@@ -450,7 +530,7 @@ You can also configure the search with optional parameters:
|
|
|
450
530
|
import { gateway, generateText } from 'ai';
|
|
451
531
|
|
|
452
532
|
const result = await generateText({
|
|
453
|
-
model: 'openai/gpt-5-nano',
|
|
533
|
+
model: 'openai/gpt-5.4-nano',
|
|
454
534
|
prompt: 'Find detailed information about TypeScript 5.0 features.',
|
|
455
535
|
tools: {
|
|
456
536
|
parallel_search: gateway.tools.parallelSearch({
|
|
@@ -511,7 +591,7 @@ The tool works with both `generateText` and `streamText`:
|
|
|
511
591
|
import { gateway, streamText } from 'ai';
|
|
512
592
|
|
|
513
593
|
const result = streamText({
|
|
514
|
-
model: 'openai/gpt-5-nano',
|
|
594
|
+
model: 'openai/gpt-5.4-nano',
|
|
515
595
|
prompt: 'Research the latest AI safety guidelines.',
|
|
516
596
|
tools: {
|
|
517
597
|
parallel_search: gateway.tools.parallelSearch(),
|
|
@@ -533,22 +613,24 @@ for await (const part of result.fullStream) {
|
|
|
533
613
|
}
|
|
534
614
|
```
|
|
535
615
|
|
|
536
|
-
###
|
|
616
|
+
### Custom Reporting
|
|
617
|
+
|
|
618
|
+
Track usage per end-user and categorize requests with tags, then query the data through the reporting API.
|
|
537
619
|
|
|
538
|
-
|
|
620
|
+
#### Usage Tracking with User and Tags
|
|
539
621
|
|
|
540
622
|
```ts
|
|
541
|
-
import type {
|
|
623
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
542
624
|
import { generateText } from 'ai';
|
|
543
625
|
|
|
544
626
|
const { text } = await generateText({
|
|
545
|
-
model: 'openai/gpt-5',
|
|
627
|
+
model: 'openai/gpt-5.4',
|
|
546
628
|
prompt: 'Summarize this document...',
|
|
547
629
|
providerOptions: {
|
|
548
630
|
gateway: {
|
|
549
631
|
user: 'user-abc-123', // Track usage for this specific end-user
|
|
550
632
|
tags: ['document-summary', 'premium-feature'], // Categorize for reporting
|
|
551
|
-
} satisfies
|
|
633
|
+
} satisfies GatewayProviderOptions,
|
|
552
634
|
},
|
|
553
635
|
});
|
|
554
636
|
```
|
|
@@ -559,6 +641,77 @@ This allows you to:
|
|
|
559
641
|
- Filter and analyze spending by feature or use case using tags
|
|
560
642
|
- Track which users or features are driving the most AI usage
|
|
561
643
|
|
|
644
|
+
#### Querying Spend Reports
|
|
645
|
+
|
|
646
|
+
Use the `getSpendReport()` method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the [Custom Reporting docs](https://vercel.com/docs/ai-gateway/capabilities/custom-reporting).
|
|
647
|
+
|
|
648
|
+
```ts
|
|
649
|
+
import { gateway } from 'ai';
|
|
650
|
+
|
|
651
|
+
const report = await gateway.getSpendReport({
|
|
652
|
+
startDate: '2026-03-01',
|
|
653
|
+
endDate: '2026-03-25',
|
|
654
|
+
groupBy: 'model',
|
|
655
|
+
});
|
|
656
|
+
|
|
657
|
+
for (const row of report.results) {
|
|
658
|
+
console.log(`${row.model}: $${row.totalCost.toFixed(4)}`);
|
|
659
|
+
}
|
|
660
|
+
```
|
|
661
|
+
|
|
662
|
+
The `getSpendReport()` method accepts the following parameters:
|
|
663
|
+
|
|
664
|
+
- **startDate** _string_ - Start date in `YYYY-MM-DD` format (inclusive, required)
|
|
665
|
+
- **endDate** _string_ - End date in `YYYY-MM-DD` format (inclusive, required)
|
|
666
|
+
- **groupBy** _string_ - Aggregation dimension: `'day'` (default), `'user'`, `'model'`, `'tag'`, `'provider'`, or `'credential_type'`
|
|
667
|
+
- **datePart** _string_ - Time granularity when `groupBy` is `'day'`: `'day'` or `'hour'`
|
|
668
|
+
- **userId** _string_ - Filter to a specific user
|
|
669
|
+
- **model** _string_ - Filter to a specific model (e.g. `'anthropic/claude-sonnet-4.5'`)
|
|
670
|
+
- **provider** _string_ - Filter to a specific provider (e.g. `'anthropic'`)
|
|
671
|
+
- **credentialType** _string_ - Filter by `'byok'` or `'system'` credentials
|
|
672
|
+
- **tags** _string[]_ - Filter to requests matching these tags
|
|
673
|
+
|
|
674
|
+
Each row in `results` contains a grouping field (matching your `groupBy` choice) and metrics:
|
|
675
|
+
|
|
676
|
+
- **totalCost** _number_ - Total cost in USD
|
|
677
|
+
- **marketCost** _number_ - Market cost in USD
|
|
678
|
+
- **inputTokens** _number_ - Number of input tokens
|
|
679
|
+
- **outputTokens** _number_ - Number of output tokens
|
|
680
|
+
- **cachedInputTokens** _number_ - Number of cached input tokens
|
|
681
|
+
- **cacheCreationInputTokens** _number_ - Number of cache creation input tokens
|
|
682
|
+
- **reasoningTokens** _number_ - Number of reasoning tokens
|
|
683
|
+
- **requestCount** _number_ - Number of requests
|
|
684
|
+
|
|
685
|
+
You can combine tracking and querying to analyze spend by tags you defined:
|
|
686
|
+
|
|
687
|
+
```ts
|
|
688
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
689
|
+
import { gateway, streamText } from 'ai';
|
|
690
|
+
|
|
691
|
+
// 1. Make requests with tags
|
|
692
|
+
const result = streamText({
|
|
693
|
+
model: gateway('anthropic/claude-haiku-4.5'),
|
|
694
|
+
prompt: "Summarize this quarter's results",
|
|
695
|
+
providerOptions: {
|
|
696
|
+
gateway: {
|
|
697
|
+
tags: ['team:finance', 'feature:summaries'],
|
|
698
|
+
} satisfies GatewayProviderOptions,
|
|
699
|
+
},
|
|
700
|
+
});
|
|
701
|
+
|
|
702
|
+
// 2. Later, query spend filtered by those tags
|
|
703
|
+
const report = await gateway.getSpendReport({
|
|
704
|
+
startDate: '2026-03-01',
|
|
705
|
+
endDate: '2026-03-31',
|
|
706
|
+
groupBy: 'tag',
|
|
707
|
+
tags: ['team:finance'],
|
|
708
|
+
});
|
|
709
|
+
|
|
710
|
+
for (const row of report.results) {
|
|
711
|
+
console.log(`${row.tag}: $${row.totalCost.toFixed(4)} (${row.requestCount} requests)`);
|
|
712
|
+
}
|
|
713
|
+
```
|
|
714
|
+
|
|
562
715
|
## Provider Options
|
|
563
716
|
|
|
564
717
|
The AI Gateway provider accepts provider options that control routing behavior and provider-specific configurations.
|
|
@@ -568,17 +721,17 @@ The AI Gateway provider accepts provider options that control routing behavior a
|
|
|
568
721
|
You can use the `gateway` key in `providerOptions` to control how AI Gateway routes requests:
|
|
569
722
|
|
|
570
723
|
```ts
|
|
571
|
-
import type {
|
|
724
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
572
725
|
import { generateText } from 'ai';
|
|
573
726
|
|
|
574
727
|
const { text } = await generateText({
|
|
575
|
-
model: 'anthropic/claude-sonnet-4',
|
|
728
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
576
729
|
prompt: 'Explain quantum computing',
|
|
577
730
|
providerOptions: {
|
|
578
731
|
gateway: {
|
|
579
732
|
order: ['vertex', 'anthropic'], // Try Vertex AI first, then Anthropic
|
|
580
733
|
only: ['vertex', 'anthropic'], // Only use these providers
|
|
581
|
-
} satisfies
|
|
734
|
+
} satisfies GatewayProviderOptions,
|
|
582
735
|
},
|
|
583
736
|
});
|
|
584
737
|
```
|
|
@@ -601,7 +754,7 @@ The following gateway provider options are available:
|
|
|
601
754
|
|
|
602
755
|
Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the `model` parameter), then try each model in this array in order until one succeeds.
|
|
603
756
|
|
|
604
|
-
Example: `models: ['openai/gpt-5-nano', 'gemini-
|
|
757
|
+
Example: `models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview']` will try the fallback models in order if the primary model fails.
|
|
605
758
|
|
|
606
759
|
- **user** _string_
|
|
607
760
|
|
|
@@ -629,7 +782,12 @@ The following gateway provider options are available:
|
|
|
629
782
|
|
|
630
783
|
- **zeroDataRetention** _boolean_
|
|
631
784
|
|
|
632
|
-
Restricts routing requests to providers that have zero data retention
|
|
785
|
+
Restricts routing requests to providers that have zero data retention agreements with Vercel for AI Gateway. If there are no providers available for the model with zero data retention, the request will fail. BYOK credentials are skipped when `zeroDataRetention` is set to `true` to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
|
|
786
|
+
|
|
787
|
+
- **disallowPromptTraining** _boolean_
|
|
788
|
+
|
|
789
|
+
Restricts routing requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. If there are no providers available for the model that disallow prompt training, the request will fail. BYOK credentials are skipped when `disallowPromptTraining` is set to `true` to ensure that requests are only routed to providers that do not train on prompt data.
|
|
790
|
+
|
|
633
791
|
|
|
634
792
|
- **providerTimeouts** _object_
|
|
635
793
|
|
|
@@ -642,17 +800,17 @@ The following gateway provider options are available:
|
|
|
642
800
|
You can combine these options to have fine-grained control over routing and tracking:
|
|
643
801
|
|
|
644
802
|
```ts
|
|
645
|
-
import type {
|
|
803
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
646
804
|
import { generateText } from 'ai';
|
|
647
805
|
|
|
648
806
|
const { text } = await generateText({
|
|
649
|
-
model: 'anthropic/claude-sonnet-4',
|
|
807
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
650
808
|
prompt: 'Write a haiku about programming',
|
|
651
809
|
providerOptions: {
|
|
652
810
|
gateway: {
|
|
653
811
|
order: ['vertex'], // Prefer Vertex AI
|
|
654
812
|
only: ['anthropic', 'vertex'], // Only allow these providers
|
|
655
|
-
} satisfies
|
|
813
|
+
} satisfies GatewayProviderOptions,
|
|
656
814
|
},
|
|
657
815
|
});
|
|
658
816
|
```
|
|
@@ -662,43 +820,60 @@ const { text } = await generateText({
|
|
|
662
820
|
The `models` option enables automatic fallback to alternative models when the primary model fails:
|
|
663
821
|
|
|
664
822
|
```ts
|
|
665
|
-
import type {
|
|
823
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
666
824
|
import { generateText } from 'ai';
|
|
667
825
|
|
|
668
826
|
const { text } = await generateText({
|
|
669
|
-
model: 'openai/gpt-
|
|
827
|
+
model: 'openai/gpt-5.4', // Primary model
|
|
670
828
|
prompt: 'Write a TypeScript haiku',
|
|
671
829
|
providerOptions: {
|
|
672
830
|
gateway: {
|
|
673
|
-
models: ['openai/gpt-5-nano', 'gemini-
|
|
674
|
-
} satisfies
|
|
831
|
+
models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview'], // Fallback models
|
|
832
|
+
} satisfies GatewayProviderOptions,
|
|
675
833
|
},
|
|
676
834
|
});
|
|
677
835
|
|
|
678
836
|
// This will:
|
|
679
|
-
// 1. Try openai/gpt-
|
|
680
|
-
// 2. If it fails, try openai/gpt-5-nano
|
|
681
|
-
// 3. If that fails, try gemini-
|
|
837
|
+
// 1. Try openai/gpt-5.4 first
|
|
838
|
+
// 2. If it fails, try openai/gpt-5.4-nano
|
|
839
|
+
// 3. If that fails, try gemini-3-flash-preview
|
|
682
840
|
// 4. Return the result from the first model that succeeds
|
|
683
841
|
```
|
|
684
842
|
|
|
685
843
|
#### Zero Data Retention Example
|
|
686
844
|
|
|
687
|
-
Set `zeroDataRetention` to true to ensure requests are only routed to providers
|
|
688
|
-
that have zero data retention policies. When `zeroDataRetention` is `false` or not
|
|
689
|
-
specified, there is no enforcement of restricting routing.
|
|
845
|
+
Set `zeroDataRetention` to true to route requests to providers that have zero data retention agreements with Vercel for AI Gateway. If there are no providers available for the model with zero data retention, the request will fail. When `zeroDataRetention` is `false` or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when `zeroDataRetention` is set to `true` to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
|
|
690
846
|
|
|
691
847
|
```ts
|
|
692
|
-
import type {
|
|
848
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
693
849
|
import { generateText } from 'ai';
|
|
694
850
|
|
|
695
851
|
const { text } = await generateText({
|
|
696
|
-
model: 'anthropic/claude-sonnet-4.
|
|
852
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
697
853
|
prompt: 'Analyze this sensitive document...',
|
|
698
854
|
providerOptions: {
|
|
699
855
|
gateway: {
|
|
700
856
|
zeroDataRetention: true,
|
|
701
|
-
} satisfies
|
|
857
|
+
} satisfies GatewayProviderOptions,
|
|
858
|
+
},
|
|
859
|
+
});
|
|
860
|
+
```
|
|
861
|
+
|
|
862
|
+
#### Disallow Prompt Training Example
|
|
863
|
+
|
|
864
|
+
Set `disallowPromptTraining` to true to route requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. If there are no providers available for the model that disallow prompt training, the request will fail. When `disallowPromptTraining` is `false` or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when `disallowPromptTraining` is set to `true` to ensure that requests are only routed to providers that do not train on prompt data.
|
|
865
|
+
|
|
866
|
+
```ts
|
|
867
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
868
|
+
import { generateText } from 'ai';
|
|
869
|
+
|
|
870
|
+
const { text } = await generateText({
|
|
871
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
872
|
+
prompt: 'Analyze this proprietary business data...',
|
|
873
|
+
providerOptions: {
|
|
874
|
+
gateway: {
|
|
875
|
+
disallowPromptTraining: true,
|
|
876
|
+
} satisfies GatewayProviderOptions,
|
|
702
877
|
},
|
|
703
878
|
});
|
|
704
879
|
```
|
|
@@ -709,16 +884,16 @@ When using provider-specific options through AI Gateway, use the actual provider
|
|
|
709
884
|
|
|
710
885
|
```ts
|
|
711
886
|
import type { AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
|
|
712
|
-
import type {
|
|
887
|
+
import type { GatewayProviderOptions } from '@ai-sdk/gateway';
|
|
713
888
|
import { generateText } from 'ai';
|
|
714
889
|
|
|
715
890
|
const { text } = await generateText({
|
|
716
|
-
model: 'anthropic/claude-sonnet-4',
|
|
891
|
+
model: 'anthropic/claude-sonnet-4.6',
|
|
717
892
|
prompt: 'Explain quantum computing',
|
|
718
893
|
providerOptions: {
|
|
719
894
|
gateway: {
|
|
720
895
|
order: ['vertex', 'anthropic'],
|
|
721
|
-
} satisfies
|
|
896
|
+
} satisfies GatewayProviderOptions,
|
|
722
897
|
anthropic: {
|
|
723
898
|
thinking: { type: 'enabled', budgetTokens: 12000 },
|
|
724
899
|
} satisfies AnthropicLanguageModelOptions,
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-sdk/gateway",
|
|
3
3
|
"private": false,
|
|
4
|
-
"version": "4.0.0-beta.
|
|
4
|
+
"version": "4.0.0-beta.30",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"sideEffects": false,
|
|
7
7
|
"main": "./dist/index.js",
|
|
@@ -30,9 +30,9 @@
|
|
|
30
30
|
}
|
|
31
31
|
},
|
|
32
32
|
"dependencies": {
|
|
33
|
-
"@vercel/oidc": "3.
|
|
34
|
-
"@ai-sdk/provider": "4.0.0-beta.
|
|
35
|
-
"@ai-sdk/provider-utils": "5.0.0-beta.
|
|
33
|
+
"@vercel/oidc": "3.2.0",
|
|
34
|
+
"@ai-sdk/provider": "4.0.0-beta.5",
|
|
35
|
+
"@ai-sdk/provider-utils": "5.0.0-beta.9"
|
|
36
36
|
},
|
|
37
37
|
"devDependencies": {
|
|
38
38
|
"@types/node": "18.15.11",
|
|
@@ -68,9 +68,7 @@
|
|
|
68
68
|
"build:watch": "pnpm clean && tsup --watch",
|
|
69
69
|
"clean": "del-cli dist docs *.tsbuildinfo",
|
|
70
70
|
"generate-model-settings": "tsx scripts/generate-model-settings.ts",
|
|
71
|
-
"lint": "eslint \"./**/*.ts*\"",
|
|
72
71
|
"type-check": "tsc --build",
|
|
73
|
-
"prettier-check": "prettier --check \"./**/*.ts*\"",
|
|
74
72
|
"test": "pnpm test:node && pnpm test:edge",
|
|
75
73
|
"test:update": "pnpm test:node -u",
|
|
76
74
|
"test:watch": "vitest --config vitest.node.config.js",
|
|
@@ -5,6 +5,7 @@ export type GatewayEmbeddingModelId =
|
|
|
5
5
|
| 'amazon/titan-embed-text-v2'
|
|
6
6
|
| 'cohere/embed-v4.0'
|
|
7
7
|
| 'google/gemini-embedding-001'
|
|
8
|
+
| 'google/gemini-embedding-2'
|
|
8
9
|
| 'google/text-embedding-005'
|
|
9
10
|
| 'google/text-multilingual-embedding-002'
|
|
10
11
|
| 'mistral/codestral-embed'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type {
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
EmbeddingModelV4,
|
|
3
|
+
SharedV4ProviderMetadata,
|
|
4
4
|
} from '@ai-sdk/provider';
|
|
5
5
|
import {
|
|
6
6
|
combineHeaders,
|
|
@@ -17,8 +17,8 @@ import { asGatewayError } from './errors';
|
|
|
17
17
|
import { parseAuthMethod } from './errors/parse-auth-method';
|
|
18
18
|
import type { GatewayConfig } from './gateway-config';
|
|
19
19
|
|
|
20
|
-
export class GatewayEmbeddingModel implements
|
|
21
|
-
readonly specificationVersion = '
|
|
20
|
+
export class GatewayEmbeddingModel implements EmbeddingModelV4 {
|
|
21
|
+
readonly specificationVersion = 'v4';
|
|
22
22
|
readonly maxEmbeddingsPerCall = 2048;
|
|
23
23
|
readonly supportsParallelCalls = true;
|
|
24
24
|
|
|
@@ -39,8 +39,8 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
|
|
|
39
39
|
headers,
|
|
40
40
|
abortSignal,
|
|
41
41
|
providerOptions,
|
|
42
|
-
}: Parameters<
|
|
43
|
-
Awaited<ReturnType<
|
|
42
|
+
}: Parameters<EmbeddingModelV4['doEmbed']>[0]): Promise<
|
|
43
|
+
Awaited<ReturnType<EmbeddingModelV4['doEmbed']>>
|
|
44
44
|
> {
|
|
45
45
|
const resolvedHeaders = await resolve(this.config.headers());
|
|
46
46
|
try {
|
|
@@ -75,7 +75,7 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
|
|
|
75
75
|
embeddings: responseBody.embeddings,
|
|
76
76
|
usage: responseBody.usage ?? undefined,
|
|
77
77
|
providerMetadata:
|
|
78
|
-
responseBody.providerMetadata as unknown as
|
|
78
|
+
responseBody.providerMetadata as unknown as SharedV4ProviderMetadata,
|
|
79
79
|
response: { headers: responseHeaders, body: rawValue },
|
|
80
80
|
warnings: [],
|
|
81
81
|
};
|
|
@@ -90,7 +90,7 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
|
|
|
90
90
|
|
|
91
91
|
private getModelConfigHeaders() {
|
|
92
92
|
return {
|
|
93
|
-
'ai-embedding-model-specification-version': '
|
|
93
|
+
'ai-embedding-model-specification-version': '4',
|
|
94
94
|
'ai-model-id': this.modelId,
|
|
95
95
|
};
|
|
96
96
|
}
|
|
@@ -101,7 +101,7 @@ const gatewayAvailableModelsResponseSchema = lazySchema(() =>
|
|
|
101
101
|
)
|
|
102
102
|
.nullish(),
|
|
103
103
|
specification: z.object({
|
|
104
|
-
specificationVersion: z.literal('
|
|
104
|
+
specificationVersion: z.literal('v4'),
|
|
105
105
|
provider: z.string(),
|
|
106
106
|
modelId: z.string(),
|
|
107
107
|
}),
|