@ai-sdk/gateway 3.0.99 → 3.0.101
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/index.d.mts +4 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.js +17 -2
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +17 -2
- package/dist/index.mjs.map +1 -1
- package/docs/00-ai-gateway.mdx +153 -139
- package/package.json +1 -1
- package/src/gateway-fetch-metadata.ts +45 -35
- package/src/gateway-model-entry.ts +11 -1
- package/src/gateway-provider-options.ts +8 -0
package/docs/00-ai-gateway.mdx
CHANGED
|
@@ -26,21 +26,21 @@ For most use cases, you can use the AI Gateway directly with a model string:
|
|
|
26
26
|
|
|
27
27
|
```ts
|
|
28
28
|
// use plain model string with global provider
|
|
29
|
-
import { generateText } from
|
|
29
|
+
import { generateText } from "ai";
|
|
30
30
|
|
|
31
31
|
const { text } = await generateText({
|
|
32
|
-
model:
|
|
33
|
-
prompt:
|
|
32
|
+
model: "openai/gpt-5.4",
|
|
33
|
+
prompt: "Hello world",
|
|
34
34
|
});
|
|
35
35
|
```
|
|
36
36
|
|
|
37
37
|
```ts
|
|
38
38
|
// use provider instance (requires version 5.0.36 or later)
|
|
39
|
-
import { generateText, gateway } from
|
|
39
|
+
import { generateText, gateway } from "ai";
|
|
40
40
|
|
|
41
41
|
const { text } = await generateText({
|
|
42
|
-
model: gateway(
|
|
43
|
-
prompt:
|
|
42
|
+
model: gateway("openai/gpt-5.4"),
|
|
43
|
+
prompt: "Hello world",
|
|
44
44
|
});
|
|
45
45
|
```
|
|
46
46
|
|
|
@@ -56,7 +56,7 @@ The AI SDK automatically uses the AI Gateway when you pass a model string in the
|
|
|
56
56
|
You can also import the default provider instance `gateway` from `ai`:
|
|
57
57
|
|
|
58
58
|
```ts
|
|
59
|
-
import { gateway } from
|
|
59
|
+
import { gateway } from "ai";
|
|
60
60
|
```
|
|
61
61
|
|
|
62
62
|
You may want to create a custom provider instance when you need to:
|
|
@@ -69,10 +69,10 @@ You may want to create a custom provider instance when you need to:
|
|
|
69
69
|
To create a custom provider instance, import `createGateway` from `ai`:
|
|
70
70
|
|
|
71
71
|
```ts
|
|
72
|
-
import { createGateway } from
|
|
72
|
+
import { createGateway } from "ai";
|
|
73
73
|
|
|
74
74
|
const gateway = createGateway({
|
|
75
|
-
apiKey: process.env.AI_GATEWAY_API_KEY ??
|
|
75
|
+
apiKey: process.env.AI_GATEWAY_API_KEY ?? "",
|
|
76
76
|
});
|
|
77
77
|
```
|
|
78
78
|
|
|
@@ -117,10 +117,10 @@ AI_GATEWAY_API_KEY=your_api_key_here
|
|
|
117
117
|
Or pass it directly to the provider:
|
|
118
118
|
|
|
119
119
|
```ts
|
|
120
|
-
import { createGateway } from
|
|
120
|
+
import { createGateway } from "ai";
|
|
121
121
|
|
|
122
122
|
const gateway = createGateway({
|
|
123
|
-
apiKey:
|
|
123
|
+
apiKey: "your_api_key_here",
|
|
124
124
|
});
|
|
125
125
|
```
|
|
126
126
|
|
|
@@ -166,11 +166,11 @@ Learn more in the [BYOK documentation](https://vercel.com/docs/ai-gateway/byok).
|
|
|
166
166
|
You can create language models using a provider instance. The first argument is the model ID in the format `creator/model-name`:
|
|
167
167
|
|
|
168
168
|
```ts
|
|
169
|
-
import { generateText } from
|
|
169
|
+
import { generateText } from "ai";
|
|
170
170
|
|
|
171
171
|
const { text } = await generateText({
|
|
172
|
-
model:
|
|
173
|
-
prompt:
|
|
172
|
+
model: "openai/gpt-5.4",
|
|
173
|
+
prompt: "Explain quantum computing in simple terms",
|
|
174
174
|
});
|
|
175
175
|
```
|
|
176
176
|
|
|
@@ -181,16 +181,16 @@ AI Gateway language models can also be used in the `streamText` function and sup
|
|
|
181
181
|
You can create reranking models using the `rerankingModel` method on the provider instance:
|
|
182
182
|
|
|
183
183
|
```ts
|
|
184
|
-
import { rerank } from
|
|
185
|
-
import { gateway } from
|
|
184
|
+
import { rerank } from "ai";
|
|
185
|
+
import { gateway } from "@ai-sdk/gateway";
|
|
186
186
|
|
|
187
187
|
const { ranking } = await rerank({
|
|
188
|
-
model: gateway.rerankingModel(
|
|
189
|
-
query:
|
|
188
|
+
model: gateway.rerankingModel("cohere/rerank-v3.5"),
|
|
189
|
+
query: "What is the capital of France?",
|
|
190
190
|
documents: [
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
191
|
+
"Paris is the capital of France.",
|
|
192
|
+
"Berlin is the capital of Germany.",
|
|
193
|
+
"Madrid is the capital of Spain.",
|
|
194
194
|
],
|
|
195
195
|
topN: 2,
|
|
196
196
|
});
|
|
@@ -215,12 +215,12 @@ For the complete list of available models, see the [AI Gateway documentation](ht
|
|
|
215
215
|
You can discover available models programmatically:
|
|
216
216
|
|
|
217
217
|
```ts
|
|
218
|
-
import { gateway, generateText } from
|
|
218
|
+
import { gateway, generateText } from "ai";
|
|
219
219
|
|
|
220
220
|
const availableModels = await gateway.getAvailableModels();
|
|
221
221
|
|
|
222
222
|
// List all available models
|
|
223
|
-
availableModels.models.forEach(model => {
|
|
223
|
+
availableModels.models.forEach((model) => {
|
|
224
224
|
console.log(`${model.id}: ${model.name}`);
|
|
225
225
|
if (model.description) {
|
|
226
226
|
console.log(` Description: ${model.description}`);
|
|
@@ -244,7 +244,7 @@ availableModels.models.forEach(model => {
|
|
|
244
244
|
// Use any discovered model with plain string
|
|
245
245
|
const { text } = await generateText({
|
|
246
246
|
model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
|
|
247
|
-
prompt:
|
|
247
|
+
prompt: "Hello world",
|
|
248
248
|
});
|
|
249
249
|
```
|
|
250
250
|
|
|
@@ -253,7 +253,7 @@ const { text } = await generateText({
|
|
|
253
253
|
You can check your team's current credit balance and usage:
|
|
254
254
|
|
|
255
255
|
```ts
|
|
256
|
-
import { gateway } from
|
|
256
|
+
import { gateway } from "ai";
|
|
257
257
|
|
|
258
258
|
const credits = await gateway.getCredits();
|
|
259
259
|
|
|
@@ -273,12 +273,12 @@ Look up detailed information about a specific generation by its ID, including co
|
|
|
273
273
|
When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via `getGenerationInfo()`.
|
|
274
274
|
|
|
275
275
|
```ts
|
|
276
|
-
import { gateway, generateText } from
|
|
276
|
+
import { gateway, generateText } from "ai";
|
|
277
277
|
|
|
278
278
|
// Make a request
|
|
279
279
|
const result = await generateText({
|
|
280
|
-
model: gateway(
|
|
281
|
-
prompt:
|
|
280
|
+
model: gateway("anthropic/claude-sonnet-4"),
|
|
281
|
+
prompt: "Explain quantum entanglement briefly",
|
|
282
282
|
});
|
|
283
283
|
|
|
284
284
|
// Get the generation ID from provider metadata
|
|
@@ -297,11 +297,11 @@ console.log(`Completion tokens: ${generation.completionTokens}`);
|
|
|
297
297
|
With `streamText`, you can capture the generation ID from the first chunk via `fullStream`:
|
|
298
298
|
|
|
299
299
|
```ts
|
|
300
|
-
import { gateway, streamText } from
|
|
300
|
+
import { gateway, streamText } from "ai";
|
|
301
301
|
|
|
302
302
|
const result = streamText({
|
|
303
|
-
model: gateway(
|
|
304
|
-
prompt:
|
|
303
|
+
model: gateway("anthropic/claude-sonnet-4"),
|
|
304
|
+
prompt: "Explain quantum entanglement briefly",
|
|
305
305
|
});
|
|
306
306
|
|
|
307
307
|
let generationId: string | undefined;
|
|
@@ -351,11 +351,11 @@ It returns a `GatewayGenerationInfo` object with the following fields:
|
|
|
351
351
|
### Basic Text Generation
|
|
352
352
|
|
|
353
353
|
```ts
|
|
354
|
-
import { generateText } from
|
|
354
|
+
import { generateText } from "ai";
|
|
355
355
|
|
|
356
356
|
const { text } = await generateText({
|
|
357
|
-
model:
|
|
358
|
-
prompt:
|
|
357
|
+
model: "anthropic/claude-sonnet-4.6",
|
|
358
|
+
prompt: "Write a haiku about programming",
|
|
359
359
|
});
|
|
360
360
|
|
|
361
361
|
console.log(text);
|
|
@@ -364,11 +364,11 @@ console.log(text);
|
|
|
364
364
|
### Streaming
|
|
365
365
|
|
|
366
366
|
```ts
|
|
367
|
-
import { streamText } from
|
|
367
|
+
import { streamText } from "ai";
|
|
368
368
|
|
|
369
369
|
const { textStream } = await streamText({
|
|
370
|
-
model:
|
|
371
|
-
prompt:
|
|
370
|
+
model: "openai/gpt-5.4",
|
|
371
|
+
prompt: "Explain the benefits of serverless architecture",
|
|
372
372
|
});
|
|
373
373
|
|
|
374
374
|
for await (const textPart of textStream) {
|
|
@@ -379,17 +379,17 @@ for await (const textPart of textStream) {
|
|
|
379
379
|
### Tool Usage
|
|
380
380
|
|
|
381
381
|
```ts
|
|
382
|
-
import { generateText, tool } from
|
|
383
|
-
import { z } from
|
|
382
|
+
import { generateText, tool } from "ai";
|
|
383
|
+
import { z } from "zod";
|
|
384
384
|
|
|
385
385
|
const { text } = await generateText({
|
|
386
|
-
model:
|
|
387
|
-
prompt:
|
|
386
|
+
model: "xai/grok-4",
|
|
387
|
+
prompt: "What is the weather like in San Francisco?",
|
|
388
388
|
tools: {
|
|
389
389
|
getWeather: tool({
|
|
390
|
-
description:
|
|
390
|
+
description: "Get the current weather for a location",
|
|
391
391
|
parameters: z.object({
|
|
392
|
-
location: z.string().describe(
|
|
392
|
+
location: z.string().describe("The location to get weather for"),
|
|
393
393
|
}),
|
|
394
394
|
execute: async ({ location }) => {
|
|
395
395
|
// Your weather API call here
|
|
@@ -405,12 +405,12 @@ const { text } = await generateText({
|
|
|
405
405
|
Some providers offer tools that are executed by the provider itself, such as [OpenAI's web search tool](/providers/ai-sdk-providers/openai#web-search-tool). To use these tools through AI Gateway, import the provider to access the tool definitions:
|
|
406
406
|
|
|
407
407
|
```ts
|
|
408
|
-
import { generateText, stepCountIs } from
|
|
409
|
-
import { openai } from
|
|
408
|
+
import { generateText, stepCountIs } from "ai";
|
|
409
|
+
import { openai } from "@ai-sdk/openai";
|
|
410
410
|
|
|
411
411
|
const result = await generateText({
|
|
412
|
-
model:
|
|
413
|
-
prompt:
|
|
412
|
+
model: "openai/gpt-5.4-mini",
|
|
413
|
+
prompt: "What is the Vercel AI Gateway?",
|
|
414
414
|
stopWhen: stepCountIs(10),
|
|
415
415
|
tools: {
|
|
416
416
|
web_search: openai.tools.webSearch({}),
|
|
@@ -435,43 +435,43 @@ The AI Gateway provider includes built-in tools that are executed by the gateway
|
|
|
435
435
|
The Perplexity Search tool enables models to search the web using [Perplexity's search API](https://docs.perplexity.ai/guides/search-quickstart). This tool is executed by the AI Gateway and returns web search results that the model can use to provide up-to-date information.
|
|
436
436
|
|
|
437
437
|
```ts
|
|
438
|
-
import { gateway, generateText } from
|
|
438
|
+
import { gateway, generateText } from "ai";
|
|
439
439
|
|
|
440
440
|
const result = await generateText({
|
|
441
|
-
model:
|
|
442
|
-
prompt:
|
|
441
|
+
model: "openai/gpt-5.4-nano",
|
|
442
|
+
prompt: "Search for news about AI regulations in January 2025.",
|
|
443
443
|
tools: {
|
|
444
444
|
perplexity_search: gateway.tools.perplexitySearch(),
|
|
445
445
|
},
|
|
446
446
|
});
|
|
447
447
|
|
|
448
448
|
console.log(result.text);
|
|
449
|
-
console.log(
|
|
450
|
-
console.log(
|
|
449
|
+
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
|
|
450
|
+
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
|
|
451
451
|
```
|
|
452
452
|
|
|
453
453
|
You can also configure the search with optional parameters:
|
|
454
454
|
|
|
455
455
|
```ts
|
|
456
|
-
import { gateway, generateText } from
|
|
456
|
+
import { gateway, generateText } from "ai";
|
|
457
457
|
|
|
458
458
|
const result = await generateText({
|
|
459
|
-
model:
|
|
459
|
+
model: "openai/gpt-5.4-nano",
|
|
460
460
|
prompt:
|
|
461
|
-
|
|
461
|
+
"Search for news about AI regulations from the first week of January 2025.",
|
|
462
462
|
tools: {
|
|
463
463
|
perplexity_search: gateway.tools.perplexitySearch({
|
|
464
464
|
maxResults: 5,
|
|
465
|
-
searchLanguageFilter: [
|
|
466
|
-
country:
|
|
467
|
-
searchDomainFilter: [
|
|
465
|
+
searchLanguageFilter: ["en"],
|
|
466
|
+
country: "US",
|
|
467
|
+
searchDomainFilter: ["reuters.com", "bbc.com", "nytimes.com"],
|
|
468
468
|
}),
|
|
469
469
|
},
|
|
470
470
|
});
|
|
471
471
|
|
|
472
472
|
console.log(result.text);
|
|
473
|
-
console.log(
|
|
474
|
-
console.log(
|
|
473
|
+
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
|
|
474
|
+
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
|
|
475
475
|
```
|
|
476
476
|
|
|
477
477
|
The Perplexity Search tool supports the following optional configuration options:
|
|
@@ -507,11 +507,11 @@ The Perplexity Search tool supports the following optional configuration options
|
|
|
507
507
|
The tool works with both `generateText` and `streamText`:
|
|
508
508
|
|
|
509
509
|
```ts
|
|
510
|
-
import { gateway, streamText } from
|
|
510
|
+
import { gateway, streamText } from "ai";
|
|
511
511
|
|
|
512
512
|
const result = streamText({
|
|
513
|
-
model:
|
|
514
|
-
prompt:
|
|
513
|
+
model: "openai/gpt-5.4-nano",
|
|
514
|
+
prompt: "Search for the latest news about AI regulations.",
|
|
515
515
|
tools: {
|
|
516
516
|
perplexity_search: gateway.tools.perplexitySearch(),
|
|
517
517
|
},
|
|
@@ -519,14 +519,14 @@ const result = streamText({
|
|
|
519
519
|
|
|
520
520
|
for await (const part of result.fullStream) {
|
|
521
521
|
switch (part.type) {
|
|
522
|
-
case
|
|
522
|
+
case "text-delta":
|
|
523
523
|
process.stdout.write(part.text);
|
|
524
524
|
break;
|
|
525
|
-
case
|
|
526
|
-
console.log(
|
|
525
|
+
case "tool-call":
|
|
526
|
+
console.log("\nTool call:", JSON.stringify(part, null, 2));
|
|
527
527
|
break;
|
|
528
|
-
case
|
|
529
|
-
console.log(
|
|
528
|
+
case "tool-result":
|
|
529
|
+
console.log("\nTool result:", JSON.stringify(part, null, 2));
|
|
530
530
|
break;
|
|
531
531
|
}
|
|
532
532
|
}
|
|
@@ -537,35 +537,35 @@ for await (const part of result.fullStream) {
|
|
|
537
537
|
The Parallel Search tool enables models to search the web using [Parallel AI's Search API](https://docs.parallel.ai/api-reference/search-beta/search). This tool is optimized for LLM consumption, returning relevant excerpts from web pages that can replace multiple keyword searches with a single call.
|
|
538
538
|
|
|
539
539
|
```ts
|
|
540
|
-
import { gateway, generateText } from
|
|
540
|
+
import { gateway, generateText } from "ai";
|
|
541
541
|
|
|
542
542
|
const result = await generateText({
|
|
543
|
-
model:
|
|
544
|
-
prompt:
|
|
543
|
+
model: "openai/gpt-5.4-nano",
|
|
544
|
+
prompt: "Research the latest developments in quantum computing.",
|
|
545
545
|
tools: {
|
|
546
546
|
parallel_search: gateway.tools.parallelSearch(),
|
|
547
547
|
},
|
|
548
548
|
});
|
|
549
549
|
|
|
550
550
|
console.log(result.text);
|
|
551
|
-
console.log(
|
|
552
|
-
console.log(
|
|
551
|
+
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
|
|
552
|
+
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
|
|
553
553
|
```
|
|
554
554
|
|
|
555
555
|
You can also configure the search with optional parameters:
|
|
556
556
|
|
|
557
557
|
```ts
|
|
558
|
-
import { gateway, generateText } from
|
|
558
|
+
import { gateway, generateText } from "ai";
|
|
559
559
|
|
|
560
560
|
const result = await generateText({
|
|
561
|
-
model:
|
|
562
|
-
prompt:
|
|
561
|
+
model: "openai/gpt-5.4-nano",
|
|
562
|
+
prompt: "Find detailed information about TypeScript 5.0 features.",
|
|
563
563
|
tools: {
|
|
564
564
|
parallel_search: gateway.tools.parallelSearch({
|
|
565
|
-
mode:
|
|
565
|
+
mode: "agentic",
|
|
566
566
|
maxResults: 5,
|
|
567
567
|
sourcePolicy: {
|
|
568
|
-
includeDomains: [
|
|
568
|
+
includeDomains: ["typescriptlang.org", "github.com"],
|
|
569
569
|
},
|
|
570
570
|
excerpts: {
|
|
571
571
|
maxCharsPerResult: 8000,
|
|
@@ -575,8 +575,8 @@ const result = await generateText({
|
|
|
575
575
|
});
|
|
576
576
|
|
|
577
577
|
console.log(result.text);
|
|
578
|
-
console.log(
|
|
579
|
-
console.log(
|
|
578
|
+
console.log("Tool calls:", JSON.stringify(result.toolCalls, null, 2));
|
|
579
|
+
console.log("Tool results:", JSON.stringify(result.toolResults, null, 2));
|
|
580
580
|
```
|
|
581
581
|
|
|
582
582
|
The Parallel Search tool supports the following optional configuration options:
|
|
@@ -616,11 +616,11 @@ The Parallel Search tool supports the following optional configuration options:
|
|
|
616
616
|
The tool works with both `generateText` and `streamText`:
|
|
617
617
|
|
|
618
618
|
```ts
|
|
619
|
-
import { gateway, streamText } from
|
|
619
|
+
import { gateway, streamText } from "ai";
|
|
620
620
|
|
|
621
621
|
const result = streamText({
|
|
622
|
-
model:
|
|
623
|
-
prompt:
|
|
622
|
+
model: "openai/gpt-5.4-nano",
|
|
623
|
+
prompt: "Research the latest AI safety guidelines.",
|
|
624
624
|
tools: {
|
|
625
625
|
parallel_search: gateway.tools.parallelSearch(),
|
|
626
626
|
},
|
|
@@ -628,14 +628,14 @@ const result = streamText({
|
|
|
628
628
|
|
|
629
629
|
for await (const part of result.fullStream) {
|
|
630
630
|
switch (part.type) {
|
|
631
|
-
case
|
|
631
|
+
case "text-delta":
|
|
632
632
|
process.stdout.write(part.text);
|
|
633
633
|
break;
|
|
634
|
-
case
|
|
635
|
-
console.log(
|
|
634
|
+
case "tool-call":
|
|
635
|
+
console.log("\nTool call:", JSON.stringify(part, null, 2));
|
|
636
636
|
break;
|
|
637
|
-
case
|
|
638
|
-
console.log(
|
|
637
|
+
case "tool-result":
|
|
638
|
+
console.log("\nTool result:", JSON.stringify(part, null, 2));
|
|
639
639
|
break;
|
|
640
640
|
}
|
|
641
641
|
}
|
|
@@ -646,16 +646,16 @@ for await (const part of result.fullStream) {
|
|
|
646
646
|
Track usage per end-user and categorize requests with tags:
|
|
647
647
|
|
|
648
648
|
```ts
|
|
649
|
-
import type { GatewayProviderOptions } from
|
|
650
|
-
import { generateText } from
|
|
649
|
+
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
|
|
650
|
+
import { generateText } from "ai";
|
|
651
651
|
|
|
652
652
|
const { text } = await generateText({
|
|
653
|
-
model:
|
|
654
|
-
prompt:
|
|
653
|
+
model: "openai/gpt-5.4",
|
|
654
|
+
prompt: "Summarize this document...",
|
|
655
655
|
providerOptions: {
|
|
656
656
|
gateway: {
|
|
657
|
-
user:
|
|
658
|
-
tags: [
|
|
657
|
+
user: "user-abc-123", // Track usage for this specific end-user
|
|
658
|
+
tags: ["document-summary", "premium-feature"], // Categorize for reporting
|
|
659
659
|
} satisfies GatewayProviderOptions,
|
|
660
660
|
},
|
|
661
661
|
});
|
|
@@ -672,12 +672,12 @@ This allows you to:
|
|
|
672
672
|
Use the `getSpendReport()` method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the [Custom Reporting docs](https://vercel.com/docs/ai-gateway/capabilities/custom-reporting).
|
|
673
673
|
|
|
674
674
|
```ts
|
|
675
|
-
import { gateway } from
|
|
675
|
+
import { gateway } from "ai";
|
|
676
676
|
|
|
677
677
|
const report = await gateway.getSpendReport({
|
|
678
|
-
startDate:
|
|
679
|
-
endDate:
|
|
680
|
-
groupBy:
|
|
678
|
+
startDate: "2026-03-01",
|
|
679
|
+
endDate: "2026-03-25",
|
|
680
|
+
groupBy: "model",
|
|
681
681
|
});
|
|
682
682
|
|
|
683
683
|
for (const row of report.results) {
|
|
@@ -747,16 +747,16 @@ The AI Gateway provider accepts provider options that control routing behavior a
|
|
|
747
747
|
You can use the `gateway` key in `providerOptions` to control how AI Gateway routes requests:
|
|
748
748
|
|
|
749
749
|
```ts
|
|
750
|
-
import type { GatewayProviderOptions } from
|
|
751
|
-
import { generateText } from
|
|
750
|
+
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
|
|
751
|
+
import { generateText } from "ai";
|
|
752
752
|
|
|
753
753
|
const { text } = await generateText({
|
|
754
|
-
model:
|
|
755
|
-
prompt:
|
|
754
|
+
model: "anthropic/claude-sonnet-4.6",
|
|
755
|
+
prompt: "Explain quantum computing",
|
|
756
756
|
providerOptions: {
|
|
757
757
|
gateway: {
|
|
758
|
-
order: [
|
|
759
|
-
only: [
|
|
758
|
+
order: ["vertex", "anthropic"], // Try Vertex AI first, then Anthropic
|
|
759
|
+
only: ["vertex", "anthropic"], // Only use these providers
|
|
760
760
|
} satisfies GatewayProviderOptions,
|
|
761
761
|
},
|
|
762
762
|
});
|
|
@@ -776,6 +776,20 @@ The following gateway provider options are available:
|
|
|
776
776
|
|
|
777
777
|
Example: `only: ['anthropic', 'vertex']` will only allow routing to Anthropic or Vertex AI.
|
|
778
778
|
|
|
779
|
+
- **sort** _'cost' | 'ttft' | 'tps'_
|
|
780
|
+
|
|
781
|
+
Sorts available providers by a performance or cost metric before routing. The gateway will try the best-scoring provider first and fall back through the rest in sorted order. If unspecified, providers are ordered using the gateway's default system ranking.
|
|
782
|
+
|
|
783
|
+
- `'cost'` — lowest cost first
|
|
784
|
+
- `'ttft'` — lowest time-to-first-token first
|
|
785
|
+
- `'tps'` — highest tokens-per-second first
|
|
786
|
+
|
|
787
|
+
When combined with `order`, the user-specified providers are promoted to the front while remaining providers follow the sorted order.
|
|
788
|
+
|
|
789
|
+
Example: `sort: 'ttft'` will try the provider with the lowest time-to-first-token first, then fall back through the remaining providers in sorted order.
|
|
790
|
+
|
|
791
|
+
When `sort` is active, the response's `providerMetadata.gateway.routing.sort` object contains the sort option used, the resulting execution order, per-provider metric values, and any providers that were deprioritized.
|
|
792
|
+
|
|
779
793
|
- **models** _string[]_
|
|
780
794
|
|
|
781
795
|
Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the `model` parameter), then try each model in this array in order until one succeeds.
|
|
@@ -833,16 +847,16 @@ The following gateway provider options are available:
|
|
|
833
847
|
You can combine these options to have fine-grained control over routing and tracking:
|
|
834
848
|
|
|
835
849
|
```ts
|
|
836
|
-
import type { GatewayProviderOptions } from
|
|
837
|
-
import { generateText } from
|
|
850
|
+
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
|
|
851
|
+
import { generateText } from "ai";
|
|
838
852
|
|
|
839
853
|
const { text } = await generateText({
|
|
840
|
-
model:
|
|
841
|
-
prompt:
|
|
854
|
+
model: "anthropic/claude-sonnet-4.6",
|
|
855
|
+
prompt: "Write a haiku about programming",
|
|
842
856
|
providerOptions: {
|
|
843
857
|
gateway: {
|
|
844
|
-
order: [
|
|
845
|
-
only: [
|
|
858
|
+
order: ["vertex"], // Prefer Vertex AI
|
|
859
|
+
only: ["anthropic", "vertex"], // Only allow these providers
|
|
846
860
|
} satisfies GatewayProviderOptions,
|
|
847
861
|
},
|
|
848
862
|
});
|
|
@@ -853,15 +867,15 @@ const { text } = await generateText({
|
|
|
853
867
|
The `models` option enables automatic fallback to alternative models when the primary model fails:
|
|
854
868
|
|
|
855
869
|
```ts
|
|
856
|
-
import type { GatewayProviderOptions } from
|
|
857
|
-
import { generateText } from
|
|
870
|
+
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
|
|
871
|
+
import { generateText } from "ai";
|
|
858
872
|
|
|
859
873
|
const { text } = await generateText({
|
|
860
|
-
model:
|
|
861
|
-
prompt:
|
|
874
|
+
model: "openai/gpt-5.4", // Primary model
|
|
875
|
+
prompt: "Write a TypeScript haiku",
|
|
862
876
|
providerOptions: {
|
|
863
877
|
gateway: {
|
|
864
|
-
models: [
|
|
878
|
+
models: ["openai/gpt-5.4-nano", "gemini-3-flash-preview"], // Fallback models
|
|
865
879
|
} satisfies GatewayProviderOptions,
|
|
866
880
|
},
|
|
867
881
|
});
|
|
@@ -878,12 +892,12 @@ const { text } = await generateText({
|
|
|
878
892
|
Set `zeroDataRetention` to `true` to route requests only to providers that have zero data retention agreements with Vercel for AI Gateway. If no provider with zero data retention is available for the model, the request will fail. When `zeroDataRetention` is `false` or not specified, routing is not restricted. BYOK credentials are skipped when `zeroDataRetention` is set to `true` to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
|
|
879
893
|
|
|
880
894
|
```ts
|
|
881
|
-
import type { GatewayProviderOptions } from
|
|
882
|
-
import { generateText } from
|
|
895
|
+
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
|
|
896
|
+
import { generateText } from "ai";
|
|
883
897
|
|
|
884
898
|
const { text } = await generateText({
|
|
885
|
-
model:
|
|
886
|
-
prompt:
|
|
899
|
+
model: "anthropic/claude-sonnet-4.6",
|
|
900
|
+
prompt: "Analyze this sensitive document...",
|
|
887
901
|
providerOptions: {
|
|
888
902
|
gateway: {
|
|
889
903
|
zeroDataRetention: true,
|
|
@@ -897,12 +911,12 @@ const { text } = await generateText({
|
|
|
897
911
|
Set `disallowPromptTraining` to `true` to route requests only to providers that have agreements with Vercel for AI Gateway not to use prompts for model training. If no provider that disallows prompt training is available for the model, the request will fail. When `disallowPromptTraining` is `false` or not specified, routing is not restricted. BYOK credentials are skipped when `disallowPromptTraining` is set to `true` to ensure that requests are only routed to providers that do not train on prompt data.
|
|
898
912
|
|
|
899
913
|
```ts
|
|
900
|
-
import type { GatewayProviderOptions } from
|
|
901
|
-
import { generateText } from
|
|
914
|
+
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
|
|
915
|
+
import { generateText } from "ai";
|
|
902
916
|
|
|
903
917
|
const { text } = await generateText({
|
|
904
|
-
model:
|
|
905
|
-
prompt:
|
|
918
|
+
model: "anthropic/claude-sonnet-4.6",
|
|
919
|
+
prompt: "Analyze this proprietary business data...",
|
|
906
920
|
providerOptions: {
|
|
907
921
|
gateway: {
|
|
908
922
|
disallowPromptTraining: true,
|
|
@@ -916,12 +930,12 @@ const { text } = await generateText({
|
|
|
916
930
|
Set `hipaaCompliant` to `true` to route requests only to models or tools by providers that have signed a Business Associate Agreement (BAA) with Vercel for the use of AI Gateway. If the model or tool does not have a HIPAA-compliant provider, the request will fail. When `hipaaCompliant` is `false` or not specified, routing is not restricted. BYOK credentials are skipped when `hipaaCompliant` is set to `true` to ensure that requests are only routed to providers that support HIPAA compliance.
|
|
917
931
|
|
|
918
932
|
```ts
|
|
919
|
-
import type { GatewayProviderOptions } from
|
|
920
|
-
import { generateText } from
|
|
933
|
+
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
|
|
934
|
+
import { generateText } from "ai";
|
|
921
935
|
|
|
922
936
|
const { text } = await generateText({
|
|
923
|
-
model:
|
|
924
|
-
prompt:
|
|
937
|
+
model: "anthropic/claude-sonnet-4.6",
|
|
938
|
+
prompt: "Analyze this patient data...",
|
|
925
939
|
providerOptions: {
|
|
926
940
|
gateway: {
|
|
927
941
|
hipaaCompliant: true,
|
|
@@ -935,15 +949,15 @@ const { text } = await generateText({
|
|
|
935
949
|
Set `quotaEntityId` to track and enforce quota against a specific entity. This is useful for multi-tenant applications where you need to manage quota at the entity level (e.g., per organization or team).
|
|
936
950
|
|
|
937
951
|
```ts
|
|
938
|
-
import type { GatewayProviderOptions } from
|
|
939
|
-
import { generateText } from
|
|
952
|
+
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
|
|
953
|
+
import { generateText } from "ai";
|
|
940
954
|
|
|
941
955
|
const { text } = await generateText({
|
|
942
|
-
model:
|
|
943
|
-
prompt:
|
|
956
|
+
model: "anthropic/claude-sonnet-4.6",
|
|
957
|
+
prompt: "Summarize this report...",
|
|
944
958
|
providerOptions: {
|
|
945
959
|
gateway: {
|
|
946
|
-
quotaEntityId:
|
|
960
|
+
quotaEntityId: "org-123",
|
|
947
961
|
} satisfies GatewayProviderOptions,
|
|
948
962
|
},
|
|
949
963
|
});
|
|
@@ -954,19 +968,19 @@ const { text } = await generateText({
|
|
|
954
968
|
When using provider-specific options through AI Gateway, use the actual provider name (e.g. `anthropic`, `openai`, not `gateway`) as the key:
|
|
955
969
|
|
|
956
970
|
```ts
|
|
957
|
-
import type { AnthropicLanguageModelOptions } from
|
|
958
|
-
import type { GatewayProviderOptions } from
|
|
959
|
-
import { generateText } from
|
|
971
|
+
import type { AnthropicLanguageModelOptions } from "@ai-sdk/anthropic";
|
|
972
|
+
import type { GatewayProviderOptions } from "@ai-sdk/gateway";
|
|
973
|
+
import { generateText } from "ai";
|
|
960
974
|
|
|
961
975
|
const { text } = await generateText({
|
|
962
|
-
model:
|
|
963
|
-
prompt:
|
|
976
|
+
model: "anthropic/claude-sonnet-4.6",
|
|
977
|
+
prompt: "Explain quantum computing",
|
|
964
978
|
providerOptions: {
|
|
965
979
|
gateway: {
|
|
966
|
-
order: [
|
|
980
|
+
order: ["vertex", "anthropic"],
|
|
967
981
|
} satisfies GatewayProviderOptions,
|
|
968
982
|
anthropic: {
|
|
969
|
-
thinking: { type:
|
|
983
|
+
thinking: { type: "enabled", budgetTokens: 12000 },
|
|
970
984
|
} satisfies AnthropicLanguageModelOptions,
|
|
971
985
|
},
|
|
972
986
|
});
|