@ai-sdk/gateway 4.0.0-beta.3 → 4.0.0-beta.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,7 +29,7 @@ For most use cases, you can use the AI Gateway directly with a model string:
29
29
  import { generateText } from 'ai';
30
30
 
31
31
  const { text } = await generateText({
32
- model: 'openai/gpt-5',
32
+ model: 'openai/gpt-5.4',
33
33
  prompt: 'Hello world',
34
34
  });
35
35
  ```
@@ -39,7 +39,7 @@ const { text } = await generateText({
39
39
  import { generateText, gateway } from 'ai';
40
40
 
41
41
  const { text } = await generateText({
42
- model: gateway('openai/gpt-5'),
42
+ model: gateway('openai/gpt-5.4'),
43
43
  prompt: 'Hello world',
44
44
  });
45
45
  ```
@@ -169,7 +169,7 @@ You can create language models using a provider instance. The first argument is
169
169
  import { generateText } from 'ai';
170
170
 
171
171
  const { text } = await generateText({
172
- model: 'openai/gpt-5',
172
+ model: 'openai/gpt-5.4',
173
173
  prompt: 'Explain quantum computing in simple terms',
174
174
  });
175
175
  ```
@@ -215,7 +215,7 @@ availableModels.models.forEach(model => {
215
215
 
216
216
  // Use any discovered model with plain string
217
217
  const { text } = await generateText({
218
- model: availableModels.models[0].id, // e.g., 'openai/gpt-4o'
218
+ model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
219
219
  prompt: 'Hello world',
220
220
  });
221
221
  ```
@@ -238,6 +238,86 @@ The `getCredits()` method returns your team's credit information based on the au
238
238
  - **balance** _number_ - Your team's current available credit balance
239
239
  - **total_used** _number_ - Total credits consumed by your team
240
240
 
241
+ ## Generation Lookup
242
+
243
+ Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in `providerMetadata.gateway.generationId` on both `generateText` and `streamText` responses.
244
+
245
+ When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via `getGenerationInfo()`.
246
+
247
+ ```ts
248
+ import { gateway, generateText } from 'ai';
249
+
250
+ // Make a request
251
+ const result = await generateText({
252
+ model: gateway('anthropic/claude-sonnet-4'),
253
+ prompt: 'Explain quantum entanglement briefly',
254
+ });
255
+
256
+ // Get the generation ID from provider metadata
257
+ const generationId = result.providerMetadata?.gateway?.generationId;
258
+
259
+ // Look up detailed generation info
260
+ const generation = await gateway.getGenerationInfo({ id: generationId });
261
+
262
+ console.log(`Model: ${generation.model}`);
263
+ console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
264
+ console.log(`Latency: ${generation.latency}ms`);
265
+ console.log(`Prompt tokens: ${generation.promptTokens}`);
266
+ console.log(`Completion tokens: ${generation.completionTokens}`);
267
+ ```
268
+
269
+ With `streamText`, you can capture the generation ID from the first chunk via `fullStream`:
270
+
271
+ ```ts
272
+ import { gateway, streamText } from 'ai';
273
+
274
+ const result = streamText({
275
+ model: gateway('anthropic/claude-sonnet-4'),
276
+ prompt: 'Explain quantum entanglement briefly',
277
+ });
278
+
279
+ let generationId: string | undefined;
280
+
281
+ for await (const part of result.fullStream) {
282
+ if (!generationId && part.providerMetadata?.gateway?.generationId) {
283
+ generationId = part.providerMetadata.gateway.generationId as string;
284
+ console.log(`Generation ID (early): ${generationId}`);
285
+ }
286
+ }
287
+
288
+ // Look up cost and usage after the stream completes
289
+ if (generationId) {
290
+ const generation = await gateway.getGenerationInfo({ id: generationId });
291
+ console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
292
+ console.log(`Finish reason: ${generation.finishReason}`);
293
+ }
294
+ ```
295
+
296
+ The `getGenerationInfo()` method accepts:
297
+
298
+ - **id** _string_ - The generation ID to look up (format: `gen_<ulid>`, required)
299
+
300
+ It returns a `GatewayGenerationInfo` object with the following fields:
301
+
302
+ - **id** _string_ - The generation ID
303
+ - **totalCost** _number_ - Total cost in USD
304
+ - **upstreamInferenceCost** _number_ - Upstream inference cost in USD (relevant for BYOK)
305
+ - **usage** _number_ - Usage cost in USD (same as totalCost)
306
+ - **createdAt** _string_ - ISO 8601 timestamp when the generation was created
307
+ - **model** _string_ - Model identifier used
308
+ - **isByok** _boolean_ - Whether Bring Your Own Key credentials were used
309
+ - **providerName** _string_ - The provider that served this generation
310
+ - **streamed** _boolean_ - Whether streaming was used
311
+ - **finishReason** _string_ - Finish reason (e.g. `'stop'`)
312
+ - **latency** _number_ - Time to first token in milliseconds
313
+ - **generationTime** _number_ - Total generation time in milliseconds
314
+ - **promptTokens** _number_ - Number of prompt tokens
315
+ - **completionTokens** _number_ - Number of completion tokens
316
+ - **reasoningTokens** _number_ - Reasoning tokens used (if applicable)
317
+ - **cachedTokens** _number_ - Cached tokens used (if applicable)
318
+ - **cacheCreationTokens** _number_ - Cache creation input tokens
319
+ - **billableWebSearchCalls** _number_ - Number of billable web search calls
320
+
241
321
  ## Examples
242
322
 
243
323
  ### Basic Text Generation
@@ -246,7 +326,7 @@ The `getCredits()` method returns your team's credit information based on the au
246
326
  import { generateText } from 'ai';
247
327
 
248
328
  const { text } = await generateText({
249
- model: 'anthropic/claude-sonnet-4',
329
+ model: 'anthropic/claude-sonnet-4.6',
250
330
  prompt: 'Write a haiku about programming',
251
331
  });
252
332
 
@@ -259,7 +339,7 @@ console.log(text);
259
339
  import { streamText } from 'ai';
260
340
 
261
341
  const { textStream } = await streamText({
262
- model: 'openai/gpt-5',
342
+ model: 'openai/gpt-5.4',
263
343
  prompt: 'Explain the benefits of serverless architecture',
264
344
  });
265
345
 
@@ -297,13 +377,13 @@ const { text } = await generateText({
297
377
  Some providers offer tools that are executed by the provider itself, such as [OpenAI's web search tool](/providers/ai-sdk-providers/openai#web-search-tool). To use these tools through AI Gateway, import the provider to access the tool definitions:
298
378
 
299
379
  ```ts
300
- import { generateText, stepCountIs } from 'ai';
380
+ import { generateText, isStepCount } from 'ai';
301
381
  import { openai } from '@ai-sdk/openai';
302
382
 
303
383
  const result = await generateText({
304
- model: 'openai/gpt-5-mini',
384
+ model: 'openai/gpt-5.4-mini',
305
385
  prompt: 'What is the Vercel AI Gateway?',
306
- stopWhen: stepCountIs(10),
386
+ stopWhen: isStepCount(10),
307
387
  tools: {
308
388
  web_search: openai.tools.webSearch({}),
309
389
  },
@@ -330,7 +410,7 @@ The Perplexity Search tool enables models to search the web using [Perplexity's
330
410
  import { gateway, generateText } from 'ai';
331
411
 
332
412
  const result = await generateText({
333
- model: 'openai/gpt-5-nano',
413
+ model: 'openai/gpt-5.4-nano',
334
414
  prompt: 'Search for news about AI regulations in January 2025.',
335
415
  tools: {
336
416
  perplexity_search: gateway.tools.perplexitySearch(),
@@ -348,7 +428,7 @@ You can also configure the search with optional parameters:
348
428
  import { gateway, generateText } from 'ai';
349
429
 
350
430
  const result = await generateText({
351
- model: 'openai/gpt-5-nano',
431
+ model: 'openai/gpt-5.4-nano',
352
432
  prompt:
353
433
  'Search for news about AI regulations from the first week of January 2025.',
354
434
  tools: {
@@ -402,7 +482,7 @@ The tool works with both `generateText` and `streamText`:
402
482
  import { gateway, streamText } from 'ai';
403
483
 
404
484
  const result = streamText({
405
- model: 'openai/gpt-5-nano',
485
+ model: 'openai/gpt-5.4-nano',
406
486
  prompt: 'Search for the latest news about AI regulations.',
407
487
  tools: {
408
488
  perplexity_search: gateway.tools.perplexitySearch(),
@@ -432,7 +512,7 @@ The Parallel Search tool enables models to search the web using [Parallel AI's S
432
512
  import { gateway, generateText } from 'ai';
433
513
 
434
514
  const result = await generateText({
435
- model: 'openai/gpt-5-nano',
515
+ model: 'openai/gpt-5.4-nano',
436
516
  prompt: 'Research the latest developments in quantum computing.',
437
517
  tools: {
438
518
  parallel_search: gateway.tools.parallelSearch(),
@@ -450,7 +530,7 @@ You can also configure the search with optional parameters:
450
530
  import { gateway, generateText } from 'ai';
451
531
 
452
532
  const result = await generateText({
453
- model: 'openai/gpt-5-nano',
533
+ model: 'openai/gpt-5.4-nano',
454
534
  prompt: 'Find detailed information about TypeScript 5.0 features.',
455
535
  tools: {
456
536
  parallel_search: gateway.tools.parallelSearch({
@@ -511,7 +591,7 @@ The tool works with both `generateText` and `streamText`:
511
591
  import { gateway, streamText } from 'ai';
512
592
 
513
593
  const result = streamText({
514
- model: 'openai/gpt-5-nano',
594
+ model: 'openai/gpt-5.4-nano',
515
595
  prompt: 'Research the latest AI safety guidelines.',
516
596
  tools: {
517
597
  parallel_search: gateway.tools.parallelSearch(),
@@ -533,22 +613,24 @@ for await (const part of result.fullStream) {
533
613
  }
534
614
  ```
535
615
 
536
- ### Usage Tracking with User and Tags
616
+ ### Custom Reporting
617
+
618
+ Track usage per end-user and categorize requests with tags, then query the data through the reporting API.
537
619
 
538
- Track usage per end-user and categorize requests with tags:
620
+ #### Usage Tracking with User and Tags
539
621
 
540
622
  ```ts
541
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
623
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
542
624
  import { generateText } from 'ai';
543
625
 
544
626
  const { text } = await generateText({
545
- model: 'openai/gpt-5',
627
+ model: 'openai/gpt-5.4',
546
628
  prompt: 'Summarize this document...',
547
629
  providerOptions: {
548
630
  gateway: {
549
631
  user: 'user-abc-123', // Track usage for this specific end-user
550
632
  tags: ['document-summary', 'premium-feature'], // Categorize for reporting
551
- } satisfies GatewayLanguageModelOptions,
633
+ } satisfies GatewayProviderOptions,
552
634
  },
553
635
  });
554
636
  ```
@@ -559,6 +641,77 @@ This allows you to:
559
641
  - Filter and analyze spending by feature or use case using tags
560
642
  - Track which users or features are driving the most AI usage
561
643
 
644
+ #### Querying Spend Reports
645
+
646
+ Use the `getSpendReport()` method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the [Custom Reporting docs](https://vercel.com/docs/ai-gateway/capabilities/custom-reporting).
647
+
648
+ ```ts
649
+ import { gateway } from 'ai';
650
+
651
+ const report = await gateway.getSpendReport({
652
+ startDate: '2026-03-01',
653
+ endDate: '2026-03-25',
654
+ groupBy: 'model',
655
+ });
656
+
657
+ for (const row of report.results) {
658
+ console.log(`${row.model}: $${row.totalCost.toFixed(4)}`);
659
+ }
660
+ ```
661
+
662
+ The `getSpendReport()` method accepts the following parameters:
663
+
664
+ - **startDate** _string_ - Start date in `YYYY-MM-DD` format (inclusive, required)
665
+ - **endDate** _string_ - End date in `YYYY-MM-DD` format (inclusive, required)
666
+ - **groupBy** _string_ - Aggregation dimension: `'day'` (default), `'user'`, `'model'`, `'tag'`, `'provider'`, or `'credential_type'`
667
+ - **datePart** _string_ - Time granularity when `groupBy` is `'day'`: `'day'` or `'hour'`
668
+ - **userId** _string_ - Filter to a specific user
669
+ - **model** _string_ - Filter to a specific model (e.g. `'anthropic/claude-sonnet-4.5'`)
670
+ - **provider** _string_ - Filter to a specific provider (e.g. `'anthropic'`)
671
+ - **credentialType** _string_ - Filter by `'byok'` or `'system'` credentials
672
+ - **tags** _string[]_ - Filter to requests matching these tags
673
+
674
+ Each row in `results` contains a grouping field (matching your `groupBy` choice) and metrics:
675
+
676
+ - **totalCost** _number_ - Total cost in USD
677
+ - **marketCost** _number_ - Market cost in USD
678
+ - **inputTokens** _number_ - Number of input tokens
679
+ - **outputTokens** _number_ - Number of output tokens
680
+ - **cachedInputTokens** _number_ - Number of cached input tokens
681
+ - **cacheCreationInputTokens** _number_ - Number of cache creation input tokens
682
+ - **reasoningTokens** _number_ - Number of reasoning tokens
683
+ - **requestCount** _number_ - Number of requests
684
+
685
+ You can combine tracking and querying to analyze spend by tags you defined:
686
+
687
+ ```ts
688
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
689
+ import { gateway, streamText } from 'ai';
690
+
691
+ // 1. Make requests with tags
692
+ const result = streamText({
693
+ model: gateway('anthropic/claude-haiku-4.5'),
694
+ prompt: 'Summarize this quarter\'s results',
695
+ providerOptions: {
696
+ gateway: {
697
+ tags: ['team:finance', 'feature:summaries'],
698
+ } satisfies GatewayProviderOptions,
699
+ },
700
+ });
701
+
702
+ // 2. Later, query spend filtered by those tags
703
+ const report = await gateway.getSpendReport({
704
+ startDate: '2026-03-01',
705
+ endDate: '2026-03-31',
706
+ groupBy: 'tag',
707
+ tags: ['team:finance'],
708
+ });
709
+
710
+ for (const row of report.results) {
711
+ console.log(`${row.tag}: $${row.totalCost.toFixed(4)} (${row.requestCount} requests)`);
712
+ }
713
+ ```
714
+
562
715
  ## Provider Options
563
716
 
564
717
  The AI Gateway provider accepts provider options that control routing behavior and provider-specific configurations.
@@ -568,17 +721,17 @@ The AI Gateway provider accepts provider options that control routing behavior a
568
721
  You can use the `gateway` key in `providerOptions` to control how AI Gateway routes requests:
569
722
 
570
723
  ```ts
571
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
724
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
572
725
  import { generateText } from 'ai';
573
726
 
574
727
  const { text } = await generateText({
575
- model: 'anthropic/claude-sonnet-4',
728
+ model: 'anthropic/claude-sonnet-4.6',
576
729
  prompt: 'Explain quantum computing',
577
730
  providerOptions: {
578
731
  gateway: {
579
732
  order: ['vertex', 'anthropic'], // Try Vertex AI first, then Anthropic
580
733
  only: ['vertex', 'anthropic'], // Only use these providers
581
- } satisfies GatewayLanguageModelOptions,
734
+ } satisfies GatewayProviderOptions,
582
735
  },
583
736
  });
584
737
  ```
@@ -601,7 +754,7 @@ The following gateway provider options are available:
601
754
 
602
755
  Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the `model` parameter), then try each model in this array in order until one succeeds.
603
756
 
604
- Example: `models: ['openai/gpt-5-nano', 'gemini-2.0-flash']` will try the fallback models in order if the primary model fails.
757
+ Example: `models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview']` will try the fallback models in order if the primary model fails.
605
758
 
606
759
  - **user** _string_
607
760
 
@@ -629,7 +782,12 @@ The following gateway provider options are available:
629
782
 
630
783
  - **zeroDataRetention** _boolean_
631
784
 
632
- Restricts routing requests to providers that have zero data retention policies.
785
+ Restricts routing requests to providers that have zero data retention agreements with Vercel for AI Gateway. If there are no providers available for the model with zero data retention, the request will fail. BYOK credentials are skipped when `zeroDataRetention` is set to `true` to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
786
+
787
+ - **disallowPromptTraining** _boolean_
788
+
789
+ Restricts routing requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. If there are no providers available for the model that disallow prompt training, the request will fail. BYOK credentials are skipped when `disallowPromptTraining` is set to `true` to ensure that requests are only routed to providers that do not train on prompt data.
790
+
633
791
 
634
792
  - **providerTimeouts** _object_
635
793
 
@@ -642,17 +800,17 @@ The following gateway provider options are available:
642
800
  You can combine these options to have fine-grained control over routing and tracking:
643
801
 
644
802
  ```ts
645
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
803
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
646
804
  import { generateText } from 'ai';
647
805
 
648
806
  const { text } = await generateText({
649
- model: 'anthropic/claude-sonnet-4',
807
+ model: 'anthropic/claude-sonnet-4.6',
650
808
  prompt: 'Write a haiku about programming',
651
809
  providerOptions: {
652
810
  gateway: {
653
811
  order: ['vertex'], // Prefer Vertex AI
654
812
  only: ['anthropic', 'vertex'], // Only allow these providers
655
- } satisfies GatewayLanguageModelOptions,
813
+ } satisfies GatewayProviderOptions,
656
814
  },
657
815
  });
658
816
  ```
@@ -662,43 +820,60 @@ const { text } = await generateText({
662
820
  The `models` option enables automatic fallback to alternative models when the primary model fails:
663
821
 
664
822
  ```ts
665
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
823
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
666
824
  import { generateText } from 'ai';
667
825
 
668
826
  const { text } = await generateText({
669
- model: 'openai/gpt-4o', // Primary model
827
+ model: 'openai/gpt-5.4', // Primary model
670
828
  prompt: 'Write a TypeScript haiku',
671
829
  providerOptions: {
672
830
  gateway: {
673
- models: ['openai/gpt-5-nano', 'gemini-2.0-flash'], // Fallback models
674
- } satisfies GatewayLanguageModelOptions,
831
+ models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview'], // Fallback models
832
+ } satisfies GatewayProviderOptions,
675
833
  },
676
834
  });
677
835
 
678
836
  // This will:
679
- // 1. Try openai/gpt-4o first
680
- // 2. If it fails, try openai/gpt-5-nano
681
- // 3. If that fails, try gemini-2.0-flash
837
+ // 1. Try openai/gpt-5.4 first
838
+ // 2. If it fails, try openai/gpt-5.4-nano
839
+ // 3. If that fails, try gemini-3-flash-preview
682
840
  // 4. Return the result from the first model that succeeds
683
841
  ```
684
842
 
685
843
  #### Zero Data Retention Example
686
844
 
687
- Set `zeroDataRetention` to true to ensure requests are only routed to providers
688
- that have zero data retention policies. When `zeroDataRetention` is `false` or not
689
- specified, there is no enforcement of restricting routing.
845
+ Set `zeroDataRetention` to true to route requests to providers that have zero data retention agreements with Vercel for AI Gateway. If there are no providers available for the model with zero data retention, the request will fail. When `zeroDataRetention` is `false` or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when `zeroDataRetention` is set to `true` to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
690
846
 
691
847
  ```ts
692
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
848
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
693
849
  import { generateText } from 'ai';
694
850
 
695
851
  const { text } = await generateText({
696
- model: 'anthropic/claude-sonnet-4.5',
852
+ model: 'anthropic/claude-sonnet-4.6',
697
853
  prompt: 'Analyze this sensitive document...',
698
854
  providerOptions: {
699
855
  gateway: {
700
856
  zeroDataRetention: true,
701
- } satisfies GatewayLanguageModelOptions,
857
+ } satisfies GatewayProviderOptions,
858
+ },
859
+ });
860
+ ```
861
+
862
+ #### Disallow Prompt Training Example
863
+
864
+ Set `disallowPromptTraining` to true to route requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. If there are no providers available for the model that disallow prompt training, the request will fail. When `disallowPromptTraining` is `false` or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when `disallowPromptTraining` is set to `true` to ensure that requests are only routed to providers that do not train on prompt data.
865
+
866
+ ```ts
867
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
868
+ import { generateText } from 'ai';
869
+
870
+ const { text } = await generateText({
871
+ model: 'anthropic/claude-sonnet-4.6',
872
+ prompt: 'Analyze this proprietary business data...',
873
+ providerOptions: {
874
+ gateway: {
875
+ disallowPromptTraining: true,
876
+ } satisfies GatewayProviderOptions,
702
877
  },
703
878
  });
704
879
  ```
@@ -709,16 +884,16 @@ When using provider-specific options through AI Gateway, use the actual provider
709
884
 
710
885
  ```ts
711
886
  import type { AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
712
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
887
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
713
888
  import { generateText } from 'ai';
714
889
 
715
890
  const { text } = await generateText({
716
- model: 'anthropic/claude-sonnet-4',
891
+ model: 'anthropic/claude-sonnet-4.6',
717
892
  prompt: 'Explain quantum computing',
718
893
  providerOptions: {
719
894
  gateway: {
720
895
  order: ['vertex', 'anthropic'],
721
- } satisfies GatewayLanguageModelOptions,
896
+ } satisfies GatewayProviderOptions,
722
897
  anthropic: {
723
898
  thinking: { type: 'enabled', budgetTokens: 12000 },
724
899
  } satisfies AnthropicLanguageModelOptions,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@ai-sdk/gateway",
3
3
  "private": false,
4
- "version": "4.0.0-beta.3",
4
+ "version": "4.0.0-beta.31",
5
5
  "license": "Apache-2.0",
6
6
  "sideEffects": false,
7
7
  "main": "./dist/index.js",
@@ -30,9 +30,9 @@
30
30
  }
31
31
  },
32
32
  "dependencies": {
33
- "@vercel/oidc": "3.1.0",
34
- "@ai-sdk/provider": "4.0.0-beta.0",
35
- "@ai-sdk/provider-utils": "5.0.0-beta.1"
33
+ "@vercel/oidc": "3.2.0",
34
+ "@ai-sdk/provider": "4.0.0-beta.6",
35
+ "@ai-sdk/provider-utils": "5.0.0-beta.10"
36
36
  },
37
37
  "devDependencies": {
38
38
  "@types/node": "18.15.11",
@@ -68,9 +68,7 @@
68
68
  "build:watch": "pnpm clean && tsup --watch",
69
69
  "clean": "del-cli dist docs *.tsbuildinfo",
70
70
  "generate-model-settings": "tsx scripts/generate-model-settings.ts",
71
- "lint": "eslint \"./**/*.ts*\"",
72
71
  "type-check": "tsc --build",
73
- "prettier-check": "prettier --check \"./**/*.ts*\"",
74
72
  "test": "pnpm test:node && pnpm test:edge",
75
73
  "test:update": "pnpm test:node -u",
76
74
  "test:watch": "vitest --config vitest.node.config.js",
@@ -5,6 +5,7 @@ export type GatewayEmbeddingModelId =
5
5
  | 'amazon/titan-embed-text-v2'
6
6
  | 'cohere/embed-v4.0'
7
7
  | 'google/gemini-embedding-001'
8
+ | 'google/gemini-embedding-2'
8
9
  | 'google/text-embedding-005'
9
10
  | 'google/text-multilingual-embedding-002'
10
11
  | 'mistral/codestral-embed'
@@ -1,6 +1,6 @@
1
1
  import type {
2
- EmbeddingModelV3,
3
- SharedV3ProviderMetadata,
2
+ EmbeddingModelV4,
3
+ SharedV4ProviderMetadata,
4
4
  } from '@ai-sdk/provider';
5
5
  import {
6
6
  combineHeaders,
@@ -17,8 +17,8 @@ import { asGatewayError } from './errors';
17
17
  import { parseAuthMethod } from './errors/parse-auth-method';
18
18
  import type { GatewayConfig } from './gateway-config';
19
19
 
20
- export class GatewayEmbeddingModel implements EmbeddingModelV3 {
21
- readonly specificationVersion = 'v3';
20
+ export class GatewayEmbeddingModel implements EmbeddingModelV4 {
21
+ readonly specificationVersion = 'v4';
22
22
  readonly maxEmbeddingsPerCall = 2048;
23
23
  readonly supportsParallelCalls = true;
24
24
 
@@ -39,8 +39,8 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
39
39
  headers,
40
40
  abortSignal,
41
41
  providerOptions,
42
- }: Parameters<EmbeddingModelV3['doEmbed']>[0]): Promise<
43
- Awaited<ReturnType<EmbeddingModelV3['doEmbed']>>
42
+ }: Parameters<EmbeddingModelV4['doEmbed']>[0]): Promise<
43
+ Awaited<ReturnType<EmbeddingModelV4['doEmbed']>>
44
44
  > {
45
45
  const resolvedHeaders = await resolve(this.config.headers());
46
46
  try {
@@ -75,7 +75,7 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
75
75
  embeddings: responseBody.embeddings,
76
76
  usage: responseBody.usage ?? undefined,
77
77
  providerMetadata:
78
- responseBody.providerMetadata as unknown as SharedV3ProviderMetadata,
78
+ responseBody.providerMetadata as unknown as SharedV4ProviderMetadata,
79
79
  response: { headers: responseHeaders, body: rawValue },
80
80
  warnings: [],
81
81
  };
@@ -90,7 +90,7 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
90
90
 
91
91
  private getModelConfigHeaders() {
92
92
  return {
93
- 'ai-embedding-model-specification-version': '3',
93
+ 'ai-embedding-model-specification-version': '4',
94
94
  'ai-model-id': this.modelId,
95
95
  };
96
96
  }
@@ -101,7 +101,7 @@ const gatewayAvailableModelsResponseSchema = lazySchema(() =>
101
101
  )
102
102
  .nullish(),
103
103
  specification: z.object({
104
- specificationVersion: z.literal('v3'),
104
+ specificationVersion: z.literal('v4'),
105
105
  provider: z.string(),
106
106
  modelId: z.string(),
107
107
  }),