@ai-sdk/gateway 4.0.0-beta.3 → 4.0.0-beta.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,7 +29,7 @@ For most use cases, you can use the AI Gateway directly with a model string:
29
29
  import { generateText } from 'ai';
30
30
 
31
31
  const { text } = await generateText({
32
- model: 'openai/gpt-5',
32
+ model: 'openai/gpt-5.4',
33
33
  prompt: 'Hello world',
34
34
  });
35
35
  ```
@@ -39,7 +39,7 @@ const { text } = await generateText({
39
39
  import { generateText, gateway } from 'ai';
40
40
 
41
41
  const { text } = await generateText({
42
- model: gateway('openai/gpt-5'),
42
+ model: gateway('openai/gpt-5.4'),
43
43
  prompt: 'Hello world',
44
44
  });
45
45
  ```
@@ -169,7 +169,7 @@ You can create language models using a provider instance. The first argument is
169
169
  import { generateText } from 'ai';
170
170
 
171
171
  const { text } = await generateText({
172
- model: 'openai/gpt-5',
172
+ model: 'openai/gpt-5.4',
173
173
  prompt: 'Explain quantum computing in simple terms',
174
174
  });
175
175
  ```
@@ -215,7 +215,7 @@ availableModels.models.forEach(model => {
215
215
 
216
216
  // Use any discovered model with plain string
217
217
  const { text } = await generateText({
218
- model: availableModels.models[0].id, // e.g., 'openai/gpt-4o'
218
+ model: availableModels.models[0].id, // e.g., 'openai/gpt-5.4'
219
219
  prompt: 'Hello world',
220
220
  });
221
221
  ```
@@ -238,6 +238,86 @@ The `getCredits()` method returns your team's credit information based on the au
238
238
  - **balance** _number_ - Your team's current available credit balance
239
239
  - **total_used** _number_ - Total credits consumed by your team
240
240
 
241
+ ## Generation Lookup
242
+
243
+ Look up detailed information about a specific generation by its ID, including cost, token usage, latency, and provider details. Generation IDs are available in `providerMetadata.gateway.generationId` on both `generateText` and `streamText` responses.
244
+
245
+ When streaming, the generation ID is injected on the first content chunk, so you can capture it early in the stream without waiting for completion. This is especially useful in cases where a network interruption or mid-stream error could prevent you from receiving the final response — since the gateway records the final status server-side, you can use the generation ID to look up the results (including cost, token usage, and finish reason) later via `getGenerationInfo()`.
246
+
247
+ ```ts
248
+ import { gateway, generateText } from 'ai';
249
+
250
+ // Make a request
251
+ const result = await generateText({
252
+ model: gateway('anthropic/claude-sonnet-4'),
253
+ prompt: 'Explain quantum entanglement briefly',
254
+ });
255
+
256
+ // Get the generation ID from provider metadata
257
+ const generationId = result.providerMetadata?.gateway?.generationId;
258
+
259
+ // Look up detailed generation info
260
+ const generation = await gateway.getGenerationInfo({ id: generationId });
261
+
262
+ console.log(`Model: ${generation.model}`);
263
+ console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
264
+ console.log(`Latency: ${generation.latency}ms`);
265
+ console.log(`Prompt tokens: ${generation.promptTokens}`);
266
+ console.log(`Completion tokens: ${generation.completionTokens}`);
267
+ ```
268
+
269
+ With `streamText`, you can capture the generation ID from the first chunk via `fullStream`:
270
+
271
+ ```ts
272
+ import { gateway, streamText } from 'ai';
273
+
274
+ const result = streamText({
275
+ model: gateway('anthropic/claude-sonnet-4'),
276
+ prompt: 'Explain quantum entanglement briefly',
277
+ });
278
+
279
+ let generationId: string | undefined;
280
+
281
+ for await (const part of result.fullStream) {
282
+ if (!generationId && part.providerMetadata?.gateway?.generationId) {
283
+ generationId = part.providerMetadata.gateway.generationId as string;
284
+ console.log(`Generation ID (early): ${generationId}`);
285
+ }
286
+ }
287
+
288
+ // Look up cost and usage after the stream completes
289
+ if (generationId) {
290
+ const generation = await gateway.getGenerationInfo({ id: generationId });
291
+ console.log(`Cost: $${generation.totalCost.toFixed(6)}`);
292
+ console.log(`Finish reason: ${generation.finishReason}`);
293
+ }
294
+ ```
295
+
296
+ The `getGenerationInfo()` method accepts:
297
+
298
+ - **id** _string_ - The generation ID to look up (format: `gen_<ulid>`, required)
299
+
300
+ It returns a `GatewayGenerationInfo` object with the following fields:
301
+
302
+ - **id** _string_ - The generation ID
303
+ - **totalCost** _number_ - Total cost in USD
304
+ - **upstreamInferenceCost** _number_ - Upstream inference cost in USD (relevant for BYOK)
305
+ - **usage** _number_ - Usage cost in USD (same as totalCost)
306
+ - **createdAt** _string_ - ISO 8601 timestamp when the generation was created
307
+ - **model** _string_ - Model identifier used
308
+ - **isByok** _boolean_ - Whether Bring Your Own Key credentials were used
309
+ - **providerName** _string_ - The provider that served this generation
310
+ - **streamed** _boolean_ - Whether streaming was used
311
+ - **finishReason** _string_ - Finish reason (e.g. `'stop'`)
312
+ - **latency** _number_ - Time to first token in milliseconds
313
+ - **generationTime** _number_ - Total generation time in milliseconds
314
+ - **promptTokens** _number_ - Number of prompt tokens
315
+ - **completionTokens** _number_ - Number of completion tokens
316
+ - **reasoningTokens** _number_ - Reasoning tokens used (if applicable)
317
+ - **cachedTokens** _number_ - Cached tokens used (if applicable)
318
+ - **cacheCreationTokens** _number_ - Cache creation input tokens
319
+ - **billableWebSearchCalls** _number_ - Number of billable web search calls
320
+
241
321
  ## Examples
242
322
 
243
323
  ### Basic Text Generation
@@ -246,7 +326,7 @@ The `getCredits()` method returns your team's credit information based on the au
246
326
  import { generateText } from 'ai';
247
327
 
248
328
  const { text } = await generateText({
249
- model: 'anthropic/claude-sonnet-4',
329
+ model: 'anthropic/claude-sonnet-4.6',
250
330
  prompt: 'Write a haiku about programming',
251
331
  });
252
332
 
@@ -259,7 +339,7 @@ console.log(text);
259
339
  import { streamText } from 'ai';
260
340
 
261
341
  const { textStream } = await streamText({
262
- model: 'openai/gpt-5',
342
+ model: 'openai/gpt-5.4',
263
343
  prompt: 'Explain the benefits of serverless architecture',
264
344
  });
265
345
 
@@ -297,13 +377,13 @@ const { text } = await generateText({
297
377
  Some providers offer tools that are executed by the provider itself, such as [OpenAI's web search tool](/providers/ai-sdk-providers/openai#web-search-tool). To use these tools through AI Gateway, import the provider to access the tool definitions:
298
378
 
299
379
  ```ts
300
- import { generateText, stepCountIs } from 'ai';
380
+ import { generateText, isStepCount } from 'ai';
301
381
  import { openai } from '@ai-sdk/openai';
302
382
 
303
383
  const result = await generateText({
304
- model: 'openai/gpt-5-mini',
384
+ model: 'openai/gpt-5.4-mini',
305
385
  prompt: 'What is the Vercel AI Gateway?',
306
- stopWhen: stepCountIs(10),
386
+ stopWhen: isStepCount(10),
307
387
  tools: {
308
388
  web_search: openai.tools.webSearch({}),
309
389
  },
@@ -330,7 +410,7 @@ The Perplexity Search tool enables models to search the web using [Perplexity's
330
410
  import { gateway, generateText } from 'ai';
331
411
 
332
412
  const result = await generateText({
333
- model: 'openai/gpt-5-nano',
413
+ model: 'openai/gpt-5.4-nano',
334
414
  prompt: 'Search for news about AI regulations in January 2025.',
335
415
  tools: {
336
416
  perplexity_search: gateway.tools.perplexitySearch(),
@@ -348,7 +428,7 @@ You can also configure the search with optional parameters:
348
428
  import { gateway, generateText } from 'ai';
349
429
 
350
430
  const result = await generateText({
351
- model: 'openai/gpt-5-nano',
431
+ model: 'openai/gpt-5.4-nano',
352
432
  prompt:
353
433
  'Search for news about AI regulations from the first week of January 2025.',
354
434
  tools: {
@@ -402,7 +482,7 @@ The tool works with both `generateText` and `streamText`:
402
482
  import { gateway, streamText } from 'ai';
403
483
 
404
484
  const result = streamText({
405
- model: 'openai/gpt-5-nano',
485
+ model: 'openai/gpt-5.4-nano',
406
486
  prompt: 'Search for the latest news about AI regulations.',
407
487
  tools: {
408
488
  perplexity_search: gateway.tools.perplexitySearch(),
@@ -432,7 +512,7 @@ The Parallel Search tool enables models to search the web using [Parallel AI's S
432
512
  import { gateway, generateText } from 'ai';
433
513
 
434
514
  const result = await generateText({
435
- model: 'openai/gpt-5-nano',
515
+ model: 'openai/gpt-5.4-nano',
436
516
  prompt: 'Research the latest developments in quantum computing.',
437
517
  tools: {
438
518
  parallel_search: gateway.tools.parallelSearch(),
@@ -450,7 +530,7 @@ You can also configure the search with optional parameters:
450
530
  import { gateway, generateText } from 'ai';
451
531
 
452
532
  const result = await generateText({
453
- model: 'openai/gpt-5-nano',
533
+ model: 'openai/gpt-5.4-nano',
454
534
  prompt: 'Find detailed information about TypeScript 5.0 features.',
455
535
  tools: {
456
536
  parallel_search: gateway.tools.parallelSearch({
@@ -511,7 +591,7 @@ The tool works with both `generateText` and `streamText`:
511
591
  import { gateway, streamText } from 'ai';
512
592
 
513
593
  const result = streamText({
514
- model: 'openai/gpt-5-nano',
594
+ model: 'openai/gpt-5.4-nano',
515
595
  prompt: 'Research the latest AI safety guidelines.',
516
596
  tools: {
517
597
  parallel_search: gateway.tools.parallelSearch(),
@@ -533,22 +613,24 @@ for await (const part of result.fullStream) {
533
613
  }
534
614
  ```
535
615
 
536
- ### Usage Tracking with User and Tags
616
+ ### Custom Reporting
617
+
618
+ Track usage per end-user and categorize requests with tags, then query the data through the reporting API.
537
619
 
538
- Track usage per end-user and categorize requests with tags:
620
+ #### Usage Tracking with User and Tags
539
621
 
540
622
  ```ts
541
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
623
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
542
624
  import { generateText } from 'ai';
543
625
 
544
626
  const { text } = await generateText({
545
- model: 'openai/gpt-5',
627
+ model: 'openai/gpt-5.4',
546
628
  prompt: 'Summarize this document...',
547
629
  providerOptions: {
548
630
  gateway: {
549
631
  user: 'user-abc-123', // Track usage for this specific end-user
550
632
  tags: ['document-summary', 'premium-feature'], // Categorize for reporting
551
- } satisfies GatewayLanguageModelOptions,
633
+ } satisfies GatewayProviderOptions,
552
634
  },
553
635
  });
554
636
  ```
@@ -559,6 +641,77 @@ This allows you to:
559
641
  - Filter and analyze spending by feature or use case using tags
560
642
  - Track which users or features are driving the most AI usage
561
643
 
644
+ #### Querying Spend Reports
645
+
646
+ Use the `getSpendReport()` method to query usage data programmatically. The reporting API is only available for Vercel Pro and Enterprise plans. For pricing, see the [Custom Reporting docs](https://vercel.com/docs/ai-gateway/capabilities/custom-reporting).
647
+
648
+ ```ts
649
+ import { gateway } from 'ai';
650
+
651
+ const report = await gateway.getSpendReport({
652
+ startDate: '2026-03-01',
653
+ endDate: '2026-03-25',
654
+ groupBy: 'model',
655
+ });
656
+
657
+ for (const row of report.results) {
658
+ console.log(`${row.model}: $${row.totalCost.toFixed(4)}`);
659
+ }
660
+ ```
661
+
662
+ The `getSpendReport()` method accepts the following parameters:
663
+
664
+ - **startDate** _string_ - Start date in `YYYY-MM-DD` format (inclusive, required)
665
+ - **endDate** _string_ - End date in `YYYY-MM-DD` format (inclusive, required)
666
+ - **groupBy** _string_ - Aggregation dimension: `'day'` (default), `'user'`, `'model'`, `'tag'`, `'provider'`, or `'credential_type'`
667
+ - **datePart** _string_ - Time granularity when `groupBy` is `'day'`: `'day'` or `'hour'`
668
+ - **userId** _string_ - Filter to a specific user
669
+ - **model** _string_ - Filter to a specific model (e.g. `'anthropic/claude-sonnet-4.5'`)
670
+ - **provider** _string_ - Filter to a specific provider (e.g. `'anthropic'`)
671
+ - **credentialType** _string_ - Filter by `'byok'` or `'system'` credentials
672
+ - **tags** _string[]_ - Filter to requests matching these tags
673
+
674
+ Each row in `results` contains a grouping field (matching your `groupBy` choice) and metrics:
675
+
676
+ - **totalCost** _number_ - Total cost in USD
677
+ - **marketCost** _number_ - Market cost in USD
678
+ - **inputTokens** _number_ - Number of input tokens
679
+ - **outputTokens** _number_ - Number of output tokens
680
+ - **cachedInputTokens** _number_ - Number of cached input tokens
681
+ - **cacheCreationInputTokens** _number_ - Number of cache creation input tokens
682
+ - **reasoningTokens** _number_ - Number of reasoning tokens
683
+ - **requestCount** _number_ - Number of requests
684
+
685
+ You can combine tracking and querying to analyze spend by tags you defined:
686
+
687
+ ```ts
688
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
689
+ import { gateway, streamText } from 'ai';
690
+
691
+ // 1. Make requests with tags
692
+ const result = streamText({
693
+ model: gateway('anthropic/claude-haiku-4.5'),
694
+ prompt: 'Summarize this quarter\'s results',
695
+ providerOptions: {
696
+ gateway: {
697
+ tags: ['team:finance', 'feature:summaries'],
698
+ } satisfies GatewayProviderOptions,
699
+ },
700
+ });
701
+
702
+ // 2. Later, query spend filtered by those tags
703
+ const report = await gateway.getSpendReport({
704
+ startDate: '2026-03-01',
705
+ endDate: '2026-03-31',
706
+ groupBy: 'tag',
707
+ tags: ['team:finance'],
708
+ });
709
+
710
+ for (const row of report.results) {
711
+ console.log(`${row.tag}: $${row.totalCost.toFixed(4)} (${row.requestCount} requests)`);
712
+ }
713
+ ```
714
+
562
715
  ## Provider Options
563
716
 
564
717
  The AI Gateway provider accepts provider options that control routing behavior and provider-specific configurations.
@@ -568,17 +721,17 @@ The AI Gateway provider accepts provider options that control routing behavior a
568
721
  You can use the `gateway` key in `providerOptions` to control how AI Gateway routes requests:
569
722
 
570
723
  ```ts
571
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
724
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
572
725
  import { generateText } from 'ai';
573
726
 
574
727
  const { text } = await generateText({
575
- model: 'anthropic/claude-sonnet-4',
728
+ model: 'anthropic/claude-sonnet-4.6',
576
729
  prompt: 'Explain quantum computing',
577
730
  providerOptions: {
578
731
  gateway: {
579
732
  order: ['vertex', 'anthropic'], // Try Vertex AI first, then Anthropic
580
733
  only: ['vertex', 'anthropic'], // Only use these providers
581
- } satisfies GatewayLanguageModelOptions,
734
+ } satisfies GatewayProviderOptions,
582
735
  },
583
736
  });
584
737
  ```
@@ -601,7 +754,7 @@ The following gateway provider options are available:
601
754
 
602
755
  Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the `model` parameter), then try each model in this array in order until one succeeds.
603
756
 
604
- Example: `models: ['openai/gpt-5-nano', 'gemini-2.0-flash']` will try the fallback models in order if the primary model fails.
757
+ Example: `models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview']` will try the fallback models in order if the primary model fails.
605
758
 
606
759
  - **user** _string_
607
760
 
@@ -629,7 +782,12 @@ The following gateway provider options are available:
629
782
 
630
783
  - **zeroDataRetention** _boolean_
631
784
 
632
- Restricts routing requests to providers that have zero data retention policies.
785
+ Restricts routing requests to providers that have zero data retention agreements with Vercel for AI Gateway. If there are no providers available for the model with zero data retention, the request will fail. BYOK credentials are skipped when `zeroDataRetention` is set to `true` to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
786
+
787
+ - **disallowPromptTraining** _boolean_
788
+
789
+ Restricts routing requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. If there are no providers available for the model that disallow prompt training, the request will fail. BYOK credentials are skipped when `disallowPromptTraining` is set to `true` to ensure that requests are only routed to providers that do not train on prompt data.
790
+
633
791
 
634
792
  - **providerTimeouts** _object_
635
793
 
@@ -642,17 +800,17 @@ The following gateway provider options are available:
642
800
  You can combine these options to have fine-grained control over routing and tracking:
643
801
 
644
802
  ```ts
645
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
803
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
646
804
  import { generateText } from 'ai';
647
805
 
648
806
  const { text } = await generateText({
649
- model: 'anthropic/claude-sonnet-4',
807
+ model: 'anthropic/claude-sonnet-4.6',
650
808
  prompt: 'Write a haiku about programming',
651
809
  providerOptions: {
652
810
  gateway: {
653
811
  order: ['vertex'], // Prefer Vertex AI
654
812
  only: ['anthropic', 'vertex'], // Only allow these providers
655
- } satisfies GatewayLanguageModelOptions,
813
+ } satisfies GatewayProviderOptions,
656
814
  },
657
815
  });
658
816
  ```
@@ -662,43 +820,60 @@ const { text } = await generateText({
662
820
  The `models` option enables automatic fallback to alternative models when the primary model fails:
663
821
 
664
822
  ```ts
665
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
823
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
666
824
  import { generateText } from 'ai';
667
825
 
668
826
  const { text } = await generateText({
669
- model: 'openai/gpt-4o', // Primary model
827
+ model: 'openai/gpt-5.4', // Primary model
670
828
  prompt: 'Write a TypeScript haiku',
671
829
  providerOptions: {
672
830
  gateway: {
673
- models: ['openai/gpt-5-nano', 'gemini-2.0-flash'], // Fallback models
674
- } satisfies GatewayLanguageModelOptions,
831
+ models: ['openai/gpt-5.4-nano', 'gemini-3-flash-preview'], // Fallback models
832
+ } satisfies GatewayProviderOptions,
675
833
  },
676
834
  });
677
835
 
678
836
  // This will:
679
- // 1. Try openai/gpt-4o first
680
- // 2. If it fails, try openai/gpt-5-nano
681
- // 3. If that fails, try gemini-2.0-flash
837
+ // 1. Try openai/gpt-5.4 first
838
+ // 2. If it fails, try openai/gpt-5.4-nano
839
+ // 3. If that fails, try gemini-3-flash-preview
682
840
  // 4. Return the result from the first model that succeeds
683
841
  ```
684
842
 
685
843
  #### Zero Data Retention Example
686
844
 
687
- Set `zeroDataRetention` to true to ensure requests are only routed to providers
688
- that have zero data retention policies. When `zeroDataRetention` is `false` or not
689
- specified, there is no enforcement of restricting routing.
845
+ Set `zeroDataRetention` to true to route requests to providers that have zero data retention agreements with Vercel for AI Gateway. If there are no providers available for the model with zero data retention, the request will fail. When `zeroDataRetention` is `false` or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when `zeroDataRetention` is set to `true` to ensure that requests are only routed to providers that support ZDR compliance. Request-level ZDR is only available for Vercel Pro and Enterprise plans.
690
846
 
691
847
  ```ts
692
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
848
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
693
849
  import { generateText } from 'ai';
694
850
 
695
851
  const { text } = await generateText({
696
- model: 'anthropic/claude-sonnet-4.5',
852
+ model: 'anthropic/claude-sonnet-4.6',
697
853
  prompt: 'Analyze this sensitive document...',
698
854
  providerOptions: {
699
855
  gateway: {
700
856
  zeroDataRetention: true,
701
- } satisfies GatewayLanguageModelOptions,
857
+ } satisfies GatewayProviderOptions,
858
+ },
859
+ });
860
+ ```
861
+
862
+ #### Disallow Prompt Training Example
863
+
864
+ Set `disallowPromptTraining` to true to route requests to providers that have agreements with Vercel for AI Gateway to not use prompts for model training. If there are no providers available for the model that disallow prompt training, the request will fail. When `disallowPromptTraining` is `false` or not specified, there is no enforcement of restricting routing. BYOK credentials are skipped when `disallowPromptTraining` is set to `true` to ensure that requests are only routed to providers that do not train on prompt data.
865
+
866
+ ```ts
867
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
868
+ import { generateText } from 'ai';
869
+
870
+ const { text } = await generateText({
871
+ model: 'anthropic/claude-sonnet-4.6',
872
+ prompt: 'Analyze this proprietary business data...',
873
+ providerOptions: {
874
+ gateway: {
875
+ disallowPromptTraining: true,
876
+ } satisfies GatewayProviderOptions,
702
877
  },
703
878
  });
704
879
  ```
@@ -709,16 +884,16 @@ When using provider-specific options through AI Gateway, use the actual provider
709
884
 
710
885
  ```ts
711
886
  import type { AnthropicLanguageModelOptions } from '@ai-sdk/anthropic';
712
- import type { GatewayLanguageModelOptions } from '@ai-sdk/gateway';
887
+ import type { GatewayProviderOptions } from '@ai-sdk/gateway';
713
888
  import { generateText } from 'ai';
714
889
 
715
890
  const { text } = await generateText({
716
- model: 'anthropic/claude-sonnet-4',
891
+ model: 'anthropic/claude-sonnet-4.6',
717
892
  prompt: 'Explain quantum computing',
718
893
  providerOptions: {
719
894
  gateway: {
720
895
  order: ['vertex', 'anthropic'],
721
- } satisfies GatewayLanguageModelOptions,
896
+ } satisfies GatewayProviderOptions,
722
897
  anthropic: {
723
898
  thinking: { type: 'enabled', budgetTokens: 12000 },
724
899
  } satisfies AnthropicLanguageModelOptions,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@ai-sdk/gateway",
3
3
  "private": false,
4
- "version": "4.0.0-beta.3",
4
+ "version": "4.0.0-beta.31",
5
5
  "license": "Apache-2.0",
6
6
  "sideEffects": false,
7
7
  "main": "./dist/index.js",
@@ -30,9 +30,9 @@
30
30
  }
31
31
  },
32
32
  "dependencies": {
33
- "@vercel/oidc": "3.1.0",
34
- "@ai-sdk/provider": "4.0.0-beta.0",
35
- "@ai-sdk/provider-utils": "5.0.0-beta.1"
33
+ "@vercel/oidc": "3.2.0",
34
+ "@ai-sdk/provider": "4.0.0-beta.6",
35
+ "@ai-sdk/provider-utils": "5.0.0-beta.10"
36
36
  },
37
37
  "devDependencies": {
38
38
  "@types/node": "18.15.11",
@@ -68,9 +68,7 @@
68
68
  "build:watch": "pnpm clean && tsup --watch",
69
69
  "clean": "del-cli dist docs *.tsbuildinfo",
70
70
  "generate-model-settings": "tsx scripts/generate-model-settings.ts",
71
- "lint": "eslint \"./**/*.ts*\"",
72
71
  "type-check": "tsc --build",
73
- "prettier-check": "prettier --check \"./**/*.ts*\"",
74
72
  "test": "pnpm test:node && pnpm test:edge",
75
73
  "test:update": "pnpm test:node -u",
76
74
  "test:watch": "vitest --config vitest.node.config.js",
@@ -5,6 +5,7 @@ export type GatewayEmbeddingModelId =
5
5
  | 'amazon/titan-embed-text-v2'
6
6
  | 'cohere/embed-v4.0'
7
7
  | 'google/gemini-embedding-001'
8
+ | 'google/gemini-embedding-2'
8
9
  | 'google/text-embedding-005'
9
10
  | 'google/text-multilingual-embedding-002'
10
11
  | 'mistral/codestral-embed'
@@ -1,6 +1,6 @@
1
1
  import type {
2
- EmbeddingModelV3,
3
- SharedV3ProviderMetadata,
2
+ EmbeddingModelV4,
3
+ SharedV4ProviderMetadata,
4
4
  } from '@ai-sdk/provider';
5
5
  import {
6
6
  combineHeaders,
@@ -17,8 +17,8 @@ import { asGatewayError } from './errors';
17
17
  import { parseAuthMethod } from './errors/parse-auth-method';
18
18
  import type { GatewayConfig } from './gateway-config';
19
19
 
20
- export class GatewayEmbeddingModel implements EmbeddingModelV3 {
21
- readonly specificationVersion = 'v3';
20
+ export class GatewayEmbeddingModel implements EmbeddingModelV4 {
21
+ readonly specificationVersion = 'v4';
22
22
  readonly maxEmbeddingsPerCall = 2048;
23
23
  readonly supportsParallelCalls = true;
24
24
 
@@ -39,8 +39,8 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
39
39
  headers,
40
40
  abortSignal,
41
41
  providerOptions,
42
- }: Parameters<EmbeddingModelV3['doEmbed']>[0]): Promise<
43
- Awaited<ReturnType<EmbeddingModelV3['doEmbed']>>
42
+ }: Parameters<EmbeddingModelV4['doEmbed']>[0]): Promise<
43
+ Awaited<ReturnType<EmbeddingModelV4['doEmbed']>>
44
44
  > {
45
45
  const resolvedHeaders = await resolve(this.config.headers());
46
46
  try {
@@ -75,7 +75,7 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
75
75
  embeddings: responseBody.embeddings,
76
76
  usage: responseBody.usage ?? undefined,
77
77
  providerMetadata:
78
- responseBody.providerMetadata as unknown as SharedV3ProviderMetadata,
78
+ responseBody.providerMetadata as unknown as SharedV4ProviderMetadata,
79
79
  response: { headers: responseHeaders, body: rawValue },
80
80
  warnings: [],
81
81
  };
@@ -90,7 +90,7 @@ export class GatewayEmbeddingModel implements EmbeddingModelV3 {
90
90
 
91
91
  private getModelConfigHeaders() {
92
92
  return {
93
- 'ai-embedding-model-specification-version': '3',
93
+ 'ai-embedding-model-specification-version': '4',
94
94
  'ai-model-id': this.modelId,
95
95
  };
96
96
  }
@@ -101,7 +101,7 @@ const gatewayAvailableModelsResponseSchema = lazySchema(() =>
101
101
  )
102
102
  .nullish(),
103
103
  specification: z.object({
104
- specificationVersion: z.literal('v3'),
104
+ specificationVersion: z.literal('v4'),
105
105
  provider: z.string(),
106
106
  modelId: z.string(),
107
107
  }),