npm - @ai-sdk/gateway - Versions diffs - 4.0.0-beta.4 → 4.0.0-beta.41 - Mend

@ai-sdk/gateway 4.0.0-beta.4 → 4.0.0-beta.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/CHANGELOG.md +252 -4
package/dist/index.d.mts +143 -21
package/dist/index.d.ts +143 -21
package/dist/index.js +454 -144
package/dist/index.js.map +1 -1
package/dist/index.mjs +518 -186
package/dist/index.mjs.map +1 -1
package/docs/00-ai-gateway.mdx +292 -44
package/package.json +4 -6
package/src/gateway-embedding-model-settings.ts +1 -0
package/src/gateway-embedding-model.ts +8 -8
package/src/gateway-fetch-metadata.ts +1 -1
package/src/gateway-generation-info.ts +147 -0
package/src/gateway-image-model-settings.ts +6 -0
package/src/gateway-image-model.ts +10 -10
package/src/gateway-language-model-settings.ts +21 -10
package/src/gateway-language-model.ts +19 -19
package/src/gateway-model-entry.ts +2 -2
package/src/gateway-provider-options.ts +27 -8
package/src/gateway-provider.ts +99 -17
package/src/gateway-reranking-model-settings.ts +1 -0
package/src/gateway-reranking-model.ts +114 -0
package/src/gateway-spend-report.ts +191 -0
package/src/gateway-video-model.ts +15 -15
package/src/index.ts +13 -3

package/src/gateway-generation-info.ts ADDED Viewed

@@ -0,0 +1,147 @@
+import {
+  createJsonErrorResponseHandler,
+  createJsonResponseHandler,
+  getFromApi,
+  lazySchema,
+  resolve,
+  zodSchema,
+} from '@ai-sdk/provider-utils';
+import { z } from 'zod/v4';
+import { asGatewayError } from './errors';
+import type { GatewayConfig } from './gateway-config';
+export interface GatewayGenerationInfoParams {
+  /** The generation ID to look up (format: gen_<ulid>) */
+  id: string;
+}
+export interface GatewayGenerationInfo {
+  /** The generation ID */
+  id: string;
+  /** Total cost in USD */
+  totalCost: number;
+  /** Upstream inference cost in USD (BYOK only) */
+  upstreamInferenceCost: number;
+  /** Usage cost in USD (same as totalCost) */
+  usage: number;
+  /** ISO 8601 timestamp when the generation was created */
+  createdAt: string;
+  /** Model identifier */
+  model: string;
+  /** Whether BYOK credentials were used */
+  isByok: boolean;
+  /** Provider that served this generation */
+  providerName: string;
+  /** Whether streaming was used */
+  streamed: boolean;
+  /** Finish reason (e.g. 'stop') */
+  finishReason: string;
+  /** Time to first token in milliseconds */
+  latency: number;
+  /** Total generation time in milliseconds */
+  generationTime: number;
+  /** Number of prompt tokens */
+  promptTokens: number;
+  /** Number of completion tokens */
+  completionTokens: number;
+  /** Reasoning tokens used */
+  reasoningTokens: number;
+  /** Cached tokens used */
+  cachedTokens: number;
+  /** Cache creation input tokens */
+  cacheCreationTokens: number;
+  /** Billable web search calls */
+  billableWebSearchCalls: number;
+}
+export class GatewayGenerationInfoFetcher {
+  constructor(private readonly config: GatewayConfig) {}
+  async getGenerationInfo(
+    params: GatewayGenerationInfoParams,
+  ): Promise<GatewayGenerationInfo> {
+    try {
+      const baseUrl = new URL(this.config.baseURL);
+      const { value } = await getFromApi({
+        url: `${baseUrl.origin}/v1/generation?id=${encodeURIComponent(params.id)}`,
+        headers: await resolve(this.config.headers()),
+        successfulResponseHandler: createJsonResponseHandler(
+          gatewayGenerationInfoResponseSchema,
+        ),
+        failedResponseHandler: createJsonErrorResponseHandler({
+          errorSchema: z.any(),
+          errorToMessage: data => data,
+        }),
+        fetch: this.config.fetch,
+      });
+      return value;
+    } catch (error) {
+      throw await asGatewayError(error);
+    }
+  }
+}
+const gatewayGenerationInfoResponseSchema = lazySchema(() =>
+  zodSchema(
+    z
+      .object({
+        data: z
+          .object({
+            id: z.string(),
+            total_cost: z.number(),
+            upstream_inference_cost: z.number(),
+            usage: z.number(),
+            created_at: z.string(),
+            model: z.string(),
+            is_byok: z.boolean(),
+            provider_name: z.string(),
+            streamed: z.boolean(),
+            finish_reason: z.string(),
+            latency: z.number(),
+            generation_time: z.number(),
+            native_tokens_prompt: z.number(),
+            native_tokens_completion: z.number(),
+            native_tokens_reasoning: z.number(),
+            native_tokens_cached: z.number(),
+            native_tokens_cache_creation: z.number(),
+            billable_web_search_calls: z.number(),
+          })
+          .transform(
+            ({
+              total_cost,
+              upstream_inference_cost,
+              created_at,
+              is_byok,
+              provider_name,
+              finish_reason,
+              generation_time,
+              native_tokens_prompt,
+              native_tokens_completion,
+              native_tokens_reasoning,
+              native_tokens_cached,
+              native_tokens_cache_creation,
+              billable_web_search_calls,
+              ...rest
+            }) => ({
+              ...rest,
+              totalCost: total_cost,
+              upstreamInferenceCost: upstream_inference_cost,
+              createdAt: created_at,
+              isByok: is_byok,
+              providerName: provider_name,
+              finishReason: finish_reason,
+              generationTime: generation_time,
+              promptTokens: native_tokens_prompt,
+              completionTokens: native_tokens_completion,
+              reasoningTokens: native_tokens_reasoning,
+              cachedTokens: native_tokens_cached,
+              cacheCreationTokens: native_tokens_cache_creation,
+              billableWebSearchCalls: billable_web_search_calls,
+            }),
+          ),
+      })
+      .transform(({ data }) => data),
+  ),
+);

package/src/gateway-image-model-settings.ts CHANGED Viewed

@@ -1,4 +1,9 @@
 export type GatewayImageModelId =
+  | 'bfl/flux-2-flex'
+  | 'bfl/flux-2-klein-4b'
+  | 'bfl/flux-2-klein-9b'
+  | 'bfl/flux-2-max'
+  | 'bfl/flux-2-pro'
   | 'bfl/flux-kontext-max'
   | 'bfl/flux-kontext-pro'
   | 'bfl/flux-pro-1.0-fill'
@@ -10,6 +15,7 @@ export type GatewayImageModelId =
   | 'openai/gpt-image-1'
   | 'openai/gpt-image-1-mini'
   | 'openai/gpt-image-1.5'
+  | 'prodia/flux-fast-schnell'
   | 'recraft/recraft-v2'
   | 'recraft/recraft-v3'
   | 'recraft/recraft-v4'

package/src/gateway-image-model.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import type {
-  ImageModelV3,
-  ImageModelV3File,
-  ImageModelV3ProviderMetadata,
+  ImageModelV4,
+  ImageModelV4File,
+  ImageModelV4ProviderMetadata,
 } from '@ai-sdk/provider';
 import {
   combineHeaders,
@@ -17,8 +17,8 @@ import type { GatewayConfig } from './gateway-config';
 import { asGatewayError } from './errors';
 import { parseAuthMethod } from './errors/parse-auth-method';
-export class GatewayImageModel implements ImageModelV3 {
-  readonly specificationVersion = 'v3' as const;
+export class GatewayImageModel implements ImageModelV4 {
+  readonly specificationVersion = 'v4' as const;
   // Set a very large number to prevent client-side splitting of requests
   readonly maxImagesPerCall = Number.MAX_SAFE_INTEGER;
@@ -45,8 +45,8 @@ export class GatewayImageModel implements ImageModelV3 {
     providerOptions,
     headers,
     abortSignal,
-  }: Parameters<ImageModelV3['doGenerate']>[0]): Promise<
-    Awaited<ReturnType<ImageModelV3['doGenerate']>>
+  }: Parameters<ImageModelV4['doGenerate']>[0]): Promise<
+    Awaited<ReturnType<ImageModelV4['doGenerate']>>
   > {
     const resolvedHeaders = await resolve(this.config.headers());
     try {
@@ -89,7 +89,7 @@ export class GatewayImageModel implements ImageModelV3 {
         images: responseBody.images, // Always base64 strings from server
         warnings: responseBody.warnings ?? [],
         providerMetadata:
-          responseBody.providerMetadata as ImageModelV3ProviderMetadata,
+          responseBody.providerMetadata as ImageModelV4ProviderMetadata,
         response: {
           timestamp: new Date(),
           modelId: this.modelId,
@@ -114,13 +114,13 @@ export class GatewayImageModel implements ImageModelV3 {
   private getModelConfigHeaders() {
     return {
-      'ai-image-model-specification-version': '3',
+      'ai-image-model-specification-version': '4',
       'ai-model-id': this.modelId,
     };
   }
 }
-function maybeEncodeImageFile(file: ImageModelV3File) {
+function maybeEncodeImageFile(file: ImageModelV4File) {
   if (file.type === 'file' && file.data instanceof Uint8Array) {
     return {
       ...file,

package/src/gateway-language-model-settings.ts CHANGED Viewed

@@ -17,6 +17,7 @@ export type GatewayModelId =
   | 'alibaba/qwen3-vl-thinking'
   | 'alibaba/qwen3.5-flash'
   | 'alibaba/qwen3.5-plus'
+  | 'alibaba/qwen3.6-plus'
   | 'amazon/nova-2-lite'
   | 'amazon/nova-lite'
   | 'amazon/nova-micro'
@@ -24,8 +25,6 @@ export type GatewayModelId =
   | 'anthropic/claude-3-haiku'
   | 'anthropic/claude-3-opus'
   | 'anthropic/claude-3.5-haiku'
-  | 'anthropic/claude-3.5-sonnet'
-  | 'anthropic/claude-3.5-sonnet-20240620'
   | 'anthropic/claude-3.7-sonnet'
   | 'anthropic/claude-haiku-4.5'
   | 'anthropic/claude-opus-4'
@@ -36,6 +35,7 @@ export type GatewayModelId =
   | 'anthropic/claude-sonnet-4.5'
   | 'anthropic/claude-sonnet-4.6'
   | 'arcee-ai/trinity-large-preview'
+  | 'arcee-ai/trinity-large-thinking'
   | 'arcee-ai/trinity-mini'
   | 'bytedance/seed-1.6'
   | 'bytedance/seed-1.8'
@@ -51,8 +51,6 @@ export type GatewayModelId =
   | 'google/gemini-2.5-flash'
   | 'google/gemini-2.5-flash-image'
   | 'google/gemini-2.5-flash-lite'
-  | 'google/gemini-2.5-flash-lite-preview-09-2025'
-  | 'google/gemini-2.5-flash-preview-09-2025'
   | 'google/gemini-2.5-pro'
   | 'google/gemini-3-flash'
   | 'google/gemini-3-pro-image'
@@ -60,11 +58,14 @@ export type GatewayModelId =
   | 'google/gemini-3.1-flash-image-preview'
   | 'google/gemini-3.1-flash-lite-preview'
   | 'google/gemini-3.1-pro-preview'
+  | 'google/gemma-4-26b-a4b-it'
+  | 'google/gemma-4-31b-it'
   | 'inception/mercury-2'
   | 'inception/mercury-coder-small'
   | 'kwaipilot/kat-coder-pro-v1'
+  | 'kwaipilot/kat-coder-pro-v2'
   | 'meituan/longcat-flash-chat'
-  | 'meituan/longcat-flash-thinking'
+  | 'meituan/longcat-flash-thinking-2601'
   | 'meta/llama-3.1-70b'
   | 'meta/llama-3.1-8b'
   | 'meta/llama-3.2-11b'
@@ -79,6 +80,8 @@ export type GatewayModelId =
   | 'minimax/minimax-m2.1-lightning'
   | 'minimax/minimax-m2.5'
   | 'minimax/minimax-m2.5-highspeed'
+  | 'minimax/minimax-m2.7'
+  | 'minimax/minimax-m2.7-highspeed'
   | 'mistral/codestral'
   | 'mistral/devstral-2'
   | 'mistral/devstral-small'
@@ -104,9 +107,9 @@ export type GatewayModelId =
   | 'morph/morph-v3-fast'
   | 'morph/morph-v3-large'
   | 'nvidia/nemotron-3-nano-30b-a3b'
+  | 'nvidia/nemotron-3-super-120b-a12b'
   | 'nvidia/nemotron-nano-12b-v2-vl'
   | 'nvidia/nemotron-nano-9b-v2'
-  | 'openai/codex-mini'
   | 'openai/gpt-3.5-turbo'
   | 'openai/gpt-3.5-turbo-instruct'
   | 'openai/gpt-4-turbo'
@@ -134,6 +137,8 @@ export type GatewayModelId =
   | 'openai/gpt-5.3-chat'
   | 'openai/gpt-5.3-codex'
   | 'openai/gpt-5.4'
+  | 'openai/gpt-5.4-mini'
+  | 'openai/gpt-5.4-nano'
   | 'openai/gpt-5.4-pro'
   | 'openai/gpt-oss-120b'
   | 'openai/gpt-oss-20b'
@@ -146,12 +151,8 @@ export type GatewayModelId =
   | 'openai/o4-mini'
   | 'perplexity/sonar'
   | 'perplexity/sonar-pro'
-  | 'perplexity/sonar-reasoning'
   | 'perplexity/sonar-reasoning-pro'
   | 'prime-intellect/intellect-3'
-  | 'vercel/v0-1.0-md'
-  | 'vercel/v0-1.5-md'
-  | 'xai/grok-2-vision'
   | 'xai/grok-3'
   | 'xai/grok-3-fast'
   | 'xai/grok-3-mini'
@@ -161,8 +162,15 @@ export type GatewayModelId =
   | 'xai/grok-4-fast-reasoning'
   | 'xai/grok-4.1-fast-non-reasoning'
   | 'xai/grok-4.1-fast-reasoning'
+  | 'xai/grok-4.20-multi-agent'
+  | 'xai/grok-4.20-multi-agent-beta'
+  | 'xai/grok-4.20-non-reasoning'
+  | 'xai/grok-4.20-non-reasoning-beta'
+  | 'xai/grok-4.20-reasoning'
+  | 'xai/grok-4.20-reasoning-beta'
   | 'xai/grok-code-fast-1'
   | 'xiaomi/mimo-v2-flash'
+  | 'xiaomi/mimo-v2-pro'
   | 'zai/glm-4.5'
   | 'zai/glm-4.5-air'
   | 'zai/glm-4.5v'
@@ -173,4 +181,7 @@ export type GatewayModelId =
   | 'zai/glm-4.7-flash'
   | 'zai/glm-4.7-flashx'
   | 'zai/glm-5'
+  | 'zai/glm-5-turbo'
+  | 'zai/glm-5.1'
+  | 'zai/glm-5v-turbo'
   | (string & {});

package/src/gateway-language-model.ts CHANGED Viewed

@@ -1,11 +1,11 @@
 import type {
-  LanguageModelV3,
-  LanguageModelV3CallOptions,
-  SharedV3Warning,
-  LanguageModelV3FilePart,
-  LanguageModelV3StreamPart,
-  LanguageModelV3GenerateResult,
-  LanguageModelV3StreamResult,
+  LanguageModelV4,
+  LanguageModelV4CallOptions,
+  SharedV4Warning,
+  LanguageModelV4FilePart,
+  LanguageModelV4StreamPart,
+  LanguageModelV4GenerateResult,
+  LanguageModelV4StreamResult,
 } from '@ai-sdk/provider';
 import {
   combineHeaders,
@@ -28,8 +28,8 @@ type GatewayChatConfig = GatewayConfig & {
   o11yHeaders: Resolvable<Record<string, string>>;
 };
-export class GatewayLanguageModel implements LanguageModelV3 {
-  readonly specificationVersion = 'v3';
+export class GatewayLanguageModel implements LanguageModelV4 {
+  readonly specificationVersion = 'v4';
   readonly supportedUrls = { '*/*': [/.*/] };
   constructor(
@@ -41,7 +41,7 @@ export class GatewayLanguageModel implements LanguageModelV3 {
     return this.config.provider;
   }
-  private async getArgs(options: LanguageModelV3CallOptions) {
+  private async getArgs(options: LanguageModelV4CallOptions) {
     const { abortSignal: _abortSignal, ...optionsWithoutSignal } = options;
     return {
@@ -51,8 +51,8 @@ export class GatewayLanguageModel implements LanguageModelV3 {
   }
   async doGenerate(
-    options: LanguageModelV3CallOptions,
-  ): Promise<LanguageModelV3GenerateResult> {
+    options: LanguageModelV4CallOptions,
+  ): Promise<LanguageModelV4GenerateResult> {
     const { args, warnings } = await this.getArgs(options);
     const { abortSignal } = options;
@@ -93,8 +93,8 @@ export class GatewayLanguageModel implements LanguageModelV3 {
   }
   async doStream(
-    options: LanguageModelV3CallOptions,
-  ): Promise<LanguageModelV3StreamResult> {
+    options: LanguageModelV4CallOptions,
+  ): Promise<LanguageModelV4StreamResult> {
     const { args, warnings } = await this.getArgs(options);
     const { abortSignal } = options;
@@ -122,8 +122,8 @@ export class GatewayLanguageModel implements LanguageModelV3 {
       return {
         stream: response.pipeThrough(
           new TransformStream<
-            ParseResult<LanguageModelV3StreamPart>,
-            LanguageModelV3StreamPart
+            ParseResult<LanguageModelV4StreamPart>,
+            LanguageModelV4StreamPart
           >({
             start(controller) {
               if (warnings.length > 0) {
@@ -177,11 +177,11 @@ export class GatewayLanguageModel implements LanguageModelV3 {
    * @param options - The options to encode.
    * @returns The options with the file parts encoded.
    */
-  private maybeEncodeFileParts(options: LanguageModelV3CallOptions) {
+  private maybeEncodeFileParts(options: LanguageModelV4CallOptions) {
     for (const message of options.prompt) {
       for (const part of message.content) {
         if (this.isFilePart(part)) {
-          const filePart = part as LanguageModelV3FilePart;
+          const filePart = part as LanguageModelV4FilePart;
           // If the file part is a URL it will get cleanly converted to a string.
           // If it's a binary file attachment we convert it to a data url.
           // In either case, server-side we should only ever see URLs as strings.
@@ -204,7 +204,7 @@ export class GatewayLanguageModel implements LanguageModelV3 {
   private getModelConfigHeaders(modelId: string, streaming: boolean) {
     return {
-      'ai-language-model-specification-version': '3',
+      'ai-language-model-specification-version': '4',
       'ai-language-model-id': modelId,
       'ai-language-model-streaming': String(streaming),
     };

package/src/gateway-model-entry.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { LanguageModelV3 } from '@ai-sdk/provider';
+import type { LanguageModelV4 } from '@ai-sdk/provider';
 export interface GatewayLanguageModelEntry {
   /**
@@ -53,6 +53,6 @@ export interface GatewayLanguageModelEntry {
 }
 export type GatewayLanguageModelSpecification = Pick<
-  LanguageModelV3,
+  LanguageModelV4,
   'specificationVersion' | 'provider' | 'modelId'
 >;

package/src/gateway-provider-options.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import { InferSchema, lazySchema, zodSchema } from '@ai-sdk/provider-utils';
 import { z } from 'zod/v4';
 // https://vercel.com/docs/ai-gateway/provider-options
-const gatewayLanguageModelOptions = lazySchema(() =>
+const gatewayProviderOptions = lazySchema(() =>
   zodSchema(
     z.object({
       /**
@@ -53,12 +53,33 @@ const gatewayLanguageModelOptions = lazySchema(() =>
         .record(z.string(), z.array(z.record(z.string(), z.unknown())))
         .optional(),
       /**
-       * Whether to filter by only providers that state they have zero data
-       * retention with Vercel AI Gateway. When enabled, only providers that
-       * have agreements with Vercel AI Gateway for zero data retention will be
-       * used.
+       * Whether to filter by only providers that have zero data retention
+       * agreements with Vercel for AI Gateway. When using BYOK credentials,
+       * this filter is not applied. If BYOK credentials fail and the request
+       * falls back to system credentials, only providers with zero data
+       * retention agreements will be used.
        */
       zeroDataRetention: z.boolean().optional(),
+      /**
+       * Whether to filter by only providers that do not train on prompt data.
+       * When using BYOK credentials, this filter is not applied. If BYOK
+       * credentials fail and the request falls back to system credentials,
+       * only providers that have agreements with Vercel for AI Gateway to not
+       * use prompts for model training will be used.
+       */
+      disallowPromptTraining: z.boolean().optional(),
+      /**
+       * Whether to filter by only providers that are HIPAA compliant with
+       * Vercel AI Gateway. When enabled, only providers that have agreements
+       * with Vercel AI Gateway for HIPAA compliance will be used.
+       */
+      hipaaCompliant: z.boolean().optional(),
+      /**
+       * The unique identifier for the entity against which quota is tracked.
+       *
+       * Used for quota management and enforcement purposes.
+       */
+      quotaEntityId: z.string().optional(),
       /**
        * Per-provider timeouts for BYOK credentials in milliseconds.
        * Controls how long to wait for a provider to start responding
@@ -75,6 +96,4 @@ const gatewayLanguageModelOptions = lazySchema(() =>
   ),
 );
-export type GatewayLanguageModelOptions = InferSchema<
-  typeof gatewayLanguageModelOptions
->;
+export type GatewayProviderOptions = InferSchema<typeof gatewayProviderOptions>;