npm - @ai-sdk/provider - Versions diffs - 4.0.0-beta.14 → 4.0.0-beta.19 - Mend

@ai-sdk/provider 4.0.0-beta.14 → 4.0.0-beta.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/CHANGELOG.md +40 -0
package/dist/index.d.ts +524 -31
package/package.json +4 -4
package/src/index.ts +1 -0
package/src/language-model/v4/language-model-v4-prompt.ts +17 -38
package/src/realtime-model/index.ts +1 -0
package/src/realtime-model/v4/index.ts +20 -0
package/src/realtime-model/v4/realtime-factory-v4.ts +20 -0
package/src/realtime-model/v4/realtime-model-v4-client-event.ts +68 -0
package/src/realtime-model/v4/realtime-model-v4-client-secret.ts +40 -0
package/src/realtime-model/v4/realtime-model-v4-conversation-item.ts +55 -0
package/src/realtime-model/v4/realtime-model-v4-server-event.ts +199 -0
package/src/realtime-model/v4/realtime-model-v4-session-config.ts +142 -0
package/src/realtime-model/v4/realtime-model-v4-tool-definition.ts +28 -0
package/src/realtime-model/v4/realtime-model-v4.ts +89 -0

package/src/language-model/v4/language-model-v4-prompt.ts CHANGED Viewed

@@ -5,7 +5,6 @@ import type {
   SharedV4FileDataUrl,
 } from '../../shared/v4/shared-v4-file-data';
 import type { SharedV4ProviderOptions } from '../../shared/v4/shared-v4-provider-options';
-import type { SharedV4ProviderReference } from '../../shared/v4/shared-v4-provider-reference';
 /**
  * A prompt is a list of messages.
@@ -359,15 +358,28 @@ export type LanguageModelV4ToolResultOutput =
             providerOptions?: SharedV4ProviderOptions;
           }
         | {
-            type: 'file-data';
+            type: 'file';
             /**
-             * Base-64 encoded media data.
+             * File data as a tagged discriminated union:
+             *
+             * - `{ type: 'data', data }`: raw bytes (Uint8Array) or base64-encoded string.
+             * - `{ type: 'url', url }`: a URL that points to the file.
+             * - `{ type: 'reference', reference }`: a provider reference (`{ [provider]: id }`).
+             * - `{ type: 'text', text }`: inline text content (e.g. an inline text document).
              */
-            data: string;
+            data: SharedV4FileData;
             /**
-             * IANA media type.
+             * Either a full IANA media type (`type/subtype`, e.g. `image/png`) or just
+             * the top-level IANA segment (e.g. `image`, `audio`, `video`, `text`).
+             *
+             * `*`-subtype wildcards (e.g. `image/*`) are normalized as equivalent to the
+             * top-level segment alone (e.g. `image`). Providers can use the helpers in
+             * `@ai-sdk/provider-utils` (`isFullMediaType`, `getTopLevelMediaType`,
+             * `detectMediaType`) to resolve the field according to their API
+             * requirements.
+             *
              * @see https://www.iana.org/assignments/media-types/media-types.xhtml
              */
             mediaType: string;
@@ -377,39 +389,6 @@ export type LanguageModelV4ToolResultOutput =
              */
             filename?: string;
-            /**
-             * Provider-specific options.
-             */
-            providerOptions?: SharedV4ProviderOptions;
-          }
-        | {
-            type: 'file-url';
-            /**
-             * URL of the file.
-             */
-            url: string;
-            /**
-             * IANA media type.
-             * @see https://www.iana.org/assignments/media-types/media-types.xhtml
-             */
-            mediaType: string;
-            /**
-             * Provider-specific options.
-             */
-            providerOptions?: SharedV4ProviderOptions;
-          }
-        | {
-            type: 'file-reference';
-            /**
-             * Provider-specific references for the file.
-             * The key is the provider name, e.g. 'openai' or 'anthropic'.
-             */
-            providerReference: SharedV4ProviderReference;
             /**
              * Provider-specific options.
              */

package/src/realtime-model/index.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export * from './v4/index';

package/src/realtime-model/v4/index.ts ADDED Viewed

@@ -0,0 +1,20 @@
+export type {
+  RealtimeFactoryV4 as Experimental_RealtimeFactoryV4,
+  RealtimeFactoryV4GetTokenOptions as Experimental_RealtimeFactoryV4GetTokenOptions,
+  RealtimeFactoryV4GetTokenResult as Experimental_RealtimeFactoryV4GetTokenResult,
+} from './realtime-factory-v4';
+export type { RealtimeModelV4 as Experimental_RealtimeModelV4 } from './realtime-model-v4';
+export type { RealtimeModelV4ClientEvent as Experimental_RealtimeModelV4ClientEvent } from './realtime-model-v4-client-event';
+export type {
+  RealtimeModelV4ClientSecretOptions as Experimental_RealtimeModelV4ClientSecretOptions,
+  RealtimeModelV4ClientSecretResult as Experimental_RealtimeModelV4ClientSecretResult,
+} from './realtime-model-v4-client-secret';
+export type {
+  RealtimeModelV4ConversationItem as Experimental_RealtimeModelV4ConversationItem,
+  RealtimeModelV4TextMessage as Experimental_RealtimeModelV4TextMessage,
+  RealtimeModelV4AudioMessage as Experimental_RealtimeModelV4AudioMessage,
+  RealtimeModelV4FunctionCallOutput as Experimental_RealtimeModelV4FunctionCallOutput,
+} from './realtime-model-v4-conversation-item';
+export type { RealtimeModelV4ServerEvent as Experimental_RealtimeModelV4ServerEvent } from './realtime-model-v4-server-event';
+export type { RealtimeModelV4SessionConfig as Experimental_RealtimeModelV4SessionConfig } from './realtime-model-v4-session-config';
+export type { RealtimeModelV4ToolDefinition as Experimental_RealtimeModelV4ToolDefinition } from './realtime-model-v4-tool-definition';

package/src/realtime-model/v4/realtime-factory-v4.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import type { RealtimeModelV4 } from './realtime-model-v4';
+import type { RealtimeModelV4ClientSecretOptions } from './realtime-model-v4-client-secret';
+/**
+ * Options accepted by `RealtimeFactoryV4.getToken`: a `model` string
+ * intersected with every field of `RealtimeModelV4ClientSecretOptions`.
+ */
+export type RealtimeFactoryV4GetTokenOptions = {
+  /**
+   * NOTE(review): presumably the same kind of identifier that the factory's
+   * call signature takes as `modelId` — confirm.
+   */
+  model: string;
+} & RealtimeModelV4ClientSecretOptions;
+/**
+ * Value that `RealtimeFactoryV4.getToken` resolves to.
+ *
+ * NOTE(review): the field names and types are the same as those of
+ * `RealtimeModelV4ClientSecretResult` (`token`, `url`, optional `expiresAt`).
+ * Presumably they carry the same meaning (ephemeral token, WebSocket URL,
+ * Unix expiry in seconds) — confirm, and consider aliasing that type so the
+ * two declarations cannot drift apart.
+ */
+export type RealtimeFactoryV4GetTokenResult = {
+  token: string;
+  url: string;
+  expiresAt?: number;
+};
+/**
+ * Callable factory for realtime models.
+ *
+ * The interface has a call signature and a `getToken` method, so an
+ * implementation is a function object that also exposes `getToken`.
+ */
+export interface RealtimeFactoryV4 {
+  /**
+   * Returns a `RealtimeModelV4` for the given model ID.
+   */
+  (modelId: string): RealtimeModelV4;
+  /**
+   * Asynchronously resolves to a token, a URL and an optional expiry.
+   *
+   * NOTE(review): presumably this creates the ephemeral client secret for
+   * the model named in `options.model` — confirm against an implementation.
+   */
+  getToken(
+    options: RealtimeFactoryV4GetTokenOptions,
+  ): Promise<RealtimeFactoryV4GetTokenResult>;
+}

package/src/realtime-model/v4/realtime-model-v4-client-event.ts ADDED Viewed

@@ -0,0 +1,68 @@
+import type { RealtimeModelV4ConversationItem } from './realtime-model-v4-conversation-item';
+import type { RealtimeModelV4SessionConfig } from './realtime-model-v4-session-config';
+/**
+ * Normalized events sent from the browser to the realtime model.
+ * Each provider maps this to its native event format before sending
+ * over the WebSocket.
+ *
+ * This is a discriminated union: the string literal in `type` selects the
+ * variant and determines which other fields are present.
+ */
+export type RealtimeModelV4ClientEvent =
+  // ── Session ────────────────────────────────────────────────────────
+  | {
+      /**
+       * Carries a session configuration for the provider to apply.
+       * NOTE(review): whether the config replaces or is merged into the
+       * active session settings is not visible here — confirm.
+       */
+      type: 'session-update';
+      config: RealtimeModelV4SessionConfig;
+    }
+  // ── Input audio buffer ─────────────────────────────────────────────
+  | {
+      type: 'input-audio-append';
+      /**
+       * Base64-encoded audio chunk to append to the input buffer.
+       */
+      audio: string;
+    }
+  | {
+      /**
+       * Has no payload besides `type`.
+       * NOTE(review): presumably finalizes the audio appended so far —
+       * confirm.
+       */
+      type: 'input-audio-commit';
+    }
+  | {
+      /**
+       * Has no payload besides `type`.
+       * NOTE(review): presumably discards the audio appended so far —
+       * confirm.
+       */
+      type: 'input-audio-clear';
+    }
+  // ── Conversation items ─────────────────────────────────────────────
+  | {
+      /**
+       * Carries one conversation item (text message, audio message or
+       * function call output) to add to the conversation.
+       */
+      type: 'conversation-item-create';
+      item: RealtimeModelV4ConversationItem;
+    }
+  | {
+      type: 'conversation-item-truncate';
+      /**
+       * The ID of the assistant message item to truncate.
+       */
+      itemId: string;
+      /**
+       * The index of the content part to truncate.
+       *
+       * NOTE(review): none of the `RealtimeModelV4ServerEvent` variants
+       * carries a content index, so the caller has to derive this value
+       * itself — confirm how.
+       */
+      contentIndex: number;
+      /**
+       * Truncate audio after this many milliseconds.
+       */
+      audioEndMs: number;
+    }
+  // ── Response control ───────────────────────────────────────────────
+  | {
+      type: 'response-create';
+      /**
+       * Optional settings for this response; every field is optional.
+       *
+       * NOTE(review): `modalities` is typed `string[]`, whereas
+       * `RealtimeModelV4SessionConfig.outputModalities` is
+       * `Array<'text' | 'audio'>` — confirm whether the looser type is
+       * intentional.
+       */
+      options?: {
+        modalities?: string[];
+        instructions?: string;
+        metadata?: Record<string, unknown>;
+      };
+    }
+  | {
+      /**
+       * Has no payload besides `type`.
+       * NOTE(review): presumably cancels the response currently being
+       * generated — confirm.
+       */
+      type: 'response-cancel';
+    };

package/src/realtime-model/v4/realtime-model-v4-client-secret.ts ADDED Viewed

@@ -0,0 +1,40 @@
+import type { RealtimeModelV4SessionConfig } from './realtime-model-v4-session-config';
+/**
+ * Options for creating an ephemeral client secret for browser-side
+ * WebSocket connections to a realtime model.
+ *
+ * Both fields are optional, so an empty object is a valid value.
+ */
+export type RealtimeModelV4ClientSecretOptions = {
+  /**
+   * Number of seconds until the client secret expires.
+   *
+   * NOTE(review): the lifetime used when this is omitted is not specified
+   * here — presumably a provider default; confirm.
+   */
+  expiresAfterSeconds?: number;
+  /**
+   * Optional session configuration to embed in the token request.
+   * Some providers (e.g. Google) require the full session config at token creation time.
+   */
+  sessionConfig?: RealtimeModelV4SessionConfig;
+};
+/**
+ * Result of creating an ephemeral client secret.
+ *
+ * `token` and `url` are always present; `expiresAt` is optional.
+ */
+export type RealtimeModelV4ClientSecretResult = {
+  /**
+   * The ephemeral token value. Used as a Bearer token or in the
+   * WebSocket subprotocol header for authentication.
+   */
+  token: string;
+  /**
+   * The WebSocket URL to connect to. Includes any provider-specific
+   * query parameters (e.g. model ID).
+   */
+  url: string;
+  /**
+   * Unix timestamp (seconds) when this client secret expires.
+   * May be absent; callers must handle `undefined`.
+   */
+  expiresAt?: number;
+};

package/src/realtime-model/v4/realtime-model-v4-conversation-item.ts ADDED Viewed

@@ -0,0 +1,55 @@
+/**
+ * A conversation item that can be created by the client and sent to
+ * the model as the `item` of a `conversation-item-create` client event
+ * (see `RealtimeModelV4ClientEvent`).
+ *
+ * Discriminated by `type`: `'text-message'`, `'audio-message'` or
+ * `'function-call-output'`.
+ */
+export type RealtimeModelV4ConversationItem =
+  | RealtimeModelV4TextMessage
+  | RealtimeModelV4AudioMessage
+  | RealtimeModelV4FunctionCallOutput;
+/**
+ * A text message from the user.
+ */
+export type RealtimeModelV4TextMessage = {
+  type: 'text-message';
+  /**
+   * Always `'user'`; this type admits no other role.
+   */
+  role: 'user';
+  /**
+   * The message text.
+   */
+  text: string;
+};
+/**
+ * An audio message from the user (complete audio, not streamed).
+ */
+export type RealtimeModelV4AudioMessage = {
+  type: 'audio-message';
+  /**
+   * Always `'user'`; this type admits no other role.
+   */
+  role: 'user';
+  /**
+   * Base64-encoded audio data.
+   *
+   * NOTE(review): the audio format is not stated on this type — presumably
+   * it must match the session's `inputAudioFormat`; confirm.
+   */
+  audio: string;
+};
+/**
+ * The output of a function call, sent back to the model so it can
+ * continue generating a response using the tool result.
+ */
+export type RealtimeModelV4FunctionCallOutput = {
+  type: 'function-call-output';
+  /**
+   * The call ID from the function-call-arguments-done event.
+   * Must match so the model knows which function call this result is for.
+   */
+  callId: string;
+  /**
+   * The name of the function that was called.
+   * Required by some providers (e.g. Google) in the tool response routing.
+   *
+   * The `function-call-arguments-done` server event also carries a `name`
+   * field, which is the natural source for this value.
+   */
+  name?: string;
+  /**
+   * JSON string containing the function call result.
+   * The type is a plain `string`; serializing the result is left to the
+   * caller.
+   */
+  output: string;
+};

package/src/realtime-model/v4/realtime-model-v4-server-event.ts ADDED Viewed

@@ -0,0 +1,199 @@
+/**
+ * Normalized events emitted by the realtime model (model → browser).
+ * Each provider maps its native event format to this discriminated union.
+ *
+ * Every event includes a `raw` field with the original provider-specific
+ * event data for debugging and provider-specific access.
+ *
+ * `raw` is typed `unknown`, so it must be narrowed before any property of
+ * it is read. The `type` string literal selects the variant.
+ */
+export type RealtimeModelV4ServerEvent =
+  // ── Session lifecycle ──────────────────────────────────────────────
+  | {
+      type: 'session-created';
+      /**
+       * Optional; may be absent, so callers must handle `undefined`.
+       */
+      sessionId?: string;
+      raw: unknown;
+    }
+  | {
+      type: 'session-updated';
+      raw: unknown;
+    }
+  // ── Input audio buffer ─────────────────────────────────────────────
+  // `itemId` is optional on all three events in this group.
+  | {
+      type: 'speech-started';
+      itemId?: string;
+      raw: unknown;
+    }
+  | {
+      type: 'speech-stopped';
+      itemId?: string;
+      raw: unknown;
+    }
+  | {
+      type: 'audio-committed';
+      itemId?: string;
+      previousItemId?: string;
+      raw: unknown;
+    }
+  // ── Conversation items ─────────────────────────────────────────────
+  | {
+      type: 'conversation-item-added';
+      itemId: string;
+      /**
+       * Typed `unknown`: this union does not normalize the item's shape.
+       * NOTE(review): presumably the provider's own item object — confirm.
+       */
+      item: unknown;
+      raw: unknown;
+    }
+  | {
+      /**
+       * See `inputAudioTranscription` in `RealtimeModelV4SessionConfig`,
+       * which documents when this event is emitted.
+       */
+      type: 'input-transcription-completed';
+      itemId: string;
+      transcript: string;
+      raw: unknown;
+    }
+  // ── Response lifecycle ─────────────────────────────────────────────
+  | {
+      type: 'response-created';
+      responseId: string;
+      raw: unknown;
+    }
+  | {
+      type: 'response-done';
+      responseId: string;
+      /**
+       * A plain string; the set of possible values is not constrained by
+       * this type.
+       */
+      status: string;
+      raw: unknown;
+    }
+  // ── Output item lifecycle ──────────────────────────────────────────
+  // The four events in this group carry only `responseId`, `itemId` and `raw`.
+  | {
+      type: 'output-item-added';
+      responseId: string;
+      itemId: string;
+      raw: unknown;
+    }
+  | {
+      type: 'output-item-done';
+      responseId: string;
+      itemId: string;
+      raw: unknown;
+    }
+  | {
+      type: 'content-part-added';
+      responseId: string;
+      itemId: string;
+      raw: unknown;
+    }
+  | {
+      type: 'content-part-done';
+      responseId: string;
+      itemId: string;
+      raw: unknown;
+    }
+  // ── Audio output ───────────────────────────────────────────────────
+  | {
+      type: 'audio-delta';
+      responseId: string;
+      itemId: string;
+      /**
+       * Base64-encoded audio chunk.
+       */
+      delta: string;
+      raw: unknown;
+    }
+  | {
+      type: 'audio-done';
+      responseId: string;
+      itemId: string;
+      raw: unknown;
+    }
+  // ── Audio transcript output ────────────────────────────────────────
+  | {
+      type: 'audio-transcript-delta';
+      responseId: string;
+      itemId: string;
+      /**
+       * Text chunk of the audio transcript.
+       */
+      delta: string;
+      raw: unknown;
+    }
+  | {
+      type: 'audio-transcript-done';
+      responseId: string;
+      itemId: string;
+      /**
+       * Optional; may be absent.
+       * NOTE(review): presumably the full transcript when the provider
+       * supplies it — confirm.
+       */
+      transcript?: string;
+      raw: unknown;
+    }
+  // ── Text output ────────────────────────────────────────────────────
+  | {
+      type: 'text-delta';
+      responseId: string;
+      itemId: string;
+      /**
+       * Text chunk of the model's text response.
+       */
+      delta: string;
+      raw: unknown;
+    }
+  | {
+      type: 'text-done';
+      responseId: string;
+      itemId: string;
+      /**
+       * Optional; may be absent.
+       * NOTE(review): presumably the full text when the provider supplies
+       * it — confirm.
+       */
+      text?: string;
+      raw: unknown;
+    }
+  // ── Function calling ───────────────────────────────────────────────
+  | {
+      type: 'function-call-arguments-delta';
+      responseId: string;
+      itemId: string;
+      callId: string;
+      /**
+       * Partial JSON string of function call arguments.
+       */
+      delta: string;
+      raw: unknown;
+    }
+  | {
+      type: 'function-call-arguments-done';
+      responseId: string;
+      itemId: string;
+      /**
+       * The ID that a `function-call-output` conversation item must repeat
+       * in its own `callId` field.
+       */
+      callId: string;
+      /**
+       * The name of the function to call.
+       */
+      name: string;
+      /**
+       * Complete JSON string of function call arguments.
+       */
+      arguments: string;
+      raw: unknown;
+    }
+  // ── Error ──────────────────────────────────────────────────────────
+  | {
+      type: 'error';
+      message: string;
+      /**
+       * Optional; may be absent.
+       */
+      code?: string;
+      raw: unknown;
+    }
+  // ── Custom / provider-specific ────────────────────────────────────
+  | {
+      type: 'custom';
+      /**
+       * The original event type string from the provider.
+       */
+      rawType: string;
+      raw: unknown;
+    };

package/src/realtime-model/v4/realtime-model-v4-session-config.ts ADDED Viewed

@@ -0,0 +1,142 @@
+import type { RealtimeModelV4ToolDefinition } from './realtime-model-v4-tool-definition';
+/**
+ * Provider-neutral configuration for a realtime session.
+ * Each provider maps this to their specific session.update payload.
+ *
+ * Every top-level field is optional, so an empty object is a valid value.
+ */
+export type RealtimeModelV4SessionConfig = {
+  /**
+   * System instructions for the model.
+   */
+  instructions?: string;
+  /**
+   * Voice to use for audio output.
+   * A plain string; the accepted values are not constrained by this type.
+   */
+  voice?: string;
+  /**
+   * Which output modalities the model should produce.
+   */
+  outputModalities?: Array<'text' | 'audio'>;
+  /**
+   * Audio format configuration for input audio.
+   * Has the same shape as `outputAudioFormat`.
+   */
+  inputAudioFormat?: {
+    /**
+     * Audio format type (e.g. "audio/pcm", "audio/pcmu", "audio/pcma").
+     */
+    type: string;
+    /**
+     * Sample rate in Hz. Only applicable for PCM format.
+     */
+    rate?: number;
+  };
+  /**
+   * Input audio transcription configuration.
+   *
+   * When enabled, providers that support input transcription emit normalized
+   * `input-transcription-completed` events that can be rendered as user
+   * messages.
+   *
+   * NOTE(review): there is no explicit enable flag and every field is
+   * optional — presumably providing this object (even an empty one) is what
+   * enables transcription; confirm.
+   */
+  inputAudioTranscription?: {
+    /**
+     * Provider-specific transcription model.
+     */
+    model?: string;
+    /**
+     * Optional language hint for the input audio.
+     */
+    language?: string;
+    /**
+     * Optional prompt to guide transcription.
+     */
+    prompt?: string;
+  };
+  /**
+   * Output audio transcription configuration.
+   *
+   * When enabled, providers that support output transcription emit normalized
+   * `audio-transcript-delta` / `audio-transcript-done` events for the model's
+   * spoken response. Some providers transcribe output by default; setting this
+   * makes the behavior explicit rather than relying on that default.
+   *
+   * NOTE(review): as with `inputAudioTranscription`, there is no explicit
+   * enable flag — presumably the presence of this object enables it; confirm.
+   */
+  outputAudioTranscription?: {
+    /**
+     * Provider-specific transcription model.
+     */
+    model?: string;
+    /**
+     * Optional language hint for the output audio.
+     */
+    language?: string;
+    /**
+     * Optional prompt to guide transcription.
+     */
+    prompt?: string;
+  };
+  /**
+   * Audio format configuration for output audio.
+   * Has the same shape as `inputAudioFormat`.
+   */
+  outputAudioFormat?: {
+    /**
+     * Audio format type (e.g. "audio/pcm", "audio/pcmu", "audio/pcma").
+     */
+    type: string;
+    /**
+     * Sample rate in Hz. Only applicable for PCM format.
+     */
+    rate?: number;
+  };
+  /**
+   * Voice activity detection configuration.
+   * Set to null or type 'disabled' to turn off VAD (push-to-talk mode).
+   *
+   * NOTE(review): the field is also optional; what happens when it is
+   * omitted entirely (as opposed to `null`) is not specified here —
+   * presumably the provider default applies; confirm.
+   */
+  turnDetection?: {
+    /**
+     * VAD mode. 'server-vad' for automatic detection,
+     * 'semantic-vad' for OpenAI's semantic detection,
+     * 'disabled' to turn off VAD.
+     */
+    type: 'server-vad' | 'semantic-vad' | 'disabled';
+    /**
+     * VAD activation threshold (0.0-1.0).
+     * Higher values require louder audio to trigger.
+     */
+    threshold?: number;
+    /**
+     * How long the user must be silent (in ms) before
+     * the server ends the turn.
+     */
+    silenceDurationMs?: number;
+    /**
+     * Amount of audio (in ms) to include before the
+     * detected start of speech.
+     */
+    prefixPaddingMs?: number;
+  } | null;
+  /**
+   * Tool definitions available to the model in this session.
+   */
+  tools?: RealtimeModelV4ToolDefinition[];
+  /**
+   * Provider-specific options that are passed through to the provider.
+   * Typed as an open `Record<string, unknown>`, so values are not validated
+   * by this type.
+   */
+  providerOptions?: Record<string, unknown>;
+};

package/src/realtime-model/v4/realtime-model-v4-tool-definition.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import type { JSONSchema7 } from 'json-schema';
+/**
+ * A tool definition for realtime models. Sent as part of the session
+ * configuration so the model knows which functions it can call.
+ *
+ * Referenced by the `tools` array of `RealtimeModelV4SessionConfig`.
+ */
+export type RealtimeModelV4ToolDefinition = {
+  /**
+   * The type of the tool (always 'function').
+   */
+  type: 'function';
+  /**
+   * The name of the tool. Unique within the session.
+   */
+  name: string;
+  /**
+   * A description of what the tool does. The model uses this to decide
+   * whether to call the tool.
+   */
+  description?: string;
+  /**
+   * JSON Schema describing the parameters the tool expects.
+   * Typed as `JSONSchema7` from the `json-schema` package; required, unlike
+   * `description`.
+   */
+  parameters: JSONSchema7;
+};