@firebase/ai 2.4.0 → 2.5.0-20251028194003

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -192,6 +192,12 @@ export declare interface AudioConversationController {
192
192
  stop: () => Promise<void>;
193
193
  }
194
194
 
195
+ /**
196
+ * The audio transcription configuration.
197
+ */
198
+ export declare interface AudioTranscriptionConfig {
199
+ }
200
+
195
201
  /**
196
202
  * Abstract base class representing the configuration for an AI service backend.
197
203
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -558,6 +564,12 @@ export declare interface EnhancedGenerateContentResponse extends GenerateContent
558
564
  * set to `true`.
559
565
  */
560
566
  thoughtSummary: () => string | undefined;
567
+ /**
568
+ * Indicates whether inference happened on-device or in-cloud.
569
+ *
570
+ * @beta
571
+ */
572
+ inferenceSource?: InferenceSource;
561
573
  }
562
574
 
563
575
  /**
@@ -1833,6 +1845,23 @@ export declare const InferenceMode: {
1833
1845
  */
1834
1846
  export declare type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode];
1835
1847
 
1848
+ /**
1849
+ * Indicates whether inference happened on-device or in-cloud.
1850
+ *
1851
+ * @beta
1852
+ */
1853
+ export declare const InferenceSource: {
1854
+ readonly ON_DEVICE: "on_device";
1855
+ readonly IN_CLOUD: "in_cloud";
1856
+ };
1857
+
1858
+ /**
1859
+ * Indicates whether inference happened on-device or in-cloud.
1860
+ *
1861
+ * @beta
1862
+ */
1863
+ export declare type InferenceSource = (typeof InferenceSource)[keyof typeof InferenceSource];
1864
+
1836
1865
  /**
1837
1866
  * Content part interface if the part represents an image.
1838
1867
  * @public
@@ -1997,6 +2026,24 @@ export declare interface LiveGenerationConfig {
1997
2026
  * The modalities of the response.
1998
2027
  */
1999
2028
  responseModalities?: ResponseModality[];
2029
+ /**
2030
+ * Enables transcription of audio input.
2031
+ *
2032
+ * When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
2033
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
2034
+ * messages, so you may only receive small amounts of text per message. For example, if you ask the model
2035
+ * "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
2036
+ */
2037
+ inputAudioTranscription?: AudioTranscriptionConfig;
2038
+ /**
2039
+ * Enables transcription of audio output.
2040
+ *
2041
+ * When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
2042
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
2043
+ * messages, so you may only receive small amounts of text per message. For example, if the model says
2044
+ * "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
2045
+ */
2046
+ outputAudioTranscription?: AudioTranscriptionConfig;
2000
2047
  }
2001
2048
 
2002
2049
  /**
@@ -2078,6 +2125,14 @@ export declare interface LiveServerContent {
2078
2125
  * model was not interrupted.
2079
2126
  */
2080
2127
  interrupted?: boolean;
2128
+ /**
2129
+ * Transcription of the audio that was input to the model.
2130
+ */
2131
+ inputTranscription?: Transcription;
2132
+ /**
2133
+ * Transcription of the audio output from the model.
2134
+ */
2135
+ outputTranscription?: Transcription;
2081
2136
  }
2082
2137
 
2083
2138
  /**
@@ -2140,32 +2195,65 @@ export declare class LiveSession {
2140
2195
  */
2141
2196
  send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
2142
2197
  /**
2143
- * Sends realtime input to the server.
2198
+ * Sends text to the server in realtime.
2144
2199
  *
2145
- * @param mediaChunks - The media chunks to send.
2200
+ * @example
2201
+ * ```javascript
2202
+ * liveSession.sendTextRealtime("Hello, how are you?");
2203
+ * ```
2204
+ *
2205
+ * @param text - The text data to send.
2146
2206
  * @throws If this session has been closed.
2147
2207
  *
2148
2208
  * @beta
2149
2209
  */
2150
- sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
2210
+ sendTextRealtime(text: string): Promise<void>;
2151
2211
  /**
2152
- * Sends function responses to the server.
2212
+ * Sends audio data to the server in realtime.
2153
2213
  *
2154
- * @param functionResponses - The function responses to send.
2214
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
2215
+ * little-endian.
2216
+ *
2217
+ * @example
2218
+ * ```javascript
2219
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
2220
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
2221
+ * liveSession.sendAudioRealtime(blob);
2222
+ * ```
2223
+ *
2224
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
2155
2225
  * @throws If this session has been closed.
2156
2226
  *
2157
2227
  * @beta
2158
2228
  */
2159
- sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
2229
+ sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
2160
2230
  /**
2161
- * Sends a stream of {@link GenerativeContentBlob}.
2231
+ * Sends video data to the server in realtime.
2162
2232
  *
2163
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
2233
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
2234
+ * is recommended to set `mimeType` to `image/jpeg`.
2235
+ *
2236
+ * @example
2237
+ * ```javascript
2238
+ * // const videoFrame = ... base64-encoded JPEG data
2239
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
2240
+ * liveSession.sendVideoRealtime(blob);
2241
+ * ```
2242
+ * @param blob - The base64-encoded video data to send to the server in realtime.
2164
2243
  * @throws If this session has been closed.
2165
2244
  *
2166
2245
  * @beta
2167
2246
  */
2168
- sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
2247
+ sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
2248
+ /**
2249
+ * Sends function responses to the server.
2250
+ *
2251
+ * @param functionResponses - The function responses to send.
2252
+ * @throws If this session has been closed.
2253
+ *
2254
+ * @beta
2255
+ */
2256
+ sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
2169
2257
  /**
2170
2258
  * Yields messages received from the server.
2171
2259
  * This can only be used by one consumer at a time.
@@ -2183,6 +2271,28 @@ export declare class LiveSession {
2183
2271
  * @beta
2184
2272
  */
2185
2273
  close(): Promise<void>;
2274
+ /**
2275
+ * Sends realtime input to the server.
2276
+ *
2277
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
2278
+ *
2279
+ * @param mediaChunks - The media chunks to send.
2280
+ * @throws If this session has been closed.
2281
+ *
2282
+ * @beta
2283
+ */
2284
+ sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
2285
+ /**
2286
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
2287
+ *
2288
+ * Sends a stream of {@link GenerativeContentBlob}.
2289
+ *
2290
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
2291
+ * @throws If this session has been closed.
2292
+ *
2293
+ * @beta
2294
+ */
2295
+ sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
2186
2296
  }
2187
2297
 
2188
2298
  /**
@@ -2874,6 +2984,20 @@ export declare interface ToolConfig {
2874
2984
  functionCallingConfig?: FunctionCallingConfig;
2875
2985
  }
2876
2986
 
2987
+ /**
2988
+ * Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
2989
+ * is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
2990
+ * the {@link LiveGenerationConfig}.
2991
+ *
2992
+ * @beta
2993
+ */
2994
+ export declare interface Transcription {
2995
+ /**
2996
+ * The text transcription of the audio.
2997
+ */
2998
+ text?: string;
2999
+ }
3000
+
2877
3001
  /**
2878
3002
  * A type that includes all specific Schema types.
2879
3003
  * @public
package/dist/ai.d.ts CHANGED
@@ -232,6 +232,12 @@ export declare interface AudioConversationController {
232
232
  stop: () => Promise<void>;
233
233
  }
234
234
 
235
+ /**
236
+ * The audio transcription configuration.
237
+ */
238
+ export declare interface AudioTranscriptionConfig {
239
+ }
240
+
235
241
  /**
236
242
  * Abstract base class representing the configuration for an AI service backend.
237
243
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -604,6 +610,12 @@ export declare interface EnhancedGenerateContentResponse extends GenerateContent
604
610
  * set to `true`.
605
611
  */
606
612
  thoughtSummary: () => string | undefined;
613
+ /**
614
+ * Indicates whether inference happened on-device or in-cloud.
615
+ *
616
+ * @beta
617
+ */
618
+ inferenceSource?: InferenceSource;
607
619
  }
608
620
 
609
621
  /**
@@ -1945,6 +1957,23 @@ export declare const InferenceMode: {
1945
1957
  */
1946
1958
  export declare type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode];
1947
1959
 
1960
+ /**
1961
+ * Indicates whether inference happened on-device or in-cloud.
1962
+ *
1963
+ * @beta
1964
+ */
1965
+ export declare const InferenceSource: {
1966
+ readonly ON_DEVICE: "on_device";
1967
+ readonly IN_CLOUD: "in_cloud";
1968
+ };
1969
+
1970
+ /**
1971
+ * Indicates whether inference happened on-device or in-cloud.
1972
+ *
1973
+ * @beta
1974
+ */
1975
+ export declare type InferenceSource = (typeof InferenceSource)[keyof typeof InferenceSource];
1976
+
1948
1977
  /**
1949
1978
  * Content part interface if the part represents an image.
1950
1979
  * @public
@@ -2112,6 +2141,24 @@ export declare interface LiveGenerationConfig {
2112
2141
  * The modalities of the response.
2113
2142
  */
2114
2143
  responseModalities?: ResponseModality[];
2144
+ /**
2145
+ * Enables transcription of audio input.
2146
+ *
2147
+ * When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
2148
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
2149
+ * messages, so you may only receive small amounts of text per message. For example, if you ask the model
2150
+ * "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
2151
+ */
2152
+ inputAudioTranscription?: AudioTranscriptionConfig;
2153
+ /**
2154
+ * Enables transcription of audio output.
2155
+ *
2156
+ * When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
2157
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
2158
+ * messages, so you may only receive small amounts of text per message. For example, if the model says
2159
+ * "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
2160
+ */
2161
+ outputAudioTranscription?: AudioTranscriptionConfig;
2115
2162
  }
2116
2163
 
2117
2164
  /**
@@ -2203,6 +2250,14 @@ export declare interface LiveServerContent {
2203
2250
  * model was not interrupted.
2204
2251
  */
2205
2252
  interrupted?: boolean;
2253
+ /**
2254
+ * Transcription of the audio that was input to the model.
2255
+ */
2256
+ inputTranscription?: Transcription;
2257
+ /**
2258
+ * Transcription of the audio output from the model.
2259
+ */
2260
+ outputTranscription?: Transcription;
2206
2261
  }
2207
2262
 
2208
2263
  /**
@@ -2268,32 +2323,65 @@ export declare class LiveSession {
2268
2323
  */
2269
2324
  send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
2270
2325
  /**
2271
- * Sends realtime input to the server.
2326
+ * Sends text to the server in realtime.
2272
2327
  *
2273
- * @param mediaChunks - The media chunks to send.
2328
+ * @example
2329
+ * ```javascript
2330
+ * liveSession.sendTextRealtime("Hello, how are you?");
2331
+ * ```
2332
+ *
2333
+ * @param text - The text data to send.
2274
2334
  * @throws If this session has been closed.
2275
2335
  *
2276
2336
  * @beta
2277
2337
  */
2278
- sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
2338
+ sendTextRealtime(text: string): Promise<void>;
2279
2339
  /**
2280
- * Sends function responses to the server.
2340
+ * Sends audio data to the server in realtime.
2281
2341
  *
2282
- * @param functionResponses - The function responses to send.
2342
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
2343
+ * little-endian.
2344
+ *
2345
+ * @example
2346
+ * ```javascript
2347
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
2348
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
2349
+ * liveSession.sendAudioRealtime(blob);
2350
+ * ```
2351
+ *
2352
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
2283
2353
  * @throws If this session has been closed.
2284
2354
  *
2285
2355
  * @beta
2286
2356
  */
2287
- sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
2357
+ sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
2288
2358
  /**
2289
- * Sends a stream of {@link GenerativeContentBlob}.
2359
+ * Sends video data to the server in realtime.
2290
2360
  *
2291
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
2361
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
2362
+ * is recommended to set `mimeType` to `image/jpeg`.
2363
+ *
2364
+ * @example
2365
+ * ```javascript
2366
+ * // const videoFrame = ... base64-encoded JPEG data
2367
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
2368
+ * liveSession.sendVideoRealtime(blob);
2369
+ * ```
2370
+ * @param blob - The base64-encoded video data to send to the server in realtime.
2292
2371
  * @throws If this session has been closed.
2293
2372
  *
2294
2373
  * @beta
2295
2374
  */
2296
- sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
2375
+ sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
2376
+ /**
2377
+ * Sends function responses to the server.
2378
+ *
2379
+ * @param functionResponses - The function responses to send.
2380
+ * @throws If this session has been closed.
2381
+ *
2382
+ * @beta
2383
+ */
2384
+ sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
2297
2385
  /**
2298
2386
  * Yields messages received from the server.
2299
2387
  * This can only be used by one consumer at a time.
@@ -2311,6 +2399,28 @@ export declare class LiveSession {
2311
2399
  * @beta
2312
2400
  */
2313
2401
  close(): Promise<void>;
2402
+ /**
2403
+ * Sends realtime input to the server.
2404
+ *
2405
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
2406
+ *
2407
+ * @param mediaChunks - The media chunks to send.
2408
+ * @throws If this session has been closed.
2409
+ *
2410
+ * @beta
2411
+ */
2412
+ sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
2413
+ /**
2414
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
2415
+ *
2416
+ * Sends a stream of {@link GenerativeContentBlob}.
2417
+ *
2418
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
2419
+ * @throws If this session has been closed.
2420
+ *
2421
+ * @beta
2422
+ */
2423
+ sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
2314
2424
  }
2315
2425
 
2316
2426
  /**
@@ -3016,6 +3126,20 @@ export declare interface ToolConfig {
3016
3126
  functionCallingConfig?: FunctionCallingConfig;
3017
3127
  }
3018
3128
 
3129
+ /**
3130
+ * Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
3131
+ * is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
3132
+ * the {@link LiveGenerationConfig}.
3133
+ *
3134
+ * @beta
3135
+ */
3136
+ export declare interface Transcription {
3137
+ /**
3138
+ * The text transcription of the audio.
3139
+ */
3140
+ text?: string;
3141
+ }
3142
+
3019
3143
  /**
3020
3144
  * A type that includes all specific Schema types.
3021
3145
  * @public