@firebase/ai 2.4.0-canary.44d9891f9 → 2.4.0-canary.6e0e30317

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -192,6 +192,12 @@ export declare interface AudioConversationController {
  stop: () => Promise<void>;
  }
 
+ /**
+ * The audio transcription configuration.
+ */
+ export declare interface AudioTranscriptionConfig {
+ }
+
  /**
  * Abstract base class representing the configuration for an AI service backend.
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -335,6 +341,7 @@ export declare class ChatSession {
  * @beta
  */
  export declare interface ChromeAdapter {
+ /* Excluded from this release type: mode */
  /**
  * Checks if the on-device model is capable of handling a given
  * request.
@@ -2020,6 +2027,24 @@ export declare interface LiveGenerationConfig {
  * The modalities of the response.
  */
  responseModalities?: ResponseModality[];
+ /**
+ * Enables transcription of audio input.
+ *
+ * When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if you ask the model
+ * "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ inputAudioTranscription?: AudioTranscriptionConfig;
+ /**
+ * Enables transcription of audio output.
+ *
+ * When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if the model says
+ * "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ outputAudioTranscription?: AudioTranscriptionConfig;
  }
 
  /**
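For context, a minimal sketch of opting into the two new `LiveGenerationConfig` fields. It assumes the public `firebase/ai` entry point and its `getAI`, `getLiveGenerativeModel`, and `ResponseModality` exports; the app config and model name are placeholders.

```ts
// Sketch only: the empty objects opt in to transcription, since
// AudioTranscriptionConfig currently declares no fields.
import { initializeApp } from 'firebase/app';
import { getAI, getLiveGenerativeModel, ResponseModality } from 'firebase/ai';

const app = initializeApp({ /* your Firebase config */ });
const ai = getAI(app);

const liveModel = getLiveGenerativeModel(ai, {
  model: 'your-live-model-name', // placeholder model name
  generationConfig: {
    responseModalities: [ResponseModality.AUDIO],
    inputAudioTranscription: {},  // transcribe what the user says
    outputAudioTranscription: {}  // transcribe what the model says
  }
});
```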
@@ -2101,6 +2126,14 @@ export declare interface LiveServerContent {
  * model was not interrupted.
  */
  interrupted?: boolean;
+ /**
+ * Transcription of the audio that was input to the model.
+ */
+ inputTranscription?: Transcription;
+ /**
+ * Transcription of the audio output from the model.
+ */
+ outputTranscription?: Transcription;
  }
 
  /**
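A sketch of consuming the new `LiveServerContent` fields. The local interfaces mirror the declarations in this diff; how the messages are obtained (an async iterator is assumed here) depends on the rest of the `LiveSession` API.

```ts
// Transcriptions arrive as small fragments spread across messages, so accumulate them.
interface Transcription {
  text?: string;
}

interface ServerContentLike {
  inputTranscription?: Transcription;
  outputTranscription?: Transcription;
}

async function collectTranscripts(
  messages: AsyncIterable<ServerContentLike>
): Promise<{ userTranscript: string; modelTranscript: string }> {
  let userTranscript = '';
  let modelTranscript = '';
  for await (const msg of messages) {
    userTranscript += msg.inputTranscription?.text ?? '';
    modelTranscript += msg.outputTranscription?.text ?? '';
  }
  return { userTranscript, modelTranscript };
}
```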
@@ -2163,32 +2196,65 @@ export declare class LiveSession {
  */
  send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ sendTextRealtime(text: string): Promise<void>;
  /**
- * Sends function responses to the server.
+ * Sends audio data to the server in realtime.
  *
- * @param functionResponses - The function responses to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
+ sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends video data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
+ sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
  /**
  * Yields messages received from the server.
  * This can only be used by one consumer at a time.
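A usage sketch of the three new realtime senders. It assumes a connected `LiveSession` (the `connect()` call is illustrative) and leaves base64 encoding of the media, which is not part of this package, as placeholders.

```ts
// Sketch: routing each media type through its dedicated realtime method.
const session = await liveModel.connect(); // assumes LiveGenerativeModel#connect()

// Text goes through sendTextRealtime.
await session.sendTextRealtime('Hello, how are you?');

// Audio must be base64-encoded 16-bit PCM at 16kHz, little-endian (per the @remarks above).
const pcmBase64 = '...'; // placeholder for encoded microphone data
await session.sendAudioRealtime({ mimeType: 'audio/pcm', data: pcmBase64 });

// Video is sent as individual frames (about 1 FPS), typically JPEG-encoded.
const frameBase64 = '...'; // placeholder for an encoded video frame
await session.sendVideoRealtime({ mimeType: 'image/jpeg', data: frameBase64 });
```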
@@ -2206,6 +2272,28 @@ export declare class LiveSession {
  * @beta
  */
  close(): Promise<void>;
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
  }
 
  /**
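Since `sendMediaChunks()` and `sendMediaStream()` are now deprecated, one possible migration is to route each chunk to the matching typed method. This helper is a sketch, not part of the SDK, and assumes the `LiveSession` and `GenerativeContentBlob` types exported by the package.

```ts
import { GenerativeContentBlob, LiveSession } from 'firebase/ai';

// Hypothetical helper: replaces sendMediaChunks([...]) by dispatching on mimeType.
async function sendChunksRealtime(
  session: LiveSession,
  chunks: GenerativeContentBlob[]
): Promise<void> {
  for (const chunk of chunks) {
    if (chunk.mimeType.startsWith('audio/')) {
      await session.sendAudioRealtime(chunk);
    } else if (chunk.mimeType.startsWith('image/')) {
      await session.sendVideoRealtime(chunk); // video is sent frame-by-frame
    } else {
      await session.sendMediaChunks([chunk]); // deprecated fallback for other types
    }
  }
}
```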
@@ -2897,6 +2985,20 @@ export declare interface ToolConfig {
  functionCallingConfig?: FunctionCallingConfig;
  }
 
+ /**
+ * Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
+ * is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
+ * the {@link LiveGenerationConfig}.
+ *
+ * @beta
+ */
+ export declare interface Transcription {
+ /**
+ * The text transcription of the audio.
+ */
+ text?: string;
+ }
+
  /**
  * A type that includes all specific Schema types.
  * @public
package/dist/ai.d.ts CHANGED
@@ -232,6 +232,12 @@ export declare interface AudioConversationController {
  stop: () => Promise<void>;
  }
 
+ /**
+ * The audio transcription configuration.
+ */
+ export declare interface AudioTranscriptionConfig {
+ }
+
  /**
  * Abstract base class representing the configuration for an AI service backend.
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -375,6 +381,10 @@ export declare class ChatSession {
  * @beta
  */
  export declare interface ChromeAdapter {
+ /**
+ * @internal
+ */
+ mode: InferenceMode;
  /**
  * Checks if the on-device model is capable of handling a given
  * request.
@@ -2135,6 +2145,24 @@ export declare interface LiveGenerationConfig {
  * The modalities of the response.
  */
  responseModalities?: ResponseModality[];
+ /**
+ * Enables transcription of audio input.
+ *
+ * When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if you ask the model
+ * "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ inputAudioTranscription?: AudioTranscriptionConfig;
+ /**
+ * Enables transcription of audio output.
+ *
+ * When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if the model says
+ * "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ outputAudioTranscription?: AudioTranscriptionConfig;
  }
 
  /**
@@ -2226,6 +2254,14 @@ export declare interface LiveServerContent {
  * model was not interrupted.
  */
  interrupted?: boolean;
+ /**
+ * Transcription of the audio that was input to the model.
+ */
+ inputTranscription?: Transcription;
+ /**
+ * Transcription of the audio output from the model.
+ */
+ outputTranscription?: Transcription;
  }
 
  /**
@@ -2291,32 +2327,65 @@ export declare class LiveSession {
  */
  send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ sendTextRealtime(text: string): Promise<void>;
  /**
- * Sends function responses to the server.
+ * Sends audio data to the server in realtime.
  *
- * @param functionResponses - The function responses to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
+ sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends video data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
+ sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
  /**
  * Yields messages received from the server.
  * This can only be used by one consumer at a time.
@@ -2334,6 +2403,28 @@ export declare class LiveSession {
  * @beta
  */
  close(): Promise<void>;
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
  }
 
  /**
@@ -3039,6 +3130,20 @@ export declare interface ToolConfig {
  functionCallingConfig?: FunctionCallingConfig;
  }
 
+ /**
+ * Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
+ * is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
+ * the {@link LiveGenerationConfig}.
+ *
+ * @beta
+ */
+ export declare interface Transcription {
+ /**
+ * The text transcription of the audio.
+ */
+ text?: string;
+ }
+
  /**
  * A type that includes all specific Schema types.
  * @public
@@ -4,7 +4,7 @@ import { FirebaseError, Deferred, getModularInstance } from '@firebase/util';
  import { Logger } from '@firebase/logger';
 
  var name = "@firebase/ai";
- var version = "2.4.0-canary.44d9891f9";
+ var version = "2.4.0-canary.6e0e30317";
 
  /**
  * @license
@@ -2870,75 +2870,104 @@ class LiveSession {
  this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaChunks(mediaChunks) {
+ async sendTextRealtime(text) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- // The backend does not support sending more than one mediaChunk in one message.
- // Work around this limitation by sending mediaChunks in separate messages.
- mediaChunks.forEach(mediaChunk => {
- const message = {
- realtimeInput: { mediaChunks: [mediaChunk] }
- };
- this.webSocketHandler.send(JSON.stringify(message));
- });
+ const message = {
+ realtimeInput: {
+ text
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends function responses to the server.
+ * Sends audio data to the server in realtime.
  *
- * @param functionResponses - The function responses to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendFunctionResponses(functionResponses) {
+ async sendAudioRealtime(blob) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
  const message = {
- toolResponse: {
- functionResponses
+ realtimeInput: {
+ audio: blob
  }
  };
  this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends video data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaStream(mediaChunkStream) {
+ async sendVideoRealtime(blob) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- const reader = mediaChunkStream.getReader();
- while (true) {
- try {
- const { done, value } = await reader.read();
- if (done) {
- break;
- }
- else if (!value) {
- throw new Error('Missing chunk in reader, but reader is not done.');
- }
- await this.sendMediaChunks([value]);
- }
- catch (e) {
- // Re-throw any errors that occur during stream consumption or sending.
- const message = e instanceof Error ? e.message : 'Error processing media stream.';
- throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ const message = {
+ realtimeInput: {
+ video: blob
  }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ }
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendFunctionResponses(functionResponses) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
+ const message = {
+ toolResponse: {
+ functionResponses
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
  * Yields messages received from the server.
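The implementation above makes the wire-format change visible: each new method sends a `realtimeInput` payload with a dedicated field instead of the old `mediaChunks` array. A type sketch of those client messages, derived only from the objects constructed in this hunk (internal protocol, not a public API):

```ts
// For illustration only; field names are taken from the message objects built above.
interface GenerativeContentBlob {
  mimeType: string;
  data: string; // base64-encoded payload
}

type RealtimeClientMessage =
  | { realtimeInput: { text: string } }                          // sendTextRealtime
  | { realtimeInput: { audio: GenerativeContentBlob } }          // sendAudioRealtime
  | { realtimeInput: { video: GenerativeContentBlob } }          // sendVideoRealtime
  | { realtimeInput: { mediaChunks: GenerativeContentBlob[] } }  // deprecated sendMediaChunks
  | { toolResponse: { functionResponses: unknown[] } };          // sendFunctionResponses
```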
@@ -2996,6 +3025,62 @@ class LiveSession {
  await this.webSocketHandler.close(1000, 'Client closed session.');
  }
  }
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaChunks(mediaChunks) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ // The backend does not support sending more than one mediaChunk in one message.
+ // Work around this limitation by sending mediaChunks in separate messages.
+ mediaChunks.forEach(mediaChunk => {
+ const message = {
+ realtimeInput: { mediaChunks: [mediaChunk] }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ });
+ }
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaStream(mediaChunkStream) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ const reader = mediaChunkStream.getReader();
+ while (true) {
+ try {
+ const { done, value } = await reader.read();
+ if (done) {
+ break;
+ }
+ else if (!value) {
+ throw new Error('Missing chunk in reader, but reader is not done.');
+ }
+ await this.sendMediaChunks([value]);
+ }
+ catch (e) {
+ // Re-throw any errors that occur during stream consumption or sending.
+ const message = e instanceof Error ? e.message : 'Error processing media stream.';
+ throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ }
+ }
+ }
  }
 
  /**
@@ -3056,13 +3141,18 @@ class LiveGenerativeModel extends AIModel {
  else {
  fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
  }
+ // inputAudioTranscription and outputAudioTranscription are on the generation config in the public API,
+ // but the backend expects them to be in the `setup` message.
+ const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = this.generationConfig;
  const setupMessage = {
  setup: {
  model: fullModelPath,
- generationConfig: this.generationConfig,
+ generationConfig,
  tools: this.tools,
  toolConfig: this.toolConfig,
- systemInstruction: this.systemInstruction
+ systemInstruction: this.systemInstruction,
+ inputAudioTranscription,
+ outputAudioTranscription
  }
  };
  try {
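The destructuring above splits the public config before serialization, so a config like `{ responseModalities, inputAudioTranscription, outputAudioTranscription }` produces roughly the following setup payload (illustrative values; other setup fields elided):

```ts
// Illustrative shape only: transcription configs are hoisted out of generationConfig
// into top-level fields of `setup`, as the comment in this hunk describes.
const exampleSetupMessage = {
  setup: {
    model: 'projects/<project>/locations/<location>/<model>', // built from fullModelPath
    generationConfig: { responseModalities: ['AUDIO'] },      // transcription fields removed
    inputAudioTranscription: {},  // hoisted from the public generationConfig
    outputAudioTranscription: {}  // hoisted from the public generationConfig
  }
};
```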
@@ -3768,7 +3858,7 @@ class AudioConversationRunner {
  mimeType: 'audio/pcm',
  data: base64
  };
- void this.liveSession.sendMediaChunks([chunk]);
+ void this.liveSession.sendAudioRealtime(chunk);
  };
  }
  /**