@firebase/ai 2.4.0-canary.91c218db2 → 2.4.0-canary.bc5a7c4a7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -192,6 +192,12 @@ export declare interface AudioConversationController {
  stop: () => Promise<void>;
  }

+ /**
+ * The audio transcription configuration.
+ */
+ export declare interface AudioTranscriptionConfig {
+ }
+
  /**
  * Abstract base class representing the configuration for an AI service backend.
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -2020,6 +2026,24 @@ export declare interface LiveGenerationConfig {
  * The modalities of the response.
  */
  responseModalities?: ResponseModality[];
+ /**
+ * Enables transcription of audio input.
+ *
+ * When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if you ask the model
+ * "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ inputAudioTranscription?: AudioTranscriptionConfig;
+ /**
+ * Enables transcription of audio output.
+ *
+ * When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if the model says
+ * "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ outputAudioTranscription?: AudioTranscriptionConfig;
  }

  /**
@@ -2101,6 +2125,14 @@ export declare interface LiveServerContent {
  * model was not interrupted.
  */
  interrupted?: boolean;
+ /**
+ * Transcription of the audio that was input to the model.
+ */
+ inputTranscription?: Transcription;
+ /**
+ * Transcription of the audio output from the model.
+ */
+ outputTranscription?: Transcription;
  }

  /**
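
Consuming these fields might look like the following sketch, given a connected `liveSession` and assuming `receive()` (the message iterator documented later in this diff) yields `LiveServerContent` messages among others:

```typescript
// Transcription text arrives in fragments; accumulate it across messages.
let userText = "";
let modelText = "";

for await (const message of liveSession.receive()) {
  if ("inputTranscription" in message && message.inputTranscription?.text) {
    userText += message.inputTranscription.text;
  }
  if ("outputTranscription" in message && message.outputTranscription?.text) {
    modelText += message.outputTranscription.text;
  }
}

console.log("You said:", userText);
console.log("Model said:", modelText);
```
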
@@ -2163,32 +2195,65 @@ export declare class LiveSession {
  */
  send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ sendTextRealtime(text: string): Promise<void>;
  /**
- * Sends function responses to the server.
+ * Sends audio data to the server in realtime.
  *
- * @param functionResponses - The function responses to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
+ sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends video data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
+ sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
  /**
  * Yields messages received from the server.
  * This can only be used by one consumer at a time.
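
The `@remarks` above specify the wire format for audio (base64-encoded 16-bit little-endian PCM at 16 kHz) but not how to produce it. A hypothetical browser-side helper, not part of the SDK, might look like this:

```typescript
// Convert Float32 samples from the Web Audio API into base64 16-bit
// little-endian PCM, the format sendAudioRealtime() expects.
function float32ToBase64Pcm16(samples: Float32Array): string {
  const pcm = new Int16Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    // Clamp to [-1, 1] and scale to the signed 16-bit range.
    const s = Math.max(-1, Math.min(1, samples[i]));
    pcm[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
  }
  // Int16Array buffers are little-endian on the platforms browsers target.
  const bytes = new Uint8Array(pcm.buffer);
  let binary = "";
  for (let i = 0; i < bytes.length; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
}

// Usage (assuming `samples` came from a 16 kHz AudioContext):
// await liveSession.sendAudioRealtime({
//   mimeType: "audio/pcm",
//   data: float32ToBase64Pcm16(samples)
// });
```
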
@@ -2206,6 +2271,28 @@ export declare class LiveSession {
  * @beta
  */
  close(): Promise<void>;
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
  }

  /**
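
Migrating off the deprecated batch methods is mechanical; a sketch, with placeholder `pcmData` and `videoFrame` base64 strings:

```typescript
// Before (deprecated): every modality funneled through one method.
await liveSession.sendMediaChunks([
  { mimeType: "audio/pcm", data: pcmData },    // pcmData: base64 16-bit PCM
  { mimeType: "image/jpeg", data: videoFrame } // videoFrame: base64 JPEG
]);

// After: one typed call per modality.
await liveSession.sendAudioRealtime({ mimeType: "audio/pcm", data: pcmData });
await liveSession.sendVideoRealtime({ mimeType: "image/jpeg", data: videoFrame });
```
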
@@ -2897,6 +2984,20 @@ export declare interface ToolConfig {
  functionCallingConfig?: FunctionCallingConfig;
  }

+ /**
+ * Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
+ * is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
+ * the {@link LiveGenerationConfig}.
+ *
+ * @beta
+ */
+ export declare interface Transcription {
+ /**
+ * The text transcription of the audio.
+ */
+ text?: string;
+ }
+
  /**
  * A type that includes all specific Schema types.
  * @public
package/dist/ai.d.ts CHANGED
@@ -232,6 +232,12 @@ export declare interface AudioConversationController {
  stop: () => Promise<void>;
  }

+ /**
+ * The audio transcription configuration.
+ */
+ export declare interface AudioTranscriptionConfig {
+ }
+
  /**
  * Abstract base class representing the configuration for an AI service backend.
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -2135,6 +2141,24 @@ export declare interface LiveGenerationConfig {
  * The modalities of the response.
  */
  responseModalities?: ResponseModality[];
+ /**
+ * Enables transcription of audio input.
+ *
+ * When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if you ask the model
+ * "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ inputAudioTranscription?: AudioTranscriptionConfig;
+ /**
+ * Enables transcription of audio output.
+ *
+ * When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if the model says
+ * "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ outputAudioTranscription?: AudioTranscriptionConfig;
  }

  /**
@@ -2226,6 +2250,14 @@ export declare interface LiveServerContent {
  * model was not interrupted.
  */
  interrupted?: boolean;
+ /**
+ * Transcription of the audio that was input to the model.
+ */
+ inputTranscription?: Transcription;
+ /**
+ * Transcription of the audio output from the model.
+ */
+ outputTranscription?: Transcription;
  }

  /**
@@ -2291,32 +2323,65 @@ export declare class LiveSession {
  */
  send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ sendTextRealtime(text: string): Promise<void>;
  /**
- * Sends function responses to the server.
+ * Sends audio data to the server in realtime.
  *
- * @param functionResponses - The function responses to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
+ sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends video data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
+ sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
  /**
  * Yields messages received from the server.
  * This can only be used by one consumer at a time.
@@ -2334,6 +2399,28 @@ export declare class LiveSession {
  * @beta
  */
  close(): Promise<void>;
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
  }

  /**
@@ -3039,6 +3126,20 @@ export declare interface ToolConfig {
  functionCallingConfig?: FunctionCallingConfig;
  }

+ /**
+ * Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
+ * is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
+ * the {@link LiveGenerationConfig}.
+ *
+ * @beta
+ */
+ export declare interface Transcription {
+ /**
+ * The text transcription of the audio.
+ */
+ text?: string;
+ }
+
  /**
  * A type that includes all specific Schema types.
  * @public
@@ -4,7 +4,7 @@ import { FirebaseError, Deferred, getModularInstance } from '@firebase/util';
  import { Logger } from '@firebase/logger';

  var name = "@firebase/ai";
- var version = "2.4.0-canary.91c218db2";
+ var version = "2.4.0-canary.bc5a7c4a7";

  /**
  * @license
@@ -2870,75 +2870,104 @@ class LiveSession {
  this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaChunks(mediaChunks) {
+ async sendTextRealtime(text) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- // The backend does not support sending more than one mediaChunk in one message.
- // Work around this limitation by sending mediaChunks in separate messages.
- mediaChunks.forEach(mediaChunk => {
- const message = {
- realtimeInput: { mediaChunks: [mediaChunk] }
- };
- this.webSocketHandler.send(JSON.stringify(message));
- });
+ const message = {
+ realtimeInput: {
+ text
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends function responses to the server.
+ * Sends audio data to the server in realtime.
  *
- * @param functionResponses - The function responses to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendFunctionResponses(functionResponses) {
+ async sendAudioRealtime(blob) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
  const message = {
- toolResponse: {
- functionResponses
+ realtimeInput: {
+ audio: blob
  }
  };
  this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends video data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaStream(mediaChunkStream) {
+ async sendVideoRealtime(blob) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- const reader = mediaChunkStream.getReader();
- while (true) {
- try {
- const { done, value } = await reader.read();
- if (done) {
- break;
- }
- else if (!value) {
- throw new Error('Missing chunk in reader, but reader is not done.');
- }
- await this.sendMediaChunks([value]);
- }
- catch (e) {
- // Re-throw any errors that occur during stream consumption or sending.
- const message = e instanceof Error ? e.message : 'Error processing media stream.';
- throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ const message = {
+ realtimeInput: {
+ video: blob
  }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ }
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendFunctionResponses(functionResponses) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
+ const message = {
+ toolResponse: {
+ functionResponses
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
  * Yields messages received from the server.
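
The shape of the websocket messages follows directly from the code above: each new method targets its own `realtimeInput` field instead of the legacy `mediaChunks` array. For illustration:

```typescript
// sendTextRealtime("Hi") serializes:
const textMessage = { realtimeInput: { text: "Hi" } };

// sendAudioRealtime(blob) serializes:
const audioMessage = {
  realtimeInput: { audio: { mimeType: "audio/pcm", data: "<base64>" } }
};

// sendVideoRealtime(blob) serializes:
const videoMessage = {
  realtimeInput: { video: { mimeType: "image/jpeg", data: "<base64>" } }
};

// The deprecated sendMediaChunks([blob]) still sends one message per chunk:
const legacyMessage = {
  realtimeInput: { mediaChunks: [{ mimeType: "audio/pcm", data: "<base64>" }] }
};
```
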
@@ -2996,6 +3025,62 @@ class LiveSession {
  await this.webSocketHandler.close(1000, 'Client closed session.');
  }
  }
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaChunks(mediaChunks) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ // The backend does not support sending more than one mediaChunk in one message.
+ // Work around this limitation by sending mediaChunks in separate messages.
+ mediaChunks.forEach(mediaChunk => {
+ const message = {
+ realtimeInput: { mediaChunks: [mediaChunk] }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ });
+ }
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaStream(mediaChunkStream) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ const reader = mediaChunkStream.getReader();
+ while (true) {
+ try {
+ const { done, value } = await reader.read();
+ if (done) {
+ break;
+ }
+ else if (!value) {
+ throw new Error('Missing chunk in reader, but reader is not done.');
+ }
+ await this.sendMediaChunks([value]);
+ }
+ catch (e) {
+ // Re-throw any errors that occur during stream consumption or sending.
+ const message = e instanceof Error ? e.message : 'Error processing media stream.';
+ throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ }
+ }
+ }
  }

  /**
@@ -3056,13 +3141,18 @@ class LiveGenerativeModel extends AIModel {
  else {
  fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
  }
+ // inputAudioTranscription and outputAudioTranscription are on the generation config in the public API,
+ // but the backend expects them to be in the `setup` message.
+ const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = this.generationConfig;
  const setupMessage = {
  setup: {
  model: fullModelPath,
- generationConfig: this.generationConfig,
+ generationConfig,
  tools: this.tools,
  toolConfig: this.toolConfig,
- systemInstruction: this.systemInstruction
+ systemInstruction: this.systemInstruction,
+ inputAudioTranscription,
+ outputAudioTranscription
  }
  };
  try {
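
To illustrate the rest-spread above with concrete (made-up) values: the transcription options are hoisted out of `generationConfig` and become siblings of it in the `setup` message:

```typescript
const userConfig = {
  responseModalities: ["AUDIO"],
  temperature: 0.7,
  inputAudioTranscription: {},
  outputAudioTranscription: {}
};

// Same destructuring as in the diff above.
const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = userConfig;

const setupMessage = {
  setup: {
    model: "projects/<project>/locations/<location>/<model>", // placeholder path
    generationConfig, // { responseModalities: ["AUDIO"], temperature: 0.7 }
    inputAudioTranscription,
    outputAudioTranscription
  }
};
```
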
@@ -3768,7 +3858,7 @@ class AudioConversationRunner {
  mimeType: 'audio/pcm',
  data: base64
  };
- void this.liveSession.sendMediaChunks([chunk]);
+ void this.liveSession.sendAudioRealtime(chunk);
  };
  }
  /**