@firebase/ai 2.4.0-canary.91c218db2 → 2.4.0-canary.bc5a7c4a7
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- package/dist/ai-public.d.ts +110 -9
- package/dist/ai.d.ts +110 -9
- package/dist/esm/index.esm.js +129 -39
- package/dist/esm/index.esm.js.map +1 -1
- package/dist/esm/src/methods/live-session.d.ts +64 -9
- package/dist/esm/src/types/live-responses.d.ts +21 -3
- package/dist/esm/src/types/requests.d.ts +23 -0
- package/dist/esm/src/types/responses.d.ts +21 -0
- package/dist/index.cjs.js +129 -39
- package/dist/index.cjs.js.map +1 -1
- package/dist/index.node.cjs.js +129 -39
- package/dist/index.node.cjs.js.map +1 -1
- package/dist/index.node.mjs +129 -39
- package/dist/index.node.mjs.map +1 -1
- package/dist/src/methods/live-session.d.ts +64 -9
- package/dist/src/types/live-responses.d.ts +21 -3
- package/dist/src/types/requests.d.ts +23 -0
- package/dist/src/types/responses.d.ts +21 -0
- package/package.json +8 -8
package/dist/ai-public.d.ts
CHANGED
@@ -192,6 +192,12 @@ export declare interface AudioConversationController {
     stop: () => Promise<void>;
 }
 
+/**
+ * The audio transcription configuration.
+ */
+export declare interface AudioTranscriptionConfig {
+}
+
 /**
  * Abstract base class representing the configuration for an AI service backend.
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -2020,6 +2026,24 @@ export declare interface LiveGenerationConfig {
      * The modalities of the response.
      */
     responseModalities?: ResponseModality[];
+    /**
+     * Enables transcription of audio input.
+     *
+     * When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
+     * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+     * messages, so you may only receive small amounts of text per message. For example, if you ask the model
+     * "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
+     */
+    inputAudioTranscription?: AudioTranscriptionConfig;
+    /**
+     * Enables transcription of audio output.
+     *
+     * When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
+     * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+     * messages, so you may only receive small amounts of text per message. For example, if the model says
+     * "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
+     */
+    outputAudioTranscription?: AudioTranscriptionConfig;
 }
 
 /**
@@ -2101,6 +2125,14 @@ export declare interface LiveServerContent {
      * model was not interrupted.
      */
     interrupted?: boolean;
+    /**
+     * Transcription of the audio that was input to the model.
+     */
+    inputTranscription?: Transcription;
+    /**
+     * Transcription of the audio output from the model.
+     */
+    outputTranscription?: Transcription;
 }
 
 /**
@@ -2163,32 +2195,65 @@ export declare class LiveSession {
      */
     send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
     /**
-     * Sends
+     * Sends text to the server in realtime.
      *
-     * @
+     * @example
+     * ```javascript
+     * liveSession.sendTextRealtime("Hello, how are you?");
+     * ```
+     *
+     * @param text - The text data to send.
      * @throws If this session has been closed.
      *
      * @beta
      */
-
+    sendTextRealtime(text: string): Promise<void>;
     /**
-     * Sends
+     * Sends audio data to the server in realtime.
      *
-     * @
+     * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+     * little-endian.
+     *
+     * @example
+     * ```javascript
+     * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+     * const blob = { mimeType: "audio/pcm", data: pcmData };
+     * liveSession.sendAudioRealtime(blob);
+     * ```
+     *
+     * @param blob - The base64-encoded PCM data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-
+    sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
     /**
-     * Sends
+     * Sends video data to the server in realtime.
      *
-     * @
+     * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+     * is recommended to set `mimeType` to `image/jpeg`.
+     *
+     * @example
+     * ```javascript
+     * // const videoFrame = ... base64-encoded JPEG data
+     * const blob = { mimeType: "image/jpeg", data: videoFrame };
+     * liveSession.sendVideoRealtime(blob);
+     * ```
+     * @param blob - The base64-encoded video data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-
+    sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
+    /**
+     * Sends function responses to the server.
+     *
+     * @param functionResponses - The function responses to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
     /**
      * Yields messages received from the server.
      * This can only be used by one consumer at a time.
@@ -2206,6 +2271,28 @@ export declare class LiveSession {
      * @beta
      */
    close(): Promise<void>;
+    /**
+     * Sends realtime input to the server.
+     *
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * @param mediaChunks - The media chunks to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+    /**
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * Sends a stream of {@link GenerativeContentBlob}.
+     *
+     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
 }
 
 /**
@@ -2897,6 +2984,20 @@ export declare interface ToolConfig {
     functionCallingConfig?: FunctionCallingConfig;
 }
 
+/**
+ * Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
+ * is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
+ * the {@link LiveGenerationConfig}.
+ *
+ * @beta
+ */
+export declare interface Transcription {
+    /**
+     * The text transcription of the audio.
+     */
+    text?: string;
+}
+
 /**
  * A type that includes all specific Schema types.
  * @public
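Taken together, the new declarations compose like this. A minimal sketch, not from the package: the Firebase config values are placeholders, the model name is an assumption, and the `type === 'serverContent'` check assumes the `type` discriminant that messages from `session.receive()` carry elsewhere in these typings.

```ts
import { initializeApp } from "firebase/app";
import { getAI, getLiveGenerativeModel, ResponseModality } from "firebase/ai";

async function liveWithTranscription(): Promise<void> {
  const app = initializeApp({ apiKey: "...", projectId: "..." }); // placeholders
  const ai = getAI(app);

  const model = getLiveGenerativeModel(ai, {
    model: "gemini-2.0-flash-live-preview-04-09", // assumed Live API model name
    generationConfig: {
      responseModalities: [ResponseModality.AUDIO],
      // AudioTranscriptionConfig is an empty marker interface, so passing {}
      // is how you opt in to transcription.
      inputAudioTranscription: {},
      outputAudioTranscription: {}
    }
  });

  const session = await model.connect();

  // Transcription fragments arrive split across LiveServerContent messages
  // ("How a", "re yo", "u today?"), so accumulate rather than replace.
  let userSaid = "";
  let modelSaid = "";
  for await (const message of session.receive()) {
    if (message.type === "serverContent") {
      userSaid += message.inputTranscription?.text ?? "";
      modelSaid += message.outputTranscription?.text ?? "";
    }
  }
  console.log({ userSaid, modelSaid });
}
```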
package/dist/ai.d.ts
CHANGED
Same changes as package/dist/ai-public.d.ts above — the hunk contents are identical — at this file's line offsets:

@@ -232,6 +232,12 @@ export declare interface AudioConversationController {
@@ -2135,6 +2141,24 @@ export declare interface LiveGenerationConfig {
@@ -2226,6 +2250,14 @@ export declare interface LiveServerContent {
@@ -2291,32 +2323,65 @@ export declare class LiveSession {
@@ -2334,6 +2399,28 @@ export declare class LiveSession {
@@ -3039,6 +3126,20 @@ export declare interface ToolConfig {
package/dist/esm/index.esm.js
CHANGED
@@ -4,7 +4,7 @@ import { FirebaseError, Deferred, getModularInstance } from '@firebase/util';
 import { Logger } from '@firebase/logger';
 
 var name = "@firebase/ai";
-var version = "2.4.0-canary.91c218db2";
+var version = "2.4.0-canary.bc5a7c4a7";
 
 /**
  * @license
@@ -2870,75 +2870,104 @@ class LiveSession {
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends
+     * Sends text to the server in realtime.
      *
-     * @
+     * @example
+     * ```javascript
+     * liveSession.sendTextRealtime("Hello, how are you?");
+     * ```
+     *
+     * @param text - The text data to send.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async
+    async sendTextRealtime(text) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-
-
-
-
-
-
-            this.webSocketHandler.send(JSON.stringify(message));
-        });
+        const message = {
+            realtimeInput: {
+                text
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends
+     * Sends audio data to the server in realtime.
      *
-     * @
+     * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+     * little-endian.
+     *
+     * @example
+     * ```javascript
+     * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+     * const blob = { mimeType: "audio/pcm", data: pcmData };
+     * liveSession.sendAudioRealtime(blob);
+     * ```
+     *
+     * @param blob - The base64-encoded PCM data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async
+    async sendAudioRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
         const message = {
-
-
+            realtimeInput: {
+                audio: blob
             }
         };
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends
+     * Sends video data to the server in realtime.
      *
-     * @
+     * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+     * is recommended to set `mimeType` to `image/jpeg`.
+     *
+     * @example
+     * ```javascript
+     * // const videoFrame = ... base64-encoded JPEG data
+     * const blob = { mimeType: "image/jpeg", data: videoFrame };
+     * liveSession.sendVideoRealtime(blob);
+     * ```
+     * @param blob - The base64-encoded video data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async
+    async sendVideoRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-        const
-
-            const { done, value } = await reader.read();
-            if (done) {
-                break;
-            }
-            else if (!value) {
-                throw new Error('Missing chunk in reader, but reader is not done.');
-            }
-            await this.sendMediaChunks([value]);
-        }
-        catch (e) {
-            // Re-throw any errors that occur during stream consumption or sending.
-            const message = e instanceof Error ? e.message : 'Error processing media stream.';
-            throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+        const message = {
+            realtimeInput: {
+                video: blob
             }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
+    }
+    /**
+     * Sends function responses to the server.
+     *
+     * @param functionResponses - The function responses to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendFunctionResponses(functionResponses) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
        }
+        const message = {
+            toolResponse: {
+                functionResponses
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
      * Yields messages received from the server.
@@ -2996,6 +3025,62 @@ class LiveSession {
             await this.webSocketHandler.close(1000, 'Client closed session.');
         }
     }
+    /**
+     * Sends realtime input to the server.
+     *
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * @param mediaChunks - The media chunks to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaChunks(mediaChunks) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        // The backend does not support sending more than one mediaChunk in one message.
+        // Work around this limitation by sending mediaChunks in separate messages.
+        mediaChunks.forEach(mediaChunk => {
+            const message = {
+                realtimeInput: { mediaChunks: [mediaChunk] }
+            };
+            this.webSocketHandler.send(JSON.stringify(message));
+        });
+    }
+    /**
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * Sends a stream of {@link GenerativeContentBlob}.
+     *
+     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaStream(mediaChunkStream) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        const reader = mediaChunkStream.getReader();
+        while (true) {
+            try {
+                const { done, value } = await reader.read();
+                if (done) {
+                    break;
+                }
+                else if (!value) {
+                    throw new Error('Missing chunk in reader, but reader is not done.');
+                }
+                await this.sendMediaChunks([value]);
+            }
+            catch (e) {
+                // Re-throw any errors that occur during stream consumption or sending.
+                const message = e instanceof Error ? e.message : 'Error processing media stream.';
+                throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+            }
+        }
+    }
 }
 
 /**
@@ -3056,13 +3141,18 @@ class LiveGenerativeModel extends AIModel {
         else {
             fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
         }
+        // inputAudioTranscription and outputAudioTranscription are on the generation config in the public API,
+        // but the backend expects them to be in the `setup` message.
+        const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = this.generationConfig;
         const setupMessage = {
             setup: {
                 model: fullModelPath,
-                generationConfig
+                generationConfig,
                 tools: this.tools,
                 toolConfig: this.toolConfig,
-                systemInstruction: this.systemInstruction
+                systemInstruction: this.systemInstruction,
+                inputAudioTranscription,
+                outputAudioTranscription
             }
         };
         try {
@@ -3768,7 +3858,7 @@ class AudioConversationRunner {
             mimeType: 'audio/pcm',
             data: base64
         };
-        void this.liveSession.
+        void this.liveSession.sendAudioRealtime(chunk);
     };
 }
 /**