@firebase/ai 2.4.0-canary.44d9891f9 → 2.4.0-canary.6e0e30317
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-public.d.ts +111 -9
- package/dist/ai.d.ts +114 -9
- package/dist/esm/index.esm.js +129 -39
- package/dist/esm/index.esm.js.map +1 -1
- package/dist/esm/src/factory-node.d.ts +19 -0
- package/dist/esm/src/methods/live-session.d.ts +64 -9
- package/dist/esm/src/service.d.ts +3 -4
- package/dist/esm/src/types/chrome-adapter.d.ts +5 -0
- package/dist/esm/src/types/live-responses.d.ts +21 -3
- package/dist/esm/src/types/requests.d.ts +23 -0
- package/dist/esm/src/types/responses.d.ts +21 -0
- package/dist/index.cjs.js +129 -39
- package/dist/index.cjs.js.map +1 -1
- package/dist/index.node.cjs.js +253 -145
- package/dist/index.node.cjs.js.map +1 -1
- package/dist/index.node.mjs +253 -145
- package/dist/index.node.mjs.map +1 -1
- package/dist/src/factory-node.d.ts +19 -0
- package/dist/src/methods/live-session.d.ts +64 -9
- package/dist/src/service.d.ts +3 -4
- package/dist/src/types/chrome-adapter.d.ts +5 -0
- package/dist/src/types/live-responses.d.ts +21 -3
- package/dist/src/types/requests.d.ts +23 -0
- package/dist/src/types/responses.d.ts +21 -0
- package/package.json +8 -8
package/dist/ai-public.d.ts
CHANGED
|
@@ -192,6 +192,12 @@ export declare interface AudioConversationController {
|
|
|
192
192
|
stop: () => Promise<void>;
|
|
193
193
|
}
|
|
194
194
|
|
|
195
|
+
/**
|
|
196
|
+
* The audio transcription configuration.
|
|
197
|
+
*/
|
|
198
|
+
export declare interface AudioTranscriptionConfig {
|
|
199
|
+
}
|
|
200
|
+
|
|
195
201
|
/**
|
|
196
202
|
* Abstract base class representing the configuration for an AI service backend.
|
|
197
203
|
* This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
|
|
@@ -335,6 +341,7 @@ export declare class ChatSession {
|
|
|
335
341
|
* @beta
|
|
336
342
|
*/
|
|
337
343
|
export declare interface ChromeAdapter {
|
|
344
|
+
/* Excluded from this release type: mode */
|
|
338
345
|
/**
|
|
339
346
|
* Checks if the on-device model is capable of handling a given
|
|
340
347
|
* request.
|
|
@@ -2020,6 +2027,24 @@ export declare interface LiveGenerationConfig {
|
|
|
2020
2027
|
* The modalities of the response.
|
|
2021
2028
|
*/
|
|
2022
2029
|
responseModalities?: ResponseModality[];
|
|
2030
|
+
/**
|
|
2031
|
+
* Enables transcription of audio input.
|
|
2032
|
+
*
|
|
2033
|
+
* When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
|
|
2034
|
+
* in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
|
|
2035
|
+
* messages, so you may only receive small amounts of text per message. For example, if you ask the model
|
|
2036
|
+
* "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
|
|
2037
|
+
*/
|
|
2038
|
+
inputAudioTranscription?: AudioTranscriptionConfig;
|
|
2039
|
+
/**
|
|
2040
|
+
* Enables transcription of audio output.
|
|
2041
|
+
*
|
|
2042
|
+
* When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
|
|
2043
|
+
* in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
|
|
2044
|
+
* messages, so you may only receive small amounts of text per message. For example, if the model says
|
|
2045
|
+
* "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
|
|
2046
|
+
*/
|
|
2047
|
+
outputAudioTranscription?: AudioTranscriptionConfig;
|
|
2023
2048
|
}
|
|
2024
2049
|
|
|
2025
2050
|
/**
|
|
@@ -2101,6 +2126,14 @@ export declare interface LiveServerContent {
|
|
|
2101
2126
|
* model was not interrupted.
|
|
2102
2127
|
*/
|
|
2103
2128
|
interrupted?: boolean;
|
|
2129
|
+
/**
|
|
2130
|
+
* Transcription of the audio that was input to the model.
|
|
2131
|
+
*/
|
|
2132
|
+
inputTranscription?: Transcription;
|
|
2133
|
+
/**
|
|
2134
|
+
* Transcription of the audio output from the model.
|
|
2135
|
+
*/
|
|
2136
|
+
outputTranscription?: Transcription;
|
|
2104
2137
|
}
|
|
2105
2138
|
|
|
2106
2139
|
/**
|
|
@@ -2163,32 +2196,65 @@ export declare class LiveSession {
|
|
|
2163
2196
|
*/
|
|
2164
2197
|
send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
|
|
2165
2198
|
/**
|
|
2166
|
-
* Sends
|
|
2199
|
+
* Sends text to the server in realtime.
|
|
2167
2200
|
*
|
|
2168
|
-
* @
|
|
2201
|
+
* @example
|
|
2202
|
+
* ```javascript
|
|
2203
|
+
* liveSession.sendTextRealtime("Hello, how are you?");
|
|
2204
|
+
* ```
|
|
2205
|
+
*
|
|
2206
|
+
* @param text - The text data to send.
|
|
2169
2207
|
* @throws If this session has been closed.
|
|
2170
2208
|
*
|
|
2171
2209
|
* @beta
|
|
2172
2210
|
*/
|
|
2173
|
-
|
|
2211
|
+
sendTextRealtime(text: string): Promise<void>;
|
|
2174
2212
|
/**
|
|
2175
|
-
* Sends
|
|
2213
|
+
* Sends audio data to the server in realtime.
|
|
2176
2214
|
*
|
|
2177
|
-
* @
|
|
2215
|
+
* @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
|
|
2216
|
+
* little-endian.
|
|
2217
|
+
*
|
|
2218
|
+
* @example
|
|
2219
|
+
* ```javascript
|
|
2220
|
+
* // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
|
|
2221
|
+
* const blob = { mimeType: "audio/pcm", data: pcmData };
|
|
2222
|
+
* liveSession.sendAudioRealtime(blob);
|
|
2223
|
+
* ```
|
|
2224
|
+
*
|
|
2225
|
+
* @param blob - The base64-encoded PCM data to send to the server in realtime.
|
|
2178
2226
|
* @throws If this session has been closed.
|
|
2179
2227
|
*
|
|
2180
2228
|
* @beta
|
|
2181
2229
|
*/
|
|
2182
|
-
|
|
2230
|
+
sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
|
|
2183
2231
|
/**
|
|
2184
|
-
* Sends
|
|
2232
|
+
* Sends video data to the server in realtime.
|
|
2185
2233
|
*
|
|
2186
|
-
* @
|
|
2234
|
+
* @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
|
|
2235
|
+
* is recommended to set `mimeType` to `image/jpeg`.
|
|
2236
|
+
*
|
|
2237
|
+
* @example
|
|
2238
|
+
* ```javascript
|
|
2239
|
+
* // const videoFrame = ... base64-encoded JPEG data
|
|
2240
|
+
* const blob = { mimeType: "image/jpeg", data: videoFrame };
|
|
2241
|
+
* liveSession.sendVideoRealtime(blob);
|
|
2242
|
+
* ```
|
|
2243
|
+
* @param blob - The base64-encoded video data to send to the server in realtime.
|
|
2187
2244
|
* @throws If this session has been closed.
|
|
2188
2245
|
*
|
|
2189
2246
|
* @beta
|
|
2190
2247
|
*/
|
|
2191
|
-
|
|
2248
|
+
sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
|
|
2249
|
+
/**
|
|
2250
|
+
* Sends function responses to the server.
|
|
2251
|
+
*
|
|
2252
|
+
* @param functionResponses - The function responses to send.
|
|
2253
|
+
* @throws If this session has been closed.
|
|
2254
|
+
*
|
|
2255
|
+
* @beta
|
|
2256
|
+
*/
|
|
2257
|
+
sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
|
|
2192
2258
|
/**
|
|
2193
2259
|
* Yields messages received from the server.
|
|
2194
2260
|
* This can only be used by one consumer at a time.
|
|
@@ -2206,6 +2272,28 @@ export declare class LiveSession {
|
|
|
2206
2272
|
* @beta
|
|
2207
2273
|
*/
|
|
2208
2274
|
close(): Promise<void>;
|
|
2275
|
+
/**
|
|
2276
|
+
* Sends realtime input to the server.
|
|
2277
|
+
*
|
|
2278
|
+
* @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
|
|
2279
|
+
*
|
|
2280
|
+
* @param mediaChunks - The media chunks to send.
|
|
2281
|
+
* @throws If this session has been closed.
|
|
2282
|
+
*
|
|
2283
|
+
* @beta
|
|
2284
|
+
*/
|
|
2285
|
+
sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
|
|
2286
|
+
/**
|
|
2287
|
+
* @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
|
|
2288
|
+
*
|
|
2289
|
+
* Sends a stream of {@link GenerativeContentBlob}.
|
|
2290
|
+
*
|
|
2291
|
+
* @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
|
|
2292
|
+
* @throws If this session has been closed.
|
|
2293
|
+
*
|
|
2294
|
+
* @beta
|
|
2295
|
+
*/
|
|
2296
|
+
sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
|
|
2209
2297
|
}
|
|
2210
2298
|
|
|
2211
2299
|
/**
|
|
@@ -2897,6 +2985,20 @@ export declare interface ToolConfig {
|
|
|
2897
2985
|
functionCallingConfig?: FunctionCallingConfig;
|
|
2898
2986
|
}
|
|
2899
2987
|
|
|
2988
|
+
/**
|
|
2989
|
+
* Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
|
|
2990
|
+
* is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
|
|
2991
|
+
* the {@link LiveGenerationConfig}.
|
|
2992
|
+
*
|
|
2993
|
+
* @beta
|
|
2994
|
+
*/
|
|
2995
|
+
export declare interface Transcription {
|
|
2996
|
+
/**
|
|
2997
|
+
* The text transcription of the audio.
|
|
2998
|
+
*/
|
|
2999
|
+
text?: string;
|
|
3000
|
+
}
|
|
3001
|
+
|
|
2900
3002
|
/**
|
|
2901
3003
|
* A type that includes all specific Schema types.
|
|
2902
3004
|
* @public
|
package/dist/ai.d.ts
CHANGED
|
@@ -232,6 +232,12 @@ export declare interface AudioConversationController {
|
|
|
232
232
|
stop: () => Promise<void>;
|
|
233
233
|
}
|
|
234
234
|
|
|
235
|
+
/**
|
|
236
|
+
* The audio transcription configuration.
|
|
237
|
+
*/
|
|
238
|
+
export declare interface AudioTranscriptionConfig {
|
|
239
|
+
}
|
|
240
|
+
|
|
235
241
|
/**
|
|
236
242
|
* Abstract base class representing the configuration for an AI service backend.
|
|
237
243
|
* This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
|
|
@@ -375,6 +381,10 @@ export declare class ChatSession {
|
|
|
375
381
|
* @beta
|
|
376
382
|
*/
|
|
377
383
|
export declare interface ChromeAdapter {
|
|
384
|
+
/**
|
|
385
|
+
* @internal
|
|
386
|
+
*/
|
|
387
|
+
mode: InferenceMode;
|
|
378
388
|
/**
|
|
379
389
|
* Checks if the on-device model is capable of handling a given
|
|
380
390
|
* request.
|
|
@@ -2135,6 +2145,24 @@ export declare interface LiveGenerationConfig {
|
|
|
2135
2145
|
* The modalities of the response.
|
|
2136
2146
|
*/
|
|
2137
2147
|
responseModalities?: ResponseModality[];
|
|
2148
|
+
/**
|
|
2149
|
+
* Enables transcription of audio input.
|
|
2150
|
+
*
|
|
2151
|
+
* When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
|
|
2152
|
+
* in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
|
|
2153
|
+
* messages, so you may only receive small amounts of text per message. For example, if you ask the model
|
|
2154
|
+
* "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
|
|
2155
|
+
*/
|
|
2156
|
+
inputAudioTranscription?: AudioTranscriptionConfig;
|
|
2157
|
+
/**
|
|
2158
|
+
* Enables transcription of audio output.
|
|
2159
|
+
*
|
|
2160
|
+
* When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
|
|
2161
|
+
* in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
|
|
2162
|
+
* messages, so you may only receive small amounts of text per message. For example, if the model says
|
|
2163
|
+
* "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
|
|
2164
|
+
*/
|
|
2165
|
+
outputAudioTranscription?: AudioTranscriptionConfig;
|
|
2138
2166
|
}
|
|
2139
2167
|
|
|
2140
2168
|
/**
|
|
@@ -2226,6 +2254,14 @@ export declare interface LiveServerContent {
|
|
|
2226
2254
|
* model was not interrupted.
|
|
2227
2255
|
*/
|
|
2228
2256
|
interrupted?: boolean;
|
|
2257
|
+
/**
|
|
2258
|
+
* Transcription of the audio that was input to the model.
|
|
2259
|
+
*/
|
|
2260
|
+
inputTranscription?: Transcription;
|
|
2261
|
+
/**
|
|
2262
|
+
* Transcription of the audio output from the model.
|
|
2263
|
+
*/
|
|
2264
|
+
outputTranscription?: Transcription;
|
|
2229
2265
|
}
|
|
2230
2266
|
|
|
2231
2267
|
/**
|
|
@@ -2291,32 +2327,65 @@ export declare class LiveSession {
|
|
|
2291
2327
|
*/
|
|
2292
2328
|
send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
|
|
2293
2329
|
/**
|
|
2294
|
-
* Sends
|
|
2330
|
+
* Sends text to the server in realtime.
|
|
2295
2331
|
*
|
|
2296
|
-
* @
|
|
2332
|
+
* @example
|
|
2333
|
+
* ```javascript
|
|
2334
|
+
* liveSession.sendTextRealtime("Hello, how are you?");
|
|
2335
|
+
* ```
|
|
2336
|
+
*
|
|
2337
|
+
* @param text - The text data to send.
|
|
2297
2338
|
* @throws If this session has been closed.
|
|
2298
2339
|
*
|
|
2299
2340
|
* @beta
|
|
2300
2341
|
*/
|
|
2301
|
-
|
|
2342
|
+
sendTextRealtime(text: string): Promise<void>;
|
|
2302
2343
|
/**
|
|
2303
|
-
* Sends
|
|
2344
|
+
* Sends audio data to the server in realtime.
|
|
2304
2345
|
*
|
|
2305
|
-
* @
|
|
2346
|
+
* @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
|
|
2347
|
+
* little-endian.
|
|
2348
|
+
*
|
|
2349
|
+
* @example
|
|
2350
|
+
* ```javascript
|
|
2351
|
+
* // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
|
|
2352
|
+
* const blob = { mimeType: "audio/pcm", data: pcmData };
|
|
2353
|
+
* liveSession.sendAudioRealtime(blob);
|
|
2354
|
+
* ```
|
|
2355
|
+
*
|
|
2356
|
+
* @param blob - The base64-encoded PCM data to send to the server in realtime.
|
|
2306
2357
|
* @throws If this session has been closed.
|
|
2307
2358
|
*
|
|
2308
2359
|
* @beta
|
|
2309
2360
|
*/
|
|
2310
|
-
|
|
2361
|
+
sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
|
|
2311
2362
|
/**
|
|
2312
|
-
* Sends
|
|
2363
|
+
* Sends video data to the server in realtime.
|
|
2313
2364
|
*
|
|
2314
|
-
* @
|
|
2365
|
+
* @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
|
|
2366
|
+
* is recommended to set `mimeType` to `image/jpeg`.
|
|
2367
|
+
*
|
|
2368
|
+
* @example
|
|
2369
|
+
* ```javascript
|
|
2370
|
+
* // const videoFrame = ... base64-encoded JPEG data
|
|
2371
|
+
* const blob = { mimeType: "image/jpeg", data: videoFrame };
|
|
2372
|
+
* liveSession.sendVideoRealtime(blob);
|
|
2373
|
+
* ```
|
|
2374
|
+
* @param blob - The base64-encoded video data to send to the server in realtime.
|
|
2315
2375
|
* @throws If this session has been closed.
|
|
2316
2376
|
*
|
|
2317
2377
|
* @beta
|
|
2318
2378
|
*/
|
|
2319
|
-
|
|
2379
|
+
sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
|
|
2380
|
+
/**
|
|
2381
|
+
* Sends function responses to the server.
|
|
2382
|
+
*
|
|
2383
|
+
* @param functionResponses - The function responses to send.
|
|
2384
|
+
* @throws If this session has been closed.
|
|
2385
|
+
*
|
|
2386
|
+
* @beta
|
|
2387
|
+
*/
|
|
2388
|
+
sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
|
|
2320
2389
|
/**
|
|
2321
2390
|
* Yields messages received from the server.
|
|
2322
2391
|
* This can only be used by one consumer at a time.
|
|
@@ -2334,6 +2403,28 @@ export declare class LiveSession {
|
|
|
2334
2403
|
* @beta
|
|
2335
2404
|
*/
|
|
2336
2405
|
close(): Promise<void>;
|
|
2406
|
+
/**
|
|
2407
|
+
* Sends realtime input to the server.
|
|
2408
|
+
*
|
|
2409
|
+
* @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
|
|
2410
|
+
*
|
|
2411
|
+
* @param mediaChunks - The media chunks to send.
|
|
2412
|
+
* @throws If this session has been closed.
|
|
2413
|
+
*
|
|
2414
|
+
* @beta
|
|
2415
|
+
*/
|
|
2416
|
+
sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
|
|
2417
|
+
/**
|
|
2418
|
+
* @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
|
|
2419
|
+
*
|
|
2420
|
+
* Sends a stream of {@link GenerativeContentBlob}.
|
|
2421
|
+
*
|
|
2422
|
+
* @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
|
|
2423
|
+
* @throws If this session has been closed.
|
|
2424
|
+
*
|
|
2425
|
+
* @beta
|
|
2426
|
+
*/
|
|
2427
|
+
sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
|
|
2337
2428
|
}
|
|
2338
2429
|
|
|
2339
2430
|
/**
|
|
@@ -3039,6 +3130,20 @@ export declare interface ToolConfig {
|
|
|
3039
3130
|
functionCallingConfig?: FunctionCallingConfig;
|
|
3040
3131
|
}
|
|
3041
3132
|
|
|
3133
|
+
/**
|
|
3134
|
+
* Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
|
|
3135
|
+
* is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
|
|
3136
|
+
* the {@link LiveGenerationConfig}.
|
|
3137
|
+
*
|
|
3138
|
+
* @beta
|
|
3139
|
+
*/
|
|
3140
|
+
export declare interface Transcription {
|
|
3141
|
+
/**
|
|
3142
|
+
* The text transcription of the audio.
|
|
3143
|
+
*/
|
|
3144
|
+
text?: string;
|
|
3145
|
+
}
|
|
3146
|
+
|
|
3042
3147
|
/**
|
|
3043
3148
|
* A type that includes all specific Schema types.
|
|
3044
3149
|
* @public
|
package/dist/esm/index.esm.js
CHANGED
|
@@ -4,7 +4,7 @@ import { FirebaseError, Deferred, getModularInstance } from '@firebase/util';
|
|
|
4
4
|
import { Logger } from '@firebase/logger';
|
|
5
5
|
|
|
6
6
|
var name = "@firebase/ai";
|
|
7
|
-
var version = "2.4.0-canary.
|
|
7
|
+
var version = "2.4.0-canary.6e0e30317";
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* @license
|
|
@@ -2870,75 +2870,104 @@ class LiveSession {
|
|
|
2870
2870
|
this.webSocketHandler.send(JSON.stringify(message));
|
|
2871
2871
|
}
|
|
2872
2872
|
/**
|
|
2873
|
-
* Sends
|
|
2873
|
+
* Sends text to the server in realtime.
|
|
2874
2874
|
*
|
|
2875
|
-
* @
|
|
2875
|
+
* @example
|
|
2876
|
+
* ```javascript
|
|
2877
|
+
* liveSession.sendTextRealtime("Hello, how are you?");
|
|
2878
|
+
* ```
|
|
2879
|
+
*
|
|
2880
|
+
* @param text - The text data to send.
|
|
2876
2881
|
* @throws If this session has been closed.
|
|
2877
2882
|
*
|
|
2878
2883
|
* @beta
|
|
2879
2884
|
*/
|
|
2880
|
-
async
|
|
2885
|
+
async sendTextRealtime(text) {
|
|
2881
2886
|
if (this.isClosed) {
|
|
2882
2887
|
throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
|
|
2883
2888
|
}
|
|
2884
|
-
|
|
2885
|
-
|
|
2886
|
-
|
|
2887
|
-
|
|
2888
|
-
|
|
2889
|
-
|
|
2890
|
-
this.webSocketHandler.send(JSON.stringify(message));
|
|
2891
|
-
});
|
|
2889
|
+
const message = {
|
|
2890
|
+
realtimeInput: {
|
|
2891
|
+
text
|
|
2892
|
+
}
|
|
2893
|
+
};
|
|
2894
|
+
this.webSocketHandler.send(JSON.stringify(message));
|
|
2892
2895
|
}
|
|
2893
2896
|
/**
|
|
2894
|
-
* Sends
|
|
2897
|
+
* Sends audio data to the server in realtime.
|
|
2895
2898
|
*
|
|
2896
|
-
* @
|
|
2899
|
+
* @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
|
|
2900
|
+
* little-endian.
|
|
2901
|
+
*
|
|
2902
|
+
* @example
|
|
2903
|
+
* ```javascript
|
|
2904
|
+
* // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
|
|
2905
|
+
* const blob = { mimeType: "audio/pcm", data: pcmData };
|
|
2906
|
+
* liveSession.sendAudioRealtime(blob);
|
|
2907
|
+
* ```
|
|
2908
|
+
*
|
|
2909
|
+
* @param blob - The base64-encoded PCM data to send to the server in realtime.
|
|
2897
2910
|
* @throws If this session has been closed.
|
|
2898
2911
|
*
|
|
2899
2912
|
* @beta
|
|
2900
2913
|
*/
|
|
2901
|
-
async
|
|
2914
|
+
async sendAudioRealtime(blob) {
|
|
2902
2915
|
if (this.isClosed) {
|
|
2903
2916
|
throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
|
|
2904
2917
|
}
|
|
2905
2918
|
const message = {
|
|
2906
|
-
|
|
2907
|
-
|
|
2919
|
+
realtimeInput: {
|
|
2920
|
+
audio: blob
|
|
2908
2921
|
}
|
|
2909
2922
|
};
|
|
2910
2923
|
this.webSocketHandler.send(JSON.stringify(message));
|
|
2911
2924
|
}
|
|
2912
2925
|
/**
|
|
2913
|
-
* Sends
|
|
2926
|
+
* Sends video data to the server in realtime.
|
|
2914
2927
|
*
|
|
2915
|
-
* @
|
|
2928
|
+
* @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
|
|
2929
|
+
* is recommended to set `mimeType` to `image/jpeg`.
|
|
2930
|
+
*
|
|
2931
|
+
* @example
|
|
2932
|
+
* ```javascript
|
|
2933
|
+
* // const videoFrame = ... base64-encoded JPEG data
|
|
2934
|
+
* const blob = { mimeType: "image/jpeg", data: videoFrame };
|
|
2935
|
+
* liveSession.sendVideoRealtime(blob);
|
|
2936
|
+
* ```
|
|
2937
|
+
* @param blob - The base64-encoded video data to send to the server in realtime.
|
|
2916
2938
|
* @throws If this session has been closed.
|
|
2917
2939
|
*
|
|
2918
2940
|
* @beta
|
|
2919
2941
|
*/
|
|
2920
|
-
async
|
|
2942
|
+
async sendVideoRealtime(blob) {
|
|
2921
2943
|
if (this.isClosed) {
|
|
2922
2944
|
throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
|
|
2923
2945
|
}
|
|
2924
|
-
const
|
|
2925
|
-
|
|
2926
|
-
|
|
2927
|
-
const { done, value } = await reader.read();
|
|
2928
|
-
if (done) {
|
|
2929
|
-
break;
|
|
2930
|
-
}
|
|
2931
|
-
else if (!value) {
|
|
2932
|
-
throw new Error('Missing chunk in reader, but reader is not done.');
|
|
2933
|
-
}
|
|
2934
|
-
await this.sendMediaChunks([value]);
|
|
2935
|
-
}
|
|
2936
|
-
catch (e) {
|
|
2937
|
-
// Re-throw any errors that occur during stream consumption or sending.
|
|
2938
|
-
const message = e instanceof Error ? e.message : 'Error processing media stream.';
|
|
2939
|
-
throw new AIError(AIErrorCode.REQUEST_ERROR, message);
|
|
2946
|
+
const message = {
|
|
2947
|
+
realtimeInput: {
|
|
2948
|
+
video: blob
|
|
2940
2949
|
}
|
|
2950
|
+
};
|
|
2951
|
+
this.webSocketHandler.send(JSON.stringify(message));
|
|
2952
|
+
}
|
|
2953
|
+
/**
|
|
2954
|
+
* Sends function responses to the server.
|
|
2955
|
+
*
|
|
2956
|
+
* @param functionResponses - The function responses to send.
|
|
2957
|
+
* @throws If this session has been closed.
|
|
2958
|
+
*
|
|
2959
|
+
* @beta
|
|
2960
|
+
*/
|
|
2961
|
+
async sendFunctionResponses(functionResponses) {
|
|
2962
|
+
if (this.isClosed) {
|
|
2963
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
|
|
2941
2964
|
}
|
|
2965
|
+
const message = {
|
|
2966
|
+
toolResponse: {
|
|
2967
|
+
functionResponses
|
|
2968
|
+
}
|
|
2969
|
+
};
|
|
2970
|
+
this.webSocketHandler.send(JSON.stringify(message));
|
|
2942
2971
|
}
|
|
2943
2972
|
/**
|
|
2944
2973
|
* Yields messages received from the server.
|
|
@@ -2996,6 +3025,62 @@ class LiveSession {
|
|
|
2996
3025
|
await this.webSocketHandler.close(1000, 'Client closed session.');
|
|
2997
3026
|
}
|
|
2998
3027
|
}
|
|
3028
|
+
/**
|
|
3029
|
+
* Sends realtime input to the server.
|
|
3030
|
+
*
|
|
3031
|
+
* @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
|
|
3032
|
+
*
|
|
3033
|
+
* @param mediaChunks - The media chunks to send.
|
|
3034
|
+
* @throws If this session has been closed.
|
|
3035
|
+
*
|
|
3036
|
+
* @beta
|
|
3037
|
+
*/
|
|
3038
|
+
async sendMediaChunks(mediaChunks) {
|
|
3039
|
+
if (this.isClosed) {
|
|
3040
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
|
|
3041
|
+
}
|
|
3042
|
+
// The backend does not support sending more than one mediaChunk in one message.
|
|
3043
|
+
// Work around this limitation by sending mediaChunks in separate messages.
|
|
3044
|
+
mediaChunks.forEach(mediaChunk => {
|
|
3045
|
+
const message = {
|
|
3046
|
+
realtimeInput: { mediaChunks: [mediaChunk] }
|
|
3047
|
+
};
|
|
3048
|
+
this.webSocketHandler.send(JSON.stringify(message));
|
|
3049
|
+
});
|
|
3050
|
+
}
|
|
3051
|
+
/**
|
|
3052
|
+
* @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
|
|
3053
|
+
*
|
|
3054
|
+
* Sends a stream of {@link GenerativeContentBlob}.
|
|
3055
|
+
*
|
|
3056
|
+
* @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
|
|
3057
|
+
* @throws If this session has been closed.
|
|
3058
|
+
*
|
|
3059
|
+
* @beta
|
|
3060
|
+
*/
|
|
3061
|
+
async sendMediaStream(mediaChunkStream) {
|
|
3062
|
+
if (this.isClosed) {
|
|
3063
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
|
|
3064
|
+
}
|
|
3065
|
+
const reader = mediaChunkStream.getReader();
|
|
3066
|
+
while (true) {
|
|
3067
|
+
try {
|
|
3068
|
+
const { done, value } = await reader.read();
|
|
3069
|
+
if (done) {
|
|
3070
|
+
break;
|
|
3071
|
+
}
|
|
3072
|
+
else if (!value) {
|
|
3073
|
+
throw new Error('Missing chunk in reader, but reader is not done.');
|
|
3074
|
+
}
|
|
3075
|
+
await this.sendMediaChunks([value]);
|
|
3076
|
+
}
|
|
3077
|
+
catch (e) {
|
|
3078
|
+
// Re-throw any errors that occur during stream consumption or sending.
|
|
3079
|
+
const message = e instanceof Error ? e.message : 'Error processing media stream.';
|
|
3080
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, message);
|
|
3081
|
+
}
|
|
3082
|
+
}
|
|
3083
|
+
}
|
|
2999
3084
|
}
|
|
3000
3085
|
|
|
3001
3086
|
/**
|
|
@@ -3056,13 +3141,18 @@ class LiveGenerativeModel extends AIModel {
|
|
|
3056
3141
|
else {
|
|
3057
3142
|
fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
|
|
3058
3143
|
}
|
|
3144
|
+
// inputAudioTranscription and outputAudioTranscription are on the generation config in the public API,
|
|
3145
|
+
// but the backend expects them to be in the `setup` message.
|
|
3146
|
+
const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = this.generationConfig;
|
|
3059
3147
|
const setupMessage = {
|
|
3060
3148
|
setup: {
|
|
3061
3149
|
model: fullModelPath,
|
|
3062
|
-
generationConfig
|
|
3150
|
+
generationConfig,
|
|
3063
3151
|
tools: this.tools,
|
|
3064
3152
|
toolConfig: this.toolConfig,
|
|
3065
|
-
systemInstruction: this.systemInstruction
|
|
3153
|
+
systemInstruction: this.systemInstruction,
|
|
3154
|
+
inputAudioTranscription,
|
|
3155
|
+
outputAudioTranscription
|
|
3066
3156
|
}
|
|
3067
3157
|
};
|
|
3068
3158
|
try {
|
|
@@ -3768,7 +3858,7 @@ class AudioConversationRunner {
|
|
|
3768
3858
|
mimeType: 'audio/pcm',
|
|
3769
3859
|
data: base64
|
|
3770
3860
|
};
|
|
3771
|
-
void this.liveSession.
|
|
3861
|
+
void this.liveSession.sendAudioRealtime(chunk);
|
|
3772
3862
|
};
|
|
3773
3863
|
}
|
|
3774
3864
|
/**
|