@firebase/ai 2.4.0 → 2.5.0-canary.0800a8bed

This diff reflects the changes between two publicly released versions of this package, as they appear in their public registry, and is provided for informational purposes only.
Files changed (35)
  1. package/dist/ai-public.d.ts +134 -9
  2. package/dist/ai.d.ts +137 -9
  3. package/dist/esm/index.esm.js +202 -68
  4. package/dist/esm/index.esm.js.map +1 -1
  5. package/dist/esm/src/factory-node.d.ts +19 -0
  6. package/dist/esm/src/methods/chrome-adapter.d.ts +1 -1
  7. package/dist/esm/src/methods/live-session.d.ts +64 -9
  8. package/dist/esm/src/requests/hybrid-helpers.d.ts +7 -2
  9. package/dist/esm/src/requests/response-helpers.d.ts +2 -2
  10. package/dist/esm/src/requests/stream-reader.d.ts +2 -1
  11. package/dist/esm/src/service.d.ts +3 -4
  12. package/dist/esm/src/types/chrome-adapter.d.ts +5 -0
  13. package/dist/esm/src/types/enums.d.ts +15 -0
  14. package/dist/esm/src/types/live-responses.d.ts +21 -3
  15. package/dist/esm/src/types/requests.d.ts +23 -0
  16. package/dist/esm/src/types/responses.d.ts +28 -1
  17. package/dist/index.cjs.js +202 -67
  18. package/dist/index.cjs.js.map +1 -1
  19. package/dist/index.node.cjs.js +306 -166
  20. package/dist/index.node.cjs.js.map +1 -1
  21. package/dist/index.node.mjs +306 -167
  22. package/dist/index.node.mjs.map +1 -1
  23. package/dist/src/factory-node.d.ts +19 -0
  24. package/dist/src/methods/chrome-adapter.d.ts +1 -1
  25. package/dist/src/methods/live-session.d.ts +64 -9
  26. package/dist/src/requests/hybrid-helpers.d.ts +7 -2
  27. package/dist/src/requests/response-helpers.d.ts +2 -2
  28. package/dist/src/requests/stream-reader.d.ts +2 -1
  29. package/dist/src/service.d.ts +3 -4
  30. package/dist/src/types/chrome-adapter.d.ts +5 -0
  31. package/dist/src/types/enums.d.ts +15 -0
  32. package/dist/src/types/live-responses.d.ts +21 -3
  33. package/dist/src/types/requests.d.ts +23 -0
  34. package/dist/src/types/responses.d.ts +28 -1
  35. package/package.json +8 -8
package/dist/ai-public.d.ts CHANGED
@@ -192,6 +192,12 @@ export declare interface AudioConversationController {
  stop: () => Promise<void>;
  }
 
+ /**
+ * The audio transcription configuration.
+ */
+ export declare interface AudioTranscriptionConfig {
+ }
+
  /**
  * Abstract base class representing the configuration for an AI service backend.
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -335,6 +341,7 @@ export declare class ChatSession {
  * @beta
  */
  export declare interface ChromeAdapter {
+ /* Excluded from this release type: mode */
  /**
  * Checks if the on-device model is capable of handling a given
  * request.
@@ -558,6 +565,12 @@ export declare interface EnhancedGenerateContentResponse extends GenerateContent
  * set to `true`.
  */
  thoughtSummary: () => string | undefined;
+ /**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+ inferenceSource?: InferenceSource;
  }
 
  /**
@@ -1833,6 +1846,23 @@ export declare const InferenceMode: {
  */
  export declare type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode];
 
+ /**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+ export declare const InferenceSource: {
+ readonly ON_DEVICE: "on_device";
+ readonly IN_CLOUD: "in_cloud";
+ };
+
+ /**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+ export declare type InferenceSource = (typeof InferenceSource)[keyof typeof InferenceSource];
+
  /**
  * Content part interface if the part represents an image.
  * @public
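
For orientation, a minimal sketch of consuming the new `inferenceSource` field; the Firebase config, model name, and prompt below are placeholder assumptions, not part of this diff:

```typescript
// Sketch: branching on the new inferenceSource field after generateContent().
// The Firebase config, model name, and prompt are placeholder assumptions.
import { initializeApp } from "@firebase/app";
import { getAI, getGenerativeModel, InferenceSource } from "@firebase/ai";

const app = initializeApp({ /* your Firebase config */ });
// With a hybrid InferenceMode configured, a response may come from either source.
const model = getGenerativeModel(getAI(app), { model: "gemini-2.5-flash" });

const result = await model.generateContent("Hello!");
// inferenceSource is optional (@beta); guard before branching on it.
if (result.response.inferenceSource === InferenceSource.ON_DEVICE) {
  console.log("Inference ran on-device.");
} else if (result.response.inferenceSource === InferenceSource.IN_CLOUD) {
  console.log("Inference ran in-cloud.");
}
```

Because the field is optional and `@beta`, callers should treat its absence as "unknown" rather than defaulting to either source.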
@@ -1997,6 +2027,24 @@ export declare interface LiveGenerationConfig {
  * The modalities of the response.
  */
  responseModalities?: ResponseModality[];
+ /**
+ * Enables transcription of audio input.
+ *
+ * When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if you ask the model
+ * "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ inputAudioTranscription?: AudioTranscriptionConfig;
+ /**
+ * Enables transcription of audio output.
+ *
+ * When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if the model says
+ * "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ outputAudioTranscription?: AudioTranscriptionConfig;
  }
 
  /**
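
The two new config properties can be exercised with a minimal sketch like the following; the model name is a placeholder and `app` is assumed to be an already-initialized `FirebaseApp`:

```typescript
// Sketch: enabling input and output audio transcription on a live model.
// The model name is a placeholder; `app` is assumed to be initialized already.
import { FirebaseApp } from "@firebase/app";
import { getAI, getLiveGenerativeModel, ResponseModality } from "@firebase/ai";

declare const app: FirebaseApp;

const liveModel = getLiveGenerativeModel(getAI(app), {
  model: "gemini-2.0-flash-live-preview-04-09",
  generationConfig: {
    responseModalities: [ResponseModality.AUDIO],
    // AudioTranscriptionConfig has no fields yet; an empty object is the opt-in.
    inputAudioTranscription: {},
    outputAudioTranscription: {},
  },
});
const session = await liveModel.connect();
```

`AudioTranscriptionConfig` is an empty interface today, so passing `{}` is the whole opt-in; fields can be added to it later without breaking this call.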
@@ -2078,6 +2126,14 @@ export declare interface LiveServerContent {
  * model was not interrupted.
  */
  interrupted?: boolean;
+ /**
+ * Transcription of the audio that was input to the model.
+ */
+ inputTranscription?: Transcription;
+ /**
+ * Transcription of the audio output from the model.
+ */
+ outputTranscription?: Transcription;
  }
 
  /**
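
Continuing that sketch, the fragmented transcriptions described above can be stitched back together while draining the session's message stream; this assumes `LiveServerContent` messages carry a `type` discriminant of `"serverContent"`, as in the published typings:

```typescript
// Sketch: accumulating the fragmented transcriptions from a live session.
// `session` is the open LiveSession from the previous sketch.
let userSaid = "";
let modelSaid = "";
for await (const message of session.receive()) {
  if (message.type === "serverContent") {
    // Fragments arrive in small pieces ("How a", "re yo", "u today?"); append them.
    userSaid += message.inputTranscription?.text ?? "";
    modelSaid += message.outputTranscription?.text ?? "";
  }
}
```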
@@ -2140,32 +2196,65 @@ export declare class LiveSession {
  */
  send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ sendTextRealtime(text: string): Promise<void>;
  /**
- * Sends function responses to the server.
+ * Sends audio data to the server in realtime.
  *
- * @param functionResponses - The function responses to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
+ sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends video data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
+ sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
  /**
  * Yields messages received from the server.
  * This can only be used by one consumer at a time.
@@ -2183,6 +2272,28 @@ export declare class LiveSession {
  * @beta
  */
  close(): Promise<void>;
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
  }
 
  /**
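
A minimal migration sketch from the deprecated entry points to the new per-modality methods; `pcmChunk` and `jpegFrame` are placeholder base64 strings in the formats the `@remarks` above require:

```typescript
// Sketch: migrating from the deprecated sendMediaChunks() to the
// per-modality realtime methods.
import { LiveSession } from "@firebase/ai";

declare const session: LiveSession;
declare const pcmChunk: string;  // base64 16-bit PCM @ 16kHz, little-endian
declare const jpegFrame: string; // base64 JPEG video frame (sent at 1 FPS)

// Before (deprecated in 2.5.0):
await session.sendMediaChunks([
  { mimeType: "audio/pcm", data: pcmChunk },
  { mimeType: "image/jpeg", data: jpegFrame },
]);

// After:
await session.sendAudioRealtime({ mimeType: "audio/pcm", data: pcmChunk });
await session.sendVideoRealtime({ mimeType: "image/jpeg", data: jpegFrame });
```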
@@ -2874,6 +2985,20 @@ export declare interface ToolConfig {
  functionCallingConfig?: FunctionCallingConfig;
  }
 
+ /**
+ * Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
+ * is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
+ * the {@link LiveGenerationConfig}.
+ *
+ * @beta
+ */
+ export declare interface Transcription {
+ /**
+ * The text transcription of the audio.
+ */
+ text?: string;
+ }
+
  /**
  * A type that includes all specific Schema types.
  * @public
package/dist/ai.d.ts CHANGED
@@ -232,6 +232,12 @@ export declare interface AudioConversationController {
  stop: () => Promise<void>;
  }
 
+ /**
+ * The audio transcription configuration.
+ */
+ export declare interface AudioTranscriptionConfig {
+ }
+
  /**
  * Abstract base class representing the configuration for an AI service backend.
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -375,6 +381,10 @@ export declare class ChatSession {
  * @beta
  */
  export declare interface ChromeAdapter {
+ /**
+ * @internal
+ */
+ mode: InferenceMode;
  /**
  * Checks if the on-device model is capable of handling a given
  * request.
@@ -604,6 +614,12 @@ export declare interface EnhancedGenerateContentResponse extends GenerateContent
  * set to `true`.
  */
  thoughtSummary: () => string | undefined;
+ /**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+ inferenceSource?: InferenceSource;
  }
 
  /**
@@ -1945,6 +1961,23 @@ export declare const InferenceMode: {
  */
  export declare type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode];
 
+ /**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+ export declare const InferenceSource: {
+ readonly ON_DEVICE: "on_device";
+ readonly IN_CLOUD: "in_cloud";
+ };
+
+ /**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+ export declare type InferenceSource = (typeof InferenceSource)[keyof typeof InferenceSource];
+
  /**
  * Content part interface if the part represents an image.
  * @public
@@ -2112,6 +2145,24 @@ export declare interface LiveGenerationConfig {
  * The modalities of the response.
  */
  responseModalities?: ResponseModality[];
+ /**
+ * Enables transcription of audio input.
+ *
+ * When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if you ask the model
+ * "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ inputAudioTranscription?: AudioTranscriptionConfig;
+ /**
+ * Enables transcription of audio output.
+ *
+ * When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
+ * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+ * messages, so you may only receive small amounts of text per message. For example, if the model says
+ * "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
+ */
+ outputAudioTranscription?: AudioTranscriptionConfig;
  }
 
  /**
@@ -2203,6 +2254,14 @@ export declare interface LiveServerContent {
  * model was not interrupted.
  */
  interrupted?: boolean;
+ /**
+ * Transcription of the audio that was input to the model.
+ */
+ inputTranscription?: Transcription;
+ /**
+ * Transcription of the audio output from the model.
+ */
+ outputTranscription?: Transcription;
  }
 
  /**
@@ -2268,32 +2327,65 @@ export declare class LiveSession {
  */
  send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ sendTextRealtime(text: string): Promise<void>;
  /**
- * Sends function responses to the server.
+ * Sends audio data to the server in realtime.
  *
- * @param functionResponses - The function responses to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
+ sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends video data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
+ sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
  /**
  * Yields messages received from the server.
  * This can only be used by one consumer at a time.
@@ -2311,6 +2403,28 @@ export declare class LiveSession {
  * @beta
  */
  close(): Promise<void>;
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
  }
 
  /**
@@ -3016,6 +3130,20 @@ export declare interface ToolConfig {
  functionCallingConfig?: FunctionCallingConfig;
  }
 
+ /**
+ * Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
+ * is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
+ * the {@link LiveGenerationConfig}.
+ *
+ * @beta
+ */
+ export declare interface Transcription {
+ /**
+ * The text transcription of the audio.
+ */
+ text?: string;
+ }
+
  /**
  * A type that includes all specific Schema types.
  * @public