@firebase/ai 2.4.0 → 2.5.0-20251028194003

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ var util = require('@firebase/util');
 var logger$1 = require('@firebase/logger');
 
 var name = "@firebase/ai";
-var version = "2.4.0";
+var version = "2.5.0-20251028194003";
 
 /**
  * @license
@@ -327,6 +327,15 @@ const InferenceMode = {
     'ONLY_IN_CLOUD': 'only_in_cloud',
     'PREFER_IN_CLOUD': 'prefer_in_cloud'
 };
+/**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+const InferenceSource = {
+    'ON_DEVICE': 'on_device',
+    'IN_CLOUD': 'in_cloud'
+};
 /**
  * Represents the result of the code execution.
  *
@@ -1273,7 +1282,7 @@ function hasValidCandidates(response) {
  * Creates an EnhancedGenerateContentResponse object that has helper functions and
  * other modifications that improve usability.
  */
-function createEnhancedContentResponse(response) {
+function createEnhancedContentResponse(response, inferenceSource = InferenceSource.IN_CLOUD) {
     /**
      * The Vertex AI backend omits default values.
      * This causes the `index` property to be omitted from the first candidate in the
@@ -1284,6 +1293,7 @@ function createEnhancedContentResponse(response) {
         response.candidates[0].index = 0;
    }
     const responseWithHelpers = addHelpers(response);
+    responseWithHelpers.inferenceSource = inferenceSource;
     return responseWithHelpers;
 }
 /**
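
The net effect of the two hunks above is that every enhanced response now carries an `inferenceSource` field, defaulting to `InferenceSource.IN_CLOUD`. A minimal consumer-side sketch; `model` is assumed to be a `GenerativeModel` already configured for hybrid on-device/in-cloud inference (that setup is outside this diff), while `inferenceSource`, `InferenceSource`, and the existing `generateContent()`/`text()` helpers come from the package itself:

```javascript
// Sketch: branching on where a response was generated.
const { InferenceSource } = require('@firebase/ai');

async function describeInference(model, prompt) {
    const { response } = await model.generateContent(prompt);
    if (response.inferenceSource === InferenceSource.ON_DEVICE) {
        console.log('Answered on-device:', response.text());
    }
    else {
        // IN_CLOUD is also the default when no ChromeAdapter is configured.
        console.log('Answered in the cloud:', response.text());
    }
}
```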
@@ -1660,16 +1670,16 @@ const responseLineRE = /^data\: (.*)(?:\n\n|\r\r|\r\n\r\n)/;
  *
  * @param response - Response from a fetch call
  */
-function processStream(response, apiSettings) {
+function processStream(response, apiSettings, inferenceSource) {
     const inputStream = response.body.pipeThrough(new TextDecoderStream('utf8', { fatal: true }));
     const responseStream = getResponseStream(inputStream);
     const [stream1, stream2] = responseStream.tee();
     return {
-        stream: generateResponseSequence(stream1, apiSettings),
-        response: getResponsePromise(stream2, apiSettings)
+        stream: generateResponseSequence(stream1, apiSettings, inferenceSource),
+        response: getResponsePromise(stream2, apiSettings, inferenceSource)
     };
 }
-async function getResponsePromise(stream, apiSettings) {
+async function getResponsePromise(stream, apiSettings, inferenceSource) {
     const allResponses = [];
     const reader = stream.getReader();
     while (true) {
@@ -1679,12 +1689,12 @@ async function getResponsePromise(stream, apiSettings) {
             if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
                 generateContentResponse = mapGenerateContentResponse(generateContentResponse);
             }
-            return createEnhancedContentResponse(generateContentResponse);
+            return createEnhancedContentResponse(generateContentResponse, inferenceSource);
         }
         allResponses.push(value);
     }
 }
-async function* generateResponseSequence(stream, apiSettings) {
+async function* generateResponseSequence(stream, apiSettings, inferenceSource) {
     const reader = stream.getReader();
     while (true) {
         const { value, done } = await reader.read();
@@ -1693,10 +1703,10 @@ async function* generateResponseSequence(stream, apiSettings) {
         }
         let enhancedResponse;
         if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
-            enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value));
+            enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value), inferenceSource);
         }
         else {
-            enhancedResponse = createEnhancedContentResponse(value);
+            enhancedResponse = createEnhancedContentResponse(value, inferenceSource);
         }
         const firstCandidate = enhancedResponse.candidates?.[0];
         // Don't yield a response with no useful data for the developer.
@@ -1866,31 +1876,52 @@ const errorsCausingFallback = [
  */
 async function callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) {
     if (!chromeAdapter) {
-        return inCloudCall();
+        return {
+            response: await inCloudCall(),
+            inferenceSource: InferenceSource.IN_CLOUD
+        };
     }
     switch (chromeAdapter.mode) {
         case InferenceMode.ONLY_ON_DEVICE:
             if (await chromeAdapter.isAvailable(request)) {
-                return onDeviceCall();
+                return {
+                    response: await onDeviceCall(),
+                    inferenceSource: InferenceSource.ON_DEVICE
+                };
             }
             throw new AIError(AIErrorCode.UNSUPPORTED, 'Inference mode is ONLY_ON_DEVICE, but an on-device model is not available.');
         case InferenceMode.ONLY_IN_CLOUD:
-            return inCloudCall();
+            return {
+                response: await inCloudCall(),
+                inferenceSource: InferenceSource.IN_CLOUD
+            };
         case InferenceMode.PREFER_IN_CLOUD:
             try {
-                return await inCloudCall();
+                return {
+                    response: await inCloudCall(),
+                    inferenceSource: InferenceSource.IN_CLOUD
+                };
             }
             catch (e) {
                 if (e instanceof AIError && errorsCausingFallback.includes(e.code)) {
-                    return onDeviceCall();
+                    return {
+                        response: await onDeviceCall(),
+                        inferenceSource: InferenceSource.ON_DEVICE
+                    };
                 }
                 throw e;
             }
         case InferenceMode.PREFER_ON_DEVICE:
             if (await chromeAdapter.isAvailable(request)) {
-                return onDeviceCall();
+                return {
+                    response: await onDeviceCall(),
+                    inferenceSource: InferenceSource.ON_DEVICE
+                };
             }
-            return inCloudCall();
+            return {
+                response: await inCloudCall(),
+                inferenceSource: InferenceSource.IN_CLOUD
+            };
         default:
             throw new AIError(AIErrorCode.ERROR, `Unexpected infererence mode: ${chromeAdapter.mode}`);
     }
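
In other words, the wrapper no longer resolves to the bare response; it resolves to a `{ response, inferenceSource }` pair. An illustrative sketch of the new contract only, using the same callbacks that `generateContent()` in the next hunk wires up (nothing here is new API, it just restates the shape; it would run inside an async function in this module):

```javascript
// Illustrative only: how a caller inside this module consumes the new result shape.
const callResult = await callCloudOrDevice(
    params,
    chromeAdapter,
    () => chromeAdapter.generateContent(params),                              // on-device path
    () => generateContentOnCloud(apiSettings, model, params, requestOptions)  // in-cloud path
);
// callResult.response        -> whichever call actually ran (already awaited)
// callResult.inferenceSource -> InferenceSource.ON_DEVICE or InferenceSource.IN_CLOUD
```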
@@ -1920,8 +1951,8 @@ async function generateContentStreamOnCloud(apiSettings, model, params, requestO
     /* stream */ true, JSON.stringify(params), requestOptions);
 }
 async function generateContentStream(apiSettings, model, params, chromeAdapter, requestOptions) {
-    const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
-    return processStream(response, apiSettings); // TODO: Map streaming responses
+    const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
+    return processStream(callResult.response, apiSettings); // TODO: Map streaming responses
 }
 async function generateContentOnCloud(apiSettings, model, params, requestOptions) {
     if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
@@ -1931,9 +1962,9 @@ async function generateContentOnCloud(apiSettings, model, params, requestOptions
     /* stream */ false, JSON.stringify(params), requestOptions);
 }
 async function generateContent(apiSettings, model, params, chromeAdapter, requestOptions) {
-    const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
-    const generateContentResponse = await processGenerateContentResponse(response, apiSettings);
-    const enhancedResponse = createEnhancedContentResponse(generateContentResponse);
+    const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
+    const generateContentResponse = await processGenerateContentResponse(callResult.response, apiSettings);
+    const enhancedResponse = createEnhancedContentResponse(generateContentResponse, callResult.inferenceSource);
     return {
         response: enhancedResponse
     };
@@ -2507,75 +2538,104 @@ class LiveSession {
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends realtime input to the server.
+     * Sends text to the server in realtime.
      *
-     * @param mediaChunks - The media chunks to send.
+     * @example
+     * ```javascript
+     * liveSession.sendTextRealtime("Hello, how are you?");
+     * ```
+     *
+     * @param text - The text data to send.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async sendMediaChunks(mediaChunks) {
+    async sendTextRealtime(text) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-        // The backend does not support sending more than one mediaChunk in one message.
-        // Work around this limitation by sending mediaChunks in separate messages.
-        mediaChunks.forEach(mediaChunk => {
-            const message = {
-                realtimeInput: { mediaChunks: [mediaChunk] }
-            };
-            this.webSocketHandler.send(JSON.stringify(message));
-        });
+        const message = {
+            realtimeInput: {
+                text
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends function responses to the server.
+     * Sends audio data to the server in realtime.
      *
-     * @param functionResponses - The function responses to send.
+     * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+     * little-endian.
+     *
+     * @example
+     * ```javascript
+     * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+     * const blob = { mimeType: "audio/pcm", data: pcmData };
+     * liveSession.sendAudioRealtime(blob);
+     * ```
+     *
+     * @param blob - The base64-encoded PCM data to send to the server in realtime.
      * @throws If this session has been closed.
      *
     * @beta
      */
-    async sendFunctionResponses(functionResponses) {
+    async sendAudioRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
         const message = {
-            toolResponse: {
-                functionResponses
+            realtimeInput: {
+                audio: blob
             }
         };
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends a stream of {@link GenerativeContentBlob}.
+     * Sends video data to the server in realtime.
      *
-     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+     * is recommended to set `mimeType` to `image/jpeg`.
+     *
+     * @example
+     * ```javascript
+     * // const videoFrame = ... base64-encoded JPEG data
+     * const blob = { mimeType: "image/jpeg", data: videoFrame };
+     * liveSession.sendVideoRealtime(blob);
+     * ```
+     * @param blob - The base64-encoded video data to send to the server in realtime.
      * @throws If this session has been closed.
     *
      * @beta
      */
-    async sendMediaStream(mediaChunkStream) {
+    async sendVideoRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-        const reader = mediaChunkStream.getReader();
-        while (true) {
-            try {
-                const { done, value } = await reader.read();
-                if (done) {
-                    break;
-                }
-                else if (!value) {
-                    throw new Error('Missing chunk in reader, but reader is not done.');
-                }
-                await this.sendMediaChunks([value]);
-            }
-            catch (e) {
-                // Re-throw any errors that occur during stream consumption or sending.
-                const message = e instanceof Error ? e.message : 'Error processing media stream.';
-                throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+        const message = {
+            realtimeInput: {
+                video: blob
             }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
+    }
+    /**
+     * Sends function responses to the server.
+     *
+     * @param functionResponses - The function responses to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendFunctionResponses(functionResponses) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
        }
+        const message = {
+            toolResponse: {
+                functionResponses
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
     * Yields messages received from the server.
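
The hunk above replaces the single chunk-based sender with three type-specific realtime senders on `LiveSession`. A short usage sketch assembled from the `@example` blocks in the new JSDoc; `liveSession` is an open session, and the base64 payloads (`pcmChunkBase64`, `jpegFrameBase64`) are assumed to be captured and encoded by the app:

```javascript
// Sketch: realtime input over an open LiveSession (all three senders are @beta).
await liveSession.sendTextRealtime('Hello, how are you?');

await liveSession.sendAudioRealtime({
    mimeType: 'audio/pcm',    // base64-encoded 16-bit PCM at 16kHz, little-endian
    data: pcmChunkBase64
});

await liveSession.sendVideoRealtime({
    mimeType: 'image/jpeg',   // individual frames, sent at roughly 1 FPS
    data: jpegFrameBase64
});
```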
@@ -2633,6 +2693,62 @@ class LiveSession {
             await this.webSocketHandler.close(1000, 'Client closed session.');
         }
     }
+    /**
+     * Sends realtime input to the server.
+     *
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * @param mediaChunks - The media chunks to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaChunks(mediaChunks) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        // The backend does not support sending more than one mediaChunk in one message.
+        // Work around this limitation by sending mediaChunks in separate messages.
+        mediaChunks.forEach(mediaChunk => {
+            const message = {
+                realtimeInput: { mediaChunks: [mediaChunk] }
+            };
+            this.webSocketHandler.send(JSON.stringify(message));
+        });
+    }
+    /**
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * Sends a stream of {@link GenerativeContentBlob}.
+     *
+     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaStream(mediaChunkStream) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        const reader = mediaChunkStream.getReader();
+        while (true) {
+            try {
+                const { done, value } = await reader.read();
+                if (done) {
+                    break;
+                }
+                else if (!value) {
+                    throw new Error('Missing chunk in reader, but reader is not done.');
+                }
+                await this.sendMediaChunks([value]);
+            }
+            catch (e) {
+                // Re-throw any errors that occur during stream consumption or sending.
+                const message = e instanceof Error ? e.message : 'Error processing media stream.';
+                throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+            }
+        }
+    }
 }
 
 /**
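
Both chunk-based senders survive as deprecated wrappers, so existing call sites keep working while they migrate. A minimal before/after sketch, assuming the chunks are PCM audio blobs as in the `AudioConversationRunner` change further down:

```javascript
// Before (still works, now @deprecated): one message per chunk via sendMediaChunks().
await liveSession.sendMediaChunks([{ mimeType: 'audio/pcm', data: pcmChunkBase64 }]);

// After: use the sender that matches the payload type.
await liveSession.sendAudioRealtime({ mimeType: 'audio/pcm', data: pcmChunkBase64 });
```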
@@ -2693,13 +2809,18 @@ class LiveGenerativeModel extends AIModel {
         else {
             fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
         }
+        // inputAudioTranscription and outputAudioTranscription are on the generation config in the public API,
+        // but the backend expects them to be in the `setup` message.
+        const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = this.generationConfig;
         const setupMessage = {
             setup: {
                 model: fullModelPath,
-                generationConfig: this.generationConfig,
+                generationConfig,
                 tools: this.tools,
                 toolConfig: this.toolConfig,
-                systemInstruction: this.systemInstruction
+                systemInstruction: this.systemInstruction,
+                inputAudioTranscription,
+                outputAudioTranscription
             }
         };
         try {
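
Per the added comment, the transcription options are declared on the generation config in the public API and only moved into the `setup` message internally. A hedged sketch of the caller-side shape; `getLiveGenerativeModel()` and `ResponseModality` exist in the public firebase/ai surface, but the model name and the empty `{}` transcription configs are assumptions, not something this diff defines:

```javascript
// Sketch: declaring audio transcription on generationConfig.
// The SDK (per the hunk above) strips these two fields out of generationConfig
// and forwards them in the Live API `setup` message.
const liveModel = getLiveGenerativeModel(ai, {
    model: 'gemini-live-model-name',            // placeholder; use a supported Live model
    generationConfig: {
        responseModalities: [ResponseModality.AUDIO],
        inputAudioTranscription: {},            // assumed: empty config object enables it
        outputAudioTranscription: {}
    }
});
```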
@@ -3405,7 +3526,7 @@ class AudioConversationRunner {
                 mimeType: 'audio/pcm',
                 data: base64
             };
-            void this.liveSession.sendMediaChunks([chunk]);
+            void this.liveSession.sendAudioRealtime(chunk);
         };
     }
     /**
@@ -3825,6 +3946,7 @@ exports.ImagenModel = ImagenModel;
 exports.ImagenPersonFilterLevel = ImagenPersonFilterLevel;
 exports.ImagenSafetyFilterLevel = ImagenSafetyFilterLevel;
 exports.InferenceMode = InferenceMode;
+exports.InferenceSource = InferenceSource;
 exports.IntegerSchema = IntegerSchema;
 exports.Language = Language;
 exports.LiveGenerativeModel = LiveGenerativeModel;