@firebase/ai 2.4.0 → 2.5.0-20251028194003

@@ -4,7 +4,7 @@ import { FirebaseError, Deferred, getModularInstance } from '@firebase/util';
 import { Logger } from '@firebase/logger';
 
 var name = "@firebase/ai";
-var version = "2.4.0";
+var version = "2.5.0-20251028194003";
 
 /**
  * @license
@@ -323,6 +323,15 @@ const InferenceMode = {
     'ONLY_IN_CLOUD': 'only_in_cloud',
     'PREFER_IN_CLOUD': 'prefer_in_cloud'
 };
+/**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+const InferenceSource = {
+    'ON_DEVICE': 'on_device',
+    'IN_CLOUD': 'in_cloud'
+};
 /**
  * Represents the result of the code execution.
  *
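The new `InferenceSource` enum pairs with the hybrid `InferenceMode` options: once a response comes back, its `inferenceSource` property (set by the `createEnhancedContentResponse` change below) tells the caller which path served it. A minimal consumer sketch, assuming an initialized FirebaseApp and a hybrid-capable model; the model name and the `mode` option shape are illustrative, not confirmed by this diff:

```javascript
import { getAI, getGenerativeModel, InferenceMode, InferenceSource } from '@firebase/ai';

// Hypothetical setup: `app` is an initialized FirebaseApp, and the model is
// configured for hybrid inference (exact option names may differ by SDK version).
const model = getGenerativeModel(getAI(app), {
  model: 'gemini-2.0-flash', // illustrative model name
  mode: InferenceMode.PREFER_ON_DEVICE
});

// generateContent resolves to { response } per this diff.
const { response } = await model.generateContent('Hello!');
if (response.inferenceSource === InferenceSource.ON_DEVICE) {
  console.log('Served by the on-device model');
} else {
  console.log('Served in the cloud'); // InferenceSource.IN_CLOUD
}
```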
@@ -1269,7 +1278,7 @@ function hasValidCandidates(response) {
  * Creates an EnhancedGenerateContentResponse object that has helper functions and
  * other modifications that improve usability.
  */
-function createEnhancedContentResponse(response) {
+function createEnhancedContentResponse(response, inferenceSource = InferenceSource.IN_CLOUD) {
     /**
      * The Vertex AI backend omits default values.
      * This causes the `index` property to be omitted from the first candidate in the
@@ -1280,6 +1289,7 @@ function createEnhancedContentResponse(response) {
         response.candidates[0].index = 0;
     }
     const responseWithHelpers = addHelpers(response);
+    responseWithHelpers.inferenceSource = inferenceSource;
     return responseWithHelpers;
 }
 /**
@@ -1656,16 +1666,16 @@ const responseLineRE = /^data\: (.*)(?:\n\n|\r\r|\r\n\r\n)/;
  *
  * @param response - Response from a fetch call
  */
-function processStream(response, apiSettings) {
+function processStream(response, apiSettings, inferenceSource) {
     const inputStream = response.body.pipeThrough(new TextDecoderStream('utf8', { fatal: true }));
     const responseStream = getResponseStream(inputStream);
     const [stream1, stream2] = responseStream.tee();
     return {
-        stream: generateResponseSequence(stream1, apiSettings),
-        response: getResponsePromise(stream2, apiSettings)
+        stream: generateResponseSequence(stream1, apiSettings, inferenceSource),
+        response: getResponsePromise(stream2, apiSettings, inferenceSource)
     };
 }
-async function getResponsePromise(stream, apiSettings) {
+async function getResponsePromise(stream, apiSettings, inferenceSource) {
     const allResponses = [];
     const reader = stream.getReader();
     while (true) {
@@ -1675,12 +1685,12 @@ async function getResponsePromise(stream, apiSettings) {
             if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
                 generateContentResponse = mapGenerateContentResponse(generateContentResponse);
             }
-            return createEnhancedContentResponse(generateContentResponse);
+            return createEnhancedContentResponse(generateContentResponse, inferenceSource);
         }
         allResponses.push(value);
     }
 }
-async function* generateResponseSequence(stream, apiSettings) {
+async function* generateResponseSequence(stream, apiSettings, inferenceSource) {
     const reader = stream.getReader();
     while (true) {
         const { value, done } = await reader.read();
@@ -1689,10 +1699,10 @@ async function* generateResponseSequence(stream, apiSettings) {
         }
         let enhancedResponse;
         if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
-            enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value));
+            enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value), inferenceSource);
         }
         else {
-            enhancedResponse = createEnhancedContentResponse(value);
+            enhancedResponse = createEnhancedContentResponse(value, inferenceSource);
         }
         const firstCandidate = enhancedResponse.candidates?.[0];
         // Don't yield a response with no useful data for the developer.
@@ -1862,31 +1872,52 @@ const errorsCausingFallback = [
  */
 async function callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) {
     if (!chromeAdapter) {
-        return inCloudCall();
+        return {
+            response: await inCloudCall(),
+            inferenceSource: InferenceSource.IN_CLOUD
+        };
     }
     switch (chromeAdapter.mode) {
         case InferenceMode.ONLY_ON_DEVICE:
             if (await chromeAdapter.isAvailable(request)) {
-                return onDeviceCall();
+                return {
+                    response: await onDeviceCall(),
+                    inferenceSource: InferenceSource.ON_DEVICE
+                };
             }
             throw new AIError(AIErrorCode.UNSUPPORTED, 'Inference mode is ONLY_ON_DEVICE, but an on-device model is not available.');
         case InferenceMode.ONLY_IN_CLOUD:
-            return inCloudCall();
+            return {
+                response: await inCloudCall(),
+                inferenceSource: InferenceSource.IN_CLOUD
+            };
         case InferenceMode.PREFER_IN_CLOUD:
            try {
-                return await inCloudCall();
+                return {
+                    response: await inCloudCall(),
+                    inferenceSource: InferenceSource.IN_CLOUD
+                };
            }
            catch (e) {
                if (e instanceof AIError && errorsCausingFallback.includes(e.code)) {
-                    return onDeviceCall();
+                    return {
+                        response: await onDeviceCall(),
+                        inferenceSource: InferenceSource.ON_DEVICE
+                    };
                }
                throw e;
            }
        case InferenceMode.PREFER_ON_DEVICE:
            if (await chromeAdapter.isAvailable(request)) {
-                return onDeviceCall();
+                return {
+                    response: await onDeviceCall(),
+                    inferenceSource: InferenceSource.ON_DEVICE
+                };
            }
-            return inCloudCall();
+            return {
+                response: await inCloudCall(),
+                inferenceSource: InferenceSource.IN_CLOUD
+            };
        default:
            throw new AIError(AIErrorCode.ERROR, `Unexpected infererence mode: ${chromeAdapter.mode}`);
    }
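Every branch of `callCloudOrDevice` now resolves to the same `{ response, inferenceSource }` shape instead of the bare response, which is what lets the callers below tag the enhanced response. A sketch of the new contract (`callCloudOrDevice` is internal and not exported, so this is illustrative only; `fetchFromCloud` is a hypothetical stand-in for the in-cloud call):

```javascript
const callResult = await callCloudOrDevice(
  request,
  chromeAdapter,
  () => chromeAdapter.generateContent(request),
  () => fetchFromCloud(request) // hypothetical in-cloud fallback
);
// callResult.response: the raw response from whichever path ran
// callResult.inferenceSource: InferenceSource.ON_DEVICE or InferenceSource.IN_CLOUD
```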
@@ -1916,8 +1947,8 @@ async function generateContentStreamOnCloud(apiSettings, model, params, requestO
     /* stream */ true, JSON.stringify(params), requestOptions);
 }
 async function generateContentStream(apiSettings, model, params, chromeAdapter, requestOptions) {
-    const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
-    return processStream(response, apiSettings); // TODO: Map streaming responses
+    const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
+    return processStream(callResult.response, apiSettings); // TODO: Map streaming responses
 }
 async function generateContentOnCloud(apiSettings, model, params, requestOptions) {
     if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
@@ -1927,9 +1958,9 @@ async function generateContentOnCloud(apiSettings, model, params, requestOptions
     /* stream */ false, JSON.stringify(params), requestOptions);
 }
 async function generateContent(apiSettings, model, params, chromeAdapter, requestOptions) {
-    const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
-    const generateContentResponse = await processGenerateContentResponse(response, apiSettings);
-    const enhancedResponse = createEnhancedContentResponse(generateContentResponse);
+    const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
+    const generateContentResponse = await processGenerateContentResponse(callResult.response, apiSettings);
+    const enhancedResponse = createEnhancedContentResponse(generateContentResponse, callResult.inferenceSource);
     return {
         response: enhancedResponse
     };
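Note the asymmetry between the two paths: `generateContent` forwards `callResult.inferenceSource` into `createEnhancedContentResponse`, but `generateContentStream` calls `processStream(callResult.response, apiSettings)` without the new third argument, so streamed responses keep the `IN_CLOUD` default. A consumer-side sketch of what that implies (prompt text is illustrative):

```javascript
const result = await model.generateContent('Summarize this.');
console.log(result.response.inferenceSource); // reflects where inference actually ran

const streamResult = await model.generateContentStream('Summarize this.');
for await (const chunk of streamResult.stream) {
  console.log(chunk.inferenceSource); // always the 'in_cloud' default in this version
}
```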
@@ -2503,75 +2534,104 @@ class LiveSession {
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends realtime input to the server.
+     * Sends text to the server in realtime.
      *
-     * @param mediaChunks - The media chunks to send.
+     * @example
+     * ```javascript
+     * liveSession.sendTextRealtime("Hello, how are you?");
+     * ```
+     *
+     * @param text - The text data to send.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async sendMediaChunks(mediaChunks) {
+    async sendTextRealtime(text) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-        // The backend does not support sending more than one mediaChunk in one message.
-        // Work around this limitation by sending mediaChunks in separate messages.
-        mediaChunks.forEach(mediaChunk => {
-            const message = {
-                realtimeInput: { mediaChunks: [mediaChunk] }
-            };
-            this.webSocketHandler.send(JSON.stringify(message));
-        });
+        const message = {
+            realtimeInput: {
+                text
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends function responses to the server.
+     * Sends audio data to the server in realtime.
      *
-     * @param functionResponses - The function responses to send.
+     * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+     * little-endian.
+     *
+     * @example
+     * ```javascript
+     * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+     * const blob = { mimeType: "audio/pcm", data: pcmData };
+     * liveSession.sendAudioRealtime(blob);
+     * ```
+     *
+     * @param blob - The base64-encoded PCM data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async sendFunctionResponses(functionResponses) {
+    async sendAudioRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
         const message = {
-            toolResponse: {
-                functionResponses
+            realtimeInput: {
+                audio: blob
             }
         };
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends a stream of {@link GenerativeContentBlob}.
+     * Sends video data to the server in realtime.
      *
-     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+     * is recommended to set `mimeType` to `image/jpeg`.
+     *
+     * @example
+     * ```javascript
+     * // const videoFrame = ... base64-encoded JPEG data
+     * const blob = { mimeType: "image/jpeg", data: videoFrame };
+     * liveSession.sendVideoRealtime(blob);
+     * ```
+     * @param blob - The base64-encoded video data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async sendMediaStream(mediaChunkStream) {
+    async sendVideoRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-        const reader = mediaChunkStream.getReader();
-        while (true) {
-            try {
-                const { done, value } = await reader.read();
-                if (done) {
-                    break;
-                }
-                else if (!value) {
-                    throw new Error('Missing chunk in reader, but reader is not done.');
-                }
-                await this.sendMediaChunks([value]);
-            }
-            catch (e) {
-                // Re-throw any errors that occur during stream consumption or sending.
-                const message = e instanceof Error ? e.message : 'Error processing media stream.';
-                throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+        const message = {
+            realtimeInput: {
+                video: blob
             }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
+    }
+    /**
+     * Sends function responses to the server.
+     *
+     * @param functionResponses - The function responses to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendFunctionResponses(functionResponses) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
+        const message = {
+            toolResponse: {
+                functionResponses
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
      * Yields messages received from the server.
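Taken together, the three new methods replace the single untyped `mediaChunks` path with one message shape per modality. A usage sketch, assuming a session obtained through the package's existing `getLiveGenerativeModel(...).connect()` flow and base64 payloads prepared elsewhere (`pcmBase64` and `frameBase64` are placeholders; the model name is illustrative):

```javascript
const liveModel = getLiveGenerativeModel(ai, { model: 'gemini-live-model' }); // illustrative name
const session = await liveModel.connect();

await session.sendTextRealtime('Describe what you hear and see.');
await session.sendAudioRealtime({ mimeType: 'audio/pcm', data: pcmBase64 });    // 16-bit PCM @ 16kHz, little-endian
await session.sendVideoRealtime({ mimeType: 'image/jpeg', data: frameBase64 }); // one frame per second
```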
@@ -2629,6 +2689,62 @@ class LiveSession {
             await this.webSocketHandler.close(1000, 'Client closed session.');
         }
     }
+    /**
+     * Sends realtime input to the server.
+     *
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * @param mediaChunks - The media chunks to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaChunks(mediaChunks) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        // The backend does not support sending more than one mediaChunk in one message.
+        // Work around this limitation by sending mediaChunks in separate messages.
+        mediaChunks.forEach(mediaChunk => {
+            const message = {
+                realtimeInput: { mediaChunks: [mediaChunk] }
+            };
+            this.webSocketHandler.send(JSON.stringify(message));
+        });
+    }
+    /**
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * Sends a stream of {@link GenerativeContentBlob}.
+     *
+     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaStream(mediaChunkStream) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        const reader = mediaChunkStream.getReader();
+        while (true) {
+            try {
+                const { done, value } = await reader.read();
+                if (done) {
+                    break;
+                }
+                else if (!value) {
+                    throw new Error('Missing chunk in reader, but reader is not done.');
+                }
+                await this.sendMediaChunks([value]);
+            }
+            catch (e) {
+                // Re-throw any errors that occur during stream consumption or sending.
+                const message = e instanceof Error ? e.message : 'Error processing media stream.';
+                throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+            }
+        }
+    }
 }
 
 /**
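For code still on the deprecated methods, migration is mechanical: route each `GenerativeContentBlob` through the sender matching its modality. A hedged before/after sketch (payload variables are placeholders):

```javascript
// Before (deprecated): one path for every modality.
await session.sendMediaChunks([{ mimeType: 'audio/pcm', data: pcmBase64 }]);

// After: modality-specific senders.
await session.sendAudioRealtime({ mimeType: 'audio/pcm', data: pcmBase64 });
await session.sendVideoRealtime({ mimeType: 'image/jpeg', data: frameBase64 });
```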
@@ -2689,13 +2805,18 @@ class LiveGenerativeModel extends AIModel {
         else {
             fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
         }
+        // inputAudioTranscription and outputAudioTranscription are on the generation config in the public API,
+        // but the backend expects them to be in the `setup` message.
+        const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = this.generationConfig;
         const setupMessage = {
             setup: {
                 model: fullModelPath,
-                generationConfig: this.generationConfig,
+                generationConfig,
                 tools: this.tools,
                 toolConfig: this.toolConfig,
-                systemInstruction: this.systemInstruction
+                systemInstruction: this.systemInstruction,
+                inputAudioTranscription,
+                outputAudioTranscription
             }
         };
         try {
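Because of the destructuring above, transcription is configured on the public `generationConfig` even though the wire protocol carries it at the `setup` level. A configuration sketch, assuming (as in the underlying Live API) that an empty object enables transcription; the model name is illustrative:

```javascript
const liveModel = getLiveGenerativeModel(ai, {
  model: 'gemini-live-model', // illustrative name
  generationConfig: {
    responseModalities: [ResponseModality.AUDIO],
    inputAudioTranscription: {},  // assumed: empty object enables transcription of user audio
    outputAudioTranscription: {}  // assumed: empty object enables transcription of model audio
  }
});
```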
@@ -3401,7 +3522,7 @@ class AudioConversationRunner {
                 mimeType: 'audio/pcm',
                 data: base64
             };
-            void this.liveSession.sendMediaChunks([chunk]);
+            void this.liveSession.sendAudioRealtime(chunk);
         };
     }
     /**
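`AudioConversationRunner` backs the exported `startAudioConversation` helper, so captured microphone audio now travels through `sendAudioRealtime` as well; callers need no changes. A usage sketch (the controller's `stop()` method is assumed from the existing API surface, not shown in this diff):

```javascript
const controller = await startAudioConversation(session);
// ...microphone audio is captured and sent via sendAudioRealtime...
await controller.stop();
```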
@@ -3796,5 +3917,5 @@ function registerAI() {
 }
 registerAI();
 
-export { AIError, AIErrorCode, AIModel, AnyOfSchema, ArraySchema, Backend, BackendType, BlockReason, BooleanSchema, ChatSession, FinishReason, FunctionCallingMode, GenerativeModel, GoogleAIBackend, HarmBlockMethod, HarmBlockThreshold, HarmCategory, HarmProbability, HarmSeverity, ImagenAspectRatio, ImagenImageFormat, ImagenModel, ImagenPersonFilterLevel, ImagenSafetyFilterLevel, InferenceMode, IntegerSchema, Language, LiveGenerativeModel, LiveResponseType, LiveSession, Modality, NumberSchema, ObjectSchema, Outcome, POSSIBLE_ROLES, ResponseModality, Schema, SchemaType, StringSchema, URLRetrievalStatus, VertexAIBackend, getAI, getGenerativeModel, getImagenModel, getLiveGenerativeModel, startAudioConversation };
+export { AIError, AIErrorCode, AIModel, AnyOfSchema, ArraySchema, Backend, BackendType, BlockReason, BooleanSchema, ChatSession, FinishReason, FunctionCallingMode, GenerativeModel, GoogleAIBackend, HarmBlockMethod, HarmBlockThreshold, HarmCategory, HarmProbability, HarmSeverity, ImagenAspectRatio, ImagenImageFormat, ImagenModel, ImagenPersonFilterLevel, ImagenSafetyFilterLevel, InferenceMode, InferenceSource, IntegerSchema, Language, LiveGenerativeModel, LiveResponseType, LiveSession, Modality, NumberSchema, ObjectSchema, Outcome, POSSIBLE_ROLES, ResponseModality, Schema, SchemaType, StringSchema, URLRetrievalStatus, VertexAIBackend, getAI, getGenerativeModel, getImagenModel, getLiveGenerativeModel, startAudioConversation };
 //# sourceMappingURL=index.node.mjs.map