@firebase/ai 2.4.0-canary.91c218db2 → 2.4.0-canary.bc5a7c4a7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ var util = require('@firebase/util');
 var logger$1 = require('@firebase/logger');
 
 var name = "@firebase/ai";
-var version = "2.4.0-canary.91c218db2";
+var version = "2.4.0-canary.bc5a7c4a7";
 
 /**
  * @license
@@ -2538,75 +2538,104 @@ class LiveSession {
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends realtime input to the server.
+     * Sends text to the server in realtime.
      *
-     * @param mediaChunks - The media chunks to send.
+     * @example
+     * ```javascript
+     * liveSession.sendTextRealtime("Hello, how are you?");
+     * ```
+     *
+     * @param text - The text data to send.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async sendMediaChunks(mediaChunks) {
+    async sendTextRealtime(text) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-        // The backend does not support sending more than one mediaChunk in one message.
-        // Work around this limitation by sending mediaChunks in separate messages.
-        mediaChunks.forEach(mediaChunk => {
-            const message = {
-                realtimeInput: { mediaChunks: [mediaChunk] }
-            };
-            this.webSocketHandler.send(JSON.stringify(message));
-        });
+        const message = {
+            realtimeInput: {
+                text
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends function responses to the server.
+     * Sends audio data to the server in realtime.
      *
-     * @param functionResponses - The function responses to send.
+     * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+     * little-endian.
+     *
+     * @example
+     * ```javascript
+     * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+     * const blob = { mimeType: "audio/pcm", data: pcmData };
+     * liveSession.sendAudioRealtime(blob);
+     * ```
+     *
+     * @param blob - The base64-encoded PCM data to send to the server in realtime.
      * @throws If this session has been closed.
      *
     * @beta
      */
-    async sendFunctionResponses(functionResponses) {
+    async sendAudioRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
         const message = {
-            toolResponse: {
-                functionResponses
+            realtimeInput: {
+                audio: blob
             }
         };
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends a stream of {@link GenerativeContentBlob}.
+     * Sends video data to the server in realtime.
      *
-     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+     * is recommended to set `mimeType` to `image/jpeg`.
+     *
+     * @example
+     * ```javascript
+     * // const videoFrame = ... base64-encoded JPEG data
+     * const blob = { mimeType: "image/jpeg", data: videoFrame };
+     * liveSession.sendVideoRealtime(blob);
+     * ```
+     * @param blob - The base64-encoded video data to send to the server in realtime.
     * @throws If this session has been closed.
      *
      * @beta
      */
-    async sendMediaStream(mediaChunkStream) {
+    async sendVideoRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-        const reader = mediaChunkStream.getReader();
-        while (true) {
-            try {
-                const { done, value } = await reader.read();
-                if (done) {
-                    break;
-                }
-                else if (!value) {
-                    throw new Error('Missing chunk in reader, but reader is not done.');
-                }
-                await this.sendMediaChunks([value]);
-            }
-            catch (e) {
-                // Re-throw any errors that occur during stream consumption or sending.
-                const message = e instanceof Error ? e.message : 'Error processing media stream.';
-                throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+        const message = {
+            realtimeInput: {
+                video: blob
            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
+    }
+    /**
+     * Sends function responses to the server.
+     *
+     * @param functionResponses - The function responses to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendFunctionResponses(functionResponses) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
        }
+        const message = {
+            toolResponse: {
+                functionResponses
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
    }
     /**
      * Yields messages received from the server.
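
The three new methods split the old catch-all realtime input into one `realtimeInput` wire field per payload type. A minimal migration sketch, assuming an open `liveSession` and base64 payloads prepared elsewhere (`base64Audio` and `base64Frame` are hypothetical names):

```javascript
// Before: every payload went through sendMediaChunks, one chunk per message.
// await liveSession.sendMediaChunks([{ mimeType: 'audio/pcm', data: base64Audio }]);

// After: each payload type has a dedicated method and wire field.
await liveSession.sendTextRealtime('Describe what you hear and see.');
await liveSession.sendAudioRealtime({ mimeType: 'audio/pcm', data: base64Audio });
await liveSession.sendVideoRealtime({ mimeType: 'image/jpeg', data: base64Frame });
```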
@@ -2664,6 +2693,62 @@ class LiveSession {
             await this.webSocketHandler.close(1000, 'Client closed session.');
         }
     }
+    /**
+     * Sends realtime input to the server.
+     *
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * @param mediaChunks - The media chunks to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaChunks(mediaChunks) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        // The backend does not support sending more than one mediaChunk in one message.
+        // Work around this limitation by sending mediaChunks in separate messages.
+        mediaChunks.forEach(mediaChunk => {
+            const message = {
+                realtimeInput: { mediaChunks: [mediaChunk] }
+            };
+            this.webSocketHandler.send(JSON.stringify(message));
+        });
+    }
+    /**
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * Sends a stream of {@link GenerativeContentBlob}.
+     *
+     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaStream(mediaChunkStream) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        const reader = mediaChunkStream.getReader();
+        while (true) {
+            try {
+                const { done, value } = await reader.read();
+                if (done) {
+                    break;
+                }
+                else if (!value) {
+                    throw new Error('Missing chunk in reader, but reader is not done.');
+                }
+                await this.sendMediaChunks([value]);
+            }
+            catch (e) {
+                // Re-throw any errors that occur during stream consumption or sending.
+                const message = e instanceof Error ? e.message : 'Error processing media stream.';
+                throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+            }
+        }
+    }
 }
 
 /**
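
For callers migrating off the deprecated stream helper, the same consumption loop can dispatch to the typed methods instead. A sketch, assuming the stream yields `GenerativeContentBlob`-shaped values with a `mimeType` to switch on (`sendBlobStream` is a hypothetical helper, not part of the SDK):

```javascript
// Hypothetical replacement for sendMediaStream: read the stream and route
// each chunk to the appropriate typed method by MIME type.
async function sendBlobStream(liveSession, stream) {
    const reader = stream.getReader();
    while (true) {
        const { done, value } = await reader.read();
        if (done) {
            break;
        }
        if (value.mimeType.startsWith('audio/')) {
            await liveSession.sendAudioRealtime(value);
        } else {
            await liveSession.sendVideoRealtime(value);
        }
    }
}
```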
@@ -2724,13 +2809,18 @@ class LiveGenerativeModel extends AIModel {
         else {
             fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
         }
+        // inputAudioTranscription and outputAudioTranscription are on the generation config in the public API,
+        // but the backend expects them to be in the `setup` message.
+        const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = this.generationConfig;
         const setupMessage = {
             setup: {
                 model: fullModelPath,
-                generationConfig: this.generationConfig,
+                generationConfig,
                 tools: this.tools,
                 toolConfig: this.toolConfig,
-                systemInstruction: this.systemInstruction
+                systemInstruction: this.systemInstruction,
+                inputAudioTranscription,
+                outputAudioTranscription
             }
         };
         try {
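
This keeps the transcription options on the public `generationConfig` while the SDK relocates them to the top level of the `setup` message on the wire. A sketch of enabling both transcriptions, assuming this package's `getLiveGenerativeModel` entry point and `ResponseModality` enum; the model name is a placeholder:

```javascript
// Callers set the transcription fields on generationConfig as before; the SDK
// now strips them out and sends them at the top level of `setup`.
const model = getLiveGenerativeModel(ai, {
    model: 'gemini-live-placeholder-model', // hypothetical model name
    generationConfig: {
        responseModalities: [ResponseModality.AUDIO],
        inputAudioTranscription: {},
        outputAudioTranscription: {}
    }
});
```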
@@ -3436,7 +3526,7 @@ class AudioConversationRunner {
                 mimeType: 'audio/pcm',
                 data: base64
             };
-            void this.liveSession.sendMediaChunks([chunk]);
+            void this.liveSession.sendAudioRealtime(chunk);
         };
     }
     /**
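
The built-in audio conversation runner now forwards each captured PCM chunk through `sendAudioRealtime` instead of wrapping it in a single-element `mediaChunks` array. Caller-facing usage is unchanged; a brief sketch, assuming this package's `startAudioConversation` helper and an open session:

```javascript
// The helper captures microphone audio and sends it over the live session;
// with this change, chunks go out via sendAudioRealtime under the hood.
const controller = await startAudioConversation(liveSession);
// ... converse; model audio plays back as it streams in.
await controller.stop();
```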