@firebase/ai 2.4.0-canary.91c218db2 → 2.4.0-canary.bc5a7c4a7

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -4,7 +4,7 @@ import { FirebaseError, Deferred, getModularInstance } from '@firebase/util';
 import { Logger } from '@firebase/logger';
 
 var name = "@firebase/ai";
-var version = "2.4.0-canary.91c218db2";
+var version = "2.4.0-canary.bc5a7c4a7";
 
 /**
  * @license
@@ -2534,75 +2534,104 @@ class LiveSession {
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends realtime input to the server.
+     * Sends text to the server in realtime.
      *
-     * @param mediaChunks - The media chunks to send.
+     * @example
+     * ```javascript
+     * liveSession.sendTextRealtime("Hello, how are you?");
+     * ```
+     *
+     * @param text - The text data to send.
      * @throws If this session has been closed.
      *
     * @beta
      */
-    async sendMediaChunks(mediaChunks) {
+    async sendTextRealtime(text) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-        // The backend does not support sending more than one mediaChunk in one message.
-        // Work around this limitation by sending mediaChunks in separate messages.
-        mediaChunks.forEach(mediaChunk => {
-            const message = {
-                realtimeInput: { mediaChunks: [mediaChunk] }
-            };
-            this.webSocketHandler.send(JSON.stringify(message));
-        });
+        const message = {
+            realtimeInput: {
+                text
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends function responses to the server.
+     * Sends audio data to the server in realtime.
      *
-     * @param functionResponses - The function responses to send.
+     * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+     * little-endian.
+     *
+     * @example
+     * ```javascript
+     * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+     * const blob = { mimeType: "audio/pcm", data: pcmData };
+     * liveSession.sendAudioRealtime(blob);
+     * ```
+     *
+     * @param blob - The base64-encoded PCM data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async sendFunctionResponses(functionResponses) {
+    async sendAudioRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
         const message = {
-            toolResponse: {
-                functionResponses
+            realtimeInput: {
+                audio: blob
             }
         };
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends a stream of {@link GenerativeContentBlob}.
+     * Sends video data to the server in realtime.
      *
-     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+     * is recommended to set `mimeType` to `image/jpeg`.
+     *
+     * @example
+     * ```javascript
+     * // const videoFrame = ... base64-encoded JPEG data
+     * const blob = { mimeType: "image/jpeg", data: videoFrame };
+     * liveSession.sendVideoRealtime(blob);
+     * ```
+     * @param blob - The base64-encoded video data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async sendMediaStream(mediaChunkStream) {
+    async sendVideoRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-        const reader = mediaChunkStream.getReader();
-        while (true) {
-            try {
-                const { done, value } = await reader.read();
-                if (done) {
-                    break;
-                }
-                else if (!value) {
-                    throw new Error('Missing chunk in reader, but reader is not done.');
-                }
-                await this.sendMediaChunks([value]);
-            }
-            catch (e) {
-                // Re-throw any errors that occur during stream consumption or sending.
-                const message = e instanceof Error ? e.message : 'Error processing media stream.';
-                throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+        const message = {
+            realtimeInput: {
+                video: blob
             }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
+    }
+    /**
+     * Sends function responses to the server.
+     *
+     * @param functionResponses - The function responses to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendFunctionResponses(functionResponses) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
+        const message = {
+            toolResponse: {
+                functionResponses
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
      * Yields messages received from the server.
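
Taken together, the hunk above replaces the single realtime entry point with one method per modality, each wrapping its payload in a `realtimeInput` message. A minimal usage sketch follows; it assumes a `LiveSession` obtained from `LiveGenerativeModel.connect()` (not shown in this diff), and `liveModel`, `pcmData`, and `jpegFrame` are placeholders:

```javascript
// Sketch only: `liveModel`, `pcmData`, and `jpegFrame` are placeholders.
const liveSession = await liveModel.connect();

// Sent to the server as { realtimeInput: { text } }.
await liveSession.sendTextRealtime('Hello, how are you?');

// Audio must be base64-encoded 16-bit PCM at 16kHz little-endian.
await liveSession.sendAudioRealtime({ mimeType: 'audio/pcm', data: pcmData });

// Video goes out as individual frames at 1 FPS; JPEG is recommended.
await liveSession.sendVideoRealtime({ mimeType: 'image/jpeg', data: jpegFrame });
```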
@@ -2660,6 +2689,62 @@ class LiveSession {
             await this.webSocketHandler.close(1000, 'Client closed session.');
         }
     }
+    /**
+     * Sends realtime input to the server.
+     *
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * @param mediaChunks - The media chunks to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaChunks(mediaChunks) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        // The backend does not support sending more than one mediaChunk in one message.
+        // Work around this limitation by sending mediaChunks in separate messages.
+        mediaChunks.forEach(mediaChunk => {
+            const message = {
+                realtimeInput: { mediaChunks: [mediaChunk] }
+            };
+            this.webSocketHandler.send(JSON.stringify(message));
+        });
+    }
+    /**
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * Sends a stream of {@link GenerativeContentBlob}.
+     *
+     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaStream(mediaChunkStream) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        const reader = mediaChunkStream.getReader();
+        while (true) {
+            try {
+                const { done, value } = await reader.read();
+                if (done) {
+                    break;
+                }
+                else if (!value) {
+                    throw new Error('Missing chunk in reader, but reader is not done.');
+                }
+                await this.sendMediaChunks([value]);
+            }
+            catch (e) {
+                // Re-throw any errors that occur during stream consumption or sending.
+                const message = e instanceof Error ? e.message : 'Error processing media stream.';
+                throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+            }
+        }
+    }
 }
 
 /**
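
For callers migrating off the deprecated methods, each `GenerativeContentBlob` that previously went through `sendMediaChunks()` maps to one typed call, chosen by its `mimeType`. A hedged migration sketch; the `sendChunk` helper is illustrative, not part of the SDK:

```javascript
// Illustrative helper, not part of @firebase/ai: route a chunk to the
// typed method that matches its mimeType.
async function sendChunk(liveSession, chunk) {
  if (chunk.mimeType.startsWith('audio/')) {
    await liveSession.sendAudioRealtime(chunk);
  } else {
    await liveSession.sendVideoRealtime(chunk);
  }
}

// Before: await liveSession.sendMediaChunks(chunks);
// After:
for (const chunk of chunks) {
  await sendChunk(liveSession, chunk);
}
```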
@@ -2720,13 +2805,18 @@ class LiveGenerativeModel extends AIModel {
         else {
             fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
         }
+        // inputAudioTranscription and outputAudioTranscription are on the generation config in the public API,
+        // but the backend expects them to be in the `setup` message.
+        const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = this.generationConfig;
         const setupMessage = {
             setup: {
                 model: fullModelPath,
-                generationConfig: this.generationConfig,
+                generationConfig,
                 tools: this.tools,
                 toolConfig: this.toolConfig,
-                systemInstruction: this.systemInstruction
+                systemInstruction: this.systemInstruction,
+                inputAudioTranscription,
+                outputAudioTranscription
             }
         };
         try {
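
This hunk lets transcription options ride on the public `generationConfig` while the SDK strips them out and hoists them into the `setup` message the backend expects. A hedged sketch of enabling them; the model name and empty-object config values are assumptions, not confirmed by this diff, which only shows the two fields being destructured off `generationConfig`:

```javascript
// Sketch only: model name and empty-object transcription configs are
// assumptions; the diff confirms only that these two fields are moved
// from generationConfig into the setup message.
const liveModel = getLiveGenerativeModel(ai, {
  model: 'gemini-live-2.5-flash',
  generationConfig: {
    inputAudioTranscription: {},
    outputAudioTranscription: {}
  }
});
```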
@@ -3432,7 +3522,7 @@ class AudioConversationRunner {
                 mimeType: 'audio/pcm',
                 data: base64
             };
-            void this.liveSession.sendMediaChunks([chunk]);
+            void this.liveSession.sendAudioRealtime(chunk);
         };
     }
     /**