@firebase/ai 2.3.0 → 2.4.0-canary.22e0a1adb

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/ai-public.d.ts +240 -35
  2. package/dist/ai.d.ts +241 -35
  3. package/dist/esm/index.esm.js +295 -92
  4. package/dist/esm/index.esm.js.map +1 -1
  5. package/dist/esm/src/api.d.ts +1 -1
  6. package/dist/esm/src/methods/chrome-adapter.d.ts +1 -1
  7. package/dist/esm/src/methods/live-session-helpers.d.ts +2 -2
  8. package/dist/esm/src/methods/live-session.d.ts +71 -7
  9. package/dist/esm/src/models/imagen-model.d.ts +2 -2
  10. package/dist/esm/src/requests/hybrid-helpers.d.ts +7 -2
  11. package/dist/esm/src/requests/imagen-image-format.d.ts +3 -3
  12. package/dist/esm/src/requests/response-helpers.d.ts +2 -2
  13. package/dist/esm/src/requests/stream-reader.d.ts +2 -1
  14. package/dist/esm/src/types/content.d.ts +4 -4
  15. package/dist/esm/src/types/enums.d.ts +19 -4
  16. package/dist/esm/src/types/googleai.d.ts +2 -1
  17. package/dist/esm/src/types/imagen/requests.d.ts +9 -9
  18. package/dist/esm/src/types/imagen/responses.d.ts +3 -3
  19. package/dist/esm/src/types/live-responses.d.ts +16 -2
  20. package/dist/esm/src/types/requests.d.ts +22 -2
  21. package/dist/esm/src/types/responses.d.ts +99 -1
  22. package/dist/index.cjs.js +296 -91
  23. package/dist/index.cjs.js.map +1 -1
  24. package/dist/index.node.cjs.js +276 -84
  25. package/dist/index.node.cjs.js.map +1 -1
  26. package/dist/index.node.mjs +275 -85
  27. package/dist/index.node.mjs.map +1 -1
  28. package/dist/src/api.d.ts +1 -1
  29. package/dist/src/methods/chrome-adapter.d.ts +1 -1
  30. package/dist/src/methods/live-session-helpers.d.ts +2 -2
  31. package/dist/src/methods/live-session.d.ts +71 -7
  32. package/dist/src/models/imagen-model.d.ts +2 -2
  33. package/dist/src/requests/hybrid-helpers.d.ts +7 -2
  34. package/dist/src/requests/imagen-image-format.d.ts +3 -3
  35. package/dist/src/requests/response-helpers.d.ts +2 -2
  36. package/dist/src/requests/stream-reader.d.ts +2 -1
  37. package/dist/src/types/content.d.ts +4 -4
  38. package/dist/src/types/enums.d.ts +19 -4
  39. package/dist/src/types/googleai.d.ts +2 -1
  40. package/dist/src/types/imagen/requests.d.ts +9 -9
  41. package/dist/src/types/imagen/responses.d.ts +3 -3
  42. package/dist/src/types/live-responses.d.ts +16 -2
  43. package/dist/src/types/requests.d.ts +22 -2
  44. package/dist/src/types/responses.d.ts +99 -1
  45. package/package.json +8 -8
@@ -8,7 +8,7 @@ var util = require('@firebase/util');
  var logger$1 = require('@firebase/logger');
 
  var name = "@firebase/ai";
- var version = "2.3.0";
+ var version = "2.4.0-canary.22e0a1adb";
 
  /**
  * @license
@@ -327,10 +327,19 @@ const InferenceMode = {
  'ONLY_IN_CLOUD': 'only_in_cloud',
  'PREFER_IN_CLOUD': 'prefer_in_cloud'
  };
+ /**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+ const InferenceSource = {
+ 'ON_DEVICE': 'on_device',
+ 'IN_CLOUD': 'in_cloud'
+ };
  /**
  * Represents the result of the code execution.
  *
- * @public
+ * @beta
  */
  const Outcome = {
  UNSPECIFIED: 'OUTCOME_UNSPECIFIED',
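The new `InferenceSource` enum is surfaced to callers through the `inferenceSource` property that later hunks attach to every enhanced response. A minimal sketch of branching on it (imports use the scoped package under review; apps typically import from the `firebase/ai` umbrella instead):

```javascript
import { InferenceSource } from '@firebase/ai';

// Hedged sketch: assumes `model` came from getGenerativeModel(), configured
// for hybrid inference as shown in the callCloudOrDevice() hunk further below.
async function describeInference(model) {
  const result = await model.generateContent('Summarize hybrid inference.');
  // `inferenceSource` defaults to IN_CLOUD in createEnhancedContentResponse().
  if (result.response.inferenceSource === InferenceSource.ON_DEVICE) {
    console.log('Answered by the on-device model.');
  } else {
    console.log('Answered by the cloud backend.');
  }
  return result.response.text();
}
```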
@@ -341,7 +350,7 @@ const Outcome = {
  /**
  * The programming language of the code.
  *
- * @public
+ * @beta
  */
  const Language = {
  UNSPECIFIED: 'LANGUAGE_UNSPECIFIED',
@@ -364,6 +373,45 @@ const Language = {
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+ /**
+ * The status of a URL retrieval.
+ *
+ * @remarks
+ * <b>URL_RETRIEVAL_STATUS_UNSPECIFIED:</b> Unspecified retrieval status.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_SUCCESS:</b> The URL retrieval was successful.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_ERROR:</b> The URL retrieval failed.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_PAYWALL:</b> The URL retrieval failed because the content is behind a paywall.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_UNSAFE:</b> The URL retrieval failed because the content is unsafe.
+ * <br/>
+ *
+ * @beta
+ */
+ const URLRetrievalStatus = {
+ /**
+ * Unspecified retrieval status.
+ */
+ URL_RETRIEVAL_STATUS_UNSPECIFIED: 'URL_RETRIEVAL_STATUS_UNSPECIFIED',
+ /**
+ * The URL retrieval was successful.
+ */
+ URL_RETRIEVAL_STATUS_SUCCESS: 'URL_RETRIEVAL_STATUS_SUCCESS',
+ /**
+ * The URL retrieval failed.
+ */
+ URL_RETRIEVAL_STATUS_ERROR: 'URL_RETRIEVAL_STATUS_ERROR',
+ /**
+ * The URL retrieval failed because the content is behind a paywall.
+ */
+ URL_RETRIEVAL_STATUS_PAYWALL: 'URL_RETRIEVAL_STATUS_PAYWALL',
+ /**
+ * The URL retrieval failed because the content is unsafe.
+ */
+ URL_RETRIEVAL_STATUS_UNSAFE: 'URL_RETRIEVAL_STATUS_UNSAFE'
+ };
  /**
  * The types of responses that can be returned by {@link LiveSession.receive}.
  *
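`URLRetrievalStatus` pairs with the `urlContextMetadata` that later hunks map onto candidates. A hedged sketch of inspecting it; the per-URL field names (`urlMetadata`, `retrievedUrl`, `urlRetrievalStatus`) mirror the Gemini API surface and are an assumption, not something this diff confirms:

```javascript
import { URLRetrievalStatus } from '@firebase/ai';

// Hedged sketch: the per-URL field names below are assumed from the Gemini
// API shape and may differ in this SDK's public types.
function logUrlRetrievals(response) {
  const metadata = response.candidates?.[0]?.urlContextMetadata;
  for (const entry of metadata?.urlMetadata ?? []) {
    if (entry.urlRetrievalStatus === URLRetrievalStatus.URL_RETRIEVAL_STATUS_SUCCESS) {
      console.log(`Fetched: ${entry.retrievedUrl}`);
    } else {
      console.warn(`Skipped ${entry.retrievedUrl}: ${entry.urlRetrievalStatus}`);
    }
  }
}
```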
@@ -490,7 +538,7 @@ const SchemaType = {
  * and the {@link https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters | Responsible AI and usage guidelines}
  * for more details.
  *
- * @beta
+ * @public
  */
  const ImagenSafetyFilterLevel = {
  /**
@@ -519,7 +567,7 @@ const ImagenSafetyFilterLevel = {
  * See the <a href="http://firebase.google.com/docs/vertex-ai/generate-images">personGeneration</a>
  * documentation for more details.
  *
- * @beta
+ * @public
  */
  const ImagenPersonFilterLevel = {
  /**
@@ -552,7 +600,7 @@ const ImagenPersonFilterLevel = {
  * See the {@link http://firebase.google.com/docs/vertex-ai/generate-images | documentation }
  * for more details and examples of the supported aspect ratios.
  *
- * @beta
+ * @public
  */
  const ImagenAspectRatio = {
  /**
@@ -1234,7 +1282,7 @@ function hasValidCandidates(response) {
  * Creates an EnhancedGenerateContentResponse object that has helper functions and
  * other modifications that improve usability.
  */
- function createEnhancedContentResponse(response) {
+ function createEnhancedContentResponse(response, inferenceSource = InferenceSource.IN_CLOUD) {
  /**
  * The Vertex AI backend omits default values.
  * This causes the `index` property to be omitted from the first candidate in the
@@ -1245,6 +1293,7 @@ function createEnhancedContentResponse(response) {
  response.candidates[0].index = 0;
  }
  const responseWithHelpers = addHelpers(response);
+ responseWithHelpers.inferenceSource = inferenceSource;
  return responseWithHelpers;
  }
  /**
@@ -1557,7 +1606,7 @@ function mapGenerateContentCandidates(candidates) {
  // videoMetadata is not supported.
  // Throw early since developers may send a long video as input and only expect to pay
  // for inference on a small portion of the video.
- if (candidate.content?.parts.some(part => part?.videoMetadata)) {
+ if (candidate.content?.parts?.some(part => part?.videoMetadata)) {
  throw new AIError(AIErrorCode.UNSUPPORTED, 'Part.videoMetadata is not supported in the Gemini Developer API. Please remove this property.');
  }
  const mappedCandidate = {
@@ -1567,7 +1616,8 @@ function mapGenerateContentCandidates(candidates) {
  finishMessage: candidate.finishMessage,
  safetyRatings: mappedSafetyRatings,
  citationMetadata,
- groundingMetadata: candidate.groundingMetadata
+ groundingMetadata: candidate.groundingMetadata,
+ urlContextMetadata: candidate.urlContextMetadata
  };
  mappedCandidates.push(mappedCandidate);
  });
@@ -1620,16 +1670,16 @@ const responseLineRE = /^data\: (.*)(?:\n\n|\r\r|\r\n\r\n)/;
  *
  * @param response - Response from a fetch call
  */
- function processStream(response, apiSettings) {
+ function processStream(response, apiSettings, inferenceSource) {
  const inputStream = response.body.pipeThrough(new TextDecoderStream('utf8', { fatal: true }));
  const responseStream = getResponseStream(inputStream);
  const [stream1, stream2] = responseStream.tee();
  return {
- stream: generateResponseSequence(stream1, apiSettings),
- response: getResponsePromise(stream2, apiSettings)
+ stream: generateResponseSequence(stream1, apiSettings, inferenceSource),
+ response: getResponsePromise(stream2, apiSettings, inferenceSource)
  };
  }
- async function getResponsePromise(stream, apiSettings) {
+ async function getResponsePromise(stream, apiSettings, inferenceSource) {
  const allResponses = [];
  const reader = stream.getReader();
  while (true) {
@@ -1639,12 +1689,12 @@ async function getResponsePromise(stream, apiSettings) {
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
  generateContentResponse = mapGenerateContentResponse(generateContentResponse);
  }
- return createEnhancedContentResponse(generateContentResponse);
+ return createEnhancedContentResponse(generateContentResponse, inferenceSource);
  }
  allResponses.push(value);
  }
  }
- async function* generateResponseSequence(stream, apiSettings) {
+ async function* generateResponseSequence(stream, apiSettings, inferenceSource) {
  const reader = stream.getReader();
  while (true) {
  const { value, done } = await reader.read();
@@ -1653,10 +1703,18 @@ async function* generateResponseSequence(stream, apiSettings) {
  }
  let enhancedResponse;
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
- enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value));
+ enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value), inferenceSource);
  }
  else {
- enhancedResponse = createEnhancedContentResponse(value);
+ enhancedResponse = createEnhancedContentResponse(value, inferenceSource);
+ }
+ const firstCandidate = enhancedResponse.candidates?.[0];
+ // Don't yield a response with no useful data for the developer.
+ if (!firstCandidate?.content?.parts &&
+ !firstCandidate?.finishReason &&
+ !firstCandidate?.citationMetadata &&
+ !firstCandidate?.urlContextMetadata) {
+ continue;
  }
  yield enhancedResponse;
  }
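With this change, `generateResponseSequence` drops chunks that carry no parts, `finishReason`, `citationMetadata`, or `urlContextMetadata`, so stream consumers no longer have to guard against empty chunks. A minimal consumption sketch, assuming a `model` from `getGenerativeModel()`:

```javascript
// Hedged sketch: every yielded chunk now carries at least one useful field.
async function printStream(model) {
  const { stream, response } = await model.generateContentStream('Tell me a story.');
  for await (const chunk of stream) {
    console.log(chunk.text()); // no empty chunks reach this point
  }
  const full = await response; // aggregated response resolves after the stream ends
  console.log('finishReason:', full.candidates?.[0]?.finishReason);
}
```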
@@ -1737,36 +1795,43 @@ function aggregateResponses(responses) {
  candidate.safetyRatings;
  aggregatedResponse.candidates[i].groundingMetadata =
  candidate.groundingMetadata;
+ // The urlContextMetadata object is defined in the first chunk of the response stream.
+ // In all subsequent chunks, the urlContextMetadata object will be undefined. We need to
+ // make sure that we don't overwrite the first value urlContextMetadata object with undefined.
+ // FIXME: What happens if we receive a second, valid urlContextMetadata object?
+ const urlContextMetadata = candidate.urlContextMetadata;
+ if (typeof urlContextMetadata === 'object' &&
+ urlContextMetadata !== null &&
+ Object.keys(urlContextMetadata).length > 0) {
+ aggregatedResponse.candidates[i].urlContextMetadata =
+ urlContextMetadata;
+ }
  /**
  * Candidates should always have content and parts, but this handles
  * possible malformed responses.
  */
- if (candidate.content && candidate.content.parts) {
+ if (candidate.content) {
+ // Skip a candidate without parts.
+ if (!candidate.content.parts) {
+ continue;
+ }
  if (!aggregatedResponse.candidates[i].content) {
  aggregatedResponse.candidates[i].content = {
  role: candidate.content.role || 'user',
  parts: []
  };
  }
- const newPart = {};
  for (const part of candidate.content.parts) {
- if (part.text !== undefined) {
- // The backend can send empty text parts. If these are sent back
- // (e.g. in chat history), the backend will respond with an error.
- // To prevent this, ignore empty text parts.
- if (part.text === '') {
- continue;
- }
- newPart.text = part.text;
+ const newPart = { ...part };
+ // The backend can send empty text parts. If these are sent back
+ // (e.g. in chat history), the backend will respond with an error.
+ // To prevent this, ignore empty text parts.
+ if (part.text === '') {
+ continue;
  }
- if (part.functionCall) {
- newPart.functionCall = part.functionCall;
+ if (Object.keys(newPart).length > 0) {
+ aggregatedResponse.candidates[i].content.parts.push(newPart);
  }
- if (Object.keys(newPart).length === 0) {
- throw new AIError(AIErrorCode.INVALID_CONTENT, 'Part should have at least one property, but there are none. This is likely caused ' +
- 'by a malformed response from the backend.');
- }
- aggregatedResponse.candidates[i].content.parts.push(newPart);
  }
  }
  }
@@ -1811,31 +1876,52 @@ const errorsCausingFallback = [
  */
  async function callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) {
  if (!chromeAdapter) {
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  }
  switch (chromeAdapter.mode) {
  case InferenceMode.ONLY_ON_DEVICE:
  if (await chromeAdapter.isAvailable(request)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
  throw new AIError(AIErrorCode.UNSUPPORTED, 'Inference mode is ONLY_ON_DEVICE, but an on-device model is not available.');
  case InferenceMode.ONLY_IN_CLOUD:
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  case InferenceMode.PREFER_IN_CLOUD:
  try {
- return await inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  }
  catch (e) {
  if (e instanceof AIError && errorsCausingFallback.includes(e.code)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
  throw e;
  }
  case InferenceMode.PREFER_ON_DEVICE:
  if (await chromeAdapter.isAvailable(request)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  default:
  throw new AIError(AIErrorCode.ERROR, `Unexpected infererence mode: ${chromeAdapter.mode}`);
  }
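`callCloudOrDevice` now tags every branch's result with the `InferenceSource` it used. The mode that drives this switch is chosen when the model is created; a hedged configuration sketch (the `mode` field follows the SDK's hybrid-inference docs and should be treated as illustrative):

```javascript
import { initializeApp } from '@firebase/app';
import { getAI, getGenerativeModel, InferenceMode } from '@firebase/ai';

// Hedged sketch: PREFER_ON_DEVICE uses Chrome's on-device model when
// available and otherwise falls through to the cloud backend.
const app = initializeApp({ /* your Firebase config */ });
const ai = getAI(app);
const model = getGenerativeModel(ai, {
  mode: InferenceMode.PREFER_ON_DEVICE
});
```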
@@ -1865,8 +1951,8 @@ async function generateContentStreamOnCloud(apiSettings, model, params, requestO
  /* stream */ true, JSON.stringify(params), requestOptions);
  }
  async function generateContentStream(apiSettings, model, params, chromeAdapter, requestOptions) {
- const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
- return processStream(response, apiSettings); // TODO: Map streaming responses
+ const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
+ return processStream(callResult.response, apiSettings); // TODO: Map streaming responses
  }
  async function generateContentOnCloud(apiSettings, model, params, requestOptions) {
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
@@ -1876,9 +1962,9 @@ async function generateContentOnCloud(apiSettings, model, params, requestOptions
  /* stream */ false, JSON.stringify(params), requestOptions);
  }
  async function generateContent(apiSettings, model, params, chromeAdapter, requestOptions) {
- const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
- const generateContentResponse = await processGenerateContentResponse(response, apiSettings);
- const enhancedResponse = createEnhancedContentResponse(generateContentResponse);
+ const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
+ const generateContentResponse = await processGenerateContentResponse(callResult.response, apiSettings);
+ const enhancedResponse = createEnhancedContentResponse(generateContentResponse, callResult.inferenceSource);
  return {
  response: enhancedResponse
  };
@@ -2452,56 +2538,104 @@ class LiveSession {
  this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaChunks(mediaChunks) {
+ async sendTextRealtime(text) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- // The backend does not support sending more than one mediaChunk in one message.
- // Work around this limitation by sending mediaChunks in separate messages.
- mediaChunks.forEach(mediaChunk => {
- const message = {
- realtimeInput: { mediaChunks: [mediaChunk] }
- };
- this.webSocketHandler.send(JSON.stringify(message));
- });
+ const message = {
+ realtimeInput: {
+ text
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends audio data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaStream(mediaChunkStream) {
+ async sendAudioRealtime(blob) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- const reader = mediaChunkStream.getReader();
- while (true) {
- try {
- const { done, value } = await reader.read();
- if (done) {
- break;
- }
- else if (!value) {
- throw new Error('Missing chunk in reader, but reader is not done.');
- }
- await this.sendMediaChunks([value]);
+ const message = {
+ realtimeInput: {
+ audio: blob
  }
- catch (e) {
- // Re-throw any errors that occur during stream consumption or sending.
- const message = e instanceof Error ? e.message : 'Error processing media stream.';
- throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ }
+ /**
+ * Sends video data to the server in realtime.
+ *
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendVideoRealtime(blob) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ const message = {
+ realtimeInput: {
+ video: blob
  }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ }
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendFunctionResponses(functionResponses) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
+ const message = {
+ toolResponse: {
+ functionResponses
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
  * Yields messages received from the server.
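Of the four new senders, only `sendFunctionResponses` lacks an inline `@example`. A hedged sketch of answering a tool call received over the session; the `toolCall` message type and the `{ id, name, response }` pairing follow the SDK's public live types and are assumptions here:

```javascript
// Hedged sketch: replies to live tool calls with the new sendFunctionResponses().
async function answerToolCalls(liveSession) {
  for await (const message of liveSession.receive()) {
    if (message.type === 'toolCall' && message.functionCalls) {
      const responses = message.functionCalls.map(call => ({
        id: call.id, // pairing responses to calls by id is an assumption
        name: call.name,
        response: { result: 'ok' } // your handler's real output goes here
      }));
      await liveSession.sendFunctionResponses(responses);
    }
  }
}
```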
@@ -2559,6 +2693,62 @@ class LiveSession {
  await this.webSocketHandler.close(1000, 'Client closed session.');
  }
  }
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaChunks(mediaChunks) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ // The backend does not support sending more than one mediaChunk in one message.
+ // Work around this limitation by sending mediaChunks in separate messages.
+ mediaChunks.forEach(mediaChunk => {
+ const message = {
+ realtimeInput: { mediaChunks: [mediaChunk] }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ });
+ }
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaStream(mediaChunkStream) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ const reader = mediaChunkStream.getReader();
+ while (true) {
+ try {
+ const { done, value } = await reader.read();
+ if (done) {
+ break;
+ }
+ else if (!value) {
+ throw new Error('Missing chunk in reader, but reader is not done.');
+ }
+ await this.sendMediaChunks([value]);
+ }
+ catch (e) {
+ // Re-throw any errors that occur during stream consumption or sending.
+ const message = e instanceof Error ? e.message : 'Error processing media stream.';
+ throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ }
+ }
+ }
  }
 
  /**
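Both methods remain functional but deprecated. A short migration sketch mapping the old generic chunk API onto the new typed senders:

```javascript
// Hedged migration sketch: each media chunk maps onto one typed sender call.
async function migrate(liveSession, base64PcmData, base64JpegFrame) {
  const audioBlob = { mimeType: 'audio/pcm', data: base64PcmData };
  const videoBlob = { mimeType: 'image/jpeg', data: base64JpegFrame };

  // Before (deprecated; sent one message per chunk under the hood):
  // await liveSession.sendMediaChunks([audioBlob, videoBlob]);

  // After (explicit about each payload type):
  await liveSession.sendAudioRealtime(audioBlob);
  await liveSession.sendVideoRealtime(videoBlob);
}
```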
@@ -2686,7 +2876,7 @@ class LiveGenerativeModel extends AIModel {
  * }
  * ```
  *
- * @beta
+ * @public
  */
  class ImagenModel extends AIModel {
  /**
@@ -2722,7 +2912,7 @@ class ImagenModel extends AIModel {
  * returned object will have a `filteredReason` property.
  * If all images are filtered, the `images` array will be empty.
  *
- * @beta
+ * @public
  */
  async generateImages(prompt) {
  const body = createPredictRequestBody(prompt, {
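With `ImagenModel` and `generateImages` promoted to `@public`, basic usage looks like the sketch below; the model name is illustrative, so check the current docs for supported models:

```javascript
import { initializeApp } from '@firebase/app';
import { getAI, getImagenModel } from '@firebase/ai';

// Hedged sketch: inline (non-GCS) image generation with a filtering check.
async function generateFox(firebaseConfig) {
  const ai = getAI(initializeApp(firebaseConfig));
  const imagen = getImagenModel(ai, { model: 'imagen-3.0-generate-002' });
  const { images, filteredReason } = await imagen.generateImages('A watercolor fox.');
  if (filteredReason) {
    console.warn('Some images were filtered:', filteredReason);
  }
  // Each inline image exposes base64 data plus its mime type.
  return images.map(img => ({ mimeType: img.mimeType, bytes: img.bytesBase64Encoded }));
}
```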
@@ -3185,7 +3375,7 @@ class AnyOfSchema extends Schema {
  * }
  * ```
  *
- * @beta
+ * @public
  */
  class ImagenImageFormat {
  constructor() {
@@ -3197,7 +3387,7 @@ class ImagenImageFormat {
  * @param compressionQuality - The level of compression (a number between 0 and 100).
  * @returns An {@link ImagenImageFormat} object for a JPEG image.
  *
- * @beta
+ * @public
  */
  static jpeg(compressionQuality) {
  if (compressionQuality &&
@@ -3211,7 +3401,7 @@ class ImagenImageFormat {
  *
  * @returns An {@link ImagenImageFormat} object for a PNG image.
  *
- * @beta
+ * @public
  */
  static png() {
  return { mimeType: 'image/png' };
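The now-public `jpeg()` and `png()` factories plug into the Imagen generation config. A hedged wiring sketch; placing `imageFormat` under `generationConfig` follows the SDK docs and is illustrative:

```javascript
import { getAI, getImagenModel, ImagenImageFormat } from '@firebase/ai';

// Hedged sketch: request compressed JPEG output instead of the PNG default.
function makeJpegImagenModel(app) {
  return getImagenModel(getAI(app), {
    model: 'imagen-3.0-generate-002', // illustrative model name
    generationConfig: {
      numberOfImages: 2,
      imageFormat: ImagenImageFormat.jpeg(75) // quality between 0 and 100
    }
  });
}
```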
@@ -3331,7 +3521,7 @@ class AudioConversationRunner {
  mimeType: 'audio/pcm',
  data: base64
  };
- void this.liveSession.sendMediaChunks([chunk]);
+ void this.liveSession.sendAudioRealtime(chunk);
  };
  }
  /**
@@ -3453,9 +3643,9 @@ class AudioConversationRunner {
  }
  else {
  try {
- const resultPart = await this.options.functionCallingHandler(message.functionCalls);
+ const functionResponse = await this.options.functionCallingHandler(message.functionCalls);
  if (!this.isStopped) {
- void this.liveSession.send([resultPart]);
+ void this.liveSession.sendFunctionResponses([functionResponse]);
  }
  }
  catch (e) {
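Under the new contract, `functionCallingHandler` resolves to a `FunctionResponse` that the runner forwards via `sendFunctionResponses`, rather than a `Part` passed to `send()`. A hedged sketch of an options object for the audio conversation entry point:

```javascript
import { startAudioConversation } from '@firebase/ai';

// Hedged sketch: the handler now returns a FunctionResponse-shaped object.
async function startWithTools(liveSession) {
  return startAudioConversation(liveSession, {
    functionCallingHandler: async functionCalls => {
      const call = functionCalls[0];
      return {
        id: call.id, // id pairing is an assumption
        name: call.name,
        response: { result: `handled ${call.name}` }
      };
    }
  });
}
```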
@@ -3675,7 +3865,7 @@ function getGenerativeModel(ai, modelParams, requestOptions) {
  * @throws If the `apiKey` or `projectId` fields are missing in your
  * Firebase config.
  *
- * @beta
+ * @public
  */
  function getImagenModel(ai, modelParams, requestOptions) {
  if (!modelParams.model) {
@@ -3751,6 +3941,7 @@ exports.ImagenModel = ImagenModel;
  exports.ImagenPersonFilterLevel = ImagenPersonFilterLevel;
  exports.ImagenSafetyFilterLevel = ImagenSafetyFilterLevel;
  exports.InferenceMode = InferenceMode;
+ exports.InferenceSource = InferenceSource;
  exports.IntegerSchema = IntegerSchema;
  exports.Language = Language;
  exports.LiveGenerativeModel = LiveGenerativeModel;
@@ -3765,6 +3956,7 @@ exports.ResponseModality = ResponseModality;
  exports.Schema = Schema;
  exports.SchemaType = SchemaType;
  exports.StringSchema = StringSchema;
+ exports.URLRetrievalStatus = URLRetrievalStatus;
  exports.VertexAIBackend = VertexAIBackend;
  exports.getAI = getAI;
  exports.getGenerativeModel = getGenerativeModel;