@firebase/ai 2.3.0 → 2.4.0-canary.22e0a1adb

This diff reflects the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and shows the changes between the package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/ai-public.d.ts +240 -35
  2. package/dist/ai.d.ts +241 -35
  3. package/dist/esm/index.esm.js +295 -92
  4. package/dist/esm/index.esm.js.map +1 -1
  5. package/dist/esm/src/api.d.ts +1 -1
  6. package/dist/esm/src/methods/chrome-adapter.d.ts +1 -1
  7. package/dist/esm/src/methods/live-session-helpers.d.ts +2 -2
  8. package/dist/esm/src/methods/live-session.d.ts +71 -7
  9. package/dist/esm/src/models/imagen-model.d.ts +2 -2
  10. package/dist/esm/src/requests/hybrid-helpers.d.ts +7 -2
  11. package/dist/esm/src/requests/imagen-image-format.d.ts +3 -3
  12. package/dist/esm/src/requests/response-helpers.d.ts +2 -2
  13. package/dist/esm/src/requests/stream-reader.d.ts +2 -1
  14. package/dist/esm/src/types/content.d.ts +4 -4
  15. package/dist/esm/src/types/enums.d.ts +19 -4
  16. package/dist/esm/src/types/googleai.d.ts +2 -1
  17. package/dist/esm/src/types/imagen/requests.d.ts +9 -9
  18. package/dist/esm/src/types/imagen/responses.d.ts +3 -3
  19. package/dist/esm/src/types/live-responses.d.ts +16 -2
  20. package/dist/esm/src/types/requests.d.ts +22 -2
  21. package/dist/esm/src/types/responses.d.ts +99 -1
  22. package/dist/index.cjs.js +296 -91
  23. package/dist/index.cjs.js.map +1 -1
  24. package/dist/index.node.cjs.js +276 -84
  25. package/dist/index.node.cjs.js.map +1 -1
  26. package/dist/index.node.mjs +275 -85
  27. package/dist/index.node.mjs.map +1 -1
  28. package/dist/src/api.d.ts +1 -1
  29. package/dist/src/methods/chrome-adapter.d.ts +1 -1
  30. package/dist/src/methods/live-session-helpers.d.ts +2 -2
  31. package/dist/src/methods/live-session.d.ts +71 -7
  32. package/dist/src/models/imagen-model.d.ts +2 -2
  33. package/dist/src/requests/hybrid-helpers.d.ts +7 -2
  34. package/dist/src/requests/imagen-image-format.d.ts +3 -3
  35. package/dist/src/requests/response-helpers.d.ts +2 -2
  36. package/dist/src/requests/stream-reader.d.ts +2 -1
  37. package/dist/src/types/content.d.ts +4 -4
  38. package/dist/src/types/enums.d.ts +19 -4
  39. package/dist/src/types/googleai.d.ts +2 -1
  40. package/dist/src/types/imagen/requests.d.ts +9 -9
  41. package/dist/src/types/imagen/responses.d.ts +3 -3
  42. package/dist/src/types/live-responses.d.ts +16 -2
  43. package/dist/src/types/requests.d.ts +22 -2
  44. package/dist/src/types/responses.d.ts +99 -1
  45. package/package.json +8 -8
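The headline change in this release is a set of new `LiveSession` realtime methods (`sendTextRealtime`, `sendAudioRealtime`, `sendVideoRealtime`, `sendFunctionResponses`) that replace the now-deprecated `sendMediaChunks`/`sendMediaStream`. A minimal usage sketch, inferred only from the signatures and JSDoc added in the diff below; the `liveSession` object and the base64 payloads are assumed placeholders, and the exact `FunctionResponse` shape shown is illustrative:

```javascript
// Sketch only: `liveSession` is assumed to come from an existing live model
// connection; pcmBase64 and jpegBase64 are placeholder base64 strings.
async function sendRealtimeInputs(liveSession, pcmBase64, jpegBase64) {
  // Text input.
  await liveSession.sendTextRealtime('Hello, how are you?');
  // Audio input: base64-encoded 16-bit PCM at 16kHz, little-endian.
  await liveSession.sendAudioRealtime({ mimeType: 'audio/pcm', data: pcmBase64 });
  // Video input: individual frames at 1 FPS; image/jpeg is recommended.
  await liveSession.sendVideoRealtime({ mimeType: 'image/jpeg', data: jpegBase64 });
  // Function call results go back to the server as a toolResponse.
  await liveSession.sendFunctionResponses([
    // Illustrative FunctionResponse shape.
    { name: 'getWeather', response: { temperatureCelsius: 21 } }
  ]);
}
```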
package/dist/index.cjs.js CHANGED
@@ -8,7 +8,7 @@ var util = require('@firebase/util');
  var logger$1 = require('@firebase/logger');
 
  var name = "@firebase/ai";
- var version = "2.3.0";
+ var version = "2.4.0-canary.22e0a1adb";
 
  /**
  * @license
@@ -383,10 +383,19 @@ const InferenceMode = {
  'ONLY_IN_CLOUD': 'only_in_cloud',
  'PREFER_IN_CLOUD': 'prefer_in_cloud'
  };
+ /**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+ const InferenceSource = {
+ 'ON_DEVICE': 'on_device',
+ 'IN_CLOUD': 'in_cloud'
+ };
  /**
  * Represents the result of the code execution.
  *
- * @public
+ * @beta
  */
  const Outcome = {
  UNSPECIFIED: 'OUTCOME_UNSPECIFIED',
@@ -397,7 +406,7 @@ const Outcome = {
  /**
  * The programming language of the code.
  *
- * @public
+ * @beta
  */
  const Language = {
  UNSPECIFIED: 'LANGUAGE_UNSPECIFIED',
@@ -420,6 +429,45 @@ const Language = {
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+ /**
+ * The status of a URL retrieval.
+ *
+ * @remarks
+ * <b>URL_RETRIEVAL_STATUS_UNSPECIFIED:</b> Unspecified retrieval status.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_SUCCESS:</b> The URL retrieval was successful.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_ERROR:</b> The URL retrieval failed.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_PAYWALL:</b> The URL retrieval failed because the content is behind a paywall.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_UNSAFE:</b> The URL retrieval failed because the content is unsafe.
+ * <br/>
+ *
+ * @beta
+ */
+ const URLRetrievalStatus = {
+ /**
+ * Unspecified retrieval status.
+ */
+ URL_RETRIEVAL_STATUS_UNSPECIFIED: 'URL_RETRIEVAL_STATUS_UNSPECIFIED',
+ /**
+ * The URL retrieval was successful.
+ */
+ URL_RETRIEVAL_STATUS_SUCCESS: 'URL_RETRIEVAL_STATUS_SUCCESS',
+ /**
+ * The URL retrieval failed.
+ */
+ URL_RETRIEVAL_STATUS_ERROR: 'URL_RETRIEVAL_STATUS_ERROR',
+ /**
+ * The URL retrieval failed because the content is behind a paywall.
+ */
+ URL_RETRIEVAL_STATUS_PAYWALL: 'URL_RETRIEVAL_STATUS_PAYWALL',
+ /**
+ * The URL retrieval failed because the content is unsafe.
+ */
+ URL_RETRIEVAL_STATUS_UNSAFE: 'URL_RETRIEVAL_STATUS_UNSAFE'
+ };
  /**
  * The types of responses that can be returned by {@link LiveSession.receive}.
  *
@@ -546,7 +594,7 @@ const SchemaType = {
  * and the {@link https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters | Responsible AI and usage guidelines}
  * for more details.
  *
- * @beta
+ * @public
  */
  const ImagenSafetyFilterLevel = {
  /**
@@ -575,7 +623,7 @@ const ImagenSafetyFilterLevel = {
  * See the <a href="http://firebase.google.com/docs/vertex-ai/generate-images">personGeneration</a>
  * documentation for more details.
  *
- * @beta
+ * @public
  */
  const ImagenPersonFilterLevel = {
  /**
@@ -608,7 +656,7 @@ const ImagenPersonFilterLevel = {
  * See the {@link http://firebase.google.com/docs/vertex-ai/generate-images | documentation }
  * for more details and examples of the supported aspect ratios.
  *
- * @beta
+ * @public
  */
  const ImagenAspectRatio = {
  /**
@@ -853,22 +901,35 @@ var Availability;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+ // Defaults to support image inputs for convenience.
+ const defaultExpectedInputs = [{ type: 'image' }];
  /**
  * Defines an inference "backend" that uses Chrome's on-device model,
  * and encapsulates logic for detecting when on-device inference is
  * possible.
  */
  class ChromeAdapterImpl {
- constructor(languageModelProvider, mode, onDeviceParams = {
- createOptions: {
- // Defaults to support image inputs for convenience.
- expectedInputs: [{ type: 'image' }]
- }
- }) {
+ constructor(languageModelProvider, mode, onDeviceParams) {
  this.languageModelProvider = languageModelProvider;
  this.mode = mode;
- this.onDeviceParams = onDeviceParams;
  this.isDownloading = false;
+ this.onDeviceParams = {
+ createOptions: {
+ expectedInputs: defaultExpectedInputs
+ }
+ };
+ if (onDeviceParams) {
+ this.onDeviceParams = onDeviceParams;
+ if (!this.onDeviceParams.createOptions) {
+ this.onDeviceParams.createOptions = {
+ expectedInputs: defaultExpectedInputs
+ };
+ }
+ else if (!this.onDeviceParams.createOptions.expectedInputs) {
+ this.onDeviceParams.createOptions.expectedInputs =
+ defaultExpectedInputs;
+ }
+ }
  }
  /**
  * Checks if a given request can be made on-device.
@@ -1557,7 +1618,7 @@ function hasValidCandidates(response) {
  * Creates an EnhancedGenerateContentResponse object that has helper functions and
  * other modifications that improve usability.
  */
- function createEnhancedContentResponse(response) {
+ function createEnhancedContentResponse(response, inferenceSource = InferenceSource.IN_CLOUD) {
  /**
  * The Vertex AI backend omits default values.
  * This causes the `index` property to be omitted from the first candidate in the
@@ -1568,6 +1629,7 @@ function createEnhancedContentResponse(response) {
  response.candidates[0].index = 0;
  }
  const responseWithHelpers = addHelpers(response);
+ responseWithHelpers.inferenceSource = inferenceSource;
  return responseWithHelpers;
  }
  /**
@@ -1880,7 +1942,7 @@ function mapGenerateContentCandidates(candidates) {
  // videoMetadata is not supported.
  // Throw early since developers may send a long video as input and only expect to pay
  // for inference on a small portion of the video.
- if (candidate.content?.parts.some(part => part?.videoMetadata)) {
+ if (candidate.content?.parts?.some(part => part?.videoMetadata)) {
  throw new AIError(AIErrorCode.UNSUPPORTED, 'Part.videoMetadata is not supported in the Gemini Developer API. Please remove this property.');
  }
  const mappedCandidate = {
@@ -1890,7 +1952,8 @@ function mapGenerateContentCandidates(candidates) {
  finishMessage: candidate.finishMessage,
  safetyRatings: mappedSafetyRatings,
  citationMetadata,
- groundingMetadata: candidate.groundingMetadata
+ groundingMetadata: candidate.groundingMetadata,
+ urlContextMetadata: candidate.urlContextMetadata
  };
  mappedCandidates.push(mappedCandidate);
  });
@@ -1943,16 +2006,16 @@ const responseLineRE = /^data\: (.*)(?:\n\n|\r\r|\r\n\r\n)/;
  *
  * @param response - Response from a fetch call
  */
- function processStream(response, apiSettings) {
+ function processStream(response, apiSettings, inferenceSource) {
  const inputStream = response.body.pipeThrough(new TextDecoderStream('utf8', { fatal: true }));
  const responseStream = getResponseStream(inputStream);
  const [stream1, stream2] = responseStream.tee();
  return {
- stream: generateResponseSequence(stream1, apiSettings),
- response: getResponsePromise(stream2, apiSettings)
+ stream: generateResponseSequence(stream1, apiSettings, inferenceSource),
+ response: getResponsePromise(stream2, apiSettings, inferenceSource)
  };
  }
- async function getResponsePromise(stream, apiSettings) {
+ async function getResponsePromise(stream, apiSettings, inferenceSource) {
  const allResponses = [];
  const reader = stream.getReader();
  while (true) {
@@ -1962,12 +2025,12 @@ async function getResponsePromise(stream, apiSettings) {
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
  generateContentResponse = mapGenerateContentResponse(generateContentResponse);
  }
- return createEnhancedContentResponse(generateContentResponse);
+ return createEnhancedContentResponse(generateContentResponse, inferenceSource);
  }
  allResponses.push(value);
  }
  }
- async function* generateResponseSequence(stream, apiSettings) {
+ async function* generateResponseSequence(stream, apiSettings, inferenceSource) {
  const reader = stream.getReader();
  while (true) {
  const { value, done } = await reader.read();
@@ -1976,10 +2039,18 @@ async function* generateResponseSequence(stream, apiSettings) {
  }
  let enhancedResponse;
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
- enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value));
+ enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value), inferenceSource);
  }
  else {
- enhancedResponse = createEnhancedContentResponse(value);
+ enhancedResponse = createEnhancedContentResponse(value, inferenceSource);
+ }
+ const firstCandidate = enhancedResponse.candidates?.[0];
+ // Don't yield a response with no useful data for the developer.
+ if (!firstCandidate?.content?.parts &&
+ !firstCandidate?.finishReason &&
+ !firstCandidate?.citationMetadata &&
+ !firstCandidate?.urlContextMetadata) {
+ continue;
  }
  yield enhancedResponse;
  }
@@ -2060,36 +2131,43 @@ function aggregateResponses(responses) {
  candidate.safetyRatings;
  aggregatedResponse.candidates[i].groundingMetadata =
  candidate.groundingMetadata;
+ // The urlContextMetadata object is defined in the first chunk of the response stream.
+ // In all subsequent chunks, the urlContextMetadata object will be undefined. We need to
+ // make sure that we don't overwrite the first value urlContextMetadata object with undefined.
+ // FIXME: What happens if we receive a second, valid urlContextMetadata object?
+ const urlContextMetadata = candidate.urlContextMetadata;
+ if (typeof urlContextMetadata === 'object' &&
+ urlContextMetadata !== null &&
+ Object.keys(urlContextMetadata).length > 0) {
+ aggregatedResponse.candidates[i].urlContextMetadata =
+ urlContextMetadata;
+ }
  /**
  * Candidates should always have content and parts, but this handles
  * possible malformed responses.
  */
- if (candidate.content && candidate.content.parts) {
+ if (candidate.content) {
+ // Skip a candidate without parts.
+ if (!candidate.content.parts) {
+ continue;
+ }
  if (!aggregatedResponse.candidates[i].content) {
  aggregatedResponse.candidates[i].content = {
  role: candidate.content.role || 'user',
  parts: []
  };
  }
- const newPart = {};
  for (const part of candidate.content.parts) {
- if (part.text !== undefined) {
- // The backend can send empty text parts. If these are sent back
- // (e.g. in chat history), the backend will respond with an error.
- // To prevent this, ignore empty text parts.
- if (part.text === '') {
- continue;
- }
- newPart.text = part.text;
+ const newPart = { ...part };
+ // The backend can send empty text parts. If these are sent back
+ // (e.g. in chat history), the backend will respond with an error.
+ // To prevent this, ignore empty text parts.
+ if (part.text === '') {
+ continue;
  }
- if (part.functionCall) {
- newPart.functionCall = part.functionCall;
+ if (Object.keys(newPart).length > 0) {
+ aggregatedResponse.candidates[i].content.parts.push(newPart);
  }
- if (Object.keys(newPart).length === 0) {
- throw new AIError(AIErrorCode.INVALID_CONTENT, 'Part should have at least one property, but there are none. This is likely caused ' +
- 'by a malformed response from the backend.');
- }
- aggregatedResponse.candidates[i].content.parts.push(newPart);
  }
  }
  }
@@ -2134,31 +2212,52 @@ const errorsCausingFallback = [
  */
  async function callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) {
  if (!chromeAdapter) {
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  }
  switch (chromeAdapter.mode) {
  case InferenceMode.ONLY_ON_DEVICE:
  if (await chromeAdapter.isAvailable(request)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
  throw new AIError(AIErrorCode.UNSUPPORTED, 'Inference mode is ONLY_ON_DEVICE, but an on-device model is not available.');
  case InferenceMode.ONLY_IN_CLOUD:
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  case InferenceMode.PREFER_IN_CLOUD:
  try {
- return await inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  }
  catch (e) {
  if (e instanceof AIError && errorsCausingFallback.includes(e.code)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
  throw e;
  }
  case InferenceMode.PREFER_ON_DEVICE:
  if (await chromeAdapter.isAvailable(request)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  default:
  throw new AIError(AIErrorCode.ERROR, `Unexpected infererence mode: ${chromeAdapter.mode}`);
  }
@@ -2188,8 +2287,8 @@ async function generateContentStreamOnCloud(apiSettings, model, params, requestO
  /* stream */ true, JSON.stringify(params), requestOptions);
  }
  async function generateContentStream(apiSettings, model, params, chromeAdapter, requestOptions) {
- const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
- return processStream(response, apiSettings); // TODO: Map streaming responses
+ const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
+ return processStream(callResult.response, apiSettings); // TODO: Map streaming responses
  }
  async function generateContentOnCloud(apiSettings, model, params, requestOptions) {
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
@@ -2199,9 +2298,9 @@ async function generateContentOnCloud(apiSettings, model, params, requestOptions
  /* stream */ false, JSON.stringify(params), requestOptions);
  }
  async function generateContent(apiSettings, model, params, chromeAdapter, requestOptions) {
- const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
- const generateContentResponse = await processGenerateContentResponse(response, apiSettings);
- const enhancedResponse = createEnhancedContentResponse(generateContentResponse);
+ const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
+ const generateContentResponse = await processGenerateContentResponse(callResult.response, apiSettings);
+ const enhancedResponse = createEnhancedContentResponse(generateContentResponse, callResult.inferenceSource);
  return {
  response: enhancedResponse
  };
@@ -2775,56 +2874,104 @@ class LiveSession {
  this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaChunks(mediaChunks) {
+ async sendTextRealtime(text) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- // The backend does not support sending more than one mediaChunk in one message.
- // Work around this limitation by sending mediaChunks in separate messages.
- mediaChunks.forEach(mediaChunk => {
- const message = {
- realtimeInput: { mediaChunks: [mediaChunk] }
- };
- this.webSocketHandler.send(JSON.stringify(message));
- });
+ const message = {
+ realtimeInput: {
+ text
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends audio data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaStream(mediaChunkStream) {
+ async sendAudioRealtime(blob) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- const reader = mediaChunkStream.getReader();
- while (true) {
- try {
- const { done, value } = await reader.read();
- if (done) {
- break;
- }
- else if (!value) {
- throw new Error('Missing chunk in reader, but reader is not done.');
- }
- await this.sendMediaChunks([value]);
+ const message = {
+ realtimeInput: {
+ audio: blob
  }
- catch (e) {
- // Re-throw any errors that occur during stream consumption or sending.
- const message = e instanceof Error ? e.message : 'Error processing media stream.';
- throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ }
+ /**
+ * Sends video data to the server in realtime.
+ *
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendVideoRealtime(blob) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ const message = {
+ realtimeInput: {
+ video: blob
  }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ }
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendFunctionResponses(functionResponses) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
+ const message = {
+ toolResponse: {
+ functionResponses
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
  * Yields messages received from the server.
@@ -2882,6 +3029,62 @@ class LiveSession {
  await this.webSocketHandler.close(1000, 'Client closed session.');
  }
  }
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaChunks(mediaChunks) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ // The backend does not support sending more than one mediaChunk in one message.
+ // Work around this limitation by sending mediaChunks in separate messages.
+ mediaChunks.forEach(mediaChunk => {
+ const message = {
+ realtimeInput: { mediaChunks: [mediaChunk] }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ });
+ }
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaStream(mediaChunkStream) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ const reader = mediaChunkStream.getReader();
+ while (true) {
+ try {
+ const { done, value } = await reader.read();
+ if (done) {
+ break;
+ }
+ else if (!value) {
+ throw new Error('Missing chunk in reader, but reader is not done.');
+ }
+ await this.sendMediaChunks([value]);
+ }
+ catch (e) {
+ // Re-throw any errors that occur during stream consumption or sending.
+ const message = e instanceof Error ? e.message : 'Error processing media stream.';
+ throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ }
+ }
+ }
  }
 
  /**
@@ -3009,7 +3212,7 @@ class LiveGenerativeModel extends AIModel {
  * }
  * ```
  *
- * @beta
+ * @public
  */
  class ImagenModel extends AIModel {
  /**
@@ -3045,7 +3248,7 @@ class ImagenModel extends AIModel {
  * returned object will have a `filteredReason` property.
  * If all images are filtered, the `images` array will be empty.
  *
- * @beta
+ * @public
  */
  async generateImages(prompt) {
  const body = createPredictRequestBody(prompt, {
@@ -3508,7 +3711,7 @@ class AnyOfSchema extends Schema {
  * }
  * ```
  *
- * @beta
+ * @public
  */
  class ImagenImageFormat {
  constructor() {
@@ -3520,7 +3723,7 @@ class ImagenImageFormat {
  * @param compressionQuality - The level of compression (a number between 0 and 100).
  * @returns An {@link ImagenImageFormat} object for a JPEG image.
  *
- * @beta
+ * @public
  */
  static jpeg(compressionQuality) {
  if (compressionQuality &&
@@ -3534,7 +3737,7 @@ class ImagenImageFormat {
  *
  * @returns An {@link ImagenImageFormat} object for a PNG image.
  *
- * @beta
+ * @public
  */
  static png() {
  return { mimeType: 'image/png' };
@@ -3654,7 +3857,7 @@ class AudioConversationRunner {
  mimeType: 'audio/pcm',
  data: base64
  };
- void this.liveSession.sendMediaChunks([chunk]);
+ void this.liveSession.sendAudioRealtime(chunk);
  };
  }
  /**
@@ -3776,9 +3979,9 @@ class AudioConversationRunner {
  }
  else {
  try {
- const resultPart = await this.options.functionCallingHandler(message.functionCalls);
+ const functionResponse = await this.options.functionCallingHandler(message.functionCalls);
  if (!this.isStopped) {
- void this.liveSession.send([resultPart]);
+ void this.liveSession.sendFunctionResponses([functionResponse]);
  }
  }
  catch (e) {
@@ -3998,7 +4201,7 @@ function getGenerativeModel(ai, modelParams, requestOptions) {
  * @throws If the `apiKey` or `projectId` fields are missing in your
  * Firebase config.
  *
- * @beta
+ * @public
  */
  function getImagenModel(ai, modelParams, requestOptions) {
  if (!modelParams.model) {
@@ -4064,6 +4267,7 @@ exports.ImagenModel = ImagenModel;
  exports.ImagenPersonFilterLevel = ImagenPersonFilterLevel;
  exports.ImagenSafetyFilterLevel = ImagenSafetyFilterLevel;
  exports.InferenceMode = InferenceMode;
+ exports.InferenceSource = InferenceSource;
  exports.IntegerSchema = IntegerSchema;
  exports.Language = Language;
  exports.LiveGenerativeModel = LiveGenerativeModel;
@@ -4078,6 +4282,7 @@ exports.ResponseModality = ResponseModality;
  exports.Schema = Schema;
  exports.SchemaType = SchemaType;
  exports.StringSchema = StringSchema;
+ exports.URLRetrievalStatus = URLRetrievalStatus;
  exports.VertexAIBackend = VertexAIBackend;
  exports.getAI = getAI;
  exports.getGenerativeModel = getGenerativeModel;
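
The other thread running through this file is hybrid-inference bookkeeping: `callCloudOrDevice` now returns `{ response, inferenceSource }`, `createEnhancedContentResponse` stamps that value onto the enhanced response, and `urlContextMetadata` is mapped onto each candidate. A hedged sketch of consuming these from the CommonJS entry point shown above; the model setup and prompt are placeholders:

```javascript
// Sketch only: assumes `model` is a GenerativeModel obtained via getGenerativeModel().
const { InferenceSource } = require('@firebase/ai');

async function reportInferenceSource(model) {
  const result = await model.generateContent('Summarize hybrid inference.');
  const response = result.response;
  if (response.inferenceSource === InferenceSource.ON_DEVICE) {
    console.log('Generated by the on-device model.');
  } else if (response.inferenceSource === InferenceSource.IN_CLOUD) {
    console.log('Generated in the cloud.');
  }
  // When URL context is used, retrieval details surface per candidate.
  console.log(response.candidates?.[0]?.urlContextMetadata);
}
```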