@firebase/ai 2.3.0 → 2.4.0-canary.22e0a1adb

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/ai-public.d.ts +240 -35
  2. package/dist/ai.d.ts +241 -35
  3. package/dist/esm/index.esm.js +295 -92
  4. package/dist/esm/index.esm.js.map +1 -1
  5. package/dist/esm/src/api.d.ts +1 -1
  6. package/dist/esm/src/methods/chrome-adapter.d.ts +1 -1
  7. package/dist/esm/src/methods/live-session-helpers.d.ts +2 -2
  8. package/dist/esm/src/methods/live-session.d.ts +71 -7
  9. package/dist/esm/src/models/imagen-model.d.ts +2 -2
  10. package/dist/esm/src/requests/hybrid-helpers.d.ts +7 -2
  11. package/dist/esm/src/requests/imagen-image-format.d.ts +3 -3
  12. package/dist/esm/src/requests/response-helpers.d.ts +2 -2
  13. package/dist/esm/src/requests/stream-reader.d.ts +2 -1
  14. package/dist/esm/src/types/content.d.ts +4 -4
  15. package/dist/esm/src/types/enums.d.ts +19 -4
  16. package/dist/esm/src/types/googleai.d.ts +2 -1
  17. package/dist/esm/src/types/imagen/requests.d.ts +9 -9
  18. package/dist/esm/src/types/imagen/responses.d.ts +3 -3
  19. package/dist/esm/src/types/live-responses.d.ts +16 -2
  20. package/dist/esm/src/types/requests.d.ts +22 -2
  21. package/dist/esm/src/types/responses.d.ts +99 -1
  22. package/dist/index.cjs.js +296 -91
  23. package/dist/index.cjs.js.map +1 -1
  24. package/dist/index.node.cjs.js +276 -84
  25. package/dist/index.node.cjs.js.map +1 -1
  26. package/dist/index.node.mjs +275 -85
  27. package/dist/index.node.mjs.map +1 -1
  28. package/dist/src/api.d.ts +1 -1
  29. package/dist/src/methods/chrome-adapter.d.ts +1 -1
  30. package/dist/src/methods/live-session-helpers.d.ts +2 -2
  31. package/dist/src/methods/live-session.d.ts +71 -7
  32. package/dist/src/models/imagen-model.d.ts +2 -2
  33. package/dist/src/requests/hybrid-helpers.d.ts +7 -2
  34. package/dist/src/requests/imagen-image-format.d.ts +3 -3
  35. package/dist/src/requests/response-helpers.d.ts +2 -2
  36. package/dist/src/requests/stream-reader.d.ts +2 -1
  37. package/dist/src/types/content.d.ts +4 -4
  38. package/dist/src/types/enums.d.ts +19 -4
  39. package/dist/src/types/googleai.d.ts +2 -1
  40. package/dist/src/types/imagen/requests.d.ts +9 -9
  41. package/dist/src/types/imagen/responses.d.ts +3 -3
  42. package/dist/src/types/live-responses.d.ts +16 -2
  43. package/dist/src/types/requests.d.ts +22 -2
  44. package/dist/src/types/responses.d.ts +99 -1
  45. package/package.json +8 -8
@@ -4,7 +4,7 @@ import { FirebaseError, Deferred, getModularInstance } from '@firebase/util';
  import { Logger } from '@firebase/logger';

  var name = "@firebase/ai";
- var version = "2.3.0";
+ var version = "2.4.0-canary.22e0a1adb";

  /**
  * @license
@@ -379,10 +379,19 @@ const InferenceMode = {
  'ONLY_IN_CLOUD': 'only_in_cloud',
  'PREFER_IN_CLOUD': 'prefer_in_cloud'
  };
+ /**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+ const InferenceSource = {
+ 'ON_DEVICE': 'on_device',
+ 'IN_CLOUD': 'in_cloud'
+ };
  /**
  * Represents the result of the code execution.
  *
- * @public
+ * @beta
  */
  const Outcome = {
  UNSPECIFIED: 'OUTCOME_UNSPECIFIED',
@@ -393,7 +402,7 @@ const Outcome = {
  /**
  * The programming language of the code.
  *
- * @public
+ * @beta
  */
  const Language = {
  UNSPECIFIED: 'LANGUAGE_UNSPECIFIED',
@@ -416,6 +425,45 @@ const Language = {
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+ /**
+ * The status of a URL retrieval.
+ *
+ * @remarks
+ * <b>URL_RETRIEVAL_STATUS_UNSPECIFIED:</b> Unspecified retrieval status.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_SUCCESS:</b> The URL retrieval was successful.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_ERROR:</b> The URL retrieval failed.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_PAYWALL:</b> The URL retrieval failed because the content is behind a paywall.
+ * <br/>
+ * <b>URL_RETRIEVAL_STATUS_UNSAFE:</b> The URL retrieval failed because the content is unsafe.
+ * <br/>
+ *
+ * @beta
+ */
+ const URLRetrievalStatus = {
+ /**
+ * Unspecified retrieval status.
+ */
+ URL_RETRIEVAL_STATUS_UNSPECIFIED: 'URL_RETRIEVAL_STATUS_UNSPECIFIED',
+ /**
+ * The URL retrieval was successful.
+ */
+ URL_RETRIEVAL_STATUS_SUCCESS: 'URL_RETRIEVAL_STATUS_SUCCESS',
+ /**
+ * The URL retrieval failed.
+ */
+ URL_RETRIEVAL_STATUS_ERROR: 'URL_RETRIEVAL_STATUS_ERROR',
+ /**
+ * The URL retrieval failed because the content is behind a paywall.
+ */
+ URL_RETRIEVAL_STATUS_PAYWALL: 'URL_RETRIEVAL_STATUS_PAYWALL',
+ /**
+ * The URL retrieval failed because the content is unsafe.
+ */
+ URL_RETRIEVAL_STATUS_UNSAFE: 'URL_RETRIEVAL_STATUS_UNSAFE'
+ };
  /**
  * The types of responses that can be returned by {@link LiveSession.receive}.
  *
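The new `URLRetrievalStatus` values surface on candidates together with the `urlContextMetadata` mapping added further down in this diff. A minimal sketch of how an app might check them, assuming the URL context tool is enabled via `tools: [{ urlContext: {} }]` and that the nested field names (`urlMetadata`, `retrievedUrl`, `urlRetrievalStatus`) follow the Gemini API shape; these names are assumptions, not confirmed by this diff, so verify against `ai-public.d.ts`:

```javascript
import { getAI, getGenerativeModel, URLRetrievalStatus } from 'firebase/ai';

// firebaseApp is an already-initialized Firebase app (not shown here).
const model = getGenerativeModel(getAI(firebaseApp), {
  model: 'gemini-2.5-flash',
  tools: [{ urlContext: {} }] // assumed tool name for URL context
});

const result = await model.generateContent('Summarize https://example.com');
const candidate = result.response.candidates?.[0];

// Assumed shape: urlContextMetadata.urlMetadata[] with retrievedUrl and urlRetrievalStatus.
for (const meta of candidate?.urlContextMetadata?.urlMetadata ?? []) {
  if (meta.urlRetrievalStatus !== URLRetrievalStatus.URL_RETRIEVAL_STATUS_SUCCESS) {
    console.warn(`Retrieval failed for ${meta.retrievedUrl}: ${meta.urlRetrievalStatus}`);
  }
}
```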
@@ -542,7 +590,7 @@ const SchemaType = {
  * and the {@link https://cloud.google.com/vertex-ai/generative-ai/docs/image/responsible-ai-imagen#safety-filters | Responsible AI and usage guidelines}
  * for more details.
  *
- * @beta
+ * @public
  */
  const ImagenSafetyFilterLevel = {
  /**
@@ -571,7 +619,7 @@ const ImagenSafetyFilterLevel = {
  * See the <a href="http://firebase.google.com/docs/vertex-ai/generate-images">personGeneration</a>
  * documentation for more details.
  *
- * @beta
+ * @public
  */
  const ImagenPersonFilterLevel = {
  /**
@@ -604,7 +652,7 @@ const ImagenPersonFilterLevel = {
  * See the {@link http://firebase.google.com/docs/vertex-ai/generate-images | documentation }
  * for more details and examples of the supported aspect ratios.
  *
- * @beta
+ * @public
  */
  const ImagenAspectRatio = {
  /**
@@ -849,22 +897,35 @@ var Availability;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+ // Defaults to support image inputs for convenience.
+ const defaultExpectedInputs = [{ type: 'image' }];
  /**
  * Defines an inference "backend" that uses Chrome's on-device model,
  * and encapsulates logic for detecting when on-device inference is
  * possible.
  */
  class ChromeAdapterImpl {
- constructor(languageModelProvider, mode, onDeviceParams = {
- createOptions: {
- // Defaults to support image inputs for convenience.
- expectedInputs: [{ type: 'image' }]
- }
- }) {
+ constructor(languageModelProvider, mode, onDeviceParams) {
  this.languageModelProvider = languageModelProvider;
  this.mode = mode;
- this.onDeviceParams = onDeviceParams;
  this.isDownloading = false;
+ this.onDeviceParams = {
+ createOptions: {
+ expectedInputs: defaultExpectedInputs
+ }
+ };
+ if (onDeviceParams) {
+ this.onDeviceParams = onDeviceParams;
+ if (!this.onDeviceParams.createOptions) {
+ this.onDeviceParams.createOptions = {
+ expectedInputs: defaultExpectedInputs
+ };
+ }
+ else if (!this.onDeviceParams.createOptions.expectedInputs) {
+ this.onDeviceParams.createOptions.expectedInputs =
+ defaultExpectedInputs;
+ }
+ }
  }
  /**
  * Checks if a given request can be made on-device.
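The constructor change above means the `expectedInputs: [{ type: 'image' }]` default is now merged in even when a caller supplies partial `onDeviceParams`, instead of being silently dropped. A hedged sketch of the developer-facing effect, assuming the beta hybrid-inference options (`mode`, `onDeviceParams.createOptions` with Chrome Prompt API fields) keep the shapes shown in this diff:

```javascript
import { getAI, getGenerativeModel, InferenceMode } from 'firebase/ai';

// firebaseApp is an already-initialized Firebase app (not shown here).
// createOptions is passed without expectedInputs; with this change the adapter
// fills in expectedInputs = [{ type: 'image' }] rather than losing the default.
const model = getGenerativeModel(getAI(firebaseApp), {
  mode: InferenceMode.PREFER_ON_DEVICE,
  onDeviceParams: {
    // temperature/topK are Chrome Prompt API create options; values are illustrative.
    createOptions: { temperature: 0.8, topK: 3 }
  }
});
```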
@@ -1553,7 +1614,7 @@ function hasValidCandidates(response) {
  * Creates an EnhancedGenerateContentResponse object that has helper functions and
  * other modifications that improve usability.
  */
- function createEnhancedContentResponse(response) {
+ function createEnhancedContentResponse(response, inferenceSource = InferenceSource.IN_CLOUD) {
  /**
  * The Vertex AI backend omits default values.
  * This causes the `index` property to be omitted from the first candidate in the
@@ -1564,6 +1625,7 @@ function createEnhancedContentResponse(response) {
  response.candidates[0].index = 0;
  }
  const responseWithHelpers = addHelpers(response);
+ responseWithHelpers.inferenceSource = inferenceSource;
  return responseWithHelpers;
  }
  /**
@@ -1876,7 +1938,7 @@ function mapGenerateContentCandidates(candidates) {
  // videoMetadata is not supported.
  // Throw early since developers may send a long video as input and only expect to pay
  // for inference on a small portion of the video.
- if (candidate.content?.parts.some(part => part?.videoMetadata)) {
+ if (candidate.content?.parts?.some(part => part?.videoMetadata)) {
  throw new AIError(AIErrorCode.UNSUPPORTED, 'Part.videoMetadata is not supported in the Gemini Developer API. Please remove this property.');
  }
  const mappedCandidate = {
@@ -1886,7 +1948,8 @@
  finishMessage: candidate.finishMessage,
  safetyRatings: mappedSafetyRatings,
  citationMetadata,
- groundingMetadata: candidate.groundingMetadata
+ groundingMetadata: candidate.groundingMetadata,
+ urlContextMetadata: candidate.urlContextMetadata
  };
  mappedCandidates.push(mappedCandidate);
  });
@@ -1939,16 +2002,16 @@ const responseLineRE = /^data\: (.*)(?:\n\n|\r\r|\r\n\r\n)/;
  *
  * @param response - Response from a fetch call
  */
- function processStream(response, apiSettings) {
+ function processStream(response, apiSettings, inferenceSource) {
  const inputStream = response.body.pipeThrough(new TextDecoderStream('utf8', { fatal: true }));
  const responseStream = getResponseStream(inputStream);
  const [stream1, stream2] = responseStream.tee();
  return {
- stream: generateResponseSequence(stream1, apiSettings),
- response: getResponsePromise(stream2, apiSettings)
+ stream: generateResponseSequence(stream1, apiSettings, inferenceSource),
+ response: getResponsePromise(stream2, apiSettings, inferenceSource)
  };
  }
- async function getResponsePromise(stream, apiSettings) {
+ async function getResponsePromise(stream, apiSettings, inferenceSource) {
  const allResponses = [];
  const reader = stream.getReader();
  while (true) {
@@ -1958,12 +2021,12 @@ async function getResponsePromise(stream, apiSettings) {
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
  generateContentResponse = mapGenerateContentResponse(generateContentResponse);
  }
- return createEnhancedContentResponse(generateContentResponse);
+ return createEnhancedContentResponse(generateContentResponse, inferenceSource);
  }
  allResponses.push(value);
  }
  }
- async function* generateResponseSequence(stream, apiSettings) {
+ async function* generateResponseSequence(stream, apiSettings, inferenceSource) {
  const reader = stream.getReader();
  while (true) {
  const { value, done } = await reader.read();
@@ -1972,10 +2035,18 @@ async function* generateResponseSequence(stream, apiSettings) {
  }
  let enhancedResponse;
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
- enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value));
+ enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value), inferenceSource);
  }
  else {
- enhancedResponse = createEnhancedContentResponse(value);
+ enhancedResponse = createEnhancedContentResponse(value, inferenceSource);
+ }
+ const firstCandidate = enhancedResponse.candidates?.[0];
+ // Don't yield a response with no useful data for the developer.
+ if (!firstCandidate?.content?.parts &&
+ !firstCandidate?.finishReason &&
+ !firstCandidate?.citationMetadata &&
+ !firstCandidate?.urlContextMetadata) {
+ continue;
  }
  yield enhancedResponse;
  }
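With this filter in place, chunks that carry no parts, finish reason, citation metadata, or URL context metadata are no longer yielded from the stream. A minimal consumption sketch, assuming `model` is a `GenerativeModel` created as in the earlier examples; the loop body no longer needs its own guard against empty chunks:

```javascript
// Stream a response and print text as it arrives; empty chunks are filtered
// out by the SDK before they reach this loop.
const result = await model.generateContentStream('Tell me a short story.');
for await (const chunk of result.stream) {
  console.log(chunk.text());
}
const full = await result.response; // aggregated response, as before
```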
@@ -2056,36 +2127,43 @@ function aggregateResponses(responses) {
  candidate.safetyRatings;
  aggregatedResponse.candidates[i].groundingMetadata =
  candidate.groundingMetadata;
+ // The urlContextMetadata object is defined in the first chunk of the response stream.
+ // In all subsequent chunks, the urlContextMetadata object will be undefined. We need to
+ // make sure that we don't overwrite the first value urlContextMetadata object with undefined.
+ // FIXME: What happens if we receive a second, valid urlContextMetadata object?
+ const urlContextMetadata = candidate.urlContextMetadata;
+ if (typeof urlContextMetadata === 'object' &&
+ urlContextMetadata !== null &&
+ Object.keys(urlContextMetadata).length > 0) {
+ aggregatedResponse.candidates[i].urlContextMetadata =
+ urlContextMetadata;
+ }
  /**
  * Candidates should always have content and parts, but this handles
  * possible malformed responses.
  */
- if (candidate.content && candidate.content.parts) {
+ if (candidate.content) {
+ // Skip a candidate without parts.
+ if (!candidate.content.parts) {
+ continue;
+ }
  if (!aggregatedResponse.candidates[i].content) {
  aggregatedResponse.candidates[i].content = {
  role: candidate.content.role || 'user',
  parts: []
  };
  }
- const newPart = {};
  for (const part of candidate.content.parts) {
- if (part.text !== undefined) {
- // The backend can send empty text parts. If these are sent back
- // (e.g. in chat history), the backend will respond with an error.
- // To prevent this, ignore empty text parts.
- if (part.text === '') {
- continue;
- }
- newPart.text = part.text;
+ const newPart = { ...part };
+ // The backend can send empty text parts. If these are sent back
+ // (e.g. in chat history), the backend will respond with an error.
+ // To prevent this, ignore empty text parts.
+ if (part.text === '') {
+ continue;
  }
- if (part.functionCall) {
- newPart.functionCall = part.functionCall;
+ if (Object.keys(newPart).length > 0) {
+ aggregatedResponse.candidates[i].content.parts.push(newPart);
  }
- if (Object.keys(newPart).length === 0) {
- throw new AIError(AIErrorCode.INVALID_CONTENT, 'Part should have at least one property, but there are none. This is likely caused ' +
- 'by a malformed response from the backend.');
- }
- aggregatedResponse.candidates[i].content.parts.push(newPart);
  }
  }
  }
@@ -2130,31 +2208,52 @@ const errorsCausingFallback = [
  */
  async function callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) {
  if (!chromeAdapter) {
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  }
  switch (chromeAdapter.mode) {
  case InferenceMode.ONLY_ON_DEVICE:
  if (await chromeAdapter.isAvailable(request)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
  throw new AIError(AIErrorCode.UNSUPPORTED, 'Inference mode is ONLY_ON_DEVICE, but an on-device model is not available.');
  case InferenceMode.ONLY_IN_CLOUD:
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  case InferenceMode.PREFER_IN_CLOUD:
  try {
- return await inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  }
  catch (e) {
  if (e instanceof AIError && errorsCausingFallback.includes(e.code)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
  throw e;
  }
  case InferenceMode.PREFER_ON_DEVICE:
  if (await chromeAdapter.isAvailable(request)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  default:
  throw new AIError(AIErrorCode.ERROR, `Unexpected infererence mode: ${chromeAdapter.mode}`);
  }
@@ -2184,8 +2283,8 @@ async function generateContentStreamOnCloud(apiSettings, model, params, requestO
  /* stream */ true, JSON.stringify(params), requestOptions);
  }
  async function generateContentStream(apiSettings, model, params, chromeAdapter, requestOptions) {
- const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
- return processStream(response, apiSettings); // TODO: Map streaming responses
+ const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
+ return processStream(callResult.response, apiSettings); // TODO: Map streaming responses
  }
  async function generateContentOnCloud(apiSettings, model, params, requestOptions) {
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
@@ -2195,9 +2294,9 @@ async function generateContentOnCloud(apiSettings, model, params, requestOptions
  /* stream */ false, JSON.stringify(params), requestOptions);
  }
  async function generateContent(apiSettings, model, params, chromeAdapter, requestOptions) {
- const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
- const generateContentResponse = await processGenerateContentResponse(response, apiSettings);
- const enhancedResponse = createEnhancedContentResponse(generateContentResponse);
+ const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
+ const generateContentResponse = await processGenerateContentResponse(callResult.response, apiSettings);
+ const enhancedResponse = createEnhancedContentResponse(generateContentResponse, callResult.inferenceSource);
  return {
  response: enhancedResponse
  };
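Together, `callCloudOrDevice` now returning `{ response, inferenceSource }` and `createEnhancedContentResponse` attaching that value mean the enhanced response reports where the answer was produced. A short sketch of reading it, using the `InferenceSource` export added earlier in this diff; the hybrid `mode` option is beta and assumed to accept the shape shown:

```javascript
import { getAI, getGenerativeModel, InferenceMode, InferenceSource } from 'firebase/ai';

// firebaseApp is an already-initialized Firebase app (not shown here).
const model = getGenerativeModel(getAI(firebaseApp), {
  mode: InferenceMode.PREFER_ON_DEVICE
});

const { response } = await model.generateContent('Hello!');
if (response.inferenceSource === InferenceSource.ON_DEVICE) {
  console.log('Answered by the on-device model');
} else {
  console.log('Answered in the cloud');
}
```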
@@ -2771,56 +2870,104 @@ class LiveSession {
  this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaChunks(mediaChunks) {
+ async sendTextRealtime(text) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- // The backend does not support sending more than one mediaChunk in one message.
- // Work around this limitation by sending mediaChunks in separate messages.
- mediaChunks.forEach(mediaChunk => {
- const message = {
- realtimeInput: { mediaChunks: [mediaChunk] }
- };
- this.webSocketHandler.send(JSON.stringify(message));
- });
+ const message = {
+ realtimeInput: {
+ text
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends audio data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaStream(mediaChunkStream) {
+ async sendAudioRealtime(blob) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- const reader = mediaChunkStream.getReader();
- while (true) {
- try {
- const { done, value } = await reader.read();
- if (done) {
- break;
- }
- else if (!value) {
- throw new Error('Missing chunk in reader, but reader is not done.');
- }
- await this.sendMediaChunks([value]);
+ const message = {
+ realtimeInput: {
+ audio: blob
  }
- catch (e) {
- // Re-throw any errors that occur during stream consumption or sending.
- const message = e instanceof Error ? e.message : 'Error processing media stream.';
- throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ }
+ /**
+ * Sends video data to the server in realtime.
+ *
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendVideoRealtime(blob) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ const message = {
+ realtimeInput: {
+ video: blob
  }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ }
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendFunctionResponses(functionResponses) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
+ const message = {
+ toolResponse: {
+ functionResponses
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
  * Yields messages received from the server.
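A hedged sketch of the new realtime send methods side by side. It assumes `liveSession` is an open session obtained from `getLiveGenerativeModel(...).connect()`, that the placeholder variables (`base64Pcm`, `base64Jpeg`, `functionCall`) are supplied by the app, and that the function-response fields (`id`, `name`, `response`) match the updated typings; treat those field names as assumptions:

```javascript
// Text input.
await liveSession.sendTextRealtime('Describe what you hear next.');

// Audio input: base64-encoded 16-bit PCM at 16 kHz, little-endian.
await liveSession.sendAudioRealtime({ mimeType: 'audio/pcm', data: base64Pcm });

// Video input: individual frames at ~1 FPS, JPEG recommended.
await liveSession.sendVideoRealtime({ mimeType: 'image/jpeg', data: base64Jpeg });

// Reply to a tool call previously received from the server.
await liveSession.sendFunctionResponses([
  { id: functionCall.id, name: functionCall.name, response: { ok: true } }
]);
```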
@@ -2878,6 +3025,62 @@ class LiveSession {
  await this.webSocketHandler.close(1000, 'Client closed session.');
  }
  }
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaChunks(mediaChunks) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ // The backend does not support sending more than one mediaChunk in one message.
+ // Work around this limitation by sending mediaChunks in separate messages.
+ mediaChunks.forEach(mediaChunk => {
+ const message = {
+ realtimeInput: { mediaChunks: [mediaChunk] }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ });
+ }
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaStream(mediaChunkStream) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ const reader = mediaChunkStream.getReader();
+ while (true) {
+ try {
+ const { done, value } = await reader.read();
+ if (done) {
+ break;
+ }
+ else if (!value) {
+ throw new Error('Missing chunk in reader, but reader is not done.');
+ }
+ await this.sendMediaChunks([value]);
+ }
+ catch (e) {
+ // Re-throw any errors that occur during stream consumption or sending.
+ const message = e instanceof Error ? e.message : 'Error processing media stream.';
+ throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ }
+ }
+ }
  }

  /**
@@ -3005,7 +3208,7 @@ class LiveGenerativeModel extends AIModel {
  * }
  * ```
  *
- * @beta
+ * @public
  */
  class ImagenModel extends AIModel {
  /**
@@ -3041,7 +3244,7 @@ class ImagenModel extends AIModel {
  * returned object will have a `filteredReason` property.
  * If all images are filtered, the `images` array will be empty.
  *
- * @beta
+ * @public
  */
  async generateImages(prompt) {
  const body = createPredictRequestBody(prompt, {
@@ -3504,7 +3707,7 @@ class AnyOfSchema extends Schema {
  * }
  * ```
  *
- * @beta
+ * @public
  */
  class ImagenImageFormat {
  constructor() {
@@ -3516,7 +3719,7 @@ class ImagenImageFormat {
  * @param compressionQuality - The level of compression (a number between 0 and 100).
  * @returns An {@link ImagenImageFormat} object for a JPEG image.
  *
- * @beta
+ * @public
  */
  static jpeg(compressionQuality) {
  if (compressionQuality &&
@@ -3530,7 +3733,7 @@ class ImagenImageFormat {
  *
  * @returns An {@link ImagenImageFormat} object for a PNG image.
  *
- * @beta
+ * @public
  */
  static png() {
  return { mimeType: 'image/png' };
@@ -3650,7 +3853,7 @@ class AudioConversationRunner {
  mimeType: 'audio/pcm',
  data: base64
  };
- void this.liveSession.sendMediaChunks([chunk]);
+ void this.liveSession.sendAudioRealtime(chunk);
  };
  }
  /**
@@ -3772,9 +3975,9 @@ class AudioConversationRunner {
  }
  else {
  try {
- const resultPart = await this.options.functionCallingHandler(message.functionCalls);
+ const functionResponse = await this.options.functionCallingHandler(message.functionCalls);
  if (!this.isStopped) {
- void this.liveSession.send([resultPart]);
+ void this.liveSession.sendFunctionResponses([functionResponse]);
  }
  }
  catch (e) {
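This change means the value returned by `functionCallingHandler` is now forwarded straight to `sendFunctionResponses`, so the handler should produce a function response object rather than a `Part` wrapping one. A sketch under that assumption (the exact response fields, and the hypothetical `lookupWeather` helper, are illustrative only; check the updated typings):

```javascript
import { startAudioConversation } from 'firebase/ai';

// liveSession is an open LiveSession as in the earlier sketch.
const controller = await startAudioConversation(liveSession, {
  functionCallingHandler: async (functionCalls) => {
    const call = functionCalls[0];
    const data = await lookupWeather(call.args); // hypothetical app-side helper
    // Returned object is passed to liveSession.sendFunctionResponses([...]).
    return { id: call.id, name: call.name, response: data };
  }
});
```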
@@ -3994,7 +4197,7 @@ function getGenerativeModel(ai, modelParams, requestOptions) {
  * @throws If the `apiKey` or `projectId` fields are missing in your
  * Firebase config.
  *
- * @beta
+ * @public
  */
  function getImagenModel(ai, modelParams, requestOptions) {
  if (!modelParams.model) {
@@ -4035,5 +4238,5 @@ function registerAI() {
  }
  registerAI();

- export { AIError, AIErrorCode, AIModel, AnyOfSchema, ArraySchema, Backend, BackendType, BlockReason, BooleanSchema, ChatSession, FinishReason, FunctionCallingMode, GenerativeModel, GoogleAIBackend, HarmBlockMethod, HarmBlockThreshold, HarmCategory, HarmProbability, HarmSeverity, ImagenAspectRatio, ImagenImageFormat, ImagenModel, ImagenPersonFilterLevel, ImagenSafetyFilterLevel, InferenceMode, IntegerSchema, Language, LiveGenerativeModel, LiveResponseType, LiveSession, Modality, NumberSchema, ObjectSchema, Outcome, POSSIBLE_ROLES, ResponseModality, Schema, SchemaType, StringSchema, VertexAIBackend, getAI, getGenerativeModel, getImagenModel, getLiveGenerativeModel, startAudioConversation };
+ export { AIError, AIErrorCode, AIModel, AnyOfSchema, ArraySchema, Backend, BackendType, BlockReason, BooleanSchema, ChatSession, FinishReason, FunctionCallingMode, GenerativeModel, GoogleAIBackend, HarmBlockMethod, HarmBlockThreshold, HarmCategory, HarmProbability, HarmSeverity, ImagenAspectRatio, ImagenImageFormat, ImagenModel, ImagenPersonFilterLevel, ImagenSafetyFilterLevel, InferenceMode, InferenceSource, IntegerSchema, Language, LiveGenerativeModel, LiveResponseType, LiveSession, Modality, NumberSchema, ObjectSchema, Outcome, POSSIBLE_ROLES, ResponseModality, Schema, SchemaType, StringSchema, URLRetrievalStatus, VertexAIBackend, getAI, getGenerativeModel, getImagenModel, getLiveGenerativeModel, startAudioConversation };
  //# sourceMappingURL=index.esm.js.map