@firebase/ai 2.4.0 → 2.5.0-canary.0800a8bed

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/dist/ai-public.d.ts +134 -9
  2. package/dist/ai.d.ts +137 -9
  3. package/dist/esm/index.esm.js +202 -68
  4. package/dist/esm/index.esm.js.map +1 -1
  5. package/dist/esm/src/factory-node.d.ts +19 -0
  6. package/dist/esm/src/methods/chrome-adapter.d.ts +1 -1
  7. package/dist/esm/src/methods/live-session.d.ts +64 -9
  8. package/dist/esm/src/requests/hybrid-helpers.d.ts +7 -2
  9. package/dist/esm/src/requests/response-helpers.d.ts +2 -2
  10. package/dist/esm/src/requests/stream-reader.d.ts +2 -1
  11. package/dist/esm/src/service.d.ts +3 -4
  12. package/dist/esm/src/types/chrome-adapter.d.ts +5 -0
  13. package/dist/esm/src/types/enums.d.ts +15 -0
  14. package/dist/esm/src/types/live-responses.d.ts +21 -3
  15. package/dist/esm/src/types/requests.d.ts +23 -0
  16. package/dist/esm/src/types/responses.d.ts +28 -1
  17. package/dist/index.cjs.js +202 -67
  18. package/dist/index.cjs.js.map +1 -1
  19. package/dist/index.node.cjs.js +306 -166
  20. package/dist/index.node.cjs.js.map +1 -1
  21. package/dist/index.node.mjs +306 -167
  22. package/dist/index.node.mjs.map +1 -1
  23. package/dist/src/factory-node.d.ts +19 -0
  24. package/dist/src/methods/chrome-adapter.d.ts +1 -1
  25. package/dist/src/methods/live-session.d.ts +64 -9
  26. package/dist/src/requests/hybrid-helpers.d.ts +7 -2
  27. package/dist/src/requests/response-helpers.d.ts +2 -2
  28. package/dist/src/requests/stream-reader.d.ts +2 -1
  29. package/dist/src/service.d.ts +3 -4
  30. package/dist/src/types/chrome-adapter.d.ts +5 -0
  31. package/dist/src/types/enums.d.ts +15 -0
  32. package/dist/src/types/live-responses.d.ts +21 -3
  33. package/dist/src/types/requests.d.ts +23 -0
  34. package/dist/src/types/responses.d.ts +28 -1
  35. package/package.json +8 -8
package/dist/index.cjs.js CHANGED
@@ -8,7 +8,7 @@ var util = require('@firebase/util');
  var logger$1 = require('@firebase/logger');

  var name = "@firebase/ai";
- var version = "2.4.0";
+ var version = "2.5.0-canary.0800a8bed";

  /**
  * @license
@@ -383,6 +383,15 @@ const InferenceMode = {
  'ONLY_IN_CLOUD': 'only_in_cloud',
  'PREFER_IN_CLOUD': 'prefer_in_cloud'
  };
+ /**
+ * Indicates whether inference happened on-device or in-cloud.
+ *
+ * @beta
+ */
+ const InferenceSource = {
+ 'ON_DEVICE': 'on_device',
+ 'IN_CLOUD': 'in_cloud'
+ };
  /**
  * Represents the result of the code execution.
  *
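Note on the new export: `InferenceSource` lets hybrid-inference callers see whether a response came from Chrome's on-device model or from the cloud backend. A hedged usage sketch follows; `getAI`, `getGenerativeModel`, the hybrid `mode` option, and `firebaseApp` are assumed from the wider Firebase AI API and are not part of this hunk.

```javascript
// Sketch only: getAI/getGenerativeModel and the hybrid `mode` option are assumed from the
// existing Firebase AI API; only InferenceSource itself is added in this diff.
import { getAI, getGenerativeModel, InferenceMode, InferenceSource } from '@firebase/ai';

async function askWithSourceCheck(firebaseApp) {
  const ai = getAI(firebaseApp);
  // Hybrid inference: prefer the on-device model, fall back to the cloud.
  const model = getGenerativeModel(ai, { mode: InferenceMode.PREFER_ON_DEVICE });

  const result = await model.generateContent('Summarize this page.');
  // New in 2.5.0: the enhanced response records where inference actually ran.
  if (result.response.inferenceSource === InferenceSource.ON_DEVICE) {
    console.log('Answered by the on-device model');
  } else {
    console.log('Answered in the cloud'); // InferenceSource.IN_CLOUD
  }
  return result.response.text();
}
```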
@@ -892,22 +901,35 @@ var Availability;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+ // Defaults to support image inputs for convenience.
+ const defaultExpectedInputs = [{ type: 'image' }];
  /**
  * Defines an inference "backend" that uses Chrome's on-device model,
  * and encapsulates logic for detecting when on-device inference is
  * possible.
  */
  class ChromeAdapterImpl {
- constructor(languageModelProvider, mode, onDeviceParams = {
- createOptions: {
- // Defaults to support image inputs for convenience.
- expectedInputs: [{ type: 'image' }]
- }
- }) {
+ constructor(languageModelProvider, mode, onDeviceParams) {
  this.languageModelProvider = languageModelProvider;
  this.mode = mode;
- this.onDeviceParams = onDeviceParams;
  this.isDownloading = false;
+ this.onDeviceParams = {
+ createOptions: {
+ expectedInputs: defaultExpectedInputs
+ }
+ };
+ if (onDeviceParams) {
+ this.onDeviceParams = onDeviceParams;
+ if (!this.onDeviceParams.createOptions) {
+ this.onDeviceParams.createOptions = {
+ expectedInputs: defaultExpectedInputs
+ };
+ }
+ else if (!this.onDeviceParams.createOptions.expectedInputs) {
+ this.onDeviceParams.createOptions.expectedInputs =
+ defaultExpectedInputs;
+ }
+ }
  }
  /**
  * Checks if a given request can be made on-device.
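The constructor change above fixes a subtle default-parameter problem: previously, passing any `onDeviceParams` object replaced the default wholesale, silently dropping the image `expectedInputs`. The new code backfills the missing pieces instead. A standalone sketch of that merging behavior (not the shipped class; `temperature` is just an illustrative Prompt API create option):

```javascript
// Standalone sketch of the backfill logic in ChromeAdapterImpl's constructor.
const defaultExpectedInputs = [{ type: 'image' }];

function resolveOnDeviceParams(onDeviceParams) {
  if (!onDeviceParams) {
    return { createOptions: { expectedInputs: defaultExpectedInputs } };
  }
  if (!onDeviceParams.createOptions) {
    onDeviceParams.createOptions = { expectedInputs: defaultExpectedInputs };
  } else if (!onDeviceParams.createOptions.expectedInputs) {
    onDeviceParams.createOptions.expectedInputs = defaultExpectedInputs;
  }
  return onDeviceParams;
}

// A partial object keeps its own options and gains the default expectedInputs:
const resolved = resolveOnDeviceParams({ createOptions: { temperature: 0.2 } });
console.log(resolved.createOptions.expectedInputs); // [{ type: 'image' }]
```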
@@ -1596,7 +1618,7 @@ function hasValidCandidates(response) {
  * Creates an EnhancedGenerateContentResponse object that has helper functions and
  * other modifications that improve usability.
  */
- function createEnhancedContentResponse(response) {
+ function createEnhancedContentResponse(response, inferenceSource = InferenceSource.IN_CLOUD) {
  /**
  * The Vertex AI backend omits default values.
  * This causes the `index` property to be omitted from the first candidate in the
@@ -1607,6 +1629,7 @@ function createEnhancedContentResponse(response) {
  response.candidates[0].index = 0;
  }
  const responseWithHelpers = addHelpers(response);
+ responseWithHelpers.inferenceSource = inferenceSource;
  return responseWithHelpers;
  }
  /**
@@ -1983,16 +2006,16 @@ const responseLineRE = /^data\: (.*)(?:\n\n|\r\r|\r\n\r\n)/;
  *
  * @param response - Response from a fetch call
  */
- function processStream(response, apiSettings) {
+ function processStream(response, apiSettings, inferenceSource) {
  const inputStream = response.body.pipeThrough(new TextDecoderStream('utf8', { fatal: true }));
  const responseStream = getResponseStream(inputStream);
  const [stream1, stream2] = responseStream.tee();
  return {
- stream: generateResponseSequence(stream1, apiSettings),
- response: getResponsePromise(stream2, apiSettings)
+ stream: generateResponseSequence(stream1, apiSettings, inferenceSource),
+ response: getResponsePromise(stream2, apiSettings, inferenceSource)
  };
  }
- async function getResponsePromise(stream, apiSettings) {
+ async function getResponsePromise(stream, apiSettings, inferenceSource) {
  const allResponses = [];
  const reader = stream.getReader();
  while (true) {
@@ -2002,12 +2025,12 @@ async function getResponsePromise(stream, apiSettings) {
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
  generateContentResponse = mapGenerateContentResponse(generateContentResponse);
  }
- return createEnhancedContentResponse(generateContentResponse);
+ return createEnhancedContentResponse(generateContentResponse, inferenceSource);
  }
  allResponses.push(value);
  }
  }
- async function* generateResponseSequence(stream, apiSettings) {
+ async function* generateResponseSequence(stream, apiSettings, inferenceSource) {
  const reader = stream.getReader();
  while (true) {
  const { value, done } = await reader.read();
@@ -2016,10 +2039,10 @@ async function* generateResponseSequence(stream, apiSettings) {
  }
  let enhancedResponse;
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
- enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value));
+ enhancedResponse = createEnhancedContentResponse(mapGenerateContentResponse(value), inferenceSource);
  }
  else {
- enhancedResponse = createEnhancedContentResponse(value);
+ enhancedResponse = createEnhancedContentResponse(value, inferenceSource);
  }
  const firstCandidate = enhancedResponse.candidates?.[0];
  // Don't yield a response with no useful data for the developer.
@@ -2189,31 +2212,52 @@ const errorsCausingFallback = [
  */
  async function callCloudOrDevice(request, chromeAdapter, onDeviceCall, inCloudCall) {
  if (!chromeAdapter) {
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  }
  switch (chromeAdapter.mode) {
  case InferenceMode.ONLY_ON_DEVICE:
  if (await chromeAdapter.isAvailable(request)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
  throw new AIError(AIErrorCode.UNSUPPORTED, 'Inference mode is ONLY_ON_DEVICE, but an on-device model is not available.');
  case InferenceMode.ONLY_IN_CLOUD:
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  case InferenceMode.PREFER_IN_CLOUD:
  try {
- return await inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  }
  catch (e) {
  if (e instanceof AIError && errorsCausingFallback.includes(e.code)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
  throw e;
  }
  case InferenceMode.PREFER_ON_DEVICE:
  if (await chromeAdapter.isAvailable(request)) {
- return onDeviceCall();
+ return {
+ response: await onDeviceCall(),
+ inferenceSource: InferenceSource.ON_DEVICE
+ };
  }
- return inCloudCall();
+ return {
+ response: await inCloudCall(),
+ inferenceSource: InferenceSource.IN_CLOUD
+ };
  default:
  throw new AIError(AIErrorCode.ERROR, `Unexpected infererence mode: ${chromeAdapter.mode}`);
  }
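In short, every branch of `callCloudOrDevice` now resolves to a small wrapper that records which path actually ran, instead of returning the bare response. A simplified sketch of the new contract (not the shipped function, which also dispatches on the four inference modes and handles fallback errors):

```javascript
// Simplified sketch of the new return contract; the real function above also
// dispatches on InferenceMode and falls back between cloud and device.
async function callCloudOrDeviceSketch(canRunOnDevice, onDeviceCall, inCloudCall) {
  if (canRunOnDevice) {
    return { response: await onDeviceCall(), inferenceSource: 'on_device' };
  }
  return { response: await inCloudCall(), inferenceSource: 'in_cloud' };
}

// Callers unwrap the wrapper, as generateContent() does below:
// const { response, inferenceSource } = await callCloudOrDeviceSketch(...);
```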
@@ -2243,8 +2287,8 @@ async function generateContentStreamOnCloud(apiSettings, model, params, requestO
  /* stream */ true, JSON.stringify(params), requestOptions);
  }
  async function generateContentStream(apiSettings, model, params, chromeAdapter, requestOptions) {
- const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
- return processStream(response, apiSettings); // TODO: Map streaming responses
+ const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContentStream(params), () => generateContentStreamOnCloud(apiSettings, model, params, requestOptions));
+ return processStream(callResult.response, apiSettings); // TODO: Map streaming responses
  }
  async function generateContentOnCloud(apiSettings, model, params, requestOptions) {
  if (apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
@@ -2254,9 +2298,9 @@ async function generateContentOnCloud(apiSettings, model, params, requestOptions
  /* stream */ false, JSON.stringify(params), requestOptions);
  }
  async function generateContent(apiSettings, model, params, chromeAdapter, requestOptions) {
- const response = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
- const generateContentResponse = await processGenerateContentResponse(response, apiSettings);
- const enhancedResponse = createEnhancedContentResponse(generateContentResponse);
+ const callResult = await callCloudOrDevice(params, chromeAdapter, () => chromeAdapter.generateContent(params), () => generateContentOnCloud(apiSettings, model, params, requestOptions));
+ const generateContentResponse = await processGenerateContentResponse(callResult.response, apiSettings);
+ const enhancedResponse = createEnhancedContentResponse(generateContentResponse, callResult.inferenceSource);
  return {
  response: enhancedResponse
  };
@@ -2830,75 +2874,104 @@ class LiveSession {
  this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends realtime input to the server.
+ * Sends text to the server in realtime.
  *
- * @param mediaChunks - The media chunks to send.
+ * @example
+ * ```javascript
+ * liveSession.sendTextRealtime("Hello, how are you?");
+ * ```
+ *
+ * @param text - The text data to send.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaChunks(mediaChunks) {
+ async sendTextRealtime(text) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- // The backend does not support sending more than one mediaChunk in one message.
- // Work around this limitation by sending mediaChunks in separate messages.
- mediaChunks.forEach(mediaChunk => {
- const message = {
- realtimeInput: { mediaChunks: [mediaChunk] }
- };
- this.webSocketHandler.send(JSON.stringify(message));
- });
+ const message = {
+ realtimeInput: {
+ text
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends function responses to the server.
+ * Sends audio data to the server in realtime.
  *
- * @param functionResponses - The function responses to send.
+ * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+ * little-endian.
+ *
+ * @example
+ * ```javascript
+ * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+ * const blob = { mimeType: "audio/pcm", data: pcmData };
+ * liveSession.sendAudioRealtime(blob);
+ * ```
+ *
+ * @param blob - The base64-encoded PCM data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendFunctionResponses(functionResponses) {
+ async sendAudioRealtime(blob) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
  const message = {
- toolResponse: {
- functionResponses
+ realtimeInput: {
+ audio: blob
  }
  };
  this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
- * Sends a stream of {@link GenerativeContentBlob}.
+ * Sends video data to the server in realtime.
  *
- * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+ * is recommended to set `mimeType` to `image/jpeg`.
+ *
+ * @example
+ * ```javascript
+ * // const videoFrame = ... base64-encoded JPEG data
+ * const blob = { mimeType: "image/jpeg", data: videoFrame };
+ * liveSession.sendVideoRealtime(blob);
+ * ```
+ * @param blob - The base64-encoded video data to send to the server in realtime.
  * @throws If this session has been closed.
  *
  * @beta
  */
- async sendMediaStream(mediaChunkStream) {
+ async sendVideoRealtime(blob) {
  if (this.isClosed) {
  throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
- const reader = mediaChunkStream.getReader();
- while (true) {
- try {
- const { done, value } = await reader.read();
- if (done) {
- break;
- }
- else if (!value) {
- throw new Error('Missing chunk in reader, but reader is not done.');
- }
- await this.sendMediaChunks([value]);
- }
- catch (e) {
- // Re-throw any errors that occur during stream consumption or sending.
- const message = e instanceof Error ? e.message : 'Error processing media stream.';
- throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ const message = {
+ realtimeInput: {
+ video: blob
  }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ }
+ /**
+ * Sends function responses to the server.
+ *
+ * @param functionResponses - The function responses to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendFunctionResponses(functionResponses) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
  }
+ const message = {
+ toolResponse: {
+ functionResponses
+ }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
  }
  /**
  * Yields messages received from the server.
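The relocated `sendFunctionResponses()` keeps its old behavior but, unlike the new realtime methods, carries no JSDoc example. A hedged sketch of answering a server-issued tool call over the live session; `liveSession` and the `FunctionResponse` field names are assumptions taken from the SDK's public types, not from this hunk:

```javascript
// Hedged sketch: liveSession is assumed (e.g. obtained from the Live API's connect call),
// and the FunctionResponse shape { id, name, response } is assumed from the public types.
await liveSession.sendTextRealtime('What is the weather in Paris?');

// When the server asks for a tool call, answer it with sendFunctionResponses:
await liveSession.sendFunctionResponses([
  {
    id: 'call-123',               // id of the function call the server issued (assumed field)
    name: 'getWeather',           // hypothetical tool name
    response: { temperatureC: 21 }
  }
]);
```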
@@ -2956,6 +3029,62 @@ class LiveSession {
  await this.webSocketHandler.close(1000, 'Client closed session.');
  }
  }
+ /**
+ * Sends realtime input to the server.
+ *
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaChunks(mediaChunks) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ // The backend does not support sending more than one mediaChunk in one message.
+ // Work around this limitation by sending mediaChunks in separate messages.
+ mediaChunks.forEach(mediaChunk => {
+ const message = {
+ realtimeInput: { mediaChunks: [mediaChunk] }
+ };
+ this.webSocketHandler.send(JSON.stringify(message));
+ });
+ }
+ /**
+ * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+ *
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ async sendMediaStream(mediaChunkStream) {
+ if (this.isClosed) {
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+ }
+ const reader = mediaChunkStream.getReader();
+ while (true) {
+ try {
+ const { done, value } = await reader.read();
+ if (done) {
+ break;
+ }
+ else if (!value) {
+ throw new Error('Missing chunk in reader, but reader is not done.');
+ }
+ await this.sendMediaChunks([value]);
+ }
+ catch (e) {
+ // Re-throw any errors that occur during stream consumption or sending.
+ const message = e instanceof Error ? e.message : 'Error processing media stream.';
+ throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+ }
+ }
+ }
  }

  /**
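For callers still on the deprecated entry points above, the migration is mechanical: one call per modality instead of a mixed chunk array. A hedged sketch; `liveSession` and the base64 payloads are placeholders, not values produced by this diff:

```javascript
const base64Audio = '...'; // placeholder: base64 16-bit PCM @ 16kHz, little-endian
const base64Frame = '...'; // placeholder: base64-encoded JPEG frame

// Before (deprecated): every modality funnelled through media chunks.
await liveSession.sendMediaChunks([
  { mimeType: 'audio/pcm', data: base64Audio },
  { mimeType: 'image/jpeg', data: base64Frame }
]);

// After: one call per modality.
await liveSession.sendAudioRealtime({ mimeType: 'audio/pcm', data: base64Audio });
await liveSession.sendVideoRealtime({ mimeType: 'image/jpeg', data: base64Frame });
```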
@@ -3016,13 +3145,18 @@ class LiveGenerativeModel extends AIModel {
  else {
  fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
  }
+ // inputAudioTranscription and outputAudioTranscription are on the generation config in the public API,
+ // but the backend expects them to be in the `setup` message.
+ const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = this.generationConfig;
  const setupMessage = {
  setup: {
  model: fullModelPath,
- generationConfig: this.generationConfig,
+ generationConfig,
  tools: this.tools,
  toolConfig: this.toolConfig,
- systemInstruction: this.systemInstruction
+ systemInstruction: this.systemInstruction,
+ inputAudioTranscription,
+ outputAudioTranscription
  }
  };
  try {
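The hunk above implies the public generation config can now carry transcription settings, which the SDK hoists into the WebSocket `setup` message. A hedged sketch of what that looks like from the caller's side; `getLiveGenerativeModel`, `connect()`, the model id, and the empty-object option values are assumptions from the wider Live API, and `firebaseApp` is a placeholder:

```javascript
// Hedged sketch: only the setup-message hoisting is shown in this hunk; the rest is assumed.
import { getAI, getLiveGenerativeModel } from '@firebase/ai';

const ai = getAI(firebaseApp); // firebaseApp: an initialized FirebaseApp (assumed placeholder)
const liveModel = getLiveGenerativeModel(ai, {
  model: 'gemini-live-model-name', // placeholder model id
  generationConfig: {
    inputAudioTranscription: {},   // ask the server to transcribe user audio (assumed value shape)
    outputAudioTranscription: {}   // ask the server to transcribe model audio (assumed value shape)
  }
});
// On connect, the SDK strips both transcription fields out of generationConfig and
// places them directly on the `setup` message, as the destructuring above shows.
const session = await liveModel.connect();
```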
@@ -3728,7 +3862,7 @@ class AudioConversationRunner {
  mimeType: 'audio/pcm',
  data: base64
  };
- void this.liveSession.sendMediaChunks([chunk]);
+ void this.liveSession.sendAudioRealtime(chunk);
  };
  }
  /**
@@ -4138,6 +4272,7 @@ exports.ImagenModel = ImagenModel;
  exports.ImagenPersonFilterLevel = ImagenPersonFilterLevel;
  exports.ImagenSafetyFilterLevel = ImagenSafetyFilterLevel;
  exports.InferenceMode = InferenceMode;
+ exports.InferenceSource = InferenceSource;
  exports.IntegerSchema = IntegerSchema;
  exports.Language = Language;
  exports.LiveGenerativeModel = LiveGenerativeModel;