@firebase/ai 2.1.0 → 2.2.0-canary.095c098de

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/dist/ai-public.d.ts +443 -10
  2. package/dist/ai.d.ts +525 -11
  3. package/dist/esm/index.esm.js +1255 -368
  4. package/dist/esm/index.esm.js.map +1 -1
  5. package/dist/esm/src/api.d.ts +18 -3
  6. package/dist/esm/src/constants.d.ts +1 -1
  7. package/dist/esm/src/index.d.ts +2 -1
  8. package/dist/esm/src/methods/chrome-adapter.d.ts +30 -24
  9. package/dist/esm/src/methods/live-session-helpers.d.ts +154 -0
  10. package/dist/esm/src/methods/live-session.d.ts +90 -0
  11. package/dist/esm/src/models/ai-model.d.ts +1 -1
  12. package/dist/esm/src/models/index.d.ts +1 -0
  13. package/dist/esm/src/models/live-generative-model.d.ts +55 -0
  14. package/dist/esm/src/public-types.d.ts +10 -1
  15. package/dist/esm/src/requests/request.d.ts +6 -0
  16. package/dist/esm/src/requests/response-helpers.d.ts +9 -5
  17. package/dist/esm/src/service.d.ts +7 -2
  18. package/dist/esm/src/types/chrome-adapter.d.ts +6 -4
  19. package/dist/esm/src/types/content.d.ts +42 -0
  20. package/dist/esm/src/types/enums.d.ts +5 -0
  21. package/dist/esm/src/types/error.d.ts +2 -0
  22. package/dist/esm/src/types/imagen/internal.d.ts +10 -0
  23. package/dist/esm/src/types/live-responses.d.ts +53 -0
  24. package/dist/esm/src/types/requests.d.ts +109 -1
  25. package/dist/esm/src/types/responses.d.ts +87 -4
  26. package/dist/esm/src/websocket.d.ts +67 -0
  27. package/dist/index.cjs.js +1258 -366
  28. package/dist/index.cjs.js.map +1 -1
  29. package/dist/index.node.cjs.js +907 -311
  30. package/dist/index.node.cjs.js.map +1 -1
  31. package/dist/index.node.mjs +904 -313
  32. package/dist/index.node.mjs.map +1 -1
  33. package/dist/src/api.d.ts +18 -3
  34. package/dist/src/constants.d.ts +1 -1
  35. package/dist/src/index.d.ts +2 -1
  36. package/dist/src/methods/chrome-adapter.d.ts +30 -24
  37. package/dist/src/methods/live-session-helpers.d.ts +154 -0
  38. package/dist/src/methods/live-session.d.ts +90 -0
  39. package/dist/src/models/ai-model.d.ts +1 -1
  40. package/dist/src/models/index.d.ts +1 -0
  41. package/dist/src/models/live-generative-model.d.ts +55 -0
  42. package/dist/src/public-types.d.ts +10 -1
  43. package/dist/src/requests/request.d.ts +6 -0
  44. package/dist/src/requests/response-helpers.d.ts +9 -5
  45. package/dist/src/service.d.ts +7 -2
  46. package/dist/src/types/chrome-adapter.d.ts +6 -4
  47. package/dist/src/types/content.d.ts +42 -0
  48. package/dist/src/types/enums.d.ts +5 -0
  49. package/dist/src/types/error.d.ts +2 -0
  50. package/dist/src/types/imagen/internal.d.ts +10 -0
  51. package/dist/src/types/live-responses.d.ts +53 -0
  52. package/dist/src/types/requests.d.ts +109 -1
  53. package/dist/src/types/responses.d.ts +87 -4
  54. package/dist/src/websocket.d.ts +67 -0
  55. package/package.json +10 -8
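
The headline change in this release is the Live API surface visible in the new files above (`live-session.d.ts`, `live-session-helpers.d.ts`, `live-generative-model.d.ts`, `websocket.d.ts`) and in the implementation diff below: a `LiveGenerativeModel` that opens a WebSocket session for low-latency, bidirectional streaming, plus audio helpers and a new `AUDIO` response modality. The following is a usage sketch inferred from the declarations and JSDoc added in this diff; `getLiveGenerativeModel`'s exact signature, the model name, and the shape of the `useLimitedUseAppCheckTokens` option are assumptions, not APIs verified outside this excerpt.

```javascript
// Sketch inferred from the JSDoc added in this diff; signatures and option
// shapes marked below are assumptions, not verified public API.
import { initializeApp } from 'firebase/app';
import {
  getAI,
  getLiveGenerativeModel,
  GoogleAIBackend,
  ResponseModality
} from 'firebase/ai';

const app = initializeApp({ /* your Firebase config */ });

// `useLimitedUseAppCheckTokens` corresponds to the new `ai.options` check in the
// AIModel constructor below (option name/placement assumed from that check).
const ai = getAI(app, {
  backend: new GoogleAIBackend(),
  useLimitedUseAppCheckTokens: true
});

async function run() {
  // The Live API enables low-latency, two-way multimodal interactions with Gemini.
  const model = getLiveGenerativeModel(ai, {
    model: 'gemini-live-model-name', // hypothetical model name
    generationConfig: { responseModalities: [ResponseModality.AUDIO] }
  });

  const session = await model.connect(); // opens the WebSocket and performs the setup handshake
  await session.send('Hello!');          // sends one client turn

  for await (const message of session.receive()) {
    // Each message carries a `type` of 'serverContent', 'toolCall', or 'toolCallCancellation'.
    console.log(message.type, message);
  }

  await session.close(); // any further calls on this session will throw
}

run();
```

For hands-free use, the diff also adds `startAudioConversation(liveSession)` further down in this file, which manages microphone capture, resampling to 16 kHz input PCM, 24 kHz playback scheduling, and interruption handling.
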
package/dist/index.cjs.js CHANGED
@@ -8,7 +8,7 @@ var util = require('@firebase/util');
8
8
  var logger$1 = require('@firebase/logger');
9
9
 
10
10
  var name = "@firebase/ai";
11
- var version = "2.1.0";
11
+ var version = "2.2.0-canary.095c098de";
12
12
 
13
13
  /**
14
14
  * @license
@@ -28,7 +28,7 @@ var version = "2.1.0";
28
28
  */
29
29
  const AI_TYPE = 'AI';
30
30
  const DEFAULT_LOCATION = 'us-central1';
31
- const DEFAULT_BASE_URL = 'https://firebasevertexai.googleapis.com';
31
+ const DEFAULT_DOMAIN = 'firebasevertexai.googleapis.com';
32
32
  const DEFAULT_API_VERSION = 'v1beta';
33
33
  const PACKAGE_VERSION = version;
34
34
  const LANGUAGE_TAG = 'gl-js';
@@ -293,7 +293,12 @@ const ResponseModality = {
293
293
  * Image.
294
294
  * @beta
295
295
  */
296
- IMAGE: 'IMAGE'
296
+ IMAGE: 'IMAGE',
297
+ /**
298
+ * Audio.
299
+ * @beta
300
+ */
301
+ AUDIO: 'AUDIO'
297
302
  };
298
303
  /**
299
304
  * <b>(EXPERIMENTAL)</b>
@@ -306,6 +311,33 @@ const InferenceMode = {
306
311
  'ONLY_IN_CLOUD': 'only_in_cloud'
307
312
  };
308
313
 
314
+ /**
315
+ * @license
316
+ * Copyright 2024 Google LLC
317
+ *
318
+ * Licensed under the Apache License, Version 2.0 (the "License");
319
+ * you may not use this file except in compliance with the License.
320
+ * You may obtain a copy of the License at
321
+ *
322
+ * http://www.apache.org/licenses/LICENSE-2.0
323
+ *
324
+ * Unless required by applicable law or agreed to in writing, software
325
+ * distributed under the License is distributed on an "AS IS" BASIS,
326
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
327
+ * See the License for the specific language governing permissions and
328
+ * limitations under the License.
329
+ */
330
+ /**
331
+ * The types of responses that can be returned by {@link LiveSession.receive}.
332
+ *
333
+ * @beta
334
+ */
335
+ const LiveResponseType = {
336
+ SERVER_CONTENT: 'serverContent',
337
+ TOOL_CALL: 'toolCall',
338
+ TOOL_CALL_CANCELLATION: 'toolCallCancellation'
339
+ };
340
+
309
341
  /**
310
342
  * @license
311
343
  * Copyright 2024 Google LLC
@@ -336,6 +368,8 @@ const AIErrorCode = {
336
368
  RESPONSE_ERROR: 'response-error',
337
369
  /** An error occurred while performing a fetch. */
338
370
  FETCH_ERROR: 'fetch-error',
371
+ /** An error occurred because an operation was attempted on a closed session. */
372
+ SESSION_CLOSED: 'session-closed',
339
373
  /** An error associated with a Content object. */
340
374
  INVALID_CONTENT: 'invalid-content',
341
375
  /** An error due to the Firebase API not being enabled in the Console. */
@@ -640,9 +674,10 @@ class VertexAIBackend extends Backend {
640
674
  * limitations under the License.
641
675
  */
642
676
  class AIService {
643
- constructor(app, backend, authProvider, appCheckProvider) {
677
+ constructor(app, backend, authProvider, appCheckProvider, chromeAdapterFactory) {
644
678
  this.app = app;
645
679
  this.backend = backend;
680
+ this.chromeAdapterFactory = chromeAdapterFactory;
646
681
  const appCheck = appCheckProvider?.getImmediate({ optional: true });
647
682
  const auth = authProvider?.getImmediate({ optional: true });
648
683
  this.auth = auth || null;
@@ -657,6 +692,12 @@ class AIService {
657
692
  _delete() {
658
693
  return Promise.resolve();
659
694
  }
695
+ set options(optionsToSet) {
696
+ this._options = optionsToSet;
697
+ }
698
+ get options() {
699
+ return this._options;
700
+ }
660
701
  }
661
702
 
662
703
  /**
@@ -841,7 +882,12 @@ class AIModel {
841
882
  };
842
883
  }
843
884
  else if (ai.appCheck) {
844
- this._apiSettings.getAppCheckToken = () => ai.appCheck.getToken();
885
+ if (ai.options?.useLimitedUseAppCheckTokens) {
886
+ this._apiSettings.getAppCheckToken = () => ai.appCheck.getLimitedUseToken();
887
+ }
888
+ else {
889
+ this._apiSettings.getAppCheckToken = () => ai.appCheck.getToken();
890
+ }
845
891
  }
846
892
  if (ai.auth) {
847
893
  this._apiSettings.getAuthToken = () => ai.auth.getToken();
@@ -950,7 +996,7 @@ class RequestUrl {
950
996
  return url.toString();
951
997
  }
952
998
  get baseUrl() {
953
- return this.requestOptions?.baseUrl || DEFAULT_BASE_URL;
999
+ return this.requestOptions?.baseUrl || `https://${DEFAULT_DOMAIN}`;
954
1000
  }
955
1001
  get apiVersion() {
956
1002
  return DEFAULT_API_VERSION; // TODO: allow user-set options if that feature becomes available
@@ -974,6 +1020,27 @@ class RequestUrl {
974
1020
  return params;
975
1021
  }
976
1022
  }
1023
+ class WebSocketUrl {
1024
+ constructor(apiSettings) {
1025
+ this.apiSettings = apiSettings;
1026
+ }
1027
+ toString() {
1028
+ const url = new URL(`wss://${DEFAULT_DOMAIN}`);
1029
+ url.pathname = this.pathname;
1030
+ const queryParams = new URLSearchParams();
1031
+ queryParams.set('key', this.apiSettings.apiKey);
1032
+ url.search = queryParams.toString();
1033
+ return url.toString();
1034
+ }
1035
+ get pathname() {
1036
+ if (this.apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
1037
+ return 'ws/google.firebase.vertexai.v1beta.GenerativeService/BidiGenerateContent';
1038
+ }
1039
+ else {
1040
+ return `ws/google.firebase.vertexai.v1beta.LlmBidiService/BidiGenerateContent/locations/${this.apiSettings.location}`;
1041
+ }
1042
+ }
1043
+ }
977
1044
  /**
978
1045
  * Log language and "fire/version" to x-goog-api-client
979
1046
  */
@@ -1104,6 +1171,28 @@ async function makeRequest(model, task, apiSettings, stream, body, requestOption
1104
1171
  * See the License for the specific language governing permissions and
1105
1172
  * limitations under the License.
1106
1173
  */
1174
+ /**
1175
+ * Check that at least one candidate exists and does not have a bad
1176
+ * finish reason. Warns if multiple candidates exist.
1177
+ */
1178
+ function hasValidCandidates(response) {
1179
+ if (response.candidates && response.candidates.length > 0) {
1180
+ if (response.candidates.length > 1) {
1181
+ logger.warn(`This response had ${response.candidates.length} ` +
1182
+ `candidates. Returning text from the first candidate only. ` +
1183
+ `Access response.candidates directly to use the other candidates.`);
1184
+ }
1185
+ if (hadBadFinishReason(response.candidates[0])) {
1186
+ throw new AIError(AIErrorCode.RESPONSE_ERROR, `Response error: ${formatBlockErrorMessage(response)}. Response body stored in error.response`, {
1187
+ response
1188
+ });
1189
+ }
1190
+ return true;
1191
+ }
1192
+ else {
1193
+ return false;
1194
+ }
1195
+ }
1107
1196
  /**
1108
1197
  * Creates an EnhancedGenerateContentResponse object that has helper functions and
1109
1198
  * other modifications that improve usability.
@@ -1127,18 +1216,8 @@ function createEnhancedContentResponse(response) {
1127
1216
  */
1128
1217
  function addHelpers(response) {
1129
1218
  response.text = () => {
1130
- if (response.candidates && response.candidates.length > 0) {
1131
- if (response.candidates.length > 1) {
1132
- logger.warn(`This response had ${response.candidates.length} ` +
1133
- `candidates. Returning text from the first candidate only. ` +
1134
- `Access response.candidates directly to use the other candidates.`);
1135
- }
1136
- if (hadBadFinishReason(response.candidates[0])) {
1137
- throw new AIError(AIErrorCode.RESPONSE_ERROR, `Response error: ${formatBlockErrorMessage(response)}. Response body stored in error.response`, {
1138
- response
1139
- });
1140
- }
1141
- return getText(response);
1219
+ if (hasValidCandidates(response)) {
1220
+ return getText(response, part => !part.thought);
1142
1221
  }
1143
1222
  else if (response.promptFeedback) {
1144
1223
  throw new AIError(AIErrorCode.RESPONSE_ERROR, `Text not available. ${formatBlockErrorMessage(response)}`, {
@@ -1147,18 +1226,20 @@ function addHelpers(response) {
1147
1226
  }
1148
1227
  return '';
1149
1228
  };
1229
+ response.thoughtSummary = () => {
1230
+ if (hasValidCandidates(response)) {
1231
+ const result = getText(response, part => !!part.thought);
1232
+ return result === '' ? undefined : result;
1233
+ }
1234
+ else if (response.promptFeedback) {
1235
+ throw new AIError(AIErrorCode.RESPONSE_ERROR, `Thought summary not available. ${formatBlockErrorMessage(response)}`, {
1236
+ response
1237
+ });
1238
+ }
1239
+ return undefined;
1240
+ };
1150
1241
  response.inlineDataParts = () => {
1151
- if (response.candidates && response.candidates.length > 0) {
1152
- if (response.candidates.length > 1) {
1153
- logger.warn(`This response had ${response.candidates.length} ` +
1154
- `candidates. Returning data from the first candidate only. ` +
1155
- `Access response.candidates directly to use the other candidates.`);
1156
- }
1157
- if (hadBadFinishReason(response.candidates[0])) {
1158
- throw new AIError(AIErrorCode.RESPONSE_ERROR, `Response error: ${formatBlockErrorMessage(response)}. Response body stored in error.response`, {
1159
- response
1160
- });
1161
- }
1242
+ if (hasValidCandidates(response)) {
1162
1243
  return getInlineDataParts(response);
1163
1244
  }
1164
1245
  else if (response.promptFeedback) {
@@ -1169,17 +1250,7 @@ function addHelpers(response) {
1169
1250
  return undefined;
1170
1251
  };
1171
1252
  response.functionCalls = () => {
1172
- if (response.candidates && response.candidates.length > 0) {
1173
- if (response.candidates.length > 1) {
1174
- logger.warn(`This response had ${response.candidates.length} ` +
1175
- `candidates. Returning function calls from the first candidate only. ` +
1176
- `Access response.candidates directly to use the other candidates.`);
1177
- }
1178
- if (hadBadFinishReason(response.candidates[0])) {
1179
- throw new AIError(AIErrorCode.RESPONSE_ERROR, `Response error: ${formatBlockErrorMessage(response)}. Response body stored in error.response`, {
1180
- response
1181
- });
1182
- }
1253
+ if (hasValidCandidates(response)) {
1183
1254
  return getFunctionCalls(response);
1184
1255
  }
1185
1256
  else if (response.promptFeedback) {
@@ -1192,13 +1263,17 @@ function addHelpers(response) {
1192
1263
  return response;
1193
1264
  }
1194
1265
  /**
1195
- * Returns all text found in all parts of first candidate.
1266
+ * Returns all text from the first candidate's parts, filtering by whether
1267
+ * `partFilter()` returns true.
1268
+ *
1269
+ * @param response - The `GenerateContentResponse` from which to extract text.
1270
+ * @param partFilter - Only return `Part`s for which this returns true
1196
1271
  */
1197
- function getText(response) {
1272
+ function getText(response, partFilter) {
1198
1273
  const textStrings = [];
1199
1274
  if (response.candidates?.[0].content?.parts) {
1200
1275
  for (const part of response.candidates?.[0].content?.parts) {
1201
- if (part.text) {
1276
+ if (part.text && partFilter(part)) {
1202
1277
  textStrings.push(part.text);
1203
1278
  }
1204
1279
  }
@@ -1211,7 +1286,7 @@ function getText(response) {
1211
1286
  }
1212
1287
  }
1213
1288
  /**
1214
- * Returns {@link FunctionCall}s associated with first candidate.
1289
+ * Returns every {@link FunctionCall} associated with first candidate.
1215
1290
  */
1216
1291
  function getFunctionCalls(response) {
1217
1292
  const functionCalls = [];
@@ -1230,7 +1305,7 @@ function getFunctionCalls(response) {
1230
1305
  }
1231
1306
  }
1232
1307
  /**
1233
- * Returns {@link InlineDataPart}s in the first candidate if present.
1308
+ * Returns every {@link InlineDataPart} in the first candidate if present.
1234
1309
  *
1235
1310
  * @internal
1236
1311
  */
@@ -1309,8 +1384,9 @@ async function handlePredictResponse(response) {
1309
1384
  gcsURI: prediction.gcsUri
1310
1385
  });
1311
1386
  }
1387
+ else if (prediction.safetyAttributes) ;
1312
1388
  else {
1313
- throw new AIError(AIErrorCode.RESPONSE_ERROR, `Predictions array in response has missing properties. Response: ${JSON.stringify(responseJson)}`);
1389
+ throw new AIError(AIErrorCode.RESPONSE_ERROR, `Unexpected element in 'predictions' array in response: '${JSON.stringify(prediction)}'`);
1314
1390
  }
1315
1391
  }
1316
1392
  return { images, filteredReason };
@@ -1851,7 +1927,8 @@ function createPredictRequestBody(prompt, { gcsURI, imageFormat, addWatermark, n
1851
1927
  addWatermark,
1852
1928
  safetyFilterLevel,
1853
1929
  personGeneration: personFilterLevel,
1854
- includeRaiReason: true
1930
+ includeRaiReason: true,
1931
+ includeSafetyAttributes: true
1855
1932
  }
1856
1933
  };
1857
1934
  return body;
@@ -1878,12 +1955,14 @@ const VALID_PART_FIELDS = [
1878
1955
  'text',
1879
1956
  'inlineData',
1880
1957
  'functionCall',
1881
- 'functionResponse'
1958
+ 'functionResponse',
1959
+ 'thought',
1960
+ 'thoughtSignature'
1882
1961
  ];
1883
1962
  const VALID_PARTS_PER_ROLE = {
1884
1963
  user: ['text', 'inlineData'],
1885
1964
  function: ['functionResponse'],
1886
- model: ['text', 'functionCall'],
1965
+ model: ['text', 'functionCall', 'thought', 'thoughtSignature'],
1887
1966
  // System instructions shouldn't be in history anyway.
1888
1967
  system: ['text']
1889
1968
  };
@@ -1905,7 +1984,7 @@ function validateChatHistory(history) {
1905
1984
  throw new AIError(AIErrorCode.INVALID_CONTENT, `Each item should include role field. Got ${role} but valid roles are: ${JSON.stringify(POSSIBLE_ROLES)}`);
1906
1985
  }
1907
1986
  if (!Array.isArray(parts)) {
1908
- throw new AIError(AIErrorCode.INVALID_CONTENT, `Content should have 'parts' but property with an array of Parts`);
1987
+ throw new AIError(AIErrorCode.INVALID_CONTENT, `Content should have 'parts' property with an array of Parts`);
1909
1988
  }
1910
1989
  if (parts.length === 0) {
1911
1990
  throw new AIError(AIErrorCode.INVALID_CONTENT, `Each Content should have at least one part`);
@@ -1914,7 +1993,9 @@ function validateChatHistory(history) {
1914
1993
  text: 0,
1915
1994
  inlineData: 0,
1916
1995
  functionCall: 0,
1917
- functionResponse: 0
1996
+ functionResponse: 0,
1997
+ thought: 0,
1998
+ thoughtSignature: 0
1918
1999
  };
1919
2000
  for (const part of parts) {
1920
2001
  for (const key of VALID_PART_FIELDS) {
@@ -2212,6 +2293,270 @@ class GenerativeModel extends AIModel {
2212
2293
  }
2213
2294
  }
2214
2295
 
2296
+ /**
2297
+ * @license
2298
+ * Copyright 2025 Google LLC
2299
+ *
2300
+ * Licensed under the Apache License, Version 2.0 (the "License");
2301
+ * you may not use this file except in compliance with the License.
2302
+ * You may obtain a copy of the License at
2303
+ *
2304
+ * http://www.apache.org/licenses/LICENSE-2.0
2305
+ *
2306
+ * Unless required by applicable law or agreed to in writing, software
2307
+ * distributed under the License is distributed on an "AS IS" BASIS,
2308
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2309
+ * See the License for the specific language governing permissions and
2310
+ * limitations under the License.
2311
+ */
2312
+ /**
2313
+ * Represents an active, real-time, bidirectional conversation with the model.
2314
+ *
2315
+ * This class should only be instantiated by calling {@link LiveGenerativeModel.connect}.
2316
+ *
2317
+ * @beta
2318
+ */
2319
+ class LiveSession {
2320
+ /**
2321
+ * @internal
2322
+ */
2323
+ constructor(webSocketHandler, serverMessages) {
2324
+ this.webSocketHandler = webSocketHandler;
2325
+ this.serverMessages = serverMessages;
2326
+ /**
2327
+ * Indicates whether this Live session is closed.
2328
+ *
2329
+ * @beta
2330
+ */
2331
+ this.isClosed = false;
2332
+ /**
2333
+ * Indicates whether this Live session is being controlled by an `AudioConversationController`.
2334
+ *
2335
+ * @beta
2336
+ */
2337
+ this.inConversation = false;
2338
+ }
2339
+ /**
2340
+ * Sends content to the server.
2341
+ *
2342
+ * @param request - The message to send to the model.
2343
+ * @param turnComplete - Indicates if the turn is complete. Defaults to false.
2344
+ * @throws If this session has been closed.
2345
+ *
2346
+ * @beta
2347
+ */
2348
+ async send(request, turnComplete = true) {
2349
+ if (this.isClosed) {
2350
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
2351
+ }
2352
+ const newContent = formatNewContent(request);
2353
+ const message = {
2354
+ clientContent: {
2355
+ turns: [newContent],
2356
+ turnComplete
2357
+ }
2358
+ };
2359
+ this.webSocketHandler.send(JSON.stringify(message));
2360
+ }
2361
+ /**
2362
+ * Sends realtime input to the server.
2363
+ *
2364
+ * @param mediaChunks - The media chunks to send.
2365
+ * @throws If this session has been closed.
2366
+ *
2367
+ * @beta
2368
+ */
2369
+ async sendMediaChunks(mediaChunks) {
2370
+ if (this.isClosed) {
2371
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
2372
+ }
2373
+ // The backend does not support sending more than one mediaChunk in one message.
2374
+ // Work around this limitation by sending mediaChunks in separate messages.
2375
+ mediaChunks.forEach(mediaChunk => {
2376
+ const message = {
2377
+ realtimeInput: { mediaChunks: [mediaChunk] }
2378
+ };
2379
+ this.webSocketHandler.send(JSON.stringify(message));
2380
+ });
2381
+ }
2382
+ /**
2383
+ * Sends a stream of {@link GenerativeContentBlob}.
2384
+ *
2385
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
2386
+ * @throws If this session has been closed.
2387
+ *
2388
+ * @beta
2389
+ */
2390
+ async sendMediaStream(mediaChunkStream) {
2391
+ if (this.isClosed) {
2392
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
2393
+ }
2394
+ const reader = mediaChunkStream.getReader();
2395
+ while (true) {
2396
+ try {
2397
+ const { done, value } = await reader.read();
2398
+ if (done) {
2399
+ break;
2400
+ }
2401
+ else if (!value) {
2402
+ throw new Error('Missing chunk in reader, but reader is not done.');
2403
+ }
2404
+ await this.sendMediaChunks([value]);
2405
+ }
2406
+ catch (e) {
2407
+ // Re-throw any errors that occur during stream consumption or sending.
2408
+ const message = e instanceof Error ? e.message : 'Error processing media stream.';
2409
+ throw new AIError(AIErrorCode.REQUEST_ERROR, message);
2410
+ }
2411
+ }
2412
+ }
2413
+ /**
2414
+ * Yields messages received from the server.
2415
+ * This can only be used by one consumer at a time.
2416
+ *
2417
+ * @returns An `AsyncGenerator` that yields server messages as they arrive.
2418
+ * @throws If the session is already closed, or if we receive a response that we don't support.
2419
+ *
2420
+ * @beta
2421
+ */
2422
+ async *receive() {
2423
+ if (this.isClosed) {
2424
+ throw new AIError(AIErrorCode.SESSION_CLOSED, 'Cannot read from a Live session that is closed. Try starting a new Live session.');
2425
+ }
2426
+ for await (const message of this.serverMessages) {
2427
+ if (message && typeof message === 'object') {
2428
+ if (LiveResponseType.SERVER_CONTENT in message) {
2429
+ yield {
2430
+ type: 'serverContent',
2431
+ ...message
2432
+ .serverContent
2433
+ };
2434
+ }
2435
+ else if (LiveResponseType.TOOL_CALL in message) {
2436
+ yield {
2437
+ type: 'toolCall',
2438
+ ...message
2439
+ .toolCall
2440
+ };
2441
+ }
2442
+ else if (LiveResponseType.TOOL_CALL_CANCELLATION in message) {
2443
+ yield {
2444
+ type: 'toolCallCancellation',
2445
+ ...message.toolCallCancellation
2446
+ };
2447
+ }
2448
+ else {
2449
+ logger.warn(`Received an unknown message type from the server: ${JSON.stringify(message)}`);
2450
+ }
2451
+ }
2452
+ else {
2453
+ logger.warn(`Received an invalid message from the server: ${JSON.stringify(message)}`);
2454
+ }
2455
+ }
2456
+ }
2457
+ /**
2458
+ * Closes this session.
2459
+ * All methods on this session will throw an error once this resolves.
2460
+ *
2461
+ * @beta
2462
+ */
2463
+ async close() {
2464
+ if (!this.isClosed) {
2465
+ this.isClosed = true;
2466
+ await this.webSocketHandler.close(1000, 'Client closed session.');
2467
+ }
2468
+ }
2469
+ }
2470
+
2471
+ /**
2472
+ * @license
2473
+ * Copyright 2025 Google LLC
2474
+ *
2475
+ * Licensed under the Apache License, Version 2.0 (the "License");
2476
+ * you may not use this file except in compliance with the License.
2477
+ * You may obtain a copy of the License at
2478
+ *
2479
+ * http://www.apache.org/licenses/LICENSE-2.0
2480
+ *
2481
+ * Unless required by applicable law or agreed to in writing, software
2482
+ * distributed under the License is distributed on an "AS IS" BASIS,
2483
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2484
+ * See the License for the specific language governing permissions and
2485
+ * limitations under the License.
2486
+ */
2487
+ /**
2488
+ * Class for Live generative model APIs. The Live API enables low-latency, two-way multimodal
2489
+ * interactions with Gemini.
2490
+ *
2491
+ * This class should only be instantiated with {@link getLiveGenerativeModel}.
2492
+ *
2493
+ * @beta
2494
+ */
2495
+ class LiveGenerativeModel extends AIModel {
2496
+ /**
2497
+ * @internal
2498
+ */
2499
+ constructor(ai, modelParams,
2500
+ /**
2501
+ * @internal
2502
+ */
2503
+ _webSocketHandler) {
2504
+ super(ai, modelParams.model);
2505
+ this._webSocketHandler = _webSocketHandler;
2506
+ this.generationConfig = modelParams.generationConfig || {};
2507
+ this.tools = modelParams.tools;
2508
+ this.toolConfig = modelParams.toolConfig;
2509
+ this.systemInstruction = formatSystemInstruction(modelParams.systemInstruction);
2510
+ }
2511
+ /**
2512
+ * Starts a {@link LiveSession}.
2513
+ *
2514
+ * @returns A {@link LiveSession}.
2515
+ * @throws If the connection failed to be established with the server.
2516
+ *
2517
+ * @beta
2518
+ */
2519
+ async connect() {
2520
+ const url = new WebSocketUrl(this._apiSettings);
2521
+ await this._webSocketHandler.connect(url.toString());
2522
+ let fullModelPath;
2523
+ if (this._apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
2524
+ fullModelPath = `projects/${this._apiSettings.project}/${this.model}`;
2525
+ }
2526
+ else {
2527
+ fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
2528
+ }
2529
+ const setupMessage = {
2530
+ setup: {
2531
+ model: fullModelPath,
2532
+ generationConfig: this.generationConfig,
2533
+ tools: this.tools,
2534
+ toolConfig: this.toolConfig,
2535
+ systemInstruction: this.systemInstruction
2536
+ }
2537
+ };
2538
+ try {
2539
+ // Begin listening for server messages, and begin the handshake by sending the 'setupMessage'
2540
+ const serverMessages = this._webSocketHandler.listen();
2541
+ this._webSocketHandler.send(JSON.stringify(setupMessage));
2542
+ // Verify we received the handshake response 'setupComplete'
2543
+ const firstMessage = (await serverMessages.next()).value;
2544
+ if (!firstMessage ||
2545
+ !(typeof firstMessage === 'object') ||
2546
+ !('setupComplete' in firstMessage)) {
2547
+ await this._webSocketHandler.close(1011, 'Handshake failure');
2548
+ throw new AIError(AIErrorCode.RESPONSE_ERROR, 'Server connection handshake failed. The server did not respond with a setupComplete message.');
2549
+ }
2550
+ return new LiveSession(this._webSocketHandler, serverMessages);
2551
+ }
2552
+ catch (e) {
2553
+ // Ensure connection is closed on any setup error
2554
+ await this._webSocketHandler.close();
2555
+ throw e;
2556
+ }
2557
+ }
2558
+ }
2559
+
2215
2560
  /**
2216
2561
  * @license
2217
2562
  * Copyright 2025 Google LLC
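
At this point the diff has added the complete `LiveSession` and `LiveGenerativeModel` classes. Below is a minimal sketch of consuming `LiveSession.receive()`, based on the `type` discriminator and the message fields used by the implementation above; `handleAudioPart` and the function-response reply shape are illustrative assumptions.

```javascript
// Minimal sketch of draining LiveSession.receive(), based on the implementation above.
// handleAudioPart and the functionResponse reply shape are illustrative assumptions.
async function drainSession(session) {
  for await (const message of session.receive()) {
    switch (message.type) {
      case 'serverContent':
        if (message.interrupted) {
          // The model's turn was interrupted; discard any audio queued for playback.
        }
        // Audio output arrives as inlineData parts containing base64-encoded PCM.
        message.modelTurn?.parts.forEach(part => handleAudioPart(part));
        break;
      case 'toolCall':
        // Reply to function calls by sending FunctionResponse parts back over the session.
        await session.send(message.functionCalls.map(fc => ({
          functionResponse: { name: fc.name, response: { /* ... */ } }
        })));
        break;
      case 'toolCallCancellation':
        // Lists the tool calls whose results should be discarded
        // (exact field name is not shown in this excerpt).
        break;
    }
  }
}
```
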
@@ -2326,17 +2671,6 @@ class ImagenModel extends AIModel {
2326
2671
  }
2327
2672
  }
2328
2673
 
2329
- /**
2330
- * @internal
2331
- */
2332
- var Availability;
2333
- (function (Availability) {
2334
- Availability["UNAVAILABLE"] = "unavailable";
2335
- Availability["DOWNLOADABLE"] = "downloadable";
2336
- Availability["DOWNLOADING"] = "downloading";
2337
- Availability["AVAILABLE"] = "available";
2338
- })(Availability || (Availability = {}));
2339
-
2340
2674
  /**
2341
2675
  * @license
2342
2676
  * Copyright 2025 Google LLC
@@ -2354,261 +2688,135 @@ var Availability;
2354
2688
  * limitations under the License.
2355
2689
  */
2356
2690
  /**
2357
- * Defines an inference "backend" that uses Chrome's on-device model,
2358
- * and encapsulates logic for detecting when on-device inference is
2359
- * possible.
2691
+ * A wrapper for the native `WebSocket` available in both Browsers and Node >= 22.
2692
+ *
2693
+ * @internal
2360
2694
  */
2361
- class ChromeAdapterImpl {
2362
- constructor(languageModelProvider, mode, onDeviceParams = {
2363
- createOptions: {
2364
- // Defaults to support image inputs for convenience.
2365
- expectedInputs: [{ type: 'image' }]
2366
- }
2367
- }) {
2368
- this.languageModelProvider = languageModelProvider;
2369
- this.mode = mode;
2370
- this.onDeviceParams = onDeviceParams;
2371
- this.isDownloading = false;
2695
+ class WebSocketHandlerImpl {
2696
+ constructor() {
2697
+ if (typeof WebSocket === 'undefined') {
2698
+ throw new AIError(AIErrorCode.UNSUPPORTED, 'The WebSocket API is not available in this environment. ' +
2699
+ 'The "Live" feature is not supported here. It is supported in ' +
2700
+ 'modern browser windows, Web Workers with WebSocket support, and Node >= 22.');
2701
+ }
2702
+ }
2703
+ connect(url) {
2704
+ return new Promise((resolve, reject) => {
2705
+ this.ws = new WebSocket(url);
2706
+ this.ws.binaryType = 'blob'; // Only important to set in Node
2707
+ this.ws.addEventListener('open', () => resolve(), { once: true });
2708
+ this.ws.addEventListener('error', () => reject(new AIError(AIErrorCode.FETCH_ERROR, `Error event raised on WebSocket`)), { once: true });
2709
+ this.ws.addEventListener('close', (closeEvent) => {
2710
+ if (closeEvent.reason) {
2711
+ logger.warn(`WebSocket connection closed by server. Reason: '${closeEvent.reason}'`);
2712
+ }
2713
+ });
2714
+ });
2372
2715
  }
2373
- /**
2374
- * Checks if a given request can be made on-device.
2375
- *
2376
- * <ol>Encapsulates a few concerns:
2377
- * <li>the mode</li>
2378
- * <li>API existence</li>
2379
- * <li>prompt formatting</li>
2380
- * <li>model availability, including triggering download if necessary</li>
2381
- * </ol>
2382
- *
2383
- * <p>Pros: callers needn't be concerned with details of on-device availability.</p>
2384
- * <p>Cons: this method spans a few concerns and splits request validation from usage.
2385
- * If instance variables weren't already part of the API, we could consider a better
2386
- * separation of concerns.</p>
2387
- */
2388
- async isAvailable(request) {
2389
- if (!this.mode) {
2390
- logger.debug(`On-device inference unavailable because mode is undefined.`);
2391
- return false;
2716
+ send(data) {
2717
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
2718
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'WebSocket is not open.');
2392
2719
  }
2393
- if (this.mode === InferenceMode.ONLY_IN_CLOUD) {
2394
- logger.debug(`On-device inference unavailable because mode is "only_in_cloud".`);
2395
- return false;
2720
+ this.ws.send(data);
2721
+ }
2722
+ async *listen() {
2723
+ if (!this.ws) {
2724
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'WebSocket is not connected.');
2396
2725
  }
2397
- // Triggers out-of-band download so model will eventually become available.
2398
- const availability = await this.downloadIfAvailable();
2399
- if (this.mode === InferenceMode.ONLY_ON_DEVICE) {
2400
- // If it will never be available due to API inavailability, throw.
2401
- if (availability === Availability.UNAVAILABLE) {
2402
- throw new AIError(AIErrorCode.API_NOT_ENABLED, 'Local LanguageModel API not available in this environment.');
2726
+ const messageQueue = [];
2727
+ const errorQueue = [];
2728
+ let resolvePromise = null;
2729
+ let isClosed = false;
2730
+ const messageListener = async (event) => {
2731
+ let data;
2732
+ if (event.data instanceof Blob) {
2733
+ data = await event.data.text();
2403
2734
  }
2404
- else if (availability === Availability.DOWNLOADABLE ||
2405
- availability === Availability.DOWNLOADING) {
2406
- // TODO(chholland): Better user experience during download - progress?
2407
- logger.debug(`Waiting for download of LanguageModel to complete.`);
2408
- await this.downloadPromise;
2409
- return true;
2735
+ else if (typeof event.data === 'string') {
2736
+ data = event.data;
2410
2737
  }
2411
- return true;
2412
- }
2413
- // Applies prefer_on_device logic.
2414
- if (availability !== Availability.AVAILABLE) {
2415
- logger.debug(`On-device inference unavailable because availability is "${availability}".`);
2416
- return false;
2417
- }
2418
- if (!ChromeAdapterImpl.isOnDeviceRequest(request)) {
2419
- logger.debug(`On-device inference unavailable because request is incompatible.`);
2420
- return false;
2421
- }
2422
- return true;
2423
- }
2424
- /**
2425
- * Generates content on device.
2426
- *
2427
- * <p>This is comparable to {@link GenerativeModel.generateContent} for generating content in
2428
- * Cloud.</p>
2429
- * @param request - a standard Firebase AI {@link GenerateContentRequest}
2430
- * @returns {@link Response}, so we can reuse common response formatting.
2431
- */
2432
- async generateContent(request) {
2433
- const session = await this.createSession();
2434
- const contents = await Promise.all(request.contents.map(ChromeAdapterImpl.toLanguageModelMessage));
2435
- const text = await session.prompt(contents, this.onDeviceParams.promptOptions);
2436
- return ChromeAdapterImpl.toResponse(text);
2437
- }
2438
- /**
2439
- * Generates content stream on device.
2440
- *
2441
- * <p>This is comparable to {@link GenerativeModel.generateContentStream} for generating content in
2442
- * Cloud.</p>
2443
- * @param request - a standard Firebase AI {@link GenerateContentRequest}
2444
- * @returns {@link Response}, so we can reuse common response formatting.
2445
- */
2446
- async generateContentStream(request) {
2447
- const session = await this.createSession();
2448
- const contents = await Promise.all(request.contents.map(ChromeAdapterImpl.toLanguageModelMessage));
2449
- const stream = session.promptStreaming(contents, this.onDeviceParams.promptOptions);
2450
- return ChromeAdapterImpl.toStreamResponse(stream);
2451
- }
2452
- async countTokens(_request) {
2453
- throw new AIError(AIErrorCode.REQUEST_ERROR, 'Count Tokens is not yet available for on-device model.');
2454
- }
2455
- /**
2456
- * Asserts inference for the given request can be performed by an on-device model.
2457
- */
2458
- static isOnDeviceRequest(request) {
2459
- // Returns false if the prompt is empty.
2460
- if (request.contents.length === 0) {
2461
- logger.debug('Empty prompt rejected for on-device inference.');
2462
- return false;
2463
- }
2464
- for (const content of request.contents) {
2465
- if (content.role === 'function') {
2466
- logger.debug(`"Function" role rejected for on-device inference.`);
2467
- return false;
2468
- }
2469
- // Returns false if request contains an image with an unsupported mime type.
2470
- for (const part of content.parts) {
2471
- if (part.inlineData &&
2472
- ChromeAdapterImpl.SUPPORTED_MIME_TYPES.indexOf(part.inlineData.mimeType) === -1) {
2473
- logger.debug(`Unsupported mime type "${part.inlineData.mimeType}" rejected for on-device inference.`);
2474
- return false;
2738
+ else {
2739
+ errorQueue.push(new AIError(AIErrorCode.PARSE_FAILED, `Failed to parse WebSocket response. Expected data to be a Blob or string, but was ${typeof event.data}.`));
2740
+ if (resolvePromise) {
2741
+ resolvePromise();
2742
+ resolvePromise = null;
2475
2743
  }
2744
+ return;
2745
+ }
2746
+ try {
2747
+ const obj = JSON.parse(data);
2748
+ messageQueue.push(obj);
2749
+ }
2750
+ catch (e) {
2751
+ const err = e;
2752
+ errorQueue.push(new AIError(AIErrorCode.PARSE_FAILED, `Error parsing WebSocket message to JSON: ${err.message}`));
2753
+ }
2754
+ if (resolvePromise) {
2755
+ resolvePromise();
2756
+ resolvePromise = null;
2476
2757
  }
2477
- }
2478
- return true;
2479
- }
2480
- /**
2481
- * Encapsulates logic to get availability and download a model if one is downloadable.
2482
- */
2483
- async downloadIfAvailable() {
2484
- const availability = await this.languageModelProvider?.availability(this.onDeviceParams.createOptions);
2485
- if (availability === Availability.DOWNLOADABLE) {
2486
- this.download();
2487
- }
2488
- return availability;
2489
- }
2490
- /**
2491
- * Triggers out-of-band download of an on-device model.
2492
- *
2493
- * <p>Chrome only downloads models as needed. Chrome knows a model is needed when code calls
2494
- * LanguageModel.create.</p>
2495
- *
2496
- * <p>Since Chrome manages the download, the SDK can only avoid redundant download requests by
2497
- * tracking if a download has previously been requested.</p>
2498
- */
2499
- download() {
2500
- if (this.isDownloading) {
2501
- return;
2502
- }
2503
- this.isDownloading = true;
2504
- this.downloadPromise = this.languageModelProvider
2505
- ?.create(this.onDeviceParams.createOptions)
2506
- .finally(() => {
2507
- this.isDownloading = false;
2508
- });
2509
- }
2510
- /**
2511
- * Converts Firebase AI {@link Content} object to a Chrome {@link LanguageModelMessage} object.
2512
- */
2513
- static async toLanguageModelMessage(content) {
2514
- const languageModelMessageContents = await Promise.all(content.parts.map(ChromeAdapterImpl.toLanguageModelMessageContent));
2515
- return {
2516
- role: ChromeAdapterImpl.toLanguageModelMessageRole(content.role),
2517
- content: languageModelMessageContents
2518
2758
  };
2519
- }
2520
- /**
2521
- * Converts a Firebase AI Part object to a Chrome LanguageModelMessageContent object.
2522
- */
2523
- static async toLanguageModelMessageContent(part) {
2524
- if (part.text) {
2525
- return {
2526
- type: 'text',
2527
- value: part.text
2528
- };
2529
- }
2530
- else if (part.inlineData) {
2531
- const formattedImageContent = await fetch(`data:${part.inlineData.mimeType};base64,${part.inlineData.data}`);
2532
- const imageBlob = await formattedImageContent.blob();
2533
- const imageBitmap = await createImageBitmap(imageBlob);
2534
- return {
2535
- type: 'image',
2536
- value: imageBitmap
2537
- };
2538
- }
2539
- throw new AIError(AIErrorCode.REQUEST_ERROR, `Processing of this Part type is not currently supported.`);
2540
- }
2541
- /**
2542
- * Converts a Firebase AI {@link Role} string to a {@link LanguageModelMessageRole} string.
2543
- */
2544
- static toLanguageModelMessageRole(role) {
2545
- // Assumes 'function' rule has been filtered by isOnDeviceRequest
2546
- return role === 'model' ? 'assistant' : 'user';
2547
- }
2548
- /**
2549
- * Abstracts Chrome session creation.
2550
- *
2551
- * <p>Chrome uses a multi-turn session for all inference. Firebase AI uses single-turn for all
2552
- * inference. To map the Firebase AI API to Chrome's API, the SDK creates a new session for all
2553
- * inference.</p>
2554
- *
2555
- * <p>Chrome will remove a model from memory if it's no longer in use, so this method ensures a
2556
- * new session is created before an old session is destroyed.</p>
2557
- */
2558
- async createSession() {
2559
- if (!this.languageModelProvider) {
2560
- throw new AIError(AIErrorCode.UNSUPPORTED, 'Chrome AI requested for unsupported browser version.');
2759
+ const errorListener = () => {
2760
+ errorQueue.push(new AIError(AIErrorCode.FETCH_ERROR, 'WebSocket connection error.'));
2761
+ if (resolvePromise) {
2762
+ resolvePromise();
2763
+ resolvePromise = null;
2764
+ }
2765
+ };
2766
+ const closeListener = (event) => {
2767
+ if (event.reason) {
2768
+ logger.warn(`WebSocket connection closed by the server with reason: ${event.reason}`);
2769
+ }
2770
+ isClosed = true;
2771
+ if (resolvePromise) {
2772
+ resolvePromise();
2773
+ resolvePromise = null;
2774
+ }
2775
+ // Clean up listeners to prevent memory leaks
2776
+ this.ws?.removeEventListener('message', messageListener);
2777
+ this.ws?.removeEventListener('close', closeListener);
2778
+ this.ws?.removeEventListener('error', errorListener);
2779
+ };
2780
+ this.ws.addEventListener('message', messageListener);
2781
+ this.ws.addEventListener('close', closeListener);
2782
+ this.ws.addEventListener('error', errorListener);
2783
+ while (!isClosed) {
2784
+ if (errorQueue.length > 0) {
2785
+ const error = errorQueue.shift();
2786
+ throw error;
2787
+ }
2788
+ if (messageQueue.length > 0) {
2789
+ yield messageQueue.shift();
2790
+ }
2791
+ else {
2792
+ await new Promise(resolve => {
2793
+ resolvePromise = resolve;
2794
+ });
2795
+ }
2561
2796
  }
2562
- const newSession = await this.languageModelProvider.create(this.onDeviceParams.createOptions);
2563
- if (this.oldSession) {
2564
- this.oldSession.destroy();
2797
+ // If the loop terminated because isClosed is true, check for any final errors
2798
+ if (errorQueue.length > 0) {
2799
+ const error = errorQueue.shift();
2800
+ throw error;
2565
2801
  }
2566
- // Holds session reference, so model isn't unloaded from memory.
2567
- this.oldSession = newSession;
2568
- return newSession;
2569
- }
2570
- /**
2571
- * Formats string returned by Chrome as a {@link Response} returned by Firebase AI.
2572
- */
2573
- static toResponse(text) {
2574
- return {
2575
- json: async () => ({
2576
- candidates: [
2577
- {
2578
- content: {
2579
- parts: [{ text }]
2580
- }
2581
- }
2582
- ]
2583
- })
2584
- };
2585
2802
  }
2586
- /**
2587
- * Formats string stream returned by Chrome as SSE returned by Firebase AI.
2588
- */
2589
- static toStreamResponse(stream) {
2590
- const encoder = new TextEncoder();
2591
- return {
2592
- body: stream.pipeThrough(new TransformStream({
2593
- transform(chunk, controller) {
2594
- const json = JSON.stringify({
2595
- candidates: [
2596
- {
2597
- content: {
2598
- role: 'model',
2599
- parts: [{ text: chunk }]
2600
- }
2601
- }
2602
- ]
2603
- });
2604
- controller.enqueue(encoder.encode(`data: ${json}\n\n`));
2605
- }
2606
- }))
2607
- };
2803
+ close(code, reason) {
2804
+ return new Promise(resolve => {
2805
+ if (!this.ws) {
2806
+ return resolve();
2807
+ }
2808
+ this.ws.addEventListener('close', () => resolve(), { once: true });
2809
+ // Calling 'close' during these states results in an error.
2810
+ if (this.ws.readyState === WebSocket.CLOSED ||
2811
+ this.ws.readyState === WebSocket.CONNECTING) {
2812
+ return resolve();
2813
+ }
2814
+ if (this.ws.readyState !== WebSocket.CLOSING) {
2815
+ this.ws.close(code, reason);
2816
+ }
2817
+ });
2608
2818
  }
2609
2819
  }
2610
- // Visible for testing
2611
- ChromeAdapterImpl.SUPPORTED_MIME_TYPES = ['image/jpeg', 'image/png'];
2612
2820
 
2613
2821
  /**
2614
2822
  * @license
@@ -2919,7 +3127,7 @@ class ImagenImageFormat {
2919
3127
 
2920
3128
  /**
2921
3129
  * @license
2922
- * Copyright 2024 Google LLC
3130
+ * Copyright 2025 Google LLC
2923
3131
  *
2924
3132
  * Licensed under the Apache License, Version 2.0 (the "License");
2925
3133
  * you may not use this file except in compliance with the License.
@@ -2933,69 +3141,433 @@ class ImagenImageFormat {
2933
3141
  * See the License for the specific language governing permissions and
2934
3142
  * limitations under the License.
2935
3143
  */
3144
+ const SERVER_INPUT_SAMPLE_RATE = 16000;
3145
+ const SERVER_OUTPUT_SAMPLE_RATE = 24000;
3146
+ const AUDIO_PROCESSOR_NAME = 'audio-processor';
2936
3147
  /**
2937
- * Returns the default {@link AI} instance that is associated with the provided
2938
- * {@link @firebase/app#FirebaseApp}. If no instance exists, initializes a new instance with the
2939
- * default settings.
2940
- *
2941
- * @example
2942
- * ```javascript
2943
- * const ai = getAI(app);
2944
- * ```
2945
- *
2946
- * @example
2947
- * ```javascript
2948
- * // Get an AI instance configured to use the Gemini Developer API (via Google AI).
2949
- * const ai = getAI(app, { backend: new GoogleAIBackend() });
2950
- * ```
2951
- *
2952
- * @example
2953
- * ```javascript
2954
- * // Get an AI instance configured to use the Vertex AI Gemini API.
2955
- * const ai = getAI(app, { backend: new VertexAIBackend() });
2956
- * ```
3148
+ * The JS for an `AudioWorkletProcessor`.
3149
+ * This processor is responsible for taking raw audio from the microphone,
3150
+ * converting it to the required 16-bit 16kHz PCM, and posting it back to the main thread.
2957
3151
  *
2958
- * @param app - The {@link @firebase/app#FirebaseApp} to use.
2959
- * @param options - {@link AIOptions} that configure the AI instance.
2960
- * @returns The default {@link AI} instance for the given {@link @firebase/app#FirebaseApp}.
3152
+ * See: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletProcessor
2961
3153
  *
2962
- * @public
3154
+ * It is defined as a string here so that it can be converted into a `Blob`
3155
+ * and loaded at runtime.
2963
3156
  */
2964
- function getAI(app$1 = app.getApp(), options = { backend: new GoogleAIBackend() }) {
2965
- app$1 = util.getModularInstance(app$1);
2966
- // Dependencies
2967
- const AIProvider = app._getProvider(app$1, AI_TYPE);
2968
- const identifier = encodeInstanceIdentifier(options.backend);
2969
- return AIProvider.getImmediate({
2970
- identifier
2971
- });
2972
- }
3157
+ const audioProcessorWorkletString = `
3158
+ class AudioProcessor extends AudioWorkletProcessor {
3159
+ constructor(options) {
3160
+ super();
3161
+ this.targetSampleRate = options.processorOptions.targetSampleRate;
3162
+ // 'sampleRate' is a global variable available inside the AudioWorkletGlobalScope,
3163
+ // representing the native sample rate of the AudioContext.
3164
+ this.inputSampleRate = sampleRate;
3165
+ }
3166
+
3167
+ /**
3168
+ * This method is called by the browser's audio engine for each block of audio data.
3169
+ * Input is a single input, with a single channel (input[0][0]).
3170
+ */
3171
+ process(inputs) {
3172
+ const input = inputs[0];
3173
+ if (input && input.length > 0 && input[0].length > 0) {
3174
+ const pcmData = input[0]; // Float32Array of raw audio samples.
3175
+
3176
+ // Simple linear interpolation for resampling.
3177
+ const resampled = new Float32Array(Math.round(pcmData.length * this.targetSampleRate / this.inputSampleRate));
3178
+ const ratio = pcmData.length / resampled.length;
3179
+ for (let i = 0; i < resampled.length; i++) {
3180
+ resampled[i] = pcmData[Math.floor(i * ratio)];
3181
+ }
3182
+
3183
+ // Convert Float32 (-1, 1) samples to Int16 (-32768, 32767)
3184
+ const resampledInt16 = new Int16Array(resampled.length);
3185
+ for (let i = 0; i < resampled.length; i++) {
3186
+ const sample = Math.max(-1, Math.min(1, resampled[i]));
3187
+ if (sample < 0) {
3188
+ resampledInt16[i] = sample * 32768;
3189
+ } else {
3190
+ resampledInt16[i] = sample * 32767;
3191
+ }
3192
+ }
3193
+
3194
+ this.port.postMessage(resampledInt16);
3195
+ }
3196
+ // Return true to keep the processor alive and processing the next audio block.
3197
+ return true;
3198
+ }
3199
+ }
3200
+
3201
+ // Register the processor with a name that can be used to instantiate it from the main thread.
3202
+ registerProcessor('${AUDIO_PROCESSOR_NAME}', AudioProcessor);
3203
+ `;
2973
3204
  /**
2974
- * Returns a {@link GenerativeModel} class with methods for inference
2975
- * and other functionality.
3205
+ * Encapsulates the core logic of an audio conversation.
2976
3206
  *
2977
- * @public
3207
+ * @internal
2978
3208
  */
2979
- function getGenerativeModel(ai, modelParams, requestOptions) {
2980
- // Uses the existence of HybridParams.mode to clarify the type of the modelParams input.
2981
- const hybridParams = modelParams;
2982
- let inCloudParams;
2983
- if (hybridParams.mode) {
2984
- inCloudParams = hybridParams.inCloudParams || {
2985
- model: DEFAULT_HYBRID_IN_CLOUD_MODEL
3209
+ class AudioConversationRunner {
3210
+ constructor(liveSession, options, deps) {
3211
+ this.liveSession = liveSession;
3212
+ this.options = options;
3213
+ this.deps = deps;
3214
+ /** A flag to indicate if the conversation has been stopped. */
3215
+ this.isStopped = false;
3216
+ /** A deferred that contains a promise that is resolved when stop() is called, to unblock the receive loop. */
3217
+ this.stopDeferred = new util.Deferred();
3218
+ /** A FIFO queue of 24kHz, 16-bit PCM audio chunks received from the server. */
3219
+ this.playbackQueue = [];
3220
+ /** Tracks scheduled audio sources. Used to cancel scheduled audio when the model is interrupted. */
3221
+ this.scheduledSources = [];
3222
+ /** A high-precision timeline pointer for scheduling gapless audio playback. */
3223
+ this.nextStartTime = 0;
3224
+ /** A mutex to prevent the playback processing loop from running multiple times concurrently. */
3225
+ this.isPlaybackLoopRunning = false;
3226
+ this.liveSession.inConversation = true;
3227
+ // Start listening for messages from the server.
3228
+ this.receiveLoopPromise = this.runReceiveLoop().finally(() => this.cleanup());
3229
+ // Set up the handler for receiving processed audio data from the worklet.
3230
+ // Message data has been resampled to 16kHz 16-bit PCM.
3231
+ this.deps.workletNode.port.onmessage = event => {
3232
+ if (this.isStopped) {
3233
+ return;
3234
+ }
3235
+ const pcm16 = event.data;
3236
+ const base64 = btoa(String.fromCharCode.apply(null, Array.from(new Uint8Array(pcm16.buffer))));
3237
+ const chunk = {
3238
+ mimeType: 'audio/pcm',
3239
+ data: base64
3240
+ };
3241
+ void this.liveSession.sendMediaChunks([chunk]);
2986
3242
  };
2987
3243
  }
2988
- else {
2989
- inCloudParams = modelParams;
3244
+ /**
3245
+ * Stops the conversation and unblocks the main receive loop.
3246
+ */
3247
+ async stop() {
3248
+ if (this.isStopped) {
3249
+ return;
3250
+ }
3251
+ this.isStopped = true;
3252
+ this.stopDeferred.resolve(); // Unblock the receive loop
3253
+ await this.receiveLoopPromise; // Wait for the loop and cleanup to finish
2990
3254
  }
2991
- if (!inCloudParams.model) {
2992
- throw new AIError(AIErrorCode.NO_MODEL, `Must provide a model name. Example: getGenerativeModel({ model: 'my-model-name' })`);
3255
+ /**
3256
+ * Cleans up all audio resources (nodes, stream tracks, context) and marks the
3257
+ * session as no longer in a conversation.
3258
+ */
3259
+ cleanup() {
3260
+ this.interruptPlayback(); // Ensure all audio is stopped on final cleanup.
3261
+ this.deps.workletNode.port.onmessage = null;
3262
+ this.deps.workletNode.disconnect();
3263
+ this.deps.sourceNode.disconnect();
3264
+ this.deps.mediaStream.getTracks().forEach(track => track.stop());
3265
+ if (this.deps.audioContext.state !== 'closed') {
3266
+ void this.deps.audioContext.close();
3267
+ }
3268
+ this.liveSession.inConversation = false;
2993
3269
  }
2994
- let chromeAdapter;
2995
- // Do not initialize a ChromeAdapter if we are not in hybrid mode.
2996
- if (typeof window !== 'undefined' && hybridParams.mode) {
2997
- chromeAdapter = new ChromeAdapterImpl(window.LanguageModel, hybridParams.mode, hybridParams.onDeviceParams);
3270
+ /**
3271
+ * Adds audio data to the queue and ensures the playback loop is running.
3272
+ */
3273
+ enqueueAndPlay(audioData) {
3274
+ this.playbackQueue.push(audioData);
3275
+ // Will no-op if it's already running.
3276
+ void this.processPlaybackQueue();
3277
+ }
3278
+ /**
3279
+ * Stops all current and pending audio playback and clears the queue. This is
3280
+ * called when the server indicates the model's speech was interrupted with
3281
+ * `LiveServerContent.modelTurn.interrupted`.
3282
+ */
3283
+ interruptPlayback() {
3284
+ // Stop all sources that have been scheduled. The onended event will fire for each,
3285
+ // which will clean up the scheduledSources array.
3286
+ [...this.scheduledSources].forEach(source => source.stop(0));
3287
+ // Clear the internal buffer of unprocessed audio chunks.
3288
+ this.playbackQueue.length = 0;
3289
+ // Reset the playback clock to start fresh.
3290
+ this.nextStartTime = this.deps.audioContext.currentTime;
3291
+ }
3292
+ /**
3293
+ * Processes the playback queue in a loop, scheduling each chunk in a gapless sequence.
3294
+ */
3295
+ async processPlaybackQueue() {
3296
+ if (this.isPlaybackLoopRunning) {
3297
+ return;
3298
+ }
3299
+ this.isPlaybackLoopRunning = true;
3300
+ while (this.playbackQueue.length > 0 && !this.isStopped) {
3301
+ const pcmRawBuffer = this.playbackQueue.shift();
3302
+ try {
3303
+ const pcm16 = new Int16Array(pcmRawBuffer);
3304
+ const frameCount = pcm16.length;
3305
+ const audioBuffer = this.deps.audioContext.createBuffer(1, frameCount, SERVER_OUTPUT_SAMPLE_RATE);
3306
+ // Convert 16-bit PCM to 32-bit PCM, required by the Web Audio API.
3307
+ const channelData = audioBuffer.getChannelData(0);
3308
+ for (let i = 0; i < frameCount; i++) {
3309
+ channelData[i] = pcm16[i] / 32768; // Normalize to Float32 range [-1.0, 1.0]
3310
+ }
3311
+ const source = this.deps.audioContext.createBufferSource();
3312
+ source.buffer = audioBuffer;
3313
+ source.connect(this.deps.audioContext.destination);
3314
+ // Track the source and set up a handler to remove it from tracking when it finishes.
3315
+ this.scheduledSources.push(source);
3316
+ source.onended = () => {
3317
+ this.scheduledSources = this.scheduledSources.filter(s => s !== source);
3318
+ };
3319
+ // To prevent gaps, schedule the next chunk to start either now (if we're catching up)
3320
+ // or exactly when the previous chunk is scheduled to end.
3321
+ this.nextStartTime = Math.max(this.deps.audioContext.currentTime, this.nextStartTime);
3322
+ source.start(this.nextStartTime);
3323
+ // Update the schedule for the *next* chunk.
3324
+ this.nextStartTime += audioBuffer.duration;
3325
+ }
3326
+ catch (e) {
3327
+ logger.error('Error playing audio:', e);
3328
+ }
3329
+ }
3330
+ this.isPlaybackLoopRunning = false;
3331
+ }
3332
+ /**
3333
+ * The main loop that listens for and processes messages from the server.
3334
+ */
3335
+ async runReceiveLoop() {
3336
+ const messageGenerator = this.liveSession.receive();
3337
+ while (!this.isStopped) {
3338
+ const result = await Promise.race([
3339
+ messageGenerator.next(),
3340
+ this.stopDeferred.promise
3341
+ ]);
3342
+ if (this.isStopped || !result || result.done) {
3343
+ break;
3344
+ }
3345
+ const message = result.value;
3346
+ if (message.type === 'serverContent') {
3347
+ const serverContent = message;
3348
+ if (serverContent.interrupted) {
3349
+ this.interruptPlayback();
3350
+ }
3351
+ const audioPart = serverContent.modelTurn?.parts.find(part => part.inlineData?.mimeType.startsWith('audio/'));
3352
+ if (audioPart?.inlineData) {
3353
+ const audioData = Uint8Array.from(atob(audioPart.inlineData.data), c => c.charCodeAt(0)).buffer;
3354
+ this.enqueueAndPlay(audioData);
3355
+ }
3356
+ }
3357
+ else if (message.type === 'toolCall') {
3358
+ if (!this.options.functionCallingHandler) {
3359
+ logger.warn('Received tool call message, but StartAudioConversationOptions.functionCallingHandler is undefined. Ignoring tool call.');
3360
+ }
3361
+ else {
3362
+ try {
3363
+ const resultPart = await this.options.functionCallingHandler(message.functionCalls);
3364
+ if (!this.isStopped) {
3365
+ void this.liveSession.send([resultPart]);
3366
+ }
3367
+ }
3368
+ catch (e) {
3369
+ throw new AIError(AIErrorCode.ERROR, `Function calling handler failed: ${e.message}`);
3370
+ }
3371
+ }
3372
+ }
3373
+ }
3374
+ }
3375
+ }
3376
+ /**
3377
+ * Starts a real-time, bidirectional audio conversation with the model. This helper function manages
3378
+ * the complexities of microphone access, audio recording, playback, and interruptions.
3379
+ *
3380
+ * @remarks Important: This function must be called in response to a user gesture
3381
+ * (for example, a button click) to comply with {@link https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy | browser autoplay policies}.
3382
+ *
3383
+ * @example
3384
+ * ```javascript
3385
+ * const liveSession = await model.connect();
3386
+ * let conversationController;
3387
+ *
3388
+ * // This function must be called from within a click handler.
3389
+ * async function startConversation() {
3390
+ * try {
3391
+ * conversationController = await startAudioConversation(liveSession);
3392
+ * } catch (e) {
3393
+ * // Handle AI-specific errors
3394
+ * if (e instanceof AIError) {
3395
+ * console.error("AI Error:", e.message);
3396
+ * }
3397
+ * // Handle microphone permission and hardware errors
3398
+ * else if (e instanceof DOMException) {
3399
+ * console.error("Microphone Error:", e.message);
3400
+ * }
3401
+ * // Handle other unexpected errors
3402
+ * else {
3403
+ * console.error("An unexpected error occurred:", e);
3404
+ * }
3405
+ * }
3406
+ * }
3407
+ *
3408
+ * // Later, to stop the conversation:
3409
+ * // if (conversationController) {
3410
+ * // await conversationController.stop();
3411
+ * // }
3412
+ * ```
3413
+ *
3414
+ * @param liveSession - An active {@link LiveSession} instance.
3415
+ * @param options - Configuration options for the audio conversation.
3416
+ * @returns A `Promise` that resolves with an {@link AudioConversationController}.
3417
+ * @throws `AIError` if the environment does not support required Web APIs (`UNSUPPORTED`), if a conversation is already active (`REQUEST_ERROR`), the session is closed (`SESSION_CLOSED`), or if an unexpected initialization error occurs (`ERROR`).
3418
+ * @throws `DOMException` Thrown by `navigator.mediaDevices.getUserMedia()` if issues occur with microphone access, such as permissions being denied (`NotAllowedError`) or no compatible hardware being found (`NotFoundError`). See the {@link https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia#exceptions | MDN documentation} for a full list of exceptions.
3419
+ *
3420
+ * @beta
3421
+ */
3422
+ async function startAudioConversation(liveSession, options = {}) {
3423
+ if (liveSession.isClosed) {
3424
+ throw new AIError(AIErrorCode.SESSION_CLOSED, 'Cannot start audio conversation on a closed LiveSession.');
3425
+ }
3426
+ if (liveSession.inConversation) {
3427
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'An audio conversation is already in progress for this session.');
3428
+ }
3429
+ // Check for necessary Web API support.
3430
+ if (typeof AudioWorkletNode === 'undefined' ||
3431
+ typeof AudioContext === 'undefined' ||
3432
+ typeof navigator === 'undefined' ||
3433
+ !navigator.mediaDevices) {
3434
+ throw new AIError(AIErrorCode.UNSUPPORTED, 'Audio conversation is not supported in this environment. It requires the Web Audio API and AudioWorklet support.');
3435
+ }
3436
+ let audioContext;
3437
+ try {
3438
+ // 1. Set up the audio context. This must be in response to a user gesture.
3439
+ // See: https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy
3440
+ audioContext = new AudioContext();
3441
+ if (audioContext.state === 'suspended') {
3442
+ await audioContext.resume();
3443
+ }
3444
+ // 2. Prompt for microphone access and get the media stream.
3445
+ // This can throw a variety of permission or hardware-related errors.
3446
+ const mediaStream = await navigator.mediaDevices.getUserMedia({
3447
+ audio: true
3448
+ });
3449
+ // 3. Load the AudioWorklet processor.
3450
+ // See: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorklet
3451
+ const workletBlob = new Blob([audioProcessorWorkletString], {
3452
+ type: 'application/javascript'
3453
+ });
3454
+ const workletURL = URL.createObjectURL(workletBlob);
3455
+ await audioContext.audioWorklet.addModule(workletURL);
3456
+ // 4. Create the audio graph: Microphone -> Source Node -> Worklet Node
3457
+ const sourceNode = audioContext.createMediaStreamSource(mediaStream);
3458
+ const workletNode = new AudioWorkletNode(audioContext, AUDIO_PROCESSOR_NAME, {
3459
+ processorOptions: { targetSampleRate: SERVER_INPUT_SAMPLE_RATE }
3460
+ });
3461
+ sourceNode.connect(workletNode);
3462
+ // 5. Instantiate and return the runner which manages the conversation.
3463
+ const runner = new AudioConversationRunner(liveSession, options, {
3464
+ audioContext,
3465
+ mediaStream,
3466
+ sourceNode,
3467
+ workletNode
3468
+ });
3469
+ return { stop: () => runner.stop() };
3470
+ }
3471
+ catch (e) {
3472
+ // Ensure the audio context is closed on any setup error.
3473
+ if (audioContext && audioContext.state !== 'closed') {
3474
+ void audioContext.close();
3475
+ }
3476
+ // Re-throw specific, known error types directly. The user may want to handle `DOMException`
3477
+ // errors differently (for example, if permission to access the audio device was denied).
3478
+ if (e instanceof AIError || e instanceof DOMException) {
3479
+ throw e;
3480
+ }
3481
+ // Wrap any other unexpected errors in a standard AIError.
3482
+ throw new AIError(AIErrorCode.ERROR, `Failed to initialize audio recording: ${e.message}`);
3483
+ }
3484
+ }
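Where the conversation is expected to produce tool calls, the runner above forwards them to `StartAudioConversationOptions.functionCallingHandler` and sends whatever part the handler resolves back over the session. A hedged sketch, assuming a handler that returns a `functionResponse` part (the exact part shape and field values are assumptions for illustration):

```javascript
const controller = await startAudioConversation(liveSession, {
  // Invoked with the functionCalls from a toolCall message; the resolved part is
  // sent back to the model via liveSession.send([resultPart]).
  functionCallingHandler: async (functionCalls) => {
    const call = functionCalls[0];
    return {
      functionResponse: {
        name: call.name,
        response: { result: 'ok' } // placeholder result for illustration
      }
    };
  }
});

// Later, stop recording and playback and release the microphone.
await controller.stop();
```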
3485
+
3486
+ /**
3487
+ * @license
3488
+ * Copyright 2024 Google LLC
3489
+ *
3490
+ * Licensed under the Apache License, Version 2.0 (the "License");
3491
+ * you may not use this file except in compliance with the License.
3492
+ * You may obtain a copy of the License at
3493
+ *
3494
+ * http://www.apache.org/licenses/LICENSE-2.0
3495
+ *
3496
+ * Unless required by applicable law or agreed to in writing, software
3497
+ * distributed under the License is distributed on an "AS IS" BASIS,
3498
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3499
+ * See the License for the specific language governing permissions and
3500
+ * limitations under the License.
3501
+ */
3502
+ /**
3503
+ * Returns the default {@link AI} instance that is associated with the provided
3504
+ * {@link @firebase/app#FirebaseApp}. If no instance exists, initializes a new instance with the
3505
+ * default settings.
3506
+ *
3507
+ * @example
3508
+ * ```javascript
3509
+ * const ai = getAI(app);
3510
+ * ```
3511
+ *
3512
+ * @example
3513
+ * ```javascript
3514
+ * // Get an AI instance configured to use the Gemini Developer API (via Google AI).
3515
+ * const ai = getAI(app, { backend: new GoogleAIBackend() });
3516
+ * ```
3517
+ *
3518
+ * @example
3519
+ * ```javascript
3520
+ * // Get an AI instance configured to use the Vertex AI Gemini API.
3521
+ * const ai = getAI(app, { backend: new VertexAIBackend() });
3522
+ * ```
3523
+ *
3524
+ * @param app - The {@link @firebase/app#FirebaseApp} to use.
3525
+ * @param options - {@link AIOptions} that configure the AI instance.
3526
+ * @returns The default {@link AI} instance for the given {@link @firebase/app#FirebaseApp}.
3527
+ *
3528
+ * @public
3529
+ */
3530
+ function getAI(app$1 = app.getApp(), options) {
3531
+ app$1 = util.getModularInstance(app$1);
3532
+ // Dependencies
3533
+ const AIProvider = app._getProvider(app$1, AI_TYPE);
3534
+ const backend = options?.backend ?? new GoogleAIBackend();
3535
+ const finalOptions = {
3536
+ useLimitedUseAppCheckTokens: options?.useLimitedUseAppCheckTokens ?? false
3537
+ };
3538
+ const identifier = encodeInstanceIdentifier(backend);
3539
+ const aiInstance = AIProvider.getImmediate({
3540
+ identifier
3541
+ });
3542
+ aiInstance.options = finalOptions;
3543
+ return aiInstance;
3544
+ }
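Besides `backend`, the options object read above also accepts `useLimitedUseAppCheckTokens`, defaulting to `false`. A minimal sketch combining the two:

```javascript
// Use the Vertex AI Gemini API and request limited-use App Check tokens.
const ai = getAI(app, {
  backend: new VertexAIBackend(),
  useLimitedUseAppCheckTokens: true
});
```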
3545
+ /**
3546
+ * Returns a {@link GenerativeModel} class with methods for inference
3547
+ * and other functionality.
3548
+ *
3549
+ * @public
3550
+ */
3551
+ function getGenerativeModel(ai, modelParams, requestOptions) {
3552
+ // Uses the existence of HybridParams.mode to clarify the type of the modelParams input.
3553
+ const hybridParams = modelParams;
3554
+ let inCloudParams;
3555
+ if (hybridParams.mode) {
3556
+ inCloudParams = hybridParams.inCloudParams || {
3557
+ model: DEFAULT_HYBRID_IN_CLOUD_MODEL
3558
+ };
2998
3559
  }
3560
+ else {
3561
+ inCloudParams = modelParams;
3562
+ }
3563
+ if (!inCloudParams.model) {
3564
+ throw new AIError(AIErrorCode.NO_MODEL, `Must provide a model name. Example: getGenerativeModel({ model: 'my-model-name' })`);
3565
+ }
3566
+ /**
3567
+ * An AIService registered by index.node.ts will not have a
3568
+ * chromeAdapterFactory() method.
3569
+ */
3570
+ const chromeAdapter = ai.chromeAdapterFactory?.(hybridParams.mode, typeof window === 'undefined' ? undefined : window, hybridParams.onDeviceParams);
2999
3571
  return new GenerativeModel(ai, inCloudParams, requestOptions, chromeAdapter);
3000
3572
  }
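Because the branch above keys off `HybridParams.mode`, a hybrid caller can omit `inCloudParams` and fall back to `DEFAULT_HYBRID_IN_CLOUD_MODEL`. A sketch, assuming `InferenceMode.PREFER_ON_DEVICE` as the mode value (the member name is inferred from the adapter's "prefer_on_device" logic below) and an illustrative cloud model name:

```javascript
// Hybrid inference: try Chrome's on-device model first, fall back to the cloud.
const model = getGenerativeModel(ai, {
  mode: InferenceMode.PREFER_ON_DEVICE,          // assumed enum member
  inCloudParams: { model: 'gemini-2.0-flash' }   // illustrative; optional
});
```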
3001
3573
  /**
@@ -3018,24 +3590,339 @@ function getImagenModel(ai, modelParams, requestOptions) {
3018
3590
  }
3019
3591
  return new ImagenModel(ai, modelParams, requestOptions);
3020
3592
  }
3593
+ /**
3594
+ * Returns a {@link LiveGenerativeModel} class for real-time, bidirectional communication.
3595
+ *
3596
+ * The Live API is only supported in modern browser windows and Node >= 22.
3597
+ *
3598
+ * @param ai - An {@link AI} instance.
3599
+ * @param modelParams - Parameters to use when setting up a {@link LiveSession}.
3600
+ * @throws If the `apiKey` or `projectId` fields are missing in your
3601
+ * Firebase config.
3602
+ *
3603
+ * @beta
3604
+ */
3605
+ function getLiveGenerativeModel(ai, modelParams) {
3606
+ if (!modelParams.model) {
3607
+ throw new AIError(AIErrorCode.NO_MODEL, `Must provide a model name for getLiveGenerativeModel. Example: getLiveGenerativeModel(ai, { model: 'my-model-name' })`);
3608
+ }
3609
+ const webSocketHandler = new WebSocketHandlerImpl();
3610
+ return new LiveGenerativeModel(ai, modelParams, webSocketHandler);
3611
+ }
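A brief usage sketch tying `getLiveGenerativeModel` to the audio helper above; the model name is an illustrative placeholder, and `connect()` is the session entry point shown in the `startAudioConversation` example:

```javascript
const liveModel = getLiveGenerativeModel(ai, {
  model: 'gemini-live-model-name' // illustrative placeholder
});
const session = await liveModel.connect();

// Must be called from a user gesture, per the autoplay-policy note above.
const controller = await startAudioConversation(session);
```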
3612
+
3613
+ /**
3614
+ * @internal
3615
+ */
3616
+ var Availability;
3617
+ (function (Availability) {
3618
+ Availability["UNAVAILABLE"] = "unavailable";
3619
+ Availability["DOWNLOADABLE"] = "downloadable";
3620
+ Availability["DOWNLOADING"] = "downloading";
3621
+ Availability["AVAILABLE"] = "available";
3622
+ })(Availability || (Availability = {}));
3623
+
3624
+ /**
3625
+ * @license
3626
+ * Copyright 2025 Google LLC
3627
+ *
3628
+ * Licensed under the Apache License, Version 2.0 (the "License");
3629
+ * you may not use this file except in compliance with the License.
3630
+ * You may obtain a copy of the License at
3631
+ *
3632
+ * http://www.apache.org/licenses/LICENSE-2.0
3633
+ *
3634
+ * Unless required by applicable law or agreed to in writing, software
3635
+ * distributed under the License is distributed on an "AS IS" BASIS,
3636
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3637
+ * See the License for the specific language governing permissions and
3638
+ * limitations under the License.
3639
+ */
3640
+ /**
3641
+ * Defines an inference "backend" that uses Chrome's on-device model,
3642
+ * and encapsulates logic for detecting when on-device inference is
3643
+ * possible.
3644
+ */
3645
+ class ChromeAdapterImpl {
3646
+ constructor(languageModelProvider, mode, onDeviceParams = {
3647
+ createOptions: {
3648
+ // Defaults to support image inputs for convenience.
3649
+ expectedInputs: [{ type: 'image' }]
3650
+ }
3651
+ }) {
3652
+ this.languageModelProvider = languageModelProvider;
3653
+ this.mode = mode;
3654
+ this.onDeviceParams = onDeviceParams;
3655
+ this.isDownloading = false;
3656
+ }
3657
+ /**
3658
+ * Checks if a given request can be made on-device.
3659
+ *
3660
+ * Encapsulates a few concerns:
3661
+ * - the mode
3662
+ * - API existence
3663
+ * - prompt formatting
3664
+ * - model availability, including triggering download if necessary
3665
+ *
3666
+ *
3667
+ * Pros: callers needn't be concerned with details of on-device availability.
3668
+ * Cons: this method spans a few concerns and splits request validation from usage.
3669
+ * If instance variables weren't already part of the API, we could consider a better
3670
+ * separation of concerns.
3671
+ */
3672
+ async isAvailable(request) {
3673
+ if (!this.mode) {
3674
+ logger.debug(`On-device inference unavailable because mode is undefined.`);
3675
+ return false;
3676
+ }
3677
+ if (this.mode === InferenceMode.ONLY_IN_CLOUD) {
3678
+ logger.debug(`On-device inference unavailable because mode is "only_in_cloud".`);
3679
+ return false;
3680
+ }
3681
+ // Triggers out-of-band download so model will eventually become available.
3682
+ const availability = await this.downloadIfAvailable();
3683
+ if (this.mode === InferenceMode.ONLY_ON_DEVICE) {
3684
+ // If it will never be available due to API unavailability, throw.
3685
+ if (availability === Availability.UNAVAILABLE) {
3686
+ throw new AIError(AIErrorCode.API_NOT_ENABLED, 'Local LanguageModel API not available in this environment.');
3687
+ }
3688
+ else if (availability === Availability.DOWNLOADABLE ||
3689
+ availability === Availability.DOWNLOADING) {
3690
+ // TODO(chholland): Better user experience during download - progress?
3691
+ logger.debug(`Waiting for download of LanguageModel to complete.`);
3692
+ await this.downloadPromise;
3693
+ return true;
3694
+ }
3695
+ return true;
3696
+ }
3697
+ // Applies prefer_on_device logic.
3698
+ if (availability !== Availability.AVAILABLE) {
3699
+ logger.debug(`On-device inference unavailable because availability is "${availability}".`);
3700
+ return false;
3701
+ }
3702
+ if (!ChromeAdapterImpl.isOnDeviceRequest(request)) {
3703
+ logger.debug(`On-device inference unavailable because request is incompatible.`);
3704
+ return false;
3705
+ }
3706
+ return true;
3707
+ }
3708
+ /**
3709
+ * Generates content on device.
3710
+ *
3711
+ * @remarks
3712
+ * This is comparable to {@link GenerativeModel.generateContent} for generating content in
3713
+ * Cloud.
3714
+ * @param request - a standard Firebase AI {@link GenerateContentRequest}
3715
+ * @returns {@link Response}, so we can reuse common response formatting.
3716
+ */
3717
+ async generateContent(request) {
3718
+ const session = await this.createSession();
3719
+ const contents = await Promise.all(request.contents.map(ChromeAdapterImpl.toLanguageModelMessage));
3720
+ const text = await session.prompt(contents, this.onDeviceParams.promptOptions);
3721
+ return ChromeAdapterImpl.toResponse(text);
3722
+ }
3723
+ /**
3724
+ * Generates content stream on device.
3725
+ *
3726
+ * @remarks
3727
+ * This is comparable to {@link GenerativeModel.generateContentStream} for generating content in
3728
+ * Cloud.
3729
+ * @param request - a standard Firebase AI {@link GenerateContentRequest}
3730
+ * @returns {@link Response}, so we can reuse common response formatting.
3731
+ */
3732
+ async generateContentStream(request) {
3733
+ const session = await this.createSession();
3734
+ const contents = await Promise.all(request.contents.map(ChromeAdapterImpl.toLanguageModelMessage));
3735
+ const stream = session.promptStreaming(contents, this.onDeviceParams.promptOptions);
3736
+ return ChromeAdapterImpl.toStreamResponse(stream);
3737
+ }
3738
+ async countTokens(_request) {
3739
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'Count Tokens is not yet available for on-device model.');
3740
+ }
3741
+ /**
3742
+ * Asserts inference for the given request can be performed by an on-device model.
3743
+ */
3744
+ static isOnDeviceRequest(request) {
3745
+ // Returns false if the prompt is empty.
3746
+ if (request.contents.length === 0) {
3747
+ logger.debug('Empty prompt rejected for on-device inference.');
3748
+ return false;
3749
+ }
3750
+ for (const content of request.contents) {
3751
+ if (content.role === 'function') {
3752
+ logger.debug(`"Function" role rejected for on-device inference.`);
3753
+ return false;
3754
+ }
3755
+ // Returns false if request contains an image with an unsupported mime type.
3756
+ for (const part of content.parts) {
3757
+ if (part.inlineData &&
3758
+ ChromeAdapterImpl.SUPPORTED_MIME_TYPES.indexOf(part.inlineData.mimeType) === -1) {
3759
+ logger.debug(`Unsupported mime type "${part.inlineData.mimeType}" rejected for on-device inference.`);
3760
+ return false;
3761
+ }
3762
+ }
3763
+ }
3764
+ return true;
3765
+ }
3766
+ /**
3767
+ * Encapsulates logic to get availability and download a model if one is downloadable.
3768
+ */
3769
+ async downloadIfAvailable() {
3770
+ const availability = await this.languageModelProvider?.availability(this.onDeviceParams.createOptions);
3771
+ if (availability === Availability.DOWNLOADABLE) {
3772
+ this.download();
3773
+ }
3774
+ return availability;
3775
+ }
3776
+ /**
3777
+ * Triggers out-of-band download of an on-device model.
3778
+ *
3779
+ * Chrome only downloads models as needed. Chrome knows a model is needed when code calls
3780
+ * LanguageModel.create.
3781
+ *
3782
+ * Since Chrome manages the download, the SDK can only avoid redundant download requests by
3783
+ * tracking if a download has previously been requested.
3784
+ */
3785
+ download() {
3786
+ if (this.isDownloading) {
3787
+ return;
3788
+ }
3789
+ this.isDownloading = true;
3790
+ this.downloadPromise = this.languageModelProvider
3791
+ ?.create(this.onDeviceParams.createOptions)
3792
+ .finally(() => {
3793
+ this.isDownloading = false;
3794
+ });
3795
+ }
3796
+ /**
3797
+ * Converts Firebase AI {@link Content} object to a Chrome {@link LanguageModelMessage} object.
3798
+ */
3799
+ static async toLanguageModelMessage(content) {
3800
+ const languageModelMessageContents = await Promise.all(content.parts.map(ChromeAdapterImpl.toLanguageModelMessageContent));
3801
+ return {
3802
+ role: ChromeAdapterImpl.toLanguageModelMessageRole(content.role),
3803
+ content: languageModelMessageContents
3804
+ };
3805
+ }
3806
+ /**
3807
+ * Converts a Firebase AI Part object to a Chrome LanguageModelMessageContent object.
3808
+ */
3809
+ static async toLanguageModelMessageContent(part) {
3810
+ if (part.text) {
3811
+ return {
3812
+ type: 'text',
3813
+ value: part.text
3814
+ };
3815
+ }
3816
+ else if (part.inlineData) {
3817
+ const formattedImageContent = await fetch(`data:${part.inlineData.mimeType};base64,${part.inlineData.data}`);
3818
+ const imageBlob = await formattedImageContent.blob();
3819
+ const imageBitmap = await createImageBitmap(imageBlob);
3820
+ return {
3821
+ type: 'image',
3822
+ value: imageBitmap
3823
+ };
3824
+ }
3825
+ throw new AIError(AIErrorCode.REQUEST_ERROR, `Processing of this Part type is not currently supported.`);
3826
+ }
3827
+ /**
3828
+ * Converts a Firebase AI {@link Role} string to a {@link LanguageModelMessageRole} string.
3829
+ */
3830
+ static toLanguageModelMessageRole(role) {
3831
+ // Assumes the 'function' role has been filtered out by isOnDeviceRequest.
3832
+ return role === 'model' ? 'assistant' : 'user';
3833
+ }
3834
+ /**
3835
+ * Abstracts Chrome session creation.
3836
+ *
3837
+ * Chrome uses a multi-turn session for all inference. Firebase AI uses single-turn for all
3838
+ * inference. To map the Firebase AI API to Chrome's API, the SDK creates a new session for all
3839
+ * inference.
3840
+ *
3841
+ * Chrome will remove a model from memory if it's no longer in use, so this method ensures a
3842
+ * new session is created before an old session is destroyed.
3843
+ */
3844
+ async createSession() {
3845
+ if (!this.languageModelProvider) {
3846
+ throw new AIError(AIErrorCode.UNSUPPORTED, 'Chrome AI requested for unsupported browser version.');
3847
+ }
3848
+ const newSession = await this.languageModelProvider.create(this.onDeviceParams.createOptions);
3849
+ if (this.oldSession) {
3850
+ this.oldSession.destroy();
3851
+ }
3852
+ // Holds session reference, so model isn't unloaded from memory.
3853
+ this.oldSession = newSession;
3854
+ return newSession;
3855
+ }
3856
+ /**
3857
+ * Formats string returned by Chrome as a {@link Response} returned by Firebase AI.
3858
+ */
3859
+ static toResponse(text) {
3860
+ return {
3861
+ json: async () => ({
3862
+ candidates: [
3863
+ {
3864
+ content: {
3865
+ parts: [{ text }]
3866
+ }
3867
+ }
3868
+ ]
3869
+ })
3870
+ };
3871
+ }
3872
+ /**
3873
+ * Formats string stream returned by Chrome as SSE returned by Firebase AI.
3874
+ */
3875
+ static toStreamResponse(stream) {
3876
+ const encoder = new TextEncoder();
3877
+ return {
3878
+ body: stream.pipeThrough(new TransformStream({
3879
+ transform(chunk, controller) {
3880
+ const json = JSON.stringify({
3881
+ candidates: [
3882
+ {
3883
+ content: {
3884
+ role: 'model',
3885
+ parts: [{ text: chunk }]
3886
+ }
3887
+ }
3888
+ ]
3889
+ });
3890
+ controller.enqueue(encoder.encode(`data: ${json}\n\n`));
3891
+ }
3892
+ }))
3893
+ };
3894
+ }
3895
+ }
3896
+ // Visible for testing
3897
+ ChromeAdapterImpl.SUPPORTED_MIME_TYPES = ['image/jpeg', 'image/png'];
3898
+ /**
3899
+ * Creates a ChromeAdapterImpl on demand.
3900
+ */
3901
+ function chromeAdapterFactory(mode, window, params) {
3902
+ // Only initialize a ChromeAdapter when a window is available and a hybrid mode is set.
3903
+ if (typeof window !== 'undefined' && mode) {
3904
+ return new ChromeAdapterImpl(window.LanguageModel, mode, params);
3905
+ }
3906
+ }
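Since `chromeAdapterFactory` returns `undefined` unless both a `window` and a hybrid `mode` are supplied, Node and cloud-only entry points never construct a `ChromeAdapterImpl`. A sketch of that guard from the caller's side (the mode string and options are assumptions for illustration):

```javascript
const adapter = chromeAdapterFactory(
  'prefer_on_device',                                   // assumed mode string
  typeof window === 'undefined' ? undefined : window,
  { createOptions: { expectedInputs: [{ type: 'image' }] } }
);

if (!adapter) {
  // No window or no hybrid mode: requests take the in-cloud path only.
}
```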
3021
3907
 
3022
3908
  /**
3023
3909
  * The Firebase AI Web SDK.
3024
3910
  *
3025
3911
  * @packageDocumentation
3026
3912
  */
3913
+ function factory(container, { instanceIdentifier }) {
3914
+ if (!instanceIdentifier) {
3915
+ throw new AIError(AIErrorCode.ERROR, 'AIService instance identifier is undefined.');
3916
+ }
3917
+ const backend = decodeInstanceIdentifier(instanceIdentifier);
3918
+ // getImmediate for FirebaseApp will always succeed
3919
+ const app = container.getProvider('app').getImmediate();
3920
+ const auth = container.getProvider('auth-internal');
3921
+ const appCheckProvider = container.getProvider('app-check-internal');
3922
+ return new AIService(app, backend, auth, appCheckProvider, chromeAdapterFactory);
3923
+ }
3027
3924
  function registerAI() {
3028
- app._registerComponent(new component.Component(AI_TYPE, (container, { instanceIdentifier }) => {
3029
- if (!instanceIdentifier) {
3030
- throw new AIError(AIErrorCode.ERROR, 'AIService instance identifier is undefined.');
3031
- }
3032
- const backend = decodeInstanceIdentifier(instanceIdentifier);
3033
- // getImmediate for FirebaseApp will always succeed
3034
- const app = container.getProvider('app').getImmediate();
3035
- const auth = container.getProvider('auth-internal');
3036
- const appCheckProvider = container.getProvider('app-check-internal');
3037
- return new AIService(app, backend, auth, appCheckProvider);
3038
- }, "PUBLIC" /* ComponentType.PUBLIC */).setMultipleInstances(true));
3925
+ app._registerComponent(new component.Component(AI_TYPE, factory, "PUBLIC" /* ComponentType.PUBLIC */).setMultipleInstances(true));
3039
3926
  app.registerVersion(name, version);
3040
3927
  // BUILD_TARGET will be replaced by values like esm, cjs, etc during the compilation
3041
3928
  app.registerVersion(name, version, 'cjs2020');
@@ -3068,6 +3955,9 @@ exports.ImagenPersonFilterLevel = ImagenPersonFilterLevel;
3068
3955
  exports.ImagenSafetyFilterLevel = ImagenSafetyFilterLevel;
3069
3956
  exports.InferenceMode = InferenceMode;
3070
3957
  exports.IntegerSchema = IntegerSchema;
3958
+ exports.LiveGenerativeModel = LiveGenerativeModel;
3959
+ exports.LiveResponseType = LiveResponseType;
3960
+ exports.LiveSession = LiveSession;
3071
3961
  exports.Modality = Modality;
3072
3962
  exports.NumberSchema = NumberSchema;
3073
3963
  exports.ObjectSchema = ObjectSchema;
@@ -3080,4 +3970,6 @@ exports.VertexAIBackend = VertexAIBackend;
3080
3970
  exports.getAI = getAI;
3081
3971
  exports.getGenerativeModel = getGenerativeModel;
3082
3972
  exports.getImagenModel = getImagenModel;
3973
+ exports.getLiveGenerativeModel = getLiveGenerativeModel;
3974
+ exports.startAudioConversation = startAudioConversation;
3083
3975
  //# sourceMappingURL=index.cjs.js.map