@firebase/ai 2.1.0 → 2.2.0-canary.095c098de

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/dist/ai-public.d.ts +443 -10
  2. package/dist/ai.d.ts +525 -11
  3. package/dist/esm/index.esm.js +1255 -368
  4. package/dist/esm/index.esm.js.map +1 -1
  5. package/dist/esm/src/api.d.ts +18 -3
  6. package/dist/esm/src/constants.d.ts +1 -1
  7. package/dist/esm/src/index.d.ts +2 -1
  8. package/dist/esm/src/methods/chrome-adapter.d.ts +30 -24
  9. package/dist/esm/src/methods/live-session-helpers.d.ts +154 -0
  10. package/dist/esm/src/methods/live-session.d.ts +90 -0
  11. package/dist/esm/src/models/ai-model.d.ts +1 -1
  12. package/dist/esm/src/models/index.d.ts +1 -0
  13. package/dist/esm/src/models/live-generative-model.d.ts +55 -0
  14. package/dist/esm/src/public-types.d.ts +10 -1
  15. package/dist/esm/src/requests/request.d.ts +6 -0
  16. package/dist/esm/src/requests/response-helpers.d.ts +9 -5
  17. package/dist/esm/src/service.d.ts +7 -2
  18. package/dist/esm/src/types/chrome-adapter.d.ts +6 -4
  19. package/dist/esm/src/types/content.d.ts +42 -0
  20. package/dist/esm/src/types/enums.d.ts +5 -0
  21. package/dist/esm/src/types/error.d.ts +2 -0
  22. package/dist/esm/src/types/imagen/internal.d.ts +10 -0
  23. package/dist/esm/src/types/live-responses.d.ts +53 -0
  24. package/dist/esm/src/types/requests.d.ts +109 -1
  25. package/dist/esm/src/types/responses.d.ts +87 -4
  26. package/dist/esm/src/websocket.d.ts +67 -0
  27. package/dist/index.cjs.js +1258 -366
  28. package/dist/index.cjs.js.map +1 -1
  29. package/dist/index.node.cjs.js +907 -311
  30. package/dist/index.node.cjs.js.map +1 -1
  31. package/dist/index.node.mjs +904 -313
  32. package/dist/index.node.mjs.map +1 -1
  33. package/dist/src/api.d.ts +18 -3
  34. package/dist/src/constants.d.ts +1 -1
  35. package/dist/src/index.d.ts +2 -1
  36. package/dist/src/methods/chrome-adapter.d.ts +30 -24
  37. package/dist/src/methods/live-session-helpers.d.ts +154 -0
  38. package/dist/src/methods/live-session.d.ts +90 -0
  39. package/dist/src/models/ai-model.d.ts +1 -1
  40. package/dist/src/models/index.d.ts +1 -0
  41. package/dist/src/models/live-generative-model.d.ts +55 -0
  42. package/dist/src/public-types.d.ts +10 -1
  43. package/dist/src/requests/request.d.ts +6 -0
  44. package/dist/src/requests/response-helpers.d.ts +9 -5
  45. package/dist/src/service.d.ts +7 -2
  46. package/dist/src/types/chrome-adapter.d.ts +6 -4
  47. package/dist/src/types/content.d.ts +42 -0
  48. package/dist/src/types/enums.d.ts +5 -0
  49. package/dist/src/types/error.d.ts +2 -0
  50. package/dist/src/types/imagen/internal.d.ts +10 -0
  51. package/dist/src/types/live-responses.d.ts +53 -0
  52. package/dist/src/types/requests.d.ts +109 -1
  53. package/dist/src/types/responses.d.ts +87 -4
  54. package/dist/src/websocket.d.ts +67 -0
  55. package/package.json +10 -8
@@ -1,10 +1,10 @@
  import { _isFirebaseServerApp, _getProvider, getApp, _registerComponent, registerVersion } from '@firebase/app';
  import { Component } from '@firebase/component';
- import { FirebaseError, getModularInstance } from '@firebase/util';
+ import { FirebaseError, Deferred, getModularInstance } from '@firebase/util';
  import { Logger } from '@firebase/logger';

  var name = "@firebase/ai";
- var version = "2.1.0";
+ var version = "2.2.0-canary.095c098de";

  /**
  * @license
@@ -24,7 +24,7 @@ var version = "2.1.0";
  */
  const AI_TYPE = 'AI';
  const DEFAULT_LOCATION = 'us-central1';
- const DEFAULT_BASE_URL = 'https://firebasevertexai.googleapis.com';
+ const DEFAULT_DOMAIN = 'firebasevertexai.googleapis.com';
  const DEFAULT_API_VERSION = 'v1beta';
  const PACKAGE_VERSION = version;
  const LANGUAGE_TAG = 'gl-js';
@@ -289,7 +289,12 @@ const ResponseModality = {
  * Image.
  * @beta
  */
- IMAGE: 'IMAGE'
+ IMAGE: 'IMAGE',
+ /**
+ * Audio.
+ * @beta
+ */
+ AUDIO: 'AUDIO'
  };
  /**
  * <b>(EXPERIMENTAL)</b>
@@ -302,6 +307,33 @@ const InferenceMode = {
  'ONLY_IN_CLOUD': 'only_in_cloud'
  };

+ /**
+ * @license
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /**
+ * The types of responses that can be returned by {@link LiveSession.receive}.
+ *
+ * @beta
+ */
+ const LiveResponseType = {
+ SERVER_CONTENT: 'serverContent',
+ TOOL_CALL: 'toolCall',
+ TOOL_CALL_CANCELLATION: 'toolCallCancellation'
+ };
+
  /**
  * @license
  * Copyright 2024 Google LLC
@@ -332,6 +364,8 @@ const AIErrorCode = {
  RESPONSE_ERROR: 'response-error',
  /** An error occurred while performing a fetch. */
  FETCH_ERROR: 'fetch-error',
+ /** An error occurred because an operation was attempted on a closed session. */
+ SESSION_CLOSED: 'session-closed',
  /** An error associated with a Content object. */
  INVALID_CONTENT: 'invalid-content',
  /** An error due to the Firebase API not being enabled in the Console. */
@@ -636,9 +670,10 @@ class VertexAIBackend extends Backend {
  * limitations under the License.
  */
  class AIService {
- constructor(app, backend, authProvider, appCheckProvider) {
+ constructor(app, backend, authProvider, appCheckProvider, chromeAdapterFactory) {
  this.app = app;
  this.backend = backend;
+ this.chromeAdapterFactory = chromeAdapterFactory;
  const appCheck = appCheckProvider?.getImmediate({ optional: true });
  const auth = authProvider?.getImmediate({ optional: true });
  this.auth = auth || null;
@@ -653,6 +688,12 @@ class AIService {
  _delete() {
  return Promise.resolve();
  }
+ set options(optionsToSet) {
+ this._options = optionsToSet;
+ }
+ get options() {
+ return this._options;
+ }
  }

  /**
@@ -837,7 +878,12 @@ class AIModel {
  };
  }
  else if (ai.appCheck) {
- this._apiSettings.getAppCheckToken = () => ai.appCheck.getToken();
+ if (ai.options?.useLimitedUseAppCheckTokens) {
+ this._apiSettings.getAppCheckToken = () => ai.appCheck.getLimitedUseToken();
+ }
+ else {
+ this._apiSettings.getAppCheckToken = () => ai.appCheck.getToken();
+ }
  }
  if (ai.auth) {
  this._apiSettings.getAuthToken = () => ai.auth.getToken();
@@ -946,7 +992,7 @@ class RequestUrl {
  return url.toString();
  }
  get baseUrl() {
- return this.requestOptions?.baseUrl || DEFAULT_BASE_URL;
+ return this.requestOptions?.baseUrl || `https://${DEFAULT_DOMAIN}`;
  }
  get apiVersion() {
  return DEFAULT_API_VERSION; // TODO: allow user-set options if that feature becomes available
@@ -970,6 +1016,27 @@ class RequestUrl {
  return params;
  }
  }
+ class WebSocketUrl {
+ constructor(apiSettings) {
+ this.apiSettings = apiSettings;
+ }
+ toString() {
+ const url = new URL(`wss://${DEFAULT_DOMAIN}`);
+ url.pathname = this.pathname;
+ const queryParams = new URLSearchParams();
+ queryParams.set('key', this.apiSettings.apiKey);
+ url.search = queryParams.toString();
+ return url.toString();
+ }
+ get pathname() {
+ if (this.apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
+ return 'ws/google.firebase.vertexai.v1beta.GenerativeService/BidiGenerateContent';
+ }
+ else {
+ return `ws/google.firebase.vertexai.v1beta.LlmBidiService/BidiGenerateContent/locations/${this.apiSettings.location}`;
+ }
+ }
+ }
  /**
  * Log language and "fire/version" to x-goog-api-client
  */
@@ -1100,6 +1167,28 @@ async function makeRequest(model, task, apiSettings, stream, body, requestOption
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+ /**
+ * Check that at least one candidate exists and does not have a bad
+ * finish reason. Warns if multiple candidates exist.
+ */
+ function hasValidCandidates(response) {
+ if (response.candidates && response.candidates.length > 0) {
+ if (response.candidates.length > 1) {
+ logger.warn(`This response had ${response.candidates.length} ` +
+ `candidates. Returning text from the first candidate only. ` +
+ `Access response.candidates directly to use the other candidates.`);
+ }
+ if (hadBadFinishReason(response.candidates[0])) {
+ throw new AIError(AIErrorCode.RESPONSE_ERROR, `Response error: ${formatBlockErrorMessage(response)}. Response body stored in error.response`, {
+ response
+ });
+ }
+ return true;
+ }
+ else {
+ return false;
+ }
+ }
  /**
  * Creates an EnhancedGenerateContentResponse object that has helper functions and
  * other modifications that improve usability.
@@ -1123,18 +1212,8 @@ function createEnhancedContentResponse(response) {
  */
  function addHelpers(response) {
  response.text = () => {
- if (response.candidates && response.candidates.length > 0) {
- if (response.candidates.length > 1) {
- logger.warn(`This response had ${response.candidates.length} ` +
- `candidates. Returning text from the first candidate only. ` +
- `Access response.candidates directly to use the other candidates.`);
- }
- if (hadBadFinishReason(response.candidates[0])) {
- throw new AIError(AIErrorCode.RESPONSE_ERROR, `Response error: ${formatBlockErrorMessage(response)}. Response body stored in error.response`, {
- response
- });
- }
- return getText(response);
+ if (hasValidCandidates(response)) {
+ return getText(response, part => !part.thought);
  }
  else if (response.promptFeedback) {
  throw new AIError(AIErrorCode.RESPONSE_ERROR, `Text not available. ${formatBlockErrorMessage(response)}`, {
@@ -1143,18 +1222,20 @@ function addHelpers(response) {
  }
  return '';
  };
+ response.thoughtSummary = () => {
+ if (hasValidCandidates(response)) {
+ const result = getText(response, part => !!part.thought);
+ return result === '' ? undefined : result;
+ }
+ else if (response.promptFeedback) {
+ throw new AIError(AIErrorCode.RESPONSE_ERROR, `Thought summary not available. ${formatBlockErrorMessage(response)}`, {
+ response
+ });
+ }
+ return undefined;
+ };
  response.inlineDataParts = () => {
- if (response.candidates && response.candidates.length > 0) {
- if (response.candidates.length > 1) {
- logger.warn(`This response had ${response.candidates.length} ` +
- `candidates. Returning data from the first candidate only. ` +
- `Access response.candidates directly to use the other candidates.`);
- }
- if (hadBadFinishReason(response.candidates[0])) {
- throw new AIError(AIErrorCode.RESPONSE_ERROR, `Response error: ${formatBlockErrorMessage(response)}. Response body stored in error.response`, {
- response
- });
- }
+ if (hasValidCandidates(response)) {
  return getInlineDataParts(response);
  }
  else if (response.promptFeedback) {
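Usage sketch (not part of the diff): the new `thoughtSummary()` helper above returns text only from parts flagged as `thought`, while `text()` now excludes them; it returns `undefined` when no thought parts are present. The model name and the `thinkingConfig` option below are illustrative assumptions, not confirmed by this hunk; `app` is an initialized `FirebaseApp`.

```javascript
// Sketch only: reading thoughtSummary() alongside text().
import { getAI, getGenerativeModel } from '@firebase/ai';

const ai = getAI(app);
const model = getGenerativeModel(ai, {
  model: 'gemini-2.5-flash', // assumed model name
  generationConfig: {
    // Assumed option for requesting thought summaries; see types/requests.d.ts in this release.
    thinkingConfig: { includeThoughts: true }
  }
});

const { response } = await model.generateContent('Why is the sky blue?');
console.log(response.text());           // answer text (parts where `thought` is falsy)
console.log(response.thoughtSummary()); // thought summary text, or undefined if none was returned
```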
@@ -1165,17 +1246,7 @@ function addHelpers(response) {
  return undefined;
  };
  response.functionCalls = () => {
- if (response.candidates && response.candidates.length > 0) {
- if (response.candidates.length > 1) {
- logger.warn(`This response had ${response.candidates.length} ` +
- `candidates. Returning function calls from the first candidate only. ` +
- `Access response.candidates directly to use the other candidates.`);
- }
- if (hadBadFinishReason(response.candidates[0])) {
- throw new AIError(AIErrorCode.RESPONSE_ERROR, `Response error: ${formatBlockErrorMessage(response)}. Response body stored in error.response`, {
- response
- });
- }
+ if (hasValidCandidates(response)) {
  return getFunctionCalls(response);
  }
  else if (response.promptFeedback) {
@@ -1188,13 +1259,17 @@ function addHelpers(response) {
  return response;
  }
  /**
- * Returns all text found in all parts of first candidate.
+ * Returns all text from the first candidate's parts, filtering by whether
+ * `partFilter()` returns true.
+ *
+ * @param response - The `GenerateContentResponse` from which to extract text.
+ * @param partFilter - Only return `Part`s for which this returns true
  */
- function getText(response) {
+ function getText(response, partFilter) {
  const textStrings = [];
  if (response.candidates?.[0].content?.parts) {
  for (const part of response.candidates?.[0].content?.parts) {
- if (part.text) {
+ if (part.text && partFilter(part)) {
  textStrings.push(part.text);
  }
  }
@@ -1207,7 +1282,7 @@ function getText(response) {
  }
  }
  /**
- * Returns {@link FunctionCall}s associated with first candidate.
+ * Returns every {@link FunctionCall} associated with first candidate.
  */
  function getFunctionCalls(response) {
  const functionCalls = [];
@@ -1226,7 +1301,7 @@ function getFunctionCalls(response) {
  }
  }
  /**
- * Returns {@link InlineDataPart}s in the first candidate if present.
+ * Returns every {@link InlineDataPart} in the first candidate if present.
  *
  * @internal
  */
@@ -1305,8 +1380,9 @@ async function handlePredictResponse(response) {
  gcsURI: prediction.gcsUri
  });
  }
+ else if (prediction.safetyAttributes) ;
  else {
- throw new AIError(AIErrorCode.RESPONSE_ERROR, `Predictions array in response has missing properties. Response: ${JSON.stringify(responseJson)}`);
+ throw new AIError(AIErrorCode.RESPONSE_ERROR, `Unexpected element in 'predictions' array in response: '${JSON.stringify(prediction)}'`);
  }
  }
  return { images, filteredReason };
@@ -1847,7 +1923,8 @@ function createPredictRequestBody(prompt, { gcsURI, imageFormat, addWatermark, n
  addWatermark,
  safetyFilterLevel,
  personGeneration: personFilterLevel,
- includeRaiReason: true
+ includeRaiReason: true,
+ includeSafetyAttributes: true
  }
  };
  return body;
@@ -1874,12 +1951,14 @@ const VALID_PART_FIELDS = [
  'text',
  'inlineData',
  'functionCall',
- 'functionResponse'
+ 'functionResponse',
+ 'thought',
+ 'thoughtSignature'
  ];
  const VALID_PARTS_PER_ROLE = {
  user: ['text', 'inlineData'],
  function: ['functionResponse'],
- model: ['text', 'functionCall'],
+ model: ['text', 'functionCall', 'thought', 'thoughtSignature'],
  // System instructions shouldn't be in history anyway.
  system: ['text']
  };
@@ -1901,7 +1980,7 @@ function validateChatHistory(history) {
  throw new AIError(AIErrorCode.INVALID_CONTENT, `Each item should include role field. Got ${role} but valid roles are: ${JSON.stringify(POSSIBLE_ROLES)}`);
  }
  if (!Array.isArray(parts)) {
- throw new AIError(AIErrorCode.INVALID_CONTENT, `Content should have 'parts' but property with an array of Parts`);
+ throw new AIError(AIErrorCode.INVALID_CONTENT, `Content should have 'parts' property with an array of Parts`);
  }
  if (parts.length === 0) {
  throw new AIError(AIErrorCode.INVALID_CONTENT, `Each Content should have at least one part`);
@@ -1910,7 +1989,9 @@ function validateChatHistory(history) {
  text: 0,
  inlineData: 0,
  functionCall: 0,
- functionResponse: 0
+ functionResponse: 0,
+ thought: 0,
+ thoughtSignature: 0
  };
  for (const part of parts) {
  for (const key of VALID_PART_FIELDS) {
@@ -2208,6 +2289,270 @@ class GenerativeModel extends AIModel {
2208
2289
  }
2209
2290
  }
2210
2291
 
2292
+ /**
2293
+ * @license
2294
+ * Copyright 2025 Google LLC
2295
+ *
2296
+ * Licensed under the Apache License, Version 2.0 (the "License");
2297
+ * you may not use this file except in compliance with the License.
2298
+ * You may obtain a copy of the License at
2299
+ *
2300
+ * http://www.apache.org/licenses/LICENSE-2.0
2301
+ *
2302
+ * Unless required by applicable law or agreed to in writing, software
2303
+ * distributed under the License is distributed on an "AS IS" BASIS,
2304
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2305
+ * See the License for the specific language governing permissions and
2306
+ * limitations under the License.
2307
+ */
2308
+ /**
2309
+ * Represents an active, real-time, bidirectional conversation with the model.
2310
+ *
2311
+ * This class should only be instantiated by calling {@link LiveGenerativeModel.connect}.
2312
+ *
2313
+ * @beta
2314
+ */
2315
+ class LiveSession {
2316
+ /**
2317
+ * @internal
2318
+ */
2319
+ constructor(webSocketHandler, serverMessages) {
2320
+ this.webSocketHandler = webSocketHandler;
2321
+ this.serverMessages = serverMessages;
2322
+ /**
2323
+ * Indicates whether this Live session is closed.
2324
+ *
2325
+ * @beta
2326
+ */
2327
+ this.isClosed = false;
2328
+ /**
2329
+ * Indicates whether this Live session is being controlled by an `AudioConversationController`.
2330
+ *
2331
+ * @beta
2332
+ */
2333
+ this.inConversation = false;
2334
+ }
2335
+ /**
2336
+ * Sends content to the server.
2337
+ *
2338
+ * @param request - The message to send to the model.
2339
+ * @param turnComplete - Indicates if the turn is complete. Defaults to false.
2340
+ * @throws If this session has been closed.
2341
+ *
2342
+ * @beta
2343
+ */
2344
+ async send(request, turnComplete = true) {
2345
+ if (this.isClosed) {
2346
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
2347
+ }
2348
+ const newContent = formatNewContent(request);
2349
+ const message = {
2350
+ clientContent: {
2351
+ turns: [newContent],
2352
+ turnComplete
2353
+ }
2354
+ };
2355
+ this.webSocketHandler.send(JSON.stringify(message));
2356
+ }
2357
+ /**
2358
+ * Sends realtime input to the server.
2359
+ *
2360
+ * @param mediaChunks - The media chunks to send.
2361
+ * @throws If this session has been closed.
2362
+ *
2363
+ * @beta
2364
+ */
2365
+ async sendMediaChunks(mediaChunks) {
2366
+ if (this.isClosed) {
2367
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
2368
+ }
2369
+ // The backend does not support sending more than one mediaChunk in one message.
2370
+ // Work around this limitation by sending mediaChunks in separate messages.
2371
+ mediaChunks.forEach(mediaChunk => {
2372
+ const message = {
2373
+ realtimeInput: { mediaChunks: [mediaChunk] }
2374
+ };
2375
+ this.webSocketHandler.send(JSON.stringify(message));
2376
+ });
2377
+ }
2378
+ /**
2379
+ * Sends a stream of {@link GenerativeContentBlob}.
2380
+ *
2381
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
2382
+ * @throws If this session has been closed.
2383
+ *
2384
+ * @beta
2385
+ */
2386
+ async sendMediaStream(mediaChunkStream) {
2387
+ if (this.isClosed) {
2388
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
2389
+ }
2390
+ const reader = mediaChunkStream.getReader();
2391
+ while (true) {
2392
+ try {
2393
+ const { done, value } = await reader.read();
2394
+ if (done) {
2395
+ break;
2396
+ }
2397
+ else if (!value) {
2398
+ throw new Error('Missing chunk in reader, but reader is not done.');
2399
+ }
2400
+ await this.sendMediaChunks([value]);
2401
+ }
2402
+ catch (e) {
2403
+ // Re-throw any errors that occur during stream consumption or sending.
2404
+ const message = e instanceof Error ? e.message : 'Error processing media stream.';
2405
+ throw new AIError(AIErrorCode.REQUEST_ERROR, message);
2406
+ }
2407
+ }
2408
+ }
2409
+ /**
2410
+ * Yields messages received from the server.
2411
+ * This can only be used by one consumer at a time.
2412
+ *
2413
+ * @returns An `AsyncGenerator` that yields server messages as they arrive.
2414
+ * @throws If the session is already closed, or if we receive a response that we don't support.
2415
+ *
2416
+ * @beta
2417
+ */
2418
+ async *receive() {
2419
+ if (this.isClosed) {
2420
+ throw new AIError(AIErrorCode.SESSION_CLOSED, 'Cannot read from a Live session that is closed. Try starting a new Live session.');
2421
+ }
2422
+ for await (const message of this.serverMessages) {
2423
+ if (message && typeof message === 'object') {
2424
+ if (LiveResponseType.SERVER_CONTENT in message) {
2425
+ yield {
2426
+ type: 'serverContent',
2427
+ ...message
2428
+ .serverContent
2429
+ };
2430
+ }
2431
+ else if (LiveResponseType.TOOL_CALL in message) {
2432
+ yield {
2433
+ type: 'toolCall',
2434
+ ...message
2435
+ .toolCall
2436
+ };
2437
+ }
2438
+ else if (LiveResponseType.TOOL_CALL_CANCELLATION in message) {
2439
+ yield {
2440
+ type: 'toolCallCancellation',
2441
+ ...message.toolCallCancellation
2442
+ };
2443
+ }
2444
+ else {
2445
+ logger.warn(`Received an unknown message type from the server: ${JSON.stringify(message)}`);
2446
+ }
2447
+ }
2448
+ else {
2449
+ logger.warn(`Received an invalid message from the server: ${JSON.stringify(message)}`);
2450
+ }
2451
+ }
2452
+ }
2453
+ /**
2454
+ * Closes this session.
2455
+ * All methods on this session will throw an error once this resolves.
2456
+ *
2457
+ * @beta
2458
+ */
2459
+ async close() {
2460
+ if (!this.isClosed) {
2461
+ this.isClosed = true;
2462
+ await this.webSocketHandler.close(1000, 'Client closed session.');
2463
+ }
2464
+ }
2465
+ }
2466
+
2467
+ /**
2468
+ * @license
2469
+ * Copyright 2025 Google LLC
2470
+ *
2471
+ * Licensed under the Apache License, Version 2.0 (the "License");
2472
+ * you may not use this file except in compliance with the License.
2473
+ * You may obtain a copy of the License at
2474
+ *
2475
+ * http://www.apache.org/licenses/LICENSE-2.0
2476
+ *
2477
+ * Unless required by applicable law or agreed to in writing, software
2478
+ * distributed under the License is distributed on an "AS IS" BASIS,
2479
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2480
+ * See the License for the specific language governing permissions and
2481
+ * limitations under the License.
2482
+ */
2483
+ /**
2484
+ * Class for Live generative model APIs. The Live API enables low-latency, two-way multimodal
2485
+ * interactions with Gemini.
2486
+ *
2487
+ * This class should only be instantiated with {@link getLiveGenerativeModel}.
2488
+ *
2489
+ * @beta
2490
+ */
2491
+ class LiveGenerativeModel extends AIModel {
2492
+ /**
2493
+ * @internal
2494
+ */
2495
+ constructor(ai, modelParams,
2496
+ /**
2497
+ * @internal
2498
+ */
2499
+ _webSocketHandler) {
2500
+ super(ai, modelParams.model);
2501
+ this._webSocketHandler = _webSocketHandler;
2502
+ this.generationConfig = modelParams.generationConfig || {};
2503
+ this.tools = modelParams.tools;
2504
+ this.toolConfig = modelParams.toolConfig;
2505
+ this.systemInstruction = formatSystemInstruction(modelParams.systemInstruction);
2506
+ }
2507
+ /**
2508
+ * Starts a {@link LiveSession}.
2509
+ *
2510
+ * @returns A {@link LiveSession}.
2511
+ * @throws If the connection failed to be established with the server.
2512
+ *
2513
+ * @beta
2514
+ */
2515
+ async connect() {
2516
+ const url = new WebSocketUrl(this._apiSettings);
2517
+ await this._webSocketHandler.connect(url.toString());
2518
+ let fullModelPath;
2519
+ if (this._apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
2520
+ fullModelPath = `projects/${this._apiSettings.project}/${this.model}`;
2521
+ }
2522
+ else {
2523
+ fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
2524
+ }
2525
+ const setupMessage = {
2526
+ setup: {
2527
+ model: fullModelPath,
2528
+ generationConfig: this.generationConfig,
2529
+ tools: this.tools,
2530
+ toolConfig: this.toolConfig,
2531
+ systemInstruction: this.systemInstruction
2532
+ }
2533
+ };
2534
+ try {
2535
+ // Begin listening for server messages, and begin the handshake by sending the 'setupMessage'
2536
+ const serverMessages = this._webSocketHandler.listen();
2537
+ this._webSocketHandler.send(JSON.stringify(setupMessage));
2538
+ // Verify we received the handshake response 'setupComplete'
2539
+ const firstMessage = (await serverMessages.next()).value;
2540
+ if (!firstMessage ||
2541
+ !(typeof firstMessage === 'object') ||
2542
+ !('setupComplete' in firstMessage)) {
2543
+ await this._webSocketHandler.close(1011, 'Handshake failure');
2544
+ throw new AIError(AIErrorCode.RESPONSE_ERROR, 'Server connection handshake failed. The server did not respond with a setupComplete message.');
2545
+ }
2546
+ return new LiveSession(this._webSocketHandler, serverMessages);
2547
+ }
2548
+ catch (e) {
2549
+ // Ensure connection is closed on any setup error
2550
+ await this._webSocketHandler.close();
2551
+ throw e;
2552
+ }
2553
+ }
2554
+ }
2555
+
2211
2556
  /**
2212
2557
  * @license
2213
2558
  * Copyright 2025 Google LLC
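Usage sketch (not part of the diff): driving the `LiveSession` and `LiveGenerativeModel` added in the hunk above. `send()` posts a client turn (the second argument, `turnComplete`, defaults to `true` in the code), and `receive()` is an async generator yielding messages tagged with the `LiveResponseType` discriminators. `liveModel` is assumed to come from `getLiveGenerativeModel()`, added later in this diff; error handling is omitted.

```javascript
// Sketch only: one consumer at a time may iterate receive().
const session = await liveModel.connect();

(async () => {
  for await (const message of session.receive()) {
    if (message.type === 'serverContent') {
      console.log('model turn parts:', message.modelTurn?.parts);
    } else if (message.type === 'toolCall') {
      console.log('tool call:', message.functionCalls);
    } else if (message.type === 'toolCallCancellation') {
      console.log('tool call cancellation:', message);
    }
  }
})();

await session.send('Hello there'); // string requests are formatted via formatNewContent()
// ...later: all further calls on this session will throw once close() resolves.
await session.close();
```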
@@ -2322,17 +2667,6 @@ class ImagenModel extends AIModel {
  }
  }

- /**
- * @internal
- */
- var Availability;
- (function (Availability) {
- Availability["UNAVAILABLE"] = "unavailable";
- Availability["DOWNLOADABLE"] = "downloadable";
- Availability["DOWNLOADING"] = "downloading";
- Availability["AVAILABLE"] = "available";
- })(Availability || (Availability = {}));
-
  /**
  * @license
  * Copyright 2025 Google LLC
@@ -2350,261 +2684,135 @@ var Availability;
2350
2684
  * limitations under the License.
2351
2685
  */
2352
2686
  /**
2353
- * Defines an inference "backend" that uses Chrome's on-device model,
2354
- * and encapsulates logic for detecting when on-device inference is
2355
- * possible.
2687
+ * A wrapper for the native `WebSocket` available in both Browsers and Node >= 22.
2688
+ *
2689
+ * @internal
2356
2690
  */
2357
- class ChromeAdapterImpl {
2358
- constructor(languageModelProvider, mode, onDeviceParams = {
2359
- createOptions: {
2360
- // Defaults to support image inputs for convenience.
2361
- expectedInputs: [{ type: 'image' }]
2362
- }
2363
- }) {
2364
- this.languageModelProvider = languageModelProvider;
2365
- this.mode = mode;
2366
- this.onDeviceParams = onDeviceParams;
2367
- this.isDownloading = false;
2691
+ class WebSocketHandlerImpl {
2692
+ constructor() {
2693
+ if (typeof WebSocket === 'undefined') {
2694
+ throw new AIError(AIErrorCode.UNSUPPORTED, 'The WebSocket API is not available in this environment. ' +
2695
+ 'The "Live" feature is not supported here. It is supported in ' +
2696
+ 'modern browser windows, Web Workers with WebSocket support, and Node >= 22.');
2697
+ }
2698
+ }
2699
+ connect(url) {
2700
+ return new Promise((resolve, reject) => {
2701
+ this.ws = new WebSocket(url);
2702
+ this.ws.binaryType = 'blob'; // Only important to set in Node
2703
+ this.ws.addEventListener('open', () => resolve(), { once: true });
2704
+ this.ws.addEventListener('error', () => reject(new AIError(AIErrorCode.FETCH_ERROR, `Error event raised on WebSocket`)), { once: true });
2705
+ this.ws.addEventListener('close', (closeEvent) => {
2706
+ if (closeEvent.reason) {
2707
+ logger.warn(`WebSocket connection closed by server. Reason: '${closeEvent.reason}'`);
2708
+ }
2709
+ });
2710
+ });
2368
2711
  }
2369
- /**
2370
- * Checks if a given request can be made on-device.
2371
- *
2372
- * <ol>Encapsulates a few concerns:
2373
- * <li>the mode</li>
2374
- * <li>API existence</li>
2375
- * <li>prompt formatting</li>
2376
- * <li>model availability, including triggering download if necessary</li>
2377
- * </ol>
2378
- *
2379
- * <p>Pros: callers needn't be concerned with details of on-device availability.</p>
2380
- * <p>Cons: this method spans a few concerns and splits request validation from usage.
2381
- * If instance variables weren't already part of the API, we could consider a better
2382
- * separation of concerns.</p>
2383
- */
2384
- async isAvailable(request) {
2385
- if (!this.mode) {
2386
- logger.debug(`On-device inference unavailable because mode is undefined.`);
2387
- return false;
2388
- }
2389
- if (this.mode === InferenceMode.ONLY_IN_CLOUD) {
2390
- logger.debug(`On-device inference unavailable because mode is "only_in_cloud".`);
2391
- return false;
2392
- }
2393
- // Triggers out-of-band download so model will eventually become available.
2394
- const availability = await this.downloadIfAvailable();
2395
- if (this.mode === InferenceMode.ONLY_ON_DEVICE) {
2396
- // If it will never be available due to API inavailability, throw.
2397
- if (availability === Availability.UNAVAILABLE) {
2398
- throw new AIError(AIErrorCode.API_NOT_ENABLED, 'Local LanguageModel API not available in this environment.');
2399
- }
2400
- else if (availability === Availability.DOWNLOADABLE ||
2401
- availability === Availability.DOWNLOADING) {
2402
- // TODO(chholland): Better user experience during download - progress?
2403
- logger.debug(`Waiting for download of LanguageModel to complete.`);
2404
- await this.downloadPromise;
2405
- return true;
2406
- }
2407
- return true;
2712
+ send(data) {
2713
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
2714
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'WebSocket is not open.');
2408
2715
  }
2409
- // Applies prefer_on_device logic.
2410
- if (availability !== Availability.AVAILABLE) {
2411
- logger.debug(`On-device inference unavailable because availability is "${availability}".`);
2412
- return false;
2413
- }
2414
- if (!ChromeAdapterImpl.isOnDeviceRequest(request)) {
2415
- logger.debug(`On-device inference unavailable because request is incompatible.`);
2416
- return false;
2417
- }
2418
- return true;
2419
- }
2420
- /**
2421
- * Generates content on device.
2422
- *
2423
- * <p>This is comparable to {@link GenerativeModel.generateContent} for generating content in
2424
- * Cloud.</p>
2425
- * @param request - a standard Firebase AI {@link GenerateContentRequest}
2426
- * @returns {@link Response}, so we can reuse common response formatting.
2427
- */
2428
- async generateContent(request) {
2429
- const session = await this.createSession();
2430
- const contents = await Promise.all(request.contents.map(ChromeAdapterImpl.toLanguageModelMessage));
2431
- const text = await session.prompt(contents, this.onDeviceParams.promptOptions);
2432
- return ChromeAdapterImpl.toResponse(text);
2433
- }
2434
- /**
2435
- * Generates content stream on device.
2436
- *
2437
- * <p>This is comparable to {@link GenerativeModel.generateContentStream} for generating content in
2438
- * Cloud.</p>
2439
- * @param request - a standard Firebase AI {@link GenerateContentRequest}
2440
- * @returns {@link Response}, so we can reuse common response formatting.
2441
- */
2442
- async generateContentStream(request) {
2443
- const session = await this.createSession();
2444
- const contents = await Promise.all(request.contents.map(ChromeAdapterImpl.toLanguageModelMessage));
2445
- const stream = session.promptStreaming(contents, this.onDeviceParams.promptOptions);
2446
- return ChromeAdapterImpl.toStreamResponse(stream);
2716
+ this.ws.send(data);
2447
2717
  }
2448
- async countTokens(_request) {
2449
- throw new AIError(AIErrorCode.REQUEST_ERROR, 'Count Tokens is not yet available for on-device model.');
2450
- }
2451
- /**
2452
- * Asserts inference for the given request can be performed by an on-device model.
2453
- */
2454
- static isOnDeviceRequest(request) {
2455
- // Returns false if the prompt is empty.
2456
- if (request.contents.length === 0) {
2457
- logger.debug('Empty prompt rejected for on-device inference.');
2458
- return false;
2718
+ async *listen() {
2719
+ if (!this.ws) {
2720
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'WebSocket is not connected.');
2459
2721
  }
2460
- for (const content of request.contents) {
2461
- if (content.role === 'function') {
2462
- logger.debug(`"Function" role rejected for on-device inference.`);
2463
- return false;
2722
+ const messageQueue = [];
2723
+ const errorQueue = [];
2724
+ let resolvePromise = null;
2725
+ let isClosed = false;
2726
+ const messageListener = async (event) => {
2727
+ let data;
2728
+ if (event.data instanceof Blob) {
2729
+ data = await event.data.text();
2464
2730
  }
2465
- // Returns false if request contains an image with an unsupported mime type.
2466
- for (const part of content.parts) {
2467
- if (part.inlineData &&
2468
- ChromeAdapterImpl.SUPPORTED_MIME_TYPES.indexOf(part.inlineData.mimeType) === -1) {
2469
- logger.debug(`Unsupported mime type "${part.inlineData.mimeType}" rejected for on-device inference.`);
2470
- return false;
2731
+ else if (typeof event.data === 'string') {
2732
+ data = event.data;
2733
+ }
2734
+ else {
2735
+ errorQueue.push(new AIError(AIErrorCode.PARSE_FAILED, `Failed to parse WebSocket response. Expected data to be a Blob or string, but was ${typeof event.data}.`));
2736
+ if (resolvePromise) {
2737
+ resolvePromise();
2738
+ resolvePromise = null;
2471
2739
  }
2740
+ return;
2741
+ }
2742
+ try {
2743
+ const obj = JSON.parse(data);
2744
+ messageQueue.push(obj);
2745
+ }
2746
+ catch (e) {
2747
+ const err = e;
2748
+ errorQueue.push(new AIError(AIErrorCode.PARSE_FAILED, `Error parsing WebSocket message to JSON: ${err.message}`));
2749
+ }
2750
+ if (resolvePromise) {
2751
+ resolvePromise();
2752
+ resolvePromise = null;
2472
2753
  }
2473
- }
2474
- return true;
2475
- }
2476
- /**
2477
- * Encapsulates logic to get availability and download a model if one is downloadable.
2478
- */
2479
- async downloadIfAvailable() {
2480
- const availability = await this.languageModelProvider?.availability(this.onDeviceParams.createOptions);
2481
- if (availability === Availability.DOWNLOADABLE) {
2482
- this.download();
2483
- }
2484
- return availability;
2485
- }
2486
- /**
2487
- * Triggers out-of-band download of an on-device model.
2488
- *
2489
- * <p>Chrome only downloads models as needed. Chrome knows a model is needed when code calls
2490
- * LanguageModel.create.</p>
2491
- *
2492
- * <p>Since Chrome manages the download, the SDK can only avoid redundant download requests by
2493
- * tracking if a download has previously been requested.</p>
2494
- */
2495
- download() {
2496
- if (this.isDownloading) {
2497
- return;
2498
- }
2499
- this.isDownloading = true;
2500
- this.downloadPromise = this.languageModelProvider
2501
- ?.create(this.onDeviceParams.createOptions)
2502
- .finally(() => {
2503
- this.isDownloading = false;
2504
- });
2505
- }
2506
- /**
2507
- * Converts Firebase AI {@link Content} object to a Chrome {@link LanguageModelMessage} object.
2508
- */
2509
- static async toLanguageModelMessage(content) {
2510
- const languageModelMessageContents = await Promise.all(content.parts.map(ChromeAdapterImpl.toLanguageModelMessageContent));
2511
- return {
2512
- role: ChromeAdapterImpl.toLanguageModelMessageRole(content.role),
2513
- content: languageModelMessageContents
2514
2754
  };
2515
- }
2516
- /**
2517
- * Converts a Firebase AI Part object to a Chrome LanguageModelMessageContent object.
2518
- */
2519
- static async toLanguageModelMessageContent(part) {
2520
- if (part.text) {
2521
- return {
2522
- type: 'text',
2523
- value: part.text
2524
- };
2525
- }
2526
- else if (part.inlineData) {
2527
- const formattedImageContent = await fetch(`data:${part.inlineData.mimeType};base64,${part.inlineData.data}`);
2528
- const imageBlob = await formattedImageContent.blob();
2529
- const imageBitmap = await createImageBitmap(imageBlob);
2530
- return {
2531
- type: 'image',
2532
- value: imageBitmap
2533
- };
2534
- }
2535
- throw new AIError(AIErrorCode.REQUEST_ERROR, `Processing of this Part type is not currently supported.`);
2536
- }
2537
- /**
2538
- * Converts a Firebase AI {@link Role} string to a {@link LanguageModelMessageRole} string.
2539
- */
2540
- static toLanguageModelMessageRole(role) {
2541
- // Assumes 'function' rule has been filtered by isOnDeviceRequest
2542
- return role === 'model' ? 'assistant' : 'user';
2543
- }
2544
- /**
2545
- * Abstracts Chrome session creation.
2546
- *
2547
- * <p>Chrome uses a multi-turn session for all inference. Firebase AI uses single-turn for all
2548
- * inference. To map the Firebase AI API to Chrome's API, the SDK creates a new session for all
2549
- * inference.</p>
2550
- *
2551
- * <p>Chrome will remove a model from memory if it's no longer in use, so this method ensures a
2552
- * new session is created before an old session is destroyed.</p>
2553
- */
2554
- async createSession() {
2555
- if (!this.languageModelProvider) {
2556
- throw new AIError(AIErrorCode.UNSUPPORTED, 'Chrome AI requested for unsupported browser version.');
2755
+ const errorListener = () => {
2756
+ errorQueue.push(new AIError(AIErrorCode.FETCH_ERROR, 'WebSocket connection error.'));
2757
+ if (resolvePromise) {
2758
+ resolvePromise();
2759
+ resolvePromise = null;
2760
+ }
2761
+ };
2762
+ const closeListener = (event) => {
2763
+ if (event.reason) {
2764
+ logger.warn(`WebSocket connection closed by the server with reason: ${event.reason}`);
2765
+ }
2766
+ isClosed = true;
2767
+ if (resolvePromise) {
2768
+ resolvePromise();
2769
+ resolvePromise = null;
2770
+ }
2771
+ // Clean up listeners to prevent memory leaks
2772
+ this.ws?.removeEventListener('message', messageListener);
2773
+ this.ws?.removeEventListener('close', closeListener);
2774
+ this.ws?.removeEventListener('error', errorListener);
2775
+ };
2776
+ this.ws.addEventListener('message', messageListener);
2777
+ this.ws.addEventListener('close', closeListener);
2778
+ this.ws.addEventListener('error', errorListener);
2779
+ while (!isClosed) {
2780
+ if (errorQueue.length > 0) {
2781
+ const error = errorQueue.shift();
2782
+ throw error;
2783
+ }
2784
+ if (messageQueue.length > 0) {
2785
+ yield messageQueue.shift();
2786
+ }
2787
+ else {
2788
+ await new Promise(resolve => {
2789
+ resolvePromise = resolve;
2790
+ });
2791
+ }
2557
2792
  }
2558
- const newSession = await this.languageModelProvider.create(this.onDeviceParams.createOptions);
2559
- if (this.oldSession) {
2560
- this.oldSession.destroy();
2793
+ // If the loop terminated because isClosed is true, check for any final errors
2794
+ if (errorQueue.length > 0) {
2795
+ const error = errorQueue.shift();
2796
+ throw error;
2561
2797
  }
2562
- // Holds session reference, so model isn't unloaded from memory.
2563
- this.oldSession = newSession;
2564
- return newSession;
2565
- }
2566
- /**
2567
- * Formats string returned by Chrome as a {@link Response} returned by Firebase AI.
2568
- */
2569
- static toResponse(text) {
2570
- return {
2571
- json: async () => ({
2572
- candidates: [
2573
- {
2574
- content: {
2575
- parts: [{ text }]
2576
- }
2577
- }
2578
- ]
2579
- })
2580
- };
2581
2798
  }
2582
- /**
2583
- * Formats string stream returned by Chrome as SSE returned by Firebase AI.
2584
- */
2585
- static toStreamResponse(stream) {
2586
- const encoder = new TextEncoder();
2587
- return {
2588
- body: stream.pipeThrough(new TransformStream({
2589
- transform(chunk, controller) {
2590
- const json = JSON.stringify({
2591
- candidates: [
2592
- {
2593
- content: {
2594
- role: 'model',
2595
- parts: [{ text: chunk }]
2596
- }
2597
- }
2598
- ]
2599
- });
2600
- controller.enqueue(encoder.encode(`data: ${json}\n\n`));
2601
- }
2602
- }))
2603
- };
2799
+ close(code, reason) {
2800
+ return new Promise(resolve => {
2801
+ if (!this.ws) {
2802
+ return resolve();
2803
+ }
2804
+ this.ws.addEventListener('close', () => resolve(), { once: true });
2805
+ // Calling 'close' during these states results in an error.
2806
+ if (this.ws.readyState === WebSocket.CLOSED ||
2807
+ this.ws.readyState === WebSocket.CONNECTING) {
2808
+ return resolve();
2809
+ }
2810
+ if (this.ws.readyState !== WebSocket.CLOSING) {
2811
+ this.ws.close(code, reason);
2812
+ }
2813
+ });
2604
2814
  }
2605
2815
  }
2606
- // Visible for testing
2607
- ChromeAdapterImpl.SUPPORTED_MIME_TYPES = ['image/jpeg', 'image/png'];
2608
2816
 
2609
2817
  /**
2610
2818
  * @license
@@ -2913,6 +3121,364 @@ class ImagenImageFormat {
2913
3121
  }
2914
3122
  }
2915
3123
 
3124
+ /**
3125
+ * @license
3126
+ * Copyright 2025 Google LLC
3127
+ *
3128
+ * Licensed under the Apache License, Version 2.0 (the "License");
3129
+ * you may not use this file except in compliance with the License.
3130
+ * You may obtain a copy of the License at
3131
+ *
3132
+ * http://www.apache.org/licenses/LICENSE-2.0
3133
+ *
3134
+ * Unless required by applicable law or agreed to in writing, software
3135
+ * distributed under the License is distributed on an "AS IS" BASIS,
3136
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3137
+ * See the License for the specific language governing permissions and
3138
+ * limitations under the License.
3139
+ */
3140
+ const SERVER_INPUT_SAMPLE_RATE = 16000;
3141
+ const SERVER_OUTPUT_SAMPLE_RATE = 24000;
3142
+ const AUDIO_PROCESSOR_NAME = 'audio-processor';
3143
+ /**
3144
+ * The JS for an `AudioWorkletProcessor`.
3145
+ * This processor is responsible for taking raw audio from the microphone,
3146
+ * converting it to the required 16-bit 16kHz PCM, and posting it back to the main thread.
3147
+ *
3148
+ * See: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletProcessor
3149
+ *
3150
+ * It is defined as a string here so that it can be converted into a `Blob`
3151
+ * and loaded at runtime.
3152
+ */
3153
+ const audioProcessorWorkletString = `
3154
+ class AudioProcessor extends AudioWorkletProcessor {
3155
+ constructor(options) {
3156
+ super();
3157
+ this.targetSampleRate = options.processorOptions.targetSampleRate;
3158
+ // 'sampleRate' is a global variable available inside the AudioWorkletGlobalScope,
3159
+ // representing the native sample rate of the AudioContext.
3160
+ this.inputSampleRate = sampleRate;
3161
+ }
3162
+
3163
+ /**
3164
+ * This method is called by the browser's audio engine for each block of audio data.
3165
+ * Input is a single input, with a single channel (input[0][0]).
3166
+ */
3167
+ process(inputs) {
3168
+ const input = inputs[0];
3169
+ if (input && input.length > 0 && input[0].length > 0) {
3170
+ const pcmData = input[0]; // Float32Array of raw audio samples.
3171
+
3172
+ // Simple linear interpolation for resampling.
3173
+ const resampled = new Float32Array(Math.round(pcmData.length * this.targetSampleRate / this.inputSampleRate));
3174
+ const ratio = pcmData.length / resampled.length;
3175
+ for (let i = 0; i < resampled.length; i++) {
3176
+ resampled[i] = pcmData[Math.floor(i * ratio)];
3177
+ }
3178
+
3179
+ // Convert Float32 (-1, 1) samples to Int16 (-32768, 32767)
3180
+ const resampledInt16 = new Int16Array(resampled.length);
3181
+ for (let i = 0; i < resampled.length; i++) {
3182
+ const sample = Math.max(-1, Math.min(1, resampled[i]));
3183
+ if (sample < 0) {
3184
+ resampledInt16[i] = sample * 32768;
3185
+ } else {
3186
+ resampledInt16[i] = sample * 32767;
3187
+ }
3188
+ }
3189
+
3190
+ this.port.postMessage(resampledInt16);
3191
+ }
3192
+ // Return true to keep the processor alive and processing the next audio block.
3193
+ return true;
3194
+ }
3195
+ }
3196
+
3197
+ // Register the processor with a name that can be used to instantiate it from the main thread.
3198
+ registerProcessor('${AUDIO_PROCESSOR_NAME}', AudioProcessor);
3199
+ `;
3200
+ /**
3201
+ * Encapsulates the core logic of an audio conversation.
3202
+ *
3203
+ * @internal
3204
+ */
3205
+ class AudioConversationRunner {
3206
+ constructor(liveSession, options, deps) {
3207
+ this.liveSession = liveSession;
3208
+ this.options = options;
3209
+ this.deps = deps;
3210
+ /** A flag to indicate if the conversation has been stopped. */
3211
+ this.isStopped = false;
3212
+ /** A deferred that contains a promise that is resolved when stop() is called, to unblock the receive loop. */
3213
+ this.stopDeferred = new Deferred();
3214
+ /** A FIFO queue of 24kHz, 16-bit PCM audio chunks received from the server. */
3215
+ this.playbackQueue = [];
3216
+ /** Tracks scheduled audio sources. Used to cancel scheduled audio when the model is interrupted. */
3217
+ this.scheduledSources = [];
3218
+ /** A high-precision timeline pointer for scheduling gapless audio playback. */
3219
+ this.nextStartTime = 0;
3220
+ /** A mutex to prevent the playback processing loop from running multiple times concurrently. */
3221
+ this.isPlaybackLoopRunning = false;
3222
+ this.liveSession.inConversation = true;
3223
+ // Start listening for messages from the server.
3224
+ this.receiveLoopPromise = this.runReceiveLoop().finally(() => this.cleanup());
3225
+ // Set up the handler for receiving processed audio data from the worklet.
3226
+ // Message data has been resampled to 16kHz 16-bit PCM.
3227
+ this.deps.workletNode.port.onmessage = event => {
3228
+ if (this.isStopped) {
3229
+ return;
3230
+ }
3231
+ const pcm16 = event.data;
3232
+ const base64 = btoa(String.fromCharCode.apply(null, Array.from(new Uint8Array(pcm16.buffer))));
3233
+ const chunk = {
3234
+ mimeType: 'audio/pcm',
3235
+ data: base64
3236
+ };
3237
+ void this.liveSession.sendMediaChunks([chunk]);
3238
+ };
3239
+ }
3240
+ /**
3241
+ * Stops the conversation and unblocks the main receive loop.
3242
+ */
3243
+ async stop() {
3244
+ if (this.isStopped) {
3245
+ return;
3246
+ }
3247
+ this.isStopped = true;
3248
+ this.stopDeferred.resolve(); // Unblock the receive loop
3249
+ await this.receiveLoopPromise; // Wait for the loop and cleanup to finish
3250
+ }
3251
+ /**
3252
+ * Cleans up all audio resources (nodes, stream tracks, context) and marks the
3253
+ * session as no longer in a conversation.
3254
+ */
3255
+ cleanup() {
3256
+ this.interruptPlayback(); // Ensure all audio is stopped on final cleanup.
3257
+ this.deps.workletNode.port.onmessage = null;
3258
+ this.deps.workletNode.disconnect();
3259
+ this.deps.sourceNode.disconnect();
3260
+ this.deps.mediaStream.getTracks().forEach(track => track.stop());
3261
+ if (this.deps.audioContext.state !== 'closed') {
3262
+ void this.deps.audioContext.close();
3263
+ }
3264
+ this.liveSession.inConversation = false;
3265
+ }
3266
+ /**
3267
+ * Adds audio data to the queue and ensures the playback loop is running.
3268
+ */
3269
+ enqueueAndPlay(audioData) {
3270
+ this.playbackQueue.push(audioData);
3271
+ // Will no-op if it's already running.
3272
+ void this.processPlaybackQueue();
3273
+ }
3274
+ /**
3275
+ * Stops all current and pending audio playback and clears the queue. This is
3276
+ * called when the server indicates the model's speech was interrupted with
3277
+ * `LiveServerContent.modelTurn.interrupted`.
3278
+ */
3279
+ interruptPlayback() {
3280
+ // Stop all sources that have been scheduled. The onended event will fire for each,
3281
+ // which will clean up the scheduledSources array.
3282
+ [...this.scheduledSources].forEach(source => source.stop(0));
3283
+ // Clear the internal buffer of unprocessed audio chunks.
3284
+ this.playbackQueue.length = 0;
3285
+ // Reset the playback clock to start fresh.
3286
+ this.nextStartTime = this.deps.audioContext.currentTime;
3287
+ }
3288
+ /**
3289
+ * Processes the playback queue in a loop, scheduling each chunk in a gapless sequence.
3290
+ */
3291
+ async processPlaybackQueue() {
3292
+ if (this.isPlaybackLoopRunning) {
3293
+ return;
3294
+ }
3295
+ this.isPlaybackLoopRunning = true;
3296
+ while (this.playbackQueue.length > 0 && !this.isStopped) {
3297
+ const pcmRawBuffer = this.playbackQueue.shift();
3298
+ try {
3299
+ const pcm16 = new Int16Array(pcmRawBuffer);
3300
+ const frameCount = pcm16.length;
3301
+ const audioBuffer = this.deps.audioContext.createBuffer(1, frameCount, SERVER_OUTPUT_SAMPLE_RATE);
3302
+ // Convert 16-bit PCM to 32-bit PCM, required by the Web Audio API.
3303
+ const channelData = audioBuffer.getChannelData(0);
3304
+ for (let i = 0; i < frameCount; i++) {
3305
+ channelData[i] = pcm16[i] / 32768; // Normalize to Float32 range [-1.0, 1.0]
3306
+ }
3307
+ const source = this.deps.audioContext.createBufferSource();
3308
+ source.buffer = audioBuffer;
3309
+ source.connect(this.deps.audioContext.destination);
3310
+ // Track the source and set up a handler to remove it from tracking when it finishes.
3311
+ this.scheduledSources.push(source);
3312
+ source.onended = () => {
3313
+ this.scheduledSources = this.scheduledSources.filter(s => s !== source);
3314
+ };
3315
+ // To prevent gaps, schedule the next chunk to start either now (if we're catching up)
3316
+ // or exactly when the previous chunk is scheduled to end.
3317
+ this.nextStartTime = Math.max(this.deps.audioContext.currentTime, this.nextStartTime);
3318
+ source.start(this.nextStartTime);
3319
+ // Update the schedule for the *next* chunk.
3320
+ this.nextStartTime += audioBuffer.duration;
3321
+ }
3322
+ catch (e) {
3323
+ logger.error('Error playing audio:', e);
3324
+ }
3325
+ }
3326
+ this.isPlaybackLoopRunning = false;
3327
+ }
3328
+ /**
3329
+ * The main loop that listens for and processes messages from the server.
3330
+ */
3331
+ async runReceiveLoop() {
3332
+ const messageGenerator = this.liveSession.receive();
3333
+ while (!this.isStopped) {
3334
+ const result = await Promise.race([
3335
+ messageGenerator.next(),
3336
+ this.stopDeferred.promise
3337
+ ]);
3338
+ if (this.isStopped || !result || result.done) {
3339
+ break;
3340
+ }
3341
+ const message = result.value;
3342
+ if (message.type === 'serverContent') {
3343
+ const serverContent = message;
3344
+ if (serverContent.interrupted) {
3345
+ this.interruptPlayback();
3346
+ }
3347
+ const audioPart = serverContent.modelTurn?.parts.find(part => part.inlineData?.mimeType.startsWith('audio/'));
3348
+ if (audioPart?.inlineData) {
3349
+ const audioData = Uint8Array.from(atob(audioPart.inlineData.data), c => c.charCodeAt(0)).buffer;
3350
+ this.enqueueAndPlay(audioData);
3351
+ }
3352
+ }
3353
+ else if (message.type === 'toolCall') {
3354
+ if (!this.options.functionCallingHandler) {
3355
+ logger.warn('Received tool call message, but StartAudioConversationOptions.functionCallingHandler is undefined. Ignoring tool call.');
3356
+ }
3357
+ else {
3358
+ try {
3359
+ const resultPart = await this.options.functionCallingHandler(message.functionCalls);
3360
+ if (!this.isStopped) {
3361
+ void this.liveSession.send([resultPart]);
3362
+ }
3363
+ }
3364
+ catch (e) {
3365
+ throw new AIError(AIErrorCode.ERROR, `Function calling handler failed: ${e.message}`);
3366
+ }
3367
+ }
3368
+ }
3369
+ }
3370
+ }
3371
+ }
3372
+ /**
3373
+ * Starts a real-time, bidirectional audio conversation with the model. This helper function manages
3374
+ * the complexities of microphone access, audio recording, playback, and interruptions.
3375
+ *
3376
+ * @remarks Important: This function must be called in response to a user gesture
3377
+ * (for example, a button click) to comply with {@link https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy | browser autoplay policies}.
3378
+ *
3379
+ * @example
3380
+ * ```javascript
3381
+ * const liveSession = await model.connect();
3382
+ * let conversationController;
3383
+ *
3384
+ * // This function must be called from within a click handler.
3385
+ * async function startConversation() {
3386
+ * try {
3387
+ * conversationController = await startAudioConversation(liveSession);
3388
+ * } catch (e) {
3389
+ * // Handle AI-specific errors
3390
+ * if (e instanceof AIError) {
3391
+ * console.error("AI Error:", e.message);
3392
+ * }
3393
+ * // Handle microphone permission and hardware errors
3394
+ * else if (e instanceof DOMException) {
3395
+ * console.error("Microphone Error:", e.message);
3396
+ * }
3397
+ * // Handle other unexpected errors
3398
+ * else {
3399
+ * console.error("An unexpected error occurred:", e);
3400
+ * }
3401
+ * }
3402
+ * }
3403
+ *
3404
+ * // Later, to stop the conversation:
3405
+ * // if (conversationController) {
3406
+ * // await conversationController.stop();
3407
+ * // }
3408
+ * ```
3409
+ *
3410
+ * @param liveSession - An active {@link LiveSession} instance.
3411
+ * @param options - Configuration options for the audio conversation.
3412
+ * @returns A `Promise` that resolves with an {@link AudioConversationController}.
3413
+ * @throws `AIError` if the environment does not support required Web APIs (`UNSUPPORTED`), if a conversation is already active (`REQUEST_ERROR`), the session is closed (`SESSION_CLOSED`), or if an unexpected initialization error occurs (`ERROR`).
3414
+ * @throws `DOMException` Thrown by `navigator.mediaDevices.getUserMedia()` if issues occur with microphone access, such as permissions being denied (`NotAllowedError`) or no compatible hardware being found (`NotFoundError`). See the {@link https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia#exceptions | MDN documentation} for a full list of exceptions.
3415
+ *
3416
+ * @beta
3417
+ */
3418
+ async function startAudioConversation(liveSession, options = {}) {
3419
+ if (liveSession.isClosed) {
3420
+ throw new AIError(AIErrorCode.SESSION_CLOSED, 'Cannot start audio conversation on a closed LiveSession.');
3421
+ }
3422
+ if (liveSession.inConversation) {
3423
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'An audio conversation is already in progress for this session.');
3424
+ }
3425
+ // Check for necessary Web API support.
3426
+ if (typeof AudioWorkletNode === 'undefined' ||
3427
+ typeof AudioContext === 'undefined' ||
3428
+ typeof navigator === 'undefined' ||
3429
+ !navigator.mediaDevices) {
3430
+ throw new AIError(AIErrorCode.UNSUPPORTED, 'Audio conversation is not supported in this environment. It requires the Web Audio API and AudioWorklet support.');
3431
+ }
3432
+ let audioContext;
3433
+ try {
3434
+ // 1. Set up the audio context. This must be in response to a user gesture.
3435
+ // See: https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy
3436
+ audioContext = new AudioContext();
3437
+ if (audioContext.state === 'suspended') {
3438
+ await audioContext.resume();
3439
+ }
3440
+ // 2. Prompt for microphone access and get the media stream.
3441
+ // This can throw a variety of permission or hardware-related errors.
3442
+ const mediaStream = await navigator.mediaDevices.getUserMedia({
3443
+ audio: true
3444
+ });
3445
+ // 3. Load the AudioWorklet processor.
3446
+ // See: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorklet
3447
+ const workletBlob = new Blob([audioProcessorWorkletString], {
3448
+ type: 'application/javascript'
3449
+ });
3450
+ const workletURL = URL.createObjectURL(workletBlob);
3451
+ await audioContext.audioWorklet.addModule(workletURL);
3452
+ // 4. Create the audio graph: Microphone -> Source Node -> Worklet Node
3453
+ const sourceNode = audioContext.createMediaStreamSource(mediaStream);
3454
+ const workletNode = new AudioWorkletNode(audioContext, AUDIO_PROCESSOR_NAME, {
3455
+ processorOptions: { targetSampleRate: SERVER_INPUT_SAMPLE_RATE }
3456
+ });
3457
+ sourceNode.connect(workletNode);
3458
+ // 5. Instantiate and return the runner which manages the conversation.
3459
+ const runner = new AudioConversationRunner(liveSession, options, {
3460
+ audioContext,
3461
+ mediaStream,
3462
+ sourceNode,
3463
+ workletNode
3464
+ });
3465
+ return { stop: () => runner.stop() };
3466
+ }
3467
+ catch (e) {
3468
+ // Ensure the audio context is closed on any setup error.
3469
+ if (audioContext && audioContext.state !== 'closed') {
3470
+ void audioContext.close();
3471
+ }
3472
+ // Re-throw specific, known error types directly. The user may want to handle `DOMException`
3473
+ // errors differently (for example, if permission to access audio device was denied).
3474
+ if (e instanceof AIError || e instanceof DOMException) {
3475
+ throw e;
3476
+ }
3477
+ // Wrap any other unexpected errors in a standard AIError.
3478
+ throw new AIError(AIErrorCode.ERROR, `Failed to initialize audio recording: ${e.message}`);
3479
+ }
3480
+ }
3481
+
2916
3482
  /**
2917
3483
  * @license
2918
3484
  * Copyright 2024 Google LLC
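Usage sketch (not part of the diff): the `AudioConversationRunner` above forwards tool calls to `options.functionCallingHandler` and sends whatever part the handler returns back over the session. The handler signature is inferred from that code; the exact shape of the returned function-response part is an assumption, not shown in this hunk.

```javascript
// Sketch only: wiring a function-calling handler into startAudioConversation().
// Must be called in response to a user gesture (see the autoplay note above).
const controller = await startAudioConversation(liveSession, {
  functionCallingHandler: async (functionCalls) => {
    const call = functionCalls[0];
    // Run your own tool here, then return a single part to send back to the model.
    // Assumed functionResponse shape; adjust to the types shipped in this release.
    return {
      functionResponse: {
        name: call.name,
        response: { result: 'ok' }
      }
    };
  }
});

// Later: stop recording and playback, release the microphone and AudioContext.
await controller.stop();
```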
@@ -2957,14 +3523,20 @@ class ImagenImageFormat {
  *
  * @public
  */
- function getAI(app = getApp(), options = { backend: new GoogleAIBackend() }) {
+ function getAI(app = getApp(), options) {
  app = getModularInstance(app);
  // Dependencies
  const AIProvider = _getProvider(app, AI_TYPE);
- const identifier = encodeInstanceIdentifier(options.backend);
- return AIProvider.getImmediate({
+ const backend = options?.backend ?? new GoogleAIBackend();
+ const finalOptions = {
+ useLimitedUseAppCheckTokens: options?.useLimitedUseAppCheckTokens ?? false
+ };
+ const identifier = encodeInstanceIdentifier(backend);
+ const aiInstance = AIProvider.getImmediate({
  identifier
  });
+ aiInstance.options = finalOptions;
+ return aiInstance;
  }
  /**
  * Returns a {@link GenerativeModel} class with methods for inference
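Usage sketch (not part of the diff): `getAI()` now takes an optional options object; `backend` still defaults to `GoogleAIBackend`, and `useLimitedUseAppCheckTokens` (default `false`) makes `AIModel` fetch App Check tokens via `getLimitedUseToken()`, as shown earlier in this diff. The `VertexAIBackend` location argument below is an assumption for illustration.

```javascript
// Sketch only: the new getAI() options added in this release.
import { initializeApp } from 'firebase/app';
import { getAI, VertexAIBackend } from '@firebase/ai';

const app = initializeApp({ /* your Firebase config */ });

// Defaults: GoogleAIBackend, useLimitedUseAppCheckTokens: false.
const ai = getAI(app);

// Explicit backend plus limited-use App Check tokens on every request.
const vertexAI = getAI(app, {
  backend: new VertexAIBackend('us-central1'), // location argument assumed
  useLimitedUseAppCheckTokens: true
});
```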
@@ -2987,11 +3559,11 @@ function getGenerativeModel(ai, modelParams, requestOptions) {
  if (!inCloudParams.model) {
  throw new AIError(AIErrorCode.NO_MODEL, `Must provide a model name. Example: getGenerativeModel({ model: 'my-model-name' })`);
  }
- let chromeAdapter;
- // Do not initialize a ChromeAdapter if we are not in hybrid mode.
- if (typeof window !== 'undefined' && hybridParams.mode) {
- chromeAdapter = new ChromeAdapterImpl(window.LanguageModel, hybridParams.mode, hybridParams.onDeviceParams);
- }
+ /**
+ * An AIService registered by index.node.ts will not have a
+ * chromeAdapterFactory() method.
+ */
+ const chromeAdapter = ai.chromeAdapterFactory?.(hybridParams.mode, typeof window === 'undefined' ? undefined : window, hybridParams.onDeviceParams);
  return new GenerativeModel(ai, inCloudParams, requestOptions, chromeAdapter);
  }
  /**
@@ -3014,6 +3586,25 @@ function getImagenModel(ai, modelParams, requestOptions) {
  }
  return new ImagenModel(ai, modelParams, requestOptions);
  }
+ /**
+ * Returns a {@link LiveGenerativeModel} class for real-time, bidirectional communication.
+ *
+ * The Live API is only supported in modern browser windows and Node >= 22.
+ *
+ * @param ai - An {@link AI} instance.
+ * @param modelParams - Parameters to use when setting up a {@link LiveSession}.
+ * @throws If the `apiKey` or `projectId` fields are missing in your
+ * Firebase config.
+ *
+ * @beta
+ */
+ function getLiveGenerativeModel(ai, modelParams) {
+ if (!modelParams.model) {
+ throw new AIError(AIErrorCode.NO_MODEL, `Must provide a model name for getLiveGenerativeModel. Example: getLiveGenerativeModel(ai, { model: 'my-model-name' })`);
+ }
+ const webSocketHandler = new WebSocketHandlerImpl();
+ return new LiveGenerativeModel(ai, modelParams, webSocketHandler);
+ }

  /**
  * The Firebase AI Web SDK.
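Usage sketch (not part of the diff): the new `getLiveGenerativeModel()` entry point above, combined with the `AUDIO` response modality added earlier in this release. The model name and the `generationConfig` shape are illustrative assumptions; `connect()` opens the WebSocket defined by `WebSocketUrl` and throws if the `setupComplete` handshake fails.

```javascript
// Sketch only: creating a live model and opening a session.
import { getAI, getLiveGenerativeModel, ResponseModality } from '@firebase/ai';

const ai = getAI(app); // app: an initialized FirebaseApp
const liveModel = getLiveGenerativeModel(ai, {
  model: 'gemini-live-2.5-flash', // assumed model name
  generationConfig: {
    responseModalities: [ResponseModality.AUDIO] // assumed LiveGenerationConfig field
  }
});

const session = await liveModel.connect();
// Hand the session to startAudioConversation() for microphone-driven audio,
// or drive it manually with send()/receive() as sketched earlier in this diff.
```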
@@ -3038,5 +3629,5 @@ function registerAI() {
  }
  registerAI();

- export { AIError, AIErrorCode, AIModel, AnyOfSchema, ArraySchema, Backend, BackendType, BlockReason, BooleanSchema, ChatSession, FinishReason, FunctionCallingMode, GenerativeModel, GoogleAIBackend, HarmBlockMethod, HarmBlockThreshold, HarmCategory, HarmProbability, HarmSeverity, ImagenAspectRatio, ImagenImageFormat, ImagenModel, ImagenPersonFilterLevel, ImagenSafetyFilterLevel, InferenceMode, IntegerSchema, Modality, NumberSchema, ObjectSchema, POSSIBLE_ROLES, ResponseModality, Schema, SchemaType, StringSchema, VertexAIBackend, getAI, getGenerativeModel, getImagenModel };
+ export { AIError, AIErrorCode, AIModel, AnyOfSchema, ArraySchema, Backend, BackendType, BlockReason, BooleanSchema, ChatSession, FinishReason, FunctionCallingMode, GenerativeModel, GoogleAIBackend, HarmBlockMethod, HarmBlockThreshold, HarmCategory, HarmProbability, HarmSeverity, ImagenAspectRatio, ImagenImageFormat, ImagenModel, ImagenPersonFilterLevel, ImagenSafetyFilterLevel, InferenceMode, IntegerSchema, LiveGenerativeModel, LiveResponseType, LiveSession, Modality, NumberSchema, ObjectSchema, POSSIBLE_ROLES, ResponseModality, Schema, SchemaType, StringSchema, VertexAIBackend, getAI, getGenerativeModel, getImagenModel, getLiveGenerativeModel, startAudioConversation };
  //# sourceMappingURL=index.node.mjs.map