@firebase/ai 2.1.0-canary.84b8bed35 → 2.1.0-canary.984086b0b
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai-public.d.ts +496 -1
- package/dist/ai.d.ts +600 -1
- package/dist/esm/index.esm.js +1176 -321
- package/dist/esm/index.esm.js.map +1 -1
- package/dist/esm/src/api.d.ts +18 -3
- package/dist/esm/src/constants.d.ts +1 -1
- package/dist/esm/src/index.d.ts +2 -1
- package/dist/esm/src/methods/chrome-adapter.d.ts +7 -3
- package/dist/esm/src/methods/live-session-helpers.d.ts +154 -0
- package/dist/esm/src/methods/live-session.d.ts +90 -0
- package/dist/esm/src/models/index.d.ts +1 -0
- package/dist/esm/src/models/live-generative-model.d.ts +55 -0
- package/dist/esm/src/requests/request.d.ts +6 -0
- package/dist/esm/src/service.d.ts +4 -2
- package/dist/esm/src/types/content.d.ts +17 -0
- package/dist/esm/src/types/enums.d.ts +5 -0
- package/dist/esm/src/types/error.d.ts +2 -0
- package/dist/esm/src/types/imagen/internal.d.ts +10 -0
- package/dist/esm/src/types/live-responses.d.ts +53 -0
- package/dist/esm/src/types/requests.d.ts +96 -0
- package/dist/esm/src/types/responses.d.ts +64 -0
- package/dist/esm/src/websocket.d.ts +67 -0
- package/dist/index.cjs.js +1179 -319
- package/dist/index.cjs.js.map +1 -1
- package/dist/index.node.cjs.js +830 -265
- package/dist/index.node.cjs.js.map +1 -1
- package/dist/index.node.mjs +827 -267
- package/dist/index.node.mjs.map +1 -1
- package/dist/src/api.d.ts +18 -3
- package/dist/src/constants.d.ts +1 -1
- package/dist/src/index.d.ts +2 -1
- package/dist/src/methods/chrome-adapter.d.ts +7 -3
- package/dist/src/methods/live-session-helpers.d.ts +154 -0
- package/dist/src/methods/live-session.d.ts +90 -0
- package/dist/src/models/index.d.ts +1 -0
- package/dist/src/models/live-generative-model.d.ts +55 -0
- package/dist/src/requests/request.d.ts +6 -0
- package/dist/src/service.d.ts +4 -2
- package/dist/src/types/content.d.ts +17 -0
- package/dist/src/types/enums.d.ts +5 -0
- package/dist/src/types/error.d.ts +2 -0
- package/dist/src/types/imagen/internal.d.ts +10 -0
- package/dist/src/types/live-responses.d.ts +53 -0
- package/dist/src/types/requests.d.ts +96 -0
- package/dist/src/types/responses.d.ts +64 -0
- package/dist/src/websocket.d.ts +67 -0
- package/package.json +10 -8
package/dist/esm/index.esm.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { _isFirebaseServerApp, _getProvider, getApp, _registerComponent, registerVersion } from '@firebase/app';
|
|
2
2
|
import { Component } from '@firebase/component';
|
|
3
|
-
import { FirebaseError, getModularInstance } from '@firebase/util';
|
|
3
|
+
import { FirebaseError, Deferred, getModularInstance } from '@firebase/util';
|
|
4
4
|
import { Logger } from '@firebase/logger';
|
|
5
5
|
|
|
6
6
|
var name = "@firebase/ai";
|
|
7
|
-
var version = "2.1.0-canary.
|
|
7
|
+
var version = "2.1.0-canary.984086b0b";
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* @license
|
|
@@ -24,7 +24,7 @@ var version = "2.1.0-canary.84b8bed35";
|
|
|
24
24
|
*/
|
|
25
25
|
const AI_TYPE = 'AI';
|
|
26
26
|
const DEFAULT_LOCATION = 'us-central1';
|
|
27
|
-
const
|
|
27
|
+
const DEFAULT_DOMAIN = 'firebasevertexai.googleapis.com';
|
|
28
28
|
const DEFAULT_API_VERSION = 'v1beta';
|
|
29
29
|
const PACKAGE_VERSION = version;
|
|
30
30
|
const LANGUAGE_TAG = 'gl-js';
|
|
@@ -289,7 +289,12 @@ const ResponseModality = {
|
|
|
289
289
|
* Image.
|
|
290
290
|
* @beta
|
|
291
291
|
*/
|
|
292
|
-
IMAGE: 'IMAGE'
|
|
292
|
+
IMAGE: 'IMAGE',
|
|
293
|
+
/**
|
|
294
|
+
* Audio.
|
|
295
|
+
* @beta
|
|
296
|
+
*/
|
|
297
|
+
AUDIO: 'AUDIO'
|
|
293
298
|
};
|
|
294
299
|
/**
|
|
295
300
|
* <b>(EXPERIMENTAL)</b>
|
|
@@ -302,6 +307,33 @@ const InferenceMode = {
|
|
|
302
307
|
'ONLY_IN_CLOUD': 'only_in_cloud'
|
|
303
308
|
};
|
|
304
309
|
|
|
310
|
+
/**
|
|
311
|
+
* @license
|
|
312
|
+
* Copyright 2024 Google LLC
|
|
313
|
+
*
|
|
314
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
315
|
+
* you may not use this file except in compliance with the License.
|
|
316
|
+
* You may obtain a copy of the License at
|
|
317
|
+
*
|
|
318
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
319
|
+
*
|
|
320
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
321
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
322
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
323
|
+
* See the License for the specific language governing permissions and
|
|
324
|
+
* limitations under the License.
|
|
325
|
+
*/
|
|
326
|
+
/**
|
|
327
|
+
* The types of responses that can be returned by {@link LiveSession.receive}.
|
|
328
|
+
*
|
|
329
|
+
* @beta
|
|
330
|
+
*/
|
|
331
|
+
const LiveResponseType = {
|
|
332
|
+
SERVER_CONTENT: 'serverContent',
|
|
333
|
+
TOOL_CALL: 'toolCall',
|
|
334
|
+
TOOL_CALL_CANCELLATION: 'toolCallCancellation'
|
|
335
|
+
};
|
|
336
|
+
|
|
305
337
|
/**
|
|
306
338
|
* @license
|
|
307
339
|
* Copyright 2024 Google LLC
|
|
@@ -332,6 +364,8 @@ const AIErrorCode = {
|
|
|
332
364
|
RESPONSE_ERROR: 'response-error',
|
|
333
365
|
/** An error occurred while performing a fetch. */
|
|
334
366
|
FETCH_ERROR: 'fetch-error',
|
|
367
|
+
/** An error occurred because an operation was attempted on a closed session. */
|
|
368
|
+
SESSION_CLOSED: 'session-closed',
|
|
335
369
|
/** An error associated with a Content object. */
|
|
336
370
|
INVALID_CONTENT: 'invalid-content',
|
|
337
371
|
/** An error due to the Firebase API not being enabled in the Console. */
|
|
@@ -636,9 +670,10 @@ class VertexAIBackend extends Backend {
|
|
|
636
670
|
* limitations under the License.
|
|
637
671
|
*/
|
|
638
672
|
class AIService {
|
|
639
|
-
constructor(app, backend, authProvider, appCheckProvider) {
|
|
673
|
+
constructor(app, backend, authProvider, appCheckProvider, chromeAdapterFactory) {
|
|
640
674
|
this.app = app;
|
|
641
675
|
this.backend = backend;
|
|
676
|
+
this.chromeAdapterFactory = chromeAdapterFactory;
|
|
642
677
|
const appCheck = appCheckProvider?.getImmediate({ optional: true });
|
|
643
678
|
const auth = authProvider?.getImmediate({ optional: true });
|
|
644
679
|
this.auth = auth || null;
|
|
@@ -957,7 +992,7 @@ class RequestUrl {
|
|
|
957
992
|
return url.toString();
|
|
958
993
|
}
|
|
959
994
|
get baseUrl() {
|
|
960
|
-
return this.requestOptions?.baseUrl ||
|
|
995
|
+
return this.requestOptions?.baseUrl || `https://${DEFAULT_DOMAIN}`;
|
|
961
996
|
}
|
|
962
997
|
get apiVersion() {
|
|
963
998
|
return DEFAULT_API_VERSION; // TODO: allow user-set options if that feature becomes available
|
|
@@ -981,6 +1016,27 @@ class RequestUrl {
|
|
|
981
1016
|
return params;
|
|
982
1017
|
}
|
|
983
1018
|
}
|
|
1019
|
+
class WebSocketUrl {
|
|
1020
|
+
constructor(apiSettings) {
|
|
1021
|
+
this.apiSettings = apiSettings;
|
|
1022
|
+
}
|
|
1023
|
+
toString() {
|
|
1024
|
+
const url = new URL(`wss://${DEFAULT_DOMAIN}`);
|
|
1025
|
+
url.pathname = this.pathname;
|
|
1026
|
+
const queryParams = new URLSearchParams();
|
|
1027
|
+
queryParams.set('key', this.apiSettings.apiKey);
|
|
1028
|
+
url.search = queryParams.toString();
|
|
1029
|
+
return url.toString();
|
|
1030
|
+
}
|
|
1031
|
+
get pathname() {
|
|
1032
|
+
if (this.apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
|
|
1033
|
+
return 'ws/google.firebase.vertexai.v1beta.GenerativeService/BidiGenerateContent';
|
|
1034
|
+
}
|
|
1035
|
+
else {
|
|
1036
|
+
return `ws/google.firebase.vertexai.v1beta.LlmBidiService/BidiGenerateContent/locations/${this.apiSettings.location}`;
|
|
1037
|
+
}
|
|
1038
|
+
}
|
|
1039
|
+
}
|
|
984
1040
|
/**
|
|
985
1041
|
* Log language and "fire/version" to x-goog-api-client
|
|
986
1042
|
*/
|
|
@@ -1324,6 +1380,7 @@ async function handlePredictResponse(response) {
|
|
|
1324
1380
|
gcsURI: prediction.gcsUri
|
|
1325
1381
|
});
|
|
1326
1382
|
}
|
|
1383
|
+
else if (prediction.safetyAttributes) ;
|
|
1327
1384
|
else {
|
|
1328
1385
|
throw new AIError(AIErrorCode.RESPONSE_ERROR, `Unexpected element in 'predictions' array in response: '${JSON.stringify(prediction)}'`);
|
|
1329
1386
|
}
|
|
@@ -1866,7 +1923,8 @@ function createPredictRequestBody(prompt, { gcsURI, imageFormat, addWatermark, n
|
|
|
1866
1923
|
addWatermark,
|
|
1867
1924
|
safetyFilterLevel,
|
|
1868
1925
|
personGeneration: personFilterLevel,
|
|
1869
|
-
includeRaiReason: true
|
|
1926
|
+
includeRaiReason: true,
|
|
1927
|
+
includeSafetyAttributes: true
|
|
1870
1928
|
}
|
|
1871
1929
|
};
|
|
1872
1930
|
return body;
|
|
@@ -2231,6 +2289,270 @@ class GenerativeModel extends AIModel {
|
|
|
2231
2289
|
}
|
|
2232
2290
|
}
|
|
2233
2291
|
|
|
2292
|
+
/**
|
|
2293
|
+
* @license
|
|
2294
|
+
* Copyright 2025 Google LLC
|
|
2295
|
+
*
|
|
2296
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
2297
|
+
* you may not use this file except in compliance with the License.
|
|
2298
|
+
* You may obtain a copy of the License at
|
|
2299
|
+
*
|
|
2300
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
2301
|
+
*
|
|
2302
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
2303
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
2304
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
2305
|
+
* See the License for the specific language governing permissions and
|
|
2306
|
+
* limitations under the License.
|
|
2307
|
+
*/
|
|
2308
|
+
/**
|
|
2309
|
+
* Represents an active, real-time, bidirectional conversation with the model.
|
|
2310
|
+
*
|
|
2311
|
+
* This class should only be instantiated by calling {@link LiveGenerativeModel.connect}.
|
|
2312
|
+
*
|
|
2313
|
+
* @beta
|
|
2314
|
+
*/
|
|
2315
|
+
class LiveSession {
|
|
2316
|
+
/**
|
|
2317
|
+
* @internal
|
|
2318
|
+
*/
|
|
2319
|
+
constructor(webSocketHandler, serverMessages) {
|
|
2320
|
+
this.webSocketHandler = webSocketHandler;
|
|
2321
|
+
this.serverMessages = serverMessages;
|
|
2322
|
+
/**
|
|
2323
|
+
* Indicates whether this Live session is closed.
|
|
2324
|
+
*
|
|
2325
|
+
* @beta
|
|
2326
|
+
*/
|
|
2327
|
+
this.isClosed = false;
|
|
2328
|
+
/**
|
|
2329
|
+
* Indicates whether this Live session is being controlled by an `AudioConversationController`.
|
|
2330
|
+
*
|
|
2331
|
+
* @beta
|
|
2332
|
+
*/
|
|
2333
|
+
this.inConversation = false;
|
|
2334
|
+
}
|
|
2335
|
+
/**
|
|
2336
|
+
* Sends content to the server.
|
|
2337
|
+
*
|
|
2338
|
+
* @param request - The message to send to the model.
|
|
2339
|
+
* @param turnComplete - Indicates if the turn is complete. Defaults to false.
|
|
2340
|
+
* @throws If this session has been closed.
|
|
2341
|
+
*
|
|
2342
|
+
* @beta
|
|
2343
|
+
*/
|
|
2344
|
+
async send(request, turnComplete = true) {
|
|
2345
|
+
if (this.isClosed) {
|
|
2346
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
|
|
2347
|
+
}
|
|
2348
|
+
const newContent = formatNewContent(request);
|
|
2349
|
+
const message = {
|
|
2350
|
+
clientContent: {
|
|
2351
|
+
turns: [newContent],
|
|
2352
|
+
turnComplete
|
|
2353
|
+
}
|
|
2354
|
+
};
|
|
2355
|
+
this.webSocketHandler.send(JSON.stringify(message));
|
|
2356
|
+
}
|
|
2357
|
+
/**
|
|
2358
|
+
* Sends realtime input to the server.
|
|
2359
|
+
*
|
|
2360
|
+
* @param mediaChunks - The media chunks to send.
|
|
2361
|
+
* @throws If this session has been closed.
|
|
2362
|
+
*
|
|
2363
|
+
* @beta
|
|
2364
|
+
*/
|
|
2365
|
+
async sendMediaChunks(mediaChunks) {
|
|
2366
|
+
if (this.isClosed) {
|
|
2367
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
|
|
2368
|
+
}
|
|
2369
|
+
// The backend does not support sending more than one mediaChunk in one message.
|
|
2370
|
+
// Work around this limitation by sending mediaChunks in separate messages.
|
|
2371
|
+
mediaChunks.forEach(mediaChunk => {
|
|
2372
|
+
const message = {
|
|
2373
|
+
realtimeInput: { mediaChunks: [mediaChunk] }
|
|
2374
|
+
};
|
|
2375
|
+
this.webSocketHandler.send(JSON.stringify(message));
|
|
2376
|
+
});
|
|
2377
|
+
}
|
|
2378
|
+
/**
|
|
2379
|
+
* Sends a stream of {@link GenerativeContentBlob}.
|
|
2380
|
+
*
|
|
2381
|
+
* @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
|
|
2382
|
+
* @throws If this session has been closed.
|
|
2383
|
+
*
|
|
2384
|
+
* @beta
|
|
2385
|
+
*/
|
|
2386
|
+
async sendMediaStream(mediaChunkStream) {
|
|
2387
|
+
if (this.isClosed) {
|
|
2388
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
|
|
2389
|
+
}
|
|
2390
|
+
const reader = mediaChunkStream.getReader();
|
|
2391
|
+
while (true) {
|
|
2392
|
+
try {
|
|
2393
|
+
const { done, value } = await reader.read();
|
|
2394
|
+
if (done) {
|
|
2395
|
+
break;
|
|
2396
|
+
}
|
|
2397
|
+
else if (!value) {
|
|
2398
|
+
throw new Error('Missing chunk in reader, but reader is not done.');
|
|
2399
|
+
}
|
|
2400
|
+
await this.sendMediaChunks([value]);
|
|
2401
|
+
}
|
|
2402
|
+
catch (e) {
|
|
2403
|
+
// Re-throw any errors that occur during stream consumption or sending.
|
|
2404
|
+
const message = e instanceof Error ? e.message : 'Error processing media stream.';
|
|
2405
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, message);
|
|
2406
|
+
}
|
|
2407
|
+
}
|
|
2408
|
+
}
|
|
2409
|
+
/**
|
|
2410
|
+
* Yields messages received from the server.
|
|
2411
|
+
* This can only be used by one consumer at a time.
|
|
2412
|
+
*
|
|
2413
|
+
* @returns An `AsyncGenerator` that yields server messages as they arrive.
|
|
2414
|
+
* @throws If the session is already closed, or if we receive a response that we don't support.
|
|
2415
|
+
*
|
|
2416
|
+
* @beta
|
|
2417
|
+
*/
|
|
2418
|
+
async *receive() {
|
|
2419
|
+
if (this.isClosed) {
|
|
2420
|
+
throw new AIError(AIErrorCode.SESSION_CLOSED, 'Cannot read from a Live session that is closed. Try starting a new Live session.');
|
|
2421
|
+
}
|
|
2422
|
+
for await (const message of this.serverMessages) {
|
|
2423
|
+
if (message && typeof message === 'object') {
|
|
2424
|
+
if (LiveResponseType.SERVER_CONTENT in message) {
|
|
2425
|
+
yield {
|
|
2426
|
+
type: 'serverContent',
|
|
2427
|
+
...message
|
|
2428
|
+
.serverContent
|
|
2429
|
+
};
|
|
2430
|
+
}
|
|
2431
|
+
else if (LiveResponseType.TOOL_CALL in message) {
|
|
2432
|
+
yield {
|
|
2433
|
+
type: 'toolCall',
|
|
2434
|
+
...message
|
|
2435
|
+
.toolCall
|
|
2436
|
+
};
|
|
2437
|
+
}
|
|
2438
|
+
else if (LiveResponseType.TOOL_CALL_CANCELLATION in message) {
|
|
2439
|
+
yield {
|
|
2440
|
+
type: 'toolCallCancellation',
|
|
2441
|
+
...message.toolCallCancellation
|
|
2442
|
+
};
|
|
2443
|
+
}
|
|
2444
|
+
else {
|
|
2445
|
+
logger.warn(`Received an unknown message type from the server: ${JSON.stringify(message)}`);
|
|
2446
|
+
}
|
|
2447
|
+
}
|
|
2448
|
+
else {
|
|
2449
|
+
logger.warn(`Received an invalid message from the server: ${JSON.stringify(message)}`);
|
|
2450
|
+
}
|
|
2451
|
+
}
|
|
2452
|
+
}
|
|
2453
|
+
/**
|
|
2454
|
+
* Closes this session.
|
|
2455
|
+
* All methods on this session will throw an error once this resolves.
|
|
2456
|
+
*
|
|
2457
|
+
* @beta
|
|
2458
|
+
*/
|
|
2459
|
+
async close() {
|
|
2460
|
+
if (!this.isClosed) {
|
|
2461
|
+
this.isClosed = true;
|
|
2462
|
+
await this.webSocketHandler.close(1000, 'Client closed session.');
|
|
2463
|
+
}
|
|
2464
|
+
}
|
|
2465
|
+
}
|
|
2466
|
+
|
|
2467
|
+
/**
|
|
2468
|
+
* @license
|
|
2469
|
+
* Copyright 2025 Google LLC
|
|
2470
|
+
*
|
|
2471
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
2472
|
+
* you may not use this file except in compliance with the License.
|
|
2473
|
+
* You may obtain a copy of the License at
|
|
2474
|
+
*
|
|
2475
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
2476
|
+
*
|
|
2477
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
2478
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
2479
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
2480
|
+
* See the License for the specific language governing permissions and
|
|
2481
|
+
* limitations under the License.
|
|
2482
|
+
*/
|
|
2483
|
+
/**
|
|
2484
|
+
* Class for Live generative model APIs. The Live API enables low-latency, two-way multimodal
|
|
2485
|
+
* interactions with Gemini.
|
|
2486
|
+
*
|
|
2487
|
+
* This class should only be instantiated with {@link getLiveGenerativeModel}.
|
|
2488
|
+
*
|
|
2489
|
+
* @beta
|
|
2490
|
+
*/
|
|
2491
|
+
class LiveGenerativeModel extends AIModel {
|
|
2492
|
+
/**
|
|
2493
|
+
* @internal
|
|
2494
|
+
*/
|
|
2495
|
+
constructor(ai, modelParams,
|
|
2496
|
+
/**
|
|
2497
|
+
* @internal
|
|
2498
|
+
*/
|
|
2499
|
+
_webSocketHandler) {
|
|
2500
|
+
super(ai, modelParams.model);
|
|
2501
|
+
this._webSocketHandler = _webSocketHandler;
|
|
2502
|
+
this.generationConfig = modelParams.generationConfig || {};
|
|
2503
|
+
this.tools = modelParams.tools;
|
|
2504
|
+
this.toolConfig = modelParams.toolConfig;
|
|
2505
|
+
this.systemInstruction = formatSystemInstruction(modelParams.systemInstruction);
|
|
2506
|
+
}
|
|
2507
|
+
/**
|
|
2508
|
+
* Starts a {@link LiveSession}.
|
|
2509
|
+
*
|
|
2510
|
+
* @returns A {@link LiveSession}.
|
|
2511
|
+
* @throws If the connection failed to be established with the server.
|
|
2512
|
+
*
|
|
2513
|
+
* @beta
|
|
2514
|
+
*/
|
|
2515
|
+
async connect() {
|
|
2516
|
+
const url = new WebSocketUrl(this._apiSettings);
|
|
2517
|
+
await this._webSocketHandler.connect(url.toString());
|
|
2518
|
+
let fullModelPath;
|
|
2519
|
+
if (this._apiSettings.backend.backendType === BackendType.GOOGLE_AI) {
|
|
2520
|
+
fullModelPath = `projects/${this._apiSettings.project}/${this.model}`;
|
|
2521
|
+
}
|
|
2522
|
+
else {
|
|
2523
|
+
fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
|
|
2524
|
+
}
|
|
2525
|
+
const setupMessage = {
|
|
2526
|
+
setup: {
|
|
2527
|
+
model: fullModelPath,
|
|
2528
|
+
generationConfig: this.generationConfig,
|
|
2529
|
+
tools: this.tools,
|
|
2530
|
+
toolConfig: this.toolConfig,
|
|
2531
|
+
systemInstruction: this.systemInstruction
|
|
2532
|
+
}
|
|
2533
|
+
};
|
|
2534
|
+
try {
|
|
2535
|
+
// Begin listening for server messages, and begin the handshake by sending the 'setupMessage'
|
|
2536
|
+
const serverMessages = this._webSocketHandler.listen();
|
|
2537
|
+
this._webSocketHandler.send(JSON.stringify(setupMessage));
|
|
2538
|
+
// Verify we received the handshake response 'setupComplete'
|
|
2539
|
+
const firstMessage = (await serverMessages.next()).value;
|
|
2540
|
+
if (!firstMessage ||
|
|
2541
|
+
!(typeof firstMessage === 'object') ||
|
|
2542
|
+
!('setupComplete' in firstMessage)) {
|
|
2543
|
+
await this._webSocketHandler.close(1011, 'Handshake failure');
|
|
2544
|
+
throw new AIError(AIErrorCode.RESPONSE_ERROR, 'Server connection handshake failed. The server did not respond with a setupComplete message.');
|
|
2545
|
+
}
|
|
2546
|
+
return new LiveSession(this._webSocketHandler, serverMessages);
|
|
2547
|
+
}
|
|
2548
|
+
catch (e) {
|
|
2549
|
+
// Ensure connection is closed on any setup error
|
|
2550
|
+
await this._webSocketHandler.close();
|
|
2551
|
+
throw e;
|
|
2552
|
+
}
|
|
2553
|
+
}
|
|
2554
|
+
}
|
|
2555
|
+
|
|
2234
2556
|
/**
|
|
2235
2557
|
* @license
|
|
2236
2558
|
* Copyright 2025 Google LLC
|
|
@@ -2345,17 +2667,6 @@ class ImagenModel extends AIModel {
|
|
|
2345
2667
|
}
|
|
2346
2668
|
}
|
|
2347
2669
|
|
|
2348
|
-
/**
|
|
2349
|
-
* @internal
|
|
2350
|
-
*/
|
|
2351
|
-
var Availability;
|
|
2352
|
-
(function (Availability) {
|
|
2353
|
-
Availability["UNAVAILABLE"] = "unavailable";
|
|
2354
|
-
Availability["DOWNLOADABLE"] = "downloadable";
|
|
2355
|
-
Availability["DOWNLOADING"] = "downloading";
|
|
2356
|
-
Availability["AVAILABLE"] = "available";
|
|
2357
|
-
})(Availability || (Availability = {}));
|
|
2358
|
-
|
|
2359
2670
|
/**
|
|
2360
2671
|
* @license
|
|
2361
2672
|
* Copyright 2025 Google LLC
|
|
@@ -2373,263 +2684,135 @@ var Availability;
|
|
|
2373
2684
|
* limitations under the License.
|
|
2374
2685
|
*/
|
|
2375
2686
|
/**
|
|
2376
|
-
*
|
|
2377
|
-
*
|
|
2378
|
-
*
|
|
2687
|
+
* A wrapper for the native `WebSocket` available in both Browsers and Node >= 22.
|
|
2688
|
+
*
|
|
2689
|
+
* @internal
|
|
2379
2690
|
*/
|
|
2380
|
-
class
|
|
2381
|
-
constructor(
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2691
|
+
class WebSocketHandlerImpl {
|
|
2692
|
+
constructor() {
|
|
2693
|
+
if (typeof WebSocket === 'undefined') {
|
|
2694
|
+
throw new AIError(AIErrorCode.UNSUPPORTED, 'The WebSocket API is not available in this environment. ' +
|
|
2695
|
+
'The "Live" feature is not supported here. It is supported in ' +
|
|
2696
|
+
'modern browser windows, Web Workers with WebSocket support, and Node >= 22.');
|
|
2697
|
+
}
|
|
2698
|
+
}
|
|
2699
|
+
connect(url) {
|
|
2700
|
+
return new Promise((resolve, reject) => {
|
|
2701
|
+
this.ws = new WebSocket(url);
|
|
2702
|
+
this.ws.binaryType = 'blob'; // Only important to set in Node
|
|
2703
|
+
this.ws.addEventListener('open', () => resolve(), { once: true });
|
|
2704
|
+
this.ws.addEventListener('error', () => reject(new AIError(AIErrorCode.FETCH_ERROR, `Error event raised on WebSocket`)), { once: true });
|
|
2705
|
+
this.ws.addEventListener('close', (closeEvent) => {
|
|
2706
|
+
if (closeEvent.reason) {
|
|
2707
|
+
logger.warn(`WebSocket connection closed by server. Reason: '${closeEvent.reason}'`);
|
|
2708
|
+
}
|
|
2709
|
+
});
|
|
2710
|
+
});
|
|
2391
2711
|
}
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2395
|
-
* Encapsulates a few concerns:
|
|
2396
|
-
* the mode
|
|
2397
|
-
* API existence
|
|
2398
|
-
* prompt formatting
|
|
2399
|
-
* model availability, including triggering download if necessary
|
|
2400
|
-
*
|
|
2401
|
-
*
|
|
2402
|
-
* Pros: callers needn't be concerned with details of on-device availability.</p>
|
|
2403
|
-
* Cons: this method spans a few concerns and splits request validation from usage.
|
|
2404
|
-
* If instance variables weren't already part of the API, we could consider a better
|
|
2405
|
-
* separation of concerns.
|
|
2406
|
-
*/
|
|
2407
|
-
async isAvailable(request) {
|
|
2408
|
-
if (!this.mode) {
|
|
2409
|
-
logger.debug(`On-device inference unavailable because mode is undefined.`);
|
|
2410
|
-
return false;
|
|
2712
|
+
send(data) {
|
|
2713
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
|
2714
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, 'WebSocket is not open.');
|
|
2411
2715
|
}
|
|
2412
|
-
|
|
2413
|
-
|
|
2414
|
-
|
|
2716
|
+
this.ws.send(data);
|
|
2717
|
+
}
|
|
2718
|
+
async *listen() {
|
|
2719
|
+
if (!this.ws) {
|
|
2720
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, 'WebSocket is not connected.');
|
|
2415
2721
|
}
|
|
2416
|
-
|
|
2417
|
-
const
|
|
2418
|
-
|
|
2419
|
-
|
|
2420
|
-
|
|
2421
|
-
|
|
2722
|
+
const messageQueue = [];
|
|
2723
|
+
const errorQueue = [];
|
|
2724
|
+
let resolvePromise = null;
|
|
2725
|
+
let isClosed = false;
|
|
2726
|
+
const messageListener = async (event) => {
|
|
2727
|
+
let data;
|
|
2728
|
+
if (event.data instanceof Blob) {
|
|
2729
|
+
data = await event.data.text();
|
|
2422
2730
|
}
|
|
2423
|
-
else if (
|
|
2424
|
-
|
|
2425
|
-
|
|
2426
|
-
|
|
2427
|
-
|
|
2428
|
-
|
|
2731
|
+
else if (typeof event.data === 'string') {
|
|
2732
|
+
data = event.data;
|
|
2733
|
+
}
|
|
2734
|
+
else {
|
|
2735
|
+
errorQueue.push(new AIError(AIErrorCode.PARSE_FAILED, `Failed to parse WebSocket response. Expected data to be a Blob or string, but was ${typeof event.data}.`));
|
|
2736
|
+
if (resolvePromise) {
|
|
2737
|
+
resolvePromise();
|
|
2738
|
+
resolvePromise = null;
|
|
2739
|
+
}
|
|
2740
|
+
return;
|
|
2741
|
+
}
|
|
2742
|
+
try {
|
|
2743
|
+
const obj = JSON.parse(data);
|
|
2744
|
+
messageQueue.push(obj);
|
|
2745
|
+
}
|
|
2746
|
+
catch (e) {
|
|
2747
|
+
const err = e;
|
|
2748
|
+
errorQueue.push(new AIError(AIErrorCode.PARSE_FAILED, `Error parsing WebSocket message to JSON: ${err.message}`));
|
|
2749
|
+
}
|
|
2750
|
+
if (resolvePromise) {
|
|
2751
|
+
resolvePromise();
|
|
2752
|
+
resolvePromise = null;
|
|
2753
|
+
}
|
|
2754
|
+
};
|
|
2755
|
+
const errorListener = () => {
|
|
2756
|
+
errorQueue.push(new AIError(AIErrorCode.FETCH_ERROR, 'WebSocket connection error.'));
|
|
2757
|
+
if (resolvePromise) {
|
|
2758
|
+
resolvePromise();
|
|
2759
|
+
resolvePromise = null;
|
|
2760
|
+
}
|
|
2761
|
+
};
|
|
2762
|
+
const closeListener = (event) => {
|
|
2763
|
+
if (event.reason) {
|
|
2764
|
+
logger.warn(`WebSocket connection closed by the server with reason: ${event.reason}`);
|
|
2765
|
+
}
|
|
2766
|
+
isClosed = true;
|
|
2767
|
+
if (resolvePromise) {
|
|
2768
|
+
resolvePromise();
|
|
2769
|
+
resolvePromise = null;
|
|
2770
|
+
}
|
|
2771
|
+
// Clean up listeners to prevent memory leaks
|
|
2772
|
+
this.ws?.removeEventListener('message', messageListener);
|
|
2773
|
+
this.ws?.removeEventListener('close', closeListener);
|
|
2774
|
+
this.ws?.removeEventListener('error', errorListener);
|
|
2775
|
+
};
|
|
2776
|
+
this.ws.addEventListener('message', messageListener);
|
|
2777
|
+
this.ws.addEventListener('close', closeListener);
|
|
2778
|
+
this.ws.addEventListener('error', errorListener);
|
|
2779
|
+
while (!isClosed) {
|
|
2780
|
+
if (errorQueue.length > 0) {
|
|
2781
|
+
const error = errorQueue.shift();
|
|
2782
|
+
throw error;
|
|
2783
|
+
}
|
|
2784
|
+
if (messageQueue.length > 0) {
|
|
2785
|
+
yield messageQueue.shift();
|
|
2786
|
+
}
|
|
2787
|
+
else {
|
|
2788
|
+
await new Promise(resolve => {
|
|
2789
|
+
resolvePromise = resolve;
|
|
2790
|
+
});
|
|
2429
2791
|
}
|
|
2430
|
-
return true;
|
|
2431
2792
|
}
|
|
2432
|
-
//
|
|
2433
|
-
if (
|
|
2434
|
-
|
|
2435
|
-
|
|
2436
|
-
}
|
|
2437
|
-
if (!ChromeAdapterImpl.isOnDeviceRequest(request)) {
|
|
2438
|
-
logger.debug(`On-device inference unavailable because request is incompatible.`);
|
|
2439
|
-
return false;
|
|
2793
|
+
// If the loop terminated because isClosed is true, check for any final errors
|
|
2794
|
+
if (errorQueue.length > 0) {
|
|
2795
|
+
const error = errorQueue.shift();
|
|
2796
|
+
throw error;
|
|
2440
2797
|
}
|
|
2441
|
-
return true;
|
|
2442
|
-
}
|
|
2443
|
-
/**
|
|
2444
|
-
* Generates content on device.
|
|
2445
|
-
*
|
|
2446
|
-
* @remarks
|
|
2447
|
-
* This is comparable to {@link GenerativeModel.generateContent} for generating content in
|
|
2448
|
-
* Cloud.
|
|
2449
|
-
* @param request - a standard Firebase AI {@link GenerateContentRequest}
|
|
2450
|
-
* @returns {@link Response}, so we can reuse common response formatting.
|
|
2451
|
-
*/
|
|
2452
|
-
async generateContent(request) {
|
|
2453
|
-
const session = await this.createSession();
|
|
2454
|
-
const contents = await Promise.all(request.contents.map(ChromeAdapterImpl.toLanguageModelMessage));
|
|
2455
|
-
const text = await session.prompt(contents, this.onDeviceParams.promptOptions);
|
|
2456
|
-
return ChromeAdapterImpl.toResponse(text);
|
|
2457
|
-
}
|
|
2458
|
-
/**
|
|
2459
|
-
* Generates content stream on device.
|
|
2460
|
-
*
|
|
2461
|
-
* @remarks
|
|
2462
|
-
* This is comparable to {@link GenerativeModel.generateContentStream} for generating content in
|
|
2463
|
-
* Cloud.
|
|
2464
|
-
* @param request - a standard Firebase AI {@link GenerateContentRequest}
|
|
2465
|
-
* @returns {@link Response}, so we can reuse common response formatting.
|
|
2466
|
-
*/
|
|
2467
|
-
async generateContentStream(request) {
|
|
2468
|
-
const session = await this.createSession();
|
|
2469
|
-
const contents = await Promise.all(request.contents.map(ChromeAdapterImpl.toLanguageModelMessage));
|
|
2470
|
-
const stream = session.promptStreaming(contents, this.onDeviceParams.promptOptions);
|
|
2471
|
-
return ChromeAdapterImpl.toStreamResponse(stream);
|
|
2472
2798
|
}
|
|
2473
|
-
|
|
2474
|
-
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
* Asserts inference for the given request can be performed by an on-device model.
|
|
2478
|
-
*/
|
|
2479
|
-
static isOnDeviceRequest(request) {
|
|
2480
|
-
// Returns false if the prompt is empty.
|
|
2481
|
-
if (request.contents.length === 0) {
|
|
2482
|
-
logger.debug('Empty prompt rejected for on-device inference.');
|
|
2483
|
-
return false;
|
|
2484
|
-
}
|
|
2485
|
-
for (const content of request.contents) {
|
|
2486
|
-
if (content.role === 'function') {
|
|
2487
|
-
logger.debug(`"Function" role rejected for on-device inference.`);
|
|
2488
|
-
return false;
|
|
2799
|
+
close(code, reason) {
|
|
2800
|
+
return new Promise(resolve => {
|
|
2801
|
+
if (!this.ws) {
|
|
2802
|
+
return resolve();
|
|
2489
2803
|
}
|
|
2490
|
-
|
|
2491
|
-
|
|
2492
|
-
|
|
2493
|
-
|
|
2494
|
-
|
|
2495
|
-
|
|
2496
|
-
|
|
2804
|
+
this.ws.addEventListener('close', () => resolve(), { once: true });
|
|
2805
|
+
// Calling 'close' during these states results in an error.
|
|
2806
|
+
if (this.ws.readyState === WebSocket.CLOSED ||
|
|
2807
|
+
this.ws.readyState === WebSocket.CONNECTING) {
|
|
2808
|
+
return resolve();
|
|
2809
|
+
}
|
|
2810
|
+
if (this.ws.readyState !== WebSocket.CLOSING) {
|
|
2811
|
+
this.ws.close(code, reason);
|
|
2497
2812
|
}
|
|
2498
|
-
}
|
|
2499
|
-
return true;
|
|
2500
|
-
}
|
|
2501
|
-
/**
|
|
2502
|
-
* Encapsulates logic to get availability and download a model if one is downloadable.
|
|
2503
|
-
*/
|
|
2504
|
-
async downloadIfAvailable() {
|
|
2505
|
-
const availability = await this.languageModelProvider?.availability(this.onDeviceParams.createOptions);
|
|
2506
|
-
if (availability === Availability.DOWNLOADABLE) {
|
|
2507
|
-
this.download();
|
|
2508
|
-
}
|
|
2509
|
-
return availability;
|
|
2510
|
-
}
|
|
2511
|
-
/**
|
|
2512
|
-
* Triggers out-of-band download of an on-device model.
|
|
2513
|
-
*
|
|
2514
|
-
* Chrome only downloads models as needed. Chrome knows a model is needed when code calls
|
|
2515
|
-
* LanguageModel.create.
|
|
2516
|
-
*
|
|
2517
|
-
* Since Chrome manages the download, the SDK can only avoid redundant download requests by
|
|
2518
|
-
* tracking if a download has previously been requested.
|
|
2519
|
-
*/
|
|
2520
|
-
download() {
|
|
2521
|
-
if (this.isDownloading) {
|
|
2522
|
-
return;
|
|
2523
|
-
}
|
|
2524
|
-
this.isDownloading = true;
|
|
2525
|
-
this.downloadPromise = this.languageModelProvider
|
|
2526
|
-
?.create(this.onDeviceParams.createOptions)
|
|
2527
|
-
.finally(() => {
|
|
2528
|
-
this.isDownloading = false;
|
|
2529
2813
|
});
|
|
2530
2814
|
}
|
|
2531
|
-
/**
|
|
2532
|
-
* Converts Firebase AI {@link Content} object to a Chrome {@link LanguageModelMessage} object.
|
|
2533
|
-
*/
|
|
2534
|
-
static async toLanguageModelMessage(content) {
|
|
2535
|
-
const languageModelMessageContents = await Promise.all(content.parts.map(ChromeAdapterImpl.toLanguageModelMessageContent));
|
|
2536
|
-
return {
|
|
2537
|
-
role: ChromeAdapterImpl.toLanguageModelMessageRole(content.role),
|
|
2538
|
-
content: languageModelMessageContents
|
|
2539
|
-
};
|
|
2540
|
-
}
|
|
2541
|
-
/**
|
|
2542
|
-
* Converts a Firebase AI Part object to a Chrome LanguageModelMessageContent object.
|
|
2543
|
-
*/
|
|
2544
|
-
static async toLanguageModelMessageContent(part) {
|
|
2545
|
-
if (part.text) {
|
|
2546
|
-
return {
|
|
2547
|
-
type: 'text',
|
|
2548
|
-
value: part.text
|
|
2549
|
-
};
|
|
2550
|
-
}
|
|
2551
|
-
else if (part.inlineData) {
|
|
2552
|
-
const formattedImageContent = await fetch(`data:${part.inlineData.mimeType};base64,${part.inlineData.data}`);
|
|
2553
|
-
const imageBlob = await formattedImageContent.blob();
|
|
2554
|
-
const imageBitmap = await createImageBitmap(imageBlob);
|
|
2555
|
-
return {
|
|
2556
|
-
type: 'image',
|
|
2557
|
-
value: imageBitmap
|
|
2558
|
-
};
|
|
2559
|
-
}
|
|
2560
|
-
throw new AIError(AIErrorCode.REQUEST_ERROR, `Processing of this Part type is not currently supported.`);
|
|
2561
|
-
}
|
|
2562
|
-
/**
|
|
2563
|
-
* Converts a Firebase AI {@link Role} string to a {@link LanguageModelMessageRole} string.
|
|
2564
|
-
*/
|
|
2565
|
-
static toLanguageModelMessageRole(role) {
|
|
2566
|
-
// Assumes 'function' rule has been filtered by isOnDeviceRequest
|
|
2567
|
-
return role === 'model' ? 'assistant' : 'user';
|
|
2568
|
-
}
|
|
2569
|
-
/**
|
|
2570
|
-
* Abstracts Chrome session creation.
|
|
2571
|
-
*
|
|
2572
|
-
* Chrome uses a multi-turn session for all inference. Firebase AI uses single-turn for all
|
|
2573
|
-
* inference. To map the Firebase AI API to Chrome's API, the SDK creates a new session for all
|
|
2574
|
-
* inference.
|
|
2575
|
-
*
|
|
2576
|
-
* Chrome will remove a model from memory if it's no longer in use, so this method ensures a
|
|
2577
|
-
* new session is created before an old session is destroyed.
|
|
2578
|
-
*/
|
|
2579
|
-
async createSession() {
|
|
2580
|
-
if (!this.languageModelProvider) {
|
|
2581
|
-
throw new AIError(AIErrorCode.UNSUPPORTED, 'Chrome AI requested for unsupported browser version.');
|
|
2582
|
-
}
|
|
2583
|
-
const newSession = await this.languageModelProvider.create(this.onDeviceParams.createOptions);
|
|
2584
|
-
if (this.oldSession) {
|
|
2585
|
-
this.oldSession.destroy();
|
|
2586
|
-
}
|
|
2587
|
-
// Holds session reference, so model isn't unloaded from memory.
|
|
2588
|
-
this.oldSession = newSession;
|
|
2589
|
-
return newSession;
|
|
2590
|
-
}
|
|
2591
|
-
/**
|
|
2592
|
-
* Formats string returned by Chrome as a {@link Response} returned by Firebase AI.
|
|
2593
|
-
*/
|
|
2594
|
-
static toResponse(text) {
|
|
2595
|
-
return {
|
|
2596
|
-
json: async () => ({
|
|
2597
|
-
candidates: [
|
|
2598
|
-
{
|
|
2599
|
-
content: {
|
|
2600
|
-
parts: [{ text }]
|
|
2601
|
-
}
|
|
2602
|
-
}
|
|
2603
|
-
]
|
|
2604
|
-
})
|
|
2605
|
-
};
|
|
2606
|
-
}
|
|
2607
|
-
/**
|
|
2608
|
-
* Formats string stream returned by Chrome as SSE returned by Firebase AI.
|
|
2609
|
-
*/
|
|
2610
|
-
static toStreamResponse(stream) {
|
|
2611
|
-
const encoder = new TextEncoder();
|
|
2612
|
-
return {
|
|
2613
|
-
body: stream.pipeThrough(new TransformStream({
|
|
2614
|
-
transform(chunk, controller) {
|
|
2615
|
-
const json = JSON.stringify({
|
|
2616
|
-
candidates: [
|
|
2617
|
-
{
|
|
2618
|
-
content: {
|
|
2619
|
-
role: 'model',
|
|
2620
|
-
parts: [{ text: chunk }]
|
|
2621
|
-
}
|
|
2622
|
-
}
|
|
2623
|
-
]
|
|
2624
|
-
});
|
|
2625
|
-
controller.enqueue(encoder.encode(`data: ${json}\n\n`));
|
|
2626
|
-
}
|
|
2627
|
-
}))
|
|
2628
|
-
};
|
|
2629
|
-
}
|
|
2630
2815
|
}
|
|
2631
|
-
// Visible for testing
|
|
2632
|
-
ChromeAdapterImpl.SUPPORTED_MIME_TYPES = ['image/jpeg', 'image/png'];
|
|
2633
2816
|
|
|
2634
2817
|
/**
|
|
2635
2818
|
* @license
|
|
@@ -2940,7 +3123,7 @@ class ImagenImageFormat {
|
|
|
2940
3123
|
|
|
2941
3124
|
/**
|
|
2942
3125
|
* @license
|
|
2943
|
-
* Copyright
|
|
3126
|
+
* Copyright 2025 Google LLC
|
|
2944
3127
|
*
|
|
2945
3128
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
2946
3129
|
* you may not use this file except in compliance with the License.
|
|
@@ -2954,75 +3137,433 @@ class ImagenImageFormat {
|
|
|
2954
3137
|
* See the License for the specific language governing permissions and
|
|
2955
3138
|
* limitations under the License.
|
|
2956
3139
|
*/
|
|
3140
|
+
const SERVER_INPUT_SAMPLE_RATE = 16000;
|
|
3141
|
+
const SERVER_OUTPUT_SAMPLE_RATE = 24000;
|
|
3142
|
+
const AUDIO_PROCESSOR_NAME = 'audio-processor';
|
|
2957
3143
|
/**
|
|
2958
|
-
*
|
|
2959
|
-
*
|
|
2960
|
-
*
|
|
2961
|
-
*
|
|
2962
|
-
* @example
|
|
2963
|
-
* ```javascript
|
|
2964
|
-
* const ai = getAI(app);
|
|
2965
|
-
* ```
|
|
2966
|
-
*
|
|
2967
|
-
* @example
|
|
2968
|
-
* ```javascript
|
|
2969
|
-
* // Get an AI instance configured to use the Gemini Developer API (via Google AI).
|
|
2970
|
-
* const ai = getAI(app, { backend: new GoogleAIBackend() });
|
|
2971
|
-
* ```
|
|
3144
|
+
* The JS for an `AudioWorkletProcessor`.
|
|
3145
|
+
* This processor is responsible for taking raw audio from the microphone,
|
|
3146
|
+
* converting it to the required 16-bit 16kHz PCM, and posting it back to the main thread.
|
|
2972
3147
|
*
|
|
2973
|
-
*
|
|
2974
|
-
* ```javascript
|
|
2975
|
-
* // Get an AI instance configured to use the Vertex AI Gemini API.
|
|
2976
|
-
* const ai = getAI(app, { backend: new VertexAIBackend() });
|
|
2977
|
-
* ```
|
|
2978
|
-
*
|
|
2979
|
-
* @param app - The {@link @firebase/app#FirebaseApp} to use.
|
|
2980
|
-
* @param options - {@link AIOptions} that configure the AI instance.
|
|
2981
|
-
* @returns The default {@link AI} instance for the given {@link @firebase/app#FirebaseApp}.
|
|
3148
|
+
* See: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorkletProcessor
|
|
2982
3149
|
*
|
|
2983
|
-
*
|
|
3150
|
+
* It is defined as a string here so that it can be converted into a `Blob`
|
|
3151
|
+
* and loaded at runtime.
|
|
2984
3152
|
*/
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
|
|
2999
|
-
|
|
3153
|
+
const audioProcessorWorkletString = `
|
|
3154
|
+
class AudioProcessor extends AudioWorkletProcessor {
|
|
3155
|
+
constructor(options) {
|
|
3156
|
+
super();
|
|
3157
|
+
this.targetSampleRate = options.processorOptions.targetSampleRate;
|
|
3158
|
+
// 'sampleRate' is a global variable available inside the AudioWorkletGlobalScope,
|
|
3159
|
+
// representing the native sample rate of the AudioContext.
|
|
3160
|
+
this.inputSampleRate = sampleRate;
|
|
3161
|
+
}
|
|
3162
|
+
|
|
3163
|
+
/**
|
|
3164
|
+
* This method is called by the browser's audio engine for each block of audio data.
|
|
3165
|
+
* Input is a single input, with a single channel (input[0][0]).
|
|
3166
|
+
*/
|
|
3167
|
+
process(inputs) {
|
|
3168
|
+
const input = inputs[0];
|
|
3169
|
+
if (input && input.length > 0 && input[0].length > 0) {
|
|
3170
|
+
const pcmData = input[0]; // Float32Array of raw audio samples.
|
|
3171
|
+
|
|
3172
|
+
// Simple linear interpolation for resampling.
|
|
3173
|
+
const resampled = new Float32Array(Math.round(pcmData.length * this.targetSampleRate / this.inputSampleRate));
|
|
3174
|
+
const ratio = pcmData.length / resampled.length;
|
|
3175
|
+
for (let i = 0; i < resampled.length; i++) {
|
|
3176
|
+
resampled[i] = pcmData[Math.floor(i * ratio)];
|
|
3177
|
+
}
|
|
3178
|
+
|
|
3179
|
+
// Convert Float32 (-1, 1) samples to Int16 (-32768, 32767)
|
|
3180
|
+
const resampledInt16 = new Int16Array(resampled.length);
|
|
3181
|
+
for (let i = 0; i < resampled.length; i++) {
|
|
3182
|
+
const sample = Math.max(-1, Math.min(1, resampled[i]));
|
|
3183
|
+
if (sample < 0) {
|
|
3184
|
+
resampledInt16[i] = sample * 32768;
|
|
3185
|
+
} else {
|
|
3186
|
+
resampledInt16[i] = sample * 32767;
|
|
3187
|
+
}
|
|
3188
|
+
}
|
|
3189
|
+
|
|
3190
|
+
this.port.postMessage(resampledInt16);
|
|
3191
|
+
}
|
|
3192
|
+
// Return true to keep the processor alive and processing the next audio block.
|
|
3193
|
+
return true;
|
|
3194
|
+
}
|
|
3195
|
+
}
|
|
3196
|
+
|
|
3197
|
+
// Register the processor with a name that can be used to instantiate it from the main thread.
|
|
3198
|
+
registerProcessor('${AUDIO_PROCESSOR_NAME}', AudioProcessor);
|
|
3199
|
+
`;
|
|
3000
3200
|
/**
|
|
3001
|
-
*
|
|
3002
|
-
* and other functionality.
|
|
3201
|
+
* Encapsulates the core logic of an audio conversation.
|
|
3003
3202
|
*
|
|
3004
|
-
* @
|
|
3203
|
+
* @internal
|
|
3005
3204
|
*/
|
|
3006
|
-
|
|
3007
|
-
|
|
3008
|
-
|
|
3009
|
-
|
|
3010
|
-
|
|
3011
|
-
|
|
3012
|
-
|
|
3205
|
+
class AudioConversationRunner {
|
|
3206
|
+
constructor(liveSession, options, deps) {
|
|
3207
|
+
this.liveSession = liveSession;
|
|
3208
|
+
this.options = options;
|
|
3209
|
+
this.deps = deps;
|
|
3210
|
+
/** A flag to indicate if the conversation has been stopped. */
|
|
3211
|
+
this.isStopped = false;
|
|
3212
|
+
/** A deferred that contains a promise that is resolved when stop() is called, to unblock the receive loop. */
|
|
3213
|
+
this.stopDeferred = new Deferred();
|
|
3214
|
+
/** A FIFO queue of 24kHz, 16-bit PCM audio chunks received from the server. */
|
|
3215
|
+
this.playbackQueue = [];
|
|
3216
|
+
/** Tracks scheduled audio sources. Used to cancel scheduled audio when the model is interrupted. */
|
|
3217
|
+
this.scheduledSources = [];
|
|
3218
|
+
/** A high-precision timeline pointer for scheduling gapless audio playback. */
|
|
3219
|
+
this.nextStartTime = 0;
|
|
3220
|
+
/** A mutex to prevent the playback processing loop from running multiple times concurrently. */
|
|
3221
|
+
this.isPlaybackLoopRunning = false;
|
|
3222
|
+
this.liveSession.inConversation = true;
|
|
3223
|
+
// Start listening for messages from the server.
|
|
3224
|
+
this.receiveLoopPromise = this.runReceiveLoop().finally(() => this.cleanup());
|
|
3225
|
+
// Set up the handler for receiving processed audio data from the worklet.
|
|
3226
|
+
// Message data has been resampled to 16kHz 16-bit PCM.
|
|
3227
|
+
this.deps.workletNode.port.onmessage = event => {
|
|
3228
|
+
if (this.isStopped) {
|
|
3229
|
+
return;
|
|
3230
|
+
}
|
|
3231
|
+
const pcm16 = event.data;
|
|
3232
|
+
const base64 = btoa(String.fromCharCode.apply(null, Array.from(new Uint8Array(pcm16.buffer))));
|
|
3233
|
+
const chunk = {
|
|
3234
|
+
mimeType: 'audio/pcm',
|
|
3235
|
+
data: base64
|
|
3236
|
+
};
|
|
3237
|
+
void this.liveSession.sendMediaChunks([chunk]);
|
|
3013
3238
|
};
|
|
3014
3239
|
}
|
|
3015
|
-
|
|
3016
|
-
|
|
3017
|
-
|
|
3240
|
+
/**
|
|
3241
|
+
* Stops the conversation and unblocks the main receive loop.
|
|
3242
|
+
*/
|
|
3243
|
+
async stop() {
|
|
3244
|
+
if (this.isStopped) {
|
|
3245
|
+
return;
|
|
3246
|
+
}
|
|
3247
|
+
this.isStopped = true;
|
|
3248
|
+
this.stopDeferred.resolve(); // Unblock the receive loop
|
|
3249
|
+
await this.receiveLoopPromise; // Wait for the loop and cleanup to finish
|
|
3250
|
+
}
|
|
3251
|
+
/**
|
|
3252
|
+
* Cleans up all audio resources (nodes, stream tracks, context) and marks the
|
|
3253
|
+
* session as no longer in a conversation.
|
|
3254
|
+
*/
|
|
3255
|
+
cleanup() {
|
|
3256
|
+
this.interruptPlayback(); // Ensure all audio is stopped on final cleanup.
|
|
3257
|
+
this.deps.workletNode.port.onmessage = null;
|
|
3258
|
+
this.deps.workletNode.disconnect();
|
|
3259
|
+
this.deps.sourceNode.disconnect();
|
|
3260
|
+
this.deps.mediaStream.getTracks().forEach(track => track.stop());
|
|
3261
|
+
if (this.deps.audioContext.state !== 'closed') {
|
|
3262
|
+
void this.deps.audioContext.close();
|
|
3263
|
+
}
|
|
3264
|
+
this.liveSession.inConversation = false;
|
|
3265
|
+
}
|
|
3266
|
+
/**
|
|
3267
|
+
* Adds audio data to the queue and ensures the playback loop is running.
|
|
3268
|
+
*/
|
|
3269
|
+
enqueueAndPlay(audioData) {
|
|
3270
|
+
this.playbackQueue.push(audioData);
|
|
3271
|
+
// Will no-op if it's already running.
|
|
3272
|
+
void this.processPlaybackQueue();
|
|
3273
|
+
}
|
|
3274
|
+
/**
|
|
3275
|
+
* Stops all current and pending audio playback and clears the queue. This is
|
|
3276
|
+
* called when the server indicates the model's speech was interrupted with
|
|
3277
|
+
* `LiveServerContent.modelTurn.interrupted`.
|
|
3278
|
+
*/
|
|
3279
|
+
interruptPlayback() {
|
|
3280
|
+
// Stop all sources that have been scheduled. The onended event will fire for each,
|
|
3281
|
+
// which will clean up the scheduledSources array.
|
|
3282
|
+
[...this.scheduledSources].forEach(source => source.stop(0));
|
|
3283
|
+
// Clear the internal buffer of unprocessed audio chunks.
|
|
3284
|
+
this.playbackQueue.length = 0;
|
|
3285
|
+
// Reset the playback clock to start fresh.
|
|
3286
|
+
this.nextStartTime = this.deps.audioContext.currentTime;
|
|
3287
|
+
}
|
|
3288
|
+
/**
|
|
3289
|
+
* Processes the playback queue in a loop, scheduling each chunk in a gapless sequence.
|
|
3290
|
+
*/
|
|
3291
|
+
async processPlaybackQueue() {
|
|
3292
|
+
if (this.isPlaybackLoopRunning) {
|
|
3293
|
+
return;
|
|
3294
|
+
}
|
|
3295
|
+
this.isPlaybackLoopRunning = true;
|
|
3296
|
+
while (this.playbackQueue.length > 0 && !this.isStopped) {
|
|
3297
|
+
const pcmRawBuffer = this.playbackQueue.shift();
|
|
3298
|
+
try {
|
|
3299
|
+
const pcm16 = new Int16Array(pcmRawBuffer);
|
|
3300
|
+
const frameCount = pcm16.length;
|
|
3301
|
+
const audioBuffer = this.deps.audioContext.createBuffer(1, frameCount, SERVER_OUTPUT_SAMPLE_RATE);
|
|
3302
|
+
// Convert 16-bit PCM to 32-bit PCM, required by the Web Audio API.
|
|
3303
|
+
const channelData = audioBuffer.getChannelData(0);
|
|
3304
|
+
for (let i = 0; i < frameCount; i++) {
|
|
3305
|
+
channelData[i] = pcm16[i] / 32768; // Normalize to Float32 range [-1.0, 1.0]
|
|
3306
|
+
}
|
|
3307
|
+
const source = this.deps.audioContext.createBufferSource();
|
|
3308
|
+
source.buffer = audioBuffer;
|
|
3309
|
+
source.connect(this.deps.audioContext.destination);
|
|
3310
|
+
// Track the source and set up a handler to remove it from tracking when it finishes.
|
|
3311
|
+
this.scheduledSources.push(source);
|
|
3312
|
+
source.onended = () => {
|
|
3313
|
+
this.scheduledSources = this.scheduledSources.filter(s => s !== source);
|
|
3314
|
+
};
|
|
3315
|
+
// To prevent gaps, schedule the next chunk to start either now (if we're catching up)
|
|
3316
|
+
// or exactly when the previous chunk is scheduled to end.
|
|
3317
|
+
this.nextStartTime = Math.max(this.deps.audioContext.currentTime, this.nextStartTime);
|
|
3318
|
+
source.start(this.nextStartTime);
|
|
3319
|
+
// Update the schedule for the *next* chunk.
|
|
3320
|
+
this.nextStartTime += audioBuffer.duration;
|
|
3321
|
+
}
|
|
3322
|
+
catch (e) {
|
|
3323
|
+
logger.error('Error playing audio:', e);
|
|
3324
|
+
}
|
|
3325
|
+
}
|
|
3326
|
+
this.isPlaybackLoopRunning = false;
|
|
3327
|
+
}
|
|
3328
|
+
/**
|
|
3329
|
+
* The main loop that listens for and processes messages from the server.
|
|
3330
|
+
*/
|
|
3331
|
+
async runReceiveLoop() {
|
|
3332
|
+
const messageGenerator = this.liveSession.receive();
|
|
3333
|
+
while (!this.isStopped) {
|
|
3334
|
+
const result = await Promise.race([
|
|
3335
|
+
messageGenerator.next(),
|
|
3336
|
+
this.stopDeferred.promise
|
|
3337
|
+
]);
|
|
3338
|
+
if (this.isStopped || !result || result.done) {
|
|
3339
|
+
break;
|
|
3340
|
+
}
|
|
3341
|
+
const message = result.value;
|
|
3342
|
+
if (message.type === 'serverContent') {
|
|
3343
|
+
const serverContent = message;
|
|
3344
|
+
if (serverContent.interrupted) {
|
|
3345
|
+
this.interruptPlayback();
|
|
3346
|
+
}
|
|
3347
|
+
const audioPart = serverContent.modelTurn?.parts.find(part => part.inlineData?.mimeType.startsWith('audio/'));
|
|
3348
|
+
if (audioPart?.inlineData) {
|
|
3349
|
+
const audioData = Uint8Array.from(atob(audioPart.inlineData.data), c => c.charCodeAt(0)).buffer;
|
|
3350
|
+
this.enqueueAndPlay(audioData);
|
|
3351
|
+
}
|
|
3352
|
+
}
|
|
3353
|
+
else if (message.type === 'toolCall') {
|
|
3354
|
+
if (!this.options.functionCallingHandler) {
|
|
3355
|
+
logger.warn('Received tool call message, but StartAudioConversationOptions.functionCallingHandler is undefined. Ignoring tool call.');
|
|
3356
|
+
}
|
|
3357
|
+
else {
|
|
3358
|
+
try {
|
|
3359
|
+
const resultPart = await this.options.functionCallingHandler(message.functionCalls);
|
|
3360
|
+
if (!this.isStopped) {
|
|
3361
|
+
void this.liveSession.send([resultPart]);
|
|
3362
|
+
}
|
|
3363
|
+
}
|
|
3364
|
+
catch (e) {
|
|
3365
|
+
throw new AIError(AIErrorCode.ERROR, `Function calling handler failed: ${e.message}`);
|
|
3366
|
+
}
|
|
3367
|
+
}
|
|
3368
|
+
}
|
|
3369
|
+
}
|
|
3370
|
+
}
|
|
3371
|
+
}
|
|
3372
|
+
/**
|
|
3373
|
+
* Starts a real-time, bidirectional audio conversation with the model. This helper function manages
|
|
3374
|
+
* the complexities of microphone access, audio recording, playback, and interruptions.
|
|
3375
|
+
*
|
|
3376
|
+
* @remarks Important: This function must be called in response to a user gesture
|
|
3377
|
+
* (for example, a button click) to comply with {@link https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy | browser autoplay policies}.
|
|
3378
|
+
*
|
|
3379
|
+
* @example
|
|
3380
|
+
* ```javascript
|
|
3381
|
+
* const liveSession = await model.connect();
|
|
3382
|
+
* let conversationController;
|
|
3383
|
+
*
|
|
3384
|
+
* // This function must be called from within a click handler.
|
|
3385
|
+
* async function startConversation() {
|
|
3386
|
+
* try {
|
|
3387
|
+
* conversationController = await startAudioConversation(liveSession);
|
|
3388
|
+
* } catch (e) {
|
|
3389
|
+
* // Handle AI-specific errors
|
|
3390
|
+
* if (e instanceof AIError) {
|
|
3391
|
+
* console.error("AI Error:", e.message);
|
|
3392
|
+
* }
|
|
3393
|
+
* // Handle microphone permission and hardware errors
|
|
3394
|
+
* else if (e instanceof DOMException) {
|
|
3395
|
+
* console.error("Microphone Error:", e.message);
|
|
3396
|
+
* }
|
|
3397
|
+
* // Handle other unexpected errors
|
|
3398
|
+
* else {
|
|
3399
|
+
* console.error("An unexpected error occurred:", e);
|
|
3400
|
+
* }
|
|
3401
|
+
* }
|
|
3402
|
+
* }
|
|
3403
|
+
*
|
|
3404
|
+
* // Later, to stop the conversation:
|
|
3405
|
+
* // if (conversationController) {
|
|
3406
|
+
* // await conversationController.stop();
|
|
3407
|
+
* // }
|
|
3408
|
+
* ```
|
|
3409
|
+
*
|
|
3410
|
+
* @param liveSession - An active {@link LiveSession} instance.
|
|
3411
|
+
* @param options - Configuration options for the audio conversation.
|
|
3412
|
+
* @returns A `Promise` that resolves with an {@link AudioConversationController}.
|
|
3413
|
+
* @throws `AIError` if the environment does not support required Web APIs (`UNSUPPORTED`), if a conversation is already active (`REQUEST_ERROR`), the session is closed (`SESSION_CLOSED`), or if an unexpected initialization error occurs (`ERROR`).
|
|
3414
|
+
* @throws `DOMException` Thrown by `navigator.mediaDevices.getUserMedia()` if issues occur with microphone access, such as permissions being denied (`NotAllowedError`) or no compatible hardware being found (`NotFoundError`). See the {@link https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia#exceptions | MDN documentation} for a full list of exceptions.
|
|
3415
|
+
*
|
|
3416
|
+
* @beta
|
|
3417
|
+
*/
|
|
3418
|
+
async function startAudioConversation(liveSession, options = {}) {
|
|
3419
|
+
if (liveSession.isClosed) {
|
|
3420
|
+
throw new AIError(AIErrorCode.SESSION_CLOSED, 'Cannot start audio conversation on a closed LiveSession.');
|
|
3421
|
+
}
|
|
3422
|
+
if (liveSession.inConversation) {
|
|
3423
|
+
throw new AIError(AIErrorCode.REQUEST_ERROR, 'An audio conversation is already in progress for this session.');
|
|
3424
|
+
}
|
|
3425
|
+
// Check for necessary Web API support.
|
|
3426
|
+
if (typeof AudioWorkletNode === 'undefined' ||
|
|
3427
|
+
typeof AudioContext === 'undefined' ||
|
|
3428
|
+
typeof navigator === 'undefined' ||
|
|
3429
|
+
!navigator.mediaDevices) {
|
|
3430
|
+
throw new AIError(AIErrorCode.UNSUPPORTED, 'Audio conversation is not supported in this environment. It requires the Web Audio API and AudioWorklet support.');
|
|
3431
|
+
}
|
|
3432
|
+
let audioContext;
|
|
3433
|
+
try {
|
|
3434
|
+
// 1. Set up the audio context. This must be in response to a user gesture.
|
|
3435
|
+
// See: https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy
|
|
3436
|
+
audioContext = new AudioContext();
|
|
3437
|
+
if (audioContext.state === 'suspended') {
|
|
3438
|
+
await audioContext.resume();
|
|
3439
|
+
}
|
|
3440
|
+
// 2. Prompt for microphone access and get the media stream.
|
|
3441
|
+
// This can throw a variety of permission or hardware-related errors.
|
|
3442
|
+
const mediaStream = await navigator.mediaDevices.getUserMedia({
|
|
3443
|
+
audio: true
|
|
3444
|
+
});
|
|
3445
|
+
// 3. Load the AudioWorklet processor.
|
|
3446
|
+
// See: https://developer.mozilla.org/en-US/docs/Web/API/AudioWorklet
|
|
3447
|
+
const workletBlob = new Blob([audioProcessorWorkletString], {
|
|
3448
|
+
type: 'application/javascript'
|
|
3449
|
+
});
|
|
3450
|
+
const workletURL = URL.createObjectURL(workletBlob);
|
|
3451
|
+
await audioContext.audioWorklet.addModule(workletURL);
|
|
3452
|
+
// 4. Create the audio graph: Microphone -> Source Node -> Worklet Node
|
|
3453
|
+
const sourceNode = audioContext.createMediaStreamSource(mediaStream);
|
|
3454
|
+
const workletNode = new AudioWorkletNode(audioContext, AUDIO_PROCESSOR_NAME, {
|
|
3455
|
+
processorOptions: { targetSampleRate: SERVER_INPUT_SAMPLE_RATE }
|
|
3456
|
+
});
|
|
3457
|
+
sourceNode.connect(workletNode);
|
|
3458
|
+
// 5. Instantiate and return the runner which manages the conversation.
|
|
3459
|
+
const runner = new AudioConversationRunner(liveSession, options, {
|
|
3460
|
+
audioContext,
|
|
3461
|
+
mediaStream,
|
|
3462
|
+
sourceNode,
|
|
3463
|
+
workletNode
|
|
3464
|
+
});
|
|
3465
|
+
return { stop: () => runner.stop() };
|
|
3466
|
+
}
|
|
3467
|
+
catch (e) {
|
|
3468
|
+
// Ensure the audio context is closed on any setup error.
|
|
3469
|
+
if (audioContext && audioContext.state !== 'closed') {
|
|
3470
|
+
void audioContext.close();
|
|
3471
|
+
}
|
|
3472
|
+
// Re-throw specific, known error types directly. The user may want to handle `DOMException`
|
|
3473
|
+
// errors differently (for example, if permission to access audio device was denied).
|
|
3474
|
+
if (e instanceof AIError || e instanceof DOMException) {
|
|
3475
|
+
throw e;
|
|
3476
|
+
}
|
|
3477
|
+
// Wrap any other unexpected errors in a standard AIError.
|
|
3478
|
+
throw new AIError(AIErrorCode.ERROR, `Failed to initialize audio recording: ${e.message}`);
|
|
3479
|
+
}
|
|
3480
|
+
}
+
+/**
+ * @license
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Returns the default {@link AI} instance that is associated with the provided
+ * {@link @firebase/app#FirebaseApp}. If no instance exists, initializes a new instance with the
+ * default settings.
+ *
+ * @example
+ * ```javascript
+ * const ai = getAI(app);
+ * ```
+ *
+ * @example
+ * ```javascript
+ * // Get an AI instance configured to use the Gemini Developer API (via Google AI).
+ * const ai = getAI(app, { backend: new GoogleAIBackend() });
+ * ```
+ *
+ * @example
+ * ```javascript
+ * // Get an AI instance configured to use the Vertex AI Gemini API.
+ * const ai = getAI(app, { backend: new VertexAIBackend() });
+ * ```
+ *
+ * @param app - The {@link @firebase/app#FirebaseApp} to use.
+ * @param options - {@link AIOptions} that configure the AI instance.
+ * @returns The default {@link AI} instance for the given {@link @firebase/app#FirebaseApp}.
+ *
+ * @public
+ */
+function getAI(app = getApp(), options) {
+    app = getModularInstance(app);
+    // Dependencies
+    const AIProvider = _getProvider(app, AI_TYPE);
+    const backend = options?.backend ?? new GoogleAIBackend();
+    const finalOptions = {
+        useLimitedUseAppCheckTokens: options?.useLimitedUseAppCheckTokens ?? false
+    };
+    const identifier = encodeInstanceIdentifier(backend);
+    const aiInstance = AIProvider.getImmediate({
+        identifier
+    });
+    aiInstance.options = finalOptions;
+    return aiInstance;
+}
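
In addition to the backend choices shown in the JSDoc examples, the `finalOptions` block above reads a `useLimitedUseAppCheckTokens` flag from the options object. A small sketch combining the two (the flag name is taken directly from the code above; `app` is assumed to be an initialized FirebaseApp):

```javascript
import { getAI, VertexAIBackend } from '@firebase/ai';

// Opt in to limited-use App Check tokens while selecting the Vertex AI backend.
const ai = getAI(app, {
  backend: new VertexAIBackend(),
  useLimitedUseAppCheckTokens: true
});
```
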
+/**
+ * Returns a {@link GenerativeModel} class with methods for inference
+ * and other functionality.
+ *
+ * @public
+ */
+function getGenerativeModel(ai, modelParams, requestOptions) {
+    // Uses the existence of HybridParams.mode to clarify the type of the modelParams input.
+    const hybridParams = modelParams;
+    let inCloudParams;
+    if (hybridParams.mode) {
+        inCloudParams = hybridParams.inCloudParams || {
+            model: DEFAULT_HYBRID_IN_CLOUD_MODEL
+        };
+    }
+    else {
+        inCloudParams = modelParams;
+    }
     if (!inCloudParams.model) {
         throw new AIError(AIErrorCode.NO_MODEL, `Must provide a model name. Example: getGenerativeModel({ model: 'my-model-name' })`);
     }
-
-
-
-
-
+    /**
+     * An AIService registered by index.node.ts will not have a
+     * chromeAdapterFactory() method.
+     */
+    const chromeAdapter = ai.chromeAdapterFactory?.(hybridParams.mode, typeof window === 'undefined' ? undefined : window, hybridParams.onDeviceParams);
     return new GenerativeModel(ai, inCloudParams, requestOptions, chromeAdapter);
 }
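
When `modelParams` carries a `mode`, the branch above treats the input as HybridParams and falls back to `DEFAULT_HYBRID_IN_CLOUD_MODEL` if no `inCloudParams` are supplied. A sketch of that hybrid path; the `PREFER_ON_DEVICE` member and the in-cloud model name are assumptions (only `ONLY_IN_CLOUD` and `ONLY_ON_DEVICE` appear verbatim in the adapter code later in this file):

```javascript
import { getAI, getGenerativeModel, InferenceMode } from '@firebase/ai';

const ai = getAI(app); // assumes an initialized FirebaseApp
const model = getGenerativeModel(ai, {
  // Prefer Chrome's on-device model, falling back to the cloud when unavailable.
  // PREFER_ON_DEVICE is assumed from the "prefer_on_device" logic in ChromeAdapterImpl.
  mode: InferenceMode.PREFER_ON_DEVICE,
  // Placeholder model name; omitting inCloudParams falls back to DEFAULT_HYBRID_IN_CLOUD_MODEL.
  inCloudParams: { model: 'gemini-model-name' }
});
```
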
 /**
@@ -3045,6 +3586,320 @@ function getImagenModel(ai, modelParams, requestOptions) {
     }
     return new ImagenModel(ai, modelParams, requestOptions);
 }
+/**
+ * Returns a {@link LiveGenerativeModel} class for real-time, bidirectional communication.
+ *
+ * The Live API is only supported in modern browser windows and Node >= 22.
+ *
+ * @param ai - An {@link AI} instance.
+ * @param modelParams - Parameters to use when setting up a {@link LiveSession}.
+ * @throws If the `apiKey` or `projectId` fields are missing in your
+ * Firebase config.
+ *
+ * @beta
+ */
+function getLiveGenerativeModel(ai, modelParams) {
+    if (!modelParams.model) {
+        throw new AIError(AIErrorCode.NO_MODEL, `Must provide a model name for getLiveGenerativeModel. Example: getLiveGenerativeModel(ai, { model: 'my-model-name' })`);
+    }
+    const webSocketHandler = new WebSocketHandlerImpl();
+    return new LiveGenerativeModel(ai, modelParams, webSocketHandler);
+}
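
A rough sketch of how the returned LiveGenerativeModel is meant to be used. The `connect()`, `send()`, `receive()`, and `close()` names are assumptions drawn from the new live-session.d.ts and live-session-helpers.d.ts files in this release, not from anything shown in this hunk; the model name is a placeholder:

```javascript
import { getAI, getLiveGenerativeModel } from '@firebase/ai';

async function chatLive(app) {
  const ai = getAI(app);
  // Placeholder model name; use a model that supports the Live API.
  const model = getLiveGenerativeModel(ai, { model: 'gemini-live-model-name' });
  const session = await model.connect();            // assumed API
  await session.send('Hello!');                     // assumed API
  for await (const message of session.receive()) {  // assumed API
    console.log(message);
  }
  await session.close();                            // assumed API
}
```
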
+
+/**
+ * @internal
+ */
+var Availability;
+(function (Availability) {
+    Availability["UNAVAILABLE"] = "unavailable";
+    Availability["DOWNLOADABLE"] = "downloadable";
+    Availability["DOWNLOADING"] = "downloading";
+    Availability["AVAILABLE"] = "available";
+})(Availability || (Availability = {}));
+
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Defines an inference "backend" that uses Chrome's on-device model,
+ * and encapsulates logic for detecting when on-device inference is
+ * possible.
+ */
+class ChromeAdapterImpl {
+    constructor(languageModelProvider, mode, onDeviceParams = {
+        createOptions: {
+            // Defaults to support image inputs for convenience.
+            expectedInputs: [{ type: 'image' }]
+        }
+    }) {
+        this.languageModelProvider = languageModelProvider;
+        this.mode = mode;
+        this.onDeviceParams = onDeviceParams;
+        this.isDownloading = false;
+    }
+    /**
+     * Checks if a given request can be made on-device.
+     *
+     * Encapsulates a few concerns:
+     * the mode
+     * API existence
+     * prompt formatting
+     * model availability, including triggering download if necessary
+     *
+     *
+     * Pros: callers needn't be concerned with details of on-device availability.
+     * Cons: this method spans a few concerns and splits request validation from usage.
+     * If instance variables weren't already part of the API, we could consider a better
+     * separation of concerns.
+     */
+    async isAvailable(request) {
+        if (!this.mode) {
+            logger.debug(`On-device inference unavailable because mode is undefined.`);
+            return false;
+        }
+        if (this.mode === InferenceMode.ONLY_IN_CLOUD) {
+            logger.debug(`On-device inference unavailable because mode is "only_in_cloud".`);
+            return false;
+        }
+        // Triggers out-of-band download so model will eventually become available.
+        const availability = await this.downloadIfAvailable();
+        if (this.mode === InferenceMode.ONLY_ON_DEVICE) {
+            // If it will never be available due to API inavailability, throw.
+            if (availability === Availability.UNAVAILABLE) {
+                throw new AIError(AIErrorCode.API_NOT_ENABLED, 'Local LanguageModel API not available in this environment.');
+            }
+            else if (availability === Availability.DOWNLOADABLE ||
+                availability === Availability.DOWNLOADING) {
+                // TODO(chholland): Better user experience during download - progress?
+                logger.debug(`Waiting for download of LanguageModel to complete.`);
+                await this.downloadPromise;
+                return true;
+            }
+            return true;
+        }
+        // Applies prefer_on_device logic.
+        if (availability !== Availability.AVAILABLE) {
+            logger.debug(`On-device inference unavailable because availability is "${availability}".`);
+            return false;
+        }
+        if (!ChromeAdapterImpl.isOnDeviceRequest(request)) {
+            logger.debug(`On-device inference unavailable because request is incompatible.`);
+            return false;
+        }
+        return true;
+    }
+    /**
+     * Generates content on device.
+     *
+     * @remarks
+     * This is comparable to {@link GenerativeModel.generateContent} for generating content in
+     * Cloud.
+     * @param request - a standard Firebase AI {@link GenerateContentRequest}
+     * @returns {@link Response}, so we can reuse common response formatting.
+     */
+    async generateContent(request) {
+        const session = await this.createSession();
+        const contents = await Promise.all(request.contents.map(ChromeAdapterImpl.toLanguageModelMessage));
+        const text = await session.prompt(contents, this.onDeviceParams.promptOptions);
+        return ChromeAdapterImpl.toResponse(text);
+    }
+    /**
+     * Generates content stream on device.
+     *
+     * @remarks
+     * This is comparable to {@link GenerativeModel.generateContentStream} for generating content in
+     * Cloud.
+     * @param request - a standard Firebase AI {@link GenerateContentRequest}
+     * @returns {@link Response}, so we can reuse common response formatting.
+     */
+    async generateContentStream(request) {
+        const session = await this.createSession();
+        const contents = await Promise.all(request.contents.map(ChromeAdapterImpl.toLanguageModelMessage));
+        const stream = session.promptStreaming(contents, this.onDeviceParams.promptOptions);
+        return ChromeAdapterImpl.toStreamResponse(stream);
+    }
+    async countTokens(_request) {
+        throw new AIError(AIErrorCode.REQUEST_ERROR, 'Count Tokens is not yet available for on-device model.');
+    }
+    /**
+     * Asserts inference for the given request can be performed by an on-device model.
+     */
+    static isOnDeviceRequest(request) {
+        // Returns false if the prompt is empty.
+        if (request.contents.length === 0) {
+            logger.debug('Empty prompt rejected for on-device inference.');
+            return false;
+        }
+        for (const content of request.contents) {
+            if (content.role === 'function') {
+                logger.debug(`"Function" role rejected for on-device inference.`);
+                return false;
+            }
+            // Returns false if request contains an image with an unsupported mime type.
+            for (const part of content.parts) {
+                if (part.inlineData &&
+                    ChromeAdapterImpl.SUPPORTED_MIME_TYPES.indexOf(part.inlineData.mimeType) === -1) {
+                    logger.debug(`Unsupported mime type "${part.inlineData.mimeType}" rejected for on-device inference.`);
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+    /**
+     * Encapsulates logic to get availability and download a model if one is downloadable.
+     */
+    async downloadIfAvailable() {
+        const availability = await this.languageModelProvider?.availability(this.onDeviceParams.createOptions);
+        if (availability === Availability.DOWNLOADABLE) {
+            this.download();
+        }
+        return availability;
+    }
+    /**
+     * Triggers out-of-band download of an on-device model.
+     *
+     * Chrome only downloads models as needed. Chrome knows a model is needed when code calls
+     * LanguageModel.create.
+     *
+     * Since Chrome manages the download, the SDK can only avoid redundant download requests by
+     * tracking if a download has previously been requested.
+     */
+    download() {
+        if (this.isDownloading) {
+            return;
+        }
+        this.isDownloading = true;
+        this.downloadPromise = this.languageModelProvider
+            ?.create(this.onDeviceParams.createOptions)
+            .finally(() => {
+                this.isDownloading = false;
+            });
+    }
+    /**
+     * Converts Firebase AI {@link Content} object to a Chrome {@link LanguageModelMessage} object.
+     */
+    static async toLanguageModelMessage(content) {
+        const languageModelMessageContents = await Promise.all(content.parts.map(ChromeAdapterImpl.toLanguageModelMessageContent));
+        return {
+            role: ChromeAdapterImpl.toLanguageModelMessageRole(content.role),
+            content: languageModelMessageContents
+        };
+    }
+    /**
+     * Converts a Firebase AI Part object to a Chrome LanguageModelMessageContent object.
+     */
+    static async toLanguageModelMessageContent(part) {
+        if (part.text) {
+            return {
+                type: 'text',
+                value: part.text
+            };
+        }
+        else if (part.inlineData) {
+            const formattedImageContent = await fetch(`data:${part.inlineData.mimeType};base64,${part.inlineData.data}`);
+            const imageBlob = await formattedImageContent.blob();
+            const imageBitmap = await createImageBitmap(imageBlob);
+            return {
+                type: 'image',
+                value: imageBitmap
+            };
+        }
+        throw new AIError(AIErrorCode.REQUEST_ERROR, `Processing of this Part type is not currently supported.`);
+    }
+    /**
+     * Converts a Firebase AI {@link Role} string to a {@link LanguageModelMessageRole} string.
+     */
+    static toLanguageModelMessageRole(role) {
+        // Assumes 'function' rule has been filtered by isOnDeviceRequest
+        return role === 'model' ? 'assistant' : 'user';
+    }
+    /**
+     * Abstracts Chrome session creation.
+     *
+     * Chrome uses a multi-turn session for all inference. Firebase AI uses single-turn for all
+     * inference. To map the Firebase AI API to Chrome's API, the SDK creates a new session for all
+     * inference.
+     *
+     * Chrome will remove a model from memory if it's no longer in use, so this method ensures a
+     * new session is created before an old session is destroyed.
+     */
+    async createSession() {
+        if (!this.languageModelProvider) {
+            throw new AIError(AIErrorCode.UNSUPPORTED, 'Chrome AI requested for unsupported browser version.');
+        }
+        const newSession = await this.languageModelProvider.create(this.onDeviceParams.createOptions);
+        if (this.oldSession) {
+            this.oldSession.destroy();
+        }
+        // Holds session reference, so model isn't unloaded from memory.
+        this.oldSession = newSession;
+        return newSession;
+    }
+    /**
+     * Formats string returned by Chrome as a {@link Response} returned by Firebase AI.
+     */
+    static toResponse(text) {
+        return {
+            json: async () => ({
+                candidates: [
+                    {
+                        content: {
+                            parts: [{ text }]
+                        }
+                    }
+                ]
+            })
+        };
+    }
+    /**
+     * Formats string stream returned by Chrome as SSE returned by Firebase AI.
+     */
+    static toStreamResponse(stream) {
+        const encoder = new TextEncoder();
+        return {
+            body: stream.pipeThrough(new TransformStream({
+                transform(chunk, controller) {
+                    const json = JSON.stringify({
+                        candidates: [
+                            {
+                                content: {
+                                    role: 'model',
+                                    parts: [{ text: chunk }]
+                                }
+                            }
+                        ]
+                    });
+                    controller.enqueue(encoder.encode(`data: ${json}\n\n`));
+                }
+            }))
+        };
+    }
+}
+// Visible for testing
+ChromeAdapterImpl.SUPPORTED_MIME_TYPES = ['image/jpeg', 'image/png'];
+/**
+ * Creates a ChromeAdapterImpl on demand.
+ */
+function chromeAdapterFactory(mode, window, params) {
+    // Do not initialize a ChromeAdapter if we are not in hybrid mode.
+    if (typeof window !== 'undefined' && mode) {
+        return new ChromeAdapterImpl(window.LanguageModel, mode, params);
+    }
+}
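
getGenerativeModel (earlier in this file) calls `ai.chromeAdapterFactory` with the hybrid mode, the window object, and any on-device parameters, and the factory() change below now hands `chromeAdapterFactory` to AIService. A sketch of the resulting on-device flow, using only members defined above; it is illustrative of what the SDK does internally, not a public API:

```javascript
// Illustrative only: the SDK performs these steps internally via GenerativeModel.
async function tryOnDevice() {
  // window.LanguageModel is Chrome's built-in language-model provider, when present.
  const adapter = chromeAdapterFactory(InferenceMode.ONLY_ON_DEVICE, window, {
    createOptions: { expectedInputs: [{ type: 'image' }] }
  });
  const request = { contents: [{ role: 'user', parts: [{ text: 'Hi there' }] }] };
  // isAvailable() checks the mode, API presence, request shape, and model
  // availability (kicking off an out-of-band download when needed).
  if (adapter && (await adapter.isAvailable(request))) {
    const response = await adapter.generateContent(request);
    console.log(await response.json());
  }
}
```
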
 
 /**
  * The Firebase AI Web SDK.
@@ -3060,7 +3915,7 @@ function factory(container, { instanceIdentifier }) {
     const app = container.getProvider('app').getImmediate();
     const auth = container.getProvider('auth-internal');
     const appCheckProvider = container.getProvider('app-check-internal');
-    return new AIService(app, backend, auth, appCheckProvider);
+    return new AIService(app, backend, auth, appCheckProvider, chromeAdapterFactory);
 }
 function registerAI() {
     _registerComponent(new Component(AI_TYPE, factory, "PUBLIC" /* ComponentType.PUBLIC */).setMultipleInstances(true));
@@ -3070,5 +3925,5 @@ function registerAI() {
 }
 registerAI();
 
-export { AIError, AIErrorCode, AIModel, AnyOfSchema, ArraySchema, Backend, BackendType, BlockReason, BooleanSchema, ChatSession, FinishReason, FunctionCallingMode, GenerativeModel, GoogleAIBackend, HarmBlockMethod, HarmBlockThreshold, HarmCategory, HarmProbability, HarmSeverity, ImagenAspectRatio, ImagenImageFormat, ImagenModel, ImagenPersonFilterLevel, ImagenSafetyFilterLevel, InferenceMode, IntegerSchema, Modality, NumberSchema, ObjectSchema, POSSIBLE_ROLES, ResponseModality, Schema, SchemaType, StringSchema, VertexAIBackend, factory, getAI, getGenerativeModel, getImagenModel };
+export { AIError, AIErrorCode, AIModel, AnyOfSchema, ArraySchema, Backend, BackendType, BlockReason, BooleanSchema, ChatSession, FinishReason, FunctionCallingMode, GenerativeModel, GoogleAIBackend, HarmBlockMethod, HarmBlockThreshold, HarmCategory, HarmProbability, HarmSeverity, ImagenAspectRatio, ImagenImageFormat, ImagenModel, ImagenPersonFilterLevel, ImagenSafetyFilterLevel, InferenceMode, IntegerSchema, LiveGenerativeModel, LiveResponseType, LiveSession, Modality, NumberSchema, ObjectSchema, POSSIBLE_ROLES, ResponseModality, Schema, SchemaType, StringSchema, VertexAIBackend, factory, getAI, getGenerativeModel, getImagenModel, getLiveGenerativeModel, startAudioConversation };
 //# sourceMappingURL=index.esm.js.map
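
The expanded export list above adds the Live API surface. The new symbols can be imported directly from the package entry point (names exactly as exported above):

```javascript
import {
  getLiveGenerativeModel,
  startAudioConversation,
  LiveGenerativeModel,
  LiveResponseType,
  LiveSession
} from '@firebase/ai';
```
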