@livekit/agents-plugin-google 1.0.49 → 1.0.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/beta/realtime/realtime_api.cjs +56 -29
- package/dist/beta/realtime/realtime_api.cjs.map +1 -1
- package/dist/beta/realtime/realtime_api.d.cts +2 -0
- package/dist/beta/realtime/realtime_api.d.ts +2 -0
- package/dist/beta/realtime/realtime_api.d.ts.map +1 -1
- package/dist/beta/realtime/realtime_api.js +57 -29
- package/dist/beta/realtime/realtime_api.js.map +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/dist/llm.test.cjs +17 -9
- package/dist/llm.test.cjs.map +1 -1
- package/dist/llm.test.js +18 -10
- package/dist/llm.test.js.map +1 -1
- package/package.json +6 -6
- package/src/beta/realtime/realtime_api.ts +63 -28
- package/src/llm.test.ts +18 -10
package/dist/llm.test.js
CHANGED
|
@@ -1,13 +1,21 @@
|
|
|
1
1
|
import { llm } from "@livekit/agents-plugins-test";
|
|
2
|
-
import { describe } from "vitest";
|
|
2
|
+
import { describe, it } from "vitest";
|
|
3
3
|
import { LLM } from "./llm.js";
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
4
|
+
const hasGoogleApiKey = Boolean(process.env.GOOGLE_API_KEY);
|
|
5
|
+
if (hasGoogleApiKey) {
|
|
6
|
+
describe("Google", async () => {
|
|
7
|
+
await llm(
|
|
8
|
+
new LLM({
|
|
9
|
+
model: "gemini-2.5-flash",
|
|
10
|
+
temperature: 0
|
|
11
|
+
}),
|
|
12
|
+
true
|
|
13
|
+
);
|
|
14
|
+
});
|
|
15
|
+
} else {
|
|
16
|
+
describe("Google", () => {
|
|
17
|
+
it.skip("requires GOOGLE_API_KEY", () => {
|
|
18
|
+
});
|
|
19
|
+
});
|
|
20
|
+
}
|
|
13
21
|
//# sourceMappingURL=llm.test.js.map
|
package/dist/llm.test.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/llm.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { llm } from '@livekit/agents-plugins-test';\nimport { describe } from 'vitest';\nimport { LLM } from './llm.js';\n\
|
|
1
|
+
{"version":3,"sources":["../src/llm.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { llm } from '@livekit/agents-plugins-test';\nimport { describe, it } from 'vitest';\nimport { LLM } from './llm.js';\n\nconst hasGoogleApiKey = Boolean(process.env.GOOGLE_API_KEY);\n\nif (hasGoogleApiKey) {\n describe('Google', async () => {\n await llm(\n new LLM({\n model: 'gemini-2.5-flash',\n temperature: 0,\n }),\n true,\n );\n });\n} else {\n describe('Google', () => {\n it.skip('requires GOOGLE_API_KEY', () => {});\n });\n}\n"],"mappings":"AAGA,SAAS,WAAW;AACpB,SAAS,UAAU,UAAU;AAC7B,SAAS,WAAW;AAEpB,MAAM,kBAAkB,QAAQ,QAAQ,IAAI,cAAc;AAE1D,IAAI,iBAAiB;AACnB,WAAS,UAAU,YAAY;AAC7B,UAAM;AAAA,MACJ,IAAI,IAAI;AAAA,QACN,OAAO;AAAA,QACP,aAAa;AAAA,MACf,CAAC;AAAA,MACD;AAAA,IACF;AAAA,EACF,CAAC;AACH,OAAO;AACL,WAAS,UAAU,MAAM;AACvB,OAAG,KAAK,2BAA2B,MAAM;AAAA,IAAC,CAAC;AAAA,EAC7C,CAAC;AACH;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-google",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.51",
|
|
4
4
|
"description": "Google Gemini plugin for LiveKit Node Agents",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -29,19 +29,19 @@
|
|
|
29
29
|
"@microsoft/api-extractor": "^7.35.0",
|
|
30
30
|
"tsup": "^8.3.5",
|
|
31
31
|
"typescript": "^5.0.0",
|
|
32
|
-
"@livekit/agents": "1.0.
|
|
33
|
-
"@livekit/agents-plugin-openai": "1.0.
|
|
34
|
-
"@livekit/agents-plugins-test": "1.0.
|
|
32
|
+
"@livekit/agents": "1.0.51",
|
|
33
|
+
"@livekit/agents-plugin-openai": "1.0.51",
|
|
34
|
+
"@livekit/agents-plugins-test": "1.0.51"
|
|
35
35
|
},
|
|
36
36
|
"dependencies": {
|
|
37
|
-
"@google/genai": "^1.
|
|
37
|
+
"@google/genai": "^1.44.0",
|
|
38
38
|
"@livekit/mutex": "^1.1.1",
|
|
39
39
|
"@types/json-schema": "^7.0.15",
|
|
40
40
|
"json-schema": "^0.4.0"
|
|
41
41
|
},
|
|
42
42
|
"peerDependencies": {
|
|
43
43
|
"@livekit/rtc-node": "^0.13.24",
|
|
44
|
-
"@livekit/agents": "1.0.
|
|
44
|
+
"@livekit/agents": "1.0.51"
|
|
45
45
|
},
|
|
46
46
|
"scripts": {
|
|
47
47
|
"build": "tsup --onSuccess \"pnpm build:types\"",
|
|
@@ -26,6 +26,7 @@ import {
|
|
|
26
26
|
delay,
|
|
27
27
|
llm,
|
|
28
28
|
log,
|
|
29
|
+
normalizeLanguage,
|
|
29
30
|
shortuuid,
|
|
30
31
|
stream,
|
|
31
32
|
} from '@livekit/agents';
|
|
@@ -327,7 +328,7 @@ export class RealtimeModel extends llm.RealtimeModel {
|
|
|
327
328
|
model: options.model || defaultModel,
|
|
328
329
|
apiKey,
|
|
329
330
|
voice: options.voice || 'Puck',
|
|
330
|
-
language: options.language,
|
|
331
|
+
language: options.language ? normalizeLanguage(options.language) : undefined,
|
|
331
332
|
responseModalities: options.modalities || [Modality.AUDIO],
|
|
332
333
|
vertexai,
|
|
333
334
|
project,
|
|
@@ -416,6 +417,8 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
416
417
|
private hasReceivedAudioInput = false;
|
|
417
418
|
private pendingInterruptText = false;
|
|
418
419
|
private earlyCompletionPending = false;
|
|
420
|
+
private toolCallPending = false;
|
|
421
|
+
private generationPendingTurnComplete?: ResponseGeneration;
|
|
419
422
|
|
|
420
423
|
#client: GoogleGenAI;
|
|
421
424
|
#task: Promise<void>;
|
|
@@ -477,6 +480,11 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
477
480
|
this.earlyCompletionPending = false;
|
|
478
481
|
this.pendingInterruptText = false;
|
|
479
482
|
|
|
483
|
+
this.toolCallPending = false;
|
|
484
|
+
if (this.generationPendingTurnComplete) {
|
|
485
|
+
this.markCurrentGenerationDone(false, this.generationPendingTurnComplete);
|
|
486
|
+
this.generationPendingTurnComplete = undefined;
|
|
487
|
+
}
|
|
480
488
|
unlock();
|
|
481
489
|
}
|
|
482
490
|
|
|
@@ -644,6 +652,8 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
644
652
|
}
|
|
645
653
|
|
|
646
654
|
pushAudio(frame: AudioFrame): void {
|
|
655
|
+
if (this.toolCallPending) return;
|
|
656
|
+
|
|
647
657
|
// Track that we've received audio input
|
|
648
658
|
this.hasReceivedAudioInput = true;
|
|
649
659
|
|
|
@@ -735,6 +745,8 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
735
745
|
return;
|
|
736
746
|
}
|
|
737
747
|
|
|
748
|
+
if (this.toolCallPending) return;
|
|
749
|
+
|
|
738
750
|
if (!this.inUserActivity) {
|
|
739
751
|
this.inUserActivity = true;
|
|
740
752
|
this.sendClientEvent({
|
|
@@ -968,13 +980,18 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
968
980
|
if (LK_GOOGLE_DEBUG) {
|
|
969
981
|
this.#logger.debug(`(client) -> ${JSON.stringify(this.loggableClientEvent(msg))}`);
|
|
970
982
|
}
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
983
|
+
try {
|
|
984
|
+
await session.sendToolResponse({
|
|
985
|
+
functionResponses,
|
|
986
|
+
});
|
|
987
|
+
} finally {
|
|
988
|
+
this.toolCallPending = false;
|
|
989
|
+
}
|
|
974
990
|
}
|
|
975
991
|
break;
|
|
976
992
|
case 'realtime_input':
|
|
977
993
|
const { mediaChunks, activityStart, activityEnd, text } = msg.value;
|
|
994
|
+
if (this.toolCallPending) break;
|
|
978
995
|
if (mediaChunks) {
|
|
979
996
|
for (const mediaChunk of mediaChunks) {
|
|
980
997
|
await session.sendRealtimeInput({ media: mediaChunk });
|
|
@@ -1012,6 +1029,7 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
1012
1029
|
session: types.Session,
|
|
1013
1030
|
response: types.LiveServerMessage,
|
|
1014
1031
|
): Promise<void> {
|
|
1032
|
+
if (response.toolCall) this.toolCallPending = true;
|
|
1015
1033
|
// Skip logging verbose audio data events
|
|
1016
1034
|
const hasAudioData = response.serverContent?.modelTurn?.parts?.some(
|
|
1017
1035
|
(part) => part.inlineData?.data,
|
|
@@ -1164,21 +1182,25 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
1164
1182
|
return obj;
|
|
1165
1183
|
}
|
|
1166
1184
|
|
|
1167
|
-
private markCurrentGenerationDone(
|
|
1168
|
-
|
|
1185
|
+
private markCurrentGenerationDone(
|
|
1186
|
+
keepFunctionChannelOpen: boolean = false,
|
|
1187
|
+
gen?: ResponseGeneration,
|
|
1188
|
+
): void {
|
|
1189
|
+
const target = gen ?? this.currentGeneration;
|
|
1190
|
+
if (!target || target._done) {
|
|
1169
1191
|
return;
|
|
1170
1192
|
}
|
|
1171
1193
|
|
|
1172
1194
|
this.handleInputSpeechStopped();
|
|
1173
1195
|
|
|
1174
|
-
const
|
|
1196
|
+
const targetGen = target;
|
|
1175
1197
|
|
|
1176
1198
|
// The only way we'd know that the transcription is complete is by when they are
|
|
1177
1199
|
// done with generation
|
|
1178
|
-
if (
|
|
1200
|
+
if (targetGen.inputTranscription) {
|
|
1179
1201
|
this.emit('input_audio_transcription_completed', {
|
|
1180
|
-
itemId:
|
|
1181
|
-
transcript:
|
|
1202
|
+
itemId: targetGen.inputId,
|
|
1203
|
+
transcript: targetGen.inputTranscription,
|
|
1182
1204
|
isFinal: true,
|
|
1183
1205
|
} as llm.InputTranscriptionCompleted);
|
|
1184
1206
|
|
|
@@ -1186,31 +1208,31 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
1186
1208
|
// we would handle it manually here
|
|
1187
1209
|
this._chatCtx.addMessage({
|
|
1188
1210
|
role: 'user',
|
|
1189
|
-
content:
|
|
1190
|
-
id:
|
|
1211
|
+
content: targetGen.inputTranscription,
|
|
1212
|
+
id: targetGen.inputId,
|
|
1191
1213
|
});
|
|
1192
1214
|
}
|
|
1193
1215
|
|
|
1194
|
-
if (
|
|
1216
|
+
if (targetGen.outputText) {
|
|
1195
1217
|
this._chatCtx.addMessage({
|
|
1196
1218
|
role: 'assistant',
|
|
1197
|
-
content:
|
|
1198
|
-
id:
|
|
1219
|
+
content: targetGen.outputText,
|
|
1220
|
+
id: targetGen.responseId,
|
|
1199
1221
|
});
|
|
1200
1222
|
}
|
|
1201
1223
|
|
|
1202
1224
|
if (this.options.outputAudioTranscription === undefined) {
|
|
1203
1225
|
// close the text data of transcription synchronizer
|
|
1204
|
-
|
|
1226
|
+
targetGen.textChannel.write('');
|
|
1205
1227
|
}
|
|
1206
1228
|
|
|
1207
|
-
|
|
1208
|
-
|
|
1229
|
+
targetGen.textChannel.close();
|
|
1230
|
+
targetGen.audioChannel.close();
|
|
1209
1231
|
if (!keepFunctionChannelOpen) {
|
|
1210
|
-
|
|
1232
|
+
targetGen.functionChannel.close();
|
|
1211
1233
|
}
|
|
1212
|
-
|
|
1213
|
-
|
|
1234
|
+
targetGen.messageChannel.close();
|
|
1235
|
+
targetGen._done = true;
|
|
1214
1236
|
}
|
|
1215
1237
|
|
|
1216
1238
|
private emitError(error: Error, recoverable: boolean): void {
|
|
@@ -1289,14 +1311,21 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
1289
1311
|
}
|
|
1290
1312
|
|
|
1291
1313
|
private startNewGeneration(): void {
|
|
1314
|
+
const previousGen = this.currentGeneration;
|
|
1315
|
+
const previousHadOpenFunctionChannel = previousGen && !previousGen.functionChannel.closed;
|
|
1316
|
+
|
|
1292
1317
|
// close functionChannel of previous generation if still open (no toolCall arrived)
|
|
1293
|
-
if (
|
|
1294
|
-
|
|
1318
|
+
if (previousGen && previousHadOpenFunctionChannel) {
|
|
1319
|
+
previousGen.functionChannel.close();
|
|
1295
1320
|
}
|
|
1296
1321
|
|
|
1297
|
-
if (
|
|
1298
|
-
|
|
1299
|
-
|
|
1322
|
+
if (previousGen && !previousGen._done) {
|
|
1323
|
+
if (previousHadOpenFunctionChannel) {
|
|
1324
|
+
this.generationPendingTurnComplete = previousGen;
|
|
1325
|
+
} else {
|
|
1326
|
+
this.#logger.warn('Starting new generation while another is active. Finalizing previous.');
|
|
1327
|
+
this.markCurrentGenerationDone();
|
|
1328
|
+
}
|
|
1300
1329
|
}
|
|
1301
1330
|
|
|
1302
1331
|
const responseId = shortuuid('GR_');
|
|
@@ -1451,7 +1480,12 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
1451
1480
|
}
|
|
1452
1481
|
|
|
1453
1482
|
if (serverContent.turnComplete && !this.earlyCompletionPending) {
|
|
1454
|
-
this.
|
|
1483
|
+
if (this.generationPendingTurnComplete) {
|
|
1484
|
+
this.markCurrentGenerationDone(false, this.generationPendingTurnComplete);
|
|
1485
|
+
this.generationPendingTurnComplete = undefined;
|
|
1486
|
+
} else {
|
|
1487
|
+
this.markCurrentGenerationDone();
|
|
1488
|
+
}
|
|
1455
1489
|
}
|
|
1456
1490
|
|
|
1457
1491
|
// Assume Gemini emits turnComplete/generationComplete before any new generation content.
|
|
@@ -1472,6 +1506,8 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
1472
1506
|
|
|
1473
1507
|
const gen = this.currentGeneration;
|
|
1474
1508
|
|
|
1509
|
+
this.toolCallPending = true;
|
|
1510
|
+
|
|
1475
1511
|
if (gen.functionChannel.closed) {
|
|
1476
1512
|
this.#logger.warn('received tool call but functionChannel is already closed.');
|
|
1477
1513
|
return;
|
|
@@ -1492,7 +1528,6 @@ export class RealtimeSession extends llm.RealtimeSession {
|
|
|
1492
1528
|
}
|
|
1493
1529
|
|
|
1494
1530
|
gen.functionChannel.close();
|
|
1495
|
-
this.markCurrentGenerationDone();
|
|
1496
1531
|
}
|
|
1497
1532
|
|
|
1498
1533
|
private handleToolCallCancellation(cancellation: types.LiveServerToolCallCancellation): void {
|
package/src/llm.test.ts
CHANGED
|
@@ -2,15 +2,23 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { llm } from '@livekit/agents-plugins-test';
|
|
5
|
-
import { describe } from 'vitest';
|
|
5
|
+
import { describe, it } from 'vitest';
|
|
6
6
|
import { LLM } from './llm.js';
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
})
|
|
8
|
+
const hasGoogleApiKey = Boolean(process.env.GOOGLE_API_KEY);
|
|
9
|
+
|
|
10
|
+
if (hasGoogleApiKey) {
|
|
11
|
+
describe('Google', async () => {
|
|
12
|
+
await llm(
|
|
13
|
+
new LLM({
|
|
14
|
+
model: 'gemini-2.5-flash',
|
|
15
|
+
temperature: 0,
|
|
16
|
+
}),
|
|
17
|
+
true,
|
|
18
|
+
);
|
|
19
|
+
});
|
|
20
|
+
} else {
|
|
21
|
+
describe('Google', () => {
|
|
22
|
+
it.skip('requires GOOGLE_API_KEY', () => {});
|
|
23
|
+
});
|
|
24
|
+
}
|