@livekit/agents-plugin-google 1.0.49 → 1.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/llm.test.js CHANGED
@@ -1,13 +1,21 @@
1
1
  import { llm } from "@livekit/agents-plugins-test";
2
- import { describe } from "vitest";
2
+ import { describe, it } from "vitest";
3
3
  import { LLM } from "./llm.js";
4
- describe("Google", async () => {
5
- await llm(
6
- new LLM({
7
- model: "gemini-2.5-flash",
8
- temperature: 0
9
- }),
10
- true
11
- );
12
- });
4
+ const hasGoogleApiKey = Boolean(process.env.GOOGLE_API_KEY);
5
+ if (hasGoogleApiKey) {
6
+ describe("Google", async () => {
7
+ await llm(
8
+ new LLM({
9
+ model: "gemini-2.5-flash",
10
+ temperature: 0
11
+ }),
12
+ true
13
+ );
14
+ });
15
+ } else {
16
+ describe("Google", () => {
17
+ it.skip("requires GOOGLE_API_KEY", () => {
18
+ });
19
+ });
20
+ }
13
21
  //# sourceMappingURL=llm.test.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/llm.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { llm } from '@livekit/agents-plugins-test';\nimport { describe } from 'vitest';\nimport { LLM } from './llm.js';\n\ndescribe('Google', async () => {\n await llm(\n new LLM({\n model: 'gemini-2.5-flash',\n temperature: 0,\n }),\n true,\n );\n});\n"],"mappings":"AAGA,SAAS,WAAW;AACpB,SAAS,gBAAgB;AACzB,SAAS,WAAW;AAEpB,SAAS,UAAU,YAAY;AAC7B,QAAM;AAAA,IACJ,IAAI,IAAI;AAAA,MACN,OAAO;AAAA,MACP,aAAa;AAAA,IACf,CAAC;AAAA,IACD;AAAA,EACF;AACF,CAAC;","names":[]}
1
+ {"version":3,"sources":["../src/llm.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { llm } from '@livekit/agents-plugins-test';\nimport { describe, it } from 'vitest';\nimport { LLM } from './llm.js';\n\nconst hasGoogleApiKey = Boolean(process.env.GOOGLE_API_KEY);\n\nif (hasGoogleApiKey) {\n describe('Google', async () => {\n await llm(\n new LLM({\n model: 'gemini-2.5-flash',\n temperature: 0,\n }),\n true,\n );\n });\n} else {\n describe('Google', () => {\n it.skip('requires GOOGLE_API_KEY', () => {});\n });\n}\n"],"mappings":"AAGA,SAAS,WAAW;AACpB,SAAS,UAAU,UAAU;AAC7B,SAAS,WAAW;AAEpB,MAAM,kBAAkB,QAAQ,QAAQ,IAAI,cAAc;AAE1D,IAAI,iBAAiB;AACnB,WAAS,UAAU,YAAY;AAC7B,UAAM;AAAA,MACJ,IAAI,IAAI;AAAA,QACN,OAAO;AAAA,QACP,aAAa;AAAA,MACf,CAAC;AAAA,MACD;AAAA,IACF;AAAA,EACF,CAAC;AACH,OAAO;AACL,WAAS,UAAU,MAAM;AACvB,OAAG,KAAK,2BAA2B,MAAM;AAAA,IAAC,CAAC;AAAA,EAC7C,CAAC;AACH;","names":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-google",
3
- "version": "1.0.49",
3
+ "version": "1.0.51",
4
4
  "description": "Google Gemini plugin for LiveKit Node Agents",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -29,19 +29,19 @@
29
29
  "@microsoft/api-extractor": "^7.35.0",
30
30
  "tsup": "^8.3.5",
31
31
  "typescript": "^5.0.0",
32
- "@livekit/agents": "1.0.49",
33
- "@livekit/agents-plugin-openai": "1.0.49",
34
- "@livekit/agents-plugins-test": "1.0.49"
32
+ "@livekit/agents": "1.0.51",
33
+ "@livekit/agents-plugin-openai": "1.0.51",
34
+ "@livekit/agents-plugins-test": "1.0.51"
35
35
  },
36
36
  "dependencies": {
37
- "@google/genai": "^1.34.0",
37
+ "@google/genai": "^1.44.0",
38
38
  "@livekit/mutex": "^1.1.1",
39
39
  "@types/json-schema": "^7.0.15",
40
40
  "json-schema": "^0.4.0"
41
41
  },
42
42
  "peerDependencies": {
43
43
  "@livekit/rtc-node": "^0.13.24",
44
- "@livekit/agents": "1.0.49"
44
+ "@livekit/agents": "1.0.51"
45
45
  },
46
46
  "scripts": {
47
47
  "build": "tsup --onSuccess \"pnpm build:types\"",
@@ -26,6 +26,7 @@ import {
26
26
  delay,
27
27
  llm,
28
28
  log,
29
+ normalizeLanguage,
29
30
  shortuuid,
30
31
  stream,
31
32
  } from '@livekit/agents';
@@ -327,7 +328,7 @@ export class RealtimeModel extends llm.RealtimeModel {
327
328
  model: options.model || defaultModel,
328
329
  apiKey,
329
330
  voice: options.voice || 'Puck',
330
- language: options.language,
331
+ language: options.language ? normalizeLanguage(options.language) : undefined,
331
332
  responseModalities: options.modalities || [Modality.AUDIO],
332
333
  vertexai,
333
334
  project,
@@ -416,6 +417,8 @@ export class RealtimeSession extends llm.RealtimeSession {
416
417
  private hasReceivedAudioInput = false;
417
418
  private pendingInterruptText = false;
418
419
  private earlyCompletionPending = false;
420
+ private toolCallPending = false;
421
+ private generationPendingTurnComplete?: ResponseGeneration;
419
422
 
420
423
  #client: GoogleGenAI;
421
424
  #task: Promise<void>;
@@ -477,6 +480,11 @@ export class RealtimeSession extends llm.RealtimeSession {
477
480
  this.earlyCompletionPending = false;
478
481
  this.pendingInterruptText = false;
479
482
 
483
+ this.toolCallPending = false;
484
+ if (this.generationPendingTurnComplete) {
485
+ this.markCurrentGenerationDone(false, this.generationPendingTurnComplete);
486
+ this.generationPendingTurnComplete = undefined;
487
+ }
480
488
  unlock();
481
489
  }
482
490
 
@@ -644,6 +652,8 @@ export class RealtimeSession extends llm.RealtimeSession {
644
652
  }
645
653
 
646
654
  pushAudio(frame: AudioFrame): void {
655
+ if (this.toolCallPending) return;
656
+
647
657
  // Track that we've received audio input
648
658
  this.hasReceivedAudioInput = true;
649
659
 
@@ -735,6 +745,8 @@ export class RealtimeSession extends llm.RealtimeSession {
735
745
  return;
736
746
  }
737
747
 
748
+ if (this.toolCallPending) return;
749
+
738
750
  if (!this.inUserActivity) {
739
751
  this.inUserActivity = true;
740
752
  this.sendClientEvent({
@@ -968,13 +980,18 @@ export class RealtimeSession extends llm.RealtimeSession {
968
980
  if (LK_GOOGLE_DEBUG) {
969
981
  this.#logger.debug(`(client) -> ${JSON.stringify(this.loggableClientEvent(msg))}`);
970
982
  }
971
- await session.sendToolResponse({
972
- functionResponses,
973
- });
983
+ try {
984
+ await session.sendToolResponse({
985
+ functionResponses,
986
+ });
987
+ } finally {
988
+ this.toolCallPending = false;
989
+ }
974
990
  }
975
991
  break;
976
992
  case 'realtime_input':
977
993
  const { mediaChunks, activityStart, activityEnd, text } = msg.value;
994
+ if (this.toolCallPending) break;
978
995
  if (mediaChunks) {
979
996
  for (const mediaChunk of mediaChunks) {
980
997
  await session.sendRealtimeInput({ media: mediaChunk });
@@ -1012,6 +1029,7 @@ export class RealtimeSession extends llm.RealtimeSession {
1012
1029
  session: types.Session,
1013
1030
  response: types.LiveServerMessage,
1014
1031
  ): Promise<void> {
1032
+ if (response.toolCall) this.toolCallPending = true;
1015
1033
  // Skip logging verbose audio data events
1016
1034
  const hasAudioData = response.serverContent?.modelTurn?.parts?.some(
1017
1035
  (part) => part.inlineData?.data,
@@ -1164,21 +1182,25 @@ export class RealtimeSession extends llm.RealtimeSession {
1164
1182
  return obj;
1165
1183
  }
1166
1184
 
1167
- private markCurrentGenerationDone(keepFunctionChannelOpen: boolean = false): void {
1168
- if (!this.currentGeneration || this.currentGeneration._done) {
1185
+ private markCurrentGenerationDone(
1186
+ keepFunctionChannelOpen: boolean = false,
1187
+ gen?: ResponseGeneration,
1188
+ ): void {
1189
+ const target = gen ?? this.currentGeneration;
1190
+ if (!target || target._done) {
1169
1191
  return;
1170
1192
  }
1171
1193
 
1172
1194
  this.handleInputSpeechStopped();
1173
1195
 
1174
- const gen = this.currentGeneration;
1196
+ const targetGen = target;
1175
1197
 
1176
1198
  // The only way we'd know that the transcription is complete is by when they are
1177
1199
  // done with generation
1178
- if (gen.inputTranscription) {
1200
+ if (targetGen.inputTranscription) {
1179
1201
  this.emit('input_audio_transcription_completed', {
1180
- itemId: gen.inputId,
1181
- transcript: gen.inputTranscription,
1202
+ itemId: targetGen.inputId,
1203
+ transcript: targetGen.inputTranscription,
1182
1204
  isFinal: true,
1183
1205
  } as llm.InputTranscriptionCompleted);
1184
1206
 
@@ -1186,31 +1208,31 @@ export class RealtimeSession extends llm.RealtimeSession {
1186
1208
  // we would handle it manually here
1187
1209
  this._chatCtx.addMessage({
1188
1210
  role: 'user',
1189
- content: gen.inputTranscription,
1190
- id: gen.inputId,
1211
+ content: targetGen.inputTranscription,
1212
+ id: targetGen.inputId,
1191
1213
  });
1192
1214
  }
1193
1215
 
1194
- if (gen.outputText) {
1216
+ if (targetGen.outputText) {
1195
1217
  this._chatCtx.addMessage({
1196
1218
  role: 'assistant',
1197
- content: gen.outputText,
1198
- id: gen.responseId,
1219
+ content: targetGen.outputText,
1220
+ id: targetGen.responseId,
1199
1221
  });
1200
1222
  }
1201
1223
 
1202
1224
  if (this.options.outputAudioTranscription === undefined) {
1203
1225
  // close the text data of transcription synchronizer
1204
- gen.textChannel.write('');
1226
+ targetGen.textChannel.write('');
1205
1227
  }
1206
1228
 
1207
- gen.textChannel.close();
1208
- gen.audioChannel.close();
1229
+ targetGen.textChannel.close();
1230
+ targetGen.audioChannel.close();
1209
1231
  if (!keepFunctionChannelOpen) {
1210
- gen.functionChannel.close();
1232
+ targetGen.functionChannel.close();
1211
1233
  }
1212
- gen.messageChannel.close();
1213
- gen._done = true;
1234
+ targetGen.messageChannel.close();
1235
+ targetGen._done = true;
1214
1236
  }
1215
1237
 
1216
1238
  private emitError(error: Error, recoverable: boolean): void {
@@ -1289,14 +1311,21 @@ export class RealtimeSession extends llm.RealtimeSession {
1289
1311
  }
1290
1312
 
1291
1313
  private startNewGeneration(): void {
1314
+ const previousGen = this.currentGeneration;
1315
+ const previousHadOpenFunctionChannel = previousGen && !previousGen.functionChannel.closed;
1316
+
1292
1317
  // close functionChannel of previous generation if still open (no toolCall arrived)
1293
- if (this.currentGeneration && !this.currentGeneration.functionChannel.closed) {
1294
- this.currentGeneration.functionChannel.close();
1318
+ if (previousGen && previousHadOpenFunctionChannel) {
1319
+ previousGen.functionChannel.close();
1295
1320
  }
1296
1321
 
1297
- if (this.currentGeneration && !this.currentGeneration._done) {
1298
- this.#logger.warn('Starting new generation while another is active. Finalizing previous.');
1299
- this.markCurrentGenerationDone();
1322
+ if (previousGen && !previousGen._done) {
1323
+ if (previousHadOpenFunctionChannel) {
1324
+ this.generationPendingTurnComplete = previousGen;
1325
+ } else {
1326
+ this.#logger.warn('Starting new generation while another is active. Finalizing previous.');
1327
+ this.markCurrentGenerationDone();
1328
+ }
1300
1329
  }
1301
1330
 
1302
1331
  const responseId = shortuuid('GR_');
@@ -1451,7 +1480,12 @@ export class RealtimeSession extends llm.RealtimeSession {
1451
1480
  }
1452
1481
 
1453
1482
  if (serverContent.turnComplete && !this.earlyCompletionPending) {
1454
- this.markCurrentGenerationDone();
1483
+ if (this.generationPendingTurnComplete) {
1484
+ this.markCurrentGenerationDone(false, this.generationPendingTurnComplete);
1485
+ this.generationPendingTurnComplete = undefined;
1486
+ } else {
1487
+ this.markCurrentGenerationDone();
1488
+ }
1455
1489
  }
1456
1490
 
1457
1491
  // Assume Gemini emits turnComplete/generationComplete before any new generation content.
@@ -1472,6 +1506,8 @@ export class RealtimeSession extends llm.RealtimeSession {
1472
1506
 
1473
1507
  const gen = this.currentGeneration;
1474
1508
 
1509
+ this.toolCallPending = true;
1510
+
1475
1511
  if (gen.functionChannel.closed) {
1476
1512
  this.#logger.warn('received tool call but functionChannel is already closed.');
1477
1513
  return;
@@ -1492,7 +1528,6 @@ export class RealtimeSession extends llm.RealtimeSession {
1492
1528
  }
1493
1529
 
1494
1530
  gen.functionChannel.close();
1495
- this.markCurrentGenerationDone();
1496
1531
  }
1497
1532
 
1498
1533
  private handleToolCallCancellation(cancellation: types.LiveServerToolCallCancellation): void {
package/src/llm.test.ts CHANGED
@@ -2,15 +2,23 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { llm } from '@livekit/agents-plugins-test';
5
- import { describe } from 'vitest';
5
+ import { describe, it } from 'vitest';
6
6
  import { LLM } from './llm.js';
7
7
 
8
- describe('Google', async () => {
9
- await llm(
10
- new LLM({
11
- model: 'gemini-2.5-flash',
12
- temperature: 0,
13
- }),
14
- true,
15
- );
16
- });
8
+ const hasGoogleApiKey = Boolean(process.env.GOOGLE_API_KEY);
9
+
10
+ if (hasGoogleApiKey) {
11
+ describe('Google', async () => {
12
+ await llm(
13
+ new LLM({
14
+ model: 'gemini-2.5-flash',
15
+ temperature: 0,
16
+ }),
17
+ true,
18
+ );
19
+ });
20
+ } else {
21
+ describe('Google', () => {
22
+ it.skip('requires GOOGLE_API_KEY', () => {});
23
+ });
24
+ }