npm - @livekit/agents-plugin-google - Versions diffs - 1.0.49 → 1.0.51 - Mend

@livekit/agents-plugin-google 1.0.49 → 1.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/beta/realtime/realtime_api.cjs +56 -29
package/dist/beta/realtime/realtime_api.cjs.map +1 -1
package/dist/beta/realtime/realtime_api.d.cts +2 -0
package/dist/beta/realtime/realtime_api.d.ts +2 -0
package/dist/beta/realtime/realtime_api.d.ts.map +1 -1
package/dist/beta/realtime/realtime_api.js +57 -29
package/dist/beta/realtime/realtime_api.js.map +1 -1
package/dist/index.cjs +1 -1
package/dist/index.js +1 -1
package/dist/llm.test.cjs +17 -9
package/dist/llm.test.cjs.map +1 -1
package/dist/llm.test.js +18 -10
package/dist/llm.test.js.map +1 -1
package/package.json +6 -6
package/src/beta/realtime/realtime_api.ts +63 -28
package/src/llm.test.ts +18 -10

package/dist/llm.test.js CHANGED Viewed

@@ -1,13 +1,21 @@
 import { llm } from "@livekit/agents-plugins-test";
-import { describe } from "vitest";
+import { describe, it } from "vitest";
 import { LLM } from "./llm.js";
-describe("Google", async () => {
-  await llm(
-    new LLM({
-      model: "gemini-2.5-flash",
-      temperature: 0
-    }),
-    true
-  );
-});
+const hasGoogleApiKey = Boolean(process.env.GOOGLE_API_KEY);
+if (hasGoogleApiKey) {
+  describe("Google", async () => {
+    await llm(
+      new LLM({
+        model: "gemini-2.5-flash",
+        temperature: 0
+      }),
+      true
+    );
+  });
+} else {
+  describe("Google", () => {
+    it.skip("requires GOOGLE_API_KEY", () => {
+    });
+  });
+}
 //# sourceMappingURL=llm.test.js.map

package/dist/llm.test.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"sources":["../src/llm.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { llm } from '@livekit/agents-plugins-test';\nimport { describe } from 'vitest';\nimport { LLM } from './llm.js';\n\ndescribe('Google', async () => {\n await llm(\n new LLM({\n model: 'gemini-2.5-flash',\n temperature: 0,\n }),\n true,\n );\n});\n"],"mappings":"AAGA,SAAS,WAAW;AACpB,SAAS,gBAAgB;AACzB,SAAS,WAAW;AAEpB,SAAS,UAAU,YAAY;AAC7B,QAAM;AAAA,IACJ,IAAI,IAAI;AAAA,MACN,OAAO;AAAA,MACP,aAAa;AAAA,IACf,CAAC;AAAA,IACD;AAAA,EACF;AACF,CAAC;","names":[]}
1	+ {"version":3,"sources":["../src/llm.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { llm } from '@livekit/agents-plugins-test';\nimport { describe, it } from 'vitest';\nimport { LLM } from './llm.js';\n\nconst hasGoogleApiKey = Boolean(process.env.GOOGLE_API_KEY);\n\nif (hasGoogleApiKey) {\n describe('Google', async () => {\n await llm(\n new LLM({\n model: 'gemini-2.5-flash',\n temperature: 0,\n }),\n true,\n );\n });\n} else {\n describe('Google', () => {\n it.skip('requires GOOGLE_API_KEY', () => {});\n });\n}\n"],"mappings":"AAGA,SAAS,WAAW;AACpB,SAAS,UAAU,UAAU;AAC7B,SAAS,WAAW;AAEpB,MAAM,kBAAkB,QAAQ,QAAQ,IAAI,cAAc;AAE1D,IAAI,iBAAiB;AACnB,WAAS,UAAU,YAAY;AAC7B,UAAM;AAAA,MACJ,IAAI,IAAI;AAAA,QACN,OAAO;AAAA,QACP,aAAa;AAAA,MACf,CAAC;AAAA,MACD;AAAA,IACF;AAAA,EACF,CAAC;AACH,OAAO;AACL,WAAS,UAAU,MAAM;AACvB,OAAG,KAAK,2BAA2B,MAAM;AAAA,IAAC,CAAC;AAAA,EAC7C,CAAC;AACH;","names":[]}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@livekit/agents-plugin-google",
-  "version": "1.0.49",
+  "version": "1.0.51",
   "description": "Google Gemini plugin for LiveKit Node Agents",
   "main": "dist/index.js",
   "require": "dist/index.cjs",
@@ -29,19 +29,19 @@
     "@microsoft/api-extractor": "^7.35.0",
     "tsup": "^8.3.5",
     "typescript": "^5.0.0",
-    "@livekit/agents": "1.0.49",
-    "@livekit/agents-plugin-openai": "1.0.49",
-    "@livekit/agents-plugins-test": "1.0.49"
+    "@livekit/agents": "1.0.51",
+    "@livekit/agents-plugin-openai": "1.0.51",
+    "@livekit/agents-plugins-test": "1.0.51"
   },
   "dependencies": {
-    "@google/genai": "^1.34.0",
+    "@google/genai": "^1.44.0",
     "@livekit/mutex": "^1.1.1",
     "@types/json-schema": "^7.0.15",
     "json-schema": "^0.4.0"
   },
   "peerDependencies": {
     "@livekit/rtc-node": "^0.13.24",
-    "@livekit/agents": "1.0.49"
+    "@livekit/agents": "1.0.51"
   },
   "scripts": {
     "build": "tsup --onSuccess \"pnpm build:types\"",

package/src/beta/realtime/realtime_api.ts CHANGED Viewed

@@ -26,6 +26,7 @@ import {
   delay,
   llm,
   log,
+  normalizeLanguage,
   shortuuid,
   stream,
 } from '@livekit/agents';
@@ -327,7 +328,7 @@ export class RealtimeModel extends llm.RealtimeModel {
       model: options.model || defaultModel,
       apiKey,
       voice: options.voice || 'Puck',
-      language: options.language,
+      language: options.language ? normalizeLanguage(options.language) : undefined,
       responseModalities: options.modalities || [Modality.AUDIO],
       vertexai,
       project,
@@ -416,6 +417,8 @@ export class RealtimeSession extends llm.RealtimeSession {
   private hasReceivedAudioInput = false;
   private pendingInterruptText = false;
   private earlyCompletionPending = false;
+  private toolCallPending = false;
+  private generationPendingTurnComplete?: ResponseGeneration;
   #client: GoogleGenAI;
   #task: Promise<void>;
@@ -477,6 +480,11 @@ export class RealtimeSession extends llm.RealtimeSession {
     this.earlyCompletionPending = false;
     this.pendingInterruptText = false;
+    this.toolCallPending = false;
+    if (this.generationPendingTurnComplete) {
+      this.markCurrentGenerationDone(false, this.generationPendingTurnComplete);
+      this.generationPendingTurnComplete = undefined;
+    }
     unlock();
   }
@@ -644,6 +652,8 @@ export class RealtimeSession extends llm.RealtimeSession {
   }
   pushAudio(frame: AudioFrame): void {
+    if (this.toolCallPending) return;
     // Track that we've received audio input
     this.hasReceivedAudioInput = true;
@@ -735,6 +745,8 @@ export class RealtimeSession extends llm.RealtimeSession {
       return;
     }
+    if (this.toolCallPending) return;
     if (!this.inUserActivity) {
       this.inUserActivity = true;
       this.sendClientEvent({
@@ -968,13 +980,18 @@ export class RealtimeSession extends llm.RealtimeSession {
               if (LK_GOOGLE_DEBUG) {
                 this.#logger.debug(`(client) -> ${JSON.stringify(this.loggableClientEvent(msg))}`);
               }
-              await session.sendToolResponse({
-                functionResponses,
-              });
+              try {
+                await session.sendToolResponse({
+                  functionResponses,
+                });
+              } finally {
+                this.toolCallPending = false;
+              }
             }
             break;
           case 'realtime_input':
             const { mediaChunks, activityStart, activityEnd, text } = msg.value;
+            if (this.toolCallPending) break;
             if (mediaChunks) {
               for (const mediaChunk of mediaChunks) {
                 await session.sendRealtimeInput({ media: mediaChunk });
@@ -1012,6 +1029,7 @@ export class RealtimeSession extends llm.RealtimeSession {
     session: types.Session,
     response: types.LiveServerMessage,
   ): Promise<void> {
+    if (response.toolCall) this.toolCallPending = true;
     // Skip logging verbose audio data events
     const hasAudioData = response.serverContent?.modelTurn?.parts?.some(
       (part) => part.inlineData?.data,
@@ -1164,21 +1182,25 @@ export class RealtimeSession extends llm.RealtimeSession {
     return obj;
   }
-  private markCurrentGenerationDone(keepFunctionChannelOpen: boolean = false): void {
-    if (!this.currentGeneration || this.currentGeneration._done) {
+  private markCurrentGenerationDone(
+    keepFunctionChannelOpen: boolean = false,
+    gen?: ResponseGeneration,
+  ): void {
+    const target = gen ?? this.currentGeneration;
+    if (!target || target._done) {
       return;
     }
     this.handleInputSpeechStopped();
-    const gen = this.currentGeneration;
+    const targetGen = target;
     // The only way we'd know that the transcription is complete is by when they are
     // done with generation
-    if (gen.inputTranscription) {
+    if (targetGen.inputTranscription) {
       this.emit('input_audio_transcription_completed', {
-        itemId: gen.inputId,
-        transcript: gen.inputTranscription,
+        itemId: targetGen.inputId,
+        transcript: targetGen.inputTranscription,
         isFinal: true,
       } as llm.InputTranscriptionCompleted);
@@ -1186,31 +1208,31 @@ export class RealtimeSession extends llm.RealtimeSession {
       // we would handle it manually here
       this._chatCtx.addMessage({
         role: 'user',
-        content: gen.inputTranscription,
-        id: gen.inputId,
+        content: targetGen.inputTranscription,
+        id: targetGen.inputId,
       });
     }
-    if (gen.outputText) {
+    if (targetGen.outputText) {
       this._chatCtx.addMessage({
         role: 'assistant',
-        content: gen.outputText,
-        id: gen.responseId,
+        content: targetGen.outputText,
+        id: targetGen.responseId,
       });
     }
     if (this.options.outputAudioTranscription === undefined) {
       // close the text data of transcription synchronizer
-      gen.textChannel.write('');
+      targetGen.textChannel.write('');
     }
-    gen.textChannel.close();
-    gen.audioChannel.close();
+    targetGen.textChannel.close();
+    targetGen.audioChannel.close();
     if (!keepFunctionChannelOpen) {
-      gen.functionChannel.close();
+      targetGen.functionChannel.close();
     }
-    gen.messageChannel.close();
-    gen._done = true;
+    targetGen.messageChannel.close();
+    targetGen._done = true;
   }
   private emitError(error: Error, recoverable: boolean): void {
@@ -1289,14 +1311,21 @@ export class RealtimeSession extends llm.RealtimeSession {
   }
   private startNewGeneration(): void {
+    const previousGen = this.currentGeneration;
+    const previousHadOpenFunctionChannel = previousGen && !previousGen.functionChannel.closed;
     // close functionChannel of previous generation if still open (no toolCall arrived)
-    if (this.currentGeneration && !this.currentGeneration.functionChannel.closed) {
-      this.currentGeneration.functionChannel.close();
+    if (previousGen && previousHadOpenFunctionChannel) {
+      previousGen.functionChannel.close();
     }
-    if (this.currentGeneration && !this.currentGeneration._done) {
-      this.#logger.warn('Starting new generation while another is active. Finalizing previous.');
-      this.markCurrentGenerationDone();
+    if (previousGen && !previousGen._done) {
+      if (previousHadOpenFunctionChannel) {
+        this.generationPendingTurnComplete = previousGen;
+      } else {
+        this.#logger.warn('Starting new generation while another is active. Finalizing previous.');
+        this.markCurrentGenerationDone();
+      }
     }
     const responseId = shortuuid('GR_');
@@ -1451,7 +1480,12 @@ export class RealtimeSession extends llm.RealtimeSession {
     }
     if (serverContent.turnComplete && !this.earlyCompletionPending) {
-      this.markCurrentGenerationDone();
+      if (this.generationPendingTurnComplete) {
+        this.markCurrentGenerationDone(false, this.generationPendingTurnComplete);
+        this.generationPendingTurnComplete = undefined;
+      } else {
+        this.markCurrentGenerationDone();
+      }
     }
     // Assume Gemini emits turnComplete/generationComplete before any new generation content.
@@ -1472,6 +1506,8 @@ export class RealtimeSession extends llm.RealtimeSession {
     const gen = this.currentGeneration;
+    this.toolCallPending = true;
     if (gen.functionChannel.closed) {
       this.#logger.warn('received tool call but functionChannel is already closed.');
       return;
@@ -1492,7 +1528,6 @@ export class RealtimeSession extends llm.RealtimeSession {
     }
     gen.functionChannel.close();
-    this.markCurrentGenerationDone();
   }
   private handleToolCallCancellation(cancellation: types.LiveServerToolCallCancellation): void {

package/src/llm.test.ts CHANGED Viewed

@@ -2,15 +2,23 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import { llm } from '@livekit/agents-plugins-test';
-import { describe } from 'vitest';
+import { describe, it } from 'vitest';
 import { LLM } from './llm.js';
-describe('Google', async () => {
-  await llm(
-    new LLM({
-      model: 'gemini-2.5-flash',
-      temperature: 0,
-    }),
-    true,
-  );
-});
+const hasGoogleApiKey = Boolean(process.env.GOOGLE_API_KEY);
+if (hasGoogleApiKey) {
+  describe('Google', async () => {
+    await llm(
+      new LLM({
+        model: 'gemini-2.5-flash',
+        temperature: 0,
+      }),
+      true,
+    );
+  });
+} else {
+  describe('Google', () => {
+    it.skip('requires GOOGLE_API_KEY', () => {});
+  });
+}