npm - @livekit/agents - Versions diffs - 1.0.15 → 1.0.16 - Mend

@livekit/agents 1.0.15 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92)

package/dist/cli.cjs +12 -12
package/dist/cli.cjs.map +1 -1
package/dist/cli.d.cts +3 -3
package/dist/cli.d.ts +3 -3
package/dist/cli.d.ts.map +1 -1
package/dist/cli.js +13 -13
package/dist/cli.js.map +1 -1
package/dist/inference/stt.cjs.map +1 -1
package/dist/inference/stt.d.ts.map +1 -1
package/dist/inference/stt.js +1 -1
package/dist/inference/stt.js.map +1 -1
package/dist/inference/tts.cjs.map +1 -1
package/dist/inference/tts.d.cts +2 -1
package/dist/inference/tts.d.ts +2 -1
package/dist/inference/tts.d.ts.map +1 -1
package/dist/inference/tts.js +1 -5
package/dist/inference/tts.js.map +1 -1
package/dist/llm/chat_context.cjs +78 -0
package/dist/llm/chat_context.cjs.map +1 -1
package/dist/llm/chat_context.d.cts +16 -0
package/dist/llm/chat_context.d.ts +16 -0
package/dist/llm/chat_context.d.ts.map +1 -1
package/dist/llm/chat_context.js +78 -0
package/dist/llm/chat_context.js.map +1 -1
package/dist/llm/chat_context.test.cjs +531 -0
package/dist/llm/chat_context.test.cjs.map +1 -1
package/dist/llm/chat_context.test.js +531 -0
package/dist/llm/chat_context.test.js.map +1 -1
package/dist/llm/tool_context.cjs +40 -0
package/dist/llm/tool_context.cjs.map +1 -1
package/dist/llm/tool_context.d.cts +2 -0
package/dist/llm/tool_context.d.ts +2 -0
package/dist/llm/tool_context.d.ts.map +1 -1
package/dist/llm/tool_context.js +38 -0
package/dist/llm/tool_context.js.map +1 -1
package/dist/metrics/base.cjs.map +1 -1
package/dist/metrics/base.d.cts +7 -0
package/dist/metrics/base.d.ts +7 -0
package/dist/metrics/base.d.ts.map +1 -1
package/dist/stt/stt.cjs +1 -0
package/dist/stt/stt.cjs.map +1 -1
package/dist/stt/stt.d.cts +7 -1
package/dist/stt/stt.d.ts +7 -1
package/dist/stt/stt.d.ts.map +1 -1
package/dist/stt/stt.js +1 -0
package/dist/stt/stt.js.map +1 -1
package/dist/voice/agent_activity.cjs +83 -8
package/dist/voice/agent_activity.cjs.map +1 -1
package/dist/voice/agent_activity.d.cts +6 -2
package/dist/voice/agent_activity.d.ts +6 -2
package/dist/voice/agent_activity.d.ts.map +1 -1
package/dist/voice/agent_activity.js +83 -8
package/dist/voice/agent_activity.js.map +1 -1
package/dist/voice/agent_session.cjs +3 -2
package/dist/voice/agent_session.cjs.map +1 -1
package/dist/voice/agent_session.d.cts +2 -1
package/dist/voice/agent_session.d.ts +2 -1
package/dist/voice/agent_session.d.ts.map +1 -1
package/dist/voice/agent_session.js +3 -2
package/dist/voice/agent_session.js.map +1 -1
package/dist/voice/audio_recognition.cjs +138 -16
package/dist/voice/audio_recognition.cjs.map +1 -1
package/dist/voice/audio_recognition.d.cts +11 -0
package/dist/voice/audio_recognition.d.ts +11 -0
package/dist/voice/audio_recognition.d.ts.map +1 -1
package/dist/voice/audio_recognition.js +138 -16
package/dist/voice/audio_recognition.js.map +1 -1
package/dist/voice/room_io/_input.cjs.map +1 -1
package/dist/voice/room_io/_input.d.ts.map +1 -1
package/dist/voice/room_io/_input.js +0 -1
package/dist/voice/room_io/_input.js.map +1 -1
package/dist/worker.cjs +17 -11
package/dist/worker.cjs.map +1 -1
package/dist/worker.d.cts +16 -9
package/dist/worker.d.ts +16 -9
package/dist/worker.d.ts.map +1 -1
package/dist/worker.js +16 -12
package/dist/worker.js.map +1 -1
package/package.json +1 -1
package/src/cli.ts +17 -17
package/src/inference/stt.ts +2 -1
package/src/inference/tts.ts +2 -5
package/src/llm/chat_context.test.ts +607 -0
package/src/llm/chat_context.ts +106 -0
package/src/llm/tool_context.ts +44 -0
package/src/metrics/base.ts +7 -0
package/src/stt/stt.ts +6 -0
package/src/voice/agent_activity.ts +119 -9
package/src/voice/agent_session.ts +3 -1
package/src/voice/audio_recognition.ts +235 -57
package/src/voice/room_io/_input.ts +1 -1
package/src/worker.ts +29 -18

package/dist/voice/agent_activity.js CHANGED

@@ -7,6 +7,7 @@ import {
   LLM,
   RealtimeModel
 } from "../llm/index.js";
+import { isSameToolChoice, isSameToolContext } from "../llm/tool_context.js";
 import { log } from "../log.js";
 import { DeferredReadableStream } from "../stream/deferred_stream.js";
 import { STT } from "../stt/stt.js";
@@ -55,6 +56,7 @@ class AgentActivity {
   audioStream = new DeferredReadableStream();
   // default to null as None, which maps to the default provider tool choice value
   toolChoice = null;
+  _preemptiveGeneration;
   agent;
   agentSession;
   /** @internal */
@@ -427,8 +429,12 @@ class AgentActivity {
   onStartOfSpeech(_ev) {
     this.agentSession._updateUserState("speaking");
   }
-  onEndOfSpeech(_ev) {
-    this.agentSession._updateUserState("listening");
+  onEndOfSpeech(ev) {
+    let speechEndTime = Date.now();
+    if (ev) {
+      speechEndTime = speechEndTime - ev.silenceDuration;
+    }
+    this.agentSession._updateUserState("listening", speechEndTime);
   }
   onVADInferenceDone(ev) {
     var _a, _b;
@@ -482,6 +488,44 @@ class AgentActivity {
       })
     );
   }
+  onPreemptiveGeneration(info) {
+    if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof LLM)) {
+      return;
+    }
+    this.cancelPreemptiveGeneration();
+    this.logger.info(
+      {
+        newTranscript: info.newTranscript,
+        transcriptConfidence: info.transcriptConfidence
+      },
+      "starting preemptive generation"
+    );
+    const userMessage = ChatMessage.create({
+      role: "user",
+      content: info.newTranscript
+    });
+    const chatCtx = this.agent.chatCtx.copy();
+    const speechHandle = this.generateReply({
+      userMessage,
+      chatCtx,
+      scheduleSpeech: false
+    });
+    this._preemptiveGeneration = {
+      speechHandle,
+      userMessage,
+      info,
+      chatCtx: chatCtx.copy(),
+      tools: { ...this.tools },
+      toolChoice: this.toolChoice,
+      createdAt: Date.now()
+    };
+  }
+  cancelPreemptiveGeneration() {
+    if (this._preemptiveGeneration !== void 0) {
+      this._preemptiveGeneration.speechHandle._cancel();
+      this._preemptiveGeneration = void 0;
+    }
+  }
   createSpeechTask(options) {
     const { task, ownedSpeechHandle } = options;
     this.speechTasks.add(task);
@@ -503,10 +547,12 @@ class AgentActivity {
   }
   async onEndOfTurn(info) {
     if (this.draining) {
+      this.cancelPreemptiveGeneration();
       this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
       return true;
     }
     if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0 && info.newTranscript.split(" ").length < this.agentSession.options.minInterruptionWords) {
+      this.cancelPreemptiveGeneration();
       this.logger.info("skipping user input, new_transcript is too short");
       return false;
     }
@@ -560,7 +606,8 @@ class AgentActivity {
       chatCtx,
       instructions: defaultInstructions,
       toolChoice: defaultToolChoice,
-      allowInterruptions: defaultAllowInterruptions
+      allowInterruptions: defaultAllowInterruptions,
+      scheduleSpeech = true
     } = options;
     let instructions = defaultInstructions;
     let toolChoice = defaultToolChoice;
@@ -633,7 +680,9 @@ ${instructions}` : instructions,
       });
       task.finally(() => this.onPipelineReplyDone());
     }
-    this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
+    if (scheduleSpeech) {
+      this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
+    }
     return handle;
   }
   interrupt() {
@@ -706,13 +755,36 @@ ${instructions}` : instructions,
     } else if (this.llm === void 0) {
       return;
     }
-    const speechHandle = this.generateReply({ userMessage, chatCtx });
+    let speechHandle;
+    if (this._preemptiveGeneration !== void 0) {
+      const preemptive = this._preemptiveGeneration;
+      if (preemptive.info.newTranscript === (userMessage == null ? void 0 : userMessage.textContent) && preemptive.chatCtx.isEquivalent(chatCtx) && isSameToolContext(preemptive.tools, this.tools) && isSameToolChoice(preemptive.toolChoice, this.toolChoice)) {
+        speechHandle = preemptive.speechHandle;
+        this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
+        this.logger.debug(
+          {
+            preemptiveLeadTime: Date.now() - preemptive.createdAt
+          },
+          "using preemptive generation"
+        );
+      } else {
+        this.logger.warn(
+          "preemptive generation enabled but chat context or tools have changed after `onUserTurnCompleted`"
+        );
+        preemptive.speechHandle._cancel();
+      }
+      this._preemptiveGeneration = void 0;
+    }
+    if (speechHandle === void 0) {
+      speechHandle = this.generateReply({ userMessage, chatCtx });
+    }
     const eouMetrics = {
       type: "eou_metrics",
       timestamp: Date.now(),
       endOfUtteranceDelayMs: info.endOfUtteranceDelay,
       transcriptionDelayMs: info.transcriptionDelay,
       onUserTurnCompletedDelayMs: callbackDuration,
+      lastSpeakingTimeMs: info.stoppedSpeakingAt ?? 0,
       speechId: speechHandle.id
     };
     this.agentSession.emit(
@@ -820,8 +892,6 @@ ${instructions}` : instructions,
     chatCtx = chatCtx.copy();
     if (newMessage) {
       chatCtx.insert(newMessage);
-      this.agent._chatCtx.insert(newMessage);
-      this.agentSession._conversationItemAdded(newMessage);
     }
     if (instructions) {
       try {
@@ -834,7 +904,6 @@ ${instructions}` : instructions,
         this.logger.error({ error: e }, "error occurred during updateInstructions");
       }
     }
-    this.agentSession._updateAgentState("thinking");
     const tasks = [];
     const [llmTask, llmGenData] = performLLMInference(
       // preserve  `this` context in llmNode
@@ -858,6 +927,10 @@ ${instructions}` : instructions,
       tasks.push(ttsTask);
     }
     await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
+    if (newMessage && speechHandle.scheduled) {
+      this.agent._chatCtx.insert(newMessage);
+      this.agentSession._conversationItemAdded(newMessage);
+    }
     if (speechHandle.interrupted) {
       replyAbortController.abort();
       await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
@@ -1439,6 +1512,7 @@ ${instructions}` : instructions,
     const unlock = await this.lock.lock();
     try {
       if (this._draining) return;
+      this.cancelPreemptiveGeneration();
       this.createSpeechTask({
         task: Task.from(() => this.agent.onExit()),
         name: "AgentActivity_onExit"
@@ -1457,6 +1531,7 @@ ${instructions}` : instructions,
       if (!this._draining) {
         this.logger.warn("task closing without draining");
       }
+      this.cancelPreemptiveGeneration();
       if (this.llm instanceof LLM) {
         this.llm.off("metrics_collected", this.onMetricsCollected);
       }