npm - @livekit/agents - Versions diffs - 1.0.15 → 1.0.17 - Mend

@livekit/agents 1.0.15 → 1.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98) hide show

package/dist/cli.cjs +12 -12
package/dist/cli.cjs.map +1 -1
package/dist/cli.d.cts +3 -3
package/dist/cli.d.ts +3 -3
package/dist/cli.d.ts.map +1 -1
package/dist/cli.js +13 -13
package/dist/cli.js.map +1 -1
package/dist/inference/stt.cjs.map +1 -1
package/dist/inference/stt.d.ts.map +1 -1
package/dist/inference/stt.js +1 -1
package/dist/inference/stt.js.map +1 -1
package/dist/inference/tts.cjs.map +1 -1
package/dist/inference/tts.d.cts +2 -1
package/dist/inference/tts.d.ts +2 -1
package/dist/inference/tts.d.ts.map +1 -1
package/dist/inference/tts.js +1 -5
package/dist/inference/tts.js.map +1 -1
package/dist/llm/chat_context.cjs +78 -0
package/dist/llm/chat_context.cjs.map +1 -1
package/dist/llm/chat_context.d.cts +16 -0
package/dist/llm/chat_context.d.ts +16 -0
package/dist/llm/chat_context.d.ts.map +1 -1
package/dist/llm/chat_context.js +78 -0
package/dist/llm/chat_context.js.map +1 -1
package/dist/llm/chat_context.test.cjs +531 -0
package/dist/llm/chat_context.test.cjs.map +1 -1
package/dist/llm/chat_context.test.js +531 -0
package/dist/llm/chat_context.test.js.map +1 -1
package/dist/llm/tool_context.cjs +40 -0
package/dist/llm/tool_context.cjs.map +1 -1
package/dist/llm/tool_context.d.cts +2 -0
package/dist/llm/tool_context.d.ts +2 -0
package/dist/llm/tool_context.d.ts.map +1 -1
package/dist/llm/tool_context.js +38 -0
package/dist/llm/tool_context.js.map +1 -1
package/dist/metrics/base.cjs.map +1 -1
package/dist/metrics/base.d.cts +7 -0
package/dist/metrics/base.d.ts +7 -0
package/dist/metrics/base.d.ts.map +1 -1
package/dist/stt/stt.cjs +1 -1
package/dist/stt/stt.cjs.map +1 -1
package/dist/stt/stt.d.cts +7 -1
package/dist/stt/stt.d.ts +7 -1
package/dist/stt/stt.d.ts.map +1 -1
package/dist/stt/stt.js +1 -1
package/dist/stt/stt.js.map +1 -1
package/dist/tts/tts.cjs +2 -4
package/dist/tts/tts.cjs.map +1 -1
package/dist/tts/tts.d.ts.map +1 -1
package/dist/tts/tts.js +3 -5
package/dist/tts/tts.js.map +1 -1
package/dist/voice/agent_activity.cjs +83 -8
package/dist/voice/agent_activity.cjs.map +1 -1
package/dist/voice/agent_activity.d.cts +6 -2
package/dist/voice/agent_activity.d.ts +6 -2
package/dist/voice/agent_activity.d.ts.map +1 -1
package/dist/voice/agent_activity.js +83 -8
package/dist/voice/agent_activity.js.map +1 -1
package/dist/voice/agent_session.cjs +3 -2
package/dist/voice/agent_session.cjs.map +1 -1
package/dist/voice/agent_session.d.cts +2 -1
package/dist/voice/agent_session.d.ts +2 -1
package/dist/voice/agent_session.d.ts.map +1 -1
package/dist/voice/agent_session.js +3 -2
package/dist/voice/agent_session.js.map +1 -1
package/dist/voice/audio_recognition.cjs +138 -16
package/dist/voice/audio_recognition.cjs.map +1 -1
package/dist/voice/audio_recognition.d.cts +11 -0
package/dist/voice/audio_recognition.d.ts +11 -0
package/dist/voice/audio_recognition.d.ts.map +1 -1
package/dist/voice/audio_recognition.js +138 -16
package/dist/voice/audio_recognition.js.map +1 -1
package/dist/voice/room_io/_input.cjs.map +1 -1
package/dist/voice/room_io/_input.d.ts.map +1 -1
package/dist/voice/room_io/_input.js +0 -1
package/dist/voice/room_io/_input.js.map +1 -1
package/dist/worker.cjs +17 -11
package/dist/worker.cjs.map +1 -1
package/dist/worker.d.cts +16 -9
package/dist/worker.d.ts +16 -9
package/dist/worker.d.ts.map +1 -1
package/dist/worker.js +16 -12
package/dist/worker.js.map +1 -1
package/package.json +1 -1
package/src/cli.ts +17 -17
package/src/inference/stt.ts +2 -1
package/src/inference/tts.ts +2 -5
package/src/llm/chat_context.test.ts +607 -0
package/src/llm/chat_context.ts +106 -0
package/src/llm/tool_context.ts +44 -0
package/src/metrics/base.ts +7 -0
package/src/stt/stt.ts +8 -1
package/src/tts/tts.ts +7 -5
package/src/voice/agent_activity.ts +119 -9
package/src/voice/agent_session.ts +3 -1
package/src/voice/audio_recognition.ts +235 -57
package/src/voice/room_io/_input.ts +1 -1
package/src/worker.ts +29 -18

package/dist/voice/agent_activity.cjs CHANGED Viewed

@@ -27,6 +27,7 @@ var import_node_async_hooks = require("node:async_hooks");
 var import_web = require("node:stream/web");
 var import_chat_context = require("../llm/chat_context.cjs");
 var import_llm = require("../llm/index.cjs");
+var import_tool_context = require("../llm/tool_context.cjs");
 var import_log = require("../log.cjs");
 var import_deferred_stream = require("../stream/deferred_stream.cjs");
 var import_stt = require("../stt/stt.cjs");
@@ -58,6 +59,7 @@ class AgentActivity {
   audioStream = new import_deferred_stream.DeferredReadableStream();
   // default to null as None, which maps to the default provider tool choice value
   toolChoice = null;
+  _preemptiveGeneration;
   agent;
   agentSession;
   /** @internal */
@@ -430,8 +432,12 @@ class AgentActivity {
   onStartOfSpeech(_ev) {
     this.agentSession._updateUserState("speaking");
   }
-  onEndOfSpeech(_ev) {
-    this.agentSession._updateUserState("listening");
+  onEndOfSpeech(ev) {
+    let speechEndTime = Date.now();
+    if (ev) {
+      speechEndTime = speechEndTime - ev.silenceDuration;
+    }
+    this.agentSession._updateUserState("listening", speechEndTime);
   }
   onVADInferenceDone(ev) {
     var _a, _b;
@@ -485,6 +491,44 @@ class AgentActivity {
       })
     );
   }
+  onPreemptiveGeneration(info) {
+    if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof import_llm.LLM)) {
+      return;
+    }
+    this.cancelPreemptiveGeneration();
+    this.logger.info(
+      {
+        newTranscript: info.newTranscript,
+        transcriptConfidence: info.transcriptConfidence
+      },
+      "starting preemptive generation"
+    );
+    const userMessage = import_chat_context.ChatMessage.create({
+      role: "user",
+      content: info.newTranscript
+    });
+    const chatCtx = this.agent.chatCtx.copy();
+    const speechHandle = this.generateReply({
+      userMessage,
+      chatCtx,
+      scheduleSpeech: false
+    });
+    this._preemptiveGeneration = {
+      speechHandle,
+      userMessage,
+      info,
+      chatCtx: chatCtx.copy(),
+      tools: { ...this.tools },
+      toolChoice: this.toolChoice,
+      createdAt: Date.now()
+    };
+  }
+  cancelPreemptiveGeneration() {
+    if (this._preemptiveGeneration !== void 0) {
+      this._preemptiveGeneration.speechHandle._cancel();
+      this._preemptiveGeneration = void 0;
+    }
+  }
   createSpeechTask(options) {
     const { task, ownedSpeechHandle } = options;
     this.speechTasks.add(task);
@@ -506,10 +550,12 @@ class AgentActivity {
   }
   async onEndOfTurn(info) {
     if (this.draining) {
+      this.cancelPreemptiveGeneration();
       this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
       return true;
     }
     if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0 && info.newTranscript.split(" ").length < this.agentSession.options.minInterruptionWords) {
+      this.cancelPreemptiveGeneration();
       this.logger.info("skipping user input, new_transcript is too short");
       return false;
     }
@@ -563,7 +609,8 @@ class AgentActivity {
       chatCtx,
       instructions: defaultInstructions,
       toolChoice: defaultToolChoice,
-      allowInterruptions: defaultAllowInterruptions
+      allowInterruptions: defaultAllowInterruptions,
+      scheduleSpeech = true
     } = options;
     let instructions = defaultInstructions;
     let toolChoice = defaultToolChoice;
@@ -636,7 +683,9 @@ ${instructions}` : instructions,
       });
       task.finally(() => this.onPipelineReplyDone());
     }
-    this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
+    if (scheduleSpeech) {
+      this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
+    }
     return handle;
   }
   interrupt() {
@@ -709,13 +758,36 @@ ${instructions}` : instructions,
     } else if (this.llm === void 0) {
       return;
     }
-    const speechHandle = this.generateReply({ userMessage, chatCtx });
+    let speechHandle;
+    if (this._preemptiveGeneration !== void 0) {
+      const preemptive = this._preemptiveGeneration;
+      if (preemptive.info.newTranscript === (userMessage == null ? void 0 : userMessage.textContent) && preemptive.chatCtx.isEquivalent(chatCtx) && (0, import_tool_context.isSameToolContext)(preemptive.tools, this.tools) && (0, import_tool_context.isSameToolChoice)(preemptive.toolChoice, this.toolChoice)) {
+        speechHandle = preemptive.speechHandle;
+        this.scheduleSpeech(speechHandle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
+        this.logger.debug(
+          {
+            preemptiveLeadTime: Date.now() - preemptive.createdAt
+          },
+          "using preemptive generation"
+        );
+      } else {
+        this.logger.warn(
+          "preemptive generation enabled but chat context or tools have changed after `onUserTurnCompleted`"
+        );
+        preemptive.speechHandle._cancel();
+      }
+      this._preemptiveGeneration = void 0;
+    }
+    if (speechHandle === void 0) {
+      speechHandle = this.generateReply({ userMessage, chatCtx });
+    }
     const eouMetrics = {
       type: "eou_metrics",
       timestamp: Date.now(),
       endOfUtteranceDelayMs: info.endOfUtteranceDelay,
       transcriptionDelayMs: info.transcriptionDelay,
       onUserTurnCompletedDelayMs: callbackDuration,
+      lastSpeakingTimeMs: info.stoppedSpeakingAt ?? 0,
       speechId: speechHandle.id
     };
     this.agentSession.emit(
@@ -823,8 +895,6 @@ ${instructions}` : instructions,
     chatCtx = chatCtx.copy();
     if (newMessage) {
       chatCtx.insert(newMessage);
-      this.agent._chatCtx.insert(newMessage);
-      this.agentSession._conversationItemAdded(newMessage);
     }
     if (instructions) {
       try {
@@ -837,7 +907,6 @@ ${instructions}` : instructions,
         this.logger.error({ error: e }, "error occurred during updateInstructions");
       }
     }
-    this.agentSession._updateAgentState("thinking");
     const tasks = [];
     const [llmTask, llmGenData] = (0, import_generation.performLLMInference)(
       // preserve  `this` context in llmNode
@@ -861,6 +930,10 @@ ${instructions}` : instructions,
       tasks.push(ttsTask);
     }
     await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
+    if (newMessage && speechHandle.scheduled) {
+      this.agent._chatCtx.insert(newMessage);
+      this.agentSession._conversationItemAdded(newMessage);
+    }
     if (speechHandle.interrupted) {
       replyAbortController.abort();
       await (0, import_utils.cancelAndWait)(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
@@ -1442,6 +1515,7 @@ ${instructions}` : instructions,
     const unlock = await this.lock.lock();
     try {
       if (this._draining) return;
+      this.cancelPreemptiveGeneration();
       this.createSpeechTask({
         task: import_utils.Task.from(() => this.agent.onExit()),
         name: "AgentActivity_onExit"
@@ -1460,6 +1534,7 @@ ${instructions}` : instructions,
       if (!this._draining) {
         this.logger.warn("task closing without draining");
       }
+      this.cancelPreemptiveGeneration();
       if (this.llm instanceof import_llm.LLM) {
         this.llm.off("metrics_collected", this.onMetricsCollected);
       }