@livekit/agents 1.0.15 → 1.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +12 -12
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.cts +3 -3
- package/dist/cli.d.ts +3 -3
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +13 -13
- package/dist/cli.js.map +1 -1
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +1 -1
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -1
- package/dist/inference/tts.d.ts +2 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +1 -5
- package/dist/inference/tts.js.map +1 -1
- package/dist/llm/chat_context.cjs +78 -0
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +16 -0
- package/dist/llm/chat_context.d.ts +16 -0
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +78 -0
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/chat_context.test.cjs +531 -0
- package/dist/llm/chat_context.test.cjs.map +1 -1
- package/dist/llm/chat_context.test.js +531 -0
- package/dist/llm/chat_context.test.js.map +1 -1
- package/dist/llm/tool_context.cjs +40 -0
- package/dist/llm/tool_context.cjs.map +1 -1
- package/dist/llm/tool_context.d.cts +2 -0
- package/dist/llm/tool_context.d.ts +2 -0
- package/dist/llm/tool_context.d.ts.map +1 -1
- package/dist/llm/tool_context.js +38 -0
- package/dist/llm/tool_context.js.map +1 -1
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +7 -0
- package/dist/metrics/base.d.ts +7 -0
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/stt/stt.cjs +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +7 -1
- package/dist/stt/stt.d.ts +7 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/tts.cjs +2 -4
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +3 -5
- package/dist/tts/tts.js.map +1 -1
- package/dist/voice/agent_activity.cjs +83 -8
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +6 -2
- package/dist/voice/agent_activity.d.ts +6 -2
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +83 -8
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +3 -2
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +2 -1
- package/dist/voice/agent_session.d.ts +2 -1
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +3 -2
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +138 -16
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +11 -0
- package/dist/voice/audio_recognition.d.ts +11 -0
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +138 -16
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +0 -1
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/worker.cjs +17 -11
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.cts +16 -9
- package/dist/worker.d.ts +16 -9
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +16 -12
- package/dist/worker.js.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +17 -17
- package/src/inference/stt.ts +2 -1
- package/src/inference/tts.ts +2 -5
- package/src/llm/chat_context.test.ts +607 -0
- package/src/llm/chat_context.ts +106 -0
- package/src/llm/tool_context.ts +44 -0
- package/src/metrics/base.ts +7 -0
- package/src/stt/stt.ts +8 -1
- package/src/tts/tts.ts +7 -5
- package/src/voice/agent_activity.ts +119 -9
- package/src/voice/agent_session.ts +3 -1
- package/src/voice/audio_recognition.ts +235 -57
- package/src/voice/room_io/_input.ts +1 -1
- package/src/worker.ts +29 -18
|
@@ -27,6 +27,7 @@ var import_node_async_hooks = require("node:async_hooks");
|
|
|
27
27
|
var import_web = require("node:stream/web");
|
|
28
28
|
var import_chat_context = require("../llm/chat_context.cjs");
|
|
29
29
|
var import_llm = require("../llm/index.cjs");
|
|
30
|
+
var import_tool_context = require("../llm/tool_context.cjs");
|
|
30
31
|
var import_log = require("../log.cjs");
|
|
31
32
|
var import_deferred_stream = require("../stream/deferred_stream.cjs");
|
|
32
33
|
var import_stt = require("../stt/stt.cjs");
|
|
@@ -58,6 +59,7 @@ class AgentActivity {
|
|
|
58
59
|
audioStream = new import_deferred_stream.DeferredReadableStream();
|
|
59
60
|
// default to null as None, which maps to the default provider tool choice value
|
|
60
61
|
toolChoice = null;
|
|
62
|
+
_preemptiveGeneration;
|
|
61
63
|
agent;
|
|
62
64
|
agentSession;
|
|
63
65
|
/** @internal */
|
|
@@ -430,8 +432,12 @@ class AgentActivity {
|
|
|
430
432
|
onStartOfSpeech(_ev) {
|
|
431
433
|
this.agentSession._updateUserState("speaking");
|
|
432
434
|
}
|
|
433
|
-
onEndOfSpeech(
|
|
434
|
-
|
|
435
|
+
onEndOfSpeech(ev) {
|
|
436
|
+
let speechEndTime = Date.now();
|
|
437
|
+
if (ev) {
|
|
438
|
+
speechEndTime = speechEndTime - ev.silenceDuration;
|
|
439
|
+
}
|
|
440
|
+
this.agentSession._updateUserState("listening", speechEndTime);
|
|
435
441
|
}
|
|
436
442
|
onVADInferenceDone(ev) {
|
|
437
443
|
var _a, _b;
|
|
@@ -485,6 +491,44 @@ class AgentActivity {
|
|
|
485
491
|
})
|
|
486
492
|
);
|
|
487
493
|
}
|
|
494
|
+
onPreemptiveGeneration(info) {
|
|
495
|
+
if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof import_llm.LLM)) {
|
|
496
|
+
return;
|
|
497
|
+
}
|
|
498
|
+
this.cancelPreemptiveGeneration();
|
|
499
|
+
this.logger.info(
|
|
500
|
+
{
|
|
501
|
+
newTranscript: info.newTranscript,
|
|
502
|
+
transcriptConfidence: info.transcriptConfidence
|
|
503
|
+
},
|
|
504
|
+
"starting preemptive generation"
|
|
505
|
+
);
|
|
506
|
+
const userMessage = import_chat_context.ChatMessage.create({
|
|
507
|
+
role: "user",
|
|
508
|
+
content: info.newTranscript
|
|
509
|
+
});
|
|
510
|
+
const chatCtx = this.agent.chatCtx.copy();
|
|
511
|
+
const speechHandle = this.generateReply({
|
|
512
|
+
userMessage,
|
|
513
|
+
chatCtx,
|
|
514
|
+
scheduleSpeech: false
|
|
515
|
+
});
|
|
516
|
+
this._preemptiveGeneration = {
|
|
517
|
+
speechHandle,
|
|
518
|
+
userMessage,
|
|
519
|
+
info,
|
|
520
|
+
chatCtx: chatCtx.copy(),
|
|
521
|
+
tools: { ...this.tools },
|
|
522
|
+
toolChoice: this.toolChoice,
|
|
523
|
+
createdAt: Date.now()
|
|
524
|
+
};
|
|
525
|
+
}
|
|
526
|
+
cancelPreemptiveGeneration() {
|
|
527
|
+
if (this._preemptiveGeneration !== void 0) {
|
|
528
|
+
this._preemptiveGeneration.speechHandle._cancel();
|
|
529
|
+
this._preemptiveGeneration = void 0;
|
|
530
|
+
}
|
|
531
|
+
}
|
|
488
532
|
createSpeechTask(options) {
|
|
489
533
|
const { task, ownedSpeechHandle } = options;
|
|
490
534
|
this.speechTasks.add(task);
|
|
@@ -506,10 +550,12 @@ class AgentActivity {
|
|
|
506
550
|
}
|
|
507
551
|
async onEndOfTurn(info) {
|
|
508
552
|
if (this.draining) {
|
|
553
|
+
this.cancelPreemptiveGeneration();
|
|
509
554
|
this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
|
|
510
555
|
return true;
|
|
511
556
|
}
|
|
512
557
|
if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0 && info.newTranscript.split(" ").length < this.agentSession.options.minInterruptionWords) {
|
|
558
|
+
this.cancelPreemptiveGeneration();
|
|
513
559
|
this.logger.info("skipping user input, new_transcript is too short");
|
|
514
560
|
return false;
|
|
515
561
|
}
|
|
@@ -563,7 +609,8 @@ class AgentActivity {
|
|
|
563
609
|
chatCtx,
|
|
564
610
|
instructions: defaultInstructions,
|
|
565
611
|
toolChoice: defaultToolChoice,
|
|
566
|
-
allowInterruptions: defaultAllowInterruptions
|
|
612
|
+
allowInterruptions: defaultAllowInterruptions,
|
|
613
|
+
scheduleSpeech = true
|
|
567
614
|
} = options;
|
|
568
615
|
let instructions = defaultInstructions;
|
|
569
616
|
let toolChoice = defaultToolChoice;
|
|
@@ -636,7 +683,9 @@ ${instructions}` : instructions,
|
|
|
636
683
|
});
|
|
637
684
|
task.finally(() => this.onPipelineReplyDone());
|
|
638
685
|
}
|
|
639
|
-
|
|
686
|
+
if (scheduleSpeech) {
|
|
687
|
+
this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
|
|
688
|
+
}
|
|
640
689
|
return handle;
|
|
641
690
|
}
|
|
642
691
|
interrupt() {
|
|
@@ -709,13 +758,36 @@ ${instructions}` : instructions,
|
|
|
709
758
|
} else if (this.llm === void 0) {
|
|
710
759
|
return;
|
|
711
760
|
}
|
|
712
|
-
|
|
761
|
+
let speechHandle;
|
|
762
|
+
if (this._preemptiveGeneration !== void 0) {
|
|
763
|
+
const preemptive = this._preemptiveGeneration;
|
|
764
|
+
if (preemptive.info.newTranscript === (userMessage == null ? void 0 : userMessage.textContent) && preemptive.chatCtx.isEquivalent(chatCtx) && (0, import_tool_context.isSameToolContext)(preemptive.tools, this.tools) && (0, import_tool_context.isSameToolChoice)(preemptive.toolChoice, this.toolChoice)) {
|
|
765
|
+
speechHandle = preemptive.speechHandle;
|
|
766
|
+
this.scheduleSpeech(speechHandle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
|
|
767
|
+
this.logger.debug(
|
|
768
|
+
{
|
|
769
|
+
preemptiveLeadTime: Date.now() - preemptive.createdAt
|
|
770
|
+
},
|
|
771
|
+
"using preemptive generation"
|
|
772
|
+
);
|
|
773
|
+
} else {
|
|
774
|
+
this.logger.warn(
|
|
775
|
+
"preemptive generation enabled but chat context or tools have changed after `onUserTurnCompleted`"
|
|
776
|
+
);
|
|
777
|
+
preemptive.speechHandle._cancel();
|
|
778
|
+
}
|
|
779
|
+
this._preemptiveGeneration = void 0;
|
|
780
|
+
}
|
|
781
|
+
if (speechHandle === void 0) {
|
|
782
|
+
speechHandle = this.generateReply({ userMessage, chatCtx });
|
|
783
|
+
}
|
|
713
784
|
const eouMetrics = {
|
|
714
785
|
type: "eou_metrics",
|
|
715
786
|
timestamp: Date.now(),
|
|
716
787
|
endOfUtteranceDelayMs: info.endOfUtteranceDelay,
|
|
717
788
|
transcriptionDelayMs: info.transcriptionDelay,
|
|
718
789
|
onUserTurnCompletedDelayMs: callbackDuration,
|
|
790
|
+
lastSpeakingTimeMs: info.stoppedSpeakingAt ?? 0,
|
|
719
791
|
speechId: speechHandle.id
|
|
720
792
|
};
|
|
721
793
|
this.agentSession.emit(
|
|
@@ -823,8 +895,6 @@ ${instructions}` : instructions,
|
|
|
823
895
|
chatCtx = chatCtx.copy();
|
|
824
896
|
if (newMessage) {
|
|
825
897
|
chatCtx.insert(newMessage);
|
|
826
|
-
this.agent._chatCtx.insert(newMessage);
|
|
827
|
-
this.agentSession._conversationItemAdded(newMessage);
|
|
828
898
|
}
|
|
829
899
|
if (instructions) {
|
|
830
900
|
try {
|
|
@@ -837,7 +907,6 @@ ${instructions}` : instructions,
|
|
|
837
907
|
this.logger.error({ error: e }, "error occurred during updateInstructions");
|
|
838
908
|
}
|
|
839
909
|
}
|
|
840
|
-
this.agentSession._updateAgentState("thinking");
|
|
841
910
|
const tasks = [];
|
|
842
911
|
const [llmTask, llmGenData] = (0, import_generation.performLLMInference)(
|
|
843
912
|
// preserve `this` context in llmNode
|
|
@@ -861,6 +930,10 @@ ${instructions}` : instructions,
|
|
|
861
930
|
tasks.push(ttsTask);
|
|
862
931
|
}
|
|
863
932
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
|
|
933
|
+
if (newMessage && speechHandle.scheduled) {
|
|
934
|
+
this.agent._chatCtx.insert(newMessage);
|
|
935
|
+
this.agentSession._conversationItemAdded(newMessage);
|
|
936
|
+
}
|
|
864
937
|
if (speechHandle.interrupted) {
|
|
865
938
|
replyAbortController.abort();
|
|
866
939
|
await (0, import_utils.cancelAndWait)(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
@@ -1442,6 +1515,7 @@ ${instructions}` : instructions,
|
|
|
1442
1515
|
const unlock = await this.lock.lock();
|
|
1443
1516
|
try {
|
|
1444
1517
|
if (this._draining) return;
|
|
1518
|
+
this.cancelPreemptiveGeneration();
|
|
1445
1519
|
this.createSpeechTask({
|
|
1446
1520
|
task: import_utils.Task.from(() => this.agent.onExit()),
|
|
1447
1521
|
name: "AgentActivity_onExit"
|
|
@@ -1460,6 +1534,7 @@ ${instructions}` : instructions,
|
|
|
1460
1534
|
if (!this._draining) {
|
|
1461
1535
|
this.logger.warn("task closing without draining");
|
|
1462
1536
|
}
|
|
1537
|
+
this.cancelPreemptiveGeneration();
|
|
1463
1538
|
if (this.llm instanceof import_llm.LLM) {
|
|
1464
1539
|
this.llm.off("metrics_collected", this.onMetricsCollected);
|
|
1465
1540
|
}
|