@livekit/agents 1.0.15 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +12 -12
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.cts +3 -3
- package/dist/cli.d.ts +3 -3
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +13 -13
- package/dist/cli.js.map +1 -1
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +1 -1
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -1
- package/dist/inference/tts.d.ts +2 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +1 -5
- package/dist/inference/tts.js.map +1 -1
- package/dist/llm/chat_context.cjs +78 -0
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +16 -0
- package/dist/llm/chat_context.d.ts +16 -0
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +78 -0
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/chat_context.test.cjs +531 -0
- package/dist/llm/chat_context.test.cjs.map +1 -1
- package/dist/llm/chat_context.test.js +531 -0
- package/dist/llm/chat_context.test.js.map +1 -1
- package/dist/llm/tool_context.cjs +40 -0
- package/dist/llm/tool_context.cjs.map +1 -1
- package/dist/llm/tool_context.d.cts +2 -0
- package/dist/llm/tool_context.d.ts +2 -0
- package/dist/llm/tool_context.d.ts.map +1 -1
- package/dist/llm/tool_context.js +38 -0
- package/dist/llm/tool_context.js.map +1 -1
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.cts +7 -0
- package/dist/metrics/base.d.ts +7 -0
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/stt/stt.cjs +1 -0
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +7 -1
- package/dist/stt/stt.d.ts +7 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +1 -0
- package/dist/stt/stt.js.map +1 -1
- package/dist/voice/agent_activity.cjs +83 -8
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +6 -2
- package/dist/voice/agent_activity.d.ts +6 -2
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +83 -8
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +3 -2
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +2 -1
- package/dist/voice/agent_session.d.ts +2 -1
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +3 -2
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +138 -16
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +11 -0
- package/dist/voice/audio_recognition.d.ts +11 -0
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +138 -16
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +0 -1
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/worker.cjs +17 -11
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.cts +16 -9
- package/dist/worker.d.ts +16 -9
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +16 -12
- package/dist/worker.js.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +17 -17
- package/src/inference/stt.ts +2 -1
- package/src/inference/tts.ts +2 -5
- package/src/llm/chat_context.test.ts +607 -0
- package/src/llm/chat_context.ts +106 -0
- package/src/llm/tool_context.ts +44 -0
- package/src/metrics/base.ts +7 -0
- package/src/stt/stt.ts +6 -0
- package/src/voice/agent_activity.ts +119 -9
- package/src/voice/agent_session.ts +3 -1
- package/src/voice/audio_recognition.ts +235 -57
- package/src/voice/room_io/_input.ts +1 -1
- package/src/worker.ts +29 -18
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
LLM,
|
|
8
8
|
RealtimeModel
|
|
9
9
|
} from "../llm/index.js";
|
|
10
|
+
import { isSameToolChoice, isSameToolContext } from "../llm/tool_context.js";
|
|
10
11
|
import { log } from "../log.js";
|
|
11
12
|
import { DeferredReadableStream } from "../stream/deferred_stream.js";
|
|
12
13
|
import { STT } from "../stt/stt.js";
|
|
@@ -55,6 +56,7 @@ class AgentActivity {
|
|
|
55
56
|
audioStream = new DeferredReadableStream();
|
|
56
57
|
// default to null as None, which maps to the default provider tool choice value
|
|
57
58
|
toolChoice = null;
|
|
59
|
+
_preemptiveGeneration;
|
|
58
60
|
agent;
|
|
59
61
|
agentSession;
|
|
60
62
|
/** @internal */
|
|
@@ -427,8 +429,12 @@ class AgentActivity {
|
|
|
427
429
|
onStartOfSpeech(_ev) {
|
|
428
430
|
this.agentSession._updateUserState("speaking");
|
|
429
431
|
}
|
|
430
|
-
onEndOfSpeech(
|
|
431
|
-
|
|
432
|
+
onEndOfSpeech(ev) {
|
|
433
|
+
let speechEndTime = Date.now();
|
|
434
|
+
if (ev) {
|
|
435
|
+
speechEndTime = speechEndTime - ev.silenceDuration;
|
|
436
|
+
}
|
|
437
|
+
this.agentSession._updateUserState("listening", speechEndTime);
|
|
432
438
|
}
|
|
433
439
|
onVADInferenceDone(ev) {
|
|
434
440
|
var _a, _b;
|
|
@@ -482,6 +488,44 @@ class AgentActivity {
|
|
|
482
488
|
})
|
|
483
489
|
);
|
|
484
490
|
}
|
|
491
|
+
onPreemptiveGeneration(info) {
|
|
492
|
+
if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof LLM)) {
|
|
493
|
+
return;
|
|
494
|
+
}
|
|
495
|
+
this.cancelPreemptiveGeneration();
|
|
496
|
+
this.logger.info(
|
|
497
|
+
{
|
|
498
|
+
newTranscript: info.newTranscript,
|
|
499
|
+
transcriptConfidence: info.transcriptConfidence
|
|
500
|
+
},
|
|
501
|
+
"starting preemptive generation"
|
|
502
|
+
);
|
|
503
|
+
const userMessage = ChatMessage.create({
|
|
504
|
+
role: "user",
|
|
505
|
+
content: info.newTranscript
|
|
506
|
+
});
|
|
507
|
+
const chatCtx = this.agent.chatCtx.copy();
|
|
508
|
+
const speechHandle = this.generateReply({
|
|
509
|
+
userMessage,
|
|
510
|
+
chatCtx,
|
|
511
|
+
scheduleSpeech: false
|
|
512
|
+
});
|
|
513
|
+
this._preemptiveGeneration = {
|
|
514
|
+
speechHandle,
|
|
515
|
+
userMessage,
|
|
516
|
+
info,
|
|
517
|
+
chatCtx: chatCtx.copy(),
|
|
518
|
+
tools: { ...this.tools },
|
|
519
|
+
toolChoice: this.toolChoice,
|
|
520
|
+
createdAt: Date.now()
|
|
521
|
+
};
|
|
522
|
+
}
|
|
523
|
+
cancelPreemptiveGeneration() {
|
|
524
|
+
if (this._preemptiveGeneration !== void 0) {
|
|
525
|
+
this._preemptiveGeneration.speechHandle._cancel();
|
|
526
|
+
this._preemptiveGeneration = void 0;
|
|
527
|
+
}
|
|
528
|
+
}
|
|
485
529
|
createSpeechTask(options) {
|
|
486
530
|
const { task, ownedSpeechHandle } = options;
|
|
487
531
|
this.speechTasks.add(task);
|
|
@@ -503,10 +547,12 @@ class AgentActivity {
|
|
|
503
547
|
}
|
|
504
548
|
async onEndOfTurn(info) {
|
|
505
549
|
if (this.draining) {
|
|
550
|
+
this.cancelPreemptiveGeneration();
|
|
506
551
|
this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
|
|
507
552
|
return true;
|
|
508
553
|
}
|
|
509
554
|
if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0 && info.newTranscript.split(" ").length < this.agentSession.options.minInterruptionWords) {
|
|
555
|
+
this.cancelPreemptiveGeneration();
|
|
510
556
|
this.logger.info("skipping user input, new_transcript is too short");
|
|
511
557
|
return false;
|
|
512
558
|
}
|
|
@@ -560,7 +606,8 @@ class AgentActivity {
|
|
|
560
606
|
chatCtx,
|
|
561
607
|
instructions: defaultInstructions,
|
|
562
608
|
toolChoice: defaultToolChoice,
|
|
563
|
-
allowInterruptions: defaultAllowInterruptions
|
|
609
|
+
allowInterruptions: defaultAllowInterruptions,
|
|
610
|
+
scheduleSpeech = true
|
|
564
611
|
} = options;
|
|
565
612
|
let instructions = defaultInstructions;
|
|
566
613
|
let toolChoice = defaultToolChoice;
|
|
@@ -633,7 +680,9 @@ ${instructions}` : instructions,
|
|
|
633
680
|
});
|
|
634
681
|
task.finally(() => this.onPipelineReplyDone());
|
|
635
682
|
}
|
|
636
|
-
|
|
683
|
+
if (scheduleSpeech) {
|
|
684
|
+
this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
|
|
685
|
+
}
|
|
637
686
|
return handle;
|
|
638
687
|
}
|
|
639
688
|
interrupt() {
|
|
@@ -706,13 +755,36 @@ ${instructions}` : instructions,
|
|
|
706
755
|
} else if (this.llm === void 0) {
|
|
707
756
|
return;
|
|
708
757
|
}
|
|
709
|
-
|
|
758
|
+
let speechHandle;
|
|
759
|
+
if (this._preemptiveGeneration !== void 0) {
|
|
760
|
+
const preemptive = this._preemptiveGeneration;
|
|
761
|
+
if (preemptive.info.newTranscript === (userMessage == null ? void 0 : userMessage.textContent) && preemptive.chatCtx.isEquivalent(chatCtx) && isSameToolContext(preemptive.tools, this.tools) && isSameToolChoice(preemptive.toolChoice, this.toolChoice)) {
|
|
762
|
+
speechHandle = preemptive.speechHandle;
|
|
763
|
+
this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
|
|
764
|
+
this.logger.debug(
|
|
765
|
+
{
|
|
766
|
+
preemptiveLeadTime: Date.now() - preemptive.createdAt
|
|
767
|
+
},
|
|
768
|
+
"using preemptive generation"
|
|
769
|
+
);
|
|
770
|
+
} else {
|
|
771
|
+
this.logger.warn(
|
|
772
|
+
"preemptive generation enabled but chat context or tools have changed after `onUserTurnCompleted`"
|
|
773
|
+
);
|
|
774
|
+
preemptive.speechHandle._cancel();
|
|
775
|
+
}
|
|
776
|
+
this._preemptiveGeneration = void 0;
|
|
777
|
+
}
|
|
778
|
+
if (speechHandle === void 0) {
|
|
779
|
+
speechHandle = this.generateReply({ userMessage, chatCtx });
|
|
780
|
+
}
|
|
710
781
|
const eouMetrics = {
|
|
711
782
|
type: "eou_metrics",
|
|
712
783
|
timestamp: Date.now(),
|
|
713
784
|
endOfUtteranceDelayMs: info.endOfUtteranceDelay,
|
|
714
785
|
transcriptionDelayMs: info.transcriptionDelay,
|
|
715
786
|
onUserTurnCompletedDelayMs: callbackDuration,
|
|
787
|
+
lastSpeakingTimeMs: info.stoppedSpeakingAt ?? 0,
|
|
716
788
|
speechId: speechHandle.id
|
|
717
789
|
};
|
|
718
790
|
this.agentSession.emit(
|
|
@@ -820,8 +892,6 @@ ${instructions}` : instructions,
|
|
|
820
892
|
chatCtx = chatCtx.copy();
|
|
821
893
|
if (newMessage) {
|
|
822
894
|
chatCtx.insert(newMessage);
|
|
823
|
-
this.agent._chatCtx.insert(newMessage);
|
|
824
|
-
this.agentSession._conversationItemAdded(newMessage);
|
|
825
895
|
}
|
|
826
896
|
if (instructions) {
|
|
827
897
|
try {
|
|
@@ -834,7 +904,6 @@ ${instructions}` : instructions,
|
|
|
834
904
|
this.logger.error({ error: e }, "error occurred during updateInstructions");
|
|
835
905
|
}
|
|
836
906
|
}
|
|
837
|
-
this.agentSession._updateAgentState("thinking");
|
|
838
907
|
const tasks = [];
|
|
839
908
|
const [llmTask, llmGenData] = performLLMInference(
|
|
840
909
|
// preserve `this` context in llmNode
|
|
@@ -858,6 +927,10 @@ ${instructions}` : instructions,
|
|
|
858
927
|
tasks.push(ttsTask);
|
|
859
928
|
}
|
|
860
929
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
|
|
930
|
+
if (newMessage && speechHandle.scheduled) {
|
|
931
|
+
this.agent._chatCtx.insert(newMessage);
|
|
932
|
+
this.agentSession._conversationItemAdded(newMessage);
|
|
933
|
+
}
|
|
861
934
|
if (speechHandle.interrupted) {
|
|
862
935
|
replyAbortController.abort();
|
|
863
936
|
await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
@@ -1439,6 +1512,7 @@ ${instructions}` : instructions,
|
|
|
1439
1512
|
const unlock = await this.lock.lock();
|
|
1440
1513
|
try {
|
|
1441
1514
|
if (this._draining) return;
|
|
1515
|
+
this.cancelPreemptiveGeneration();
|
|
1442
1516
|
this.createSpeechTask({
|
|
1443
1517
|
task: Task.from(() => this.agent.onExit()),
|
|
1444
1518
|
name: "AgentActivity_onExit"
|
|
@@ -1457,6 +1531,7 @@ ${instructions}` : instructions,
|
|
|
1457
1531
|
if (!this._draining) {
|
|
1458
1532
|
this.logger.warn("task closing without draining");
|
|
1459
1533
|
}
|
|
1534
|
+
this.cancelPreemptiveGeneration();
|
|
1460
1535
|
if (this.llm instanceof LLM) {
|
|
1461
1536
|
this.llm.off("metrics_collected", this.onMetricsCollected);
|
|
1462
1537
|
}
|