@livekit/agents 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +2 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -3
- package/dist/index.d.ts +2 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -3
- package/dist/index.js.map +1 -1
- package/dist/tokenize/basic/hyphenator.cjs.map +1 -1
- package/dist/tokenize/basic/hyphenator.js.map +1 -1
- package/dist/utils.cjs +77 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +21 -0
- package/dist/utils.d.ts +21 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +76 -1
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +112 -71
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +112 -71
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/avatar/datastream_io.cjs +204 -0
- package/dist/voice/avatar/datastream_io.cjs.map +1 -0
- package/dist/voice/avatar/datastream_io.d.cts +37 -0
- package/dist/voice/avatar/datastream_io.d.ts +37 -0
- package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
- package/dist/voice/avatar/datastream_io.js +188 -0
- package/dist/voice/avatar/datastream_io.js.map +1 -0
- package/dist/{multimodal → voice/avatar}/index.cjs +4 -4
- package/dist/voice/avatar/index.cjs.map +1 -0
- package/dist/voice/avatar/index.d.cts +2 -0
- package/dist/voice/avatar/index.d.ts +2 -0
- package/dist/voice/avatar/index.d.ts.map +1 -0
- package/dist/voice/avatar/index.js +2 -0
- package/dist/voice/avatar/index.js.map +1 -0
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +1 -1
- package/dist/voice/io.d.ts +1 -1
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +2 -1
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +2 -1
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/run_context.cjs +13 -0
- package/dist/voice/run_context.cjs.map +1 -1
- package/dist/voice/run_context.d.cts +10 -0
- package/dist/voice/run_context.d.ts +10 -0
- package/dist/voice/run_context.d.ts.map +1 -1
- package/dist/voice/run_context.js +13 -0
- package/dist/voice/run_context.js.map +1 -1
- package/dist/voice/speech_handle.cjs +152 -30
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +67 -16
- package/dist/voice/speech_handle.d.ts +67 -16
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +153 -31
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/worker.cjs +4 -1
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +4 -1
- package/dist/worker.js.map +1 -1
- package/package.json +2 -2
- package/src/index.ts +2 -3
- package/src/tokenize/basic/hyphenator.ts +1 -1
- package/src/utils.ts +121 -1
- package/src/voice/agent_activity.ts +128 -78
- package/src/voice/avatar/datastream_io.ts +247 -0
- package/src/voice/avatar/index.ts +4 -0
- package/src/voice/index.ts +2 -0
- package/src/voice/io.ts +1 -1
- package/src/voice/room_io/_input.ts +8 -3
- package/src/voice/run_context.ts +16 -2
- package/src/voice/speech_handle.ts +183 -38
- package/src/worker.ts +5 -1
- package/dist/multimodal/agent_playout.cjs +0 -233
- package/dist/multimodal/agent_playout.cjs.map +0 -1
- package/dist/multimodal/agent_playout.d.cts +0 -34
- package/dist/multimodal/agent_playout.d.ts +0 -34
- package/dist/multimodal/agent_playout.d.ts.map +0 -1
- package/dist/multimodal/agent_playout.js +0 -207
- package/dist/multimodal/agent_playout.js.map +0 -1
- package/dist/multimodal/index.cjs.map +0 -1
- package/dist/multimodal/index.d.cts +0 -2
- package/dist/multimodal/index.d.ts +0 -2
- package/dist/multimodal/index.d.ts.map +0 -1
- package/dist/multimodal/index.js +0 -2
- package/dist/multimodal/index.js.map +0 -1
- package/src/multimodal/agent_playout.ts +0 -266
- package/src/multimodal/index.ts +0 -4
|
@@ -193,7 +193,7 @@ class AgentActivity {
|
|
|
193
193
|
this.started = true;
|
|
194
194
|
this._mainTask = import_utils.Task.from(({ signal }) => this.mainTask(signal));
|
|
195
195
|
this.createSpeechTask({
|
|
196
|
-
|
|
196
|
+
task: import_utils.Task.from(() => this.agent.onEnter()),
|
|
197
197
|
name: "AgentActivity_onEnter"
|
|
198
198
|
});
|
|
199
199
|
} finally {
|
|
@@ -312,7 +312,9 @@ class AgentActivity {
|
|
|
312
312
|
})
|
|
313
313
|
);
|
|
314
314
|
const task = this.createSpeechTask({
|
|
315
|
-
|
|
315
|
+
task: import_utils.Task.from(
|
|
316
|
+
(abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
|
|
317
|
+
),
|
|
316
318
|
ownedSpeechHandle: handle,
|
|
317
319
|
name: "AgentActivity.say_tts"
|
|
318
320
|
});
|
|
@@ -416,7 +418,9 @@ class AgentActivity {
|
|
|
416
418
|
);
|
|
417
419
|
this.logger.info({ speech_id: handle.id }, "Creating speech handle");
|
|
418
420
|
this.createSpeechTask({
|
|
419
|
-
|
|
421
|
+
task: import_utils.Task.from(
|
|
422
|
+
(abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
|
|
423
|
+
),
|
|
420
424
|
ownedSpeechHandle: handle,
|
|
421
425
|
name: "AgentActivity.realtimeGeneration"
|
|
422
426
|
});
|
|
@@ -480,16 +484,23 @@ class AgentActivity {
|
|
|
480
484
|
);
|
|
481
485
|
}
|
|
482
486
|
createSpeechTask(options) {
|
|
483
|
-
const {
|
|
484
|
-
this.speechTasks.add(
|
|
485
|
-
|
|
486
|
-
this.speechTasks.delete(
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
487
|
+
const { task, ownedSpeechHandle } = options;
|
|
488
|
+
this.speechTasks.add(task);
|
|
489
|
+
task.addDoneCallback(() => {
|
|
490
|
+
this.speechTasks.delete(task);
|
|
491
|
+
});
|
|
492
|
+
if (ownedSpeechHandle) {
|
|
493
|
+
ownedSpeechHandle._tasks.push(task);
|
|
494
|
+
task.addDoneCallback(() => {
|
|
495
|
+
if (ownedSpeechHandle._tasks.every((t) => t.done)) {
|
|
496
|
+
ownedSpeechHandle._markDone();
|
|
497
|
+
}
|
|
498
|
+
});
|
|
499
|
+
}
|
|
500
|
+
task.addDoneCallback(() => {
|
|
490
501
|
this.wakeupMainTask();
|
|
491
502
|
});
|
|
492
|
-
return
|
|
503
|
+
return task.result;
|
|
493
504
|
}
|
|
494
505
|
async onEndOfTurn(info) {
|
|
495
506
|
if (this.draining) {
|
|
@@ -502,7 +513,7 @@ class AgentActivity {
|
|
|
502
513
|
}
|
|
503
514
|
const oldTask = this._userTurnCompletedTask;
|
|
504
515
|
this._userTurnCompletedTask = this.createSpeechTask({
|
|
505
|
-
|
|
516
|
+
task: import_utils.Task.from(() => this.userTurnCompleted(info, oldTask)),
|
|
506
517
|
name: "AgentActivity.userTurnCompleted"
|
|
507
518
|
});
|
|
508
519
|
return true;
|
|
@@ -528,8 +539,8 @@ class AgentActivity {
|
|
|
528
539
|
}
|
|
529
540
|
const speechHandle = heapItem[2];
|
|
530
541
|
this._currentSpeech = speechHandle;
|
|
531
|
-
speechHandle.
|
|
532
|
-
await speechHandle.
|
|
542
|
+
speechHandle._authorizeGeneration();
|
|
543
|
+
await speechHandle._waitForGeneration();
|
|
533
544
|
this._currentSpeech = void 0;
|
|
534
545
|
}
|
|
535
546
|
if (this.draining && this.speechTasks.size === 0) {
|
|
@@ -582,16 +593,19 @@ class AgentActivity {
|
|
|
582
593
|
this.logger.info({ speech_id: handle.id }, "Creating speech handle");
|
|
583
594
|
if (this.llm instanceof import_llm.RealtimeModel) {
|
|
584
595
|
this.createSpeechTask({
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
596
|
+
task: import_utils.Task.from(
|
|
597
|
+
(abortController) => this.realtimeReplyTask({
|
|
598
|
+
speechHandle: handle,
|
|
599
|
+
// TODO(brian): support llm.ChatMessage for the realtime model
|
|
600
|
+
userInput: userMessage == null ? void 0 : userMessage.textContent,
|
|
601
|
+
instructions,
|
|
602
|
+
modelSettings: {
|
|
603
|
+
// isGiven(toolChoice) = toolChoice !== undefined
|
|
604
|
+
toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
|
|
605
|
+
},
|
|
606
|
+
abortController
|
|
607
|
+
})
|
|
608
|
+
),
|
|
595
609
|
ownedSpeechHandle: handle,
|
|
596
610
|
name: "AgentActivity.realtimeReply"
|
|
597
611
|
});
|
|
@@ -601,14 +615,19 @@ class AgentActivity {
|
|
|
601
615
|
${instructions}`;
|
|
602
616
|
}
|
|
603
617
|
const task = this.createSpeechTask({
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
618
|
+
task: import_utils.Task.from(
|
|
619
|
+
(abortController) => this.pipelineReplyTask(
|
|
620
|
+
handle,
|
|
621
|
+
chatCtx ?? this.agent.chatCtx,
|
|
622
|
+
this.agent.toolCtx,
|
|
623
|
+
{
|
|
624
|
+
toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
|
|
625
|
+
},
|
|
626
|
+
abortController,
|
|
627
|
+
instructions ? `${this.agent.instructions}
|
|
610
628
|
${instructions}` : instructions,
|
|
611
|
-
|
|
629
|
+
userMessage
|
|
630
|
+
)
|
|
612
631
|
),
|
|
613
632
|
ownedSpeechHandle: handle,
|
|
614
633
|
name: "AgentActivity.pipelineReply"
|
|
@@ -630,7 +649,7 @@ ${instructions}` : instructions,
|
|
|
630
649
|
if (currentSpeech === void 0) {
|
|
631
650
|
future.resolve();
|
|
632
651
|
} else {
|
|
633
|
-
currentSpeech.
|
|
652
|
+
currentSpeech.addDoneCallback(() => {
|
|
634
653
|
if (future.done) return;
|
|
635
654
|
future.resolve();
|
|
636
655
|
});
|
|
@@ -638,7 +657,7 @@ ${instructions}` : instructions,
|
|
|
638
657
|
return future;
|
|
639
658
|
}
|
|
640
659
|
onPipelineReplyDone() {
|
|
641
|
-
if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done)) {
|
|
660
|
+
if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done())) {
|
|
642
661
|
this.agentSession._updateAgentState("listening");
|
|
643
662
|
}
|
|
644
663
|
}
|
|
@@ -702,11 +721,10 @@ ${instructions}` : instructions,
|
|
|
702
721
|
(0, import_events.createMetricsCollectedEvent)({ metrics: eouMetrics })
|
|
703
722
|
);
|
|
704
723
|
}
|
|
705
|
-
async ttsTask(speechHandle, text, addToChatCtx, modelSettings, audio) {
|
|
724
|
+
async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
|
|
706
725
|
speechHandleStorage.enterWith(speechHandle);
|
|
707
726
|
const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
708
727
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
709
|
-
const replyAbortController = new AbortController();
|
|
710
728
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
711
729
|
if (speechHandle.interrupted) {
|
|
712
730
|
return;
|
|
@@ -795,10 +813,9 @@ ${instructions}` : instructions,
|
|
|
795
813
|
this.agentSession._updateAgentState("listening");
|
|
796
814
|
}
|
|
797
815
|
}
|
|
798
|
-
async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, instructions, newMessage, toolsMessages) {
|
|
816
|
+
async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) {
|
|
799
817
|
var _a, _b, _c;
|
|
800
818
|
speechHandleStorage.enterWith(speechHandle);
|
|
801
|
-
const replyAbortController = new AbortController();
|
|
802
819
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
803
820
|
const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
804
821
|
chatCtx = chatCtx.copy();
|
|
@@ -841,12 +858,20 @@ ${instructions}` : instructions,
|
|
|
841
858
|
);
|
|
842
859
|
tasks.push(ttsTask);
|
|
843
860
|
}
|
|
844
|
-
await speechHandle.waitIfNotInterrupted([speechHandle.
|
|
861
|
+
await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
|
|
862
|
+
if (newMessage && speechHandle.scheduled) {
|
|
863
|
+
chatCtx.insert(newMessage);
|
|
864
|
+
this.agent._chatCtx.insert(newMessage);
|
|
865
|
+
this.agentSession._conversationItemAdded(newMessage);
|
|
866
|
+
}
|
|
845
867
|
if (speechHandle.interrupted) {
|
|
846
868
|
replyAbortController.abort();
|
|
847
869
|
await (0, import_utils.cancelAndWait)(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
848
870
|
return;
|
|
849
871
|
}
|
|
872
|
+
this.agentSession._updateAgentState("thinking");
|
|
873
|
+
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
874
|
+
speechHandle._clearAuthorization();
|
|
850
875
|
const replyStartedAt = Date.now();
|
|
851
876
|
const trNodeResult = await this.agent.transcriptionNode(llmOutput, modelSettings);
|
|
852
877
|
let textOut = null;
|
|
@@ -893,7 +918,6 @@ ${instructions}` : instructions,
|
|
|
893
918
|
onToolExecutionStarted,
|
|
894
919
|
onToolExecutionCompleted
|
|
895
920
|
});
|
|
896
|
-
tasks.push(executeToolsTask);
|
|
897
921
|
await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
|
|
898
922
|
if (audioOutput) {
|
|
899
923
|
await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
|
|
@@ -948,7 +972,7 @@ ${instructions}` : instructions,
|
|
|
948
972
|
{ speech_id: speechHandle.id, message: forwardedText },
|
|
949
973
|
"playout completed with interrupt"
|
|
950
974
|
);
|
|
951
|
-
speechHandle.
|
|
975
|
+
speechHandle._markGenerationDone();
|
|
952
976
|
await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
953
977
|
return;
|
|
954
978
|
}
|
|
@@ -973,11 +997,11 @@ ${instructions}` : instructions,
|
|
|
973
997
|
} else if (this.agentSession.agentState === "speaking") {
|
|
974
998
|
this.agentSession._updateAgentState("listening");
|
|
975
999
|
}
|
|
976
|
-
speechHandle.
|
|
1000
|
+
speechHandle._markGenerationDone();
|
|
977
1001
|
await executeToolsTask.result;
|
|
978
1002
|
if (toolOutput.output.length === 0) return;
|
|
979
1003
|
const { maxToolSteps } = this.agentSession.options;
|
|
980
|
-
if (speechHandle.
|
|
1004
|
+
if (speechHandle.numSteps >= maxToolSteps) {
|
|
981
1005
|
this.logger.warn(
|
|
982
1006
|
{ speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
|
|
983
1007
|
"maximum number of function calls steps reached"
|
|
@@ -1032,7 +1056,7 @@ ${instructions}` : instructions,
|
|
|
1032
1056
|
chatCtx.insert(toolMessages);
|
|
1033
1057
|
const handle = import_speech_handle.SpeechHandle.create({
|
|
1034
1058
|
allowInterruptions: speechHandle.allowInterruptions,
|
|
1035
|
-
stepIndex: speechHandle.
|
|
1059
|
+
stepIndex: speechHandle._stepIndex + 1,
|
|
1036
1060
|
parent: speechHandle
|
|
1037
1061
|
});
|
|
1038
1062
|
this.agentSession.emit(
|
|
@@ -1045,14 +1069,17 @@ ${instructions}` : instructions,
|
|
|
1045
1069
|
);
|
|
1046
1070
|
const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
|
|
1047
1071
|
const toolResponseTask = this.createSpeechTask({
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1072
|
+
task: import_utils.Task.from(
|
|
1073
|
+
() => this.pipelineReplyTask(
|
|
1074
|
+
handle,
|
|
1075
|
+
chatCtx,
|
|
1076
|
+
toolCtx,
|
|
1077
|
+
{ toolChoice: respondToolChoice },
|
|
1078
|
+
replyAbortController,
|
|
1079
|
+
instructions,
|
|
1080
|
+
void 0,
|
|
1081
|
+
toolMessages
|
|
1082
|
+
)
|
|
1056
1083
|
),
|
|
1057
1084
|
ownedSpeechHandle: handle,
|
|
1058
1085
|
name: "AgentActivity.pipelineReply"
|
|
@@ -1066,7 +1093,7 @@ ${instructions}` : instructions,
|
|
|
1066
1093
|
this.agent._chatCtx.insert(toolMessages);
|
|
1067
1094
|
}
|
|
1068
1095
|
}
|
|
1069
|
-
async realtimeGenerationTask(speechHandle, ev, modelSettings) {
|
|
1096
|
+
async realtimeGenerationTask(speechHandle, ev, modelSettings, replyAbortController) {
|
|
1070
1097
|
var _a, _b, _c;
|
|
1071
1098
|
speechHandleStorage.enterWith(speechHandle);
|
|
1072
1099
|
if (!this.realtimeSession) {
|
|
@@ -1076,20 +1103,20 @@ ${instructions}` : instructions,
|
|
|
1076
1103
|
throw new Error("llm is not a realtime model");
|
|
1077
1104
|
}
|
|
1078
1105
|
this.logger.debug(
|
|
1079
|
-
{ speech_id: speechHandle.id, stepIndex: speechHandle.
|
|
1106
|
+
{ speech_id: speechHandle.id, stepIndex: speechHandle.numSteps },
|
|
1080
1107
|
"realtime generation started"
|
|
1081
1108
|
);
|
|
1082
1109
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
1083
1110
|
const textOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
1084
1111
|
const toolCtx = this.realtimeSession.tools;
|
|
1085
1112
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
1113
|
+
speechHandle._clearAuthorization();
|
|
1086
1114
|
if (speechHandle.interrupted) {
|
|
1087
1115
|
return;
|
|
1088
1116
|
}
|
|
1089
1117
|
const onFirstFrame = () => {
|
|
1090
1118
|
this.agentSession._updateAgentState("speaking");
|
|
1091
1119
|
};
|
|
1092
|
-
const replyAbortController = new AbortController();
|
|
1093
1120
|
const readMessages = async (abortController, outputs) => {
|
|
1094
1121
|
const forwardTasks = [];
|
|
1095
1122
|
try {
|
|
@@ -1173,9 +1200,13 @@ ${instructions}` : instructions,
|
|
|
1173
1200
|
"AgentActivity.realtime_generation.read_tool_stream"
|
|
1174
1201
|
)
|
|
1175
1202
|
);
|
|
1176
|
-
const onToolExecutionStarted = (
|
|
1203
|
+
const onToolExecutionStarted = (f) => {
|
|
1204
|
+
speechHandle._itemAdded([f]);
|
|
1177
1205
|
};
|
|
1178
|
-
const onToolExecutionCompleted = (
|
|
1206
|
+
const onToolExecutionCompleted = (out) => {
|
|
1207
|
+
if (out.toolCallOutput) {
|
|
1208
|
+
speechHandle._itemAdded([out.toolCallOutput]);
|
|
1209
|
+
}
|
|
1179
1210
|
};
|
|
1180
1211
|
const [executeToolsTask, toolOutput] = (0, import_generation.performToolExecutions)({
|
|
1181
1212
|
session: this.agentSession,
|
|
@@ -1231,7 +1262,7 @@ ${instructions}` : instructions,
|
|
|
1231
1262
|
interrupted: true
|
|
1232
1263
|
});
|
|
1233
1264
|
this.agent._chatCtx.insert(message);
|
|
1234
|
-
speechHandle.
|
|
1265
|
+
speechHandle._itemAdded([message]);
|
|
1235
1266
|
this.agentSession._conversationItemAdded(message);
|
|
1236
1267
|
}
|
|
1237
1268
|
this.logger.info(
|
|
@@ -1239,7 +1270,7 @@ ${instructions}` : instructions,
|
|
|
1239
1270
|
"playout completed with interrupt"
|
|
1240
1271
|
);
|
|
1241
1272
|
}
|
|
1242
|
-
speechHandle.
|
|
1273
|
+
speechHandle._markGenerationDone();
|
|
1243
1274
|
await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
1244
1275
|
return;
|
|
1245
1276
|
}
|
|
@@ -1252,17 +1283,17 @@ ${instructions}` : instructions,
|
|
|
1252
1283
|
interrupted: false
|
|
1253
1284
|
});
|
|
1254
1285
|
this.agent._chatCtx.insert(message);
|
|
1255
|
-
speechHandle.
|
|
1286
|
+
speechHandle._itemAdded([message]);
|
|
1256
1287
|
this.agentSession._conversationItemAdded(message);
|
|
1257
1288
|
}
|
|
1258
|
-
speechHandle.
|
|
1289
|
+
speechHandle._markGenerationDone();
|
|
1259
1290
|
toolOutput.firstToolStartedFuture.await.finally(() => {
|
|
1260
1291
|
this.agentSession._updateAgentState("thinking");
|
|
1261
1292
|
});
|
|
1262
1293
|
await executeToolsTask.result;
|
|
1263
1294
|
if (toolOutput.output.length === 0) return;
|
|
1264
1295
|
const { maxToolSteps } = this.agentSession.options;
|
|
1265
|
-
if (speechHandle.
|
|
1296
|
+
if (speechHandle.numSteps >= maxToolSteps) {
|
|
1266
1297
|
this.logger.warn(
|
|
1267
1298
|
{ speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
|
|
1268
1299
|
"maximum number of function calls steps reached"
|
|
@@ -1326,7 +1357,7 @@ ${instructions}` : instructions,
|
|
|
1326
1357
|
this.realtimeSession.interrupt();
|
|
1327
1358
|
const replySpeechHandle = import_speech_handle.SpeechHandle.create({
|
|
1328
1359
|
allowInterruptions: speechHandle.allowInterruptions,
|
|
1329
|
-
stepIndex: speechHandle.
|
|
1360
|
+
stepIndex: speechHandle.numSteps + 1,
|
|
1330
1361
|
parent: speechHandle
|
|
1331
1362
|
});
|
|
1332
1363
|
this.agentSession.emit(
|
|
@@ -1339,10 +1370,13 @@ ${instructions}` : instructions,
|
|
|
1339
1370
|
);
|
|
1340
1371
|
const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
|
|
1341
1372
|
this.createSpeechTask({
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1373
|
+
task: import_utils.Task.from(
|
|
1374
|
+
(abortController) => this.realtimeReplyTask({
|
|
1375
|
+
speechHandle: replySpeechHandle,
|
|
1376
|
+
modelSettings: { toolChoice },
|
|
1377
|
+
abortController
|
|
1378
|
+
})
|
|
1379
|
+
),
|
|
1346
1380
|
ownedSpeechHandle: replySpeechHandle,
|
|
1347
1381
|
name: "AgentActivity.realtime_reply"
|
|
1348
1382
|
});
|
|
@@ -1352,7 +1386,8 @@ ${instructions}` : instructions,
|
|
|
1352
1386
|
speechHandle,
|
|
1353
1387
|
modelSettings: { toolChoice },
|
|
1354
1388
|
userInput,
|
|
1355
|
-
instructions
|
|
1389
|
+
instructions,
|
|
1390
|
+
abortController
|
|
1356
1391
|
}) {
|
|
1357
1392
|
speechHandleStorage.enterWith(speechHandle);
|
|
1358
1393
|
if (!this.realtimeSession) {
|
|
@@ -1375,18 +1410,24 @@ ${instructions}` : instructions,
|
|
|
1375
1410
|
}
|
|
1376
1411
|
try {
|
|
1377
1412
|
const generationEvent = await this.realtimeSession.generateReply(instructions);
|
|
1378
|
-
await this.realtimeGenerationTask(
|
|
1413
|
+
await this.realtimeGenerationTask(
|
|
1414
|
+
speechHandle,
|
|
1415
|
+
generationEvent,
|
|
1416
|
+
{ toolChoice },
|
|
1417
|
+
abortController
|
|
1418
|
+
);
|
|
1379
1419
|
} finally {
|
|
1380
1420
|
if (toolChoice !== void 0 && toolChoice !== originalToolChoice) {
|
|
1381
1421
|
this.realtimeSession.updateOptions({ toolChoice: originalToolChoice });
|
|
1382
1422
|
}
|
|
1383
1423
|
}
|
|
1384
1424
|
}
|
|
1385
|
-
scheduleSpeech(speechHandle, priority,
|
|
1386
|
-
if (this.draining && !
|
|
1425
|
+
scheduleSpeech(speechHandle, priority, force = false) {
|
|
1426
|
+
if (this.draining && !force) {
|
|
1387
1427
|
throw new Error("cannot schedule new speech, the agent is draining");
|
|
1388
1428
|
}
|
|
1389
1429
|
this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]);
|
|
1430
|
+
speechHandle._markScheduled();
|
|
1390
1431
|
this.wakeupMainTask();
|
|
1391
1432
|
}
|
|
1392
1433
|
async drain() {
|
|
@@ -1395,7 +1436,7 @@ ${instructions}` : instructions,
|
|
|
1395
1436
|
try {
|
|
1396
1437
|
if (this._draining) return;
|
|
1397
1438
|
this.createSpeechTask({
|
|
1398
|
-
|
|
1439
|
+
task: import_utils.Task.from(() => this.agent.onExit()),
|
|
1399
1440
|
name: "AgentActivity_onExit"
|
|
1400
1441
|
});
|
|
1401
1442
|
this.wakeupMainTask();
|