@livekit/agents 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +2 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -3
- package/dist/index.d.ts +2 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -3
- package/dist/index.js.map +1 -1
- package/dist/tokenize/basic/hyphenator.cjs.map +1 -1
- package/dist/tokenize/basic/hyphenator.js.map +1 -1
- package/dist/utils.cjs +77 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +21 -0
- package/dist/utils.d.ts +21 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +76 -1
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +112 -71
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +112 -71
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/avatar/datastream_io.cjs +204 -0
- package/dist/voice/avatar/datastream_io.cjs.map +1 -0
- package/dist/voice/avatar/datastream_io.d.cts +37 -0
- package/dist/voice/avatar/datastream_io.d.ts +37 -0
- package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
- package/dist/voice/avatar/datastream_io.js +188 -0
- package/dist/voice/avatar/datastream_io.js.map +1 -0
- package/dist/{multimodal → voice/avatar}/index.cjs +4 -4
- package/dist/voice/avatar/index.cjs.map +1 -0
- package/dist/voice/avatar/index.d.cts +2 -0
- package/dist/voice/avatar/index.d.ts +2 -0
- package/dist/voice/avatar/index.d.ts.map +1 -0
- package/dist/voice/avatar/index.js +2 -0
- package/dist/voice/avatar/index.js.map +1 -0
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +1 -1
- package/dist/voice/io.d.ts +1 -1
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +2 -1
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +2 -1
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/run_context.cjs +13 -0
- package/dist/voice/run_context.cjs.map +1 -1
- package/dist/voice/run_context.d.cts +10 -0
- package/dist/voice/run_context.d.ts +10 -0
- package/dist/voice/run_context.d.ts.map +1 -1
- package/dist/voice/run_context.js +13 -0
- package/dist/voice/run_context.js.map +1 -1
- package/dist/voice/speech_handle.cjs +152 -30
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +67 -16
- package/dist/voice/speech_handle.d.ts +67 -16
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +153 -31
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/worker.cjs +4 -1
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +4 -1
- package/dist/worker.js.map +1 -1
- package/package.json +2 -2
- package/src/index.ts +2 -3
- package/src/tokenize/basic/hyphenator.ts +1 -1
- package/src/utils.ts +121 -1
- package/src/voice/agent_activity.ts +128 -78
- package/src/voice/avatar/datastream_io.ts +247 -0
- package/src/voice/avatar/index.ts +4 -0
- package/src/voice/index.ts +2 -0
- package/src/voice/io.ts +1 -1
- package/src/voice/room_io/_input.ts +8 -3
- package/src/voice/run_context.ts +16 -2
- package/src/voice/speech_handle.ts +183 -38
- package/src/worker.ts +5 -1
- package/dist/multimodal/agent_playout.cjs +0 -233
- package/dist/multimodal/agent_playout.cjs.map +0 -1
- package/dist/multimodal/agent_playout.d.cts +0 -34
- package/dist/multimodal/agent_playout.d.ts +0 -34
- package/dist/multimodal/agent_playout.d.ts.map +0 -1
- package/dist/multimodal/agent_playout.js +0 -207
- package/dist/multimodal/agent_playout.js.map +0 -1
- package/dist/multimodal/index.cjs.map +0 -1
- package/dist/multimodal/index.d.cts +0 -2
- package/dist/multimodal/index.d.ts +0 -2
- package/dist/multimodal/index.d.ts.map +0 -1
- package/dist/multimodal/index.js +0 -2
- package/dist/multimodal/index.js.map +0 -1
- package/src/multimodal/agent_playout.ts +0 -266
- package/src/multimodal/index.ts +0 -4
|
@@ -190,7 +190,7 @@ class AgentActivity {
|
|
|
190
190
|
this.started = true;
|
|
191
191
|
this._mainTask = Task.from(({ signal }) => this.mainTask(signal));
|
|
192
192
|
this.createSpeechTask({
|
|
193
|
-
|
|
193
|
+
task: Task.from(() => this.agent.onEnter()),
|
|
194
194
|
name: "AgentActivity_onEnter"
|
|
195
195
|
});
|
|
196
196
|
} finally {
|
|
@@ -309,7 +309,9 @@ class AgentActivity {
|
|
|
309
309
|
})
|
|
310
310
|
);
|
|
311
311
|
const task = this.createSpeechTask({
|
|
312
|
-
|
|
312
|
+
task: Task.from(
|
|
313
|
+
(abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
|
|
314
|
+
),
|
|
313
315
|
ownedSpeechHandle: handle,
|
|
314
316
|
name: "AgentActivity.say_tts"
|
|
315
317
|
});
|
|
@@ -413,7 +415,9 @@ class AgentActivity {
|
|
|
413
415
|
);
|
|
414
416
|
this.logger.info({ speech_id: handle.id }, "Creating speech handle");
|
|
415
417
|
this.createSpeechTask({
|
|
416
|
-
|
|
418
|
+
task: Task.from(
|
|
419
|
+
(abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
|
|
420
|
+
),
|
|
417
421
|
ownedSpeechHandle: handle,
|
|
418
422
|
name: "AgentActivity.realtimeGeneration"
|
|
419
423
|
});
|
|
@@ -477,16 +481,23 @@ class AgentActivity {
|
|
|
477
481
|
);
|
|
478
482
|
}
|
|
479
483
|
createSpeechTask(options) {
|
|
480
|
-
const {
|
|
481
|
-
this.speechTasks.add(
|
|
482
|
-
|
|
483
|
-
this.speechTasks.delete(
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
484
|
+
const { task, ownedSpeechHandle } = options;
|
|
485
|
+
this.speechTasks.add(task);
|
|
486
|
+
task.addDoneCallback(() => {
|
|
487
|
+
this.speechTasks.delete(task);
|
|
488
|
+
});
|
|
489
|
+
if (ownedSpeechHandle) {
|
|
490
|
+
ownedSpeechHandle._tasks.push(task);
|
|
491
|
+
task.addDoneCallback(() => {
|
|
492
|
+
if (ownedSpeechHandle._tasks.every((t) => t.done)) {
|
|
493
|
+
ownedSpeechHandle._markDone();
|
|
494
|
+
}
|
|
495
|
+
});
|
|
496
|
+
}
|
|
497
|
+
task.addDoneCallback(() => {
|
|
487
498
|
this.wakeupMainTask();
|
|
488
499
|
});
|
|
489
|
-
return
|
|
500
|
+
return task.result;
|
|
490
501
|
}
|
|
491
502
|
async onEndOfTurn(info) {
|
|
492
503
|
if (this.draining) {
|
|
@@ -499,7 +510,7 @@ class AgentActivity {
|
|
|
499
510
|
}
|
|
500
511
|
const oldTask = this._userTurnCompletedTask;
|
|
501
512
|
this._userTurnCompletedTask = this.createSpeechTask({
|
|
502
|
-
|
|
513
|
+
task: Task.from(() => this.userTurnCompleted(info, oldTask)),
|
|
503
514
|
name: "AgentActivity.userTurnCompleted"
|
|
504
515
|
});
|
|
505
516
|
return true;
|
|
@@ -525,8 +536,8 @@ class AgentActivity {
|
|
|
525
536
|
}
|
|
526
537
|
const speechHandle = heapItem[2];
|
|
527
538
|
this._currentSpeech = speechHandle;
|
|
528
|
-
speechHandle.
|
|
529
|
-
await speechHandle.
|
|
539
|
+
speechHandle._authorizeGeneration();
|
|
540
|
+
await speechHandle._waitForGeneration();
|
|
530
541
|
this._currentSpeech = void 0;
|
|
531
542
|
}
|
|
532
543
|
if (this.draining && this.speechTasks.size === 0) {
|
|
@@ -579,16 +590,19 @@ class AgentActivity {
|
|
|
579
590
|
this.logger.info({ speech_id: handle.id }, "Creating speech handle");
|
|
580
591
|
if (this.llm instanceof RealtimeModel) {
|
|
581
592
|
this.createSpeechTask({
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
593
|
+
task: Task.from(
|
|
594
|
+
(abortController) => this.realtimeReplyTask({
|
|
595
|
+
speechHandle: handle,
|
|
596
|
+
// TODO(brian): support llm.ChatMessage for the realtime model
|
|
597
|
+
userInput: userMessage == null ? void 0 : userMessage.textContent,
|
|
598
|
+
instructions,
|
|
599
|
+
modelSettings: {
|
|
600
|
+
// isGiven(toolChoice) = toolChoice !== undefined
|
|
601
|
+
toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
|
|
602
|
+
},
|
|
603
|
+
abortController
|
|
604
|
+
})
|
|
605
|
+
),
|
|
592
606
|
ownedSpeechHandle: handle,
|
|
593
607
|
name: "AgentActivity.realtimeReply"
|
|
594
608
|
});
|
|
@@ -598,14 +612,19 @@ class AgentActivity {
|
|
|
598
612
|
${instructions}`;
|
|
599
613
|
}
|
|
600
614
|
const task = this.createSpeechTask({
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
615
|
+
task: Task.from(
|
|
616
|
+
(abortController) => this.pipelineReplyTask(
|
|
617
|
+
handle,
|
|
618
|
+
chatCtx ?? this.agent.chatCtx,
|
|
619
|
+
this.agent.toolCtx,
|
|
620
|
+
{
|
|
621
|
+
toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
|
|
622
|
+
},
|
|
623
|
+
abortController,
|
|
624
|
+
instructions ? `${this.agent.instructions}
|
|
607
625
|
${instructions}` : instructions,
|
|
608
|
-
|
|
626
|
+
userMessage
|
|
627
|
+
)
|
|
609
628
|
),
|
|
610
629
|
ownedSpeechHandle: handle,
|
|
611
630
|
name: "AgentActivity.pipelineReply"
|
|
@@ -627,7 +646,7 @@ ${instructions}` : instructions,
|
|
|
627
646
|
if (currentSpeech === void 0) {
|
|
628
647
|
future.resolve();
|
|
629
648
|
} else {
|
|
630
|
-
currentSpeech.
|
|
649
|
+
currentSpeech.addDoneCallback(() => {
|
|
631
650
|
if (future.done) return;
|
|
632
651
|
future.resolve();
|
|
633
652
|
});
|
|
@@ -635,7 +654,7 @@ ${instructions}` : instructions,
|
|
|
635
654
|
return future;
|
|
636
655
|
}
|
|
637
656
|
onPipelineReplyDone() {
|
|
638
|
-
if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done)) {
|
|
657
|
+
if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done())) {
|
|
639
658
|
this.agentSession._updateAgentState("listening");
|
|
640
659
|
}
|
|
641
660
|
}
|
|
@@ -699,11 +718,10 @@ ${instructions}` : instructions,
|
|
|
699
718
|
createMetricsCollectedEvent({ metrics: eouMetrics })
|
|
700
719
|
);
|
|
701
720
|
}
|
|
702
|
-
async ttsTask(speechHandle, text, addToChatCtx, modelSettings, audio) {
|
|
721
|
+
async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
|
|
703
722
|
speechHandleStorage.enterWith(speechHandle);
|
|
704
723
|
const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
705
724
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
706
|
-
const replyAbortController = new AbortController();
|
|
707
725
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
708
726
|
if (speechHandle.interrupted) {
|
|
709
727
|
return;
|
|
@@ -792,10 +810,9 @@ ${instructions}` : instructions,
|
|
|
792
810
|
this.agentSession._updateAgentState("listening");
|
|
793
811
|
}
|
|
794
812
|
}
|
|
795
|
-
async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, instructions, newMessage, toolsMessages) {
|
|
813
|
+
async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) {
|
|
796
814
|
var _a, _b, _c;
|
|
797
815
|
speechHandleStorage.enterWith(speechHandle);
|
|
798
|
-
const replyAbortController = new AbortController();
|
|
799
816
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
800
817
|
const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
801
818
|
chatCtx = chatCtx.copy();
|
|
@@ -838,12 +855,20 @@ ${instructions}` : instructions,
|
|
|
838
855
|
);
|
|
839
856
|
tasks.push(ttsTask);
|
|
840
857
|
}
|
|
841
|
-
await speechHandle.waitIfNotInterrupted([speechHandle.
|
|
858
|
+
await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
|
|
859
|
+
if (newMessage && speechHandle.scheduled) {
|
|
860
|
+
chatCtx.insert(newMessage);
|
|
861
|
+
this.agent._chatCtx.insert(newMessage);
|
|
862
|
+
this.agentSession._conversationItemAdded(newMessage);
|
|
863
|
+
}
|
|
842
864
|
if (speechHandle.interrupted) {
|
|
843
865
|
replyAbortController.abort();
|
|
844
866
|
await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
845
867
|
return;
|
|
846
868
|
}
|
|
869
|
+
this.agentSession._updateAgentState("thinking");
|
|
870
|
+
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
871
|
+
speechHandle._clearAuthorization();
|
|
847
872
|
const replyStartedAt = Date.now();
|
|
848
873
|
const trNodeResult = await this.agent.transcriptionNode(llmOutput, modelSettings);
|
|
849
874
|
let textOut = null;
|
|
@@ -890,7 +915,6 @@ ${instructions}` : instructions,
|
|
|
890
915
|
onToolExecutionStarted,
|
|
891
916
|
onToolExecutionCompleted
|
|
892
917
|
});
|
|
893
|
-
tasks.push(executeToolsTask);
|
|
894
918
|
await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
|
|
895
919
|
if (audioOutput) {
|
|
896
920
|
await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
|
|
@@ -945,7 +969,7 @@ ${instructions}` : instructions,
|
|
|
945
969
|
{ speech_id: speechHandle.id, message: forwardedText },
|
|
946
970
|
"playout completed with interrupt"
|
|
947
971
|
);
|
|
948
|
-
speechHandle.
|
|
972
|
+
speechHandle._markGenerationDone();
|
|
949
973
|
await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
950
974
|
return;
|
|
951
975
|
}
|
|
@@ -970,11 +994,11 @@ ${instructions}` : instructions,
|
|
|
970
994
|
} else if (this.agentSession.agentState === "speaking") {
|
|
971
995
|
this.agentSession._updateAgentState("listening");
|
|
972
996
|
}
|
|
973
|
-
speechHandle.
|
|
997
|
+
speechHandle._markGenerationDone();
|
|
974
998
|
await executeToolsTask.result;
|
|
975
999
|
if (toolOutput.output.length === 0) return;
|
|
976
1000
|
const { maxToolSteps } = this.agentSession.options;
|
|
977
|
-
if (speechHandle.
|
|
1001
|
+
if (speechHandle.numSteps >= maxToolSteps) {
|
|
978
1002
|
this.logger.warn(
|
|
979
1003
|
{ speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
|
|
980
1004
|
"maximum number of function calls steps reached"
|
|
@@ -1029,7 +1053,7 @@ ${instructions}` : instructions,
|
|
|
1029
1053
|
chatCtx.insert(toolMessages);
|
|
1030
1054
|
const handle = SpeechHandle.create({
|
|
1031
1055
|
allowInterruptions: speechHandle.allowInterruptions,
|
|
1032
|
-
stepIndex: speechHandle.
|
|
1056
|
+
stepIndex: speechHandle._stepIndex + 1,
|
|
1033
1057
|
parent: speechHandle
|
|
1034
1058
|
});
|
|
1035
1059
|
this.agentSession.emit(
|
|
@@ -1042,14 +1066,17 @@ ${instructions}` : instructions,
|
|
|
1042
1066
|
);
|
|
1043
1067
|
const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
|
|
1044
1068
|
const toolResponseTask = this.createSpeechTask({
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1069
|
+
task: Task.from(
|
|
1070
|
+
() => this.pipelineReplyTask(
|
|
1071
|
+
handle,
|
|
1072
|
+
chatCtx,
|
|
1073
|
+
toolCtx,
|
|
1074
|
+
{ toolChoice: respondToolChoice },
|
|
1075
|
+
replyAbortController,
|
|
1076
|
+
instructions,
|
|
1077
|
+
void 0,
|
|
1078
|
+
toolMessages
|
|
1079
|
+
)
|
|
1053
1080
|
),
|
|
1054
1081
|
ownedSpeechHandle: handle,
|
|
1055
1082
|
name: "AgentActivity.pipelineReply"
|
|
@@ -1063,7 +1090,7 @@ ${instructions}` : instructions,
|
|
|
1063
1090
|
this.agent._chatCtx.insert(toolMessages);
|
|
1064
1091
|
}
|
|
1065
1092
|
}
|
|
1066
|
-
async realtimeGenerationTask(speechHandle, ev, modelSettings) {
|
|
1093
|
+
async realtimeGenerationTask(speechHandle, ev, modelSettings, replyAbortController) {
|
|
1067
1094
|
var _a, _b, _c;
|
|
1068
1095
|
speechHandleStorage.enterWith(speechHandle);
|
|
1069
1096
|
if (!this.realtimeSession) {
|
|
@@ -1073,20 +1100,20 @@ ${instructions}` : instructions,
|
|
|
1073
1100
|
throw new Error("llm is not a realtime model");
|
|
1074
1101
|
}
|
|
1075
1102
|
this.logger.debug(
|
|
1076
|
-
{ speech_id: speechHandle.id, stepIndex: speechHandle.
|
|
1103
|
+
{ speech_id: speechHandle.id, stepIndex: speechHandle.numSteps },
|
|
1077
1104
|
"realtime generation started"
|
|
1078
1105
|
);
|
|
1079
1106
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
1080
1107
|
const textOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
1081
1108
|
const toolCtx = this.realtimeSession.tools;
|
|
1082
1109
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
1110
|
+
speechHandle._clearAuthorization();
|
|
1083
1111
|
if (speechHandle.interrupted) {
|
|
1084
1112
|
return;
|
|
1085
1113
|
}
|
|
1086
1114
|
const onFirstFrame = () => {
|
|
1087
1115
|
this.agentSession._updateAgentState("speaking");
|
|
1088
1116
|
};
|
|
1089
|
-
const replyAbortController = new AbortController();
|
|
1090
1117
|
const readMessages = async (abortController, outputs) => {
|
|
1091
1118
|
const forwardTasks = [];
|
|
1092
1119
|
try {
|
|
@@ -1170,9 +1197,13 @@ ${instructions}` : instructions,
|
|
|
1170
1197
|
"AgentActivity.realtime_generation.read_tool_stream"
|
|
1171
1198
|
)
|
|
1172
1199
|
);
|
|
1173
|
-
const onToolExecutionStarted = (
|
|
1200
|
+
const onToolExecutionStarted = (f) => {
|
|
1201
|
+
speechHandle._itemAdded([f]);
|
|
1174
1202
|
};
|
|
1175
|
-
const onToolExecutionCompleted = (
|
|
1203
|
+
const onToolExecutionCompleted = (out) => {
|
|
1204
|
+
if (out.toolCallOutput) {
|
|
1205
|
+
speechHandle._itemAdded([out.toolCallOutput]);
|
|
1206
|
+
}
|
|
1176
1207
|
};
|
|
1177
1208
|
const [executeToolsTask, toolOutput] = performToolExecutions({
|
|
1178
1209
|
session: this.agentSession,
|
|
@@ -1228,7 +1259,7 @@ ${instructions}` : instructions,
|
|
|
1228
1259
|
interrupted: true
|
|
1229
1260
|
});
|
|
1230
1261
|
this.agent._chatCtx.insert(message);
|
|
1231
|
-
speechHandle.
|
|
1262
|
+
speechHandle._itemAdded([message]);
|
|
1232
1263
|
this.agentSession._conversationItemAdded(message);
|
|
1233
1264
|
}
|
|
1234
1265
|
this.logger.info(
|
|
@@ -1236,7 +1267,7 @@ ${instructions}` : instructions,
|
|
|
1236
1267
|
"playout completed with interrupt"
|
|
1237
1268
|
);
|
|
1238
1269
|
}
|
|
1239
|
-
speechHandle.
|
|
1270
|
+
speechHandle._markGenerationDone();
|
|
1240
1271
|
await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
1241
1272
|
return;
|
|
1242
1273
|
}
|
|
@@ -1249,17 +1280,17 @@ ${instructions}` : instructions,
|
|
|
1249
1280
|
interrupted: false
|
|
1250
1281
|
});
|
|
1251
1282
|
this.agent._chatCtx.insert(message);
|
|
1252
|
-
speechHandle.
|
|
1283
|
+
speechHandle._itemAdded([message]);
|
|
1253
1284
|
this.agentSession._conversationItemAdded(message);
|
|
1254
1285
|
}
|
|
1255
|
-
speechHandle.
|
|
1286
|
+
speechHandle._markGenerationDone();
|
|
1256
1287
|
toolOutput.firstToolStartedFuture.await.finally(() => {
|
|
1257
1288
|
this.agentSession._updateAgentState("thinking");
|
|
1258
1289
|
});
|
|
1259
1290
|
await executeToolsTask.result;
|
|
1260
1291
|
if (toolOutput.output.length === 0) return;
|
|
1261
1292
|
const { maxToolSteps } = this.agentSession.options;
|
|
1262
|
-
if (speechHandle.
|
|
1293
|
+
if (speechHandle.numSteps >= maxToolSteps) {
|
|
1263
1294
|
this.logger.warn(
|
|
1264
1295
|
{ speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
|
|
1265
1296
|
"maximum number of function calls steps reached"
|
|
@@ -1323,7 +1354,7 @@ ${instructions}` : instructions,
|
|
|
1323
1354
|
this.realtimeSession.interrupt();
|
|
1324
1355
|
const replySpeechHandle = SpeechHandle.create({
|
|
1325
1356
|
allowInterruptions: speechHandle.allowInterruptions,
|
|
1326
|
-
stepIndex: speechHandle.
|
|
1357
|
+
stepIndex: speechHandle.numSteps + 1,
|
|
1327
1358
|
parent: speechHandle
|
|
1328
1359
|
});
|
|
1329
1360
|
this.agentSession.emit(
|
|
@@ -1336,10 +1367,13 @@ ${instructions}` : instructions,
|
|
|
1336
1367
|
);
|
|
1337
1368
|
const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
|
|
1338
1369
|
this.createSpeechTask({
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1370
|
+
task: Task.from(
|
|
1371
|
+
(abortController) => this.realtimeReplyTask({
|
|
1372
|
+
speechHandle: replySpeechHandle,
|
|
1373
|
+
modelSettings: { toolChoice },
|
|
1374
|
+
abortController
|
|
1375
|
+
})
|
|
1376
|
+
),
|
|
1343
1377
|
ownedSpeechHandle: replySpeechHandle,
|
|
1344
1378
|
name: "AgentActivity.realtime_reply"
|
|
1345
1379
|
});
|
|
@@ -1349,7 +1383,8 @@ ${instructions}` : instructions,
|
|
|
1349
1383
|
speechHandle,
|
|
1350
1384
|
modelSettings: { toolChoice },
|
|
1351
1385
|
userInput,
|
|
1352
|
-
instructions
|
|
1386
|
+
instructions,
|
|
1387
|
+
abortController
|
|
1353
1388
|
}) {
|
|
1354
1389
|
speechHandleStorage.enterWith(speechHandle);
|
|
1355
1390
|
if (!this.realtimeSession) {
|
|
@@ -1372,18 +1407,24 @@ ${instructions}` : instructions,
|
|
|
1372
1407
|
}
|
|
1373
1408
|
try {
|
|
1374
1409
|
const generationEvent = await this.realtimeSession.generateReply(instructions);
|
|
1375
|
-
await this.realtimeGenerationTask(
|
|
1410
|
+
await this.realtimeGenerationTask(
|
|
1411
|
+
speechHandle,
|
|
1412
|
+
generationEvent,
|
|
1413
|
+
{ toolChoice },
|
|
1414
|
+
abortController
|
|
1415
|
+
);
|
|
1376
1416
|
} finally {
|
|
1377
1417
|
if (toolChoice !== void 0 && toolChoice !== originalToolChoice) {
|
|
1378
1418
|
this.realtimeSession.updateOptions({ toolChoice: originalToolChoice });
|
|
1379
1419
|
}
|
|
1380
1420
|
}
|
|
1381
1421
|
}
|
|
1382
|
-
scheduleSpeech(speechHandle, priority,
|
|
1383
|
-
if (this.draining && !
|
|
1422
|
+
scheduleSpeech(speechHandle, priority, force = false) {
|
|
1423
|
+
if (this.draining && !force) {
|
|
1384
1424
|
throw new Error("cannot schedule new speech, the agent is draining");
|
|
1385
1425
|
}
|
|
1386
1426
|
this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]);
|
|
1427
|
+
speechHandle._markScheduled();
|
|
1387
1428
|
this.wakeupMainTask();
|
|
1388
1429
|
}
|
|
1389
1430
|
async drain() {
|
|
@@ -1392,7 +1433,7 @@ ${instructions}` : instructions,
|
|
|
1392
1433
|
try {
|
|
1393
1434
|
if (this._draining) return;
|
|
1394
1435
|
this.createSpeechTask({
|
|
1395
|
-
|
|
1436
|
+
task: Task.from(() => this.agent.onExit()),
|
|
1396
1437
|
name: "AgentActivity_onExit"
|
|
1397
1438
|
});
|
|
1398
1439
|
this.wakeupMainTask();
|