@livekit/agents 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +2 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -3
- package/dist/index.d.ts +2 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -3
- package/dist/index.js.map +1 -1
- package/dist/tokenize/basic/hyphenator.cjs.map +1 -1
- package/dist/tokenize/basic/hyphenator.js.map +1 -1
- package/dist/utils.cjs +77 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +21 -0
- package/dist/utils.d.ts +21 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +76 -1
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +107 -71
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +107 -71
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/avatar/datastream_io.cjs +204 -0
- package/dist/voice/avatar/datastream_io.cjs.map +1 -0
- package/dist/voice/avatar/datastream_io.d.cts +37 -0
- package/dist/voice/avatar/datastream_io.d.ts +37 -0
- package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
- package/dist/voice/avatar/datastream_io.js +188 -0
- package/dist/voice/avatar/datastream_io.js.map +1 -0
- package/dist/{multimodal → voice/avatar}/index.cjs +4 -4
- package/dist/voice/avatar/index.cjs.map +1 -0
- package/dist/voice/avatar/index.d.cts +2 -0
- package/dist/voice/avatar/index.d.ts +2 -0
- package/dist/voice/avatar/index.d.ts.map +1 -0
- package/dist/voice/avatar/index.js +2 -0
- package/dist/voice/avatar/index.js.map +1 -0
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +1 -1
- package/dist/voice/io.d.ts +1 -1
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +3 -1
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +3 -1
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/run_context.cjs +13 -0
- package/dist/voice/run_context.cjs.map +1 -1
- package/dist/voice/run_context.d.cts +10 -0
- package/dist/voice/run_context.d.ts +10 -0
- package/dist/voice/run_context.d.ts.map +1 -1
- package/dist/voice/run_context.js +13 -0
- package/dist/voice/run_context.js.map +1 -1
- package/dist/voice/speech_handle.cjs +152 -30
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +67 -16
- package/dist/voice/speech_handle.d.ts +67 -16
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +153 -31
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/worker.cjs +4 -1
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +4 -1
- package/dist/worker.js.map +1 -1
- package/package.json +2 -2
- package/src/index.ts +2 -3
- package/src/tokenize/basic/hyphenator.ts +1 -1
- package/src/utils.ts +121 -1
- package/src/voice/agent_activity.ts +122 -78
- package/src/voice/avatar/datastream_io.ts +247 -0
- package/src/voice/avatar/index.ts +4 -0
- package/src/voice/index.ts +2 -0
- package/src/voice/io.ts +1 -1
- package/src/voice/room_io/_input.ts +9 -3
- package/src/voice/run_context.ts +16 -2
- package/src/voice/speech_handle.ts +183 -38
- package/src/worker.ts +5 -1
- package/dist/multimodal/agent_playout.cjs +0 -233
- package/dist/multimodal/agent_playout.cjs.map +0 -1
- package/dist/multimodal/agent_playout.d.cts +0 -34
- package/dist/multimodal/agent_playout.d.ts +0 -34
- package/dist/multimodal/agent_playout.d.ts.map +0 -1
- package/dist/multimodal/agent_playout.js +0 -207
- package/dist/multimodal/agent_playout.js.map +0 -1
- package/dist/multimodal/index.cjs.map +0 -1
- package/dist/multimodal/index.d.cts +0 -2
- package/dist/multimodal/index.d.ts +0 -2
- package/dist/multimodal/index.d.ts.map +0 -1
- package/dist/multimodal/index.js +0 -2
- package/dist/multimodal/index.js.map +0 -1
- package/src/multimodal/agent_playout.ts +0 -266
- package/src/multimodal/index.ts +0 -4
|
@@ -190,7 +190,7 @@ class AgentActivity {
|
|
|
190
190
|
this.started = true;
|
|
191
191
|
this._mainTask = Task.from(({ signal }) => this.mainTask(signal));
|
|
192
192
|
this.createSpeechTask({
|
|
193
|
-
|
|
193
|
+
task: Task.from(() => this.agent.onEnter()),
|
|
194
194
|
name: "AgentActivity_onEnter"
|
|
195
195
|
});
|
|
196
196
|
} finally {
|
|
@@ -309,7 +309,9 @@ class AgentActivity {
|
|
|
309
309
|
})
|
|
310
310
|
);
|
|
311
311
|
const task = this.createSpeechTask({
|
|
312
|
-
|
|
312
|
+
task: Task.from(
|
|
313
|
+
(abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
|
|
314
|
+
),
|
|
313
315
|
ownedSpeechHandle: handle,
|
|
314
316
|
name: "AgentActivity.say_tts"
|
|
315
317
|
});
|
|
@@ -413,7 +415,9 @@ class AgentActivity {
|
|
|
413
415
|
);
|
|
414
416
|
this.logger.info({ speech_id: handle.id }, "Creating speech handle");
|
|
415
417
|
this.createSpeechTask({
|
|
416
|
-
|
|
418
|
+
task: Task.from(
|
|
419
|
+
(abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
|
|
420
|
+
),
|
|
417
421
|
ownedSpeechHandle: handle,
|
|
418
422
|
name: "AgentActivity.realtimeGeneration"
|
|
419
423
|
});
|
|
@@ -477,16 +481,23 @@ class AgentActivity {
|
|
|
477
481
|
);
|
|
478
482
|
}
|
|
479
483
|
createSpeechTask(options) {
|
|
480
|
-
const {
|
|
481
|
-
this.speechTasks.add(
|
|
482
|
-
|
|
483
|
-
this.speechTasks.delete(
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
484
|
+
const { task, ownedSpeechHandle } = options;
|
|
485
|
+
this.speechTasks.add(task);
|
|
486
|
+
task.addDoneCallback(() => {
|
|
487
|
+
this.speechTasks.delete(task);
|
|
488
|
+
});
|
|
489
|
+
if (ownedSpeechHandle) {
|
|
490
|
+
ownedSpeechHandle._tasks.push(task);
|
|
491
|
+
task.addDoneCallback(() => {
|
|
492
|
+
if (ownedSpeechHandle._tasks.every((t) => t.done)) {
|
|
493
|
+
ownedSpeechHandle._markDone();
|
|
494
|
+
}
|
|
495
|
+
});
|
|
496
|
+
}
|
|
497
|
+
task.addDoneCallback(() => {
|
|
487
498
|
this.wakeupMainTask();
|
|
488
499
|
});
|
|
489
|
-
return
|
|
500
|
+
return task.result;
|
|
490
501
|
}
|
|
491
502
|
async onEndOfTurn(info) {
|
|
492
503
|
if (this.draining) {
|
|
@@ -499,7 +510,7 @@ class AgentActivity {
|
|
|
499
510
|
}
|
|
500
511
|
const oldTask = this._userTurnCompletedTask;
|
|
501
512
|
this._userTurnCompletedTask = this.createSpeechTask({
|
|
502
|
-
|
|
513
|
+
task: Task.from(() => this.userTurnCompleted(info, oldTask)),
|
|
503
514
|
name: "AgentActivity.userTurnCompleted"
|
|
504
515
|
});
|
|
505
516
|
return true;
|
|
@@ -525,8 +536,8 @@ class AgentActivity {
|
|
|
525
536
|
}
|
|
526
537
|
const speechHandle = heapItem[2];
|
|
527
538
|
this._currentSpeech = speechHandle;
|
|
528
|
-
speechHandle.
|
|
529
|
-
await speechHandle.
|
|
539
|
+
speechHandle._authorizeGeneration();
|
|
540
|
+
await speechHandle._waitForGeneration();
|
|
530
541
|
this._currentSpeech = void 0;
|
|
531
542
|
}
|
|
532
543
|
if (this.draining && this.speechTasks.size === 0) {
|
|
@@ -579,16 +590,19 @@ class AgentActivity {
|
|
|
579
590
|
this.logger.info({ speech_id: handle.id }, "Creating speech handle");
|
|
580
591
|
if (this.llm instanceof RealtimeModel) {
|
|
581
592
|
this.createSpeechTask({
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
593
|
+
task: Task.from(
|
|
594
|
+
(abortController) => this.realtimeReplyTask({
|
|
595
|
+
speechHandle: handle,
|
|
596
|
+
// TODO(brian): support llm.ChatMessage for the realtime model
|
|
597
|
+
userInput: userMessage == null ? void 0 : userMessage.textContent,
|
|
598
|
+
instructions,
|
|
599
|
+
modelSettings: {
|
|
600
|
+
// isGiven(toolChoice) = toolChoice !== undefined
|
|
601
|
+
toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
|
|
602
|
+
},
|
|
603
|
+
abortController
|
|
604
|
+
})
|
|
605
|
+
),
|
|
592
606
|
ownedSpeechHandle: handle,
|
|
593
607
|
name: "AgentActivity.realtimeReply"
|
|
594
608
|
});
|
|
@@ -598,14 +612,19 @@ class AgentActivity {
|
|
|
598
612
|
${instructions}`;
|
|
599
613
|
}
|
|
600
614
|
const task = this.createSpeechTask({
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
615
|
+
task: Task.from(
|
|
616
|
+
(abortController) => this.pipelineReplyTask(
|
|
617
|
+
handle,
|
|
618
|
+
chatCtx ?? this.agent.chatCtx,
|
|
619
|
+
this.agent.toolCtx,
|
|
620
|
+
{
|
|
621
|
+
toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
|
|
622
|
+
},
|
|
623
|
+
abortController,
|
|
624
|
+
instructions ? `${this.agent.instructions}
|
|
607
625
|
${instructions}` : instructions,
|
|
608
|
-
|
|
626
|
+
userMessage
|
|
627
|
+
)
|
|
609
628
|
),
|
|
610
629
|
ownedSpeechHandle: handle,
|
|
611
630
|
name: "AgentActivity.pipelineReply"
|
|
@@ -627,7 +646,7 @@ ${instructions}` : instructions,
|
|
|
627
646
|
if (currentSpeech === void 0) {
|
|
628
647
|
future.resolve();
|
|
629
648
|
} else {
|
|
630
|
-
currentSpeech.
|
|
649
|
+
currentSpeech.addDoneCallback(() => {
|
|
631
650
|
if (future.done) return;
|
|
632
651
|
future.resolve();
|
|
633
652
|
});
|
|
@@ -635,7 +654,7 @@ ${instructions}` : instructions,
|
|
|
635
654
|
return future;
|
|
636
655
|
}
|
|
637
656
|
onPipelineReplyDone() {
|
|
638
|
-
if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done)) {
|
|
657
|
+
if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done())) {
|
|
639
658
|
this.agentSession._updateAgentState("listening");
|
|
640
659
|
}
|
|
641
660
|
}
|
|
@@ -699,11 +718,10 @@ ${instructions}` : instructions,
|
|
|
699
718
|
createMetricsCollectedEvent({ metrics: eouMetrics })
|
|
700
719
|
);
|
|
701
720
|
}
|
|
702
|
-
async ttsTask(speechHandle, text, addToChatCtx, modelSettings, audio) {
|
|
721
|
+
async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
|
|
703
722
|
speechHandleStorage.enterWith(speechHandle);
|
|
704
723
|
const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
705
724
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
706
|
-
const replyAbortController = new AbortController();
|
|
707
725
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
708
726
|
if (speechHandle.interrupted) {
|
|
709
727
|
return;
|
|
@@ -792,10 +810,9 @@ ${instructions}` : instructions,
|
|
|
792
810
|
this.agentSession._updateAgentState("listening");
|
|
793
811
|
}
|
|
794
812
|
}
|
|
795
|
-
async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, instructions, newMessage, toolsMessages) {
|
|
813
|
+
async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) {
|
|
796
814
|
var _a, _b, _c;
|
|
797
815
|
speechHandleStorage.enterWith(speechHandle);
|
|
798
|
-
const replyAbortController = new AbortController();
|
|
799
816
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
800
817
|
const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
801
818
|
chatCtx = chatCtx.copy();
|
|
@@ -838,12 +855,15 @@ ${instructions}` : instructions,
|
|
|
838
855
|
);
|
|
839
856
|
tasks.push(ttsTask);
|
|
840
857
|
}
|
|
841
|
-
await speechHandle.waitIfNotInterrupted([speechHandle.
|
|
858
|
+
await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
|
|
842
859
|
if (speechHandle.interrupted) {
|
|
843
860
|
replyAbortController.abort();
|
|
844
861
|
await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
845
862
|
return;
|
|
846
863
|
}
|
|
864
|
+
this.agentSession._updateAgentState("thinking");
|
|
865
|
+
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
866
|
+
speechHandle._clearAuthorization();
|
|
847
867
|
const replyStartedAt = Date.now();
|
|
848
868
|
const trNodeResult = await this.agent.transcriptionNode(llmOutput, modelSettings);
|
|
849
869
|
let textOut = null;
|
|
@@ -890,7 +910,6 @@ ${instructions}` : instructions,
|
|
|
890
910
|
onToolExecutionStarted,
|
|
891
911
|
onToolExecutionCompleted
|
|
892
912
|
});
|
|
893
|
-
tasks.push(executeToolsTask);
|
|
894
913
|
await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
|
|
895
914
|
if (audioOutput) {
|
|
896
915
|
await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
|
|
@@ -945,7 +964,7 @@ ${instructions}` : instructions,
|
|
|
945
964
|
{ speech_id: speechHandle.id, message: forwardedText },
|
|
946
965
|
"playout completed with interrupt"
|
|
947
966
|
);
|
|
948
|
-
speechHandle.
|
|
967
|
+
speechHandle._markGenerationDone();
|
|
949
968
|
await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
950
969
|
return;
|
|
951
970
|
}
|
|
@@ -970,11 +989,11 @@ ${instructions}` : instructions,
|
|
|
970
989
|
} else if (this.agentSession.agentState === "speaking") {
|
|
971
990
|
this.agentSession._updateAgentState("listening");
|
|
972
991
|
}
|
|
973
|
-
speechHandle.
|
|
992
|
+
speechHandle._markGenerationDone();
|
|
974
993
|
await executeToolsTask.result;
|
|
975
994
|
if (toolOutput.output.length === 0) return;
|
|
976
995
|
const { maxToolSteps } = this.agentSession.options;
|
|
977
|
-
if (speechHandle.
|
|
996
|
+
if (speechHandle.numSteps >= maxToolSteps) {
|
|
978
997
|
this.logger.warn(
|
|
979
998
|
{ speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
|
|
980
999
|
"maximum number of function calls steps reached"
|
|
@@ -1029,7 +1048,7 @@ ${instructions}` : instructions,
|
|
|
1029
1048
|
chatCtx.insert(toolMessages);
|
|
1030
1049
|
const handle = SpeechHandle.create({
|
|
1031
1050
|
allowInterruptions: speechHandle.allowInterruptions,
|
|
1032
|
-
stepIndex: speechHandle.
|
|
1051
|
+
stepIndex: speechHandle._stepIndex + 1,
|
|
1033
1052
|
parent: speechHandle
|
|
1034
1053
|
});
|
|
1035
1054
|
this.agentSession.emit(
|
|
@@ -1042,14 +1061,17 @@ ${instructions}` : instructions,
|
|
|
1042
1061
|
);
|
|
1043
1062
|
const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
|
|
1044
1063
|
const toolResponseTask = this.createSpeechTask({
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1064
|
+
task: Task.from(
|
|
1065
|
+
() => this.pipelineReplyTask(
|
|
1066
|
+
handle,
|
|
1067
|
+
chatCtx,
|
|
1068
|
+
toolCtx,
|
|
1069
|
+
{ toolChoice: respondToolChoice },
|
|
1070
|
+
replyAbortController,
|
|
1071
|
+
instructions,
|
|
1072
|
+
void 0,
|
|
1073
|
+
toolMessages
|
|
1074
|
+
)
|
|
1053
1075
|
),
|
|
1054
1076
|
ownedSpeechHandle: handle,
|
|
1055
1077
|
name: "AgentActivity.pipelineReply"
|
|
@@ -1063,7 +1085,7 @@ ${instructions}` : instructions,
|
|
|
1063
1085
|
this.agent._chatCtx.insert(toolMessages);
|
|
1064
1086
|
}
|
|
1065
1087
|
}
|
|
1066
|
-
async realtimeGenerationTask(speechHandle, ev, modelSettings) {
|
|
1088
|
+
async realtimeGenerationTask(speechHandle, ev, modelSettings, replyAbortController) {
|
|
1067
1089
|
var _a, _b, _c;
|
|
1068
1090
|
speechHandleStorage.enterWith(speechHandle);
|
|
1069
1091
|
if (!this.realtimeSession) {
|
|
@@ -1073,20 +1095,20 @@ ${instructions}` : instructions,
|
|
|
1073
1095
|
throw new Error("llm is not a realtime model");
|
|
1074
1096
|
}
|
|
1075
1097
|
this.logger.debug(
|
|
1076
|
-
{ speech_id: speechHandle.id, stepIndex: speechHandle.
|
|
1098
|
+
{ speech_id: speechHandle.id, stepIndex: speechHandle.numSteps },
|
|
1077
1099
|
"realtime generation started"
|
|
1078
1100
|
);
|
|
1079
1101
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
1080
1102
|
const textOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
1081
1103
|
const toolCtx = this.realtimeSession.tools;
|
|
1082
1104
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
1105
|
+
speechHandle._clearAuthorization();
|
|
1083
1106
|
if (speechHandle.interrupted) {
|
|
1084
1107
|
return;
|
|
1085
1108
|
}
|
|
1086
1109
|
const onFirstFrame = () => {
|
|
1087
1110
|
this.agentSession._updateAgentState("speaking");
|
|
1088
1111
|
};
|
|
1089
|
-
const replyAbortController = new AbortController();
|
|
1090
1112
|
const readMessages = async (abortController, outputs) => {
|
|
1091
1113
|
const forwardTasks = [];
|
|
1092
1114
|
try {
|
|
@@ -1170,9 +1192,13 @@ ${instructions}` : instructions,
|
|
|
1170
1192
|
"AgentActivity.realtime_generation.read_tool_stream"
|
|
1171
1193
|
)
|
|
1172
1194
|
);
|
|
1173
|
-
const onToolExecutionStarted = (
|
|
1195
|
+
const onToolExecutionStarted = (f) => {
|
|
1196
|
+
speechHandle._itemAdded([f]);
|
|
1174
1197
|
};
|
|
1175
|
-
const onToolExecutionCompleted = (
|
|
1198
|
+
const onToolExecutionCompleted = (out) => {
|
|
1199
|
+
if (out.toolCallOutput) {
|
|
1200
|
+
speechHandle._itemAdded([out.toolCallOutput]);
|
|
1201
|
+
}
|
|
1176
1202
|
};
|
|
1177
1203
|
const [executeToolsTask, toolOutput] = performToolExecutions({
|
|
1178
1204
|
session: this.agentSession,
|
|
@@ -1228,7 +1254,7 @@ ${instructions}` : instructions,
|
|
|
1228
1254
|
interrupted: true
|
|
1229
1255
|
});
|
|
1230
1256
|
this.agent._chatCtx.insert(message);
|
|
1231
|
-
speechHandle.
|
|
1257
|
+
speechHandle._itemAdded([message]);
|
|
1232
1258
|
this.agentSession._conversationItemAdded(message);
|
|
1233
1259
|
}
|
|
1234
1260
|
this.logger.info(
|
|
@@ -1236,7 +1262,7 @@ ${instructions}` : instructions,
|
|
|
1236
1262
|
"playout completed with interrupt"
|
|
1237
1263
|
);
|
|
1238
1264
|
}
|
|
1239
|
-
speechHandle.
|
|
1265
|
+
speechHandle._markGenerationDone();
|
|
1240
1266
|
await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
1241
1267
|
return;
|
|
1242
1268
|
}
|
|
@@ -1249,17 +1275,17 @@ ${instructions}` : instructions,
|
|
|
1249
1275
|
interrupted: false
|
|
1250
1276
|
});
|
|
1251
1277
|
this.agent._chatCtx.insert(message);
|
|
1252
|
-
speechHandle.
|
|
1278
|
+
speechHandle._itemAdded([message]);
|
|
1253
1279
|
this.agentSession._conversationItemAdded(message);
|
|
1254
1280
|
}
|
|
1255
|
-
speechHandle.
|
|
1281
|
+
speechHandle._markGenerationDone();
|
|
1256
1282
|
toolOutput.firstToolStartedFuture.await.finally(() => {
|
|
1257
1283
|
this.agentSession._updateAgentState("thinking");
|
|
1258
1284
|
});
|
|
1259
1285
|
await executeToolsTask.result;
|
|
1260
1286
|
if (toolOutput.output.length === 0) return;
|
|
1261
1287
|
const { maxToolSteps } = this.agentSession.options;
|
|
1262
|
-
if (speechHandle.
|
|
1288
|
+
if (speechHandle.numSteps >= maxToolSteps) {
|
|
1263
1289
|
this.logger.warn(
|
|
1264
1290
|
{ speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
|
|
1265
1291
|
"maximum number of function calls steps reached"
|
|
@@ -1323,7 +1349,7 @@ ${instructions}` : instructions,
|
|
|
1323
1349
|
this.realtimeSession.interrupt();
|
|
1324
1350
|
const replySpeechHandle = SpeechHandle.create({
|
|
1325
1351
|
allowInterruptions: speechHandle.allowInterruptions,
|
|
1326
|
-
stepIndex: speechHandle.
|
|
1352
|
+
stepIndex: speechHandle.numSteps + 1,
|
|
1327
1353
|
parent: speechHandle
|
|
1328
1354
|
});
|
|
1329
1355
|
this.agentSession.emit(
|
|
@@ -1336,10 +1362,13 @@ ${instructions}` : instructions,
|
|
|
1336
1362
|
);
|
|
1337
1363
|
const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
|
|
1338
1364
|
this.createSpeechTask({
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1365
|
+
task: Task.from(
|
|
1366
|
+
(abortController) => this.realtimeReplyTask({
|
|
1367
|
+
speechHandle: replySpeechHandle,
|
|
1368
|
+
modelSettings: { toolChoice },
|
|
1369
|
+
abortController
|
|
1370
|
+
})
|
|
1371
|
+
),
|
|
1343
1372
|
ownedSpeechHandle: replySpeechHandle,
|
|
1344
1373
|
name: "AgentActivity.realtime_reply"
|
|
1345
1374
|
});
|
|
@@ -1349,7 +1378,8 @@ ${instructions}` : instructions,
|
|
|
1349
1378
|
speechHandle,
|
|
1350
1379
|
modelSettings: { toolChoice },
|
|
1351
1380
|
userInput,
|
|
1352
|
-
instructions
|
|
1381
|
+
instructions,
|
|
1382
|
+
abortController
|
|
1353
1383
|
}) {
|
|
1354
1384
|
speechHandleStorage.enterWith(speechHandle);
|
|
1355
1385
|
if (!this.realtimeSession) {
|
|
@@ -1372,18 +1402,24 @@ ${instructions}` : instructions,
|
|
|
1372
1402
|
}
|
|
1373
1403
|
try {
|
|
1374
1404
|
const generationEvent = await this.realtimeSession.generateReply(instructions);
|
|
1375
|
-
await this.realtimeGenerationTask(
|
|
1405
|
+
await this.realtimeGenerationTask(
|
|
1406
|
+
speechHandle,
|
|
1407
|
+
generationEvent,
|
|
1408
|
+
{ toolChoice },
|
|
1409
|
+
abortController
|
|
1410
|
+
);
|
|
1376
1411
|
} finally {
|
|
1377
1412
|
if (toolChoice !== void 0 && toolChoice !== originalToolChoice) {
|
|
1378
1413
|
this.realtimeSession.updateOptions({ toolChoice: originalToolChoice });
|
|
1379
1414
|
}
|
|
1380
1415
|
}
|
|
1381
1416
|
}
|
|
1382
|
-
scheduleSpeech(speechHandle, priority,
|
|
1383
|
-
if (this.draining && !
|
|
1417
|
+
scheduleSpeech(speechHandle, priority, force = false) {
|
|
1418
|
+
if (this.draining && !force) {
|
|
1384
1419
|
throw new Error("cannot schedule new speech, the agent is draining");
|
|
1385
1420
|
}
|
|
1386
1421
|
this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]);
|
|
1422
|
+
speechHandle._markScheduled();
|
|
1387
1423
|
this.wakeupMainTask();
|
|
1388
1424
|
}
|
|
1389
1425
|
async drain() {
|
|
@@ -1392,7 +1428,7 @@ ${instructions}` : instructions,
|
|
|
1392
1428
|
try {
|
|
1393
1429
|
if (this._draining) return;
|
|
1394
1430
|
this.createSpeechTask({
|
|
1395
|
-
|
|
1431
|
+
task: Task.from(() => this.agent.onExit()),
|
|
1396
1432
|
name: "AgentActivity_onExit"
|
|
1397
1433
|
});
|
|
1398
1434
|
this.wakeupMainTask();
|