@livekit/agents 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +2 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -3
- package/dist/index.d.ts +2 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -3
- package/dist/index.js.map +1 -1
- package/dist/tokenize/basic/hyphenator.cjs.map +1 -1
- package/dist/tokenize/basic/hyphenator.js.map +1 -1
- package/dist/utils.cjs +77 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +21 -0
- package/dist/utils.d.ts +21 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +76 -1
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +107 -71
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +107 -71
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/avatar/datastream_io.cjs +204 -0
- package/dist/voice/avatar/datastream_io.cjs.map +1 -0
- package/dist/voice/avatar/datastream_io.d.cts +37 -0
- package/dist/voice/avatar/datastream_io.d.ts +37 -0
- package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
- package/dist/voice/avatar/datastream_io.js +188 -0
- package/dist/voice/avatar/datastream_io.js.map +1 -0
- package/dist/{multimodal → voice/avatar}/index.cjs +4 -4
- package/dist/voice/avatar/index.cjs.map +1 -0
- package/dist/voice/avatar/index.d.cts +2 -0
- package/dist/voice/avatar/index.d.ts +2 -0
- package/dist/voice/avatar/index.d.ts.map +1 -0
- package/dist/voice/avatar/index.js +2 -0
- package/dist/voice/avatar/index.js.map +1 -0
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +1 -1
- package/dist/voice/io.d.ts +1 -1
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +3 -1
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +3 -1
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/run_context.cjs +13 -0
- package/dist/voice/run_context.cjs.map +1 -1
- package/dist/voice/run_context.d.cts +10 -0
- package/dist/voice/run_context.d.ts +10 -0
- package/dist/voice/run_context.d.ts.map +1 -1
- package/dist/voice/run_context.js +13 -0
- package/dist/voice/run_context.js.map +1 -1
- package/dist/voice/speech_handle.cjs +152 -30
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +67 -16
- package/dist/voice/speech_handle.d.ts +67 -16
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +153 -31
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/worker.cjs +4 -1
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +4 -1
- package/dist/worker.js.map +1 -1
- package/package.json +2 -2
- package/src/index.ts +2 -3
- package/src/tokenize/basic/hyphenator.ts +1 -1
- package/src/utils.ts +121 -1
- package/src/voice/agent_activity.ts +122 -78
- package/src/voice/avatar/datastream_io.ts +247 -0
- package/src/voice/avatar/index.ts +4 -0
- package/src/voice/index.ts +2 -0
- package/src/voice/io.ts +1 -1
- package/src/voice/room_io/_input.ts +9 -3
- package/src/voice/run_context.ts +16 -2
- package/src/voice/speech_handle.ts +183 -38
- package/src/worker.ts +5 -1
- package/dist/multimodal/agent_playout.cjs +0 -233
- package/dist/multimodal/agent_playout.cjs.map +0 -1
- package/dist/multimodal/agent_playout.d.cts +0 -34
- package/dist/multimodal/agent_playout.d.ts +0 -34
- package/dist/multimodal/agent_playout.d.ts.map +0 -1
- package/dist/multimodal/agent_playout.js +0 -207
- package/dist/multimodal/agent_playout.js.map +0 -1
- package/dist/multimodal/index.cjs.map +0 -1
- package/dist/multimodal/index.d.cts +0 -2
- package/dist/multimodal/index.d.ts +0 -2
- package/dist/multimodal/index.d.ts.map +0 -1
- package/dist/multimodal/index.js +0 -2
- package/dist/multimodal/index.js.map +0 -1
- package/src/multimodal/agent_playout.ts +0 -266
- package/src/multimodal/index.ts +0 -4
|
@@ -193,7 +193,7 @@ class AgentActivity {
|
|
|
193
193
|
this.started = true;
|
|
194
194
|
this._mainTask = import_utils.Task.from(({ signal }) => this.mainTask(signal));
|
|
195
195
|
this.createSpeechTask({
|
|
196
|
-
|
|
196
|
+
task: import_utils.Task.from(() => this.agent.onEnter()),
|
|
197
197
|
name: "AgentActivity_onEnter"
|
|
198
198
|
});
|
|
199
199
|
} finally {
|
|
@@ -312,7 +312,9 @@ class AgentActivity {
|
|
|
312
312
|
})
|
|
313
313
|
);
|
|
314
314
|
const task = this.createSpeechTask({
|
|
315
|
-
|
|
315
|
+
task: import_utils.Task.from(
|
|
316
|
+
(abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
|
|
317
|
+
),
|
|
316
318
|
ownedSpeechHandle: handle,
|
|
317
319
|
name: "AgentActivity.say_tts"
|
|
318
320
|
});
|
|
@@ -416,7 +418,9 @@ class AgentActivity {
|
|
|
416
418
|
);
|
|
417
419
|
this.logger.info({ speech_id: handle.id }, "Creating speech handle");
|
|
418
420
|
this.createSpeechTask({
|
|
419
|
-
|
|
421
|
+
task: import_utils.Task.from(
|
|
422
|
+
(abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
|
|
423
|
+
),
|
|
420
424
|
ownedSpeechHandle: handle,
|
|
421
425
|
name: "AgentActivity.realtimeGeneration"
|
|
422
426
|
});
|
|
@@ -480,16 +484,23 @@ class AgentActivity {
|
|
|
480
484
|
);
|
|
481
485
|
}
|
|
482
486
|
createSpeechTask(options) {
|
|
483
|
-
const {
|
|
484
|
-
this.speechTasks.add(
|
|
485
|
-
|
|
486
|
-
this.speechTasks.delete(
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
487
|
+
const { task, ownedSpeechHandle } = options;
|
|
488
|
+
this.speechTasks.add(task);
|
|
489
|
+
task.addDoneCallback(() => {
|
|
490
|
+
this.speechTasks.delete(task);
|
|
491
|
+
});
|
|
492
|
+
if (ownedSpeechHandle) {
|
|
493
|
+
ownedSpeechHandle._tasks.push(task);
|
|
494
|
+
task.addDoneCallback(() => {
|
|
495
|
+
if (ownedSpeechHandle._tasks.every((t) => t.done)) {
|
|
496
|
+
ownedSpeechHandle._markDone();
|
|
497
|
+
}
|
|
498
|
+
});
|
|
499
|
+
}
|
|
500
|
+
task.addDoneCallback(() => {
|
|
490
501
|
this.wakeupMainTask();
|
|
491
502
|
});
|
|
492
|
-
return
|
|
503
|
+
return task.result;
|
|
493
504
|
}
|
|
494
505
|
async onEndOfTurn(info) {
|
|
495
506
|
if (this.draining) {
|
|
@@ -502,7 +513,7 @@ class AgentActivity {
|
|
|
502
513
|
}
|
|
503
514
|
const oldTask = this._userTurnCompletedTask;
|
|
504
515
|
this._userTurnCompletedTask = this.createSpeechTask({
|
|
505
|
-
|
|
516
|
+
task: import_utils.Task.from(() => this.userTurnCompleted(info, oldTask)),
|
|
506
517
|
name: "AgentActivity.userTurnCompleted"
|
|
507
518
|
});
|
|
508
519
|
return true;
|
|
@@ -528,8 +539,8 @@ class AgentActivity {
|
|
|
528
539
|
}
|
|
529
540
|
const speechHandle = heapItem[2];
|
|
530
541
|
this._currentSpeech = speechHandle;
|
|
531
|
-
speechHandle.
|
|
532
|
-
await speechHandle.
|
|
542
|
+
speechHandle._authorizeGeneration();
|
|
543
|
+
await speechHandle._waitForGeneration();
|
|
533
544
|
this._currentSpeech = void 0;
|
|
534
545
|
}
|
|
535
546
|
if (this.draining && this.speechTasks.size === 0) {
|
|
@@ -582,16 +593,19 @@ class AgentActivity {
|
|
|
582
593
|
this.logger.info({ speech_id: handle.id }, "Creating speech handle");
|
|
583
594
|
if (this.llm instanceof import_llm.RealtimeModel) {
|
|
584
595
|
this.createSpeechTask({
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
596
|
+
task: import_utils.Task.from(
|
|
597
|
+
(abortController) => this.realtimeReplyTask({
|
|
598
|
+
speechHandle: handle,
|
|
599
|
+
// TODO(brian): support llm.ChatMessage for the realtime model
|
|
600
|
+
userInput: userMessage == null ? void 0 : userMessage.textContent,
|
|
601
|
+
instructions,
|
|
602
|
+
modelSettings: {
|
|
603
|
+
// isGiven(toolChoice) = toolChoice !== undefined
|
|
604
|
+
toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
|
|
605
|
+
},
|
|
606
|
+
abortController
|
|
607
|
+
})
|
|
608
|
+
),
|
|
595
609
|
ownedSpeechHandle: handle,
|
|
596
610
|
name: "AgentActivity.realtimeReply"
|
|
597
611
|
});
|
|
@@ -601,14 +615,19 @@ class AgentActivity {
|
|
|
601
615
|
${instructions}`;
|
|
602
616
|
}
|
|
603
617
|
const task = this.createSpeechTask({
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
618
|
+
task: import_utils.Task.from(
|
|
619
|
+
(abortController) => this.pipelineReplyTask(
|
|
620
|
+
handle,
|
|
621
|
+
chatCtx ?? this.agent.chatCtx,
|
|
622
|
+
this.agent.toolCtx,
|
|
623
|
+
{
|
|
624
|
+
toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
|
|
625
|
+
},
|
|
626
|
+
abortController,
|
|
627
|
+
instructions ? `${this.agent.instructions}
|
|
610
628
|
${instructions}` : instructions,
|
|
611
|
-
|
|
629
|
+
userMessage
|
|
630
|
+
)
|
|
612
631
|
),
|
|
613
632
|
ownedSpeechHandle: handle,
|
|
614
633
|
name: "AgentActivity.pipelineReply"
|
|
@@ -630,7 +649,7 @@ ${instructions}` : instructions,
|
|
|
630
649
|
if (currentSpeech === void 0) {
|
|
631
650
|
future.resolve();
|
|
632
651
|
} else {
|
|
633
|
-
currentSpeech.
|
|
652
|
+
currentSpeech.addDoneCallback(() => {
|
|
634
653
|
if (future.done) return;
|
|
635
654
|
future.resolve();
|
|
636
655
|
});
|
|
@@ -638,7 +657,7 @@ ${instructions}` : instructions,
|
|
|
638
657
|
return future;
|
|
639
658
|
}
|
|
640
659
|
onPipelineReplyDone() {
|
|
641
|
-
if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done)) {
|
|
660
|
+
if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done())) {
|
|
642
661
|
this.agentSession._updateAgentState("listening");
|
|
643
662
|
}
|
|
644
663
|
}
|
|
@@ -702,11 +721,10 @@ ${instructions}` : instructions,
|
|
|
702
721
|
(0, import_events.createMetricsCollectedEvent)({ metrics: eouMetrics })
|
|
703
722
|
);
|
|
704
723
|
}
|
|
705
|
-
async ttsTask(speechHandle, text, addToChatCtx, modelSettings, audio) {
|
|
724
|
+
async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
|
|
706
725
|
speechHandleStorage.enterWith(speechHandle);
|
|
707
726
|
const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
708
727
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
709
|
-
const replyAbortController = new AbortController();
|
|
710
728
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
711
729
|
if (speechHandle.interrupted) {
|
|
712
730
|
return;
|
|
@@ -795,10 +813,9 @@ ${instructions}` : instructions,
|
|
|
795
813
|
this.agentSession._updateAgentState("listening");
|
|
796
814
|
}
|
|
797
815
|
}
|
|
798
|
-
async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, instructions, newMessage, toolsMessages) {
|
|
816
|
+
async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) {
|
|
799
817
|
var _a, _b, _c;
|
|
800
818
|
speechHandleStorage.enterWith(speechHandle);
|
|
801
|
-
const replyAbortController = new AbortController();
|
|
802
819
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
803
820
|
const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
804
821
|
chatCtx = chatCtx.copy();
|
|
@@ -841,12 +858,15 @@ ${instructions}` : instructions,
|
|
|
841
858
|
);
|
|
842
859
|
tasks.push(ttsTask);
|
|
843
860
|
}
|
|
844
|
-
await speechHandle.waitIfNotInterrupted([speechHandle.
|
|
861
|
+
await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
|
|
845
862
|
if (speechHandle.interrupted) {
|
|
846
863
|
replyAbortController.abort();
|
|
847
864
|
await (0, import_utils.cancelAndWait)(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
848
865
|
return;
|
|
849
866
|
}
|
|
867
|
+
this.agentSession._updateAgentState("thinking");
|
|
868
|
+
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
869
|
+
speechHandle._clearAuthorization();
|
|
850
870
|
const replyStartedAt = Date.now();
|
|
851
871
|
const trNodeResult = await this.agent.transcriptionNode(llmOutput, modelSettings);
|
|
852
872
|
let textOut = null;
|
|
@@ -893,7 +913,6 @@ ${instructions}` : instructions,
|
|
|
893
913
|
onToolExecutionStarted,
|
|
894
914
|
onToolExecutionCompleted
|
|
895
915
|
});
|
|
896
|
-
tasks.push(executeToolsTask);
|
|
897
916
|
await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
|
|
898
917
|
if (audioOutput) {
|
|
899
918
|
await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
|
|
@@ -948,7 +967,7 @@ ${instructions}` : instructions,
|
|
|
948
967
|
{ speech_id: speechHandle.id, message: forwardedText },
|
|
949
968
|
"playout completed with interrupt"
|
|
950
969
|
);
|
|
951
|
-
speechHandle.
|
|
970
|
+
speechHandle._markGenerationDone();
|
|
952
971
|
await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
953
972
|
return;
|
|
954
973
|
}
|
|
@@ -973,11 +992,11 @@ ${instructions}` : instructions,
|
|
|
973
992
|
} else if (this.agentSession.agentState === "speaking") {
|
|
974
993
|
this.agentSession._updateAgentState("listening");
|
|
975
994
|
}
|
|
976
|
-
speechHandle.
|
|
995
|
+
speechHandle._markGenerationDone();
|
|
977
996
|
await executeToolsTask.result;
|
|
978
997
|
if (toolOutput.output.length === 0) return;
|
|
979
998
|
const { maxToolSteps } = this.agentSession.options;
|
|
980
|
-
if (speechHandle.
|
|
999
|
+
if (speechHandle.numSteps >= maxToolSteps) {
|
|
981
1000
|
this.logger.warn(
|
|
982
1001
|
{ speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
|
|
983
1002
|
"maximum number of function calls steps reached"
|
|
@@ -1032,7 +1051,7 @@ ${instructions}` : instructions,
|
|
|
1032
1051
|
chatCtx.insert(toolMessages);
|
|
1033
1052
|
const handle = import_speech_handle.SpeechHandle.create({
|
|
1034
1053
|
allowInterruptions: speechHandle.allowInterruptions,
|
|
1035
|
-
stepIndex: speechHandle.
|
|
1054
|
+
stepIndex: speechHandle._stepIndex + 1,
|
|
1036
1055
|
parent: speechHandle
|
|
1037
1056
|
});
|
|
1038
1057
|
this.agentSession.emit(
|
|
@@ -1045,14 +1064,17 @@ ${instructions}` : instructions,
|
|
|
1045
1064
|
);
|
|
1046
1065
|
const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
|
|
1047
1066
|
const toolResponseTask = this.createSpeechTask({
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1067
|
+
task: import_utils.Task.from(
|
|
1068
|
+
() => this.pipelineReplyTask(
|
|
1069
|
+
handle,
|
|
1070
|
+
chatCtx,
|
|
1071
|
+
toolCtx,
|
|
1072
|
+
{ toolChoice: respondToolChoice },
|
|
1073
|
+
replyAbortController,
|
|
1074
|
+
instructions,
|
|
1075
|
+
void 0,
|
|
1076
|
+
toolMessages
|
|
1077
|
+
)
|
|
1056
1078
|
),
|
|
1057
1079
|
ownedSpeechHandle: handle,
|
|
1058
1080
|
name: "AgentActivity.pipelineReply"
|
|
@@ -1066,7 +1088,7 @@ ${instructions}` : instructions,
|
|
|
1066
1088
|
this.agent._chatCtx.insert(toolMessages);
|
|
1067
1089
|
}
|
|
1068
1090
|
}
|
|
1069
|
-
async realtimeGenerationTask(speechHandle, ev, modelSettings) {
|
|
1091
|
+
async realtimeGenerationTask(speechHandle, ev, modelSettings, replyAbortController) {
|
|
1070
1092
|
var _a, _b, _c;
|
|
1071
1093
|
speechHandleStorage.enterWith(speechHandle);
|
|
1072
1094
|
if (!this.realtimeSession) {
|
|
@@ -1076,20 +1098,20 @@ ${instructions}` : instructions,
|
|
|
1076
1098
|
throw new Error("llm is not a realtime model");
|
|
1077
1099
|
}
|
|
1078
1100
|
this.logger.debug(
|
|
1079
|
-
{ speech_id: speechHandle.id, stepIndex: speechHandle.
|
|
1101
|
+
{ speech_id: speechHandle.id, stepIndex: speechHandle.numSteps },
|
|
1080
1102
|
"realtime generation started"
|
|
1081
1103
|
);
|
|
1082
1104
|
const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
|
|
1083
1105
|
const textOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
|
|
1084
1106
|
const toolCtx = this.realtimeSession.tools;
|
|
1085
1107
|
await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
|
|
1108
|
+
speechHandle._clearAuthorization();
|
|
1086
1109
|
if (speechHandle.interrupted) {
|
|
1087
1110
|
return;
|
|
1088
1111
|
}
|
|
1089
1112
|
const onFirstFrame = () => {
|
|
1090
1113
|
this.agentSession._updateAgentState("speaking");
|
|
1091
1114
|
};
|
|
1092
|
-
const replyAbortController = new AbortController();
|
|
1093
1115
|
const readMessages = async (abortController, outputs) => {
|
|
1094
1116
|
const forwardTasks = [];
|
|
1095
1117
|
try {
|
|
@@ -1173,9 +1195,13 @@ ${instructions}` : instructions,
|
|
|
1173
1195
|
"AgentActivity.realtime_generation.read_tool_stream"
|
|
1174
1196
|
)
|
|
1175
1197
|
);
|
|
1176
|
-
const onToolExecutionStarted = (
|
|
1198
|
+
const onToolExecutionStarted = (f) => {
|
|
1199
|
+
speechHandle._itemAdded([f]);
|
|
1177
1200
|
};
|
|
1178
|
-
const onToolExecutionCompleted = (
|
|
1201
|
+
const onToolExecutionCompleted = (out) => {
|
|
1202
|
+
if (out.toolCallOutput) {
|
|
1203
|
+
speechHandle._itemAdded([out.toolCallOutput]);
|
|
1204
|
+
}
|
|
1179
1205
|
};
|
|
1180
1206
|
const [executeToolsTask, toolOutput] = (0, import_generation.performToolExecutions)({
|
|
1181
1207
|
session: this.agentSession,
|
|
@@ -1231,7 +1257,7 @@ ${instructions}` : instructions,
|
|
|
1231
1257
|
interrupted: true
|
|
1232
1258
|
});
|
|
1233
1259
|
this.agent._chatCtx.insert(message);
|
|
1234
|
-
speechHandle.
|
|
1260
|
+
speechHandle._itemAdded([message]);
|
|
1235
1261
|
this.agentSession._conversationItemAdded(message);
|
|
1236
1262
|
}
|
|
1237
1263
|
this.logger.info(
|
|
@@ -1239,7 +1265,7 @@ ${instructions}` : instructions,
|
|
|
1239
1265
|
"playout completed with interrupt"
|
|
1240
1266
|
);
|
|
1241
1267
|
}
|
|
1242
|
-
speechHandle.
|
|
1268
|
+
speechHandle._markGenerationDone();
|
|
1243
1269
|
await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
1244
1270
|
return;
|
|
1245
1271
|
}
|
|
@@ -1252,17 +1278,17 @@ ${instructions}` : instructions,
|
|
|
1252
1278
|
interrupted: false
|
|
1253
1279
|
});
|
|
1254
1280
|
this.agent._chatCtx.insert(message);
|
|
1255
|
-
speechHandle.
|
|
1281
|
+
speechHandle._itemAdded([message]);
|
|
1256
1282
|
this.agentSession._conversationItemAdded(message);
|
|
1257
1283
|
}
|
|
1258
|
-
speechHandle.
|
|
1284
|
+
speechHandle._markGenerationDone();
|
|
1259
1285
|
toolOutput.firstToolStartedFuture.await.finally(() => {
|
|
1260
1286
|
this.agentSession._updateAgentState("thinking");
|
|
1261
1287
|
});
|
|
1262
1288
|
await executeToolsTask.result;
|
|
1263
1289
|
if (toolOutput.output.length === 0) return;
|
|
1264
1290
|
const { maxToolSteps } = this.agentSession.options;
|
|
1265
|
-
if (speechHandle.
|
|
1291
|
+
if (speechHandle.numSteps >= maxToolSteps) {
|
|
1266
1292
|
this.logger.warn(
|
|
1267
1293
|
{ speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
|
|
1268
1294
|
"maximum number of function calls steps reached"
|
|
@@ -1326,7 +1352,7 @@ ${instructions}` : instructions,
|
|
|
1326
1352
|
this.realtimeSession.interrupt();
|
|
1327
1353
|
const replySpeechHandle = import_speech_handle.SpeechHandle.create({
|
|
1328
1354
|
allowInterruptions: speechHandle.allowInterruptions,
|
|
1329
|
-
stepIndex: speechHandle.
|
|
1355
|
+
stepIndex: speechHandle.numSteps + 1,
|
|
1330
1356
|
parent: speechHandle
|
|
1331
1357
|
});
|
|
1332
1358
|
this.agentSession.emit(
|
|
@@ -1339,10 +1365,13 @@ ${instructions}` : instructions,
|
|
|
1339
1365
|
);
|
|
1340
1366
|
const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
|
|
1341
1367
|
this.createSpeechTask({
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1368
|
+
task: import_utils.Task.from(
|
|
1369
|
+
(abortController) => this.realtimeReplyTask({
|
|
1370
|
+
speechHandle: replySpeechHandle,
|
|
1371
|
+
modelSettings: { toolChoice },
|
|
1372
|
+
abortController
|
|
1373
|
+
})
|
|
1374
|
+
),
|
|
1346
1375
|
ownedSpeechHandle: replySpeechHandle,
|
|
1347
1376
|
name: "AgentActivity.realtime_reply"
|
|
1348
1377
|
});
|
|
@@ -1352,7 +1381,8 @@ ${instructions}` : instructions,
|
|
|
1352
1381
|
speechHandle,
|
|
1353
1382
|
modelSettings: { toolChoice },
|
|
1354
1383
|
userInput,
|
|
1355
|
-
instructions
|
|
1384
|
+
instructions,
|
|
1385
|
+
abortController
|
|
1356
1386
|
}) {
|
|
1357
1387
|
speechHandleStorage.enterWith(speechHandle);
|
|
1358
1388
|
if (!this.realtimeSession) {
|
|
@@ -1375,18 +1405,24 @@ ${instructions}` : instructions,
|
|
|
1375
1405
|
}
|
|
1376
1406
|
try {
|
|
1377
1407
|
const generationEvent = await this.realtimeSession.generateReply(instructions);
|
|
1378
|
-
await this.realtimeGenerationTask(
|
|
1408
|
+
await this.realtimeGenerationTask(
|
|
1409
|
+
speechHandle,
|
|
1410
|
+
generationEvent,
|
|
1411
|
+
{ toolChoice },
|
|
1412
|
+
abortController
|
|
1413
|
+
);
|
|
1379
1414
|
} finally {
|
|
1380
1415
|
if (toolChoice !== void 0 && toolChoice !== originalToolChoice) {
|
|
1381
1416
|
this.realtimeSession.updateOptions({ toolChoice: originalToolChoice });
|
|
1382
1417
|
}
|
|
1383
1418
|
}
|
|
1384
1419
|
}
|
|
1385
|
-
scheduleSpeech(speechHandle, priority,
|
|
1386
|
-
if (this.draining && !
|
|
1420
|
+
scheduleSpeech(speechHandle, priority, force = false) {
|
|
1421
|
+
if (this.draining && !force) {
|
|
1387
1422
|
throw new Error("cannot schedule new speech, the agent is draining");
|
|
1388
1423
|
}
|
|
1389
1424
|
this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]);
|
|
1425
|
+
speechHandle._markScheduled();
|
|
1390
1426
|
this.wakeupMainTask();
|
|
1391
1427
|
}
|
|
1392
1428
|
async drain() {
|
|
@@ -1395,7 +1431,7 @@ ${instructions}` : instructions,
|
|
|
1395
1431
|
try {
|
|
1396
1432
|
if (this._draining) return;
|
|
1397
1433
|
this.createSpeechTask({
|
|
1398
|
-
|
|
1434
|
+
task: import_utils.Task.from(() => this.agent.onExit()),
|
|
1399
1435
|
name: "AgentActivity_onExit"
|
|
1400
1436
|
});
|
|
1401
1437
|
this.wakeupMainTask();
|