@livekit/agents 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/dist/index.cjs +2 -5
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -3
  4. package/dist/index.d.ts +2 -3
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +1 -3
  7. package/dist/index.js.map +1 -1
  8. package/dist/tokenize/basic/hyphenator.cjs.map +1 -1
  9. package/dist/tokenize/basic/hyphenator.js.map +1 -1
  10. package/dist/utils.cjs +77 -0
  11. package/dist/utils.cjs.map +1 -1
  12. package/dist/utils.d.cts +21 -0
  13. package/dist/utils.d.ts +21 -0
  14. package/dist/utils.d.ts.map +1 -1
  15. package/dist/utils.js +76 -1
  16. package/dist/utils.js.map +1 -1
  17. package/dist/voice/agent_activity.cjs +112 -71
  18. package/dist/voice/agent_activity.cjs.map +1 -1
  19. package/dist/voice/agent_activity.d.ts.map +1 -1
  20. package/dist/voice/agent_activity.js +112 -71
  21. package/dist/voice/agent_activity.js.map +1 -1
  22. package/dist/voice/avatar/datastream_io.cjs +204 -0
  23. package/dist/voice/avatar/datastream_io.cjs.map +1 -0
  24. package/dist/voice/avatar/datastream_io.d.cts +37 -0
  25. package/dist/voice/avatar/datastream_io.d.ts +37 -0
  26. package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
  27. package/dist/voice/avatar/datastream_io.js +188 -0
  28. package/dist/voice/avatar/datastream_io.js.map +1 -0
  29. package/dist/{multimodal → voice/avatar}/index.cjs +4 -4
  30. package/dist/voice/avatar/index.cjs.map +1 -0
  31. package/dist/voice/avatar/index.d.cts +2 -0
  32. package/dist/voice/avatar/index.d.ts +2 -0
  33. package/dist/voice/avatar/index.d.ts.map +1 -0
  34. package/dist/voice/avatar/index.js +2 -0
  35. package/dist/voice/avatar/index.js.map +1 -0
  36. package/dist/voice/index.cjs +2 -0
  37. package/dist/voice/index.cjs.map +1 -1
  38. package/dist/voice/index.d.cts +1 -0
  39. package/dist/voice/index.d.ts +1 -0
  40. package/dist/voice/index.d.ts.map +1 -1
  41. package/dist/voice/index.js +1 -0
  42. package/dist/voice/index.js.map +1 -1
  43. package/dist/voice/io.cjs.map +1 -1
  44. package/dist/voice/io.d.cts +1 -1
  45. package/dist/voice/io.d.ts +1 -1
  46. package/dist/voice/io.d.ts.map +1 -1
  47. package/dist/voice/io.js.map +1 -1
  48. package/dist/voice/room_io/_input.cjs +2 -1
  49. package/dist/voice/room_io/_input.cjs.map +1 -1
  50. package/dist/voice/room_io/_input.d.ts.map +1 -1
  51. package/dist/voice/room_io/_input.js +2 -1
  52. package/dist/voice/room_io/_input.js.map +1 -1
  53. package/dist/voice/run_context.cjs +13 -0
  54. package/dist/voice/run_context.cjs.map +1 -1
  55. package/dist/voice/run_context.d.cts +10 -0
  56. package/dist/voice/run_context.d.ts +10 -0
  57. package/dist/voice/run_context.d.ts.map +1 -1
  58. package/dist/voice/run_context.js +13 -0
  59. package/dist/voice/run_context.js.map +1 -1
  60. package/dist/voice/speech_handle.cjs +152 -30
  61. package/dist/voice/speech_handle.cjs.map +1 -1
  62. package/dist/voice/speech_handle.d.cts +67 -16
  63. package/dist/voice/speech_handle.d.ts +67 -16
  64. package/dist/voice/speech_handle.d.ts.map +1 -1
  65. package/dist/voice/speech_handle.js +153 -31
  66. package/dist/voice/speech_handle.js.map +1 -1
  67. package/dist/worker.cjs +4 -1
  68. package/dist/worker.cjs.map +1 -1
  69. package/dist/worker.d.ts.map +1 -1
  70. package/dist/worker.js +4 -1
  71. package/dist/worker.js.map +1 -1
  72. package/package.json +2 -2
  73. package/src/index.ts +2 -3
  74. package/src/tokenize/basic/hyphenator.ts +1 -1
  75. package/src/utils.ts +121 -1
  76. package/src/voice/agent_activity.ts +128 -78
  77. package/src/voice/avatar/datastream_io.ts +247 -0
  78. package/src/voice/avatar/index.ts +4 -0
  79. package/src/voice/index.ts +2 -0
  80. package/src/voice/io.ts +1 -1
  81. package/src/voice/room_io/_input.ts +8 -3
  82. package/src/voice/run_context.ts +16 -2
  83. package/src/voice/speech_handle.ts +183 -38
  84. package/src/worker.ts +5 -1
  85. package/dist/multimodal/agent_playout.cjs +0 -233
  86. package/dist/multimodal/agent_playout.cjs.map +0 -1
  87. package/dist/multimodal/agent_playout.d.cts +0 -34
  88. package/dist/multimodal/agent_playout.d.ts +0 -34
  89. package/dist/multimodal/agent_playout.d.ts.map +0 -1
  90. package/dist/multimodal/agent_playout.js +0 -207
  91. package/dist/multimodal/agent_playout.js.map +0 -1
  92. package/dist/multimodal/index.cjs.map +0 -1
  93. package/dist/multimodal/index.d.cts +0 -2
  94. package/dist/multimodal/index.d.ts +0 -2
  95. package/dist/multimodal/index.d.ts.map +0 -1
  96. package/dist/multimodal/index.js +0 -2
  97. package/dist/multimodal/index.js.map +0 -1
  98. package/src/multimodal/agent_playout.ts +0 -266
  99. package/src/multimodal/index.ts +0 -4
@@ -193,7 +193,7 @@ class AgentActivity {
193
193
  this.started = true;
194
194
  this._mainTask = import_utils.Task.from(({ signal }) => this.mainTask(signal));
195
195
  this.createSpeechTask({
196
- promise: this.agent.onEnter(),
196
+ task: import_utils.Task.from(() => this.agent.onEnter()),
197
197
  name: "AgentActivity_onEnter"
198
198
  });
199
199
  } finally {
@@ -312,7 +312,9 @@ class AgentActivity {
312
312
  })
313
313
  );
314
314
  const task = this.createSpeechTask({
315
- promise: this.ttsTask(handle, text, addToChatCtx, {}, audio),
315
+ task: import_utils.Task.from(
316
+ (abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
317
+ ),
316
318
  ownedSpeechHandle: handle,
317
319
  name: "AgentActivity.say_tts"
318
320
  });
@@ -416,7 +418,9 @@ class AgentActivity {
416
418
  );
417
419
  this.logger.info({ speech_id: handle.id }, "Creating speech handle");
418
420
  this.createSpeechTask({
419
- promise: this.realtimeGenerationTask(handle, ev, {}),
421
+ task: import_utils.Task.from(
422
+ (abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
423
+ ),
420
424
  ownedSpeechHandle: handle,
421
425
  name: "AgentActivity.realtimeGeneration"
422
426
  });
@@ -480,16 +484,23 @@ class AgentActivity {
480
484
  );
481
485
  }
482
486
  createSpeechTask(options) {
483
- const { promise, ownedSpeechHandle } = options;
484
- this.speechTasks.add(promise);
485
- promise.finally(() => {
486
- this.speechTasks.delete(promise);
487
- if (ownedSpeechHandle) {
488
- ownedSpeechHandle._markPlayoutDone();
489
- }
487
+ const { task, ownedSpeechHandle } = options;
488
+ this.speechTasks.add(task);
489
+ task.addDoneCallback(() => {
490
+ this.speechTasks.delete(task);
491
+ });
492
+ if (ownedSpeechHandle) {
493
+ ownedSpeechHandle._tasks.push(task);
494
+ task.addDoneCallback(() => {
495
+ if (ownedSpeechHandle._tasks.every((t) => t.done)) {
496
+ ownedSpeechHandle._markDone();
497
+ }
498
+ });
499
+ }
500
+ task.addDoneCallback(() => {
490
501
  this.wakeupMainTask();
491
502
  });
492
- return promise;
503
+ return task.result;
493
504
  }
494
505
  async onEndOfTurn(info) {
495
506
  if (this.draining) {
@@ -502,7 +513,7 @@ class AgentActivity {
502
513
  }
503
514
  const oldTask = this._userTurnCompletedTask;
504
515
  this._userTurnCompletedTask = this.createSpeechTask({
505
- promise: this.userTurnCompleted(info, oldTask),
516
+ task: import_utils.Task.from(() => this.userTurnCompleted(info, oldTask)),
506
517
  name: "AgentActivity.userTurnCompleted"
507
518
  });
508
519
  return true;
@@ -528,8 +539,8 @@ class AgentActivity {
528
539
  }
529
540
  const speechHandle = heapItem[2];
530
541
  this._currentSpeech = speechHandle;
531
- speechHandle._authorizePlayout();
532
- await speechHandle.waitForPlayout();
542
+ speechHandle._authorizeGeneration();
543
+ await speechHandle._waitForGeneration();
533
544
  this._currentSpeech = void 0;
534
545
  }
535
546
  if (this.draining && this.speechTasks.size === 0) {
@@ -582,16 +593,19 @@ class AgentActivity {
582
593
  this.logger.info({ speech_id: handle.id }, "Creating speech handle");
583
594
  if (this.llm instanceof import_llm.RealtimeModel) {
584
595
  this.createSpeechTask({
585
- promise: this.realtimeReplyTask({
586
- speechHandle: handle,
587
- // TODO(brian): support llm.ChatMessage for the realtime model
588
- userInput: userMessage == null ? void 0 : userMessage.textContent,
589
- instructions,
590
- modelSettings: {
591
- // isGiven(toolChoice) = toolChoice !== undefined
592
- toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
593
- }
594
- }),
596
+ task: import_utils.Task.from(
597
+ (abortController) => this.realtimeReplyTask({
598
+ speechHandle: handle,
599
+ // TODO(brian): support llm.ChatMessage for the realtime model
600
+ userInput: userMessage == null ? void 0 : userMessage.textContent,
601
+ instructions,
602
+ modelSettings: {
603
+ // isGiven(toolChoice) = toolChoice !== undefined
604
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
605
+ },
606
+ abortController
607
+ })
608
+ ),
595
609
  ownedSpeechHandle: handle,
596
610
  name: "AgentActivity.realtimeReply"
597
611
  });
@@ -601,14 +615,19 @@ class AgentActivity {
601
615
  ${instructions}`;
602
616
  }
603
617
  const task = this.createSpeechTask({
604
- promise: this.pipelineReplyTask(
605
- handle,
606
- chatCtx ?? this.agent.chatCtx,
607
- this.agent.toolCtx,
608
- { toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice) },
609
- instructions ? `${this.agent.instructions}
618
+ task: import_utils.Task.from(
619
+ (abortController) => this.pipelineReplyTask(
620
+ handle,
621
+ chatCtx ?? this.agent.chatCtx,
622
+ this.agent.toolCtx,
623
+ {
624
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
625
+ },
626
+ abortController,
627
+ instructions ? `${this.agent.instructions}
610
628
  ${instructions}` : instructions,
611
- userMessage
629
+ userMessage
630
+ )
612
631
  ),
613
632
  ownedSpeechHandle: handle,
614
633
  name: "AgentActivity.pipelineReply"
@@ -630,7 +649,7 @@ ${instructions}` : instructions,
630
649
  if (currentSpeech === void 0) {
631
650
  future.resolve();
632
651
  } else {
633
- currentSpeech.then(() => {
652
+ currentSpeech.addDoneCallback(() => {
634
653
  if (future.done) return;
635
654
  future.resolve();
636
655
  });
@@ -638,7 +657,7 @@ ${instructions}` : instructions,
638
657
  return future;
639
658
  }
640
659
  onPipelineReplyDone() {
641
- if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done)) {
660
+ if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done())) {
642
661
  this.agentSession._updateAgentState("listening");
643
662
  }
644
663
  }
@@ -702,11 +721,10 @@ ${instructions}` : instructions,
702
721
  (0, import_events.createMetricsCollectedEvent)({ metrics: eouMetrics })
703
722
  );
704
723
  }
705
- async ttsTask(speechHandle, text, addToChatCtx, modelSettings, audio) {
724
+ async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
706
725
  speechHandleStorage.enterWith(speechHandle);
707
726
  const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
708
727
  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
709
- const replyAbortController = new AbortController();
710
728
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
711
729
  if (speechHandle.interrupted) {
712
730
  return;
@@ -795,10 +813,9 @@ ${instructions}` : instructions,
795
813
  this.agentSession._updateAgentState("listening");
796
814
  }
797
815
  }
798
- async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, instructions, newMessage, toolsMessages) {
816
+ async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) {
799
817
  var _a, _b, _c;
800
818
  speechHandleStorage.enterWith(speechHandle);
801
- const replyAbortController = new AbortController();
802
819
  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
803
820
  const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
804
821
  chatCtx = chatCtx.copy();
@@ -841,12 +858,20 @@ ${instructions}` : instructions,
841
858
  );
842
859
  tasks.push(ttsTask);
843
860
  }
844
- await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
861
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
862
+ if (newMessage && speechHandle.scheduled) {
863
+ chatCtx.insert(newMessage);
864
+ this.agent._chatCtx.insert(newMessage);
865
+ this.agentSession._conversationItemAdded(newMessage);
866
+ }
845
867
  if (speechHandle.interrupted) {
846
868
  replyAbortController.abort();
847
869
  await (0, import_utils.cancelAndWait)(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
848
870
  return;
849
871
  }
872
+ this.agentSession._updateAgentState("thinking");
873
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
874
+ speechHandle._clearAuthorization();
850
875
  const replyStartedAt = Date.now();
851
876
  const trNodeResult = await this.agent.transcriptionNode(llmOutput, modelSettings);
852
877
  let textOut = null;
@@ -893,7 +918,6 @@ ${instructions}` : instructions,
893
918
  onToolExecutionStarted,
894
919
  onToolExecutionCompleted
895
920
  });
896
- tasks.push(executeToolsTask);
897
921
  await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
898
922
  if (audioOutput) {
899
923
  await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
@@ -948,7 +972,7 @@ ${instructions}` : instructions,
948
972
  { speech_id: speechHandle.id, message: forwardedText },
949
973
  "playout completed with interrupt"
950
974
  );
951
- speechHandle._markPlayoutDone();
975
+ speechHandle._markGenerationDone();
952
976
  await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
953
977
  return;
954
978
  }
@@ -973,11 +997,11 @@ ${instructions}` : instructions,
973
997
  } else if (this.agentSession.agentState === "speaking") {
974
998
  this.agentSession._updateAgentState("listening");
975
999
  }
976
- speechHandle._markPlayoutDone();
1000
+ speechHandle._markGenerationDone();
977
1001
  await executeToolsTask.result;
978
1002
  if (toolOutput.output.length === 0) return;
979
1003
  const { maxToolSteps } = this.agentSession.options;
980
- if (speechHandle.stepIndex >= maxToolSteps) {
1004
+ if (speechHandle.numSteps >= maxToolSteps) {
981
1005
  this.logger.warn(
982
1006
  { speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
983
1007
  "maximum number of function calls steps reached"
@@ -1032,7 +1056,7 @@ ${instructions}` : instructions,
1032
1056
  chatCtx.insert(toolMessages);
1033
1057
  const handle = import_speech_handle.SpeechHandle.create({
1034
1058
  allowInterruptions: speechHandle.allowInterruptions,
1035
- stepIndex: speechHandle.stepIndex + 1,
1059
+ stepIndex: speechHandle._stepIndex + 1,
1036
1060
  parent: speechHandle
1037
1061
  });
1038
1062
  this.agentSession.emit(
@@ -1045,14 +1069,17 @@ ${instructions}` : instructions,
1045
1069
  );
1046
1070
  const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1047
1071
  const toolResponseTask = this.createSpeechTask({
1048
- promise: this.pipelineReplyTask(
1049
- handle,
1050
- chatCtx,
1051
- toolCtx,
1052
- { toolChoice: respondToolChoice },
1053
- instructions,
1054
- void 0,
1055
- toolMessages
1072
+ task: import_utils.Task.from(
1073
+ () => this.pipelineReplyTask(
1074
+ handle,
1075
+ chatCtx,
1076
+ toolCtx,
1077
+ { toolChoice: respondToolChoice },
1078
+ replyAbortController,
1079
+ instructions,
1080
+ void 0,
1081
+ toolMessages
1082
+ )
1056
1083
  ),
1057
1084
  ownedSpeechHandle: handle,
1058
1085
  name: "AgentActivity.pipelineReply"
@@ -1066,7 +1093,7 @@ ${instructions}` : instructions,
1066
1093
  this.agent._chatCtx.insert(toolMessages);
1067
1094
  }
1068
1095
  }
1069
- async realtimeGenerationTask(speechHandle, ev, modelSettings) {
1096
+ async realtimeGenerationTask(speechHandle, ev, modelSettings, replyAbortController) {
1070
1097
  var _a, _b, _c;
1071
1098
  speechHandleStorage.enterWith(speechHandle);
1072
1099
  if (!this.realtimeSession) {
@@ -1076,20 +1103,20 @@ ${instructions}` : instructions,
1076
1103
  throw new Error("llm is not a realtime model");
1077
1104
  }
1078
1105
  this.logger.debug(
1079
- { speech_id: speechHandle.id, stepIndex: speechHandle.stepIndex },
1106
+ { speech_id: speechHandle.id, stepIndex: speechHandle.numSteps },
1080
1107
  "realtime generation started"
1081
1108
  );
1082
1109
  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
1083
1110
  const textOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
1084
1111
  const toolCtx = this.realtimeSession.tools;
1085
1112
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
1113
+ speechHandle._clearAuthorization();
1086
1114
  if (speechHandle.interrupted) {
1087
1115
  return;
1088
1116
  }
1089
1117
  const onFirstFrame = () => {
1090
1118
  this.agentSession._updateAgentState("speaking");
1091
1119
  };
1092
- const replyAbortController = new AbortController();
1093
1120
  const readMessages = async (abortController, outputs) => {
1094
1121
  const forwardTasks = [];
1095
1122
  try {
@@ -1173,9 +1200,13 @@ ${instructions}` : instructions,
1173
1200
  "AgentActivity.realtime_generation.read_tool_stream"
1174
1201
  )
1175
1202
  );
1176
- const onToolExecutionStarted = (_) => {
1203
+ const onToolExecutionStarted = (f) => {
1204
+ speechHandle._itemAdded([f]);
1177
1205
  };
1178
- const onToolExecutionCompleted = (_) => {
1206
+ const onToolExecutionCompleted = (out) => {
1207
+ if (out.toolCallOutput) {
1208
+ speechHandle._itemAdded([out.toolCallOutput]);
1209
+ }
1179
1210
  };
1180
1211
  const [executeToolsTask, toolOutput] = (0, import_generation.performToolExecutions)({
1181
1212
  session: this.agentSession,
@@ -1231,7 +1262,7 @@ ${instructions}` : instructions,
1231
1262
  interrupted: true
1232
1263
  });
1233
1264
  this.agent._chatCtx.insert(message);
1234
- speechHandle._setChatMessage(message);
1265
+ speechHandle._itemAdded([message]);
1235
1266
  this.agentSession._conversationItemAdded(message);
1236
1267
  }
1237
1268
  this.logger.info(
@@ -1239,7 +1270,7 @@ ${instructions}` : instructions,
1239
1270
  "playout completed with interrupt"
1240
1271
  );
1241
1272
  }
1242
- speechHandle._markPlayoutDone();
1273
+ speechHandle._markGenerationDone();
1243
1274
  await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1244
1275
  return;
1245
1276
  }
@@ -1252,17 +1283,17 @@ ${instructions}` : instructions,
1252
1283
  interrupted: false
1253
1284
  });
1254
1285
  this.agent._chatCtx.insert(message);
1255
- speechHandle._setChatMessage(message);
1286
+ speechHandle._itemAdded([message]);
1256
1287
  this.agentSession._conversationItemAdded(message);
1257
1288
  }
1258
- speechHandle._markPlayoutDone();
1289
+ speechHandle._markGenerationDone();
1259
1290
  toolOutput.firstToolStartedFuture.await.finally(() => {
1260
1291
  this.agentSession._updateAgentState("thinking");
1261
1292
  });
1262
1293
  await executeToolsTask.result;
1263
1294
  if (toolOutput.output.length === 0) return;
1264
1295
  const { maxToolSteps } = this.agentSession.options;
1265
- if (speechHandle.stepIndex >= maxToolSteps) {
1296
+ if (speechHandle.numSteps >= maxToolSteps) {
1266
1297
  this.logger.warn(
1267
1298
  { speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
1268
1299
  "maximum number of function calls steps reached"
@@ -1326,7 +1357,7 @@ ${instructions}` : instructions,
1326
1357
  this.realtimeSession.interrupt();
1327
1358
  const replySpeechHandle = import_speech_handle.SpeechHandle.create({
1328
1359
  allowInterruptions: speechHandle.allowInterruptions,
1329
- stepIndex: speechHandle.stepIndex + 1,
1360
+ stepIndex: speechHandle.numSteps + 1,
1330
1361
  parent: speechHandle
1331
1362
  });
1332
1363
  this.agentSession.emit(
@@ -1339,10 +1370,13 @@ ${instructions}` : instructions,
1339
1370
  );
1340
1371
  const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1341
1372
  this.createSpeechTask({
1342
- promise: this.realtimeReplyTask({
1343
- speechHandle: replySpeechHandle,
1344
- modelSettings: { toolChoice }
1345
- }),
1373
+ task: import_utils.Task.from(
1374
+ (abortController) => this.realtimeReplyTask({
1375
+ speechHandle: replySpeechHandle,
1376
+ modelSettings: { toolChoice },
1377
+ abortController
1378
+ })
1379
+ ),
1346
1380
  ownedSpeechHandle: replySpeechHandle,
1347
1381
  name: "AgentActivity.realtime_reply"
1348
1382
  });
@@ -1352,7 +1386,8 @@ ${instructions}` : instructions,
1352
1386
  speechHandle,
1353
1387
  modelSettings: { toolChoice },
1354
1388
  userInput,
1355
- instructions
1389
+ instructions,
1390
+ abortController
1356
1391
  }) {
1357
1392
  speechHandleStorage.enterWith(speechHandle);
1358
1393
  if (!this.realtimeSession) {
@@ -1375,18 +1410,24 @@ ${instructions}` : instructions,
1375
1410
  }
1376
1411
  try {
1377
1412
  const generationEvent = await this.realtimeSession.generateReply(instructions);
1378
- await this.realtimeGenerationTask(speechHandle, generationEvent, { toolChoice });
1413
+ await this.realtimeGenerationTask(
1414
+ speechHandle,
1415
+ generationEvent,
1416
+ { toolChoice },
1417
+ abortController
1418
+ );
1379
1419
  } finally {
1380
1420
  if (toolChoice !== void 0 && toolChoice !== originalToolChoice) {
1381
1421
  this.realtimeSession.updateOptions({ toolChoice: originalToolChoice });
1382
1422
  }
1383
1423
  }
1384
1424
  }
1385
- scheduleSpeech(speechHandle, priority, bypassDraining = false) {
1386
- if (this.draining && !bypassDraining) {
1425
+ scheduleSpeech(speechHandle, priority, force = false) {
1426
+ if (this.draining && !force) {
1387
1427
  throw new Error("cannot schedule new speech, the agent is draining");
1388
1428
  }
1389
1429
  this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]);
1430
+ speechHandle._markScheduled();
1390
1431
  this.wakeupMainTask();
1391
1432
  }
1392
1433
  async drain() {
@@ -1395,7 +1436,7 @@ ${instructions}` : instructions,
1395
1436
  try {
1396
1437
  if (this._draining) return;
1397
1438
  this.createSpeechTask({
1398
- promise: this.agent.onExit(),
1439
+ task: import_utils.Task.from(() => this.agent.onExit()),
1399
1440
  name: "AgentActivity_onExit"
1400
1441
  });
1401
1442
  this.wakeupMainTask();