@livekit/agents 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/dist/index.cjs +2 -5
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -3
  4. package/dist/index.d.ts +2 -3
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +1 -3
  7. package/dist/index.js.map +1 -1
  8. package/dist/tokenize/basic/hyphenator.cjs.map +1 -1
  9. package/dist/tokenize/basic/hyphenator.js.map +1 -1
  10. package/dist/utils.cjs +77 -0
  11. package/dist/utils.cjs.map +1 -1
  12. package/dist/utils.d.cts +21 -0
  13. package/dist/utils.d.ts +21 -0
  14. package/dist/utils.d.ts.map +1 -1
  15. package/dist/utils.js +76 -1
  16. package/dist/utils.js.map +1 -1
  17. package/dist/voice/agent_activity.cjs +107 -71
  18. package/dist/voice/agent_activity.cjs.map +1 -1
  19. package/dist/voice/agent_activity.d.ts.map +1 -1
  20. package/dist/voice/agent_activity.js +107 -71
  21. package/dist/voice/agent_activity.js.map +1 -1
  22. package/dist/voice/avatar/datastream_io.cjs +204 -0
  23. package/dist/voice/avatar/datastream_io.cjs.map +1 -0
  24. package/dist/voice/avatar/datastream_io.d.cts +37 -0
  25. package/dist/voice/avatar/datastream_io.d.ts +37 -0
  26. package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
  27. package/dist/voice/avatar/datastream_io.js +188 -0
  28. package/dist/voice/avatar/datastream_io.js.map +1 -0
  29. package/dist/{multimodal → voice/avatar}/index.cjs +4 -4
  30. package/dist/voice/avatar/index.cjs.map +1 -0
  31. package/dist/voice/avatar/index.d.cts +2 -0
  32. package/dist/voice/avatar/index.d.ts +2 -0
  33. package/dist/voice/avatar/index.d.ts.map +1 -0
  34. package/dist/voice/avatar/index.js +2 -0
  35. package/dist/voice/avatar/index.js.map +1 -0
  36. package/dist/voice/index.cjs +2 -0
  37. package/dist/voice/index.cjs.map +1 -1
  38. package/dist/voice/index.d.cts +1 -0
  39. package/dist/voice/index.d.ts +1 -0
  40. package/dist/voice/index.d.ts.map +1 -1
  41. package/dist/voice/index.js +1 -0
  42. package/dist/voice/index.js.map +1 -1
  43. package/dist/voice/io.cjs.map +1 -1
  44. package/dist/voice/io.d.cts +1 -1
  45. package/dist/voice/io.d.ts +1 -1
  46. package/dist/voice/io.d.ts.map +1 -1
  47. package/dist/voice/io.js.map +1 -1
  48. package/dist/voice/room_io/_input.cjs +3 -1
  49. package/dist/voice/room_io/_input.cjs.map +1 -1
  50. package/dist/voice/room_io/_input.d.ts.map +1 -1
  51. package/dist/voice/room_io/_input.js +3 -1
  52. package/dist/voice/room_io/_input.js.map +1 -1
  53. package/dist/voice/run_context.cjs +13 -0
  54. package/dist/voice/run_context.cjs.map +1 -1
  55. package/dist/voice/run_context.d.cts +10 -0
  56. package/dist/voice/run_context.d.ts +10 -0
  57. package/dist/voice/run_context.d.ts.map +1 -1
  58. package/dist/voice/run_context.js +13 -0
  59. package/dist/voice/run_context.js.map +1 -1
  60. package/dist/voice/speech_handle.cjs +152 -30
  61. package/dist/voice/speech_handle.cjs.map +1 -1
  62. package/dist/voice/speech_handle.d.cts +67 -16
  63. package/dist/voice/speech_handle.d.ts +67 -16
  64. package/dist/voice/speech_handle.d.ts.map +1 -1
  65. package/dist/voice/speech_handle.js +153 -31
  66. package/dist/voice/speech_handle.js.map +1 -1
  67. package/dist/worker.cjs +4 -1
  68. package/dist/worker.cjs.map +1 -1
  69. package/dist/worker.d.ts.map +1 -1
  70. package/dist/worker.js +4 -1
  71. package/dist/worker.js.map +1 -1
  72. package/package.json +2 -2
  73. package/src/index.ts +2 -3
  74. package/src/tokenize/basic/hyphenator.ts +1 -1
  75. package/src/utils.ts +121 -1
  76. package/src/voice/agent_activity.ts +122 -78
  77. package/src/voice/avatar/datastream_io.ts +247 -0
  78. package/src/voice/avatar/index.ts +4 -0
  79. package/src/voice/index.ts +2 -0
  80. package/src/voice/io.ts +1 -1
  81. package/src/voice/room_io/_input.ts +9 -3
  82. package/src/voice/run_context.ts +16 -2
  83. package/src/voice/speech_handle.ts +183 -38
  84. package/src/worker.ts +5 -1
  85. package/dist/multimodal/agent_playout.cjs +0 -233
  86. package/dist/multimodal/agent_playout.cjs.map +0 -1
  87. package/dist/multimodal/agent_playout.d.cts +0 -34
  88. package/dist/multimodal/agent_playout.d.ts +0 -34
  89. package/dist/multimodal/agent_playout.d.ts.map +0 -1
  90. package/dist/multimodal/agent_playout.js +0 -207
  91. package/dist/multimodal/agent_playout.js.map +0 -1
  92. package/dist/multimodal/index.cjs.map +0 -1
  93. package/dist/multimodal/index.d.cts +0 -2
  94. package/dist/multimodal/index.d.ts +0 -2
  95. package/dist/multimodal/index.d.ts.map +0 -1
  96. package/dist/multimodal/index.js +0 -2
  97. package/dist/multimodal/index.js.map +0 -1
  98. package/src/multimodal/agent_playout.ts +0 -266
  99. package/src/multimodal/index.ts +0 -4
@@ -190,7 +190,7 @@ class AgentActivity {
190
190
  this.started = true;
191
191
  this._mainTask = Task.from(({ signal }) => this.mainTask(signal));
192
192
  this.createSpeechTask({
193
- promise: this.agent.onEnter(),
193
+ task: Task.from(() => this.agent.onEnter()),
194
194
  name: "AgentActivity_onEnter"
195
195
  });
196
196
  } finally {
@@ -309,7 +309,9 @@ class AgentActivity {
309
309
  })
310
310
  );
311
311
  const task = this.createSpeechTask({
312
- promise: this.ttsTask(handle, text, addToChatCtx, {}, audio),
312
+ task: Task.from(
313
+ (abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
314
+ ),
313
315
  ownedSpeechHandle: handle,
314
316
  name: "AgentActivity.say_tts"
315
317
  });
@@ -413,7 +415,9 @@ class AgentActivity {
413
415
  );
414
416
  this.logger.info({ speech_id: handle.id }, "Creating speech handle");
415
417
  this.createSpeechTask({
416
- promise: this.realtimeGenerationTask(handle, ev, {}),
418
+ task: Task.from(
419
+ (abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
420
+ ),
417
421
  ownedSpeechHandle: handle,
418
422
  name: "AgentActivity.realtimeGeneration"
419
423
  });
@@ -477,16 +481,23 @@ class AgentActivity {
477
481
  );
478
482
  }
479
483
  createSpeechTask(options) {
480
- const { promise, ownedSpeechHandle } = options;
481
- this.speechTasks.add(promise);
482
- promise.finally(() => {
483
- this.speechTasks.delete(promise);
484
- if (ownedSpeechHandle) {
485
- ownedSpeechHandle._markPlayoutDone();
486
- }
484
+ const { task, ownedSpeechHandle } = options;
485
+ this.speechTasks.add(task);
486
+ task.addDoneCallback(() => {
487
+ this.speechTasks.delete(task);
488
+ });
489
+ if (ownedSpeechHandle) {
490
+ ownedSpeechHandle._tasks.push(task);
491
+ task.addDoneCallback(() => {
492
+ if (ownedSpeechHandle._tasks.every((t) => t.done)) {
493
+ ownedSpeechHandle._markDone();
494
+ }
495
+ });
496
+ }
497
+ task.addDoneCallback(() => {
487
498
  this.wakeupMainTask();
488
499
  });
489
- return promise;
500
+ return task.result;
490
501
  }
491
502
  async onEndOfTurn(info) {
492
503
  if (this.draining) {
@@ -499,7 +510,7 @@ class AgentActivity {
499
510
  }
500
511
  const oldTask = this._userTurnCompletedTask;
501
512
  this._userTurnCompletedTask = this.createSpeechTask({
502
- promise: this.userTurnCompleted(info, oldTask),
513
+ task: Task.from(() => this.userTurnCompleted(info, oldTask)),
503
514
  name: "AgentActivity.userTurnCompleted"
504
515
  });
505
516
  return true;
@@ -525,8 +536,8 @@ class AgentActivity {
525
536
  }
526
537
  const speechHandle = heapItem[2];
527
538
  this._currentSpeech = speechHandle;
528
- speechHandle._authorizePlayout();
529
- await speechHandle.waitForPlayout();
539
+ speechHandle._authorizeGeneration();
540
+ await speechHandle._waitForGeneration();
530
541
  this._currentSpeech = void 0;
531
542
  }
532
543
  if (this.draining && this.speechTasks.size === 0) {
@@ -579,16 +590,19 @@ class AgentActivity {
579
590
  this.logger.info({ speech_id: handle.id }, "Creating speech handle");
580
591
  if (this.llm instanceof RealtimeModel) {
581
592
  this.createSpeechTask({
582
- promise: this.realtimeReplyTask({
583
- speechHandle: handle,
584
- // TODO(brian): support llm.ChatMessage for the realtime model
585
- userInput: userMessage == null ? void 0 : userMessage.textContent,
586
- instructions,
587
- modelSettings: {
588
- // isGiven(toolChoice) = toolChoice !== undefined
589
- toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
590
- }
591
- }),
593
+ task: Task.from(
594
+ (abortController) => this.realtimeReplyTask({
595
+ speechHandle: handle,
596
+ // TODO(brian): support llm.ChatMessage for the realtime model
597
+ userInput: userMessage == null ? void 0 : userMessage.textContent,
598
+ instructions,
599
+ modelSettings: {
600
+ // isGiven(toolChoice) = toolChoice !== undefined
601
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
602
+ },
603
+ abortController
604
+ })
605
+ ),
592
606
  ownedSpeechHandle: handle,
593
607
  name: "AgentActivity.realtimeReply"
594
608
  });
@@ -598,14 +612,19 @@ class AgentActivity {
598
612
  ${instructions}`;
599
613
  }
600
614
  const task = this.createSpeechTask({
601
- promise: this.pipelineReplyTask(
602
- handle,
603
- chatCtx ?? this.agent.chatCtx,
604
- this.agent.toolCtx,
605
- { toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice) },
606
- instructions ? `${this.agent.instructions}
615
+ task: Task.from(
616
+ (abortController) => this.pipelineReplyTask(
617
+ handle,
618
+ chatCtx ?? this.agent.chatCtx,
619
+ this.agent.toolCtx,
620
+ {
621
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
622
+ },
623
+ abortController,
624
+ instructions ? `${this.agent.instructions}
607
625
  ${instructions}` : instructions,
608
- userMessage
626
+ userMessage
627
+ )
609
628
  ),
610
629
  ownedSpeechHandle: handle,
611
630
  name: "AgentActivity.pipelineReply"
@@ -627,7 +646,7 @@ ${instructions}` : instructions,
627
646
  if (currentSpeech === void 0) {
628
647
  future.resolve();
629
648
  } else {
630
- currentSpeech.then(() => {
649
+ currentSpeech.addDoneCallback(() => {
631
650
  if (future.done) return;
632
651
  future.resolve();
633
652
  });
@@ -635,7 +654,7 @@ ${instructions}` : instructions,
635
654
  return future;
636
655
  }
637
656
  onPipelineReplyDone() {
638
- if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done)) {
657
+ if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done())) {
639
658
  this.agentSession._updateAgentState("listening");
640
659
  }
641
660
  }
@@ -699,11 +718,10 @@ ${instructions}` : instructions,
699
718
  createMetricsCollectedEvent({ metrics: eouMetrics })
700
719
  );
701
720
  }
702
- async ttsTask(speechHandle, text, addToChatCtx, modelSettings, audio) {
721
+ async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
703
722
  speechHandleStorage.enterWith(speechHandle);
704
723
  const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
705
724
  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
706
- const replyAbortController = new AbortController();
707
725
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
708
726
  if (speechHandle.interrupted) {
709
727
  return;
@@ -792,10 +810,9 @@ ${instructions}` : instructions,
792
810
  this.agentSession._updateAgentState("listening");
793
811
  }
794
812
  }
795
- async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, instructions, newMessage, toolsMessages) {
813
+ async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) {
796
814
  var _a, _b, _c;
797
815
  speechHandleStorage.enterWith(speechHandle);
798
- const replyAbortController = new AbortController();
799
816
  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
800
817
  const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
801
818
  chatCtx = chatCtx.copy();
@@ -838,12 +855,15 @@ ${instructions}` : instructions,
838
855
  );
839
856
  tasks.push(ttsTask);
840
857
  }
841
- await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
858
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
842
859
  if (speechHandle.interrupted) {
843
860
  replyAbortController.abort();
844
861
  await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
845
862
  return;
846
863
  }
864
+ this.agentSession._updateAgentState("thinking");
865
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
866
+ speechHandle._clearAuthorization();
847
867
  const replyStartedAt = Date.now();
848
868
  const trNodeResult = await this.agent.transcriptionNode(llmOutput, modelSettings);
849
869
  let textOut = null;
@@ -890,7 +910,6 @@ ${instructions}` : instructions,
890
910
  onToolExecutionStarted,
891
911
  onToolExecutionCompleted
892
912
  });
893
- tasks.push(executeToolsTask);
894
913
  await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
895
914
  if (audioOutput) {
896
915
  await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
@@ -945,7 +964,7 @@ ${instructions}` : instructions,
945
964
  { speech_id: speechHandle.id, message: forwardedText },
946
965
  "playout completed with interrupt"
947
966
  );
948
- speechHandle._markPlayoutDone();
967
+ speechHandle._markGenerationDone();
949
968
  await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
950
969
  return;
951
970
  }
@@ -970,11 +989,11 @@ ${instructions}` : instructions,
970
989
  } else if (this.agentSession.agentState === "speaking") {
971
990
  this.agentSession._updateAgentState("listening");
972
991
  }
973
- speechHandle._markPlayoutDone();
992
+ speechHandle._markGenerationDone();
974
993
  await executeToolsTask.result;
975
994
  if (toolOutput.output.length === 0) return;
976
995
  const { maxToolSteps } = this.agentSession.options;
977
- if (speechHandle.stepIndex >= maxToolSteps) {
996
+ if (speechHandle.numSteps >= maxToolSteps) {
978
997
  this.logger.warn(
979
998
  { speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
980
999
  "maximum number of function calls steps reached"
@@ -1029,7 +1048,7 @@ ${instructions}` : instructions,
1029
1048
  chatCtx.insert(toolMessages);
1030
1049
  const handle = SpeechHandle.create({
1031
1050
  allowInterruptions: speechHandle.allowInterruptions,
1032
- stepIndex: speechHandle.stepIndex + 1,
1051
+ stepIndex: speechHandle._stepIndex + 1,
1033
1052
  parent: speechHandle
1034
1053
  });
1035
1054
  this.agentSession.emit(
@@ -1042,14 +1061,17 @@ ${instructions}` : instructions,
1042
1061
  );
1043
1062
  const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1044
1063
  const toolResponseTask = this.createSpeechTask({
1045
- promise: this.pipelineReplyTask(
1046
- handle,
1047
- chatCtx,
1048
- toolCtx,
1049
- { toolChoice: respondToolChoice },
1050
- instructions,
1051
- void 0,
1052
- toolMessages
1064
+ task: Task.from(
1065
+ () => this.pipelineReplyTask(
1066
+ handle,
1067
+ chatCtx,
1068
+ toolCtx,
1069
+ { toolChoice: respondToolChoice },
1070
+ replyAbortController,
1071
+ instructions,
1072
+ void 0,
1073
+ toolMessages
1074
+ )
1053
1075
  ),
1054
1076
  ownedSpeechHandle: handle,
1055
1077
  name: "AgentActivity.pipelineReply"
@@ -1063,7 +1085,7 @@ ${instructions}` : instructions,
1063
1085
  this.agent._chatCtx.insert(toolMessages);
1064
1086
  }
1065
1087
  }
1066
- async realtimeGenerationTask(speechHandle, ev, modelSettings) {
1088
+ async realtimeGenerationTask(speechHandle, ev, modelSettings, replyAbortController) {
1067
1089
  var _a, _b, _c;
1068
1090
  speechHandleStorage.enterWith(speechHandle);
1069
1091
  if (!this.realtimeSession) {
@@ -1073,20 +1095,20 @@ ${instructions}` : instructions,
1073
1095
  throw new Error("llm is not a realtime model");
1074
1096
  }
1075
1097
  this.logger.debug(
1076
- { speech_id: speechHandle.id, stepIndex: speechHandle.stepIndex },
1098
+ { speech_id: speechHandle.id, stepIndex: speechHandle.numSteps },
1077
1099
  "realtime generation started"
1078
1100
  );
1079
1101
  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
1080
1102
  const textOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
1081
1103
  const toolCtx = this.realtimeSession.tools;
1082
1104
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
1105
+ speechHandle._clearAuthorization();
1083
1106
  if (speechHandle.interrupted) {
1084
1107
  return;
1085
1108
  }
1086
1109
  const onFirstFrame = () => {
1087
1110
  this.agentSession._updateAgentState("speaking");
1088
1111
  };
1089
- const replyAbortController = new AbortController();
1090
1112
  const readMessages = async (abortController, outputs) => {
1091
1113
  const forwardTasks = [];
1092
1114
  try {
@@ -1170,9 +1192,13 @@ ${instructions}` : instructions,
1170
1192
  "AgentActivity.realtime_generation.read_tool_stream"
1171
1193
  )
1172
1194
  );
1173
- const onToolExecutionStarted = (_) => {
1195
+ const onToolExecutionStarted = (f) => {
1196
+ speechHandle._itemAdded([f]);
1174
1197
  };
1175
- const onToolExecutionCompleted = (_) => {
1198
+ const onToolExecutionCompleted = (out) => {
1199
+ if (out.toolCallOutput) {
1200
+ speechHandle._itemAdded([out.toolCallOutput]);
1201
+ }
1176
1202
  };
1177
1203
  const [executeToolsTask, toolOutput] = performToolExecutions({
1178
1204
  session: this.agentSession,
@@ -1228,7 +1254,7 @@ ${instructions}` : instructions,
1228
1254
  interrupted: true
1229
1255
  });
1230
1256
  this.agent._chatCtx.insert(message);
1231
- speechHandle._setChatMessage(message);
1257
+ speechHandle._itemAdded([message]);
1232
1258
  this.agentSession._conversationItemAdded(message);
1233
1259
  }
1234
1260
  this.logger.info(
@@ -1236,7 +1262,7 @@ ${instructions}` : instructions,
1236
1262
  "playout completed with interrupt"
1237
1263
  );
1238
1264
  }
1239
- speechHandle._markPlayoutDone();
1265
+ speechHandle._markGenerationDone();
1240
1266
  await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1241
1267
  return;
1242
1268
  }
@@ -1249,17 +1275,17 @@ ${instructions}` : instructions,
1249
1275
  interrupted: false
1250
1276
  });
1251
1277
  this.agent._chatCtx.insert(message);
1252
- speechHandle._setChatMessage(message);
1278
+ speechHandle._itemAdded([message]);
1253
1279
  this.agentSession._conversationItemAdded(message);
1254
1280
  }
1255
- speechHandle._markPlayoutDone();
1281
+ speechHandle._markGenerationDone();
1256
1282
  toolOutput.firstToolStartedFuture.await.finally(() => {
1257
1283
  this.agentSession._updateAgentState("thinking");
1258
1284
  });
1259
1285
  await executeToolsTask.result;
1260
1286
  if (toolOutput.output.length === 0) return;
1261
1287
  const { maxToolSteps } = this.agentSession.options;
1262
- if (speechHandle.stepIndex >= maxToolSteps) {
1288
+ if (speechHandle.numSteps >= maxToolSteps) {
1263
1289
  this.logger.warn(
1264
1290
  { speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
1265
1291
  "maximum number of function calls steps reached"
@@ -1323,7 +1349,7 @@ ${instructions}` : instructions,
1323
1349
  this.realtimeSession.interrupt();
1324
1350
  const replySpeechHandle = SpeechHandle.create({
1325
1351
  allowInterruptions: speechHandle.allowInterruptions,
1326
- stepIndex: speechHandle.stepIndex + 1,
1352
+ stepIndex: speechHandle.numSteps + 1,
1327
1353
  parent: speechHandle
1328
1354
  });
1329
1355
  this.agentSession.emit(
@@ -1336,10 +1362,13 @@ ${instructions}` : instructions,
1336
1362
  );
1337
1363
  const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1338
1364
  this.createSpeechTask({
1339
- promise: this.realtimeReplyTask({
1340
- speechHandle: replySpeechHandle,
1341
- modelSettings: { toolChoice }
1342
- }),
1365
+ task: Task.from(
1366
+ (abortController) => this.realtimeReplyTask({
1367
+ speechHandle: replySpeechHandle,
1368
+ modelSettings: { toolChoice },
1369
+ abortController
1370
+ })
1371
+ ),
1343
1372
  ownedSpeechHandle: replySpeechHandle,
1344
1373
  name: "AgentActivity.realtime_reply"
1345
1374
  });
@@ -1349,7 +1378,8 @@ ${instructions}` : instructions,
1349
1378
  speechHandle,
1350
1379
  modelSettings: { toolChoice },
1351
1380
  userInput,
1352
- instructions
1381
+ instructions,
1382
+ abortController
1353
1383
  }) {
1354
1384
  speechHandleStorage.enterWith(speechHandle);
1355
1385
  if (!this.realtimeSession) {
@@ -1372,18 +1402,24 @@ ${instructions}` : instructions,
1372
1402
  }
1373
1403
  try {
1374
1404
  const generationEvent = await this.realtimeSession.generateReply(instructions);
1375
- await this.realtimeGenerationTask(speechHandle, generationEvent, { toolChoice });
1405
+ await this.realtimeGenerationTask(
1406
+ speechHandle,
1407
+ generationEvent,
1408
+ { toolChoice },
1409
+ abortController
1410
+ );
1376
1411
  } finally {
1377
1412
  if (toolChoice !== void 0 && toolChoice !== originalToolChoice) {
1378
1413
  this.realtimeSession.updateOptions({ toolChoice: originalToolChoice });
1379
1414
  }
1380
1415
  }
1381
1416
  }
1382
- scheduleSpeech(speechHandle, priority, bypassDraining = false) {
1383
- if (this.draining && !bypassDraining) {
1417
+ scheduleSpeech(speechHandle, priority, force = false) {
1418
+ if (this.draining && !force) {
1384
1419
  throw new Error("cannot schedule new speech, the agent is draining");
1385
1420
  }
1386
1421
  this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]);
1422
+ speechHandle._markScheduled();
1387
1423
  this.wakeupMainTask();
1388
1424
  }
1389
1425
  async drain() {
@@ -1392,7 +1428,7 @@ ${instructions}` : instructions,
1392
1428
  try {
1393
1429
  if (this._draining) return;
1394
1430
  this.createSpeechTask({
1395
- promise: this.agent.onExit(),
1431
+ task: Task.from(() => this.agent.onExit()),
1396
1432
  name: "AgentActivity_onExit"
1397
1433
  });
1398
1434
  this.wakeupMainTask();