@livekit/agents 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/dist/index.cjs +2 -5
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -3
  4. package/dist/index.d.ts +2 -3
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +1 -3
  7. package/dist/index.js.map +1 -1
  8. package/dist/tokenize/basic/hyphenator.cjs.map +1 -1
  9. package/dist/tokenize/basic/hyphenator.js.map +1 -1
  10. package/dist/utils.cjs +77 -0
  11. package/dist/utils.cjs.map +1 -1
  12. package/dist/utils.d.cts +21 -0
  13. package/dist/utils.d.ts +21 -0
  14. package/dist/utils.d.ts.map +1 -1
  15. package/dist/utils.js +76 -1
  16. package/dist/utils.js.map +1 -1
  17. package/dist/voice/agent_activity.cjs +107 -71
  18. package/dist/voice/agent_activity.cjs.map +1 -1
  19. package/dist/voice/agent_activity.d.ts.map +1 -1
  20. package/dist/voice/agent_activity.js +107 -71
  21. package/dist/voice/agent_activity.js.map +1 -1
  22. package/dist/voice/avatar/datastream_io.cjs +204 -0
  23. package/dist/voice/avatar/datastream_io.cjs.map +1 -0
  24. package/dist/voice/avatar/datastream_io.d.cts +37 -0
  25. package/dist/voice/avatar/datastream_io.d.ts +37 -0
  26. package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
  27. package/dist/voice/avatar/datastream_io.js +188 -0
  28. package/dist/voice/avatar/datastream_io.js.map +1 -0
  29. package/dist/{multimodal → voice/avatar}/index.cjs +4 -4
  30. package/dist/voice/avatar/index.cjs.map +1 -0
  31. package/dist/voice/avatar/index.d.cts +2 -0
  32. package/dist/voice/avatar/index.d.ts +2 -0
  33. package/dist/voice/avatar/index.d.ts.map +1 -0
  34. package/dist/voice/avatar/index.js +2 -0
  35. package/dist/voice/avatar/index.js.map +1 -0
  36. package/dist/voice/index.cjs +2 -0
  37. package/dist/voice/index.cjs.map +1 -1
  38. package/dist/voice/index.d.cts +1 -0
  39. package/dist/voice/index.d.ts +1 -0
  40. package/dist/voice/index.d.ts.map +1 -1
  41. package/dist/voice/index.js +1 -0
  42. package/dist/voice/index.js.map +1 -1
  43. package/dist/voice/io.cjs.map +1 -1
  44. package/dist/voice/io.d.cts +1 -1
  45. package/dist/voice/io.d.ts +1 -1
  46. package/dist/voice/io.d.ts.map +1 -1
  47. package/dist/voice/io.js.map +1 -1
  48. package/dist/voice/room_io/_input.cjs +3 -1
  49. package/dist/voice/room_io/_input.cjs.map +1 -1
  50. package/dist/voice/room_io/_input.d.ts.map +1 -1
  51. package/dist/voice/room_io/_input.js +3 -1
  52. package/dist/voice/room_io/_input.js.map +1 -1
  53. package/dist/voice/run_context.cjs +13 -0
  54. package/dist/voice/run_context.cjs.map +1 -1
  55. package/dist/voice/run_context.d.cts +10 -0
  56. package/dist/voice/run_context.d.ts +10 -0
  57. package/dist/voice/run_context.d.ts.map +1 -1
  58. package/dist/voice/run_context.js +13 -0
  59. package/dist/voice/run_context.js.map +1 -1
  60. package/dist/voice/speech_handle.cjs +152 -30
  61. package/dist/voice/speech_handle.cjs.map +1 -1
  62. package/dist/voice/speech_handle.d.cts +67 -16
  63. package/dist/voice/speech_handle.d.ts +67 -16
  64. package/dist/voice/speech_handle.d.ts.map +1 -1
  65. package/dist/voice/speech_handle.js +153 -31
  66. package/dist/voice/speech_handle.js.map +1 -1
  67. package/dist/worker.cjs +4 -1
  68. package/dist/worker.cjs.map +1 -1
  69. package/dist/worker.d.ts.map +1 -1
  70. package/dist/worker.js +4 -1
  71. package/dist/worker.js.map +1 -1
  72. package/package.json +2 -2
  73. package/src/index.ts +2 -3
  74. package/src/tokenize/basic/hyphenator.ts +1 -1
  75. package/src/utils.ts +121 -1
  76. package/src/voice/agent_activity.ts +122 -78
  77. package/src/voice/avatar/datastream_io.ts +247 -0
  78. package/src/voice/avatar/index.ts +4 -0
  79. package/src/voice/index.ts +2 -0
  80. package/src/voice/io.ts +1 -1
  81. package/src/voice/room_io/_input.ts +9 -3
  82. package/src/voice/run_context.ts +16 -2
  83. package/src/voice/speech_handle.ts +183 -38
  84. package/src/worker.ts +5 -1
  85. package/dist/multimodal/agent_playout.cjs +0 -233
  86. package/dist/multimodal/agent_playout.cjs.map +0 -1
  87. package/dist/multimodal/agent_playout.d.cts +0 -34
  88. package/dist/multimodal/agent_playout.d.ts +0 -34
  89. package/dist/multimodal/agent_playout.d.ts.map +0 -1
  90. package/dist/multimodal/agent_playout.js +0 -207
  91. package/dist/multimodal/agent_playout.js.map +0 -1
  92. package/dist/multimodal/index.cjs.map +0 -1
  93. package/dist/multimodal/index.d.cts +0 -2
  94. package/dist/multimodal/index.d.ts +0 -2
  95. package/dist/multimodal/index.d.ts.map +0 -1
  96. package/dist/multimodal/index.js +0 -2
  97. package/dist/multimodal/index.js.map +0 -1
  98. package/src/multimodal/agent_playout.ts +0 -266
  99. package/src/multimodal/index.ts +0 -4
@@ -193,7 +193,7 @@ class AgentActivity {
193
193
  this.started = true;
194
194
  this._mainTask = import_utils.Task.from(({ signal }) => this.mainTask(signal));
195
195
  this.createSpeechTask({
196
- promise: this.agent.onEnter(),
196
+ task: import_utils.Task.from(() => this.agent.onEnter()),
197
197
  name: "AgentActivity_onEnter"
198
198
  });
199
199
  } finally {
@@ -312,7 +312,9 @@ class AgentActivity {
312
312
  })
313
313
  );
314
314
  const task = this.createSpeechTask({
315
- promise: this.ttsTask(handle, text, addToChatCtx, {}, audio),
315
+ task: import_utils.Task.from(
316
+ (abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
317
+ ),
316
318
  ownedSpeechHandle: handle,
317
319
  name: "AgentActivity.say_tts"
318
320
  });
@@ -416,7 +418,9 @@ class AgentActivity {
416
418
  );
417
419
  this.logger.info({ speech_id: handle.id }, "Creating speech handle");
418
420
  this.createSpeechTask({
419
- promise: this.realtimeGenerationTask(handle, ev, {}),
421
+ task: import_utils.Task.from(
422
+ (abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
423
+ ),
420
424
  ownedSpeechHandle: handle,
421
425
  name: "AgentActivity.realtimeGeneration"
422
426
  });
@@ -480,16 +484,23 @@ class AgentActivity {
480
484
  );
481
485
  }
482
486
  createSpeechTask(options) {
483
- const { promise, ownedSpeechHandle } = options;
484
- this.speechTasks.add(promise);
485
- promise.finally(() => {
486
- this.speechTasks.delete(promise);
487
- if (ownedSpeechHandle) {
488
- ownedSpeechHandle._markPlayoutDone();
489
- }
487
+ const { task, ownedSpeechHandle } = options;
488
+ this.speechTasks.add(task);
489
+ task.addDoneCallback(() => {
490
+ this.speechTasks.delete(task);
491
+ });
492
+ if (ownedSpeechHandle) {
493
+ ownedSpeechHandle._tasks.push(task);
494
+ task.addDoneCallback(() => {
495
+ if (ownedSpeechHandle._tasks.every((t) => t.done)) {
496
+ ownedSpeechHandle._markDone();
497
+ }
498
+ });
499
+ }
500
+ task.addDoneCallback(() => {
490
501
  this.wakeupMainTask();
491
502
  });
492
- return promise;
503
+ return task.result;
493
504
  }
494
505
  async onEndOfTurn(info) {
495
506
  if (this.draining) {
@@ -502,7 +513,7 @@ class AgentActivity {
502
513
  }
503
514
  const oldTask = this._userTurnCompletedTask;
504
515
  this._userTurnCompletedTask = this.createSpeechTask({
505
- promise: this.userTurnCompleted(info, oldTask),
516
+ task: import_utils.Task.from(() => this.userTurnCompleted(info, oldTask)),
506
517
  name: "AgentActivity.userTurnCompleted"
507
518
  });
508
519
  return true;
@@ -528,8 +539,8 @@ class AgentActivity {
528
539
  }
529
540
  const speechHandle = heapItem[2];
530
541
  this._currentSpeech = speechHandle;
531
- speechHandle._authorizePlayout();
532
- await speechHandle.waitForPlayout();
542
+ speechHandle._authorizeGeneration();
543
+ await speechHandle._waitForGeneration();
533
544
  this._currentSpeech = void 0;
534
545
  }
535
546
  if (this.draining && this.speechTasks.size === 0) {
@@ -582,16 +593,19 @@ class AgentActivity {
582
593
  this.logger.info({ speech_id: handle.id }, "Creating speech handle");
583
594
  if (this.llm instanceof import_llm.RealtimeModel) {
584
595
  this.createSpeechTask({
585
- promise: this.realtimeReplyTask({
586
- speechHandle: handle,
587
- // TODO(brian): support llm.ChatMessage for the realtime model
588
- userInput: userMessage == null ? void 0 : userMessage.textContent,
589
- instructions,
590
- modelSettings: {
591
- // isGiven(toolChoice) = toolChoice !== undefined
592
- toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
593
- }
594
- }),
596
+ task: import_utils.Task.from(
597
+ (abortController) => this.realtimeReplyTask({
598
+ speechHandle: handle,
599
+ // TODO(brian): support llm.ChatMessage for the realtime model
600
+ userInput: userMessage == null ? void 0 : userMessage.textContent,
601
+ instructions,
602
+ modelSettings: {
603
+ // isGiven(toolChoice) = toolChoice !== undefined
604
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
605
+ },
606
+ abortController
607
+ })
608
+ ),
595
609
  ownedSpeechHandle: handle,
596
610
  name: "AgentActivity.realtimeReply"
597
611
  });
@@ -601,14 +615,19 @@ class AgentActivity {
601
615
  ${instructions}`;
602
616
  }
603
617
  const task = this.createSpeechTask({
604
- promise: this.pipelineReplyTask(
605
- handle,
606
- chatCtx ?? this.agent.chatCtx,
607
- this.agent.toolCtx,
608
- { toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice) },
609
- instructions ? `${this.agent.instructions}
618
+ task: import_utils.Task.from(
619
+ (abortController) => this.pipelineReplyTask(
620
+ handle,
621
+ chatCtx ?? this.agent.chatCtx,
622
+ this.agent.toolCtx,
623
+ {
624
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
625
+ },
626
+ abortController,
627
+ instructions ? `${this.agent.instructions}
610
628
  ${instructions}` : instructions,
611
- userMessage
629
+ userMessage
630
+ )
612
631
  ),
613
632
  ownedSpeechHandle: handle,
614
633
  name: "AgentActivity.pipelineReply"
@@ -630,7 +649,7 @@ ${instructions}` : instructions,
630
649
  if (currentSpeech === void 0) {
631
650
  future.resolve();
632
651
  } else {
633
- currentSpeech.then(() => {
652
+ currentSpeech.addDoneCallback(() => {
634
653
  if (future.done) return;
635
654
  future.resolve();
636
655
  });
@@ -638,7 +657,7 @@ ${instructions}` : instructions,
638
657
  return future;
639
658
  }
640
659
  onPipelineReplyDone() {
641
- if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done)) {
660
+ if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done())) {
642
661
  this.agentSession._updateAgentState("listening");
643
662
  }
644
663
  }
@@ -702,11 +721,10 @@ ${instructions}` : instructions,
702
721
  (0, import_events.createMetricsCollectedEvent)({ metrics: eouMetrics })
703
722
  );
704
723
  }
705
- async ttsTask(speechHandle, text, addToChatCtx, modelSettings, audio) {
724
+ async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
706
725
  speechHandleStorage.enterWith(speechHandle);
707
726
  const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
708
727
  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
709
- const replyAbortController = new AbortController();
710
728
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
711
729
  if (speechHandle.interrupted) {
712
730
  return;
@@ -795,10 +813,9 @@ ${instructions}` : instructions,
795
813
  this.agentSession._updateAgentState("listening");
796
814
  }
797
815
  }
798
- async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, instructions, newMessage, toolsMessages) {
816
+ async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) {
799
817
  var _a, _b, _c;
800
818
  speechHandleStorage.enterWith(speechHandle);
801
- const replyAbortController = new AbortController();
802
819
  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
803
820
  const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
804
821
  chatCtx = chatCtx.copy();
@@ -841,12 +858,15 @@ ${instructions}` : instructions,
841
858
  );
842
859
  tasks.push(ttsTask);
843
860
  }
844
- await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
861
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
845
862
  if (speechHandle.interrupted) {
846
863
  replyAbortController.abort();
847
864
  await (0, import_utils.cancelAndWait)(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
848
865
  return;
849
866
  }
867
+ this.agentSession._updateAgentState("thinking");
868
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
869
+ speechHandle._clearAuthorization();
850
870
  const replyStartedAt = Date.now();
851
871
  const trNodeResult = await this.agent.transcriptionNode(llmOutput, modelSettings);
852
872
  let textOut = null;
@@ -893,7 +913,6 @@ ${instructions}` : instructions,
893
913
  onToolExecutionStarted,
894
914
  onToolExecutionCompleted
895
915
  });
896
- tasks.push(executeToolsTask);
897
916
  await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
898
917
  if (audioOutput) {
899
918
  await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
@@ -948,7 +967,7 @@ ${instructions}` : instructions,
948
967
  { speech_id: speechHandle.id, message: forwardedText },
949
968
  "playout completed with interrupt"
950
969
  );
951
- speechHandle._markPlayoutDone();
970
+ speechHandle._markGenerationDone();
952
971
  await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
953
972
  return;
954
973
  }
@@ -973,11 +992,11 @@ ${instructions}` : instructions,
973
992
  } else if (this.agentSession.agentState === "speaking") {
974
993
  this.agentSession._updateAgentState("listening");
975
994
  }
976
- speechHandle._markPlayoutDone();
995
+ speechHandle._markGenerationDone();
977
996
  await executeToolsTask.result;
978
997
  if (toolOutput.output.length === 0) return;
979
998
  const { maxToolSteps } = this.agentSession.options;
980
- if (speechHandle.stepIndex >= maxToolSteps) {
999
+ if (speechHandle.numSteps >= maxToolSteps) {
981
1000
  this.logger.warn(
982
1001
  { speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
983
1002
  "maximum number of function calls steps reached"
@@ -1032,7 +1051,7 @@ ${instructions}` : instructions,
1032
1051
  chatCtx.insert(toolMessages);
1033
1052
  const handle = import_speech_handle.SpeechHandle.create({
1034
1053
  allowInterruptions: speechHandle.allowInterruptions,
1035
- stepIndex: speechHandle.stepIndex + 1,
1054
+ stepIndex: speechHandle._stepIndex + 1,
1036
1055
  parent: speechHandle
1037
1056
  });
1038
1057
  this.agentSession.emit(
@@ -1045,14 +1064,17 @@ ${instructions}` : instructions,
1045
1064
  );
1046
1065
  const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1047
1066
  const toolResponseTask = this.createSpeechTask({
1048
- promise: this.pipelineReplyTask(
1049
- handle,
1050
- chatCtx,
1051
- toolCtx,
1052
- { toolChoice: respondToolChoice },
1053
- instructions,
1054
- void 0,
1055
- toolMessages
1067
+ task: import_utils.Task.from(
1068
+ () => this.pipelineReplyTask(
1069
+ handle,
1070
+ chatCtx,
1071
+ toolCtx,
1072
+ { toolChoice: respondToolChoice },
1073
+ replyAbortController,
1074
+ instructions,
1075
+ void 0,
1076
+ toolMessages
1077
+ )
1056
1078
  ),
1057
1079
  ownedSpeechHandle: handle,
1058
1080
  name: "AgentActivity.pipelineReply"
@@ -1066,7 +1088,7 @@ ${instructions}` : instructions,
1066
1088
  this.agent._chatCtx.insert(toolMessages);
1067
1089
  }
1068
1090
  }
1069
- async realtimeGenerationTask(speechHandle, ev, modelSettings) {
1091
+ async realtimeGenerationTask(speechHandle, ev, modelSettings, replyAbortController) {
1070
1092
  var _a, _b, _c;
1071
1093
  speechHandleStorage.enterWith(speechHandle);
1072
1094
  if (!this.realtimeSession) {
@@ -1076,20 +1098,20 @@ ${instructions}` : instructions,
1076
1098
  throw new Error("llm is not a realtime model");
1077
1099
  }
1078
1100
  this.logger.debug(
1079
- { speech_id: speechHandle.id, stepIndex: speechHandle.stepIndex },
1101
+ { speech_id: speechHandle.id, stepIndex: speechHandle.numSteps },
1080
1102
  "realtime generation started"
1081
1103
  );
1082
1104
  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
1083
1105
  const textOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
1084
1106
  const toolCtx = this.realtimeSession.tools;
1085
1107
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
1108
+ speechHandle._clearAuthorization();
1086
1109
  if (speechHandle.interrupted) {
1087
1110
  return;
1088
1111
  }
1089
1112
  const onFirstFrame = () => {
1090
1113
  this.agentSession._updateAgentState("speaking");
1091
1114
  };
1092
- const replyAbortController = new AbortController();
1093
1115
  const readMessages = async (abortController, outputs) => {
1094
1116
  const forwardTasks = [];
1095
1117
  try {
@@ -1173,9 +1195,13 @@ ${instructions}` : instructions,
1173
1195
  "AgentActivity.realtime_generation.read_tool_stream"
1174
1196
  )
1175
1197
  );
1176
- const onToolExecutionStarted = (_) => {
1198
+ const onToolExecutionStarted = (f) => {
1199
+ speechHandle._itemAdded([f]);
1177
1200
  };
1178
- const onToolExecutionCompleted = (_) => {
1201
+ const onToolExecutionCompleted = (out) => {
1202
+ if (out.toolCallOutput) {
1203
+ speechHandle._itemAdded([out.toolCallOutput]);
1204
+ }
1179
1205
  };
1180
1206
  const [executeToolsTask, toolOutput] = (0, import_generation.performToolExecutions)({
1181
1207
  session: this.agentSession,
@@ -1231,7 +1257,7 @@ ${instructions}` : instructions,
1231
1257
  interrupted: true
1232
1258
  });
1233
1259
  this.agent._chatCtx.insert(message);
1234
- speechHandle._setChatMessage(message);
1260
+ speechHandle._itemAdded([message]);
1235
1261
  this.agentSession._conversationItemAdded(message);
1236
1262
  }
1237
1263
  this.logger.info(
@@ -1239,7 +1265,7 @@ ${instructions}` : instructions,
1239
1265
  "playout completed with interrupt"
1240
1266
  );
1241
1267
  }
1242
- speechHandle._markPlayoutDone();
1268
+ speechHandle._markGenerationDone();
1243
1269
  await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1244
1270
  return;
1245
1271
  }
@@ -1252,17 +1278,17 @@ ${instructions}` : instructions,
1252
1278
  interrupted: false
1253
1279
  });
1254
1280
  this.agent._chatCtx.insert(message);
1255
- speechHandle._setChatMessage(message);
1281
+ speechHandle._itemAdded([message]);
1256
1282
  this.agentSession._conversationItemAdded(message);
1257
1283
  }
1258
- speechHandle._markPlayoutDone();
1284
+ speechHandle._markGenerationDone();
1259
1285
  toolOutput.firstToolStartedFuture.await.finally(() => {
1260
1286
  this.agentSession._updateAgentState("thinking");
1261
1287
  });
1262
1288
  await executeToolsTask.result;
1263
1289
  if (toolOutput.output.length === 0) return;
1264
1290
  const { maxToolSteps } = this.agentSession.options;
1265
- if (speechHandle.stepIndex >= maxToolSteps) {
1291
+ if (speechHandle.numSteps >= maxToolSteps) {
1266
1292
  this.logger.warn(
1267
1293
  { speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
1268
1294
  "maximum number of function calls steps reached"
@@ -1326,7 +1352,7 @@ ${instructions}` : instructions,
1326
1352
  this.realtimeSession.interrupt();
1327
1353
  const replySpeechHandle = import_speech_handle.SpeechHandle.create({
1328
1354
  allowInterruptions: speechHandle.allowInterruptions,
1329
- stepIndex: speechHandle.stepIndex + 1,
1355
+ stepIndex: speechHandle.numSteps + 1,
1330
1356
  parent: speechHandle
1331
1357
  });
1332
1358
  this.agentSession.emit(
@@ -1339,10 +1365,13 @@ ${instructions}` : instructions,
1339
1365
  );
1340
1366
  const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1341
1367
  this.createSpeechTask({
1342
- promise: this.realtimeReplyTask({
1343
- speechHandle: replySpeechHandle,
1344
- modelSettings: { toolChoice }
1345
- }),
1368
+ task: import_utils.Task.from(
1369
+ (abortController) => this.realtimeReplyTask({
1370
+ speechHandle: replySpeechHandle,
1371
+ modelSettings: { toolChoice },
1372
+ abortController
1373
+ })
1374
+ ),
1346
1375
  ownedSpeechHandle: replySpeechHandle,
1347
1376
  name: "AgentActivity.realtime_reply"
1348
1377
  });
@@ -1352,7 +1381,8 @@ ${instructions}` : instructions,
1352
1381
  speechHandle,
1353
1382
  modelSettings: { toolChoice },
1354
1383
  userInput,
1355
- instructions
1384
+ instructions,
1385
+ abortController
1356
1386
  }) {
1357
1387
  speechHandleStorage.enterWith(speechHandle);
1358
1388
  if (!this.realtimeSession) {
@@ -1375,18 +1405,24 @@ ${instructions}` : instructions,
1375
1405
  }
1376
1406
  try {
1377
1407
  const generationEvent = await this.realtimeSession.generateReply(instructions);
1378
- await this.realtimeGenerationTask(speechHandle, generationEvent, { toolChoice });
1408
+ await this.realtimeGenerationTask(
1409
+ speechHandle,
1410
+ generationEvent,
1411
+ { toolChoice },
1412
+ abortController
1413
+ );
1379
1414
  } finally {
1380
1415
  if (toolChoice !== void 0 && toolChoice !== originalToolChoice) {
1381
1416
  this.realtimeSession.updateOptions({ toolChoice: originalToolChoice });
1382
1417
  }
1383
1418
  }
1384
1419
  }
1385
- scheduleSpeech(speechHandle, priority, bypassDraining = false) {
1386
- if (this.draining && !bypassDraining) {
1420
+ scheduleSpeech(speechHandle, priority, force = false) {
1421
+ if (this.draining && !force) {
1387
1422
  throw new Error("cannot schedule new speech, the agent is draining");
1388
1423
  }
1389
1424
  this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]);
1425
+ speechHandle._markScheduled();
1390
1426
  this.wakeupMainTask();
1391
1427
  }
1392
1428
  async drain() {
@@ -1395,7 +1431,7 @@ ${instructions}` : instructions,
1395
1431
  try {
1396
1432
  if (this._draining) return;
1397
1433
  this.createSpeechTask({
1398
- promise: this.agent.onExit(),
1434
+ task: import_utils.Task.from(() => this.agent.onExit()),
1399
1435
  name: "AgentActivity_onExit"
1400
1436
  });
1401
1437
  this.wakeupMainTask();