@livekit/agents 1.0.15 → 1.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/dist/cli.cjs +12 -12
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.cts +3 -3
  4. package/dist/cli.d.ts +3 -3
  5. package/dist/cli.d.ts.map +1 -1
  6. package/dist/cli.js +13 -13
  7. package/dist/cli.js.map +1 -1
  8. package/dist/inference/stt.cjs.map +1 -1
  9. package/dist/inference/stt.d.ts.map +1 -1
  10. package/dist/inference/stt.js +1 -1
  11. package/dist/inference/stt.js.map +1 -1
  12. package/dist/inference/tts.cjs.map +1 -1
  13. package/dist/inference/tts.d.cts +2 -1
  14. package/dist/inference/tts.d.ts +2 -1
  15. package/dist/inference/tts.d.ts.map +1 -1
  16. package/dist/inference/tts.js +1 -5
  17. package/dist/inference/tts.js.map +1 -1
  18. package/dist/llm/chat_context.cjs +78 -0
  19. package/dist/llm/chat_context.cjs.map +1 -1
  20. package/dist/llm/chat_context.d.cts +16 -0
  21. package/dist/llm/chat_context.d.ts +16 -0
  22. package/dist/llm/chat_context.d.ts.map +1 -1
  23. package/dist/llm/chat_context.js +78 -0
  24. package/dist/llm/chat_context.js.map +1 -1
  25. package/dist/llm/chat_context.test.cjs +531 -0
  26. package/dist/llm/chat_context.test.cjs.map +1 -1
  27. package/dist/llm/chat_context.test.js +531 -0
  28. package/dist/llm/chat_context.test.js.map +1 -1
  29. package/dist/llm/tool_context.cjs +40 -0
  30. package/dist/llm/tool_context.cjs.map +1 -1
  31. package/dist/llm/tool_context.d.cts +2 -0
  32. package/dist/llm/tool_context.d.ts +2 -0
  33. package/dist/llm/tool_context.d.ts.map +1 -1
  34. package/dist/llm/tool_context.js +38 -0
  35. package/dist/llm/tool_context.js.map +1 -1
  36. package/dist/metrics/base.cjs.map +1 -1
  37. package/dist/metrics/base.d.cts +7 -0
  38. package/dist/metrics/base.d.ts +7 -0
  39. package/dist/metrics/base.d.ts.map +1 -1
  40. package/dist/stt/stt.cjs +1 -1
  41. package/dist/stt/stt.cjs.map +1 -1
  42. package/dist/stt/stt.d.cts +7 -1
  43. package/dist/stt/stt.d.ts +7 -1
  44. package/dist/stt/stt.d.ts.map +1 -1
  45. package/dist/stt/stt.js +1 -1
  46. package/dist/stt/stt.js.map +1 -1
  47. package/dist/tts/tts.cjs +2 -4
  48. package/dist/tts/tts.cjs.map +1 -1
  49. package/dist/tts/tts.d.ts.map +1 -1
  50. package/dist/tts/tts.js +3 -5
  51. package/dist/tts/tts.js.map +1 -1
  52. package/dist/voice/agent_activity.cjs +83 -8
  53. package/dist/voice/agent_activity.cjs.map +1 -1
  54. package/dist/voice/agent_activity.d.cts +6 -2
  55. package/dist/voice/agent_activity.d.ts +6 -2
  56. package/dist/voice/agent_activity.d.ts.map +1 -1
  57. package/dist/voice/agent_activity.js +83 -8
  58. package/dist/voice/agent_activity.js.map +1 -1
  59. package/dist/voice/agent_session.cjs +3 -2
  60. package/dist/voice/agent_session.cjs.map +1 -1
  61. package/dist/voice/agent_session.d.cts +2 -1
  62. package/dist/voice/agent_session.d.ts +2 -1
  63. package/dist/voice/agent_session.d.ts.map +1 -1
  64. package/dist/voice/agent_session.js +3 -2
  65. package/dist/voice/agent_session.js.map +1 -1
  66. package/dist/voice/audio_recognition.cjs +138 -16
  67. package/dist/voice/audio_recognition.cjs.map +1 -1
  68. package/dist/voice/audio_recognition.d.cts +11 -0
  69. package/dist/voice/audio_recognition.d.ts +11 -0
  70. package/dist/voice/audio_recognition.d.ts.map +1 -1
  71. package/dist/voice/audio_recognition.js +138 -16
  72. package/dist/voice/audio_recognition.js.map +1 -1
  73. package/dist/voice/room_io/_input.cjs.map +1 -1
  74. package/dist/voice/room_io/_input.d.ts.map +1 -1
  75. package/dist/voice/room_io/_input.js +0 -1
  76. package/dist/voice/room_io/_input.js.map +1 -1
  77. package/dist/worker.cjs +17 -11
  78. package/dist/worker.cjs.map +1 -1
  79. package/dist/worker.d.cts +16 -9
  80. package/dist/worker.d.ts +16 -9
  81. package/dist/worker.d.ts.map +1 -1
  82. package/dist/worker.js +16 -12
  83. package/dist/worker.js.map +1 -1
  84. package/package.json +1 -1
  85. package/src/cli.ts +17 -17
  86. package/src/inference/stt.ts +2 -1
  87. package/src/inference/tts.ts +2 -5
  88. package/src/llm/chat_context.test.ts +607 -0
  89. package/src/llm/chat_context.ts +106 -0
  90. package/src/llm/tool_context.ts +44 -0
  91. package/src/metrics/base.ts +7 -0
  92. package/src/stt/stt.ts +8 -1
  93. package/src/tts/tts.ts +7 -5
  94. package/src/voice/agent_activity.ts +119 -9
  95. package/src/voice/agent_session.ts +3 -1
  96. package/src/voice/audio_recognition.ts +235 -57
  97. package/src/voice/room_io/_input.ts +1 -1
  98. package/src/worker.ts +29 -18
package/dist/voice/agent_activity.cjs +83 -8
@@ -27,6 +27,7 @@ var import_node_async_hooks = require("node:async_hooks");
27
27
  var import_web = require("node:stream/web");
28
28
  var import_chat_context = require("../llm/chat_context.cjs");
29
29
  var import_llm = require("../llm/index.cjs");
30
+ var import_tool_context = require("../llm/tool_context.cjs");
30
31
  var import_log = require("../log.cjs");
31
32
  var import_deferred_stream = require("../stream/deferred_stream.cjs");
32
33
  var import_stt = require("../stt/stt.cjs");
@@ -58,6 +59,7 @@ class AgentActivity {
58
59
  audioStream = new import_deferred_stream.DeferredReadableStream();
59
60
  // default to null as None, which maps to the default provider tool choice value
60
61
  toolChoice = null;
62
+ _preemptiveGeneration;
61
63
  agent;
62
64
  agentSession;
63
65
  /** @internal */
@@ -430,8 +432,12 @@ class AgentActivity {
430
432
  onStartOfSpeech(_ev) {
431
433
  this.agentSession._updateUserState("speaking");
432
434
  }
433
- onEndOfSpeech(_ev) {
434
- this.agentSession._updateUserState("listening");
435
+ onEndOfSpeech(ev) {
436
+ let speechEndTime = Date.now();
437
+ if (ev) {
438
+ speechEndTime = speechEndTime - ev.silenceDuration;
439
+ }
440
+ this.agentSession._updateUserState("listening", speechEndTime);
435
441
  }
436
442
  onVADInferenceDone(ev) {
437
443
  var _a, _b;
@@ -485,6 +491,44 @@ class AgentActivity {
485
491
  })
486
492
  );
487
493
  }
494
+ onPreemptiveGeneration(info) {
495
+ if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof import_llm.LLM)) {
496
+ return;
497
+ }
498
+ this.cancelPreemptiveGeneration();
499
+ this.logger.info(
500
+ {
501
+ newTranscript: info.newTranscript,
502
+ transcriptConfidence: info.transcriptConfidence
503
+ },
504
+ "starting preemptive generation"
505
+ );
506
+ const userMessage = import_chat_context.ChatMessage.create({
507
+ role: "user",
508
+ content: info.newTranscript
509
+ });
510
+ const chatCtx = this.agent.chatCtx.copy();
511
+ const speechHandle = this.generateReply({
512
+ userMessage,
513
+ chatCtx,
514
+ scheduleSpeech: false
515
+ });
516
+ this._preemptiveGeneration = {
517
+ speechHandle,
518
+ userMessage,
519
+ info,
520
+ chatCtx: chatCtx.copy(),
521
+ tools: { ...this.tools },
522
+ toolChoice: this.toolChoice,
523
+ createdAt: Date.now()
524
+ };
525
+ }
526
+ cancelPreemptiveGeneration() {
527
+ if (this._preemptiveGeneration !== void 0) {
528
+ this._preemptiveGeneration.speechHandle._cancel();
529
+ this._preemptiveGeneration = void 0;
530
+ }
531
+ }
488
532
  createSpeechTask(options) {
489
533
  const { task, ownedSpeechHandle } = options;
490
534
  this.speechTasks.add(task);
@@ -506,10 +550,12 @@ class AgentActivity {
506
550
  }
507
551
  async onEndOfTurn(info) {
508
552
  if (this.draining) {
553
+ this.cancelPreemptiveGeneration();
509
554
  this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
510
555
  return true;
511
556
  }
512
557
  if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0 && info.newTranscript.split(" ").length < this.agentSession.options.minInterruptionWords) {
558
+ this.cancelPreemptiveGeneration();
513
559
  this.logger.info("skipping user input, new_transcript is too short");
514
560
  return false;
515
561
  }
@@ -563,7 +609,8 @@ class AgentActivity {
563
609
  chatCtx,
564
610
  instructions: defaultInstructions,
565
611
  toolChoice: defaultToolChoice,
566
- allowInterruptions: defaultAllowInterruptions
612
+ allowInterruptions: defaultAllowInterruptions,
613
+ scheduleSpeech = true
567
614
  } = options;
568
615
  let instructions = defaultInstructions;
569
616
  let toolChoice = defaultToolChoice;
@@ -636,7 +683,9 @@ ${instructions}` : instructions,
636
683
  });
637
684
  task.finally(() => this.onPipelineReplyDone());
638
685
  }
639
- this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
686
+ if (scheduleSpeech) {
687
+ this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
688
+ }
640
689
  return handle;
641
690
  }
642
691
  interrupt() {
@@ -709,13 +758,36 @@ ${instructions}` : instructions,
709
758
  } else if (this.llm === void 0) {
710
759
  return;
711
760
  }
712
- const speechHandle = this.generateReply({ userMessage, chatCtx });
761
+ let speechHandle;
762
+ if (this._preemptiveGeneration !== void 0) {
763
+ const preemptive = this._preemptiveGeneration;
764
+ if (preemptive.info.newTranscript === (userMessage == null ? void 0 : userMessage.textContent) && preemptive.chatCtx.isEquivalent(chatCtx) && (0, import_tool_context.isSameToolContext)(preemptive.tools, this.tools) && (0, import_tool_context.isSameToolChoice)(preemptive.toolChoice, this.toolChoice)) {
765
+ speechHandle = preemptive.speechHandle;
766
+ this.scheduleSpeech(speechHandle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
767
+ this.logger.debug(
768
+ {
769
+ preemptiveLeadTime: Date.now() - preemptive.createdAt
770
+ },
771
+ "using preemptive generation"
772
+ );
773
+ } else {
774
+ this.logger.warn(
775
+ "preemptive generation enabled but chat context or tools have changed after `onUserTurnCompleted`"
776
+ );
777
+ preemptive.speechHandle._cancel();
778
+ }
779
+ this._preemptiveGeneration = void 0;
780
+ }
781
+ if (speechHandle === void 0) {
782
+ speechHandle = this.generateReply({ userMessage, chatCtx });
783
+ }
713
784
  const eouMetrics = {
714
785
  type: "eou_metrics",
715
786
  timestamp: Date.now(),
716
787
  endOfUtteranceDelayMs: info.endOfUtteranceDelay,
717
788
  transcriptionDelayMs: info.transcriptionDelay,
718
789
  onUserTurnCompletedDelayMs: callbackDuration,
790
+ lastSpeakingTimeMs: info.stoppedSpeakingAt ?? 0,
719
791
  speechId: speechHandle.id
720
792
  };
721
793
  this.agentSession.emit(
@@ -823,8 +895,6 @@ ${instructions}` : instructions,
823
895
  chatCtx = chatCtx.copy();
824
896
  if (newMessage) {
825
897
  chatCtx.insert(newMessage);
826
- this.agent._chatCtx.insert(newMessage);
827
- this.agentSession._conversationItemAdded(newMessage);
828
898
  }
829
899
  if (instructions) {
830
900
  try {
@@ -837,7 +907,6 @@ ${instructions}` : instructions,
837
907
  this.logger.error({ error: e }, "error occurred during updateInstructions");
838
908
  }
839
909
  }
840
- this.agentSession._updateAgentState("thinking");
841
910
  const tasks = [];
842
911
  const [llmTask, llmGenData] = (0, import_generation.performLLMInference)(
843
912
  // preserve `this` context in llmNode
@@ -861,6 +930,10 @@ ${instructions}` : instructions,
861
930
  tasks.push(ttsTask);
862
931
  }
863
932
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
933
+ if (newMessage && speechHandle.scheduled) {
934
+ this.agent._chatCtx.insert(newMessage);
935
+ this.agentSession._conversationItemAdded(newMessage);
936
+ }
864
937
  if (speechHandle.interrupted) {
865
938
  replyAbortController.abort();
866
939
  await (0, import_utils.cancelAndWait)(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
@@ -1442,6 +1515,7 @@ ${instructions}` : instructions,
1442
1515
  const unlock = await this.lock.lock();
1443
1516
  try {
1444
1517
  if (this._draining) return;
1518
+ this.cancelPreemptiveGeneration();
1445
1519
  this.createSpeechTask({
1446
1520
  task: import_utils.Task.from(() => this.agent.onExit()),
1447
1521
  name: "AgentActivity_onExit"
@@ -1460,6 +1534,7 @@ ${instructions}` : instructions,
1460
1534
  if (!this._draining) {
1461
1535
  this.logger.warn("task closing without draining");
1462
1536
  }
1537
+ this.cancelPreemptiveGeneration();
1463
1538
  if (this.llm instanceof import_llm.LLM) {
1464
1539
  this.llm.off("metrics_collected", this.onMetricsCollected);
1465
1540
  }