@livekit/agents 1.0.15 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92):
  1. package/dist/cli.cjs +12 -12
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.cts +3 -3
  4. package/dist/cli.d.ts +3 -3
  5. package/dist/cli.d.ts.map +1 -1
  6. package/dist/cli.js +13 -13
  7. package/dist/cli.js.map +1 -1
  8. package/dist/inference/stt.cjs.map +1 -1
  9. package/dist/inference/stt.d.ts.map +1 -1
  10. package/dist/inference/stt.js +1 -1
  11. package/dist/inference/stt.js.map +1 -1
  12. package/dist/inference/tts.cjs.map +1 -1
  13. package/dist/inference/tts.d.cts +2 -1
  14. package/dist/inference/tts.d.ts +2 -1
  15. package/dist/inference/tts.d.ts.map +1 -1
  16. package/dist/inference/tts.js +1 -5
  17. package/dist/inference/tts.js.map +1 -1
  18. package/dist/llm/chat_context.cjs +78 -0
  19. package/dist/llm/chat_context.cjs.map +1 -1
  20. package/dist/llm/chat_context.d.cts +16 -0
  21. package/dist/llm/chat_context.d.ts +16 -0
  22. package/dist/llm/chat_context.d.ts.map +1 -1
  23. package/dist/llm/chat_context.js +78 -0
  24. package/dist/llm/chat_context.js.map +1 -1
  25. package/dist/llm/chat_context.test.cjs +531 -0
  26. package/dist/llm/chat_context.test.cjs.map +1 -1
  27. package/dist/llm/chat_context.test.js +531 -0
  28. package/dist/llm/chat_context.test.js.map +1 -1
  29. package/dist/llm/tool_context.cjs +40 -0
  30. package/dist/llm/tool_context.cjs.map +1 -1
  31. package/dist/llm/tool_context.d.cts +2 -0
  32. package/dist/llm/tool_context.d.ts +2 -0
  33. package/dist/llm/tool_context.d.ts.map +1 -1
  34. package/dist/llm/tool_context.js +38 -0
  35. package/dist/llm/tool_context.js.map +1 -1
  36. package/dist/metrics/base.cjs.map +1 -1
  37. package/dist/metrics/base.d.cts +7 -0
  38. package/dist/metrics/base.d.ts +7 -0
  39. package/dist/metrics/base.d.ts.map +1 -1
  40. package/dist/stt/stt.cjs +1 -0
  41. package/dist/stt/stt.cjs.map +1 -1
  42. package/dist/stt/stt.d.cts +7 -1
  43. package/dist/stt/stt.d.ts +7 -1
  44. package/dist/stt/stt.d.ts.map +1 -1
  45. package/dist/stt/stt.js +1 -0
  46. package/dist/stt/stt.js.map +1 -1
  47. package/dist/voice/agent_activity.cjs +83 -8
  48. package/dist/voice/agent_activity.cjs.map +1 -1
  49. package/dist/voice/agent_activity.d.cts +6 -2
  50. package/dist/voice/agent_activity.d.ts +6 -2
  51. package/dist/voice/agent_activity.d.ts.map +1 -1
  52. package/dist/voice/agent_activity.js +83 -8
  53. package/dist/voice/agent_activity.js.map +1 -1
  54. package/dist/voice/agent_session.cjs +3 -2
  55. package/dist/voice/agent_session.cjs.map +1 -1
  56. package/dist/voice/agent_session.d.cts +2 -1
  57. package/dist/voice/agent_session.d.ts +2 -1
  58. package/dist/voice/agent_session.d.ts.map +1 -1
  59. package/dist/voice/agent_session.js +3 -2
  60. package/dist/voice/agent_session.js.map +1 -1
  61. package/dist/voice/audio_recognition.cjs +138 -16
  62. package/dist/voice/audio_recognition.cjs.map +1 -1
  63. package/dist/voice/audio_recognition.d.cts +11 -0
  64. package/dist/voice/audio_recognition.d.ts +11 -0
  65. package/dist/voice/audio_recognition.d.ts.map +1 -1
  66. package/dist/voice/audio_recognition.js +138 -16
  67. package/dist/voice/audio_recognition.js.map +1 -1
  68. package/dist/voice/room_io/_input.cjs.map +1 -1
  69. package/dist/voice/room_io/_input.d.ts.map +1 -1
  70. package/dist/voice/room_io/_input.js +0 -1
  71. package/dist/voice/room_io/_input.js.map +1 -1
  72. package/dist/worker.cjs +17 -11
  73. package/dist/worker.cjs.map +1 -1
  74. package/dist/worker.d.cts +16 -9
  75. package/dist/worker.d.ts +16 -9
  76. package/dist/worker.d.ts.map +1 -1
  77. package/dist/worker.js +16 -12
  78. package/dist/worker.js.map +1 -1
  79. package/package.json +1 -1
  80. package/src/cli.ts +17 -17
  81. package/src/inference/stt.ts +2 -1
  82. package/src/inference/tts.ts +2 -5
  83. package/src/llm/chat_context.test.ts +607 -0
  84. package/src/llm/chat_context.ts +106 -0
  85. package/src/llm/tool_context.ts +44 -0
  86. package/src/metrics/base.ts +7 -0
  87. package/src/stt/stt.ts +6 -0
  88. package/src/voice/agent_activity.ts +119 -9
  89. package/src/voice/agent_session.ts +3 -1
  90. package/src/voice/audio_recognition.ts +235 -57
  91. package/src/voice/room_io/_input.ts +1 -1
  92. package/src/worker.ts +29 -18
@@ -7,6 +7,7 @@ import {
7
7
  LLM,
8
8
  RealtimeModel
9
9
  } from "../llm/index.js";
10
+ import { isSameToolChoice, isSameToolContext } from "../llm/tool_context.js";
10
11
  import { log } from "../log.js";
11
12
  import { DeferredReadableStream } from "../stream/deferred_stream.js";
12
13
  import { STT } from "../stt/stt.js";
@@ -55,6 +56,7 @@ class AgentActivity {
55
56
  audioStream = new DeferredReadableStream();
56
57
  // default to null as None, which maps to the default provider tool choice value
57
58
  toolChoice = null;
59
+ _preemptiveGeneration;
58
60
  agent;
59
61
  agentSession;
60
62
  /** @internal */
@@ -427,8 +429,12 @@ class AgentActivity {
427
429
  onStartOfSpeech(_ev) {
428
430
  this.agentSession._updateUserState("speaking");
429
431
  }
430
- onEndOfSpeech(_ev) {
431
- this.agentSession._updateUserState("listening");
432
+ onEndOfSpeech(ev) {
433
+ let speechEndTime = Date.now();
434
+ if (ev) {
435
+ speechEndTime = speechEndTime - ev.silenceDuration;
436
+ }
437
+ this.agentSession._updateUserState("listening", speechEndTime);
432
438
  }
433
439
  onVADInferenceDone(ev) {
434
440
  var _a, _b;
@@ -482,6 +488,44 @@ class AgentActivity {
482
488
  })
483
489
  );
484
490
  }
491
+ onPreemptiveGeneration(info) {
492
+ if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof LLM)) {
493
+ return;
494
+ }
495
+ this.cancelPreemptiveGeneration();
496
+ this.logger.info(
497
+ {
498
+ newTranscript: info.newTranscript,
499
+ transcriptConfidence: info.transcriptConfidence
500
+ },
501
+ "starting preemptive generation"
502
+ );
503
+ const userMessage = ChatMessage.create({
504
+ role: "user",
505
+ content: info.newTranscript
506
+ });
507
+ const chatCtx = this.agent.chatCtx.copy();
508
+ const speechHandle = this.generateReply({
509
+ userMessage,
510
+ chatCtx,
511
+ scheduleSpeech: false
512
+ });
513
+ this._preemptiveGeneration = {
514
+ speechHandle,
515
+ userMessage,
516
+ info,
517
+ chatCtx: chatCtx.copy(),
518
+ tools: { ...this.tools },
519
+ toolChoice: this.toolChoice,
520
+ createdAt: Date.now()
521
+ };
522
+ }
523
+ cancelPreemptiveGeneration() {
524
+ if (this._preemptiveGeneration !== void 0) {
525
+ this._preemptiveGeneration.speechHandle._cancel();
526
+ this._preemptiveGeneration = void 0;
527
+ }
528
+ }
485
529
  createSpeechTask(options) {
486
530
  const { task, ownedSpeechHandle } = options;
487
531
  this.speechTasks.add(task);
@@ -503,10 +547,12 @@ class AgentActivity {
503
547
  }
504
548
  async onEndOfTurn(info) {
505
549
  if (this.draining) {
550
+ this.cancelPreemptiveGeneration();
506
551
  this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
507
552
  return true;
508
553
  }
509
554
  if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0 && info.newTranscript.split(" ").length < this.agentSession.options.minInterruptionWords) {
555
+ this.cancelPreemptiveGeneration();
510
556
  this.logger.info("skipping user input, new_transcript is too short");
511
557
  return false;
512
558
  }
@@ -560,7 +606,8 @@ class AgentActivity {
560
606
  chatCtx,
561
607
  instructions: defaultInstructions,
562
608
  toolChoice: defaultToolChoice,
563
- allowInterruptions: defaultAllowInterruptions
609
+ allowInterruptions: defaultAllowInterruptions,
610
+ scheduleSpeech = true
564
611
  } = options;
565
612
  let instructions = defaultInstructions;
566
613
  let toolChoice = defaultToolChoice;
@@ -633,7 +680,9 @@ ${instructions}` : instructions,
633
680
  });
634
681
  task.finally(() => this.onPipelineReplyDone());
635
682
  }
636
- this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
683
+ if (scheduleSpeech) {
684
+ this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
685
+ }
637
686
  return handle;
638
687
  }
639
688
  interrupt() {
@@ -706,13 +755,36 @@ ${instructions}` : instructions,
706
755
  } else if (this.llm === void 0) {
707
756
  return;
708
757
  }
709
- const speechHandle = this.generateReply({ userMessage, chatCtx });
758
+ let speechHandle;
759
+ if (this._preemptiveGeneration !== void 0) {
760
+ const preemptive = this._preemptiveGeneration;
761
+ if (preemptive.info.newTranscript === (userMessage == null ? void 0 : userMessage.textContent) && preemptive.chatCtx.isEquivalent(chatCtx) && isSameToolContext(preemptive.tools, this.tools) && isSameToolChoice(preemptive.toolChoice, this.toolChoice)) {
762
+ speechHandle = preemptive.speechHandle;
763
+ this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
764
+ this.logger.debug(
765
+ {
766
+ preemptiveLeadTime: Date.now() - preemptive.createdAt
767
+ },
768
+ "using preemptive generation"
769
+ );
770
+ } else {
771
+ this.logger.warn(
772
+ "preemptive generation enabled but chat context or tools have changed after `onUserTurnCompleted`"
773
+ );
774
+ preemptive.speechHandle._cancel();
775
+ }
776
+ this._preemptiveGeneration = void 0;
777
+ }
778
+ if (speechHandle === void 0) {
779
+ speechHandle = this.generateReply({ userMessage, chatCtx });
780
+ }
710
781
  const eouMetrics = {
711
782
  type: "eou_metrics",
712
783
  timestamp: Date.now(),
713
784
  endOfUtteranceDelayMs: info.endOfUtteranceDelay,
714
785
  transcriptionDelayMs: info.transcriptionDelay,
715
786
  onUserTurnCompletedDelayMs: callbackDuration,
787
+ lastSpeakingTimeMs: info.stoppedSpeakingAt ?? 0,
716
788
  speechId: speechHandle.id
717
789
  };
718
790
  this.agentSession.emit(
@@ -820,8 +892,6 @@ ${instructions}` : instructions,
820
892
  chatCtx = chatCtx.copy();
821
893
  if (newMessage) {
822
894
  chatCtx.insert(newMessage);
823
- this.agent._chatCtx.insert(newMessage);
824
- this.agentSession._conversationItemAdded(newMessage);
825
895
  }
826
896
  if (instructions) {
827
897
  try {
@@ -834,7 +904,6 @@ ${instructions}` : instructions,
834
904
  this.logger.error({ error: e }, "error occurred during updateInstructions");
835
905
  }
836
906
  }
837
- this.agentSession._updateAgentState("thinking");
838
907
  const tasks = [];
839
908
  const [llmTask, llmGenData] = performLLMInference(
840
909
  // preserve `this` context in llmNode
@@ -858,6 +927,10 @@ ${instructions}` : instructions,
858
927
  tasks.push(ttsTask);
859
928
  }
860
929
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
930
+ if (newMessage && speechHandle.scheduled) {
931
+ this.agent._chatCtx.insert(newMessage);
932
+ this.agentSession._conversationItemAdded(newMessage);
933
+ }
861
934
  if (speechHandle.interrupted) {
862
935
  replyAbortController.abort();
863
936
  await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
@@ -1439,6 +1512,7 @@ ${instructions}` : instructions,
1439
1512
  const unlock = await this.lock.lock();
1440
1513
  try {
1441
1514
  if (this._draining) return;
1515
+ this.cancelPreemptiveGeneration();
1442
1516
  this.createSpeechTask({
1443
1517
  task: Task.from(() => this.agent.onExit()),
1444
1518
  name: "AgentActivity_onExit"
@@ -1457,6 +1531,7 @@ ${instructions}` : instructions,
1457
1531
  if (!this._draining) {
1458
1532
  this.logger.warn("task closing without draining");
1459
1533
  }
1534
+ this.cancelPreemptiveGeneration();
1460
1535
  if (this.llm instanceof LLM) {
1461
1536
  this.llm.off("metrics_collected", this.onMetricsCollected);
1462
1537
  }