@livekit/agents 1.0.17 → 1.0.19

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. package/dist/index.cjs +3 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -1
  4. package/dist/index.d.ts +2 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +2 -0
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/api_protos.d.cts +12 -12
  9. package/dist/inference/api_protos.d.ts +12 -12
  10. package/dist/inference/llm.cjs +35 -13
  11. package/dist/inference/llm.cjs.map +1 -1
  12. package/dist/inference/llm.d.cts +10 -5
  13. package/dist/inference/llm.d.ts +10 -5
  14. package/dist/inference/llm.d.ts.map +1 -1
  15. package/dist/inference/llm.js +35 -13
  16. package/dist/inference/llm.js.map +1 -1
  17. package/dist/inference/tts.cjs +1 -1
  18. package/dist/inference/tts.cjs.map +1 -1
  19. package/dist/inference/tts.js +1 -1
  20. package/dist/inference/tts.js.map +1 -1
  21. package/dist/ipc/job_proc_lazy_main.cjs +6 -2
  22. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  23. package/dist/ipc/job_proc_lazy_main.js +6 -2
  24. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  25. package/dist/job.cjs +31 -0
  26. package/dist/job.cjs.map +1 -1
  27. package/dist/job.d.cts +6 -0
  28. package/dist/job.d.ts +6 -0
  29. package/dist/job.d.ts.map +1 -1
  30. package/dist/job.js +31 -0
  31. package/dist/job.js.map +1 -1
  32. package/dist/llm/chat_context.cjs +33 -0
  33. package/dist/llm/chat_context.cjs.map +1 -1
  34. package/dist/llm/chat_context.d.cts +22 -2
  35. package/dist/llm/chat_context.d.ts +22 -2
  36. package/dist/llm/chat_context.d.ts.map +1 -1
  37. package/dist/llm/chat_context.js +32 -0
  38. package/dist/llm/chat_context.js.map +1 -1
  39. package/dist/llm/index.cjs +2 -0
  40. package/dist/llm/index.cjs.map +1 -1
  41. package/dist/llm/index.d.cts +1 -1
  42. package/dist/llm/index.d.ts +1 -1
  43. package/dist/llm/index.d.ts.map +1 -1
  44. package/dist/llm/index.js +2 -0
  45. package/dist/llm/index.js.map +1 -1
  46. package/dist/llm/llm.cjs.map +1 -1
  47. package/dist/llm/llm.d.cts +1 -1
  48. package/dist/llm/llm.d.ts +1 -1
  49. package/dist/llm/llm.d.ts.map +1 -1
  50. package/dist/llm/llm.js.map +1 -1
  51. package/dist/llm/provider_format/google.cjs.map +1 -1
  52. package/dist/llm/provider_format/google.d.cts +1 -1
  53. package/dist/llm/provider_format/google.d.ts +1 -1
  54. package/dist/llm/provider_format/google.d.ts.map +1 -1
  55. package/dist/llm/provider_format/google.js.map +1 -1
  56. package/dist/llm/provider_format/google.test.cjs +48 -0
  57. package/dist/llm/provider_format/google.test.cjs.map +1 -1
  58. package/dist/llm/provider_format/google.test.js +54 -1
  59. package/dist/llm/provider_format/google.test.js.map +1 -1
  60. package/dist/llm/provider_format/index.d.cts +1 -1
  61. package/dist/llm/provider_format/index.d.ts +1 -1
  62. package/dist/llm/provider_format/index.d.ts.map +1 -1
  63. package/dist/llm/provider_format/openai.cjs +1 -2
  64. package/dist/llm/provider_format/openai.cjs.map +1 -1
  65. package/dist/llm/provider_format/openai.js +1 -2
  66. package/dist/llm/provider_format/openai.js.map +1 -1
  67. package/dist/llm/provider_format/openai.test.cjs +32 -0
  68. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  69. package/dist/llm/provider_format/openai.test.js +38 -1
  70. package/dist/llm/provider_format/openai.test.js.map +1 -1
  71. package/dist/llm/realtime.cjs.map +1 -1
  72. package/dist/llm/realtime.d.cts +4 -0
  73. package/dist/llm/realtime.d.ts +4 -0
  74. package/dist/llm/realtime.d.ts.map +1 -1
  75. package/dist/llm/realtime.js.map +1 -1
  76. package/dist/llm/utils.cjs +2 -2
  77. package/dist/llm/utils.cjs.map +1 -1
  78. package/dist/llm/utils.d.cts +1 -1
  79. package/dist/llm/utils.d.ts +1 -1
  80. package/dist/llm/utils.d.ts.map +1 -1
  81. package/dist/llm/utils.js +2 -2
  82. package/dist/llm/utils.js.map +1 -1
  83. package/dist/llm/zod-utils.cjs +6 -3
  84. package/dist/llm/zod-utils.cjs.map +1 -1
  85. package/dist/llm/zod-utils.d.cts +1 -1
  86. package/dist/llm/zod-utils.d.ts +1 -1
  87. package/dist/llm/zod-utils.d.ts.map +1 -1
  88. package/dist/llm/zod-utils.js +6 -3
  89. package/dist/llm/zod-utils.js.map +1 -1
  90. package/dist/llm/zod-utils.test.cjs +83 -0
  91. package/dist/llm/zod-utils.test.cjs.map +1 -1
  92. package/dist/llm/zod-utils.test.js +83 -0
  93. package/dist/llm/zod-utils.test.js.map +1 -1
  94. package/dist/log.cjs.map +1 -1
  95. package/dist/log.d.ts.map +1 -1
  96. package/dist/log.js.map +1 -1
  97. package/dist/telemetry/index.cjs +51 -0
  98. package/dist/telemetry/index.cjs.map +1 -0
  99. package/dist/telemetry/index.d.cts +4 -0
  100. package/dist/telemetry/index.d.ts +4 -0
  101. package/dist/telemetry/index.d.ts.map +1 -0
  102. package/dist/telemetry/index.js +12 -0
  103. package/dist/telemetry/index.js.map +1 -0
  104. package/dist/telemetry/trace_types.cjs +191 -0
  105. package/dist/telemetry/trace_types.cjs.map +1 -0
  106. package/dist/telemetry/trace_types.d.cts +56 -0
  107. package/dist/telemetry/trace_types.d.ts +56 -0
  108. package/dist/telemetry/trace_types.d.ts.map +1 -0
  109. package/dist/telemetry/trace_types.js +113 -0
  110. package/dist/telemetry/trace_types.js.map +1 -0
  111. package/dist/telemetry/traces.cjs +196 -0
  112. package/dist/telemetry/traces.cjs.map +1 -0
  113. package/dist/telemetry/traces.d.cts +97 -0
  114. package/dist/telemetry/traces.d.ts +97 -0
  115. package/dist/telemetry/traces.d.ts.map +1 -0
  116. package/dist/telemetry/traces.js +173 -0
  117. package/dist/telemetry/traces.js.map +1 -0
  118. package/dist/telemetry/utils.cjs +86 -0
  119. package/dist/telemetry/utils.cjs.map +1 -0
  120. package/dist/telemetry/utils.d.cts +5 -0
  121. package/dist/telemetry/utils.d.ts +5 -0
  122. package/dist/telemetry/utils.d.ts.map +1 -0
  123. package/dist/telemetry/utils.js +51 -0
  124. package/dist/telemetry/utils.js.map +1 -0
  125. package/dist/tts/tts.cjs.map +1 -1
  126. package/dist/tts/tts.d.ts.map +1 -1
  127. package/dist/tts/tts.js.map +1 -1
  128. package/dist/utils.cjs.map +1 -1
  129. package/dist/utils.d.cts +7 -0
  130. package/dist/utils.d.ts +7 -0
  131. package/dist/utils.d.ts.map +1 -1
  132. package/dist/utils.js.map +1 -1
  133. package/dist/voice/agent.cjs +15 -0
  134. package/dist/voice/agent.cjs.map +1 -1
  135. package/dist/voice/agent.d.cts +4 -1
  136. package/dist/voice/agent.d.ts +4 -1
  137. package/dist/voice/agent.d.ts.map +1 -1
  138. package/dist/voice/agent.js +15 -0
  139. package/dist/voice/agent.js.map +1 -1
  140. package/dist/voice/agent_activity.cjs +71 -20
  141. package/dist/voice/agent_activity.cjs.map +1 -1
  142. package/dist/voice/agent_activity.d.ts.map +1 -1
  143. package/dist/voice/agent_activity.js +71 -20
  144. package/dist/voice/agent_activity.js.map +1 -1
  145. package/dist/voice/agent_session.cjs +69 -2
  146. package/dist/voice/agent_session.cjs.map +1 -1
  147. package/dist/voice/agent_session.d.cts +11 -2
  148. package/dist/voice/agent_session.d.ts +11 -2
  149. package/dist/voice/agent_session.d.ts.map +1 -1
  150. package/dist/voice/agent_session.js +70 -3
  151. package/dist/voice/agent_session.js.map +1 -1
  152. package/dist/voice/audio_recognition.cjs.map +1 -1
  153. package/dist/voice/audio_recognition.d.ts.map +1 -1
  154. package/dist/voice/audio_recognition.js.map +1 -1
  155. package/dist/voice/generation.cjs.map +1 -1
  156. package/dist/voice/generation.d.ts.map +1 -1
  157. package/dist/voice/generation.js.map +1 -1
  158. package/dist/voice/index.cjs +2 -0
  159. package/dist/voice/index.cjs.map +1 -1
  160. package/dist/voice/index.d.cts +1 -0
  161. package/dist/voice/index.d.ts +1 -0
  162. package/dist/voice/index.d.ts.map +1 -1
  163. package/dist/voice/index.js +1 -0
  164. package/dist/voice/index.js.map +1 -1
  165. package/dist/voice/interruption_detection.test.cjs +114 -0
  166. package/dist/voice/interruption_detection.test.cjs.map +1 -0
  167. package/dist/voice/interruption_detection.test.js +113 -0
  168. package/dist/voice/interruption_detection.test.js.map +1 -0
  169. package/dist/voice/report.cjs +69 -0
  170. package/dist/voice/report.cjs.map +1 -0
  171. package/dist/voice/report.d.cts +26 -0
  172. package/dist/voice/report.d.ts +26 -0
  173. package/dist/voice/report.d.ts.map +1 -0
  174. package/dist/voice/report.js +44 -0
  175. package/dist/voice/report.js.map +1 -0
  176. package/dist/voice/room_io/room_io.cjs +3 -0
  177. package/dist/voice/room_io/room_io.cjs.map +1 -1
  178. package/dist/voice/room_io/room_io.d.cts +1 -0
  179. package/dist/voice/room_io/room_io.d.ts +1 -0
  180. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  181. package/dist/voice/room_io/room_io.js +3 -0
  182. package/dist/voice/room_io/room_io.js.map +1 -1
  183. package/package.json +12 -5
  184. package/src/index.ts +2 -1
  185. package/src/inference/llm.ts +53 -21
  186. package/src/inference/tts.ts +1 -1
  187. package/src/ipc/job_proc_lazy_main.ts +10 -2
  188. package/src/job.ts +48 -0
  189. package/src/llm/__snapshots__/zod-utils.test.ts.snap +218 -0
  190. package/src/llm/chat_context.ts +53 -1
  191. package/src/llm/index.ts +1 -0
  192. package/src/llm/llm.ts +3 -1
  193. package/src/llm/provider_format/google.test.ts +72 -1
  194. package/src/llm/provider_format/google.ts +4 -4
  195. package/src/llm/provider_format/openai.test.ts +55 -1
  196. package/src/llm/provider_format/openai.ts +3 -2
  197. package/src/llm/realtime.ts +8 -1
  198. package/src/llm/utils.ts +7 -2
  199. package/src/llm/zod-utils.test.ts +101 -0
  200. package/src/llm/zod-utils.ts +12 -3
  201. package/src/log.ts +1 -0
  202. package/src/telemetry/index.ts +10 -0
  203. package/src/telemetry/trace_types.ts +88 -0
  204. package/src/telemetry/traces.ts +266 -0
  205. package/src/telemetry/utils.ts +61 -0
  206. package/src/tts/tts.ts +4 -0
  207. package/src/utils.ts +17 -0
  208. package/src/voice/agent.ts +22 -0
  209. package/src/voice/agent_activity.ts +102 -24
  210. package/src/voice/agent_session.ts +98 -1
  211. package/src/voice/audio_recognition.ts +2 -0
  212. package/src/voice/generation.ts +3 -0
  213. package/src/voice/index.ts +1 -0
  214. package/src/voice/interruption_detection.test.ts +151 -0
  215. package/src/voice/report.ts +77 -0
  216. package/src/voice/room_io/room_io.ts +4 -0
@@ -152,6 +152,11 @@ class AgentActivity {
152
152
  } catch (error) {
153
153
  this.logger.error(error, "failed to update the tools");
154
154
  }
155
+ if (!this.llm.capabilities.audioOutput && !this.tts && this.agentSession.output.audio) {
156
+ this.logger.error(
157
+ "audio output is enabled but RealtimeModel has no audio modality and no TTS is set. Either enable audio modality in the RealtimeModel or set a TTS model."
158
+ );
159
+ }
155
160
  } else if (this.llm instanceof LLM) {
156
161
  try {
157
162
  updateInstructions({
@@ -449,7 +454,9 @@ class AgentActivity {
449
454
  }
450
455
  if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
451
456
  const text = this.audioRecognition.currentTranscript;
452
- if (text && splitWords(text, true).length < this.agentSession.options.minInterruptionWords) {
457
+ const normalizedText = text ?? "";
458
+ const wordCount = splitWords(normalizedText, true).length;
459
+ if (wordCount < this.agentSession.options.minInterruptionWords) {
453
460
  return;
454
461
  }
455
462
  }
@@ -551,10 +558,19 @@ class AgentActivity {
551
558
  this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
552
559
  return true;
553
560
  }
554
- if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0 && info.newTranscript.split(" ").length < this.agentSession.options.minInterruptionWords) {
555
- this.cancelPreemptiveGeneration();
556
- this.logger.info("skipping user input, new_transcript is too short");
557
- return false;
561
+ if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0) {
562
+ const wordCount = splitWords(info.newTranscript, true).length;
563
+ if (wordCount < this.agentSession.options.minInterruptionWords) {
564
+ this.cancelPreemptiveGeneration();
565
+ this.logger.info(
566
+ {
567
+ wordCount,
568
+ minInterruptionWords: this.agentSession.options.minInterruptionWords
569
+ },
570
+ "skipping user input, word count below minimum interruption threshold"
571
+ );
572
+ return false;
573
+ }
558
574
  }
559
575
  const oldTask = this._userTurnCompletedTask;
560
576
  this._userTurnCompletedTask = this.createSpeechTask({
@@ -884,6 +900,7 @@ ${instructions}` : instructions,
884
900
  this.agentSession._updateAgentState("listening");
885
901
  }
886
902
  }
903
+ // TODO(brian): PR3 - Wrap entire pipelineReplyTask() method with tracer.startActiveSpan('agent_turn')
887
904
  async pipelineReplyTask(speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) {
888
905
  var _a, _b, _c;
889
906
  speechHandleStorage.enterWith(speechHandle);
@@ -1197,7 +1214,22 @@ ${instructions}` : instructions,
1197
1214
  );
1198
1215
  break;
1199
1216
  }
1200
- const trNodeResult = await this.agent.transcriptionNode(msg.textStream, modelSettings);
1217
+ const msgModalities = msg.modalities ? await msg.modalities : void 0;
1218
+ let ttsTextInput = null;
1219
+ let trTextInput;
1220
+ if (msgModalities && !msgModalities.includes("audio") && this.tts) {
1221
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.audioOutput) {
1222
+ this.logger.warn(
1223
+ "text response received from realtime API, falling back to use a TTS model."
1224
+ );
1225
+ }
1226
+ const [_ttsTextInput, _trTextInput] = msg.textStream.tee();
1227
+ ttsTextInput = _ttsTextInput;
1228
+ trTextInput = _trTextInput;
1229
+ } else {
1230
+ trTextInput = msg.textStream;
1231
+ }
1232
+ const trNodeResult = await this.agent.transcriptionNode(trTextInput, modelSettings);
1201
1233
  let textOut = null;
1202
1234
  if (trNodeResult) {
1203
1235
  const [textForwardTask, _textOut] = performTextForwarding(
@@ -1210,28 +1242,44 @@ ${instructions}` : instructions,
1210
1242
  }
1211
1243
  let audioOut = null;
1212
1244
  if (audioOutput) {
1213
- const realtimeAudio = await this.agent.realtimeAudioOutputNode(
1214
- msg.audioStream,
1215
- modelSettings
1216
- );
1217
- if (realtimeAudio) {
1245
+ let realtimeAudioResult = null;
1246
+ if (ttsTextInput) {
1247
+ const [ttsTask, ttsStream] = performTTSInference(
1248
+ (...args) => this.agent.ttsNode(...args),
1249
+ ttsTextInput,
1250
+ modelSettings,
1251
+ abortController
1252
+ );
1253
+ tasks.push(ttsTask);
1254
+ realtimeAudioResult = ttsStream;
1255
+ } else if (msgModalities && msgModalities.includes("audio")) {
1256
+ realtimeAudioResult = await this.agent.realtimeAudioOutputNode(
1257
+ msg.audioStream,
1258
+ modelSettings
1259
+ );
1260
+ } else if (this.llm instanceof RealtimeModel && this.llm.capabilities.audioOutput) {
1261
+ this.logger.error(
1262
+ "Text message received from Realtime API with audio modality. This usually happens when text chat context is synced to the API. Try to add a TTS model as fallback or use text modality with TTS instead."
1263
+ );
1264
+ } else {
1265
+ this.logger.warn(
1266
+ "audio output is enabled but neither tts nor realtime audio is available"
1267
+ );
1268
+ }
1269
+ if (realtimeAudioResult) {
1218
1270
  const [forwardTask, _audioOut] = performAudioForwarding(
1219
- realtimeAudio,
1271
+ realtimeAudioResult,
1220
1272
  audioOutput,
1221
1273
  abortController
1222
1274
  );
1223
1275
  forwardTasks.push(forwardTask);
1224
1276
  audioOut = _audioOut;
1225
1277
  audioOut.firstFrameFut.await.finally(onFirstFrame);
1226
- } else {
1227
- this.logger.warn(
1228
- "audio output is enabled but neither tts nor realtime audio is available"
1229
- );
1230
1278
  }
1231
1279
  } else if (textOut) {
1232
1280
  textOut.firstTextFut.await.finally(onFirstFrame);
1233
1281
  }
1234
- outputs.push([msg.messageId, textOut, audioOut]);
1282
+ outputs.push([msg.messageId, textOut, audioOut, msgModalities]);
1235
1283
  }
1236
1284
  await waitFor(forwardTasks);
1237
1285
  } catch (error) {
@@ -1301,7 +1349,7 @@ ${instructions}` : instructions,
1301
1349
  replyAbortController.abort();
1302
1350
  await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1303
1351
  if (messageOutputs.length > 0) {
1304
- const [msgId, textOut, audioOut] = messageOutputs[0];
1352
+ const [msgId, textOut, audioOut, msgModalities] = messageOutputs[0];
1305
1353
  let forwardedText = (textOut == null ? void 0 : textOut.text) || "";
1306
1354
  if (audioOutput) {
1307
1355
  audioOutput.clearBuffer();
@@ -1321,7 +1369,9 @@ ${instructions}` : instructions,
1321
1369
  }
1322
1370
  this.realtimeSession.truncate({
1323
1371
  messageId: msgId,
1324
- audioEndMs: Math.floor(playbackPosition)
1372
+ audioEndMs: Math.floor(playbackPosition),
1373
+ modalities: msgModalities,
1374
+ audioTranscript: forwardedText
1325
1375
  });
1326
1376
  }
1327
1377
  if (forwardedText) {
@@ -1345,7 +1395,7 @@ ${instructions}` : instructions,
1345
1395
  return;
1346
1396
  }
1347
1397
  if (messageOutputs.length > 0) {
1348
- const [msgId, textOut, _] = messageOutputs[0];
1398
+ const [msgId, textOut, _, __] = messageOutputs[0];
1349
1399
  const message = ChatMessage.create({
1350
1400
  role: "assistant",
1351
1401
  content: (textOut == null ? void 0 : textOut.text) || "",
@@ -1507,6 +1557,7 @@ ${instructions}` : instructions,
1507
1557
  speechHandle._markScheduled();
1508
1558
  this.wakeupMainTask();
1509
1559
  }
1560
+ // TODO(brian): PR3 - Wrap entire drain() method with tracer.startActiveSpan('drain_agent_activity', { attributes: { 'lk.agent_label': this.agent.label } })
1510
1561
  async drain() {
1511
1562
  var _a;
1512
1563
  const unlock = await this.lock.lock();