@livekit/agents 1.0.37 → 1.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/dist/cli.cjs.map +1 -1
  2. package/dist/inference/api_protos.cjs +68 -0
  3. package/dist/inference/api_protos.cjs.map +1 -1
  4. package/dist/inference/api_protos.d.cts +345 -4
  5. package/dist/inference/api_protos.d.ts +345 -4
  6. package/dist/inference/api_protos.d.ts.map +1 -1
  7. package/dist/inference/api_protos.js +60 -0
  8. package/dist/inference/api_protos.js.map +1 -1
  9. package/dist/inference/llm.cjs +7 -3
  10. package/dist/inference/llm.cjs.map +1 -1
  11. package/dist/inference/llm.d.cts +5 -6
  12. package/dist/inference/llm.d.ts +5 -6
  13. package/dist/inference/llm.d.ts.map +1 -1
  14. package/dist/inference/llm.js +7 -3
  15. package/dist/inference/llm.js.map +1 -1
  16. package/dist/inference/stt.cjs +32 -21
  17. package/dist/inference/stt.cjs.map +1 -1
  18. package/dist/inference/stt.d.cts +5 -4
  19. package/dist/inference/stt.d.ts +5 -4
  20. package/dist/inference/stt.d.ts.map +1 -1
  21. package/dist/inference/stt.js +34 -21
  22. package/dist/inference/stt.js.map +1 -1
  23. package/dist/inference/tts.cjs.map +1 -1
  24. package/dist/inference/tts.d.cts +10 -7
  25. package/dist/inference/tts.d.ts +10 -7
  26. package/dist/inference/tts.d.ts.map +1 -1
  27. package/dist/inference/tts.js.map +1 -1
  28. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  29. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  30. package/dist/stt/stream_adapter.cjs +9 -1
  31. package/dist/stt/stream_adapter.cjs.map +1 -1
  32. package/dist/stt/stream_adapter.d.ts.map +1 -1
  33. package/dist/stt/stream_adapter.js +9 -1
  34. package/dist/stt/stream_adapter.js.map +1 -1
  35. package/dist/stt/stt.cjs +10 -0
  36. package/dist/stt/stt.cjs.map +1 -1
  37. package/dist/stt/stt.d.cts +12 -0
  38. package/dist/stt/stt.d.ts +12 -0
  39. package/dist/stt/stt.d.ts.map +1 -1
  40. package/dist/stt/stt.js +10 -0
  41. package/dist/stt/stt.js.map +1 -1
  42. package/dist/telemetry/traces.cjs +4 -3
  43. package/dist/telemetry/traces.cjs.map +1 -1
  44. package/dist/telemetry/traces.d.cts +2 -0
  45. package/dist/telemetry/traces.d.ts +2 -0
  46. package/dist/telemetry/traces.d.ts.map +1 -1
  47. package/dist/telemetry/traces.js +4 -3
  48. package/dist/telemetry/traces.js.map +1 -1
  49. package/dist/utils.cjs +11 -0
  50. package/dist/utils.cjs.map +1 -1
  51. package/dist/utils.d.cts +10 -0
  52. package/dist/utils.d.ts +10 -0
  53. package/dist/utils.d.ts.map +1 -1
  54. package/dist/utils.js +10 -0
  55. package/dist/utils.js.map +1 -1
  56. package/dist/voice/agent.cjs +6 -2
  57. package/dist/voice/agent.cjs.map +1 -1
  58. package/dist/voice/agent.d.ts.map +1 -1
  59. package/dist/voice/agent.js +6 -2
  60. package/dist/voice/agent.js.map +1 -1
  61. package/dist/voice/agent_activity.cjs +72 -37
  62. package/dist/voice/agent_activity.cjs.map +1 -1
  63. package/dist/voice/agent_activity.d.cts +2 -1
  64. package/dist/voice/agent_activity.d.ts +2 -1
  65. package/dist/voice/agent_activity.d.ts.map +1 -1
  66. package/dist/voice/agent_activity.js +73 -38
  67. package/dist/voice/agent_activity.js.map +1 -1
  68. package/dist/voice/agent_session.cjs +7 -5
  69. package/dist/voice/agent_session.cjs.map +1 -1
  70. package/dist/voice/agent_session.d.cts +5 -2
  71. package/dist/voice/agent_session.d.ts +5 -2
  72. package/dist/voice/agent_session.d.ts.map +1 -1
  73. package/dist/voice/agent_session.js +7 -5
  74. package/dist/voice/agent_session.js.map +1 -1
  75. package/dist/voice/audio_recognition.cjs +3 -1
  76. package/dist/voice/audio_recognition.cjs.map +1 -1
  77. package/dist/voice/audio_recognition.d.ts.map +1 -1
  78. package/dist/voice/audio_recognition.js +3 -1
  79. package/dist/voice/audio_recognition.js.map +1 -1
  80. package/dist/voice/avatar/datastream_io.cjs +6 -0
  81. package/dist/voice/avatar/datastream_io.cjs.map +1 -1
  82. package/dist/voice/avatar/datastream_io.d.cts +1 -0
  83. package/dist/voice/avatar/datastream_io.d.ts +1 -0
  84. package/dist/voice/avatar/datastream_io.d.ts.map +1 -1
  85. package/dist/voice/avatar/datastream_io.js +6 -0
  86. package/dist/voice/avatar/datastream_io.js.map +1 -1
  87. package/dist/voice/background_audio.cjs.map +1 -1
  88. package/dist/voice/generation.cjs +14 -5
  89. package/dist/voice/generation.cjs.map +1 -1
  90. package/dist/voice/generation.d.cts +3 -2
  91. package/dist/voice/generation.d.ts +3 -2
  92. package/dist/voice/generation.d.ts.map +1 -1
  93. package/dist/voice/generation.js +14 -5
  94. package/dist/voice/generation.js.map +1 -1
  95. package/dist/voice/io.cjs +12 -0
  96. package/dist/voice/io.cjs.map +1 -1
  97. package/dist/voice/io.d.cts +19 -1
  98. package/dist/voice/io.d.ts +19 -1
  99. package/dist/voice/io.d.ts.map +1 -1
  100. package/dist/voice/io.js +12 -0
  101. package/dist/voice/io.js.map +1 -1
  102. package/dist/voice/recorder_io/recorder_io.cjs +91 -28
  103. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
  104. package/dist/voice/recorder_io/recorder_io.d.cts +7 -1
  105. package/dist/voice/recorder_io/recorder_io.d.ts +7 -1
  106. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
  107. package/dist/voice/recorder_io/recorder_io.js +91 -28
  108. package/dist/voice/recorder_io/recorder_io.js.map +1 -1
  109. package/dist/voice/room_io/_input.cjs +40 -11
  110. package/dist/voice/room_io/_input.cjs.map +1 -1
  111. package/dist/voice/room_io/_input.d.cts +4 -1
  112. package/dist/voice/room_io/_input.d.ts +4 -1
  113. package/dist/voice/room_io/_input.d.ts.map +1 -1
  114. package/dist/voice/room_io/_input.js +31 -2
  115. package/dist/voice/room_io/_input.js.map +1 -1
  116. package/dist/voice/room_io/_output.cjs +6 -0
  117. package/dist/voice/room_io/_output.cjs.map +1 -1
  118. package/dist/voice/room_io/_output.d.cts +1 -0
  119. package/dist/voice/room_io/_output.d.ts +1 -0
  120. package/dist/voice/room_io/_output.d.ts.map +1 -1
  121. package/dist/voice/room_io/_output.js +6 -0
  122. package/dist/voice/room_io/_output.js.map +1 -1
  123. package/dist/voice/room_io/room_io.cjs.map +1 -1
  124. package/dist/voice/room_io/room_io.d.cts +2 -2
  125. package/dist/voice/room_io/room_io.d.ts +2 -2
  126. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  127. package/dist/voice/room_io/room_io.js.map +1 -1
  128. package/dist/voice/speech_handle.cjs +2 -0
  129. package/dist/voice/speech_handle.cjs.map +1 -1
  130. package/dist/voice/speech_handle.d.cts +3 -0
  131. package/dist/voice/speech_handle.d.ts +3 -0
  132. package/dist/voice/speech_handle.d.ts.map +1 -1
  133. package/dist/voice/speech_handle.js +2 -0
  134. package/dist/voice/speech_handle.js.map +1 -1
  135. package/package.json +2 -2
  136. package/src/inference/api_protos.ts +83 -0
  137. package/src/inference/llm.ts +20 -15
  138. package/src/inference/stt.ts +48 -29
  139. package/src/inference/tts.ts +36 -16
  140. package/src/stt/stream_adapter.ts +12 -1
  141. package/src/stt/stt.ts +21 -0
  142. package/src/telemetry/traces.ts +6 -2
  143. package/src/utils.ts +21 -0
  144. package/src/voice/agent.ts +11 -2
  145. package/src/voice/agent_activity.ts +108 -41
  146. package/src/voice/agent_session.ts +6 -5
  147. package/src/voice/audio_recognition.ts +2 -0
  148. package/src/voice/avatar/datastream_io.ts +8 -0
  149. package/src/voice/generation.ts +24 -12
  150. package/src/voice/io.ts +27 -5
  151. package/src/voice/recorder_io/recorder_io.ts +123 -31
  152. package/src/voice/room_io/_input.ts +32 -4
  153. package/src/voice/room_io/_output.ts +8 -0
  154. package/src/voice/room_io/room_io.ts +3 -1
  155. package/src/voice/speech_handle.ts +4 -0
@@ -1 +1 @@
1
- {"version":3,"file":"agent_activity.d.ts","sourceRoot":"","sources":["../../src/voice/agent_activity.ts"],"names":[],"mappings":";AAIA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAKpD,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AACvE,OAAO,EAIL,KAAK,sBAAsB,EAC3B,KAAK,uBAAuB,EAC5B,KAAK,uBAAuB,EAC5B,KAAK,2BAA2B,EAChC,GAAG,EACH,aAAa,EAEb,KAAK,eAAe,EACpB,KAAK,UAAU,EACf,KAAK,WAAW,EACjB,MAAM,iBAAiB,CAAC;AAazB,OAAO,EAAE,GAAG,EAAiB,KAAK,WAAW,EAAE,MAAM,eAAe,CAAC;AAGrE,OAAO,EAAE,GAAG,EAAiB,MAAM,eAAe,CAAC;AACnD,OAAO,EAAE,MAAM,EAAE,IAAI,EAA0B,MAAM,aAAa,CAAC;AACnE,OAAO,EAAE,GAAG,EAAE,KAAK,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC/C,OAAO,KAAK,EAAE,KAAK,EAAiB,MAAM,YAAY,CAAC;AAEvD,OAAO,EAAE,KAAK,YAAY,EAAE,KAAK,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAC/E,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,wBAAwB,EAC7B,KAAK,gBAAgB,EAEtB,MAAM,wBAAwB,CAAC;AAqBhC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAclD,qBAAa,aAAc,YAAW,gBAAgB;IACpD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,yBAAyB,CAAQ;IACzD,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,gBAAgB,CAAC,CAAmB;IAC5C,OAAO,CAAC,eAAe,CAAC,CAAkB;IAC1C,OAAO,CAAC,aAAa,CAAC,CAAoB;IAC1C,OAAO,CAAC,iBAAiB,CAAC,CAA4C;IACtE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAC,CAAe;IACtC,OAAO,CAAC,WAAW,CAAuC;IAC1D,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,WAAW,CAA8B;IACjD,OAAO,CAAC,IAAI,CAAe;IAC3B,OAAO,CAAC,WAAW,CAA4C;IAE/D,OAAO,CAAC,UAAU,CAA2B;IAC7C,OAAO,CAAC,qBAAqB,CAAC,CAAuB;IAErD,KAAK,EAAE,KAAK,CAAC;IACb,YAAY,EAAE,YAAY,CAAC;IAE3B,gBAAgB;IAChB,SAAS,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;IACvB,sBAAsB,CAAC,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;gBAE3B,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,YAAY;IA6F9C,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAoH5B,IAAI,aAAa,IAAI,YAAY,GAAG,SAAS,CAE5C;IAED,IAAI,GAAG,IAAI,GAAG,GAAG,SAAS,CAEzB;IAED,IAAI,GAAG,IAAI,GAAG,GAAG,SAAS,CAEzB;IAED,IAAI,GAAG,IAAI,GAAG,GAAG,aAAa,GAAG,SAAS,CAEzC;IAED,IAAI,GAAG,IAAI,GAAG,GAAG,SAAS,CAEzB;IAED,IAAI,KAAK,IAAI,WAAW,CAEvB;IAED,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED,IAAI,kBAAkB,IAAI,eAAe,GAAG,SAAS,CAEpD;IAED,IAAI,kBAAkB,IAAI,OAAO,CAGhC;IAED,IAAI,aAAa,IAAI,iBAAiB,GAAG,SAAS,CAGjD;IAED,IAAI,OAAO,IAAI,WAAW,CAEzB;IAEK,aAAa,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAiBxD,aAAa,CAAC,EAAE,UAAU,EAAE,EAAE;QAAE,UAAU,CAAC,EAAE,UAAU,GAAG,IAAI,CAAA;KAAE,GAAG,IAAI;IAUvE,gBAAgB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC,GAAG,IAAI;IAwB/D,gBAAgB,IAAI,IAAI;IAIxB,cAAc;IAUd,aAAa;IAKb,GAAG,CACD,IAAI,EAAE,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,EACrC,OAAO,CAAC,EAAE;QACR,KAAK,CAAC,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,kBAAkB,CAAC,EAAE,OAAO,CAAC;QAC7B,YAAY,CAAC,EAAE,OAAO,CAAC;KACxB,GACA,YAAY;IAwDf,OAAO,CAAC,kBAAkB,CAqBxB;IAEF,OAAO,CAAC,OAAO;IAoBf,oBAAoB,CAAC,GAAG,EAAE,uBAAuB,GAAG,IAAI;IAmBxD,oBAAoB,CAAC,EAAE,EAAE,uBAAuB,GAAG,IAAI;IAkBvD,kCAAkC,CAAC,EAAE,EAAE,2BAA2B,GAAG,IAAI;IAoBzE,mBAAmB,CAAC,EAAE,EAAE,sBAAsB,GAAG,IAAI;IAsCrD,eAAe,CAAC,GAAG,EAAE,QAAQ,GAAG,IAAI;IAIpC,aAAa,CAAC,EAAE,EAAE,QAAQ,GAAG,IAAI;IAQjC,kBAAkB,CAAC,EAAE,EAAE,QAAQ,GAAG,IAAI;IA+CtC,mBAAmB,CAAC,EAAE,EAAE,WAAW,GAAG,IAAI;IAiB1C,iBAAiB,CAAC,EAAE,EAAE,WAAW,GAAG,IAAI;IAiBxC,sBAAsB,CAAC,IAAI,EAAE,wBAAwB,GAAG,IAAI;IA0C5D,OAAO,CAAC,0BAA0B;IAOlC,OAAO,CAAC,gBAAgB;IA4BlB,WAAW,CAAC,IAAI,EAAE,aAAa,GAAG,OAAO,CAAC,OAAO,CAAC;IA0CxD,eAAe,IAAI,WAAW;YAIhB,QAAQ;IAuCtB,OAAO,CAAC,cAAc;IAItB,aAAa,CAAC,OAAO,EAAE;QACrB,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,WAAW,CAAC;QACtB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,CAAC,EAAE,UAAU,GAAG,IAAI,CAAC;QAC/B,kBAAkB,CAAC,EAAE,OAAO,CAAC;QAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GAAG,YAAY;IAuGhB,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC;IA0BzB,OAAO,CAAC,mBAAmB;YAMb,iBAAiB;YAyHjB,OAAO;IA0HrB,OAAO,CAAC,sBAAsB,CAyX5B;IAEF,OAAO,CAAC,iBAAiB,CA2BrB;YAEU,sBAAsB;YAsBtB,2BAA2B;YAkc3B,iBAAiB;IAqD/B,OAAO,CAAC,cAAc;IAiBhB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;YAQd,UAAU;IA2BlB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAyC7B"}
1
+ {"version":3,"file":"agent_activity.d.ts","sourceRoot":"","sources":["../../src/voice/agent_activity.ts"],"names":[],"mappings":";AAIA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAKpD,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,KAAK,WAAW,EAAE,WAAW,EAAE,MAAM,wBAAwB,CAAC;AACvE,OAAO,EAIL,KAAK,sBAAsB,EAC3B,KAAK,uBAAuB,EAC5B,KAAK,uBAAuB,EAC5B,KAAK,2BAA2B,EAChC,GAAG,EACH,aAAa,EAEb,KAAK,eAAe,EACpB,KAAK,UAAU,EACf,KAAK,WAAW,EACjB,MAAM,iBAAiB,CAAC;AAazB,OAAO,EAAE,GAAG,EAAiB,KAAK,WAAW,EAAE,MAAM,eAAe,CAAC;AAGrE,OAAO,EAAE,GAAG,EAAiB,MAAM,eAAe,CAAC;AACnD,OAAO,EAAE,MAAM,EAAE,IAAI,EAA0B,MAAM,aAAa,CAAC;AACnE,OAAO,EAAE,GAAG,EAAE,KAAK,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC/C,OAAO,KAAK,EAAE,KAAK,EAAiB,MAAM,YAAY,CAAC;AAEvD,OAAO,EAAE,KAAK,YAAY,EAAE,KAAK,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAC/E,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,wBAAwB,EAC7B,KAAK,gBAAgB,EAEtB,MAAM,wBAAwB,CAAC;AAqBhC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAclD,qBAAa,aAAc,YAAW,gBAAgB;IACpD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,yBAAyB,CAAQ;IACzD,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,gBAAgB,CAAC,CAAmB;IAC5C,OAAO,CAAC,eAAe,CAAC,CAAkB;IAC1C,OAAO,CAAC,aAAa,CAAC,CAAoB;IAC1C,OAAO,CAAC,iBAAiB,CAAC,CAA4C;IACtE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAC,CAAe;IACtC,OAAO,CAAC,WAAW,CAAuC;IAC1D,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,WAAW,CAA8B;IACjD,OAAO,CAAC,IAAI,CAAe;IAC3B,OAAO,CAAC,WAAW,CAA4C;IAE/D,OAAO,CAAC,UAAU,CAA2B;IAC7C,OAAO,CAAC,qBAAqB,CAAC,CAAuB;IAErD,KAAK,EAAE,KAAK,CAAC;IACb,YAAY,EAAE,YAAY,CAAC;IAE3B,gBAAgB;IAChB,SAAS,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;IACvB,sBAAsB,CAAC,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;gBAE3B,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,YAAY;IA8F9C,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAoH5B,IAAI,aAAa,IAAI,YAAY,GAAG,SAAS,CAE5C;IAED,IAAI,GAAG,IAAI,GAAG,GAAG,SAAS,CAEzB;IAED,IAAI,GAAG,IAAI,GAAG,GAAG,SAAS,CAEzB;IAED,IAAI,GAAG,IAAI,GAAG,GAAG,aAAa,GAAG,SAAS,CAEzC;IAED,IAAI,GAAG,IAAI,GAAG,GAAG,SAAS,CAEzB;IAED,IAAI,KAAK,IAAI,WAAW,CAEvB;IAED,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED,IAAI,kBAAkB,IAAI,eAAe,GAAG,SAAS,CAEpD;IAED,IAAI,kBAAkB,IAAI,OAAO,CAGhC;IAED,IAAI,aAAa,IAAI,iBAAiB,GAAG,SAAS,CAGjD;IAED,IAAI,OAAO,IAAI,WAAW,CAEzB;IAEK,aAAa,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAiBxD,aAAa,CAAC,EAAE,UAAU,EAAE,EAAE;QAAE,UAAU,CAAC,EAAE,UAAU,GAAG,IAAI,CAAA;KAAE,GAAG,IAAI;IAUvE,gBAAgB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC,GAAG,IAAI;IAwB/D,gBAAgB,IAAI,IAAI;IAIxB,cAAc;IAUd,aAAa;IAKb,GAAG,CACD,IAAI,EAAE,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,EACrC,OAAO,CAAC,EAAE;QACR,KAAK,CAAC,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,kBAAkB,CAAC,EAAE,OAAO,CAAC;QAC7B,YAAY,CAAC,EAAE,OAAO,CAAC;KACxB,GACA,YAAY;IAwDf,OAAO,CAAC,kBAAkB,CAqBxB;IAEF,OAAO,CAAC,OAAO;IAoBf,oBAAoB,CAAC,GAAG,EAAE,uBAAuB,GAAG,IAAI;IAmBxD,oBAAoB,CAAC,EAAE,EAAE,uBAAuB,GAAG,IAAI;IAkBvD,kCAAkC,CAAC,EAAE,EAAE,2BAA2B,GAAG,IAAI;IAoBzE,mBAAmB,CAAC,EAAE,EAAE,sBAAsB,GAAG,IAAI;IAqCrD,eAAe,CAAC,EAAE,EAAE,QAAQ,GAAG,IAAI;IAQnC,aAAa,CAAC,EAAE,EAAE,QAAQ,GAAG,IAAI;IAQjC,kBAAkB,CAAC,EAAE,EAAE,QAAQ,GAAG,IAAI;IAWtC,OAAO,CAAC,wBAAwB;IAyChC,mBAAmB,CAAC,EAAE,EAAE,WAAW,GAAG,IAAI;IAqB1C,iBAAiB,CAAC,EAAE,EAAE,WAAW,GAAG,IAAI;IA+BxC,sBAAsB,CAAC,IAAI,EAAE,wBAAwB,GAAG,IAAI;IA0C5D,OAAO,CAAC,0BAA0B;IAOlC,OAAO,CAAC,gBAAgB;IA4BlB,WAAW,CAAC,IAAI,EAAE,aAAa,GAAG,OAAO,CAAC,OAAO,CAAC;IA0CxD,eAAe,IAAI,WAAW;YAIhB,QAAQ;IAuCtB,OAAO,CAAC,cAAc;IAItB,aAAa,CAAC,OAAO,EAAE;QACrB,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,WAAW,CAAC;QACtB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,CAAC,EAAE,UAAU,GAAG,IAAI,CAAC;QAC/B,kBAAkB,CAAC,EAAE,OAAO,CAAC;QAC7B,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GAAG,YAAY;IAuGhB,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC;IA0BzB,OAAO,CAAC,mBAAmB;YAMb,iBAAiB;YAyHjB,OAAO;IAmIrB,OAAO,CAAC,sBAAsB,CAkZ5B;IAEF,OAAO,CAAC,iBAAiB,CA2BrB;YAEU,sBAAsB;YAsBtB,2BAA2B;YAwc3B,iBAAiB;IAqD/B,OAAO,CAAC,cAAc;IAiBhB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;YAQd,UAAU;IA2BlB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAyC7B"}
@@ -1,5 +1,5 @@
1
1
  import { Mutex } from "@livekit/mutex";
2
- import { ROOT_CONTEXT, trace } from "@opentelemetry/api";
2
+ import { ROOT_CONTEXT, context as otelContext, trace } from "@opentelemetry/api";
3
3
  import { Heap } from "heap-js";
4
4
  import { AsyncLocalStorage } from "node:async_hooks";
5
5
  import { ReadableStream } from "node:stream/web";
@@ -119,9 +119,9 @@ class AgentActivity {
119
119
  );
120
120
  this.turnDetectionMode = void 0;
121
121
  }
122
- if (!this.vad && this.stt && this.llm instanceof LLM && this.allowInterruptions && this.turnDetectionMode === void 0) {
122
+ if (!this.vad && this.stt && !this.stt.capabilities.streaming && this.llm instanceof LLM && this.allowInterruptions && this.turnDetectionMode === void 0) {
123
123
  this.logger.warn(
124
- "VAD is not set. Enabling VAD is recommended when using LLM and STT for more responsive interruption handling."
124
+ "VAD is not set. Enabling VAD is recommended when using LLM and non-streaming STT for more responsive interruption handling."
125
125
  );
126
126
  }
127
127
  }
@@ -455,8 +455,12 @@ class AgentActivity {
455
455
  this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
456
456
  }
457
457
  // recognition hooks
458
- onStartOfSpeech(_ev) {
459
- this.agentSession._updateUserState("speaking");
458
+ onStartOfSpeech(ev) {
459
+ let speechStartTime = Date.now();
460
+ if (ev) {
461
+ speechStartTime = speechStartTime - ev.speechDuration;
462
+ }
463
+ this.agentSession._updateUserState("speaking", speechStartTime);
460
464
  }
461
465
  onEndOfSpeech(ev) {
462
466
  let speechEndTime = Date.now();
@@ -466,14 +470,16 @@ class AgentActivity {
466
470
  this.agentSession._updateUserState("listening", speechEndTime);
467
471
  }
468
472
  onVADInferenceDone(ev) {
469
- var _a, _b;
470
473
  if (this.turnDetection === "manual" || this.turnDetection === "realtime_llm") {
471
474
  return;
472
475
  }
473
- if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection) {
474
- return;
476
+ if (ev.speechDuration >= this.agentSession.options.minInterruptionDuration) {
477
+ this.interruptByAudioActivity();
475
478
  }
476
- if (ev.speechDuration < this.agentSession.options.minInterruptionDuration) {
479
+ }
480
+ interruptByAudioActivity() {
481
+ var _a, _b;
482
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection) {
477
483
  return;
478
484
  }
479
485
  if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
@@ -486,7 +492,10 @@ class AgentActivity {
486
492
  }
487
493
  (_a = this.realtimeSession) == null ? void 0 : _a.startUserActivity();
488
494
  if (this._currentSpeech && !this._currentSpeech.interrupted && this._currentSpeech.allowInterruptions) {
489
- this.logger.info({ "speech id": this._currentSpeech.id }, "speech interrupted by VAD");
495
+ this.logger.info(
496
+ { "speech id": this._currentSpeech.id },
497
+ "speech interrupted by audio activity"
498
+ );
490
499
  (_b = this.realtimeSession) == null ? void 0 : _b.interrupt();
491
500
  this._currentSpeech.interrupt();
492
501
  }
@@ -504,6 +513,9 @@ class AgentActivity {
504
513
  // TODO(AJS-106): add multi participant support
505
514
  })
506
515
  );
516
+ if (ev.alternatives[0].text) {
517
+ this.interruptByAudioActivity();
518
+ }
507
519
  }
508
520
  onFinalTranscript(ev) {
509
521
  if (this.llm instanceof RealtimeModel && this.llm.capabilities.userTranscription) {
@@ -518,6 +530,9 @@ class AgentActivity {
518
530
  // TODO(AJS-106): add multi participant support
519
531
  })
520
532
  );
533
+ if (this.audioRecognition && this.turnDetection !== "manual" && this.turnDetection !== "realtime_llm") {
534
+ this.interruptByAudioActivity();
535
+ }
521
536
  }
522
537
  onPreemptiveGeneration(info) {
523
538
  if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof LLM)) {
@@ -833,6 +848,7 @@ ${instructions}` : instructions,
833
848
  );
834
849
  }
835
850
  async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
851
+ speechHandle._agentTurnContext = otelContext.active();
836
852
  speechHandleStorage.enterWith(speechHandle);
837
853
  const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
838
854
  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
@@ -864,12 +880,15 @@ ${instructions}` : instructions,
864
880
  textOut = _textOut;
865
881
  tasks.push(textForwardTask);
866
882
  }
867
- const onFirstFrame = () => {
868
- this.agentSession._updateAgentState("speaking");
883
+ const onFirstFrame = (startedSpeakingAt) => {
884
+ this.agentSession._updateAgentState("speaking", {
885
+ startTime: startedSpeakingAt,
886
+ otelContext: speechHandle._agentTurnContext
887
+ });
869
888
  };
870
889
  if (!audioOutput) {
871
890
  if (textOut) {
872
- textOut.firstTextFut.await.finally(onFirstFrame);
891
+ textOut.firstTextFut.await.then(() => onFirstFrame()).catch(() => this.logger.debug("firstTextFut cancelled before first frame"));
873
892
  }
874
893
  } else {
875
894
  let audioOut = null;
@@ -897,7 +916,7 @@ ${instructions}` : instructions,
897
916
  tasks.push(forwardTask);
898
917
  audioOut = _audioOut;
899
918
  }
900
- audioOut.firstFrameFut.await.finally(onFirstFrame);
919
+ audioOut.firstFrameFut.await.then((ts) => onFirstFrame(ts)).catch(() => this.logger.debug("firstFrameFut cancelled before first frame"));
901
920
  }
902
921
  await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
903
922
  if (audioOutput) {
@@ -936,6 +955,7 @@ ${instructions}` : instructions,
936
955
  span
937
956
  }) => {
938
957
  var _a, _b, _c;
958
+ speechHandle._agentTurnContext = otelContext.active();
939
959
  span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
940
960
  if (instructions) {
941
961
  span.setAttribute(traceTypes.ATTR_INSTRUCTIONS, instructions);
@@ -1012,8 +1032,11 @@ ${instructions}` : instructions,
1012
1032
  tasks.push(textForwardTask);
1013
1033
  textOut = _textOut;
1014
1034
  }
1015
- const onFirstFrame = () => {
1016
- this.agentSession._updateAgentState("speaking");
1035
+ const onFirstFrame = (startedSpeakingAt) => {
1036
+ this.agentSession._updateAgentState("speaking", {
1037
+ startTime: startedSpeakingAt,
1038
+ otelContext: speechHandle._agentTurnContext
1039
+ });
1017
1040
  };
1018
1041
  let audioOut = null;
1019
1042
  if (audioOutput) {
@@ -1025,12 +1048,12 @@ ${instructions}` : instructions,
1025
1048
  );
1026
1049
  audioOut = _audioOut;
1027
1050
  tasks.push(forwardTask);
1028
- audioOut.firstFrameFut.await.finally(onFirstFrame);
1051
+ audioOut.firstFrameFut.await.then((ts) => onFirstFrame(ts)).catch(() => this.logger.debug("firstFrameFut cancelled before first frame"));
1029
1052
  } else {
1030
1053
  throw Error("ttsStream is null when audioOutput is enabled");
1031
1054
  }
1032
1055
  } else {
1033
- textOut == null ? void 0 : textOut.firstTextFut.await.finally(onFirstFrame);
1056
+ textOut == null ? void 0 : textOut.firstTextFut.await.then(() => onFirstFrame()).catch(() => this.logger.debug("firstTextFut cancelled before first frame"));
1034
1057
  }
1035
1058
  const onToolExecutionStarted = (f) => {
1036
1059
  speechHandle._itemAdded([f]);
@@ -1061,7 +1084,12 @@ ${instructions}` : instructions,
1061
1084
  msg.createdAt = replyStartedAt;
1062
1085
  }
1063
1086
  this.agent._chatCtx.insert(toolsMessages);
1064
- this.agentSession._toolItemsAdded(toolsMessages);
1087
+ const toolCallOutputs = toolsMessages.filter(
1088
+ (m) => m.type === "function_call_output"
1089
+ );
1090
+ if (toolCallOutputs.length > 0) {
1091
+ this.agentSession._toolItemsAdded(toolCallOutputs);
1092
+ }
1065
1093
  }
1066
1094
  if (speechHandle.interrupted) {
1067
1095
  this.logger.debug(
@@ -1078,9 +1106,9 @@ ${instructions}` : instructions,
1078
1106
  let forwardedText = (textOut == null ? void 0 : textOut.text) || "";
1079
1107
  if (audioOutput) {
1080
1108
  const playbackEv = await audioOutput.waitForPlayout();
1081
- if (audioOut == null ? void 0 : audioOut.firstFrameFut.done) {
1109
+ if ((audioOut == null ? void 0 : audioOut.firstFrameFut.done) && !audioOut.firstFrameFut.rejected) {
1082
1110
  this.logger.info(
1083
- { speech_id: speechHandle.id, playbackPosition: playbackEv.playbackPosition },
1111
+ { speech_id: speechHandle.id, playbackPositionInS: playbackEv.playbackPosition },
1084
1112
  "playout interrupted"
1085
1113
  );
1086
1114
  if (playbackEv.synchronizedTranscript) {
@@ -1218,7 +1246,12 @@ ${instructions}` : instructions,
1218
1246
  msg.createdAt = replyStartedAt;
1219
1247
  }
1220
1248
  this.agent._chatCtx.insert(toolMessages);
1221
- this.agentSession._toolItemsAdded(toolMessages);
1249
+ const toolCallOutputs = toolMessages.filter(
1250
+ (m) => m.type === "function_call_output"
1251
+ );
1252
+ if (toolCallOutputs.length > 0) {
1253
+ this.agentSession._toolItemsAdded(toolCallOutputs);
1254
+ }
1222
1255
  }
1223
1256
  };
1224
1257
  pipelineReplyTask = async (speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) => tracer.startActiveSpan(
@@ -1261,6 +1294,7 @@ ${instructions}` : instructions,
1261
1294
  span
1262
1295
  }) {
1263
1296
  var _a, _b, _c;
1297
+ speechHandle._agentTurnContext = otelContext.active();
1264
1298
  span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
1265
1299
  speechHandleStorage.enterWith(speechHandle);
1266
1300
  if (!this.realtimeSession) {
@@ -1285,8 +1319,11 @@ ${instructions}` : instructions,
1285
1319
  if (speechHandle.interrupted) {
1286
1320
  return;
1287
1321
  }
1288
- const onFirstFrame = () => {
1289
- this.agentSession._updateAgentState("speaking");
1322
+ const onFirstFrame = (startedSpeakingAt) => {
1323
+ this.agentSession._updateAgentState("speaking", {
1324
+ startTime: startedSpeakingAt,
1325
+ otelContext: speechHandle._agentTurnContext
1326
+ });
1290
1327
  };
1291
1328
  const readMessages = async (abortController, outputs) => {
1292
1329
  replyAbortController.signal.addEventListener("abort", () => abortController.abort(), {
@@ -1361,10 +1398,10 @@ ${instructions}` : instructions,
1361
1398
  );
1362
1399
  forwardTasks.push(forwardTask);
1363
1400
  audioOut = _audioOut;
1364
- audioOut.firstFrameFut.await.finally(onFirstFrame);
1401
+ audioOut.firstFrameFut.await.then((ts) => onFirstFrame(ts)).catch(() => this.logger.debug("firstFrameFut cancelled before first frame"));
1365
1402
  }
1366
1403
  } else if (textOut) {
1367
- textOut.firstTextFut.await.finally(onFirstFrame);
1404
+ textOut.firstTextFut.await.then(() => onFirstFrame()).catch(() => this.logger.debug("firstTextFut cancelled before first frame"));
1368
1405
  }
1369
1406
  outputs.push([msg.messageId, textOut, audioOut, msgModalities]);
1370
1407
  }
@@ -1428,7 +1465,6 @@ ${instructions}` : instructions,
1428
1465
  await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
1429
1466
  if (audioOutput) {
1430
1467
  await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
1431
- this.agentSession._updateAgentState("listening");
1432
1468
  }
1433
1469
  if (speechHandle.interrupted) {
1434
1470
  this.logger.debug(
@@ -1443,10 +1479,10 @@ ${instructions}` : instructions,
1443
1479
  if (audioOutput) {
1444
1480
  audioOutput.clearBuffer();
1445
1481
  const playbackEv = await audioOutput.waitForPlayout();
1446
- let playbackPosition = playbackEv.playbackPosition;
1447
- if (audioOut == null ? void 0 : audioOut.firstFrameFut.done) {
1482
+ let playbackPositionInS = playbackEv.playbackPosition;
1483
+ if ((audioOut == null ? void 0 : audioOut.firstFrameFut.done) && !audioOut.firstFrameFut.rejected) {
1448
1484
  this.logger.info(
1449
- { speech_id: speechHandle.id, playbackPosition: playbackEv.playbackPosition },
1485
+ { speech_id: speechHandle.id, playbackPositionInS },
1450
1486
  "playout interrupted"
1451
1487
  );
1452
1488
  if (playbackEv.synchronizedTranscript) {
@@ -1454,11 +1490,11 @@ ${instructions}` : instructions,
1454
1490
  }
1455
1491
  } else {
1456
1492
  forwardedText = "";
1457
- playbackPosition = 0;
1493
+ playbackPositionInS = 0;
1458
1494
  }
1459
1495
  this.realtimeSession.truncate({
1460
1496
  messageId: msgId,
1461
- audioEndMs: Math.floor(playbackPosition),
1497
+ audioEndMs: Math.floor(playbackPositionInS * 1e3),
1462
1498
  modalities: msgModalities,
1463
1499
  audioTranscript: forwardedText
1464
1500
  });
@@ -1496,14 +1532,13 @@ ${instructions}` : instructions,
1496
1532
  this.agentSession._conversationItemAdded(message);
1497
1533
  }
1498
1534
  speechHandle._markGenerationDone();
1499
- toolOutput.firstToolStartedFuture.await.finally(() => {
1500
- this.agentSession._updateAgentState("thinking");
1501
- });
1502
1535
  await executeToolsTask.result;
1536
+ if (toolOutput.output.length > 0) {
1537
+ this.agentSession._updateAgentState("thinking");
1538
+ } else if (this.agentSession.agentState === "speaking") {
1539
+ this.agentSession._updateAgentState("listening");
1540
+ }
1503
1541
  if (toolOutput.output.length === 0) {
1504
- if (!speechHandle.interrupted) {
1505
- this.agentSession._updateAgentState("listening");
1506
- }
1507
1542
  return;
1508
1543
  }
1509
1544
  const { maxToolSteps } = this.agentSession.options;