@livekit/agents 0.6.4 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/dist/index.cjs +6 -1
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.ts +3 -1
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +3 -0
  6. package/dist/index.js.map +1 -1
  7. package/dist/inference_runner.cjs +38 -0
  8. package/dist/inference_runner.cjs.map +1 -0
  9. package/dist/inference_runner.d.ts +11 -0
  10. package/dist/inference_runner.d.ts.map +1 -0
  11. package/dist/inference_runner.js +14 -0
  12. package/dist/inference_runner.js.map +1 -0
  13. package/dist/ipc/index.cjs +23 -0
  14. package/dist/ipc/index.cjs.map +1 -0
  15. package/dist/ipc/index.d.ts +2 -0
  16. package/dist/ipc/index.d.ts.map +1 -0
  17. package/dist/ipc/index.js +2 -0
  18. package/dist/ipc/index.js.map +1 -0
  19. package/dist/ipc/inference_executor.cjs +17 -0
  20. package/dist/ipc/inference_executor.cjs.map +1 -0
  21. package/dist/ipc/inference_executor.d.ts +4 -0
  22. package/dist/ipc/inference_executor.d.ts.map +1 -0
  23. package/dist/ipc/inference_executor.js +1 -0
  24. package/dist/ipc/inference_executor.js.map +1 -0
  25. package/dist/ipc/inference_proc_executor.cjs +97 -0
  26. package/dist/ipc/inference_proc_executor.cjs.map +1 -0
  27. package/dist/ipc/inference_proc_executor.d.ts +23 -0
  28. package/dist/ipc/inference_proc_executor.d.ts.map +1 -0
  29. package/dist/ipc/inference_proc_executor.js +72 -0
  30. package/dist/ipc/inference_proc_executor.js.map +1 -0
  31. package/dist/ipc/inference_proc_lazy_main.cjs +90 -0
  32. package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -0
  33. package/dist/ipc/inference_proc_lazy_main.d.ts +2 -0
  34. package/dist/ipc/inference_proc_lazy_main.d.ts.map +1 -0
  35. package/dist/ipc/inference_proc_lazy_main.js +67 -0
  36. package/dist/ipc/inference_proc_lazy_main.js.map +1 -0
  37. package/dist/ipc/job_executor.cjs +8 -7
  38. package/dist/ipc/job_executor.cjs.map +1 -1
  39. package/dist/ipc/job_executor.d.ts +14 -15
  40. package/dist/ipc/job_executor.d.ts.map +1 -1
  41. package/dist/ipc/job_executor.js +7 -6
  42. package/dist/ipc/job_executor.js.map +1 -1
  43. package/dist/ipc/job_proc_executor.cjs +108 -0
  44. package/dist/ipc/job_proc_executor.cjs.map +1 -0
  45. package/dist/ipc/job_proc_executor.d.ts +19 -0
  46. package/dist/ipc/job_proc_executor.d.ts.map +1 -0
  47. package/dist/ipc/job_proc_executor.js +83 -0
  48. package/dist/ipc/job_proc_executor.js.map +1 -0
  49. package/dist/ipc/{job_main.cjs → job_proc_lazy_main.cjs} +41 -36
  50. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -0
  51. package/dist/ipc/job_proc_lazy_main.d.ts +2 -0
  52. package/dist/ipc/job_proc_lazy_main.d.ts.map +1 -0
  53. package/dist/ipc/{job_main.js → job_proc_lazy_main.js} +41 -11
  54. package/dist/ipc/job_proc_lazy_main.js.map +1 -0
  55. package/dist/ipc/message.cjs.map +1 -1
  56. package/dist/ipc/message.d.ts +17 -0
  57. package/dist/ipc/message.d.ts.map +1 -1
  58. package/dist/ipc/proc_pool.cjs +30 -4
  59. package/dist/ipc/proc_pool.cjs.map +1 -1
  60. package/dist/ipc/proc_pool.d.ts +5 -1
  61. package/dist/ipc/proc_pool.d.ts.map +1 -1
  62. package/dist/ipc/proc_pool.js +30 -4
  63. package/dist/ipc/proc_pool.js.map +1 -1
  64. package/dist/ipc/{proc_job_executor.cjs → supervised_proc.cjs} +57 -45
  65. package/dist/ipc/supervised_proc.cjs.map +1 -0
  66. package/dist/ipc/supervised_proc.d.ts +30 -0
  67. package/dist/ipc/supervised_proc.d.ts.map +1 -0
  68. package/dist/ipc/{proc_job_executor.js → supervised_proc.js} +53 -31
  69. package/dist/ipc/supervised_proc.js.map +1 -0
  70. package/dist/job.cjs +18 -1
  71. package/dist/job.cjs.map +1 -1
  72. package/dist/job.d.ts +9 -1
  73. package/dist/job.d.ts.map +1 -1
  74. package/dist/job.js +17 -1
  75. package/dist/job.js.map +1 -1
  76. package/dist/multimodal/agent_playout.cjs +13 -14
  77. package/dist/multimodal/agent_playout.cjs.map +1 -1
  78. package/dist/multimodal/agent_playout.d.ts +4 -4
  79. package/dist/multimodal/agent_playout.d.ts.map +1 -1
  80. package/dist/multimodal/agent_playout.js +13 -14
  81. package/dist/multimodal/agent_playout.js.map +1 -1
  82. package/dist/multimodal/multimodal_agent.cjs +12 -8
  83. package/dist/multimodal/multimodal_agent.cjs.map +1 -1
  84. package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
  85. package/dist/multimodal/multimodal_agent.js +13 -9
  86. package/dist/multimodal/multimodal_agent.js.map +1 -1
  87. package/dist/pipeline/agent_output.cjs +20 -4
  88. package/dist/pipeline/agent_output.cjs.map +1 -1
  89. package/dist/pipeline/agent_output.d.ts +4 -2
  90. package/dist/pipeline/agent_output.d.ts.map +1 -1
  91. package/dist/pipeline/agent_output.js +20 -4
  92. package/dist/pipeline/agent_output.js.map +1 -1
  93. package/dist/pipeline/agent_playout.cjs +9 -3
  94. package/dist/pipeline/agent_playout.cjs.map +1 -1
  95. package/dist/pipeline/agent_playout.d.ts +4 -2
  96. package/dist/pipeline/agent_playout.d.ts.map +1 -1
  97. package/dist/pipeline/agent_playout.js +9 -3
  98. package/dist/pipeline/agent_playout.js.map +1 -1
  99. package/dist/pipeline/human_input.cjs +6 -0
  100. package/dist/pipeline/human_input.cjs.map +1 -1
  101. package/dist/pipeline/human_input.d.ts +3 -1
  102. package/dist/pipeline/human_input.d.ts.map +1 -1
  103. package/dist/pipeline/human_input.js +6 -0
  104. package/dist/pipeline/human_input.js.map +1 -1
  105. package/dist/pipeline/pipeline_agent.cjs +79 -12
  106. package/dist/pipeline/pipeline_agent.cjs.map +1 -1
  107. package/dist/pipeline/pipeline_agent.d.ts +8 -0
  108. package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
  109. package/dist/pipeline/pipeline_agent.js +79 -12
  110. package/dist/pipeline/pipeline_agent.js.map +1 -1
  111. package/dist/stt/stream_adapter.cjs +16 -4
  112. package/dist/stt/stream_adapter.cjs.map +1 -1
  113. package/dist/stt/stream_adapter.d.ts.map +1 -1
  114. package/dist/stt/stream_adapter.js +16 -4
  115. package/dist/stt/stream_adapter.js.map +1 -1
  116. package/dist/tokenize/basic/basic.cjs +2 -0
  117. package/dist/tokenize/basic/basic.cjs.map +1 -1
  118. package/dist/tokenize/basic/basic.d.ts +2 -0
  119. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  120. package/dist/tokenize/basic/basic.js +1 -0
  121. package/dist/tokenize/basic/basic.js.map +1 -1
  122. package/dist/tokenize/basic/index.cjs +2 -0
  123. package/dist/tokenize/basic/index.cjs.map +1 -1
  124. package/dist/tokenize/basic/index.d.ts +1 -1
  125. package/dist/tokenize/basic/index.d.ts.map +1 -1
  126. package/dist/tokenize/basic/index.js +8 -1
  127. package/dist/tokenize/basic/index.js.map +1 -1
  128. package/dist/tokenize/token_stream.cjs +5 -3
  129. package/dist/tokenize/token_stream.cjs.map +1 -1
  130. package/dist/tokenize/token_stream.d.ts.map +1 -1
  131. package/dist/tokenize/token_stream.js +5 -3
  132. package/dist/tokenize/token_stream.js.map +1 -1
  133. package/dist/transcription.cjs +203 -86
  134. package/dist/transcription.cjs.map +1 -1
  135. package/dist/transcription.d.ts +24 -17
  136. package/dist/transcription.d.ts.map +1 -1
  137. package/dist/transcription.js +201 -85
  138. package/dist/transcription.js.map +1 -1
  139. package/dist/worker.cjs +42 -9
  140. package/dist/worker.cjs.map +1 -1
  141. package/dist/worker.d.ts +5 -1
  142. package/dist/worker.d.ts.map +1 -1
  143. package/dist/worker.js +42 -9
  144. package/dist/worker.js.map +1 -1
  145. package/package.json +3 -3
  146. package/src/index.ts +3 -1
  147. package/src/inference_runner.ts +19 -0
  148. package/src/ipc/index.ts +5 -0
  149. package/src/ipc/inference_executor.ts +7 -0
  150. package/src/ipc/inference_proc_executor.ts +93 -0
  151. package/src/ipc/inference_proc_lazy_main.ts +86 -0
  152. package/src/ipc/job_executor.ts +15 -17
  153. package/src/ipc/job_proc_executor.ts +112 -0
  154. package/src/ipc/{job_main.ts → job_proc_lazy_main.ts} +44 -14
  155. package/src/ipc/message.ts +14 -1
  156. package/src/ipc/proc_pool.ts +33 -3
  157. package/src/ipc/{proc_job_executor.ts → supervised_proc.ts} +77 -29
  158. package/src/job.ts +21 -0
  159. package/src/multimodal/agent_playout.ts +14 -16
  160. package/src/multimodal/multimodal_agent.ts +13 -9
  161. package/src/pipeline/agent_output.ts +34 -5
  162. package/src/pipeline/agent_playout.ts +10 -1
  163. package/src/pipeline/human_input.ts +8 -0
  164. package/src/pipeline/pipeline_agent.ts +96 -11
  165. package/src/stt/stream_adapter.ts +17 -5
  166. package/src/tokenize/basic/basic.ts +2 -0
  167. package/src/tokenize/basic/index.ts +7 -1
  168. package/src/tokenize/token_stream.ts +6 -3
  169. package/src/transcription.ts +270 -96
  170. package/src/worker.ts +42 -5
  171. package/dist/ipc/job_main.cjs.map +0 -1
  172. package/dist/ipc/job_main.d.ts +0 -8
  173. package/dist/ipc/job_main.d.ts.map +0 -1
  174. package/dist/ipc/job_main.js.map +0 -1
  175. package/dist/ipc/proc_job_executor.cjs.map +0 -1
  176. package/dist/ipc/proc_job_executor.d.ts +0 -15
  177. package/dist/ipc/proc_job_executor.d.ts.map +0 -1
  178. package/dist/ipc/proc_job_executor.js.map +0 -1
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/agent_output.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport { SynthesizeStream, type TTS } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { AgentPlayout, PlayoutHandle } from './agent_playout.js';\n\nexport type SpeechSource = AsyncIterable<string> | string | Promise<string>;\n\nexport class SynthesisHandle {\n static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #speechId: string;\n text?: string;\n ttsSource: SpeechSource;\n #agentPlayout: AgentPlayout;\n tts: TTS;\n queue = new AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>();\n #playHandle?: PlayoutHandle;\n intFut = new Future();\n #logger = log();\n\n constructor(speechId: string, ttsSource: SpeechSource, agentPlayout: AgentPlayout, tts: TTS) {\n this.#speechId = speechId;\n this.ttsSource = ttsSource;\n this.#agentPlayout = agentPlayout;\n this.tts = tts;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get validated(): boolean {\n return !!this.#playHandle;\n }\n\n get interrupted(): boolean {\n return this.intFut.done;\n }\n\n get playHandle(): PlayoutHandle | undefined {\n return this.#playHandle;\n }\n\n /** Validate the speech for playout. */\n play(): PlayoutHandle {\n if (this.interrupted) {\n throw new Error('synthesis was interrupted');\n }\n\n this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue);\n return this.#playHandle;\n }\n\n /** Interrupt the speech. */\n interrupt() {\n if (this.interrupted) {\n return;\n }\n\n this.#logger.child({ speechId: this.#speechId }).debug('interrupting synthesis/playout');\n this.#playHandle?.interrupt();\n this.intFut.resolve();\n }\n}\n\nexport class AgentOutput {\n #agentPlayout: AgentPlayout;\n #tts: TTS;\n #tasks: CancellablePromise<void>[] = [];\n\n constructor(agentPlayout: AgentPlayout, tts: TTS) {\n this.#agentPlayout = agentPlayout;\n this.#tts = tts;\n }\n\n get playout(): AgentPlayout {\n return this.#agentPlayout;\n }\n\n async close() {\n this.#tasks.forEach((task) => task.cancel());\n await Promise.all(this.#tasks);\n }\n\n synthesize(speechId: string, ttsSource: SpeechSource): SynthesisHandle {\n const handle = new SynthesisHandle(speechId, ttsSource, this.#agentPlayout, this.#tts);\n const task = this.#synthesize(handle);\n this.#tasks.push(task);\n task.finally(() => this.#tasks.splice(this.#tasks.indexOf(task)));\n return handle;\n }\n\n #synthesize(handle: SynthesisHandle): CancellablePromise<void> {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const ttsSource = await handle.ttsSource;\n let task: CancellablePromise<string>;\n if (typeof ttsSource === 'string') {\n task = stringSynthesisTask(ttsSource, handle);\n } else {\n task = streamSynthesisTask(ttsSource, handle);\n }\n\n onCancel(() => {\n gracefullyCancel(task);\n });\n\n try {\n await Promise.any([task, handle.intFut.await]);\n } finally {\n if (handle.intFut.done) {\n gracefullyCancel(task);\n } else {\n task.then((text) => {\n handle.text = text;\n });\n }\n }\n\n resolve();\n });\n }\n}\n\nconst stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n ttsStream.pushText(text);\n ttsStream.flush();\n ttsStream.endInput();\n for await (const audio of ttsStream) {\n if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n\n resolve(text);\n });\n};\n\nconst streamSynthesisTask = (\n stream: AsyncIterable<string>,\n handle: SynthesisHandle,\n): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let fullText = '';\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n const readGeneratedAudio = async () => {\n for await (const audio of ttsStream) {\n if (cancelled) break;\n if (audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n };\n readGeneratedAudio();\n\n for await (const text of stream) {\n fullText += text;\n if (cancelled) break;\n ttsStream.pushText(text);\n }\n ttsStream.flush();\n ttsStream.endInput();\n\n resolve(fullText);\n });\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,iBAAoB;AACpB,iBAA2C;AAC3C,mBAAiF;AAK1E,MAAM,gBAAgB;AAAA,EAC3B,OAAgB,iBAAiB,OAAO,gBAAgB;AAAA,EAExD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ,IAAI,gCAAuE;AAAA,EACnF;AAAA,EACA,SAAS,IAAI,oBAAO;AAAA,EACpB,cAAU,gBAAI;AAAA,EAEd,YAAY,UAAkB,WAAyB,cAA4B,KAAU;AAC3F,SAAK,YAAY;AACjB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AACrB,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,CAAC,CAAC,KAAK;AAAA,EAChB;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK,OAAO;AAAA,EACrB;AAAA,EAEA,IAAI,aAAwC;AAC1C,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,OAAsB;AACpB,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AAEA,SAAK,cAAc,KAAK,cAAc,KAAK,KAAK,WAAW,KAAK,KAAK;AACrE,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,YAAY;AA1Dd;AA2DI,QAAI,KAAK,aAAa;AACpB;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,UAAU,CAAC,EAAE,MAAM,gCAAgC;AACvF,eAAK,gBAAL,mBAAkB;AAClB,SAAK,OAAO,QAAQ;AAAA,EACtB;AACF;AAEO,MAAM,YAAY;AAAA,EACvB;AAAA,EACA;AAAA,EACA,SAAqC,CAAC;AAAA,EAEtC,YAAY,cAA4B,KAAU;AAChD,SAAK,gBAAgB;AACrB,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,IAAI,UAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,OAAO,QAAQ,CAAC,SAAS,KAAK,OAAO,CAAC;AAC3C,UAAM,QAAQ,IAAI,KAAK,MAAM;AAAA,EAC/B;AAAA,EAEA,WAAW,UAAkB,WAA0C;AACrE,UAAM,SAAS,IAAI,gBAAgB,UAAU,WAAW,KAAK,eAAe,KAAK,IAAI;AACrF,UAAM,OAAO,KAAK,YAAY,MAAM;AACpC,SAAK,OAAO,KAAK,IAAI;AACrB,SAAK,QAAQ,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,QAAQ,IAAI,CAAC,CAAC;AAChE,WAAO;AAAA,EACT;AAAA,EAEA,YAAY,QAAmD;AAE7D,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,YAAY,MAAM,OAAO;AAC/B,UAAI;AACJ,UAAI,OAAO,cAAc,UAAU;AACjC,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C,OAAO;AACL,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C;AAEA,eAAS,MAAM;AACb,2CAAiB,IAAI;AAAA,MACvB,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,MAAM,OAAO,OAAO,KAAK,CAAC;AAAA,MAC/C,UAAE;AACA,YAAI,OAAO,OAAO,MAAM;AACtB,6CAAiB,IAAI;AAAA,QACvB,OAAO;AACL,eAAK,KAAK,CAAC,SAAS;AAClB,mBAAO,OAAO;AAAA,UAChB,CAAC;AAAA,QACH;AAAA,MACF;AAEA,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEA,MAAM,sBAAsB,CAAC,MAAc,WAAwD;AAEjG,SAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,cAAU,SAAS,IAAI;AACvB,cAAU,MAAM;AAChB,cAAU,SAAS;AACnB,qBAAiB,SAAS,WAAW;AACnC,UAAI,aAAa,UAAU,4BAAiB,eAAe;AACzD;AAAA,MACF;AACA,aAAO,MAAM,IAAI,MAAM,KAAK;AAAA,IAC9B;AACA,WAAO,MAAM,IAAI,gBAAgB,cAAc;AAE/C,YAAQ,IAAI;AAAA,EACd,CAAC;AACH;AAEA,MAAM,sBAAsB,CAC1B,QACA,WAC+B;AAE/B,SAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,WAAW;AACf,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,UAAM,qBAAqB,YAAY;AACrC,uBAAiB,SAAS,WAAW;AACnC,YAAI,UAAW;AACf,YAAI,UAAU,4BAAiB,eAAe;AAC5C;AAAA,QACF;AACA,eAAO,MAAM,IAAI,MAAM,KAAK;AAAA,MAC9B;AACA,aAAO,MAAM,IAAI,gBAAgB,cAAc;AAAA,IACjD;AACA,uBAAmB;AAEnB,qBAAiB,QAAQ,QAAQ;AAC/B,kBAAY;AACZ,UAAI,UAAW;AACf,gBAAU,SAAS,IAAI;AAAA,IACzB;AACA,cAAU,MAAM;AAChB,cAAU,SAAS;AAEnB,YAAQ,QAAQ;AAAA,EAClB,CAAC;AACH;","names":[]}
1
+ {"version":3,"sources":["../../src/pipeline/agent_output.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { SynthesizeStream, type TTS } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { AgentPlayout, PlayoutHandle } from './agent_playout.js';\n\nexport type SpeechSource = AsyncIterable<string> | string | Promise<string>;\n\nexport class SynthesisHandle {\n static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #speechId: string;\n text?: string;\n ttsSource: SpeechSource;\n #agentPlayout: AgentPlayout;\n tts: TTS;\n queue = new AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>();\n #playHandle?: PlayoutHandle;\n intFut = new Future();\n #logger = log();\n synchronizer: TextAudioSynchronizer;\n\n constructor(\n speechId: string,\n ttsSource: SpeechSource,\n agentPlayout: AgentPlayout,\n tts: TTS,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.ttsSource = ttsSource;\n this.#agentPlayout = agentPlayout;\n this.tts = tts;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get validated(): boolean {\n return !!this.#playHandle;\n }\n\n get interrupted(): boolean {\n return this.intFut.done;\n }\n\n get playHandle(): PlayoutHandle | undefined {\n return this.#playHandle;\n }\n\n /** Validate the speech for playout. */\n play(): PlayoutHandle {\n if (this.interrupted) {\n throw new Error('synthesis was interrupted');\n }\n\n this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue, this.synchronizer);\n return this.#playHandle;\n }\n\n /** Interrupt the speech. */\n interrupt() {\n if (this.interrupted) {\n return;\n }\n\n this.#logger.child({ speechId: this.#speechId }).debug('interrupting synthesis/playout');\n this.#playHandle?.interrupt();\n this.intFut.resolve();\n }\n}\n\nexport class AgentOutput {\n #agentPlayout: AgentPlayout;\n #tts: TTS;\n #tasks: CancellablePromise<void>[] = [];\n\n constructor(agentPlayout: AgentPlayout, tts: TTS) {\n this.#agentPlayout = agentPlayout;\n this.#tts = tts;\n }\n\n get playout(): AgentPlayout {\n return this.#agentPlayout;\n }\n\n async close() {\n this.#tasks.forEach((task) => task.cancel());\n await Promise.all(this.#tasks);\n }\n\n synthesize(\n speechId: string,\n ttsSource: SpeechSource,\n synchronizer: TextAudioSynchronizer,\n ): SynthesisHandle {\n const handle = new SynthesisHandle(\n speechId,\n ttsSource,\n this.#agentPlayout,\n this.#tts,\n synchronizer,\n );\n const task = this.#synthesize(handle);\n this.#tasks.push(task);\n task.finally(() => this.#tasks.splice(this.#tasks.indexOf(task)));\n return handle;\n }\n\n #synthesize(handle: SynthesisHandle): CancellablePromise<void> {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const ttsSource = await handle.ttsSource;\n let task: CancellablePromise<string>;\n if (typeof ttsSource === 'string') {\n task = stringSynthesisTask(ttsSource, handle);\n } else {\n task = streamSynthesisTask(ttsSource, handle);\n }\n\n onCancel(() => {\n gracefullyCancel(task);\n });\n\n try {\n await Promise.any([task, handle.intFut.await]);\n } finally {\n if (handle.intFut.done) {\n gracefullyCancel(task);\n } else {\n task.then((text) => {\n handle.text = text;\n });\n }\n }\n\n resolve();\n });\n }\n}\n\nconst stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n ttsStream.pushText(text);\n handle.synchronizer.pushText(text);\n handle.synchronizer.markTextSegmentEnd();\n ttsStream.flush();\n ttsStream.endInput();\n for await (const audio of ttsStream) {\n if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n\n resolve(text);\n });\n};\n\nconst streamSynthesisTask = (\n stream: AsyncIterable<string>,\n handle: SynthesisHandle,\n): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let fullText = '';\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n const readGeneratedAudio = async () => {\n for await (const audio of ttsStream) {\n if (cancelled) break;\n if (audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n };\n readGeneratedAudio();\n\n for await (const text of stream) {\n fullText += text;\n if (cancelled) break;\n handle.synchronizer.pushText(text);\n ttsStream.pushText(text);\n }\n handle.synchronizer.markTextSegmentEnd();\n\n // end the audio queue early if there is no actual text to turn into speech\n if (!fullText || fullText.trim().length === 0) {\n cancelled = true;\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n }\n ttsStream.flush();\n ttsStream.endInput();\n\n resolve(fullText);\n });\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,iBAAoB;AAEpB,iBAA2C;AAC3C,mBAAiF;AAK1E,MAAM,gBAAgB;AAAA,EAC3B,OAAgB,iBAAiB,OAAO,gBAAgB;AAAA,EAExD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ,IAAI,gCAAuE;AAAA,EACnF;AAAA,EACA,SAAS,IAAI,oBAAO;AAAA,EACpB,cAAU,gBAAI;AAAA,EACd;AAAA,EAEA,YACE,UACA,WACA,cACA,KACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AACrB,SAAK,MAAM;AACX,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,CAAC,CAAC,KAAK;AAAA,EAChB;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK,OAAO;AAAA,EACrB;AAAA,EAEA,IAAI,aAAwC;AAC1C,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,OAAsB;AACpB,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AAEA,SAAK,cAAc,KAAK,cAAc,KAAK,KAAK,WAAW,KAAK,OAAO,KAAK,YAAY;AACxF,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,YAAY;AAnEd;AAoEI,QAAI,KAAK,aAAa;AACpB;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,UAAU,CAAC,EAAE,MAAM,gCAAgC;AACvF,eAAK,gBAAL,mBAAkB;AAClB,SAAK,OAAO,QAAQ;AAAA,EACtB;AACF;AAEO,MAAM,YAAY;AAAA,EACvB;AAAA,EACA;AAAA,EACA,SAAqC,CAAC;AAAA,EAEtC,YAAY,cAA4B,KAAU;AAChD,SAAK,gBAAgB;AACrB,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,IAAI,UAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,OAAO,QAAQ,CAAC,SAAS,KAAK,OAAO,CAAC;AAC3C,UAAM,QAAQ,IAAI,KAAK,MAAM;AAAA,EAC/B;AAAA,EAEA,WACE,UACA,WACA,cACiB;AACjB,UAAM,SAAS,IAAI;AAAA,MACjB;AAAA,MACA;AAAA,MACA,KAAK;AAAA,MACL,KAAK;AAAA,MACL;AAAA,IACF;AACA,UAAM,OAAO,KAAK,YAAY,MAAM;AACpC,SAAK,OAAO,KAAK,IAAI;AACrB,SAAK,QAAQ,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,QAAQ,IAAI,CAAC,CAAC;AAChE,WAAO;AAAA,EACT;AAAA,EAEA,YAAY,QAAmD;AAE7D,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,YAAY,MAAM,OAAO;AAC/B,UAAI;AACJ,UAAI,OAAO,cAAc,UAAU;AACjC,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C,OAAO;AACL,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C;AAEA,eAAS,MAAM;AACb,2CAAiB,IAAI;AAAA,MACvB,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,MAAM,OAAO,OAAO,KAAK,CAAC;AAAA,MAC/C,UAAE;AACA,YAAI,OAAO,OAAO,MAAM;AACtB,6CAAiB,IAAI;AAAA,QACvB,OAAO;AACL,eAAK,KAAK,CAAC,SAAS;AAClB,mBAAO,OAAO;AAAA,UAChB,CAAC;AAAA,QACH;AAAA,MACF;AAEA,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEA,MAAM,sBAAsB,CAAC,MAAc,WAAwD;AAEjG,SAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,cAAU,SAAS,IAAI;AACvB,WAAO,aAAa,SAAS,IAAI;AACjC,WAAO,aAAa,mBAAmB;AACvC,cAAU,MAAM;AAChB,cAAU,SAAS;AACnB,qBAAiB,SAAS,WAAW;AACnC,UAAI,aAAa,UAAU,4BAAiB,eAAe;AACzD;AAAA,MACF;AACA,aAAO,MAAM,IAAI,MAAM,KAAK;AAAA,IAC9B;AACA,WAAO,MAAM,IAAI,gBAAgB,cAAc;AAE/C,YAAQ,IAAI;AAAA,EACd,CAAC;AACH;AAEA,MAAM,sBAAsB,CAC1B,QACA,WAC+B;AAE/B,SAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,WAAW;AACf,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,UAAM,qBAAqB,YAAY;AACrC,uBAAiB,SAAS,WAAW;AACnC,YAAI,UAAW;AACf,YAAI,UAAU,4BAAiB,eAAe;AAC5C;AAAA,QACF;AACA,eAAO,MAAM,IAAI,MAAM,KAAK;AAAA,MAC9B;AACA,aAAO,MAAM,IAAI,gBAAgB,cAAc;AAAA,IACjD;AACA,uBAAmB;AAEnB,qBAAiB,QAAQ,QAAQ;AAC/B,kBAAY;AACZ,UAAI,UAAW;AACf,aAAO,aAAa,SAAS,IAAI;AACjC,gBAAU,SAAS,IAAI;AAAA,IACzB;AACA,WAAO,aAAa,mBAAmB;AAGvC,QAAI,CAAC,YAAY,SAAS,KAAK,EAAE,WAAW,GAAG;AAC7C,kBAAY;AACZ,aAAO,MAAM,IAAI,gBAAgB,cAAc;AAAA,IACjD;AACA,cAAU,MAAM;AAChB,cAAU,SAAS;AAEnB,YAAQ,QAAQ;AAAA,EAClB,CAAC;AACH;","names":[]}
@@ -1,4 +1,5 @@
1
1
  import type { AudioFrame } from '@livekit/rtc-node';
2
+ import type { TextAudioSynchronizer } from '../transcription.js';
2
3
  import { type TTS } from '../tts/index.js';
3
4
  import { AsyncIterableQueue, Future } from '../utils.js';
4
5
  import type { AgentPlayout, PlayoutHandle } from './agent_playout.js';
@@ -11,7 +12,8 @@ export declare class SynthesisHandle {
11
12
  tts: TTS;
12
13
  queue: AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;
13
14
  intFut: Future;
14
- constructor(speechId: string, ttsSource: SpeechSource, agentPlayout: AgentPlayout, tts: TTS);
15
+ synchronizer: TextAudioSynchronizer;
16
+ constructor(speechId: string, ttsSource: SpeechSource, agentPlayout: AgentPlayout, tts: TTS, synchronizer: TextAudioSynchronizer);
15
17
  get speechId(): string;
16
18
  get validated(): boolean;
17
19
  get interrupted(): boolean;
@@ -26,6 +28,6 @@ export declare class AgentOutput {
26
28
  constructor(agentPlayout: AgentPlayout, tts: TTS);
27
29
  get playout(): AgentPlayout;
28
30
  close(): Promise<void>;
29
- synthesize(speechId: string, ttsSource: SpeechSource): SynthesisHandle;
31
+ synthesize(speechId: string, ttsSource: SpeechSource, synchronizer: TextAudioSynchronizer): SynthesisHandle;
30
32
  }
31
33
  //# sourceMappingURL=agent_output.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"agent_output.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_output.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAoB,KAAK,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC/F,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEtE,MAAM,MAAM,YAAY,GAAG,aAAa,CAAC,MAAM,CAAC,GAAG,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AAE5E,qBAAa,eAAe;;IAC1B,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IAG1D,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,YAAY,CAAC;IAExB,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,yEAAgF;IAErF,MAAM,SAAgB;gBAGV,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,YAAY,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG;IAO3F,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,SAAS,IAAI,OAAO,CAEvB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,aAAa,GAAG,SAAS,CAE1C;IAED,uCAAuC;IACvC,IAAI,IAAI,aAAa;IASrB,4BAA4B;IAC5B,SAAS;CASV;AAED,qBAAa,WAAW;;gBAKV,YAAY,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG;IAKhD,IAAI,OAAO,IAAI,YAAY,CAE1B;IAEK,KAAK;IAKX,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,GAAG,eAAe;CAsCvE"}
1
+ {"version":3,"file":"agent_output.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_output.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAoB,KAAK,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC/F,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEtE,MAAM,MAAM,YAAY,GAAG,aAAa,CAAC,MAAM,CAAC,GAAG,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AAE5E,qBAAa,eAAe;;IAC1B,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IAG1D,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,YAAY,CAAC;IAExB,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,yEAAgF;IAErF,MAAM,SAAgB;IAEtB,YAAY,EAAE,qBAAqB,CAAC;gBAGlC,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,YAAY,EACvB,YAAY,EAAE,YAAY,EAC1B,GAAG,EAAE,GAAG,EACR,YAAY,EAAE,qBAAqB;IASrC,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,SAAS,IAAI,OAAO,CAEvB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,aAAa,GAAG,SAAS,CAE1C;IAED,uCAAuC;IACvC,IAAI,IAAI,aAAa;IASrB,4BAA4B;IAC5B,SAAS;CASV;AAED,qBAAa,WAAW;;gBAKV,YAAY,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG;IAKhD,IAAI,OAAO,IAAI,YAAY,CAE1B;IAEK,KAAK;IAKX,UAAU,CACR,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,YAAY,EACvB,YAAY,EAAE,qBAAqB,GAClC,eAAe;CA4CnB"}
@@ -12,11 +12,13 @@ class SynthesisHandle {
12
12
  #playHandle;
13
13
  intFut = new Future();
14
14
  #logger = log();
15
- constructor(speechId, ttsSource, agentPlayout, tts) {
15
+ synchronizer;
16
+ constructor(speechId, ttsSource, agentPlayout, tts, synchronizer) {
16
17
  this.#speechId = speechId;
17
18
  this.ttsSource = ttsSource;
18
19
  this.#agentPlayout = agentPlayout;
19
20
  this.tts = tts;
21
+ this.synchronizer = synchronizer;
20
22
  }
21
23
  get speechId() {
22
24
  return this.#speechId;
@@ -35,7 +37,7 @@ class SynthesisHandle {
35
37
  if (this.interrupted) {
36
38
  throw new Error("synthesis was interrupted");
37
39
  }
38
- this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue);
40
+ this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue, this.synchronizer);
39
41
  return this.#playHandle;
40
42
  }
41
43
  /** Interrupt the speech. */
@@ -64,8 +66,14 @@ class AgentOutput {
64
66
  this.#tasks.forEach((task) => task.cancel());
65
67
  await Promise.all(this.#tasks);
66
68
  }
67
- synthesize(speechId, ttsSource) {
68
- const handle = new SynthesisHandle(speechId, ttsSource, this.#agentPlayout, this.#tts);
69
+ synthesize(speechId, ttsSource, synchronizer) {
70
+ const handle = new SynthesisHandle(
71
+ speechId,
72
+ ttsSource,
73
+ this.#agentPlayout,
74
+ this.#tts,
75
+ synchronizer
76
+ );
69
77
  const task = this.#synthesize(handle);
70
78
  this.#tasks.push(task);
71
79
  task.finally(() => this.#tasks.splice(this.#tasks.indexOf(task)));
@@ -106,6 +114,8 @@ const stringSynthesisTask = (text, handle) => {
106
114
  });
107
115
  const ttsStream = handle.tts.stream();
108
116
  ttsStream.pushText(text);
117
+ handle.synchronizer.pushText(text);
118
+ handle.synchronizer.markTextSegmentEnd();
109
119
  ttsStream.flush();
110
120
  ttsStream.endInput();
111
121
  for await (const audio of ttsStream) {
@@ -140,8 +150,14 @@ const streamSynthesisTask = (stream, handle) => {
140
150
  for await (const text of stream) {
141
151
  fullText += text;
142
152
  if (cancelled) break;
153
+ handle.synchronizer.pushText(text);
143
154
  ttsStream.pushText(text);
144
155
  }
156
+ handle.synchronizer.markTextSegmentEnd();
157
+ if (!fullText || fullText.trim().length === 0) {
158
+ cancelled = true;
159
+ handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
160
+ }
145
161
  ttsStream.flush();
146
162
  ttsStream.endInput();
147
163
  resolve(fullText);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/agent_output.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport { SynthesizeStream, type TTS } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { AgentPlayout, PlayoutHandle } from './agent_playout.js';\n\nexport type SpeechSource = AsyncIterable<string> | string | Promise<string>;\n\nexport class SynthesisHandle {\n static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #speechId: string;\n text?: string;\n ttsSource: SpeechSource;\n #agentPlayout: AgentPlayout;\n tts: TTS;\n queue = new AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>();\n #playHandle?: PlayoutHandle;\n intFut = new Future();\n #logger = log();\n\n constructor(speechId: string, ttsSource: SpeechSource, agentPlayout: AgentPlayout, tts: TTS) {\n this.#speechId = speechId;\n this.ttsSource = ttsSource;\n this.#agentPlayout = agentPlayout;\n this.tts = tts;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get validated(): boolean {\n return !!this.#playHandle;\n }\n\n get interrupted(): boolean {\n return this.intFut.done;\n }\n\n get playHandle(): PlayoutHandle | undefined {\n return this.#playHandle;\n }\n\n /** Validate the speech for playout. */\n play(): PlayoutHandle {\n if (this.interrupted) {\n throw new Error('synthesis was interrupted');\n }\n\n this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue);\n return this.#playHandle;\n }\n\n /** Interrupt the speech. */\n interrupt() {\n if (this.interrupted) {\n return;\n }\n\n this.#logger.child({ speechId: this.#speechId }).debug('interrupting synthesis/playout');\n this.#playHandle?.interrupt();\n this.intFut.resolve();\n }\n}\n\nexport class AgentOutput {\n #agentPlayout: AgentPlayout;\n #tts: TTS;\n #tasks: CancellablePromise<void>[] = [];\n\n constructor(agentPlayout: AgentPlayout, tts: TTS) {\n this.#agentPlayout = agentPlayout;\n this.#tts = tts;\n }\n\n get playout(): AgentPlayout {\n return this.#agentPlayout;\n }\n\n async close() {\n this.#tasks.forEach((task) => task.cancel());\n await Promise.all(this.#tasks);\n }\n\n synthesize(speechId: string, ttsSource: SpeechSource): SynthesisHandle {\n const handle = new SynthesisHandle(speechId, ttsSource, this.#agentPlayout, this.#tts);\n const task = this.#synthesize(handle);\n this.#tasks.push(task);\n task.finally(() => this.#tasks.splice(this.#tasks.indexOf(task)));\n return handle;\n }\n\n #synthesize(handle: SynthesisHandle): CancellablePromise<void> {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const ttsSource = await handle.ttsSource;\n let task: CancellablePromise<string>;\n if (typeof ttsSource === 'string') {\n task = stringSynthesisTask(ttsSource, handle);\n } else {\n task = streamSynthesisTask(ttsSource, handle);\n }\n\n onCancel(() => {\n gracefullyCancel(task);\n });\n\n try {\n await Promise.any([task, handle.intFut.await]);\n } finally {\n if (handle.intFut.done) {\n gracefullyCancel(task);\n } else {\n task.then((text) => {\n handle.text = text;\n });\n }\n }\n\n resolve();\n });\n }\n}\n\nconst stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n ttsStream.pushText(text);\n ttsStream.flush();\n ttsStream.endInput();\n for await (const audio of ttsStream) {\n if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n\n resolve(text);\n });\n};\n\nconst streamSynthesisTask = (\n stream: AsyncIterable<string>,\n handle: SynthesisHandle,\n): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let fullText = '';\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n const readGeneratedAudio = async () => {\n for await (const audio of ttsStream) {\n if (cancelled) break;\n if (audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n };\n readGeneratedAudio();\n\n for await (const text of stream) {\n fullText += text;\n if (cancelled) break;\n ttsStream.pushText(text);\n }\n ttsStream.flush();\n ttsStream.endInput();\n\n resolve(fullText);\n });\n};\n"],"mappings":"AAIA,SAAS,WAAW;AACpB,SAAS,wBAAkC;AAC3C,SAAS,oBAAoB,oBAAoB,QAAQ,wBAAwB;AAK1E,MAAM,gBAAgB;AAAA,EAC3B,OAAgB,iBAAiB,OAAO,gBAAgB;AAAA,EAExD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ,IAAI,mBAAuE;AAAA,EACnF;AAAA,EACA,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI;AAAA,EAEd,YAAY,UAAkB,WAAyB,cAA4B,KAAU;AAC3F,SAAK,YAAY;AACjB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AACrB,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,CAAC,CAAC,KAAK;AAAA,EAChB;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK,OAAO;AAAA,EACrB;AAAA,EAEA,IAAI,aAAwC;AAC1C,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,OAAsB;AACpB,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AAEA,SAAK,cAAc,KAAK,cAAc,KAAK,KAAK,WAAW,KAAK,KAAK;AACrE,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,YAAY;AA1Dd;AA2DI,QAAI,KAAK,aAAa;AACpB;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,UAAU,CAAC,EAAE,MAAM,gCAAgC;AACvF,eAAK,gBAAL,mBAAkB;AAClB,SAAK,OAAO,QAAQ;AAAA,EACtB;AACF;AAEO,MAAM,YAAY;AAAA,EACvB;AAAA,EACA;AAAA,EACA,SAAqC,CAAC;AAAA,EAEtC,YAAY,cAA4B,KAAU;AAChD,SAAK,gBAAgB;AACrB,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,IAAI,UAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,OAAO,QAAQ,CAAC,SAAS,KAAK,OAAO,CAAC;AAC3C,UAAM,QAAQ,IAAI,KAAK,MAAM;AAAA,EAC/B;AAAA,EAEA,WAAW,UAAkB,WAA0C;AACrE,UAAM,SAAS,IAAI,gBAAgB,UAAU,WAAW,KAAK,eAAe,KAAK,IAAI;AACrF,UAAM,OAAO,KAAK,YAAY,MAAM;AACpC,SAAK,OAAO,KAAK,IAAI;AACrB,SAAK,QAAQ,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,QAAQ,IAAI,CAAC,CAAC;AAChE,WAAO;AAAA,EACT;AAAA,EAEA,YAAY,QAAmD;AAE7D,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,YAAY,MAAM,OAAO;AAC/B,UAAI;AACJ,UAAI,OAAO,cAAc,UAAU;AACjC,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C,OAAO;AACL,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C;AAEA,eAAS,MAAM;AACb,yBAAiB,IAAI;AAAA,MACvB,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,MAAM,OAAO,OAAO,KAAK,CAAC;AAAA,MAC/C,UAAE;AACA,YAAI,OAAO,OAAO,MAAM;AACtB,2BAAiB,IAAI;AAAA,QACvB,OAAO;AACL,eAAK,KAAK,CAAC,SAAS;AAClB,mBAAO,OAAO;AAAA,UAChB,CAAC;AAAA,QACH;AAAA,MACF;AAEA,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEA,MAAM,sBAAsB,CAAC,MAAc,WAAwD;AAEjG,SAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,cAAU,SAAS,IAAI;AACvB,cAAU,MAAM;AAChB,cAAU,SAAS;AACnB,qBAAiB,SAAS,WAAW;AACnC,UAAI,aAAa,UAAU,iBAAiB,eAAe;AACzD;AAAA,MACF;AACA,aAAO,MAAM,IAAI,MAAM,KAAK;AAAA,IAC9B;AACA,WAAO,MAAM,IAAI,gBAAgB,cAAc;AAE/C,YAAQ,IAAI;AAAA,EACd,CAAC;AACH;AAEA,MAAM,sBAAsB,CAC1B,QACA,WAC+B;AAE/B,SAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,WAAW;AACf,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,UAAM,qBAAqB,YAAY;AACrC,uBAAiB,SAAS,WAAW;AACnC,YAAI,UAAW;AACf,YAAI,UAAU,iBAAiB,eAAe;AAC5C;AAAA,QACF;AACA,eAAO,MAAM,IAAI,MAAM,KAAK;AAAA,MAC9B;AACA,aAAO,MAAM,IAAI,gBAAgB,cAAc;AAAA,IACjD;AACA,uBAAmB;AAEnB,qBAAiB,QAAQ,QAAQ;AAC/B,kBAAY;AACZ,UAAI,UAAW;AACf,gBAAU,SAAS,IAAI;AAAA,IACzB;AACA,cAAU,MAAM;AAChB,cAAU,SAAS;AAEnB,YAAQ,QAAQ;AAAA,EAClB,CAAC;AACH;","names":[]}
1
+ {"version":3,"sources":["../../src/pipeline/agent_output.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { SynthesizeStream, type TTS } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { AgentPlayout, PlayoutHandle } from './agent_playout.js';\n\nexport type SpeechSource = AsyncIterable<string> | string | Promise<string>;\n\nexport class SynthesisHandle {\n static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #speechId: string;\n text?: string;\n ttsSource: SpeechSource;\n #agentPlayout: AgentPlayout;\n tts: TTS;\n queue = new AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>();\n #playHandle?: PlayoutHandle;\n intFut = new Future();\n #logger = log();\n synchronizer: TextAudioSynchronizer;\n\n constructor(\n speechId: string,\n ttsSource: SpeechSource,\n agentPlayout: AgentPlayout,\n tts: TTS,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.ttsSource = ttsSource;\n this.#agentPlayout = agentPlayout;\n this.tts = tts;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get validated(): boolean {\n return !!this.#playHandle;\n }\n\n get interrupted(): boolean {\n return this.intFut.done;\n }\n\n get playHandle(): PlayoutHandle | undefined {\n return this.#playHandle;\n }\n\n /** Validate the speech for playout. */\n play(): PlayoutHandle {\n if (this.interrupted) {\n throw new Error('synthesis was interrupted');\n }\n\n this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue, this.synchronizer);\n return this.#playHandle;\n }\n\n /** Interrupt the speech. */\n interrupt() {\n if (this.interrupted) {\n return;\n }\n\n this.#logger.child({ speechId: this.#speechId }).debug('interrupting synthesis/playout');\n this.#playHandle?.interrupt();\n this.intFut.resolve();\n }\n}\n\nexport class AgentOutput {\n #agentPlayout: AgentPlayout;\n #tts: TTS;\n #tasks: CancellablePromise<void>[] = [];\n\n constructor(agentPlayout: AgentPlayout, tts: TTS) {\n this.#agentPlayout = agentPlayout;\n this.#tts = tts;\n }\n\n get playout(): AgentPlayout {\n return this.#agentPlayout;\n }\n\n async close() {\n this.#tasks.forEach((task) => task.cancel());\n await Promise.all(this.#tasks);\n }\n\n synthesize(\n speechId: string,\n ttsSource: SpeechSource,\n synchronizer: TextAudioSynchronizer,\n ): SynthesisHandle {\n const handle = new SynthesisHandle(\n speechId,\n ttsSource,\n this.#agentPlayout,\n this.#tts,\n synchronizer,\n );\n const task = this.#synthesize(handle);\n this.#tasks.push(task);\n task.finally(() => this.#tasks.splice(this.#tasks.indexOf(task)));\n return handle;\n }\n\n #synthesize(handle: SynthesisHandle): CancellablePromise<void> {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const ttsSource = await handle.ttsSource;\n let task: CancellablePromise<string>;\n if (typeof ttsSource === 'string') {\n task = stringSynthesisTask(ttsSource, handle);\n } else {\n task = streamSynthesisTask(ttsSource, handle);\n }\n\n onCancel(() => {\n gracefullyCancel(task);\n });\n\n try {\n await Promise.any([task, handle.intFut.await]);\n } finally {\n if (handle.intFut.done) {\n gracefullyCancel(task);\n } else {\n task.then((text) => {\n handle.text = text;\n });\n }\n }\n\n resolve();\n });\n }\n}\n\nconst stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n ttsStream.pushText(text);\n handle.synchronizer.pushText(text);\n handle.synchronizer.markTextSegmentEnd();\n ttsStream.flush();\n ttsStream.endInput();\n for await (const audio of ttsStream) {\n if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n\n resolve(text);\n });\n};\n\nconst streamSynthesisTask = (\n stream: AsyncIterable<string>,\n handle: SynthesisHandle,\n): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let fullText = '';\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n const readGeneratedAudio = async () => {\n for await (const audio of ttsStream) {\n if (cancelled) break;\n if (audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n };\n readGeneratedAudio();\n\n for await (const text of stream) {\n fullText += text;\n if (cancelled) break;\n handle.synchronizer.pushText(text);\n ttsStream.pushText(text);\n }\n handle.synchronizer.markTextSegmentEnd();\n\n // end the audio queue early if there is no actual text to turn into speech\n if (!fullText || fullText.trim().length === 0) {\n cancelled = true;\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n }\n ttsStream.flush();\n ttsStream.endInput();\n\n resolve(fullText);\n });\n};\n"],"mappings":"AAIA,SAAS,WAAW;AAEpB,SAAS,wBAAkC;AAC3C,SAAS,oBAAoB,oBAAoB,QAAQ,wBAAwB;AAK1E,MAAM,gBAAgB;AAAA,EAC3B,OAAgB,iBAAiB,OAAO,gBAAgB;AAAA,EAExD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ,IAAI,mBAAuE;AAAA,EACnF;AAAA,EACA,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI;AAAA,EACd;AAAA,EAEA,YACE,UACA,WACA,cACA,KACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AACrB,SAAK,MAAM;AACX,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,CAAC,CAAC,KAAK;AAAA,EAChB;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK,OAAO;AAAA,EACrB;AAAA,EAEA,IAAI,aAAwC;AAC1C,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,OAAsB;AACpB,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AAEA,SAAK,cAAc,KAAK,cAAc,KAAK,KAAK,WAAW,KAAK,OAAO,KAAK,YAAY;AACxF,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,YAAY;AAnEd;AAoEI,QAAI,KAAK,aAAa;AACpB;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,UAAU,CAAC,EAAE,MAAM,gCAAgC;AACvF,eAAK,gBAAL,mBAAkB;AAClB,SAAK,OAAO,QAAQ;AAAA,EACtB;AACF;AAEO,MAAM,YAAY;AAAA,EACvB;AAAA,EACA;AAAA,EACA,SAAqC,CAAC;AAAA,EAEtC,YAAY,cAA4B,KAAU;AAChD,SAAK,gBAAgB;AACrB,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,IAAI,UAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,OAAO,QAAQ,CAAC,SAAS,KAAK,OAAO,CAAC;AAC3C,UAAM,QAAQ,IAAI,KAAK,MAAM;AAAA,EAC/B;AAAA,EAEA,WACE,UACA,WACA,cACiB;AACjB,UAAM,SAAS,IAAI;AAAA,MACjB;AAAA,MACA;AAAA,MACA,KAAK;AAAA,MACL,KAAK;AAAA,MACL;AAAA,IACF;AACA,UAAM,OAAO,KAAK,YAAY,MAAM;AACpC,SAAK,OAAO,KAAK,IAAI;AACrB,SAAK,QAAQ,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,QAAQ,IAAI,CAAC,CAAC;AAChE,WAAO;AAAA,EACT;AAAA,EAEA,YAAY,QAAmD;AAE7D,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,YAAY,MAAM,OAAO;AAC/B,UAAI;AACJ,UAAI,OAAO,cAAc,UAAU;AACjC,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C,OAAO;AACL,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C;AAEA,eAAS,MAAM;AACb,yBAAiB,IAAI;AAAA,MACvB,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,MAAM,OAAO,OAAO,KAAK,CAAC;AAAA,MAC/C,UAAE;AACA,YAAI,OAAO,OAAO,MAAM;AACtB,2BAAiB,IAAI;AAAA,QACvB,OAAO;AACL,eAAK,KAAK,CAAC,SAAS;AAClB,mBAAO,OAAO;AAAA,UAChB,CAAC;AAAA,QACH;AAAA,MACF;AAEA,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEA,MAAM,sBAAsB,CAAC,MAAc,WAAwD;AAEjG,SAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,cAAU,SAAS,IAAI;AACvB,WAAO,aAAa,SAAS,IAAI;AACjC,WAAO,aAAa,mBAAmB;AACvC,cAAU,MAAM;AAChB,cAAU,SAAS;AACnB,qBAAiB,SAAS,WAAW;AACnC,UAAI,aAAa,UAAU,iBAAiB,eAAe;AACzD;AAAA,MACF;AACA,aAAO,MAAM,IAAI,MAAM,KAAK;AAAA,IAC9B;AACA,WAAO,MAAM,IAAI,gBAAgB,cAAc;AAE/C,YAAQ,IAAI;AAAA,EACd,CAAC;AACH;AAEA,MAAM,sBAAsB,CAC1B,QACA,WAC+B;AAE/B,SAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,WAAW;AACf,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,UAAM,qBAAqB,YAAY;AACrC,uBAAiB,SAAS,WAAW;AACnC,YAAI,UAAW;AACf,YAAI,UAAU,iBAAiB,eAAe;AAC5C;AAAA,QACF;AACA,eAAO,MAAM,IAAI,MAAM,KAAK;AAAA,MAC9B;AACA,aAAO,MAAM,IAAI,gBAAgB,cAAc;AAAA,IACjD;AACA,uBAAmB;AAEnB,qBAAiB,QAAQ,QAAQ;AAC/B,kBAAY;AACZ,UAAI,UAAW;AACf,aAAO,aAAa,SAAS,IAAI;AACjC,gBAAU,SAAS,IAAI;AAAA,IACzB;AACA,WAAO,aAAa,mBAAmB;AAGvC,QAAI,CAAC,YAAY,SAAS,KAAK,EAAE,WAAW,GAAG;AAC7C,kBAAY;AACZ,aAAO,MAAM,IAAI,gBAAgB,cAAc;AAAA,IACjD;AACA,cAAU,MAAM;AAChB,cAAU,SAAS;AAEnB,YAAQ,QAAQ;AAAA,EAClB,CAAC;AACH;","names":[]}
@@ -47,14 +47,16 @@ class PlayoutHandle {
47
47
  #audioSource;
48
48
  playoutSource;
49
49
  totalPlayedTime;
50
+ synchronizer;
50
51
  #interrupted = false;
51
52
  pushedDuration = 0;
52
53
  intFut = new import_utils.Future();
53
54
  doneFut = new import_utils.Future();
54
- constructor(speechId, audioSource, playoutSource) {
55
+ constructor(speechId, audioSource, playoutSource, synchronizer) {
55
56
  this.#speechId = speechId;
56
57
  this.#audioSource = audioSource;
57
58
  this.playoutSource = playoutSource;
59
+ this.synchronizer = synchronizer;
58
60
  }
59
61
  get speechId() {
60
62
  return this.#speechId;
@@ -95,11 +97,11 @@ class AgentPlayout extends import_node_events.default {
95
97
  set targetVolume(vol) {
96
98
  this.#targetVolume = vol;
97
99
  }
98
- play(speechId, playoutSource) {
100
+ play(speechId, playoutSource, synchronizer) {
99
101
  if (this.#closed) {
100
102
  throw new Error("source closed");
101
103
  }
102
- const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);
104
+ const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);
103
105
  this.#playoutTask = this.#playout(handle, this.#playoutTask);
104
106
  return handle;
105
107
  }
@@ -109,6 +111,7 @@ class AgentPlayout extends import_node_events.default {
109
111
  captureTask.cancel();
110
112
  handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
111
113
  if (handle.interrupted || captureTask.error) {
114
+ handle.synchronizer.close(true);
112
115
  this.#audioSource.clearQueue();
113
116
  }
114
117
  if (!firstFrame) {
@@ -139,12 +142,15 @@ class AgentPlayout extends import_node_events.default {
139
142
  if (firstFrame) {
140
143
  this.#logger.child({ speechId: handle.speechId }).debug("started playing the first time");
141
144
  this.emit(0 /* PLAYOUT_STARTED */);
145
+ handle.synchronizer.segmentPlayoutStarted();
142
146
  firstFrame = false;
143
147
  }
144
148
  handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
149
+ handle.synchronizer.pushAudio(frame);
145
150
  await this.#audioSource.captureFrame(frame);
146
151
  await this.#audioSource.waitForPlayout();
147
152
  }
153
+ handle.synchronizer.close(false);
148
154
  resolve2();
149
155
  });
150
156
  try {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n await this.#audioSource.captureFrame(frame);\n await this.#audioSource.waitForPlayout();\n }\n\n // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,\n // but for some reason too many TTS frames can gunk up the buffer and lead to\n // FFI errors. this works 🤷‍♀️\n // if (this.#audioSource.queuedDuration > 0) {\n // await this.#audioSource.waitForPlayout();\n // }\n\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAAyB;AACzB,iBAAoB;AACpB,mBAA6D;AAC7D,0BAAgC;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,oBAAO;AAAA,EACpB,UAAU,IAAI,oBAAO;AAAA,EAErB,YACE,UACA,aACA,eACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,mBAAAC,QAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,aAAa;AAE3E,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,kBAAM,+BAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,gCAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,oCAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,gBAAM,KAAK,aAAa,aAAa,KAAK;AAC1C,gBAAM,KAAK,aAAa,eAAe;AAAA,QACzC;AASA,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","EventEmitter","resolve","_","onCancel"]}
1
+ {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n await this.#audioSource.waitForPlayout();\n }\n\n // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,\n // but for some reason too many TTS frames can gunk up the buffer and lead to\n // FFI errors. this works 🤷‍♀️\n // if (this.#audioSource.queuedDuration > 0) {\n // await this.#audioSource.waitForPlayout();\n // }\n\n handle.synchronizer.close(false);\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAAyB;AACzB,iBAAoB;AAEpB,mBAA6D;AAC7D,0BAAgC;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,oBAAO;AAAA,EACpB,UAAU,IAAI,oBAAO;AAAA,EAErB,YACE,UACA,aACA,eACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AACrB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,mBAAAC,QAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACA,cACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,eAAe,YAAY;AAEzF,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,iBAAO,aAAa,MAAM,IAAI;AAC9B,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,kBAAM,+BAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,gCAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,oCAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,mBAAO,aAAa,sBAAsB;AAC1C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,iBAAO,aAAa,UAAU,KAAK;AACnC,gBAAM,KAAK,aAAa,aAAa,KAAK;AAC1C,gBAAM,KAAK,aAAa,eAAe;AAAA,QACzC;AASA,eAAO,aAAa,MAAM,KAAK;AAC/B,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","EventEmitter","resolve","_","onCancel"]}
@@ -1,5 +1,6 @@
1
1
  import type { AudioFrame, AudioSource } from '@livekit/rtc-node';
2
2
  import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
3
+ import type { TextAudioSynchronizer } from '../transcription.js';
3
4
  import { Future } from '../utils.js';
4
5
  import { SynthesisHandle } from './agent_output.js';
5
6
  export declare enum AgentPlayoutEvent {
@@ -14,10 +15,11 @@ export declare class PlayoutHandle {
14
15
  #private;
15
16
  playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;
16
17
  totalPlayedTime?: number;
18
+ synchronizer: TextAudioSynchronizer;
17
19
  pushedDuration: number;
18
20
  intFut: Future;
19
21
  doneFut: Future;
20
- constructor(speechId: string, audioSource: AudioSource, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>);
22
+ constructor(speechId: string, audioSource: AudioSource, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer);
21
23
  get speechId(): string;
22
24
  get interrupted(): boolean;
23
25
  get timePlayed(): number;
@@ -31,7 +33,7 @@ export declare class AgentPlayout extends AgentPlayout_base {
31
33
  constructor(audioSource: AudioSource);
32
34
  get targetVolume(): number;
33
35
  set targetVolume(vol: number);
34
- play(speechId: string, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>): PlayoutHandle;
36
+ play(speechId: string, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer): PlayoutHandle;
35
37
  close(): Promise<void>;
36
38
  }
37
39
  export {};
@@ -1 +1 @@
1
- {"version":3,"file":"agent_playout.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_playout.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD,oBAAY,iBAAiB;IAC3B,eAAe,IAAA;IACf,eAAe,IAAA;CAChB;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,MAAM,IAAI,CAAC;IAChD,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC,SAAS,EAAE,OAAO,KAAK,IAAI,CAAC;CACnE,CAAC;AAEF,qBAAa,aAAa;;IAGxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,CAAC;IACjF,eAAe,CAAC,EAAE,MAAM,CAAC;IAEzB,cAAc,SAAK;IACnB,MAAM,SAAgB;IACtB,OAAO,SAAgB;gBAGrB,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,WAAW,EACxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC;IAOlF,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,IAAI,IAAI,IAAI,OAAO,CAElB;IAED,SAAS;IAST,IAAI,IAAI,MAAM;CAGf;2CAE4D,aAAa,qBAAqB,CAAC;AAAhG,qBAAa,YAAa,SAAQ,iBAA+D;;gBAOnF,WAAW,EAAE,WAAW;IAKpC,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,YAAY,CAAC,GAAG,EAAE,MAAM,EAE3B;IAED,IAAI,CACF,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,GAC/E,aAAa;IA2FV,KAAK;CAIZ"}
1
+ {"version":3,"file":"agent_playout.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_playout.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD,oBAAY,iBAAiB;IAC3B,eAAe,IAAA;IACf,eAAe,IAAA;CAChB;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,MAAM,IAAI,CAAC;IAChD,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC,SAAS,EAAE,OAAO,KAAK,IAAI,CAAC;CACnE,CAAC;AAEF,qBAAa,aAAa;;IAGxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,CAAC;IACjF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,qBAAqB,CAAC;IAEpC,cAAc,SAAK;IACnB,MAAM,SAAgB;IACtB,OAAO,SAAgB;gBAGrB,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,WAAW,EACxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB;IAQrC,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,IAAI,IAAI,IAAI,OAAO,CAElB;IAED,SAAS;IAST,IAAI,IAAI,MAAM;CAGf;2CAE4D,aAAa,qBAAqB,CAAC;AAAhG,qBAAa,YAAa,SAAQ,iBAA+D;;gBAOnF,WAAW,EAAE,WAAW;IAKpC,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,YAAY,CAAC,GAAG,EAAE,MAAM,EAE3B;IAED,IAAI,CACF,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB,GAClC,aAAa;IA+FV,KAAK;CAIZ"}
@@ -12,14 +12,16 @@ class PlayoutHandle {
12
12
  #audioSource;
13
13
  playoutSource;
14
14
  totalPlayedTime;
15
+ synchronizer;
15
16
  #interrupted = false;
16
17
  pushedDuration = 0;
17
18
  intFut = new Future();
18
19
  doneFut = new Future();
19
- constructor(speechId, audioSource, playoutSource) {
20
+ constructor(speechId, audioSource, playoutSource, synchronizer) {
20
21
  this.#speechId = speechId;
21
22
  this.#audioSource = audioSource;
22
23
  this.playoutSource = playoutSource;
24
+ this.synchronizer = synchronizer;
23
25
  }
24
26
  get speechId() {
25
27
  return this.#speechId;
@@ -60,11 +62,11 @@ class AgentPlayout extends EventEmitter {
60
62
  set targetVolume(vol) {
61
63
  this.#targetVolume = vol;
62
64
  }
63
- play(speechId, playoutSource) {
65
+ play(speechId, playoutSource, synchronizer) {
64
66
  if (this.#closed) {
65
67
  throw new Error("source closed");
66
68
  }
67
- const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);
69
+ const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);
68
70
  this.#playoutTask = this.#playout(handle, this.#playoutTask);
69
71
  return handle;
70
72
  }
@@ -74,6 +76,7 @@ class AgentPlayout extends EventEmitter {
74
76
  captureTask.cancel();
75
77
  handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
76
78
  if (handle.interrupted || captureTask.error) {
79
+ handle.synchronizer.close(true);
77
80
  this.#audioSource.clearQueue();
78
81
  }
79
82
  if (!firstFrame) {
@@ -104,12 +107,15 @@ class AgentPlayout extends EventEmitter {
104
107
  if (firstFrame) {
105
108
  this.#logger.child({ speechId: handle.speechId }).debug("started playing the first time");
106
109
  this.emit(0 /* PLAYOUT_STARTED */);
110
+ handle.synchronizer.segmentPlayoutStarted();
107
111
  firstFrame = false;
108
112
  }
109
113
  handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
114
+ handle.synchronizer.pushAudio(frame);
110
115
  await this.#audioSource.captureFrame(frame);
111
116
  await this.#audioSource.waitForPlayout();
112
117
  }
118
+ handle.synchronizer.close(false);
113
119
  resolve2();
114
120
  });
115
121
  try {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n await this.#audioSource.captureFrame(frame);\n await this.#audioSource.waitForPlayout();\n }\n\n // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,\n // but for some reason too many TTS frames can gunk up the buffer and lead to\n // FFI errors. this works 🤷‍♀️\n // if (this.#audioSource.queuedDuration > 0) {\n // await this.#audioSource.waitForPlayout();\n // }\n\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":"AAKA,OAAO,kBAAkB;AACzB,SAAS,WAAW;AACpB,SAAS,oBAAoB,QAAQ,wBAAwB;AAC7D,SAAS,uBAAuB;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI,OAAO;AAAA,EAErB,YACE,UACA,aACA,eACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,aAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,aAAa;AAE3E,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,mBAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,gBAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,gBAAM,KAAK,aAAa,aAAa,KAAK;AAC1C,gBAAM,KAAK,aAAa,eAAe;AAAA,QACzC;AASA,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","resolve","_","onCancel"]}
1
+ {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n await this.#audioSource.waitForPlayout();\n }\n\n // XXX(nbsp): line 161 waits instead of this. this is not the case on python agents,\n // but for some reason too many TTS frames can gunk up the buffer and lead to\n // FFI errors. this works 🤷‍♀️\n // if (this.#audioSource.queuedDuration > 0) {\n // await this.#audioSource.waitForPlayout();\n // }\n\n handle.synchronizer.close(false);\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":"AAKA,OAAO,kBAAkB;AACzB,SAAS,WAAW;AAEpB,SAAS,oBAAoB,QAAQ,wBAAwB;AAC7D,SAAS,uBAAuB;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI,OAAO;AAAA,EAErB,YACE,UACA,aACA,eACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AACrB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,aAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACA,cACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,eAAe,YAAY;AAEzF,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,iBAAO,aAAa,MAAM,IAAI;AAC9B,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,mBAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,gBAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,mBAAO,aAAa,sBAAsB;AAC1C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,iBAAO,aAAa,UAAU,KAAK;AACnC,gBAAM,KAAK,aAAa,aAAa,KAAK;AAC1C,gBAAM,KAAK,aAAa,eAAe;AAAA,QACzC;AASA,eAAO,aAAa,MAAM,KAAK;AAC/B,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","resolve","_","onCancel"]}
@@ -57,6 +57,12 @@ class HumanInput extends import_node_events.EventEmitter {
57
57
  this.#room.on(import_rtc_node.RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));
58
58
  this.#subscribeToMicrophone();
59
59
  }
60
+ get participant() {
61
+ return this.#participant;
62
+ }
63
+ get subscribedTrack() {
64
+ return this.#subscribedTrack;
65
+ }
60
66
  #subscribeToMicrophone() {
61
67
  if (!this.#participant) {
62
68
  this.#logger.error("Participant is not set");
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/human_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport { AudioStream, RoomEvent, TrackSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { log } from '../log.js';\nimport type { STT, SpeechEvent } from '../stt/stt.js';\nimport { SpeechEventType } from '../stt/stt.js';\nimport { CancellablePromise, gracefullyCancel } from '../utils.js';\nimport type { VAD, VADEvent } from '../vad.js';\nimport { VADEventType } from '../vad.js';\n\nexport enum HumanInputEvent {\n START_OF_SPEECH,\n VAD_INFERENCE_DONE,\n END_OF_SPEECH,\n FINAL_TRANSCRIPT,\n INTERIM_TRANSCRIPT,\n}\n\nexport type HumanInputCallbacks = {\n [HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;\n [HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;\n [HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;\n};\n\nexport class HumanInput extends (EventEmitter as new () => TypedEmitter<HumanInputCallbacks>) {\n #closed = false;\n #room: Room;\n #vad: VAD;\n #stt: STT;\n #participant: RemoteParticipant;\n #subscribedTrack?: RemoteAudioTrack;\n #recognizeTask?: CancellablePromise<void>;\n #speaking = false;\n #speechProbability = 0;\n #logger = log();\n\n constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant) {\n super();\n this.#room = room;\n this.#vad = vad;\n this.#stt = stt;\n this.#participant = participant;\n\n this.#room.on(RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));\n this.#room.on(RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));\n this.#subscribeToMicrophone();\n }\n\n #subscribeToMicrophone(): void {\n if (!this.#participant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.#participant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n\n const track = microphonePublication.track;\n if (track && track !== this.#subscribedTrack) {\n this.#subscribedTrack = track;\n if (this.#recognizeTask) {\n this.#recognizeTask.cancel();\n }\n\n const audioStream = new AudioStream(track, 16000);\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#recognizeTask = new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const sttStream = this.#stt.stream();\n const vadStream = this.#vad.stream();\n\n const audioStreamCo = async () => {\n for await (const ev of audioStream) {\n if (cancelled) return;\n sttStream.pushFrame(ev);\n vadStream.pushFrame(ev);\n }\n };\n\n const vadStreamCo = async () => {\n for await (const ev of vadStream) {\n if (cancelled) return;\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.#speaking = true;\n this.emit(HumanInputEvent.START_OF_SPEECH, ev);\n break;\n case VADEventType.INFERENCE_DONE:\n this.#speechProbability = ev.probability;\n this.emit(HumanInputEvent.VAD_INFERENCE_DONE, ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.#speaking = false;\n this.emit(HumanInputEvent.END_OF_SPEECH, ev);\n break;\n }\n }\n };\n\n const sttStreamCo = async () => {\n for await (const ev of sttStream) {\n if (cancelled) return;\n if (ev.type === SpeechEventType.FINAL_TRANSCRIPT) {\n this.emit(HumanInputEvent.FINAL_TRANSCRIPT, ev);\n } else if (ev.type == SpeechEventType.INTERIM_TRANSCRIPT) {\n this.emit(HumanInputEvent.INTERIM_TRANSCRIPT, ev);\n }\n }\n };\n\n await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);\n sttStream.close();\n vadStream.close();\n resolve();\n });\n }\n }\n\n get speaking(): boolean {\n return this.#speaking;\n }\n\n get speakingProbability(): number {\n return this.#speechProbability;\n }\n\n async close() {\n if (this.#closed) {\n throw new Error('HumanInput already closed');\n }\n this.#closed = true;\n this.#room.removeAllListeners();\n this.#speaking = false;\n if (this.#recognizeTask) {\n await gracefullyCancel(this.#recognizeTask);\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AASA,sBAAoD;AAEpD,yBAA6B;AAC7B,iBAAoB;AAEpB,iBAAgC;AAChC,mBAAqD;AAErD,iBAA6B;AAEtB,IAAK,kBAAL,kBAAKA,qBAAL;AACL,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AALU,SAAAA;AAAA,GAAA;AAgBL,MAAM,mBAAoB,gCAA6D;AAAA,EAC5F,UAAU;AAAA,EACV;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,qBAAqB;AAAA,EACrB,cAAU,gBAAI;AAAA,EAEd,YAAY,MAAY,KAAU,KAAU,aAAgC;AAC1E,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,eAAe;AAEpB,SAAK,MAAM,GAAG,0BAAU,gBAAgB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC9E,SAAK,MAAM,GAAG,0BAAU,iBAAiB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC/E,SAAK,uBAAuB;AAAA,EAC9B;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,cAAc;AACtB,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,aAAa,kBAAkB,OAAO,GAAG;AACtE,UAAI,YAAY,WAAW,4BAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAEA,UAAM,QAAQ,sBAAsB;AACpC,QAAI,SAAS,UAAU,KAAK,kBAAkB;AAC5C,WAAK,mBAAmB;AACxB,UAAI,KAAK,gBAAgB;AACvB,aAAK,eAAe,OAAO;AAAA,MAC7B;AAEA,YAAM,cAAc,IAAI,4BAAY,OAAO,IAAK;AAGhD,WAAK,iBAAiB,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC3E,YAAI,YAAY;AAChB,iBAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,cAAM,YAAY,KAAK,KAAK,OAAO;AACnC,cAAM,YAAY,KAAK,KAAK,OAAO;AAEnC,cAAM,gBAAgB,YAAY;AAChC,2BAAiB,MAAM,aAAa;AAClC,gBAAI,UAAW;AACf,sBAAU,UAAU,EAAE;AACtB,sBAAU,UAAU,EAAE;AAAA,UACxB;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,oBAAQ,GAAG,MAAM;AAAA,cACf,KAAK,wBAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,yBAAiC,EAAE;AAC7C;AAAA,cACF,KAAK,wBAAa;AAChB,qBAAK,qBAAqB,GAAG;AAC7B,qBAAK,KAAK,4BAAoC,EAAE;AAChD;AAAA,cACF,KAAK,wBAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,uBAA+B,EAAE;AAC3C;AAAA,YACJ;AAAA,UACF;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,gBAAI,GAAG,SAAS,2BAAgB,kBAAkB;AAChD,mBAAK,KAAK,0BAAkC,EAAE;AAAA,YAChD,WAAW,GAAG,QAAQ,2BAAgB,oBAAoB;AACxD,mBAAK,KAAK,4BAAoC,EAAE;AAAA,YAClD;AAAA,UACF;AAAA,QACF;AAEA,cAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,YAAY,GAAG,YAAY,CAAC,CAAC;AACjE,kBAAU,MAAM;AAChB,kBAAU,MAAM;AAChB,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAA8B;AAChC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AACA,SAAK,UAAU;AACf,SAAK,MAAM,mBAAmB;AAC9B,SAAK,YAAY;AACjB,QAAI,KAAK,gBAAgB;AACvB,gBAAM,+BAAiB,KAAK,cAAc;AAAA,IAC5C;AAAA,EACF;AACF;","names":["HumanInputEvent"]}
1
+ {"version":3,"sources":["../../src/pipeline/human_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport { AudioStream, RoomEvent, TrackSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { log } from '../log.js';\nimport type { STT, SpeechEvent } from '../stt/stt.js';\nimport { SpeechEventType } from '../stt/stt.js';\nimport { CancellablePromise, gracefullyCancel } from '../utils.js';\nimport type { VAD, VADEvent } from '../vad.js';\nimport { VADEventType } from '../vad.js';\n\nexport enum HumanInputEvent {\n START_OF_SPEECH,\n VAD_INFERENCE_DONE,\n END_OF_SPEECH,\n FINAL_TRANSCRIPT,\n INTERIM_TRANSCRIPT,\n}\n\nexport type HumanInputCallbacks = {\n [HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;\n [HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;\n [HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;\n};\n\nexport class HumanInput extends (EventEmitter as new () => TypedEmitter<HumanInputCallbacks>) {\n #closed = false;\n #room: Room;\n #vad: VAD;\n #stt: STT;\n #participant: RemoteParticipant;\n #subscribedTrack?: RemoteAudioTrack;\n #recognizeTask?: CancellablePromise<void>;\n #speaking = false;\n #speechProbability = 0;\n #logger = log();\n\n constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant) {\n super();\n this.#room = room;\n this.#vad = vad;\n this.#stt = stt;\n this.#participant = participant;\n\n this.#room.on(RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));\n this.#room.on(RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));\n this.#subscribeToMicrophone();\n }\n\n get participant(): RemoteParticipant {\n return this.#participant;\n }\n\n get subscribedTrack(): RemoteAudioTrack | undefined {\n return this.#subscribedTrack;\n }\n\n #subscribeToMicrophone(): void {\n if (!this.#participant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.#participant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n\n const track = microphonePublication.track;\n if (track && track !== this.#subscribedTrack) {\n this.#subscribedTrack = track;\n if (this.#recognizeTask) {\n this.#recognizeTask.cancel();\n }\n\n const audioStream = new AudioStream(track, 16000);\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#recognizeTask = new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const sttStream = this.#stt.stream();\n const vadStream = this.#vad.stream();\n\n const audioStreamCo = async () => {\n for await (const ev of audioStream) {\n if (cancelled) return;\n sttStream.pushFrame(ev);\n vadStream.pushFrame(ev);\n }\n };\n\n const vadStreamCo = async () => {\n for await (const ev of vadStream) {\n if (cancelled) return;\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.#speaking = true;\n this.emit(HumanInputEvent.START_OF_SPEECH, ev);\n break;\n case VADEventType.INFERENCE_DONE:\n this.#speechProbability = ev.probability;\n this.emit(HumanInputEvent.VAD_INFERENCE_DONE, ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.#speaking = false;\n this.emit(HumanInputEvent.END_OF_SPEECH, ev);\n break;\n }\n }\n };\n\n const sttStreamCo = async () => {\n for await (const ev of sttStream) {\n if (cancelled) return;\n if (ev.type === SpeechEventType.FINAL_TRANSCRIPT) {\n this.emit(HumanInputEvent.FINAL_TRANSCRIPT, ev);\n } else if (ev.type == SpeechEventType.INTERIM_TRANSCRIPT) {\n this.emit(HumanInputEvent.INTERIM_TRANSCRIPT, ev);\n }\n }\n };\n\n await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);\n sttStream.close();\n vadStream.close();\n resolve();\n });\n }\n }\n\n get speaking(): boolean {\n return this.#speaking;\n }\n\n get speakingProbability(): number {\n return this.#speechProbability;\n }\n\n async close() {\n if (this.#closed) {\n throw new Error('HumanInput already closed');\n }\n this.#closed = true;\n this.#room.removeAllListeners();\n this.#speaking = false;\n if (this.#recognizeTask) {\n await gracefullyCancel(this.#recognizeTask);\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AASA,sBAAoD;AAEpD,yBAA6B;AAC7B,iBAAoB;AAEpB,iBAAgC;AAChC,mBAAqD;AAErD,iBAA6B;AAEtB,IAAK,kBAAL,kBAAKA,qBAAL;AACL,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AALU,SAAAA;AAAA,GAAA;AAgBL,MAAM,mBAAoB,gCAA6D;AAAA,EAC5F,UAAU;AAAA,EACV;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,qBAAqB;AAAA,EACrB,cAAU,gBAAI;AAAA,EAEd,YAAY,MAAY,KAAU,KAAU,aAAgC;AAC1E,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,eAAe;AAEpB,SAAK,MAAM,GAAG,0BAAU,gBAAgB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC9E,SAAK,MAAM,GAAG,0BAAU,iBAAiB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC/E,SAAK,uBAAuB;AAAA,EAC9B;AAAA,EAEA,IAAI,cAAiC;AACnC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,kBAAgD;AAClD,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,cAAc;AACtB,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,aAAa,kBAAkB,OAAO,GAAG;AACtE,UAAI,YAAY,WAAW,4BAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAEA,UAAM,QAAQ,sBAAsB;AACpC,QAAI,SAAS,UAAU,KAAK,kBAAkB;AAC5C,WAAK,mBAAmB;AACxB,UAAI,KAAK,gBAAgB;AACvB,aAAK,eAAe,OAAO;AAAA,MAC7B;AAEA,YAAM,cAAc,IAAI,4BAAY,OAAO,IAAK;AAGhD,WAAK,iBAAiB,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC3E,YAAI,YAAY;AAChB,iBAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,cAAM,YAAY,KAAK,KAAK,OAAO;AACnC,cAAM,YAAY,KAAK,KAAK,OAAO;AAEnC,cAAM,gBAAgB,YAAY;AAChC,2BAAiB,MAAM,aAAa;AAClC,gBAAI,UAAW;AACf,sBAAU,UAAU,EAAE;AACtB,sBAAU,UAAU,EAAE;AAAA,UACxB;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,oBAAQ,GAAG,MAAM;AAAA,cACf,KAAK,wBAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,yBAAiC,EAAE;AAC7C;AAAA,cACF,KAAK,wBAAa;AAChB,qBAAK,qBAAqB,GAAG;AAC7B,qBAAK,KAAK,4BAAoC,EAAE;AAChD;AAAA,cACF,KAAK,wBAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,uBAA+B,EAAE;AAC3C;AAAA,YACJ;AAAA,UACF;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,gBAAI,GAAG,SAAS,2BAAgB,kBAAkB;AAChD,mBAAK,KAAK,0BAAkC,EAAE;AAAA,YAChD,WAAW,GAAG,QAAQ,2BAAgB,oBAAoB;AACxD,mBAAK,KAAK,4BAAoC,EAAE;AAAA,YAClD;AAAA,UACF;AAAA,QACF;AAEA,cAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,YAAY,GAAG,YAAY,CAAC,CAAC;AACjE,kBAAU,MAAM;AAChB,kBAAU,MAAM;AAChB,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAA8B;AAChC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AACA,SAAK,UAAU;AACf,SAAK,MAAM,mBAAmB;AAC9B,SAAK,YAAY;AACjB,QAAI,KAAK,gBAAgB;AACvB,gBAAM,+BAAiB,KAAK,cAAc;AAAA,IAC5C;AAAA,EACF;AACF;","names":["HumanInputEvent"]}
@@ -1,4 +1,4 @@
1
- import type { RemoteParticipant, Room } from '@livekit/rtc-node';
1
+ import type { RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
2
2
  import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
3
3
  import type { STT, SpeechEvent } from '../stt/stt.js';
4
4
  import type { VAD, VADEvent } from '../vad.js';
@@ -20,6 +20,8 @@ declare const HumanInput_base: new () => TypedEmitter<HumanInputCallbacks>;
20
20
  export declare class HumanInput extends HumanInput_base {
21
21
  #private;
22
22
  constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant);
23
+ get participant(): RemoteParticipant;
24
+ get subscribedTrack(): RemoteAudioTrack | undefined;
23
25
  get speaking(): boolean;
24
26
  get speakingProbability(): number;
25
27
  close(): Promise<void>;
@@ -1 +1 @@
1
- {"version":3,"file":"human_input.d.ts","sourceRoot":"","sources":["../../src/pipeline/human_input.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAEV,iBAAiB,EAEjB,IAAI,EACL,MAAM,mBAAmB,CAAC;AAE3B,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAGtD,OAAO,KAAK,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAG/C,oBAAY,eAAe;IACzB,eAAe,IAAA;IACf,kBAAkB,IAAA;IAClB,aAAa,IAAA;IACb,gBAAgB,IAAA;IAChB,kBAAkB,IAAA;CACnB;AAED,MAAM,MAAM,mBAAmB,GAAG;IAChC,CAAC,eAAe,CAAC,eAAe,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC7D,CAAC,eAAe,CAAC,kBAAkB,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAChE,CAAC,eAAe,CAAC,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC3D,CAAC,eAAe,CAAC,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAC;IACjE,CAAC,eAAe,CAAC,kBAAkB,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAC;CACpE,CAAC;yCAEyD,aAAa,mBAAmB,CAAC;AAA5F,qBAAa,UAAW,SAAQ,eAA6D;;gBAY/E,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,EAAE,iBAAiB;IAmG1E,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED,IAAI,mBAAmB,IAAI,MAAM,CAEhC;IAEK,KAAK;CAWZ"}
1
+ {"version":3,"file":"human_input.d.ts","sourceRoot":"","sources":["../../src/pipeline/human_input.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EAEjB,IAAI,EACL,MAAM,mBAAmB,CAAC;AAE3B,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAGtD,OAAO,KAAK,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAG/C,oBAAY,eAAe;IACzB,eAAe,IAAA;IACf,kBAAkB,IAAA;IAClB,aAAa,IAAA;IACb,gBAAgB,IAAA;IAChB,kBAAkB,IAAA;CACnB;AAED,MAAM,MAAM,mBAAmB,GAAG;IAChC,CAAC,eAAe,CAAC,eAAe,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC7D,CAAC,eAAe,CAAC,kBAAkB,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAChE,CAAC,eAAe,CAAC,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC3D,CAAC,eAAe,CAAC,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAC;IACjE,CAAC,eAAe,CAAC,kBAAkB,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAC;CACpE,CAAC;yCAEyD,aAAa,mBAAmB,CAAC;AAA5F,qBAAa,UAAW,SAAQ,eAA6D;;gBAY/E,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,EAAE,iBAAiB;IAY1E,IAAI,WAAW,IAAI,iBAAiB,CAEnC;IAED,IAAI,eAAe,IAAI,gBAAgB,GAAG,SAAS,CAElD;IAyFD,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED,IAAI,mBAAmB,IAAI,MAAM,CAEhC;IAEK,KAAK;CAWZ"}
@@ -33,6 +33,12 @@ class HumanInput extends EventEmitter {
33
33
  this.#room.on(RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));
34
34
  this.#subscribeToMicrophone();
35
35
  }
36
+ get participant() {
37
+ return this.#participant;
38
+ }
39
+ get subscribedTrack() {
40
+ return this.#subscribedTrack;
41
+ }
36
42
  #subscribeToMicrophone() {
37
43
  if (!this.#participant) {
38
44
  this.#logger.error("Participant is not set");
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/human_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport { AudioStream, RoomEvent, TrackSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { log } from '../log.js';\nimport type { STT, SpeechEvent } from '../stt/stt.js';\nimport { SpeechEventType } from '../stt/stt.js';\nimport { CancellablePromise, gracefullyCancel } from '../utils.js';\nimport type { VAD, VADEvent } from '../vad.js';\nimport { VADEventType } from '../vad.js';\n\nexport enum HumanInputEvent {\n START_OF_SPEECH,\n VAD_INFERENCE_DONE,\n END_OF_SPEECH,\n FINAL_TRANSCRIPT,\n INTERIM_TRANSCRIPT,\n}\n\nexport type HumanInputCallbacks = {\n [HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;\n [HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;\n [HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;\n};\n\nexport class HumanInput extends (EventEmitter as new () => TypedEmitter<HumanInputCallbacks>) {\n #closed = false;\n #room: Room;\n #vad: VAD;\n #stt: STT;\n #participant: RemoteParticipant;\n #subscribedTrack?: RemoteAudioTrack;\n #recognizeTask?: CancellablePromise<void>;\n #speaking = false;\n #speechProbability = 0;\n #logger = log();\n\n constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant) {\n super();\n this.#room = room;\n this.#vad = vad;\n this.#stt = stt;\n this.#participant = participant;\n\n this.#room.on(RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));\n this.#room.on(RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));\n this.#subscribeToMicrophone();\n }\n\n #subscribeToMicrophone(): void {\n if (!this.#participant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.#participant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n\n const track = microphonePublication.track;\n if (track && track !== this.#subscribedTrack) {\n this.#subscribedTrack = track;\n if (this.#recognizeTask) {\n this.#recognizeTask.cancel();\n }\n\n const audioStream = new AudioStream(track, 16000);\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#recognizeTask = new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const sttStream = this.#stt.stream();\n const vadStream = this.#vad.stream();\n\n const audioStreamCo = async () => {\n for await (const ev of audioStream) {\n if (cancelled) return;\n sttStream.pushFrame(ev);\n vadStream.pushFrame(ev);\n }\n };\n\n const vadStreamCo = async () => {\n for await (const ev of vadStream) {\n if (cancelled) return;\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.#speaking = true;\n this.emit(HumanInputEvent.START_OF_SPEECH, ev);\n break;\n case VADEventType.INFERENCE_DONE:\n this.#speechProbability = ev.probability;\n this.emit(HumanInputEvent.VAD_INFERENCE_DONE, ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.#speaking = false;\n this.emit(HumanInputEvent.END_OF_SPEECH, ev);\n break;\n }\n }\n };\n\n const sttStreamCo = async () => {\n for await (const ev of sttStream) {\n if (cancelled) return;\n if (ev.type === SpeechEventType.FINAL_TRANSCRIPT) {\n this.emit(HumanInputEvent.FINAL_TRANSCRIPT, ev);\n } else if (ev.type == SpeechEventType.INTERIM_TRANSCRIPT) {\n this.emit(HumanInputEvent.INTERIM_TRANSCRIPT, ev);\n }\n }\n };\n\n await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);\n sttStream.close();\n vadStream.close();\n resolve();\n });\n }\n }\n\n get speaking(): boolean {\n return this.#speaking;\n }\n\n get speakingProbability(): number {\n return this.#speechProbability;\n }\n\n async close() {\n if (this.#closed) {\n throw new Error('HumanInput already closed');\n }\n this.#closed = true;\n this.#room.removeAllListeners();\n this.#speaking = false;\n if (this.#recognizeTask) {\n await gracefullyCancel(this.#recognizeTask);\n }\n }\n}\n"],"mappings":"AASA,SAAS,aAAa,WAAW,mBAAmB;AAEpD,SAAS,oBAAoB;AAC7B,SAAS,WAAW;AAEpB,SAAS,uBAAuB;AAChC,SAAS,oBAAoB,wBAAwB;AAErD,SAAS,oBAAoB;AAEtB,IAAK,kBAAL,kBAAKA,qBAAL;AACL,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AALU,SAAAA;AAAA,GAAA;AAgBL,MAAM,mBAAoB,aAA6D;AAAA,EAC5F,UAAU;AAAA,EACV;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,qBAAqB;AAAA,EACrB,UAAU,IAAI;AAAA,EAEd,YAAY,MAAY,KAAU,KAAU,aAAgC;AAC1E,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,eAAe;AAEpB,SAAK,MAAM,GAAG,UAAU,gBAAgB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC9E,SAAK,MAAM,GAAG,UAAU,iBAAiB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC/E,SAAK,uBAAuB;AAAA,EAC9B;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,cAAc;AACtB,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,aAAa,kBAAkB,OAAO,GAAG;AACtE,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAEA,UAAM,QAAQ,sBAAsB;AACpC,QAAI,SAAS,UAAU,KAAK,kBAAkB;AAC5C,WAAK,mBAAmB;AACxB,UAAI,KAAK,gBAAgB;AACvB,aAAK,eAAe,OAAO;AAAA,MAC7B;AAEA,YAAM,cAAc,IAAI,YAAY,OAAO,IAAK;AAGhD,WAAK,iBAAiB,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC3E,YAAI,YAAY;AAChB,iBAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,cAAM,YAAY,KAAK,KAAK,OAAO;AACnC,cAAM,YAAY,KAAK,KAAK,OAAO;AAEnC,cAAM,gBAAgB,YAAY;AAChC,2BAAiB,MAAM,aAAa;AAClC,gBAAI,UAAW;AACf,sBAAU,UAAU,EAAE;AACtB,sBAAU,UAAU,EAAE;AAAA,UACxB;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,oBAAQ,GAAG,MAAM;AAAA,cACf,KAAK,aAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,yBAAiC,EAAE;AAC7C;AAAA,cACF,KAAK,aAAa;AAChB,qBAAK,qBAAqB,GAAG;AAC7B,qBAAK,KAAK,4BAAoC,EAAE;AAChD;AAAA,cACF,KAAK,aAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,uBAA+B,EAAE;AAC3C;AAAA,YACJ;AAAA,UACF;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,gBAAI,GAAG,SAAS,gBAAgB,kBAAkB;AAChD,mBAAK,KAAK,0BAAkC,EAAE;AAAA,YAChD,WAAW,GAAG,QAAQ,gBAAgB,oBAAoB;AACxD,mBAAK,KAAK,4BAAoC,EAAE;AAAA,YAClD;AAAA,UACF;AAAA,QACF;AAEA,cAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,YAAY,GAAG,YAAY,CAAC,CAAC;AACjE,kBAAU,MAAM;AAChB,kBAAU,MAAM;AAChB,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAA8B;AAChC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AACA,SAAK,UAAU;AACf,SAAK,MAAM,mBAAmB;AAC9B,SAAK,YAAY;AACjB,QAAI,KAAK,gBAAgB;AACvB,YAAM,iBAAiB,KAAK,cAAc;AAAA,IAC5C;AAAA,EACF;AACF;","names":["HumanInputEvent"]}
1
+ {"version":3,"sources":["../../src/pipeline/human_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport { AudioStream, RoomEvent, TrackSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { log } from '../log.js';\nimport type { STT, SpeechEvent } from '../stt/stt.js';\nimport { SpeechEventType } from '../stt/stt.js';\nimport { CancellablePromise, gracefullyCancel } from '../utils.js';\nimport type { VAD, VADEvent } from '../vad.js';\nimport { VADEventType } from '../vad.js';\n\nexport enum HumanInputEvent {\n START_OF_SPEECH,\n VAD_INFERENCE_DONE,\n END_OF_SPEECH,\n FINAL_TRANSCRIPT,\n INTERIM_TRANSCRIPT,\n}\n\nexport type HumanInputCallbacks = {\n [HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;\n [HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;\n [HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;\n};\n\nexport class HumanInput extends (EventEmitter as new () => TypedEmitter<HumanInputCallbacks>) {\n #closed = false;\n #room: Room;\n #vad: VAD;\n #stt: STT;\n #participant: RemoteParticipant;\n #subscribedTrack?: RemoteAudioTrack;\n #recognizeTask?: CancellablePromise<void>;\n #speaking = false;\n #speechProbability = 0;\n #logger = log();\n\n constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant) {\n super();\n this.#room = room;\n this.#vad = vad;\n this.#stt = stt;\n this.#participant = participant;\n\n this.#room.on(RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));\n this.#room.on(RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));\n this.#subscribeToMicrophone();\n }\n\n get participant(): RemoteParticipant {\n return this.#participant;\n }\n\n get subscribedTrack(): RemoteAudioTrack | undefined {\n return this.#subscribedTrack;\n }\n\n #subscribeToMicrophone(): void {\n if (!this.#participant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.#participant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n\n const track = microphonePublication.track;\n if (track && track !== this.#subscribedTrack) {\n this.#subscribedTrack = track;\n if (this.#recognizeTask) {\n this.#recognizeTask.cancel();\n }\n\n const audioStream = new AudioStream(track, 16000);\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#recognizeTask = new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const sttStream = this.#stt.stream();\n const vadStream = this.#vad.stream();\n\n const audioStreamCo = async () => {\n for await (const ev of audioStream) {\n if (cancelled) return;\n sttStream.pushFrame(ev);\n vadStream.pushFrame(ev);\n }\n };\n\n const vadStreamCo = async () => {\n for await (const ev of vadStream) {\n if (cancelled) return;\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.#speaking = true;\n this.emit(HumanInputEvent.START_OF_SPEECH, ev);\n break;\n case VADEventType.INFERENCE_DONE:\n this.#speechProbability = ev.probability;\n this.emit(HumanInputEvent.VAD_INFERENCE_DONE, ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.#speaking = false;\n this.emit(HumanInputEvent.END_OF_SPEECH, ev);\n break;\n }\n }\n };\n\n const sttStreamCo = async () => {\n for await (const ev of sttStream) {\n if (cancelled) return;\n if (ev.type === SpeechEventType.FINAL_TRANSCRIPT) {\n this.emit(HumanInputEvent.FINAL_TRANSCRIPT, ev);\n } else if (ev.type == SpeechEventType.INTERIM_TRANSCRIPT) {\n this.emit(HumanInputEvent.INTERIM_TRANSCRIPT, ev);\n }\n }\n };\n\n await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);\n sttStream.close();\n vadStream.close();\n resolve();\n });\n }\n }\n\n get speaking(): boolean {\n return this.#speaking;\n }\n\n get speakingProbability(): number {\n return this.#speechProbability;\n }\n\n async close() {\n if (this.#closed) {\n throw new Error('HumanInput already closed');\n }\n this.#closed = true;\n this.#room.removeAllListeners();\n this.#speaking = false;\n if (this.#recognizeTask) {\n await gracefullyCancel(this.#recognizeTask);\n }\n }\n}\n"],"mappings":"AASA,SAAS,aAAa,WAAW,mBAAmB;AAEpD,SAAS,oBAAoB;AAC7B,SAAS,WAAW;AAEpB,SAAS,uBAAuB;AAChC,SAAS,oBAAoB,wBAAwB;AAErD,SAAS,oBAAoB;AAEtB,IAAK,kBAAL,kBAAKA,qBAAL;AACL,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AALU,SAAAA;AAAA,GAAA;AAgBL,MAAM,mBAAoB,aAA6D;AAAA,EAC5F,UAAU;AAAA,EACV;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,qBAAqB;AAAA,EACrB,UAAU,IAAI;AAAA,EAEd,YAAY,MAAY,KAAU,KAAU,aAAgC;AAC1E,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,eAAe;AAEpB,SAAK,MAAM,GAAG,UAAU,gBAAgB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC9E,SAAK,MAAM,GAAG,UAAU,iBAAiB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC/E,SAAK,uBAAuB;AAAA,EAC9B;AAAA,EAEA,IAAI,cAAiC;AACnC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,kBAAgD;AAClD,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,cAAc;AACtB,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,aAAa,kBAAkB,OAAO,GAAG;AACtE,UAAI,YAAY,WAAW,YAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAEA,UAAM,QAAQ,sBAAsB;AACpC,QAAI,SAAS,UAAU,KAAK,kBAAkB;AAC5C,WAAK,mBAAmB;AACxB,UAAI,KAAK,gBAAgB;AACvB,aAAK,eAAe,OAAO;AAAA,MAC7B;AAEA,YAAM,cAAc,IAAI,YAAY,OAAO,IAAK;AAGhD,WAAK,iBAAiB,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC3E,YAAI,YAAY;AAChB,iBAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,cAAM,YAAY,KAAK,KAAK,OAAO;AACnC,cAAM,YAAY,KAAK,KAAK,OAAO;AAEnC,cAAM,gBAAgB,YAAY;AAChC,2BAAiB,MAAM,aAAa;AAClC,gBAAI,UAAW;AACf,sBAAU,UAAU,EAAE;AACtB,sBAAU,UAAU,EAAE;AAAA,UACxB;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,oBAAQ,GAAG,MAAM;AAAA,cACf,KAAK,aAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,yBAAiC,EAAE;AAC7C;AAAA,cACF,KAAK,aAAa;AAChB,qBAAK,qBAAqB,GAAG;AAC7B,qBAAK,KAAK,4BAAoC,EAAE;AAChD;AAAA,cACF,KAAK,aAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,uBAA+B,EAAE;AAC3C;AAAA,YACJ;AAAA,UACF;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,gBAAI,GAAG,SAAS,gBAAgB,kBAAkB;AAChD,mBAAK,KAAK,0BAAkC,EAAE;AAAA,YAChD,WAAW,GAAG,QAAQ,gBAAgB,oBAAoB;AACxD,mBAAK,KAAK,4BAAoC,EAAE;AAAA,YAClD;AAAA,UACF;AAAA,QACF;AAEA,cAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,YAAY,GAAG,YAAY,CAAC,CAAC;AACjE,kBAAU,MAAM;AAChB,kBAAU,MAAM;AAChB,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAA8B;AAChC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AACA,SAAK,UAAU;AACf,SAAK,MAAM,mBAAmB;AAC9B,SAAK,YAAY;AACjB,QAAI,KAAK,gBAAgB;AACvB,YAAM,iBAAiB,KAAK,cAAc;AAAA,IAC5C;AAAA,EACF;AACF;","names":["HumanInputEvent"]}