@livekit/agents 1.0.23 → 1.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. package/dist/inference/llm.cjs +1 -2
  2. package/dist/inference/llm.cjs.map +1 -1
  3. package/dist/inference/llm.d.ts.map +1 -1
  4. package/dist/inference/llm.js +1 -2
  5. package/dist/inference/llm.js.map +1 -1
  6. package/dist/inference/stt.cjs +1 -1
  7. package/dist/inference/stt.cjs.map +1 -1
  8. package/dist/inference/stt.d.ts.map +1 -1
  9. package/dist/inference/stt.js +1 -1
  10. package/dist/inference/stt.js.map +1 -1
  11. package/dist/inference/tts.cjs +4 -2
  12. package/dist/inference/tts.cjs.map +1 -1
  13. package/dist/inference/tts.d.ts.map +1 -1
  14. package/dist/inference/tts.js +4 -2
  15. package/dist/inference/tts.js.map +1 -1
  16. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  17. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  18. package/dist/ipc/job_proc_lazy_main.js +1 -1
  19. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  20. package/dist/job.cjs +29 -2
  21. package/dist/job.cjs.map +1 -1
  22. package/dist/job.d.cts +6 -0
  23. package/dist/job.d.ts +6 -0
  24. package/dist/job.d.ts.map +1 -1
  25. package/dist/job.js +19 -2
  26. package/dist/job.js.map +1 -1
  27. package/dist/llm/llm.cjs +2 -1
  28. package/dist/llm/llm.cjs.map +1 -1
  29. package/dist/llm/llm.d.cts +1 -1
  30. package/dist/llm/llm.d.ts +1 -1
  31. package/dist/llm/llm.d.ts.map +1 -1
  32. package/dist/llm/llm.js +2 -1
  33. package/dist/llm/llm.js.map +1 -1
  34. package/dist/stream/deferred_stream.cjs +12 -4
  35. package/dist/stream/deferred_stream.cjs.map +1 -1
  36. package/dist/stream/deferred_stream.d.cts +6 -1
  37. package/dist/stream/deferred_stream.d.ts +6 -1
  38. package/dist/stream/deferred_stream.d.ts.map +1 -1
  39. package/dist/stream/deferred_stream.js +12 -4
  40. package/dist/stream/deferred_stream.js.map +1 -1
  41. package/dist/stream/deferred_stream.test.cjs +2 -2
  42. package/dist/stream/deferred_stream.test.cjs.map +1 -1
  43. package/dist/stream/deferred_stream.test.js +2 -2
  44. package/dist/stream/deferred_stream.test.js.map +1 -1
  45. package/dist/stt/stream_adapter.cjs +15 -8
  46. package/dist/stt/stream_adapter.cjs.map +1 -1
  47. package/dist/stt/stream_adapter.d.cts +7 -3
  48. package/dist/stt/stream_adapter.d.ts +7 -3
  49. package/dist/stt/stream_adapter.d.ts.map +1 -1
  50. package/dist/stt/stream_adapter.js +15 -8
  51. package/dist/stt/stream_adapter.js.map +1 -1
  52. package/dist/stt/stt.cjs +8 -3
  53. package/dist/stt/stt.cjs.map +1 -1
  54. package/dist/stt/stt.d.cts +9 -3
  55. package/dist/stt/stt.d.ts +9 -3
  56. package/dist/stt/stt.d.ts.map +1 -1
  57. package/dist/stt/stt.js +9 -4
  58. package/dist/stt/stt.js.map +1 -1
  59. package/dist/telemetry/traces.cjs +23 -2
  60. package/dist/telemetry/traces.cjs.map +1 -1
  61. package/dist/telemetry/traces.d.ts.map +1 -1
  62. package/dist/telemetry/traces.js +23 -2
  63. package/dist/telemetry/traces.js.map +1 -1
  64. package/dist/tts/stream_adapter.cjs +4 -4
  65. package/dist/tts/stream_adapter.cjs.map +1 -1
  66. package/dist/tts/stream_adapter.d.cts +5 -2
  67. package/dist/tts/stream_adapter.d.ts +5 -2
  68. package/dist/tts/stream_adapter.d.ts.map +1 -1
  69. package/dist/tts/stream_adapter.js +4 -4
  70. package/dist/tts/stream_adapter.js.map +1 -1
  71. package/dist/tts/tts.cjs +2 -2
  72. package/dist/tts/tts.cjs.map +1 -1
  73. package/dist/tts/tts.d.cts +5 -1
  74. package/dist/tts/tts.d.ts +5 -1
  75. package/dist/tts/tts.d.ts.map +1 -1
  76. package/dist/tts/tts.js +3 -3
  77. package/dist/tts/tts.js.map +1 -1
  78. package/dist/types.cjs +21 -32
  79. package/dist/types.cjs.map +1 -1
  80. package/dist/types.d.cts +41 -10
  81. package/dist/types.d.ts +41 -10
  82. package/dist/types.d.ts.map +1 -1
  83. package/dist/types.js +18 -30
  84. package/dist/types.js.map +1 -1
  85. package/dist/voice/agent.cjs +54 -19
  86. package/dist/voice/agent.cjs.map +1 -1
  87. package/dist/voice/agent.d.ts.map +1 -1
  88. package/dist/voice/agent.js +54 -19
  89. package/dist/voice/agent.js.map +1 -1
  90. package/dist/voice/agent_activity.cjs +0 -3
  91. package/dist/voice/agent_activity.cjs.map +1 -1
  92. package/dist/voice/agent_activity.d.ts.map +1 -1
  93. package/dist/voice/agent_activity.js +0 -3
  94. package/dist/voice/agent_activity.js.map +1 -1
  95. package/dist/voice/agent_session.cjs +106 -28
  96. package/dist/voice/agent_session.cjs.map +1 -1
  97. package/dist/voice/agent_session.d.cts +16 -2
  98. package/dist/voice/agent_session.d.ts +16 -2
  99. package/dist/voice/agent_session.d.ts.map +1 -1
  100. package/dist/voice/agent_session.js +109 -28
  101. package/dist/voice/agent_session.js.map +1 -1
  102. package/dist/voice/events.cjs.map +1 -1
  103. package/dist/voice/events.d.cts +4 -4
  104. package/dist/voice/events.d.ts +4 -4
  105. package/dist/voice/events.d.ts.map +1 -1
  106. package/dist/voice/events.js.map +1 -1
  107. package/dist/voice/generation.cjs +6 -7
  108. package/dist/voice/generation.cjs.map +1 -1
  109. package/dist/voice/generation.d.ts.map +1 -1
  110. package/dist/voice/generation.js +7 -8
  111. package/dist/voice/generation.js.map +1 -1
  112. package/dist/voice/io.cjs +16 -0
  113. package/dist/voice/io.cjs.map +1 -1
  114. package/dist/voice/io.d.cts +8 -0
  115. package/dist/voice/io.d.ts +8 -0
  116. package/dist/voice/io.d.ts.map +1 -1
  117. package/dist/voice/io.js +16 -0
  118. package/dist/voice/io.js.map +1 -1
  119. package/dist/voice/recorder_io/index.cjs +23 -0
  120. package/dist/voice/recorder_io/index.cjs.map +1 -0
  121. package/dist/voice/recorder_io/index.d.cts +2 -0
  122. package/dist/voice/recorder_io/index.d.ts +2 -0
  123. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  124. package/dist/voice/recorder_io/index.js +2 -0
  125. package/dist/voice/recorder_io/index.js.map +1 -0
  126. package/dist/voice/recorder_io/recorder_io.cjs +542 -0
  127. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  128. package/dist/voice/recorder_io/recorder_io.d.cts +100 -0
  129. package/dist/voice/recorder_io/recorder_io.d.ts +100 -0
  130. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  131. package/dist/voice/recorder_io/recorder_io.js +508 -0
  132. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  133. package/dist/voice/report.cjs +7 -2
  134. package/dist/voice/report.cjs.map +1 -1
  135. package/dist/voice/report.d.cts +11 -1
  136. package/dist/voice/report.d.ts +11 -1
  137. package/dist/voice/report.d.ts.map +1 -1
  138. package/dist/voice/report.js +7 -2
  139. package/dist/voice/report.js.map +1 -1
  140. package/dist/voice/room_io/_input.cjs +2 -1
  141. package/dist/voice/room_io/_input.cjs.map +1 -1
  142. package/dist/voice/room_io/_input.d.ts.map +1 -1
  143. package/dist/voice/room_io/_input.js +2 -1
  144. package/dist/voice/room_io/_input.js.map +1 -1
  145. package/dist/voice/room_io/_output.cjs +8 -7
  146. package/dist/voice/room_io/_output.cjs.map +1 -1
  147. package/dist/voice/room_io/_output.d.cts +2 -1
  148. package/dist/voice/room_io/_output.d.ts +2 -1
  149. package/dist/voice/room_io/_output.d.ts.map +1 -1
  150. package/dist/voice/room_io/_output.js +8 -7
  151. package/dist/voice/room_io/_output.js.map +1 -1
  152. package/dist/worker.cjs +4 -3
  153. package/dist/worker.cjs.map +1 -1
  154. package/dist/worker.js +4 -3
  155. package/dist/worker.js.map +1 -1
  156. package/package.json +1 -1
  157. package/src/inference/llm.ts +0 -1
  158. package/src/inference/stt.ts +1 -2
  159. package/src/inference/tts.ts +5 -2
  160. package/src/ipc/job_proc_lazy_main.ts +1 -1
  161. package/src/job.ts +21 -2
  162. package/src/llm/llm.ts +2 -2
  163. package/src/stream/deferred_stream.test.ts +3 -3
  164. package/src/stream/deferred_stream.ts +22 -5
  165. package/src/stt/stream_adapter.ts +18 -8
  166. package/src/stt/stt.ts +19 -6
  167. package/src/telemetry/traces.ts +25 -3
  168. package/src/tts/stream_adapter.ts +5 -4
  169. package/src/tts/tts.ts +6 -4
  170. package/src/types.ts +57 -33
  171. package/src/voice/agent.ts +59 -19
  172. package/src/voice/agent_activity.ts +0 -3
  173. package/src/voice/agent_session.ts +141 -36
  174. package/src/voice/events.ts +6 -3
  175. package/src/voice/generation.ts +10 -8
  176. package/src/voice/io.ts +19 -0
  177. package/src/voice/recorder_io/index.ts +4 -0
  178. package/src/voice/recorder_io/recorder_io.ts +690 -0
  179. package/src/voice/report.ts +20 -3
  180. package/src/voice/room_io/_input.ts +2 -1
  181. package/src/voice/room_io/_output.ts +10 -7
  182. package/src/worker.ts +1 -1
package/dist/voice/io.js CHANGED
@@ -83,6 +83,22 @@ class AudioOutput extends EventEmitter {
83
83
  this.nextInChain.onDetached();
84
84
  }
85
85
  }
86
+ /**
87
+ * Pause the audio playback
88
+ */
89
+ pause() {
90
+ if (this.nextInChain) {
91
+ this.nextInChain.pause();
92
+ }
93
+ }
94
+ /**
95
+ * Resume the audio playback
96
+ */
97
+ resume() {
98
+ if (this.nextInChain) {
99
+ this.nextInChain.resume();
100
+ }
101
+ }
86
102
  }
87
103
  class TextOutput {
88
104
  constructor(nextInChain) {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/voice/io.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport type { ChatContext } from '../llm/chat_context.js';\nimport type { ChatChunk } from '../llm/llm.js';\nimport type { ToolContext } from '../llm/tool_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport type { SpeechEvent } from '../stt/stt.js';\nimport { Future } from '../utils.js';\nimport type { ModelSettings } from './agent.js';\n\nexport type STTNode = (\n audio: ReadableStream<AudioFrame>,\n modelSettings: ModelSettings,\n) => Promise<ReadableStream<SpeechEvent | string> | null>;\n\nexport type LLMNode = (\n chatCtx: ChatContext,\n toolCtx: ToolContext,\n modelSettings: ModelSettings,\n) => Promise<ReadableStream<ChatChunk | string> | null>;\n\nexport type TTSNode = (\n text: ReadableStream<string>,\n modelSettings: ModelSettings,\n) => Promise<ReadableStream<AudioFrame> | null>;\nexport abstract class AudioInput {\n protected deferredStream: DeferredReadableStream<AudioFrame> =\n new DeferredReadableStream<AudioFrame>();\n\n get stream(): ReadableStream<AudioFrame> {\n return this.deferredStream.stream;\n }\n\n onAttached(): void {}\n\n onDetached(): void {}\n}\n\nexport abstract class AudioOutput extends EventEmitter {\n static readonly EVENT_PLAYBACK_FINISHED = 'playbackFinished';\n\n private playbackFinishedFuture: Future<void> = new Future();\n private _capturing: boolean = false;\n private playbackFinishedCount: number = 0;\n private playbackSegmentsCount: number = 0;\n private lastPlaybackEvent: PlaybackFinishedEvent = {\n playbackPosition: 0,\n interrupted: false,\n };\n protected logger = log();\n\n constructor(\n public sampleRate?: number,\n protected readonly 
nextInChain?: AudioOutput,\n ) {\n super();\n if (this.nextInChain) {\n this.nextInChain.on(AudioOutput.EVENT_PLAYBACK_FINISHED, (ev: PlaybackFinishedEvent) =>\n this.onPlaybackFinished(ev),\n );\n }\n }\n\n /**\n * Capture an audio frame for playback, frames can be pushed faster than real-time\n */\n async captureFrame(_frame: AudioFrame): Promise<void> {\n if (!this._capturing) {\n this._capturing = true;\n this.playbackSegmentsCount++;\n }\n }\n\n /**\n * Wait for the past audio segments to finish playing out.\n *\n * @returns The event that was emitted when the audio finished playing out (only the last segment information)\n */\n async waitForPlayout(): Promise<PlaybackFinishedEvent> {\n const target = this.playbackSegmentsCount;\n\n while (this.playbackFinishedCount < target) {\n await this.playbackFinishedFuture.await;\n this.playbackFinishedFuture = new Future();\n }\n\n return this.lastPlaybackEvent;\n }\n\n /**\n * Developers building audio sinks must call this method when a playback/segment is finished.\n * Segments are segmented by calls to flush() or clearBuffer()\n */\n onPlaybackFinished(options: PlaybackFinishedEvent) {\n if (this.playbackFinishedCount >= this.playbackSegmentsCount) {\n this.logger.warn('playback_finished called more times than playback segments were captured');\n return;\n }\n\n this.lastPlaybackEvent = options;\n this.playbackFinishedCount++;\n this.playbackFinishedFuture.resolve();\n this.emit(AudioOutput.EVENT_PLAYBACK_FINISHED, options);\n }\n\n flush(): void {\n this._capturing = false;\n }\n\n /**\n * Clear the buffer, stopping playback immediately\n */\n abstract clearBuffer(): void;\n\n onAttached(): void {\n if (this.nextInChain) {\n this.nextInChain.onAttached();\n }\n }\n\n onDetached(): void {\n if (this.nextInChain) {\n this.nextInChain.onDetached();\n }\n }\n}\n\nexport interface PlaybackFinishedEvent {\n // How much of the audio was played back\n playbackPosition: number;\n // Interrupted is True if playback was 
interrupted (clearBuffer() was called)\n interrupted: boolean;\n // Transcript synced with playback; may be partial if the audio was interrupted\n // When null, the transcript is not synchronized with the playback\n synchronizedTranscript?: string;\n}\n\nexport abstract class TextOutput {\n constructor(protected readonly nextInChain?: TextOutput) {}\n\n /**\n * Capture a text segment (Used by the output of LLM nodes)\n */\n abstract captureText(text: string): Promise<void>;\n\n /**\n * Mark the current text segment as complete (e.g LLM generation is complete)\n */\n abstract flush(): void;\n\n onAttached(): void {\n if (this.nextInChain) {\n this.nextInChain.onAttached();\n }\n }\n\n onDetached(): void {\n if (this.nextInChain) {\n this.nextInChain.onDetached();\n }\n }\n}\n\nexport class AgentInput {\n private _audioStream: AudioInput | null = null;\n // enabled by default\n private _audioEnabled: boolean = true;\n\n constructor(private readonly audioChanged: () => void) {}\n\n setAudioEnabled(enable: boolean): void {\n if (enable === this._audioEnabled) {\n return;\n }\n\n this._audioEnabled = enable;\n\n if (!this._audioStream) {\n return;\n }\n\n if (enable) {\n this._audioStream.onAttached();\n } else {\n this._audioStream.onDetached();\n }\n }\n\n get audioEnabled(): boolean {\n return this._audioEnabled;\n }\n\n get audio(): AudioInput | null {\n return this._audioStream;\n }\n\n set audio(stream: AudioInput | null) {\n this._audioStream = stream;\n this.audioChanged();\n }\n}\n\nexport class AgentOutput {\n private _audioSink: AudioOutput | null = null;\n private _transcriptionSink: TextOutput | null = null;\n private _audioEnabled: boolean = true;\n private _transcriptionEnabled: boolean = true;\n\n constructor(\n private readonly audioChanged: () => void,\n private readonly transcriptionChanged: () => void,\n ) {}\n\n setAudioEnabled(enabled: boolean): void {\n if (enabled === this._audioEnabled) {\n return;\n }\n\n this._audioEnabled = enabled;\n\n if 
(!this._audioSink) {\n return;\n }\n\n if (enabled) {\n this._audioSink.onAttached();\n } else {\n this._audioSink.onDetached();\n }\n }\n\n setTranscriptionEnabled(enabled: boolean): void {\n if (enabled === this._transcriptionEnabled) {\n return;\n }\n\n this._transcriptionEnabled = enabled;\n\n if (!this._transcriptionSink) {\n return;\n }\n\n if (enabled) {\n this._transcriptionSink.onAttached();\n } else {\n this._transcriptionSink.onDetached();\n }\n }\n\n get audioEnabled(): boolean {\n return this._audioEnabled;\n }\n\n get transcriptionEnabled(): boolean {\n return this._transcriptionEnabled;\n }\n\n get audio(): AudioOutput | null {\n return this._audioSink;\n }\n\n set audio(sink: AudioOutput | null) {\n if (sink === this._audioSink) {\n return;\n }\n\n if (this._audioSink) {\n this._audioSink.onDetached();\n }\n\n this._audioSink = sink;\n this.audioChanged();\n\n if (this._audioSink) {\n this._audioSink.onAttached();\n }\n }\n\n get transcription(): TextOutput | null {\n return this._transcriptionSink;\n }\n\n set transcription(sink: TextOutput | null) {\n if (sink === this._transcriptionSink) {\n return;\n }\n\n if (this._transcriptionSink) {\n this._transcriptionSink.onDetached();\n }\n\n this._transcriptionSink = sink;\n this.transcriptionChanged();\n\n if (this._transcriptionSink) {\n this._transcriptionSink.onAttached();\n }\n 
}\n}\n"],"mappings":"AAIA,SAAS,oBAAoB;AAK7B,SAAS,WAAW;AACpB,SAAS,8BAA8B;AAEvC,SAAS,cAAc;AAkBhB,MAAe,WAAW;AAAA,EACrB,iBACR,IAAI,uBAAmC;AAAA,EAEzC,IAAI,SAAqC;AACvC,WAAO,KAAK,eAAe;AAAA,EAC7B;AAAA,EAEA,aAAmB;AAAA,EAAC;AAAA,EAEpB,aAAmB;AAAA,EAAC;AACtB;AAEO,MAAe,oBAAoB,aAAa;AAAA,EAarD,YACS,YACY,aACnB;AACA,UAAM;AAHC;AACY;AAGnB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY;AAAA,QAAG,YAAY;AAAA,QAAyB,CAAC,OACxD,KAAK,mBAAmB,EAAE;AAAA,MAC5B;AAAA,IACF;AAAA,EACF;AAAA,EAtBA,OAAgB,0BAA0B;AAAA,EAElC,yBAAuC,IAAI,OAAO;AAAA,EAClD,aAAsB;AAAA,EACtB,wBAAgC;AAAA,EAChC,wBAAgC;AAAA,EAChC,oBAA2C;AAAA,IACjD,kBAAkB;AAAA,IAClB,aAAa;AAAA,EACf;AAAA,EACU,SAAS,IAAI;AAAA;AAAA;AAAA;AAAA,EAiBvB,MAAM,aAAa,QAAmC;AACpD,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa;AAClB,WAAK;AAAA,IACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,iBAAiD;AACrD,UAAM,SAAS,KAAK;AAEpB,WAAO,KAAK,wBAAwB,QAAQ;AAC1C,YAAM,KAAK,uBAAuB;AAClC,WAAK,yBAAyB,IAAI,OAAO;AAAA,IAC3C;AAEA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,mBAAmB,SAAgC;AACjD,QAAI,KAAK,yBAAyB,KAAK,uBAAuB;AAC5D,WAAK,OAAO,KAAK,0EAA0E;AAC3F;AAAA,IACF;AAEA,SAAK,oBAAoB;AACzB,SAAK;AACL,SAAK,uBAAuB,QAAQ;AACpC,SAAK,KAAK,YAAY,yBAAyB,OAAO;AAAA,EACxD;AAAA,EAEA,QAAc;AACZ,SAAK,aAAa;AAAA,EACpB;AAAA,EAOA,aAAmB;AACjB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,WAAW;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,aAAmB;AACjB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,WAAW;AAAA,IAC9B;AAAA,EACF;AACF;AAYO,MAAe,WAAW;AAAA,EAC/B,YAA+B,aAA0B;AAA1B;AAAA,EAA2B;AAAA,EAY1D,aAAmB;AACjB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,WAAW;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,aAAmB;AACjB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,WAAW;AAAA,IAC9B;AAAA,EACF;AACF;AAEO,MAAM,WAAW;AAAA,EAKtB,YAA6B,cAA0B;AAA1B;AAAA,EAA2B;AAAA,EAJhD,eAAkC;AAAA;AAAA,EAElC,gBAAyB;AAAA,EAIjC,gBAAgB,QAAuB;AACrC,QAAI,WAAW,KAAK,eAAe;AACjC;AAAA,IACF;AAEA,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,cAAc;AACtB;AAAA,IACF;AAEA,QAAI,QAAQ;AACV,WAAK,aAAa,WAAW;AAAA,IAC/B,OAAO;AACL,WAAK,aAAa,WAAW;AAAA,IAC/B;AAAA,EACF;AAAA,EAEA,IAAI,eAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,QAA2B;AAC7B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAM,QAA2B;AACnC
,SAAK,eAAe;AACpB,SAAK,aAAa;AAAA,EACpB;AACF;AAEO,MAAM,YAAY;AAAA,EAMvB,YACmB,cACA,sBACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EARK,aAAiC;AAAA,EACjC,qBAAwC;AAAA,EACxC,gBAAyB;AAAA,EACzB,wBAAiC;AAAA,EAOzC,gBAAgB,SAAwB;AACtC,QAAI,YAAY,KAAK,eAAe;AAClC;AAAA,IACF;AAEA,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AACpB;AAAA,IACF;AAEA,QAAI,SAAS;AACX,WAAK,WAAW,WAAW;AAAA,IAC7B,OAAO;AACL,WAAK,WAAW,WAAW;AAAA,IAC7B;AAAA,EACF;AAAA,EAEA,wBAAwB,SAAwB;AAC9C,QAAI,YAAY,KAAK,uBAAuB;AAC1C;AAAA,IACF;AAEA,SAAK,wBAAwB;AAE7B,QAAI,CAAC,KAAK,oBAAoB;AAC5B;AAAA,IACF;AAEA,QAAI,SAAS;AACX,WAAK,mBAAmB,WAAW;AAAA,IACrC,OAAO;AACL,WAAK,mBAAmB,WAAW;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,IAAI,eAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,uBAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,QAA4B;AAC9B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAM,MAA0B;AAClC,QAAI,SAAS,KAAK,YAAY;AAC5B;AAAA,IACF;AAEA,QAAI,KAAK,YAAY;AACnB,WAAK,WAAW,WAAW;AAAA,IAC7B;AAEA,SAAK,aAAa;AAClB,SAAK,aAAa;AAElB,QAAI,KAAK,YAAY;AACnB,WAAK,WAAW,WAAW;AAAA,IAC7B;AAAA,EACF;AAAA,EAEA,IAAI,gBAAmC;AACrC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAc,MAAyB;AACzC,QAAI,SAAS,KAAK,oBAAoB;AACpC;AAAA,IACF;AAEA,QAAI,KAAK,oBAAoB;AAC3B,WAAK,mBAAmB,WAAW;AAAA,IACrC;AAEA,SAAK,qBAAqB;AAC1B,SAAK,qBAAqB;AAE1B,QAAI,KAAK,oBAAoB;AAC3B,WAAK,mBAAmB,WAAW;AAAA,IACrC;AAAA,EACF;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/voice/io.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport type { ChatContext } from '../llm/chat_context.js';\nimport type { ChatChunk } from '../llm/llm.js';\nimport type { ToolContext } from '../llm/tool_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport type { SpeechEvent } from '../stt/stt.js';\nimport { Future } from '../utils.js';\nimport type { ModelSettings } from './agent.js';\n\nexport type STTNode = (\n audio: ReadableStream<AudioFrame>,\n modelSettings: ModelSettings,\n) => Promise<ReadableStream<SpeechEvent | string> | null>;\n\nexport type LLMNode = (\n chatCtx: ChatContext,\n toolCtx: ToolContext,\n modelSettings: ModelSettings,\n) => Promise<ReadableStream<ChatChunk | string> | null>;\n\nexport type TTSNode = (\n text: ReadableStream<string>,\n modelSettings: ModelSettings,\n) => Promise<ReadableStream<AudioFrame> | null>;\n\nexport abstract class AudioInput {\n protected deferredStream: DeferredReadableStream<AudioFrame> =\n new DeferredReadableStream<AudioFrame>();\n\n get stream(): ReadableStream<AudioFrame> {\n return this.deferredStream.stream;\n }\n\n onAttached(): void {}\n\n onDetached(): void {}\n}\n\nexport abstract class AudioOutput extends EventEmitter {\n static readonly EVENT_PLAYBACK_FINISHED = 'playbackFinished';\n\n private playbackFinishedFuture: Future<void> = new Future();\n private _capturing: boolean = false;\n private playbackFinishedCount: number = 0;\n private playbackSegmentsCount: number = 0;\n private lastPlaybackEvent: PlaybackFinishedEvent = {\n playbackPosition: 0,\n interrupted: false,\n };\n protected logger = log();\n\n constructor(\n public sampleRate?: number,\n protected 
readonly nextInChain?: AudioOutput,\n ) {\n super();\n if (this.nextInChain) {\n this.nextInChain.on(AudioOutput.EVENT_PLAYBACK_FINISHED, (ev: PlaybackFinishedEvent) =>\n this.onPlaybackFinished(ev),\n );\n }\n }\n\n /**\n * Capture an audio frame for playback, frames can be pushed faster than real-time\n */\n async captureFrame(_frame: AudioFrame): Promise<void> {\n if (!this._capturing) {\n this._capturing = true;\n this.playbackSegmentsCount++;\n }\n }\n\n /**\n * Wait for the past audio segments to finish playing out.\n *\n * @returns The event that was emitted when the audio finished playing out (only the last segment information)\n */\n async waitForPlayout(): Promise<PlaybackFinishedEvent> {\n const target = this.playbackSegmentsCount;\n\n while (this.playbackFinishedCount < target) {\n await this.playbackFinishedFuture.await;\n this.playbackFinishedFuture = new Future();\n }\n\n return this.lastPlaybackEvent;\n }\n\n /**\n * Developers building audio sinks must call this method when a playback/segment is finished.\n * Segments are segmented by calls to flush() or clearBuffer()\n */\n onPlaybackFinished(options: PlaybackFinishedEvent) {\n if (this.playbackFinishedCount >= this.playbackSegmentsCount) {\n this.logger.warn('playback_finished called more times than playback segments were captured');\n return;\n }\n\n this.lastPlaybackEvent = options;\n this.playbackFinishedCount++;\n this.playbackFinishedFuture.resolve();\n this.emit(AudioOutput.EVENT_PLAYBACK_FINISHED, options);\n }\n\n flush(): void {\n this._capturing = false;\n }\n\n /**\n * Clear the buffer, stopping playback immediately\n */\n abstract clearBuffer(): void;\n\n onAttached(): void {\n if (this.nextInChain) {\n this.nextInChain.onAttached();\n }\n }\n\n onDetached(): void {\n if (this.nextInChain) {\n this.nextInChain.onDetached();\n }\n }\n\n /**\n * Pause the audio playback\n */\n pause(): void {\n if (this.nextInChain) {\n this.nextInChain.pause();\n }\n }\n\n /**\n * Resume the audio 
playback\n */\n resume(): void {\n if (this.nextInChain) {\n this.nextInChain.resume();\n }\n }\n}\n\nexport interface PlaybackFinishedEvent {\n // How much of the audio was played back\n playbackPosition: number;\n // Interrupted is True if playback was interrupted (clearBuffer() was called)\n interrupted: boolean;\n // Transcript synced with playback; may be partial if the audio was interrupted\n // When null, the transcript is not synchronized with the playback\n synchronizedTranscript?: string;\n}\n\nexport abstract class TextOutput {\n constructor(protected readonly nextInChain?: TextOutput) {}\n\n /**\n * Capture a text segment (Used by the output of LLM nodes)\n */\n abstract captureText(text: string): Promise<void>;\n\n /**\n * Mark the current text segment as complete (e.g LLM generation is complete)\n */\n abstract flush(): void;\n\n onAttached(): void {\n if (this.nextInChain) {\n this.nextInChain.onAttached();\n }\n }\n\n onDetached(): void {\n if (this.nextInChain) {\n this.nextInChain.onDetached();\n }\n }\n}\n\nexport class AgentInput {\n private _audioStream: AudioInput | null = null;\n // enabled by default\n private _audioEnabled: boolean = true;\n\n constructor(private readonly audioChanged: () => void) {}\n\n setAudioEnabled(enable: boolean): void {\n if (enable === this._audioEnabled) {\n return;\n }\n\n this._audioEnabled = enable;\n\n if (!this._audioStream) {\n return;\n }\n\n if (enable) {\n this._audioStream.onAttached();\n } else {\n this._audioStream.onDetached();\n }\n }\n\n get audioEnabled(): boolean {\n return this._audioEnabled;\n }\n\n get audio(): AudioInput | null {\n return this._audioStream;\n }\n\n set audio(stream: AudioInput | null) {\n this._audioStream = stream;\n this.audioChanged();\n }\n}\n\nexport class AgentOutput {\n private _audioSink: AudioOutput | null = null;\n private _transcriptionSink: TextOutput | null = null;\n private _audioEnabled: boolean = true;\n private _transcriptionEnabled: boolean = true;\n\n 
constructor(\n private readonly audioChanged: () => void,\n private readonly transcriptionChanged: () => void,\n ) {}\n\n setAudioEnabled(enabled: boolean): void {\n if (enabled === this._audioEnabled) {\n return;\n }\n\n this._audioEnabled = enabled;\n\n if (!this._audioSink) {\n return;\n }\n\n if (enabled) {\n this._audioSink.onAttached();\n } else {\n this._audioSink.onDetached();\n }\n }\n\n setTranscriptionEnabled(enabled: boolean): void {\n if (enabled === this._transcriptionEnabled) {\n return;\n }\n\n this._transcriptionEnabled = enabled;\n\n if (!this._transcriptionSink) {\n return;\n }\n\n if (enabled) {\n this._transcriptionSink.onAttached();\n } else {\n this._transcriptionSink.onDetached();\n }\n }\n\n get audioEnabled(): boolean {\n return this._audioEnabled;\n }\n\n get transcriptionEnabled(): boolean {\n return this._transcriptionEnabled;\n }\n\n get audio(): AudioOutput | null {\n return this._audioSink;\n }\n\n set audio(sink: AudioOutput | null) {\n if (sink === this._audioSink) {\n return;\n }\n\n if (this._audioSink) {\n this._audioSink.onDetached();\n }\n\n this._audioSink = sink;\n this.audioChanged();\n\n if (this._audioSink) {\n this._audioSink.onAttached();\n }\n }\n\n get transcription(): TextOutput | null {\n return this._transcriptionSink;\n }\n\n set transcription(sink: TextOutput | null) {\n if (sink === this._transcriptionSink) {\n return;\n }\n\n if (this._transcriptionSink) {\n this._transcriptionSink.onDetached();\n }\n\n this._transcriptionSink = sink;\n this.transcriptionChanged();\n\n if (this._transcriptionSink) {\n this._transcriptionSink.onAttached();\n }\n 
}\n}\n"],"mappings":"AAIA,SAAS,oBAAoB;AAK7B,SAAS,WAAW;AACpB,SAAS,8BAA8B;AAEvC,SAAS,cAAc;AAmBhB,MAAe,WAAW;AAAA,EACrB,iBACR,IAAI,uBAAmC;AAAA,EAEzC,IAAI,SAAqC;AACvC,WAAO,KAAK,eAAe;AAAA,EAC7B;AAAA,EAEA,aAAmB;AAAA,EAAC;AAAA,EAEpB,aAAmB;AAAA,EAAC;AACtB;AAEO,MAAe,oBAAoB,aAAa;AAAA,EAarD,YACS,YACY,aACnB;AACA,UAAM;AAHC;AACY;AAGnB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY;AAAA,QAAG,YAAY;AAAA,QAAyB,CAAC,OACxD,KAAK,mBAAmB,EAAE;AAAA,MAC5B;AAAA,IACF;AAAA,EACF;AAAA,EAtBA,OAAgB,0BAA0B;AAAA,EAElC,yBAAuC,IAAI,OAAO;AAAA,EAClD,aAAsB;AAAA,EACtB,wBAAgC;AAAA,EAChC,wBAAgC;AAAA,EAChC,oBAA2C;AAAA,IACjD,kBAAkB;AAAA,IAClB,aAAa;AAAA,EACf;AAAA,EACU,SAAS,IAAI;AAAA;AAAA;AAAA;AAAA,EAiBvB,MAAM,aAAa,QAAmC;AACpD,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa;AAClB,WAAK;AAAA,IACP;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,iBAAiD;AACrD,UAAM,SAAS,KAAK;AAEpB,WAAO,KAAK,wBAAwB,QAAQ;AAC1C,YAAM,KAAK,uBAAuB;AAClC,WAAK,yBAAyB,IAAI,OAAO;AAAA,IAC3C;AAEA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,mBAAmB,SAAgC;AACjD,QAAI,KAAK,yBAAyB,KAAK,uBAAuB;AAC5D,WAAK,OAAO,KAAK,0EAA0E;AAC3F;AAAA,IACF;AAEA,SAAK,oBAAoB;AACzB,SAAK;AACL,SAAK,uBAAuB,QAAQ;AACpC,SAAK,KAAK,YAAY,yBAAyB,OAAO;AAAA,EACxD;AAAA,EAEA,QAAc;AACZ,SAAK,aAAa;AAAA,EACpB;AAAA,EAOA,aAAmB;AACjB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,WAAW;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,aAAmB;AACjB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,WAAW;AAAA,IAC9B;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,QAAc;AACZ,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,MAAM;AAAA,IACzB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,SAAe;AACb,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,OAAO;AAAA,IAC1B;AAAA,EACF;AACF;AAYO,MAAe,WAAW;AAAA,EAC/B,YAA+B,aAA0B;AAA1B;AAAA,EAA2B;AAAA,EAY1D,aAAmB;AACjB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,WAAW;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,aAAmB;AACjB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,WAAW;AAAA,IAC9B;AAAA,EACF;AACF;AAEO,MAAM,WAAW;AAAA,EAKtB,YAA6B,cAA0B;AAA1B;AAAA,EAA2B;AAAA,EAJhD,eAAkC;AAAA;AAAA,EAElC,gBAAyB;AAAA,EAIjC,gBAAgB,QAAuB;AACrC,QAAI,WAAW,KAAK,eAAe;AACjC;AAAA,IACF;AAEA,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,cAAc;AACtB;AAAA,IACF;AAEA,QAAI,QAAQ;AACV,WAAK,aAAa,WAA
W;AAAA,IAC/B,OAAO;AACL,WAAK,aAAa,WAAW;AAAA,IAC/B;AAAA,EACF;AAAA,EAEA,IAAI,eAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,QAA2B;AAC7B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAM,QAA2B;AACnC,SAAK,eAAe;AACpB,SAAK,aAAa;AAAA,EACpB;AACF;AAEO,MAAM,YAAY;AAAA,EAMvB,YACmB,cACA,sBACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EARK,aAAiC;AAAA,EACjC,qBAAwC;AAAA,EACxC,gBAAyB;AAAA,EACzB,wBAAiC;AAAA,EAOzC,gBAAgB,SAAwB;AACtC,QAAI,YAAY,KAAK,eAAe;AAClC;AAAA,IACF;AAEA,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AACpB;AAAA,IACF;AAEA,QAAI,SAAS;AACX,WAAK,WAAW,WAAW;AAAA,IAC7B,OAAO;AACL,WAAK,WAAW,WAAW;AAAA,IAC7B;AAAA,EACF;AAAA,EAEA,wBAAwB,SAAwB;AAC9C,QAAI,YAAY,KAAK,uBAAuB;AAC1C;AAAA,IACF;AAEA,SAAK,wBAAwB;AAE7B,QAAI,CAAC,KAAK,oBAAoB;AAC5B;AAAA,IACF;AAEA,QAAI,SAAS;AACX,WAAK,mBAAmB,WAAW;AAAA,IACrC,OAAO;AACL,WAAK,mBAAmB,WAAW;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,IAAI,eAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,uBAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,QAA4B;AAC9B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAM,MAA0B;AAClC,QAAI,SAAS,KAAK,YAAY;AAC5B;AAAA,IACF;AAEA,QAAI,KAAK,YAAY;AACnB,WAAK,WAAW,WAAW;AAAA,IAC7B;AAEA,SAAK,aAAa;AAClB,SAAK,aAAa;AAElB,QAAI,KAAK,YAAY;AACnB,WAAK,WAAW,WAAW;AAAA,IAC7B;AAAA,EACF;AAAA,EAEA,IAAI,gBAAmC;AACrC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAc,MAAyB;AACzC,QAAI,SAAS,KAAK,oBAAoB;AACpC;AAAA,IACF;AAEA,QAAI,KAAK,oBAAoB;AAC3B,WAAK,mBAAmB,WAAW;AAAA,IACrC;AAEA,SAAK,qBAAqB;AAC1B,SAAK,qBAAqB;AAE1B,QAAI,KAAK,oBAAoB;AAC3B,WAAK,mBAAmB,WAAW;AAAA,IACrC;AAAA,EACF;AACF;","names":[]}
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __copyProps = (to, from, except, desc) => {
7
+ if (from && typeof from === "object" || typeof from === "function") {
8
+ for (let key of __getOwnPropNames(from))
9
+ if (!__hasOwnProp.call(to, key) && key !== except)
10
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
11
+ }
12
+ return to;
13
+ };
14
+ var __reExport = (target, mod, secondTarget) => (__copyProps(target, mod, "default"), secondTarget && __copyProps(secondTarget, mod, "default"));
15
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
16
+ var recorder_io_exports = {};
17
+ module.exports = __toCommonJS(recorder_io_exports);
18
+ __reExport(recorder_io_exports, require("./recorder_io.cjs"), module.exports);
19
+ // Annotate the CommonJS export names for ESM import in node:
20
+ 0 && (module.exports = {
21
+ ...require("./recorder_io.cjs")
22
+ });
23
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../src/voice/recorder_io/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nexport * from './recorder_io.js';\n"],"mappings":";;;;;;;;;;;;;;;AAAA;AAAA;AAGA,gCAAc,6BAHd;","names":[]}
@@ -0,0 +1,2 @@
1
+ export * from './recorder_io.js';
2
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,2 @@
1
+ export * from './recorder_io.js';
2
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/voice/recorder_io/index.ts"],"names":[],"mappings":"AAGA,cAAc,kBAAkB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export * from "./recorder_io.js";
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../src/voice/recorder_io/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nexport * from './recorder_io.js';\n"],"mappings":"AAGA,cAAc;","names":[]}
@@ -0,0 +1,542 @@
1
+ "use strict";
2
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Installs each entry of `getters` on `target` as an enumerable getter.
var __export = (target, getters) => {
  for (const exportName in getters) {
    __defProp(target, exportName, { get: getters[exportName], enumerable: true });
  }
};
// Mirrors every own property of `source` onto `target` as a live getter,
// skipping `skippedKey` and anything `target` already owns.
var __copyProps = (target, source, skippedKey, descriptor) => {
  const copyable = (source && typeof source === "object") || typeof source === "function";
  if (!copyable) {
    return target;
  }
  for (const prop of __getOwnPropNames(source)) {
    if (prop === skippedKey || __hasOwnProp.call(target, prop)) {
      continue;
    }
    descriptor = __getOwnPropDesc(source, prop);
    __defProp(target, prop, {
      get: () => source[prop],
      enumerable: !descriptor || descriptor.enumerable
    });
  }
  return target;
};
// Wraps a required module so it can be consumed through ESM-style bindings.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // "default" is pointed at the raw module when the importer is in node
  // compatibility mode, when there is no module, or when the module was not
  // flagged with "__esModule" by an ESM-to-CommonJS transform.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  const receiver = needsDefault
    ? __defProp(target, "default", { value: mod, enumerable: true })
    : target;
  return __copyProps(receiver, mod);
};
// Builds a fresh object flagged with `__esModule` that exposes `mod`'s properties.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
var recorder_io_exports = {};
__export(recorder_io_exports, {
  RecorderIO: () => RecorderIO
});
module.exports = __toCommonJS(recorder_io_exports);
34
+ var import_ffmpeg = __toESM(require("@ffmpeg-installer/ffmpeg"), 1);
35
+ var import_mutex = require("@livekit/mutex");
36
+ var import_rtc_node = require("@livekit/rtc-node");
37
+ var import_fluent_ffmpeg = __toESM(require("fluent-ffmpeg"), 1);
38
+ var import_node_fs = __toESM(require("node:fs"), 1);
39
+ var import_node_path = __toESM(require("node:path"), 1);
40
+ var import_node_stream = require("node:stream");
41
+ var import_web = require("node:stream/web");
42
+ var import_log = require("../../log.cjs");
43
+ var import_deferred_stream = require("../../stream/deferred_stream.cjs");
44
+ var import_stream_channel = require("../../stream/stream_channel.cjs");
45
+ var import_utils = require("../../utils.cjs");
46
+ var import_io = require("../io.cjs");
47
// Tell fluent-ffmpeg to use the binary path exposed by @ffmpeg-installer/ffmpeg.
import_fluent_ffmpeg.default.setFfmpegPath(import_ffmpeg.default.path);
// Delay, in milliseconds, between iterations of the recorder's forward task.
const WRITE_INTERVAL_MS = 2500;
// Sample rate used when the RecorderIO options do not provide one.
const DEFAULT_SAMPLE_RATE = 48000;
50
/**
 * Records the audio flowing in and out of an agent session into a single
 * stereo Ogg/Opus file: the input side is written to the first (left)
 * channel and the output side to the second (right) channel.
 *
 * Usage: wrap the session's audio input and output with `recordInput()` and
 * `recordOutput()`, then call `start(outputPath)`; call `close()` to finish
 * the file.
 */
class RecorderIO {
  inRecord;
  outRecord;
  inChan = (0, import_stream_channel.createStreamChannel)();
  outChan = (0, import_stream_channel.createStreamChannel)();
  session;
  sampleRate;
  _outputPath;
  forwardTask;
  encodeTask;
  closeFuture = new import_utils.Future();
  lock = new import_mutex.Mutex();
  started = false;
  // FFmpeg streaming state
  pcmStream;
  ffmpegPromise;
  inResampler;
  outResampler;
  logger = (0, import_log.log)();
  /**
   * @param opts - `agentSession` is the session being recorded; `sampleRate`
   *   is the target rate of the recording (defaults to DEFAULT_SAMPLE_RATE).
   */
  constructor(opts) {
    const { agentSession, sampleRate = DEFAULT_SAMPLE_RATE } = opts;
    this.session = agentSession;
    this.sampleRate = sampleRate;
  }
  /**
   * Starts recording to `outputPath`, creating its parent directory if needed.
   * Calling it while already started is a no-op.
   *
   * @throws Error when `recordInput()` / `recordOutput()` were not both called.
   */
  async start(outputPath) {
    const unlock = await this.lock.lock();
    try {
      if (this.started) return;
      if (!this.inRecord || !this.outRecord) {
        throw new Error(
          "RecorderIO not properly initialized: both `recordInput()` and `recordOutput()` must be called before starting the recorder."
        );
      }
      // Do everything that can throw BEFORE flipping any state. Otherwise a
      // failed mkdir would leave `started === true` with no encode task, and a
      // later close() would wait on `closeFuture` forever.
      const dir = import_node_path.default.dirname(outputPath);
      if (!import_node_fs.default.existsSync(dir)) {
        import_node_fs.default.mkdirSync(dir, { recursive: true });
      }
      this._outputPath = outputPath;
      this.closeFuture = new import_utils.Future();
      this.forwardTask = import_utils.Task.from(({ signal }) => this.forward(signal));
      this.encodeTask = import_utils.Task.from(() => this.encode(), void 0, "recorder_io_encode_task");
      this.started = true;
    } finally {
      unlock();
    }
  }
  /**
   * Stops recording: closes both channels, waits for the encode task to
   * signal completion, then cancels the background tasks. No-op when the
   * recorder was never started.
   */
  async close() {
    const unlock = await this.lock.lock();
    try {
      if (!this.started) return;
      await this.inChan.close();
      await this.outChan.close();
      await this.closeFuture.await;
      await (0, import_utils.cancelAndWait)([this.forwardTask, this.encodeTask]);
      this.started = false;
    } finally {
      unlock();
    }
  }
  /** Wraps `audioInput` so its frames can be recorded; returns the wrapper. */
  recordInput(audioInput) {
    this.inRecord = new RecorderAudioInput(this, audioInput);
    return this.inRecord;
  }
  /** Wraps `audioOutput` so its frames can be recorded; returns the wrapper. */
  recordOutput(audioOutput) {
    this.outRecord = new RecorderAudioOutput(this, audioOutput, (buf) => this.writeCb(buf));
    return this.outRecord;
  }
  /**
   * Callback handed to the output recorder: pairs the output frames in `buf`
   * with whatever input frames accumulated meanwhile and queues both for
   * encoding.
   */
  writeCb(buf) {
    const inputBuf = this.inRecord.takeBuf();
    // The writes are fire-and-forget, so attach rejection handlers (as
    // forward() does) to avoid unhandled promise rejections.
    this.inChan.write(inputBuf).catch((err) => this.logger.error({ err }, "Error writing RecorderIO input buffer"));
    this.outChan.write(buf).catch((err) => this.logger.error({ err }, "Error writing RecorderIO output buffer"));
  }
  /** Whether the recorder is currently started. */
  get recording() {
    return this.started;
  }
  /** Path passed to the most recent successful `start()` call, if any. */
  get outputPath() {
    return this._outputPath;
  }
  /** The `_startedAt` value of the recorded session. */
  get recordingStartedAt() {
    return this.session._startedAt;
  }
  /**
   * Forward task: periodically flush input buffer to encoder
   */
  async forward(signal) {
    while (!signal.aborted) {
      try {
        await (0, import_utils.delay)(WRITE_INTERVAL_MS, { signal });
      } catch {
        // delay() rejected (e.g. because the signal aborted): stop forwarding.
        break;
      }
      // While output frames are pending, the output recorder's own callback
      // will flush the input buffer together with them.
      if (this.outRecord.hasPendingData) {
        continue;
      }
      const inputBuf = this.inRecord.takeBuf();
      this.inChan.write(inputBuf).catch((err) => this.logger.error({ err }, "Error writing RecorderIO input buffer"));
      this.outChan.write([]).catch((err) => this.logger.error({ err }, "Error writing RecorderIO output buffer"));
    }
  }
  /**
   * Start FFmpeg process for streaming encoding
   */
  startFFmpeg() {
    if (this.pcmStream) return;
    this.pcmStream = new import_node_stream.PassThrough();
    this.ffmpegPromise = new Promise((resolve, reject) => {
      (0, import_fluent_ffmpeg.default)(this.pcmStream).inputFormat("s16le").inputOptions([`-ar ${this.sampleRate}`, "-ac 2"]).audioCodec("libopus").audioChannels(2).audioFrequency(this.sampleRate).format("ogg").output(this._outputPath).on("end", () => {
        this.logger.debug("FFmpeg encoding finished");
        resolve();
      }).on("error", (err) => {
        var _a, _b, _c, _d;
        // Errors whose message matches one of these strings are treated as a
        // normal end of encoding rather than a failure.
        if (((_a = err.message) == null ? void 0 : _a.includes("Output stream closed")) || ((_b = err.message) == null ? void 0 : _b.includes("received signal 2")) || ((_c = err.message) == null ? void 0 : _c.includes("SIGKILL")) || ((_d = err.message) == null ? void 0 : _d.includes("SIGINT"))) {
          resolve();
        } else {
          this.logger.error({ err }, "FFmpeg encoding error");
          reject(err);
        }
      }).run();
    });
    // FFmpeg may fail long before encode() awaits this promise. The failure
    // is already logged above, so mark the rejection as handled here; a later
    // `await this.ffmpegPromise` still observes it.
    this.ffmpegPromise.catch(() => {
    });
  }
  /**
   * Resample and mix frames to mono Float32
   *
   * @param opts - `frames` to process, the `resampler` carried over from the
   *   previous call (created lazily from the first frame when absent), and
   *   `flush` to also drain the resampler.
   * @returns the mono samples and the resampler to pass to the next call.
   */
  resampleAndMix(opts) {
    const INV_INT16 = 1 / 32768;
    const { frames, flush = false } = opts;
    let { resampler } = opts;
    if (frames.length === 0 && !flush) {
      return { samples: new Float32Array(0), resampler };
    }
    if (!resampler && frames.length > 0) {
      const firstFrame = frames[0];
      resampler = new import_rtc_node.AudioResampler(firstFrame.sampleRate, this.sampleRate, firstFrame.channels);
    }
    const resampledFrames = [];
    for (const frame of frames) {
      if (resampler) {
        resampledFrames.push(...resampler.push(frame));
      }
    }
    if (flush && resampler) {
      resampledFrames.push(...resampler.flush());
    }
    const totalSamples = resampledFrames.reduce((acc, frame) => acc + frame.samplesPerChannel, 0);
    const samples = new Float32Array(totalSamples);
    let pos = 0;
    for (const frame of resampledFrames) {
      const data = frame.data;
      const numChannels = frame.channels;
      for (let i = 0; i < frame.samplesPerChannel; i++) {
        // Average the interleaved channels, then scale by 1/32768.
        let sum = 0;
        for (let ch = 0; ch < numChannels; ch++) {
          sum += data[i * numChannels + ch];
        }
        samples[pos++] = sum / numChannels * INV_INT16;
      }
    }
    return { samples, resampler };
  }
  /**
   * Write PCM chunk to FFmpeg stream
   *
   * `leftSamples` and `rightSamples` are mono Float32 buffers; the shorter one
   * is padded with leading silence, then both are interleaved as 16-bit
   * stereo. FFmpeg is started on first use.
   */
  writePCM(leftSamples, rightSamples) {
    if (!this.pcmStream) {
      this.startFFmpeg();
    }
    if (leftSamples.length !== rightSamples.length) {
      const diff = Math.abs(leftSamples.length - rightSamples.length);
      if (leftSamples.length < rightSamples.length) {
        this.logger.warn(
          `Input is shorter by ${diff} samples; silence has been prepended to align the input channel.`
        );
        const padded = new Float32Array(rightSamples.length);
        padded.set(leftSamples, diff);
        leftSamples = padded;
      } else {
        const padded = new Float32Array(leftSamples.length);
        padded.set(rightSamples, diff);
        rightSamples = padded;
      }
    }
    const maxLen = Math.max(leftSamples.length, rightSamples.length);
    if (maxLen <= 0) return;
    const stereoData = new Int16Array(maxLen * 2);
    for (let i = 0; i < maxLen; i++) {
      // Scale to the int16 range and clamp so full-scale values do not wrap.
      stereoData[i * 2] = Math.max(
        -32768,
        Math.min(32767, Math.round((leftSamples[i] ?? 0) * 32768))
      );
      stereoData[i * 2 + 1] = Math.max(
        -32768,
        Math.min(32767, Math.round((rightSamples[i] ?? 0) * 32768))
      );
    }
    this.pcmStream.write(Buffer.from(stereoData.buffer));
  }
  /**
   * Encode task: read from channels, mix to stereo, stream to FFmpeg
   *
   * Always resolves `closeFuture` before returning so close() cannot hang.
   */
  async encode() {
    if (!this._outputPath) {
      // Nothing to encode, but close() still waits on closeFuture.
      if (!this.closeFuture.done) {
        this.closeFuture.resolve();
      }
      return;
    }
    const inReader = this.inChan.stream().getReader();
    const outReader = this.outChan.stream().getReader();
    try {
      while (true) {
        // Input and output buffers are consumed in lockstep, one pair at a time.
        const [inResult, outResult] = await Promise.all([inReader.read(), outReader.read()]);
        if (inResult.done || outResult.done) {
          break;
        }
        const inputBuf = inResult.value;
        const outputBuf = outResult.value;
        const inMixed = this.resampleAndMix({ frames: inputBuf, resampler: this.inResampler });
        this.inResampler = inMixed.resampler;
        const outMixed = this.resampleAndMix({
          frames: outputBuf,
          resampler: this.outResampler,
          flush: outputBuf.length > 0
        });
        this.outResampler = outMixed.resampler;
        this.writePCM(inMixed.samples, outMixed.samples);
      }
      if (this.pcmStream) {
        this.pcmStream.end();
        await this.ffmpegPromise;
      }
    } catch (err) {
      this.logger.error({ err }, "Error in encode task");
    } finally {
      inReader.releaseLock();
      outReader.releaseLock();
      // If the loop failed, FFmpeg's input is still open; end it so the
      // encoder is not left waiting for more PCM data.
      if (this.pcmStream && !this.pcmStream.writableEnded) {
        this.pcmStream.end();
      }
      if (!this.closeFuture.done) {
        this.closeFuture.resolve();
      }
    }
  }
}
286
/**
 * Audio input wrapper that passes frames from `source` through unchanged
 * while keeping a copy of every frame seen while the owning recorder reports
 * `recording`.
 */
class RecorderAudioInput extends import_io.AudioInput {
  source;
  recorderIO;
  accFrames = [];
  _startedWallTime;
  constructor(recorderIO, source) {
    super();
    this.recorderIO = recorderIO;
    this.source = source;
    const intercepted = this.createInterceptingStream();
    this.deferredStream.setSource(intercepted);
  }
  /**
   * `Date.now()` value taken when the first frame was accumulated, or
   * undefined if none has been accumulated yet.
   */
  get startedWallTime() {
    return this._startedWallTime;
  }
  /**
   * Hands back the frames accumulated so far and starts a fresh buffer.
   */
  takeBuf() {
    const taken = this.accFrames;
    this.accFrames = [];
    return taken;
  }
  /**
   * Builds the pass-through stream: frames read from the source are copied
   * into the accumulation buffer while recording, and are always re-emitted
   * unchanged.
   */
  createInterceptingStream() {
    const reader = this.source.stream.getReader();
    const tap = (frame, controller) => {
      if (this.recorderIO.recording) {
        if (this._startedWallTime === void 0) {
          this._startedWallTime = Date.now();
        }
        this.accFrames.push(frame);
      }
      controller.enqueue(frame);
    };
    const transform = new import_web.TransformStream({ transform: tap });
    const pump = async () => {
      const writer = transform.writable.getWriter();
      let failure;
      try {
        for (; ; ) {
          const chunk = await reader.read();
          if (chunk.done) break;
          await writer.write(chunk.value);
        }
      } catch (err) {
        // A reader-release error is not a source failure; fall through to the
        // regular close path below.
        if (!(0, import_deferred_stream.isStreamReaderReleaseError)(err)) {
          failure = err;
        }
      }
      if (failure) {
        // Propagate the source failure to the readable side.
        writer.abort(failure);
        return;
      }
      writer.releaseLock();
      try {
        await transform.writable.close();
      } catch {
      }
    };
    pump();
    return transform.readable;
  }
  /** Forwards the attach notification to the wrapped source. */
  onAttached() {
    this.source.onAttached();
  }
  /** Forwards the detach notification to the wrapped source. */
  onDetached() {
    this.source.onDetached();
  }
}
363
/**
 * Audio output wrapper that forwards every call to the next output in the
 * chain while keeping a copy of captured frames and the wall-clock pause
 * intervals. When playback finishes it rebuilds what was played (truncated
 * at the reported playback position, with silence inserted for pauses) and
 * passes it to `writeFn`.
 */
class RecorderAudioOutput extends import_io.AudioOutput {
  recorderIO;
  writeFn;
  accFrames = [];
  _startedWallTime;
  // Start time of the pause currently in progress, if any
  currentPauseStart;
  // Completed pauses as [start, end] wall-clock pairs
  pauseWallTimes = [];
  constructor(recorderIO, audioOutput, writeFn) {
    super(audioOutput.sampleRate, audioOutput);
    this.recorderIO = recorderIO;
    this.writeFn = writeFn;
  }
  /** `Date.now()` value taken when the first frame was accumulated, if any. */
  get startedWallTime() {
    return this._startedWallTime;
  }
  /** True while accumulated frames have not yet been handed to `writeFn`. */
  get hasPendingData() {
    return this.accFrames.length > 0;
  }
  /** Pauses playback, remembering when the pause began while recording. */
  pause() {
    const noPauseInProgress = this.currentPauseStart === void 0;
    if (noPauseInProgress && this.recorderIO.recording) {
      this.currentPauseStart = Date.now();
    }
    if (this.nextInChain) {
      this.nextInChain.pause();
    }
  }
  /**
   * Resumes playback, closing the pause interval in progress while recording.
   */
  resume() {
    const pauseInProgress = this.currentPauseStart !== void 0;
    if (pauseInProgress && this.recorderIO.recording) {
      const interval = [this.currentPauseStart, Date.now()];
      this.pauseWallTimes.push(interval);
      this.currentPauseStart = void 0;
    }
    if (this.nextInChain) {
      this.nextInChain.resume();
    }
  }
  /** Forgets the pause in progress and all completed pause intervals. */
  resetPauseState() {
    this.currentPauseStart = void 0;
    this.pauseWallTimes = [];
  }
  /**
   * Rebuilds the audio that was played from the accumulated frames and
   * pause intervals, then passes the resulting frames to `writeFn`.
   */
  onPlaybackFinished(options) {
    const finishTime = Date.now();
    super.onPlaybackFinished(options);
    if (!this.recorderIO.recording) {
      return;
    }
    // A pause still open at this point ends at the finish time.
    if (this.currentPauseStart !== void 0) {
      this.pauseWallTimes.push([this.currentPauseStart, finishTime]);
      this.currentPauseStart = void 0;
    }
    if (this.accFrames.length === 0) {
      this.resetPauseState();
      return;
    }
    const playbackPosition = options.playbackPosition;
    // Convert wall-clock pause intervals into [position, duration] pairs (in
    // seconds) relative to the start of the played audio.
    let totalPauseDuration = 0;
    for (const [start, end] of this.pauseWallTimes) {
      totalPauseDuration = totalPauseDuration + (end - start);
    }
    const playbackStartTime = finishTime - playbackPosition * 1e3 - totalPauseDuration;
    const pauseEvents = [];
    let pausedSoFar = 0;
    for (const [pauseStart, pauseEnd] of this.pauseWallTimes) {
      const rawPosition = (pauseStart - playbackStartTime - pausedSoFar) / 1e3;
      const duration = (pauseEnd - pauseStart) / 1e3;
      const position = Math.max(0, Math.min(rawPosition, playbackPosition));
      pauseEvents.push([position, duration]);
      pausedSoFar += pauseEnd - pauseStart;
    }
    const [firstFrame] = this.accFrames;
    const sampleRate = firstFrame.sampleRate;
    const numChannels = firstFrame.channels;
    const out = [];
    let elapsed = 0;
    let nextPause = 0;
    for (const frame of this.accFrames) {
      let piece = frame;
      const wholeDuration = frame.samplesPerChannel / frame.sampleRate;
      // The frame crossing the playback position is cut there and ends the walk.
      const isLast = wholeDuration + elapsed > playbackPosition;
      if (isLast) {
        const [head] = splitFrame(piece, playbackPosition - elapsed);
        piece = head;
      }
      // Pauses located at or before the current position go in front of the frame.
      for (; nextPause < pauseEvents.length && pauseEvents[nextPause][0] <= elapsed; nextPause++) {
        const pauseDur = pauseEvents[nextPause][1];
        out.push(createSilenceFrame(pauseDur, sampleRate, numChannels));
      }
      // Pauses falling inside the frame split it, with silence between the parts.
      const pieceDuration = piece.samplesPerChannel / piece.sampleRate;
      for (; nextPause < pauseEvents.length && pauseEvents[nextPause][0] < elapsed + pieceDuration; nextPause++) {
        const [pausePos, pauseDur] = pauseEvents[nextPause];
        const [head, tail] = splitFrame(piece, pausePos - elapsed);
        out.push(head);
        elapsed += head.samplesPerChannel / head.sampleRate;
        out.push(createSilenceFrame(pauseDur, sampleRate, numChannels));
        piece = tail;
      }
      out.push(piece);
      elapsed += piece.samplesPerChannel / piece.sampleRate;
      if (isLast) {
        break;
      }
    }
    // Pauses not consumed by the walk above are appended at the end.
    for (; nextPause < pauseEvents.length; nextPause++) {
      const [pausePos, pauseDur] = pauseEvents[nextPause];
      if (pausePos <= playbackPosition) {
        out.push(createSilenceFrame(pauseDur, sampleRate, numChannels));
      }
    }
    if (out.length > 0) {
      this.writeFn(out);
    }
    this.accFrames = [];
    this.resetPauseState();
  }
  /** Captures a frame, keeps a copy while recording, and forwards it on. */
  async captureFrame(frame) {
    await super.captureFrame(frame);
    if (this.recorderIO.recording) {
      if (this._startedWallTime === void 0) {
        this._startedWallTime = Date.now();
      }
      this.accFrames.push(frame);
    }
    if (this.nextInChain) {
      await this.nextInChain.captureFrame(frame);
    }
  }
  /** Flushes this output and then the next one in the chain. */
  flush() {
    super.flush();
    if (this.nextInChain) {
      this.nextInChain.flush();
    }
  }
  /** Asks the next output in the chain to clear its buffer. */
  clearBuffer() {
    if (this.nextInChain) {
      this.nextInChain.clearBuffer();
    }
  }
}
510
/**
 * Creates an all-zero audio frame.
 *
 * @param duration - length of the silence in seconds
 * @param sampleRate - samples per second per channel
 * @param numChannels - number of interleaved channels
 * @returns an AudioFrame of `floor(duration * sampleRate)` samples per
 *   channel; an empty frame when that count is negative or not a number
 */
function createSilenceFrame(duration, sampleRate, numChannels) {
  // Callers derive durations from Date.now() differences, which can be
  // negative if the system clock is adjusted. A negative typed-array length
  // throws RangeError, so clamp to zero samples instead (`|| 0` maps NaN to 0).
  const samples = Math.max(0, Math.floor(duration * sampleRate)) || 0;
  const data = new Int16Array(samples * numChannels);
  return new import_rtc_node.AudioFrame(data, sampleRate, numChannels, samples);
}
515
/**
 * Cuts `frame` in two at `position` seconds from its start.
 *
 * @returns `[head, tail]`; when `position` is at or before the start the head
 *   is an empty frame, and when it is at or past the end the tail is an empty
 *   frame (the original frame object is returned for the other half).
 */
function splitFrame(frame, position) {
  const { sampleRate, channels } = frame;
  const makeEmpty = () => new import_rtc_node.AudioFrame(new Int16Array(0), sampleRate, channels, 0);
  if (position <= 0) {
    return [makeEmpty(), frame];
  }
  if (position >= frame.samplesPerChannel / sampleRate) {
    return [frame, makeEmpty()];
  }
  // Samples are interleaved, so the cut index is per-channel samples times channels.
  const headSamples = Math.floor(position * sampleRate);
  const cutIndex = headSamples * channels;
  const head = new import_rtc_node.AudioFrame(
    frame.data.slice(0, cutIndex),
    sampleRate,
    channels,
    headSamples
  );
  const tail = new import_rtc_node.AudioFrame(
    frame.data.slice(cutIndex),
    sampleRate,
    channels,
    frame.samplesPerChannel - headSamples
  );
  return [head, tail];
}
538
+ // Annotate the CommonJS export names for ESM import in node:
539
+ 0 && (module.exports = {
540
+ RecorderIO
541
+ });
542
+ //# sourceMappingURL=recorder_io.cjs.map